author     Shih-wei Liao <sliao@google.com>   2010-04-07 12:21:42 -0700
committer  Shih-wei Liao <sliao@google.com>   2010-04-07 12:21:42 -0700
commit     e4454320b3cfffe926a487c33fbeb454366de2f8 (patch)
tree       133c05da684edf4a3b2529bcacfa996298c455f6 /lib
parent     20570085304f0a4ab4f112a01d77958bbd2827a1 (diff)
libbcc
Change-Id: Ieaa3ebd5a38f370752495549f8870b534eeedfc5
Diffstat (limited to 'lib')
-rw-r--r--lib/Analysis/AliasAnalysisCounter.cpp2
-rw-r--r--lib/Analysis/AliasAnalysisEvaluator.cpp6
-rw-r--r--lib/Analysis/Android.mk69
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp4
-rw-r--r--lib/Analysis/CaptureTracking.cpp2
-rw-r--r--lib/Analysis/ConstantFolding.cpp57
-rw-r--r--lib/Analysis/DebugInfo.cpp9
-rw-r--r--lib/Analysis/IPA/Andersens.cpp2868
-rw-r--r--lib/Analysis/IPA/CMakeLists.txt1
-rw-r--r--lib/Analysis/IPA/GlobalsModRef.cpp4
-rw-r--r--lib/Analysis/IVUsers.cpp206
-rw-r--r--lib/Analysis/InlineCost.cpp4
-rw-r--r--lib/Analysis/InstructionSimplify.cpp33
-rw-r--r--lib/Analysis/MemoryDependenceAnalysis.cpp15
-rw-r--r--lib/Analysis/PHITransAddr.cpp60
-rw-r--r--lib/Analysis/PointerTracking.cpp2
-rw-r--r--lib/Analysis/ProfileInfo.cpp8
-rw-r--r--lib/Analysis/ScalarEvolution.cpp241
-rw-r--r--lib/Analysis/ScalarEvolutionAliasAnalysis.cpp8
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp400
-rw-r--r--lib/Analysis/ValueTracking.cpp153
-rw-r--r--lib/AsmParser/Android.mk28
-rw-r--r--lib/AsmParser/LLLexer.cpp1
-rw-r--r--lib/AsmParser/LLParser.cpp192
-rw-r--r--lib/AsmParser/LLParser.h5
-rw-r--r--lib/AsmParser/LLToken.h1
-rw-r--r--lib/Bitcode/Reader/Android.mk29
-rw-r--r--lib/Bitcode/Reader/BitReader.cpp63
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp34
-rw-r--r--lib/Bitcode/Writer/BitWriter.cpp14
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp61
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.cpp5
-rw-r--r--lib/CodeGen/Android.mk99
-rw-r--r--lib/CodeGen/AsmPrinter/Android.mk31
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp43
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.cpp1
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp17
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.cpp232
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.h5
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfPrinter.cpp77
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfPrinter.h12
-rw-r--r--lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp1
-rw-r--r--lib/CodeGen/BranchFolding.cpp6
-rw-r--r--lib/CodeGen/CMakeLists.txt3
-rw-r--r--lib/CodeGen/CalcSpillWeights.cpp5
-rw-r--r--lib/CodeGen/CodePlacementOpt.cpp17
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.cpp4
-rw-r--r--lib/CodeGen/DeadMachineInstructionElim.cpp42
-rw-r--r--lib/CodeGen/IntrinsicLowering.cpp4
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp51
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp142
-rw-r--r--lib/CodeGen/LiveVariables.cpp43
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp30
-rw-r--r--lib/CodeGen/MachineCSE.cpp268
-rw-r--r--lib/CodeGen/MachineFunction.cpp3
-rw-r--r--lib/CodeGen/MachineInstr.cpp90
-rw-r--r--lib/CodeGen/MachineLICM.cpp45
-rw-r--r--lib/CodeGen/MachineModuleInfoImpls.cpp11
-rw-r--r--lib/CodeGen/MachineRegisterInfo.cpp13
-rw-r--r--lib/CodeGen/MachineSink.cpp16
-rw-r--r--lib/CodeGen/OptimizePHIs.cpp189
-rw-r--r--lib/CodeGen/PBQP/HeuristicSolver.h29
-rw-r--r--lib/CodeGen/PBQP/Heuristics/Briggs.h9
-rw-r--r--lib/CodeGen/PHIElimination.cpp41
-rw-r--r--lib/CodeGen/PHIElimination.h72
-rw-r--r--lib/CodeGen/Passes.cpp2
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp2
-rw-r--r--lib/CodeGen/ProcessImplicitDefs.cpp5
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp22
-rw-r--r--lib/CodeGen/PseudoSourceValue.cpp37
-rw-r--r--lib/CodeGen/RegAllocLinearScan.cpp4
-rw-r--r--lib/CodeGen/RegAllocLocal.cpp33
-rw-r--r--lib/CodeGen/RegAllocPBQP.cpp6
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/Android.mk48
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp178
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp28
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp51
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.h11
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp204
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp17
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp39
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp8
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp19
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp154
-rw-r--r--lib/CodeGen/SelectionDAG/SDDbgValue.h67
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp16
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp320
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp171
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp1361
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp72
-rw-r--r--lib/CodeGen/SimpleRegisterCoalescing.cpp68
-rw-r--r--lib/CodeGen/SjLjEHPrepare.cpp67
-rw-r--r--lib/CodeGen/StackProtector.cpp2
-rw-r--r--lib/CodeGen/TailDuplication.cpp33
-rw-r--r--lib/CodeGen/TargetInstrInfoImpl.cpp36
-rw-r--r--lib/CodeGen/TargetLoweringObjectFileImpl.cpp902
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp20
-rw-r--r--lib/CodeGen/VirtRegMap.cpp6
-rw-r--r--lib/CodeGen/VirtRegRewriter.cpp33
-rw-r--r--lib/CompilerDriver/Action.cpp15
-rw-r--r--lib/CompilerDriver/CompilationGraph.cpp5
-rw-r--r--lib/CompilerDriver/Main.cpp7
-rw-r--r--lib/CompilerDriver/Tool.cpp21
-rw-r--r--lib/ExecutionEngine/ExecutionEngine.cpp12
-rw-r--r--lib/ExecutionEngine/ExecutionEngineBindings.cpp77
-rw-r--r--lib/ExecutionEngine/Interpreter/Execution.cpp28
-rw-r--r--lib/ExecutionEngine/JIT/Android.mk32
-rw-r--r--lib/ExecutionEngine/JIT/JIT.cpp75
-rw-r--r--lib/ExecutionEngine/JIT/JIT.h4
-rw-r--r--lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp6
-rw-r--r--lib/ExecutionEngine/JIT/JITEmitter.cpp444
-rw-r--r--lib/Linker/LinkModules.cpp2
-rw-r--r--lib/MC/Android.mk45
-rw-r--r--lib/MC/CMakeLists.txt1
-rw-r--r--lib/MC/MCAsmStreamer.cpp21
-rw-r--r--lib/MC/MCAssembler.cpp386
-rw-r--r--lib/MC/MCCodeEmitter.cpp12
-rw-r--r--lib/MC/MCInstPrinter.cpp7
-rw-r--r--lib/MC/MCMachOStreamer.cpp49
-rw-r--r--lib/MC/MCNullStreamer.cpp3
-rw-r--r--lib/MC/MCParser/AsmParser.cpp38
-rw-r--r--lib/MC/MCSectionMachO.cpp1
-rw-r--r--lib/MC/TargetAsmBackend.cpp19
-rw-r--r--lib/Support/APFloat.cpp88
-rw-r--r--lib/Support/APInt.cpp10
-rw-r--r--lib/Support/Android.mk72
-rw-r--r--lib/Support/CommandLine.cpp14
-rw-r--r--lib/Support/FormattedStream.cpp5
-rw-r--r--lib/Support/GraphWriter.cpp2
-rw-r--r--lib/Support/MemoryBuffer.cpp3
-rw-r--r--lib/Support/Regex.cpp76
-rw-r--r--lib/Support/StringRef.cpp105
-rw-r--r--lib/Support/Triple.cpp9
-rw-r--r--lib/Support/raw_ostream.cpp19
-rw-r--r--lib/System/Android.mk46
-rw-r--r--lib/System/Unix/Host.inc1
-rw-r--r--lib/System/Unix/Program.inc2
-rw-r--r--lib/System/Unix/Signals.inc11
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp24
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h6
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp119
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.h1
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp101
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp7
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp188
-rw-r--r--lib/Target/ARM/ARMISelLowering.h6
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td86
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td1056
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td803
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td128
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td818
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td97
-rw-r--r--lib/Target/ARM/ARMJITInfo.cpp24
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp18
-rw-r--r--lib/Target/ARM/ARMRelocations.h8
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp40
-rw-r--r--lib/Target/ARM/ARMSubtarget.h4
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.h4
-rw-r--r--lib/Target/ARM/Android.mk49
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp9
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp6
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMInstPrinter.h1
-rw-r--r--lib/Target/ARM/README.txt2
-rw-r--r--lib/Target/ARM/TargetInfo/Android.mk24
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp28
-rw-r--r--lib/Target/Alpha/AlphaCallingConv.td3
-rw-r--r--lib/Target/Alpha/AlphaISelDAGToDAG.cpp17
-rw-r--r--lib/Target/Alpha/AlphaISelLowering.cpp45
-rw-r--r--lib/Target/Alpha/AlphaInstrInfo.td6
-rw-r--r--lib/Target/Alpha/AlphaRegisterInfo.cpp7
-rw-r--r--lib/Target/Android.mk38
-rw-r--r--lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp10
-rw-r--r--lib/Target/Blackfin/BlackfinISelLowering.cpp7
-rw-r--r--lib/Target/CBackend/CBackend.cpp256
-rw-r--r--lib/Target/CBackend/CTargetMachine.h3
-rw-r--r--lib/Target/CellSPU/SPU64InstrInfo.td8
-rw-r--r--lib/Target/CellSPU/SPUISelDAGToDAG.cpp129
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp26
-rw-r--r--lib/Target/CellSPU/SPUMCAsmInfo.cpp3
-rw-r--r--lib/Target/CppBackend/CPPBackend.cpp41
-rw-r--r--lib/Target/CppBackend/CPPTargetMachine.h3
-rw-r--r--lib/Target/MBlaze/AsmPrinter/CMakeLists.txt9
-rw-r--r--lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp302
-rw-r--r--lib/Target/MBlaze/AsmPrinter/Makefile17
-rw-r--r--lib/Target/MBlaze/CMakeLists.txt27
-rw-r--r--lib/Target/MBlaze/MBlaze.h39
-rw-r--r--lib/Target/MBlaze/MBlaze.td85
-rw-r--r--lib/Target/MBlaze/MBlazeCallingConv.td26
-rw-r--r--lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp75
-rw-r--r--lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp339
-rw-r--r--lib/Target/MBlaze/MBlazeISelLowering.cpp975
-rw-r--r--lib/Target/MBlaze/MBlazeISelLowering.h149
-rw-r--r--lib/Target/MBlaze/MBlazeInstrFPU.td223
-rw-r--r--lib/Target/MBlaze/MBlazeInstrFSL.td153
-rw-r--r--lib/Target/MBlaze/MBlazeInstrFormats.td246
-rw-r--r--lib/Target/MBlaze/MBlazeInstrInfo.cpp222
-rw-r--r--lib/Target/MBlaze/MBlazeInstrInfo.h242
-rw-r--r--lib/Target/MBlaze/MBlazeInstrInfo.td672
-rw-r--r--lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp109
-rw-r--r--lib/Target/MBlaze/MBlazeIntrinsicInfo.h33
-rw-r--r--lib/Target/MBlaze/MBlazeIntrinsics.td137
-rw-r--r--lib/Target/MBlaze/MBlazeMCAsmInfo.cpp27
-rw-r--r--lib/Target/MBlaze/MBlazeMCAsmInfo.h30
-rw-r--r--lib/Target/MBlaze/MBlazeMachineFunction.h136
-rw-r--r--lib/Target/MBlaze/MBlazeRegisterInfo.cpp421
-rw-r--r--lib/Target/MBlaze/MBlazeRegisterInfo.h96
-rw-r--r--lib/Target/MBlaze/MBlazeRegisterInfo.td186
-rw-r--r--lib/Target/MBlaze/MBlazeSchedule.td63
-rw-r--r--lib/Target/MBlaze/MBlazeSubtarget.cpp31
-rw-r--r--lib/Target/MBlaze/MBlazeSubtarget.h79
-rw-r--r--lib/Target/MBlaze/MBlazeTargetMachine.cpp66
-rw-r--r--lib/Target/MBlaze/MBlazeTargetMachine.h69
-rw-r--r--lib/Target/MBlaze/MBlazeTargetObjectFile.cpp88
-rw-r--r--lib/Target/MBlaze/MBlazeTargetObjectFile.h41
-rw-r--r--lib/Target/MBlaze/Makefile23
-rw-r--r--lib/Target/MBlaze/TargetInfo/CMakeLists.txt7
-rw-r--r--lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp19
-rw-r--r--lib/Target/MBlaze/TargetInfo/Makefile15
-rw-r--r--lib/Target/MSIL/MSILWriter.cpp18
-rw-r--r--lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp23
-rw-r--r--lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp23
-rw-r--r--lib/Target/MSP430/MSP430ISelDAGToDAG.cpp330
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp31
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.td2
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp35
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp23
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td11
-rw-r--r--lib/Target/Mips/MipsTargetObjectFile.h2
-rw-r--r--lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp27
-rw-r--r--lib/Target/PIC16/PIC16ABINames.h68
-rw-r--r--lib/Target/PIC16/PIC16DebugInfo.cpp2
-rw-r--r--lib/Target/PIC16/PIC16ISelDAGToDAG.cpp10
-rw-r--r--lib/Target/PIC16/PIC16ISelDAGToDAG.h4
-rw-r--r--lib/Target/PIC16/PIC16ISelLowering.cpp32
-rw-r--r--lib/Target/PIC16/PIC16MemSelOpt.cpp105
-rw-r--r--lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp299
-rw-r--r--lib/Target/PIC16/PIC16Passes/PIC16Cloner.h83
-rw-r--r--lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp26
-rw-r--r--lib/Target/PIC16/PIC16Passes/PIC16Overlay.h23
-rw-r--r--lib/Target/PIC16/PIC16TargetObjectFile.cpp6
-rw-r--r--lib/Target/PIC16/PIC16TargetObjectFile.h3
-rw-r--r--lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp3
-rw-r--r--lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp2
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.td17
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.cpp2
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp12
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp124
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td11
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp94
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td28
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp12
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.td12
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp29
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h12
-rw-r--r--lib/Target/PowerPC/README.txt19
-rw-r--r--lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp47
-rw-r--r--lib/Target/Sparc/README.txt1
-rw-r--r--lib/Target/Sparc/SparcISelDAGToDAG.cpp16
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp51
-rw-r--r--lib/Target/Sparc/SparcMachineFunctionInfo.h2
-rw-r--r--lib/Target/SystemZ/SystemZISelDAGToDAG.cpp59
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp10
-rw-r--r--lib/Target/SystemZ/SystemZInstrFP.td2
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.td2
-rw-r--r--lib/Target/SystemZ/SystemZMachineFunctionInfo.h3
-rw-r--r--lib/Target/TargetData.cpp2
-rw-r--r--lib/Target/TargetInstrInfo.cpp1
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp844
-rw-r--r--lib/Target/X86/Android.mk47
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp49
-rw-r--r--lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp7
-rw-r--r--lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h5
-rw-r--r--lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp78
-rw-r--r--lib/Target/X86/AsmPrinter/X86AsmPrinter.h3
-rw-r--r--lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp4
-rw-r--r--lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h2
-rw-r--r--lib/Target/X86/CMakeLists.txt1
-rw-r--r--lib/Target/X86/README-SSE.txt8
-rw-r--r--lib/Target/X86/README.txt71
-rw-r--r--lib/Target/X86/TargetInfo/Android.mk24
-rw-r--r--lib/Target/X86/X86.h19
-rw-r--r--lib/Target/X86/X86AsmBackend.cpp34
-rw-r--r--lib/Target/X86/X86COFFMachineModuleInfo.cpp97
-rw-r--r--lib/Target/X86/X86COFFMachineModuleInfo.h27
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp387
-rw-r--r--lib/Target/X86/X86FastISel.cpp77
-rw-r--r--lib/Target/X86/X86FixupKinds.h25
-rw-r--r--lib/Target/X86/X86FloatingPointRegKill.cpp2
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp403
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp512
-rw-r--r--lib/Target/X86/X86ISelLowering.h16
-rw-r--r--lib/Target/X86/X86Instr64bit.td79
-rw-r--r--lib/Target/X86/X86InstrFPStack.td2
-rw-r--r--lib/Target/X86/X86InstrFormats.td36
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp110
-rw-r--r--lib/Target/X86/X86InstrInfo.h54
-rw-r--r--lib/Target/X86/X86InstrInfo.td420
-rw-r--r--lib/Target/X86/X86InstrMMX.td17
-rw-r--r--lib/Target/X86/X86InstrSSE.td427
-rw-r--r--lib/Target/X86/X86MCAsmInfo.cpp30
-rw-r--r--lib/Target/X86/X86MCAsmInfo.h5
-rw-r--r--lib/Target/X86/X86MCCodeEmitter.cpp350
-rw-r--r--lib/Target/X86/X86MachineFunctionInfo.h15
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp22
-rw-r--r--lib/Target/X86/X86RegisterInfo.h3
-rw-r--r--lib/Target/X86/X86RegisterInfo.td63
-rw-r--r--lib/Target/X86/X86Subtarget.h25
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp43
-rw-r--r--lib/Target/X86/X86TargetMachine.h12
-rw-r--r--lib/Target/X86/X86TargetObjectFile.cpp139
-rw-r--r--lib/Target/X86/X86TargetObjectFile.h42
-rw-r--r--lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp23
-rw-r--r--lib/Target/XCore/XCoreISelDAGToDAG.cpp17
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp81
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h12
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.cpp13
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.td38
-rw-r--r--lib/Target/XCore/XCoreTargetObjectFile.h3
-rw-r--r--lib/Transforms/Hello/Hello.cpp1
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp6
-rw-r--r--lib/Transforms/IPO/ConstantMerge.cpp41
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp4
-rw-r--r--lib/Transforms/IPO/DeadTypeElimination.cpp4
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp8
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp178
-rw-r--r--lib/Transforms/IPO/Inliner.cpp25
-rw-r--r--lib/Transforms/IPO/StripSymbols.cpp9
-rw-r--r--lib/Transforms/InstCombine/InstCombine.h24
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp18
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp394
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp207
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp47
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp43
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp30
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp20
-rw-r--r--lib/Transforms/InstCombine/InstCombinePHI.cpp6
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp32
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp8
-rw-r--r--lib/Transforms/InstCombine/InstCombineVectorOps.cpp4
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp14
-rw-r--r--lib/Transforms/Instrumentation/ProfilingUtils.cpp2
-rw-r--r--lib/Transforms/Scalar/ABCD.cpp4
-rw-r--r--lib/Transforms/Scalar/Android.mk55
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp167
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp8
-rw-r--r--lib/Transforms/Scalar/GVN.cpp139
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp208
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp17
-rw-r--r--lib/Transforms/Scalar/LICM.cpp4
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp5117
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp12
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp2
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp34
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp58
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp39
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp969
-rw-r--r--lib/Transforms/Utils/AddrModeMatcher.cpp8
-rw-r--r--lib/Transforms/Utils/Android.mk49
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp29
-rw-r--r--lib/Transforms/Utils/BreakCriticalEdges.cpp86
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp324
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt1
-rw-r--r--lib/Transforms/Utils/Local.cpp15
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp48
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp12
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp12
-rw-r--r--lib/VMCore/Android.mk61
-rw-r--r--lib/VMCore/AsmWriter.cpp66
-rw-r--r--lib/VMCore/Attributes.cpp9
-rw-r--r--lib/VMCore/ConstantFold.cpp154
-rw-r--r--lib/VMCore/Constants.cpp176
-rw-r--r--lib/VMCore/ConstantsContext.h15
-rw-r--r--lib/VMCore/Core.cpp227
-rw-r--r--lib/VMCore/Function.cpp12
-rw-r--r--lib/VMCore/Globals.cpp4
-rw-r--r--lib/VMCore/IRBuilder.cpp2
-rw-r--r--lib/VMCore/InlineAsm.cpp2
-rw-r--r--lib/VMCore/Instructions.cpp173
-rw-r--r--lib/VMCore/LLVMContextImpl.h35
-rw-r--r--lib/VMCore/Makefile4
-rw-r--r--lib/VMCore/Metadata.cpp7
-rw-r--r--lib/VMCore/Pass.cpp9
-rw-r--r--lib/VMCore/PassManager.cpp33
-rw-r--r--lib/VMCore/Type.cpp207
-rw-r--r--lib/VMCore/TypesContext.h26
-rw-r--r--lib/VMCore/Value.cpp12
-rw-r--r--lib/VMCore/ValueTypes.cpp7
-rw-r--r--lib/VMCore/Verifier.cpp145
389 files changed, 24366 insertions, 13935 deletions
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index 761cd46..1053955 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -162,7 +162,7 @@ AliasAnalysisCounter::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
errs() << MRString << ": Ptr: ";
errs() << "[" << Size << "B] ";
WriteAsOperand(errs(), P, true, M);
- errs() << "\t<->" << *CS.getInstruction();
+ errs() << "\t<->" << *CS.getInstruction() << '\n';
}
return R;
}
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 6b0a956..308b9e3 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -115,11 +115,11 @@ bool AAEval::runOnFunction(Function &F) {
SetVector<CallSite> CallSites;
for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
- if (isa<PointerType>(I->getType())) // Add all pointer arguments
+ if (I->getType()->isPointerTy()) // Add all pointer arguments
Pointers.insert(I);
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
- if (isa<PointerType>(I->getType())) // Add all pointer instructions
+ if (I->getType()->isPointerTy()) // Add all pointer instructions
Pointers.insert(&*I);
Instruction &Inst = *I;
User::op_iterator OI = Inst.op_begin();
@@ -128,7 +128,7 @@ bool AAEval::runOnFunction(Function &F) {
isa<Function>(CS.getCalledValue()))
++OI; // Skip actual functions for direct function calls.
for (; OI != Inst.op_end(); ++OI)
- if (isa<PointerType>((*OI)->getType()) && !isa<ConstantPointerNull>(*OI))
+ if ((*OI)->getType()->isPointerTy() && !isa<ConstantPointerNull>(*OI))
Pointers.insert(*OI);
if (CS.getInstruction()) CallSites.insert(CS);
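The isa<PointerType>(T) to T->isPointerTy() change above is the idiom applied throughout this commit for pointer-type tests. A minimal sketch of the two equivalent spellings, assuming a hypothetical caller-supplied Value *V and LLVM 2.7-era headers:

#include "llvm/Type.h"
#include "llvm/Value.h"
using namespace llvm;

// Both forms ask the same question; the commit prefers the Type predicate
// over the isa<> template query. V is a hypothetical caller-supplied value.
static bool isPointerValued(const Value *V) {
  // Old spelling (removed by this commit): isa<PointerType>(V->getType())
  // New spelling:
  return V->getType()->isPointerTy();
}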
diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk
new file mode 100644
index 0000000..1d038f2
--- /dev/null
+++ b/lib/Analysis/Android.mk
@@ -0,0 +1,69 @@
+LOCAL_PATH:= $(call my-dir)
+
+analysis_SRC_FILES := \
+ AliasAnalysis.cpp \
+ AliasAnalysisCounter.cpp \
+ AliasAnalysisEvaluator.cpp \
+ AliasDebugger.cpp \
+ AliasSetTracker.cpp \
+ Analysis.cpp \
+ BasicAliasAnalysis.cpp \
+ CFGPrinter.cpp \
+ CaptureTracking.cpp \
+ ConstantFolding.cpp \
+ DbgInfoPrinter.cpp \
+ DebugInfo.cpp \
+ DomPrinter.cpp \
+ IVUsers.cpp \
+ InlineCost.cpp \
+ InstCount.cpp \
+ InstructionSimplify.cpp \
+ Interval.cpp \
+ IntervalPartition.cpp \
+ LazyValueInfo.cpp \
+ LibCallAliasAnalysis.cpp \
+ LibCallSemantics.cpp \
+ LiveValues.cpp \
+ MemoryBuiltins.cpp \
+ MemoryDependenceAnalysis.cpp \
+ LoopDependenceAnalysis.cpp \
+ LoopInfo.cpp \
+ LoopPass.cpp \
+ PHITransAddr.cpp \
+ PointerTracking.cpp \
+ PostDominators.cpp \
+ ProfileEstimatorPass.cpp \
+ ProfileInfo.cpp \
+ ProfileInfoLoader.cpp \
+ ProfileInfoLoaderPass.cpp \
+ ProfileVerifierPass.cpp \
+ ScalarEvolution.cpp \
+ ScalarEvolutionAliasAnalysis.cpp \
+ ScalarEvolutionExpander.cpp \
+ SparsePropagation.cpp \
+ Trace.cpp \
+ ValueTracking.cpp
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(analysis_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMAnalysis
+
+include $(LLVM_HOST_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(analysis_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMAnalysis
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 36b831c..31a649d 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -290,7 +290,7 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
CI != CE; ++CI, ++ArgNo) {
// Only look at the no-capture pointer arguments.
- if (!isa<PointerType>((*CI)->getType()) ||
+ if (!(*CI)->getType()->isPointerTy() ||
!CS.paramHasAttr(ArgNo+1, Attribute::NoCapture))
continue;
@@ -662,7 +662,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
// Are we checking for alias of the same value?
if (V1 == V2) return MustAlias;
- if (!isa<PointerType>(V1->getType()) || !isa<PointerType>(V2->getType()))
+ if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
return NoAlias; // Scalars cannot alias each other
// Figure out what objects these things are pointing to if we can.
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index 10a8b11..8767c18 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -44,7 +44,7 @@ static int const Threshold = 20;
/// counts as capturing it or not.
bool llvm::PointerMayBeCaptured(const Value *V,
bool ReturnCaptures, bool StoreCaptures) {
- assert(isa<PointerType>(V->getType()) && "Capture is for pointers only!");
+ assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
SmallVector<Use*, Threshold> Worklist;
SmallSet<Use*, Threshold> Visited;
int Count = 0;
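For context on the entry point whose assert changed above, a minimal sketch of how a client pass might query it; the Arg value and the flag choices are hypothetical, while the PointerMayBeCaptured signature and its pointer-only precondition come from the diff:

#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Value.h"
using namespace llvm;

// Conservatively decide whether a pointer value can escape the function.
// Arg must be pointer-typed, matching the assert in PointerMayBeCaptured.
static bool mayEscape(const Value *Arg) {
  // ReturnCaptures = true: returning the pointer counts as a capture.
  // StoreCaptures  = true: storing the pointer anywhere counts as a capture.
  return PointerMayBeCaptured(Arg, /*ReturnCaptures=*/true,
                              /*StoreCaptures=*/true);
}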
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index ba87040..114db2d 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -80,7 +80,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
// First thing is first. We only want to think about integer here, so if
// we have something in FP form, recast it as integer.
- if (DstEltTy->isFloatingPoint()) {
+ if (DstEltTy->isFloatingPointTy()) {
// Fold to an vector of integers with same size as our FP type.
unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
const Type *DestIVTy =
@@ -95,7 +95,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
// Okay, we know the destination is integer, if the input is FP, convert
// it to integer first.
- if (SrcEltTy->isFloatingPoint()) {
+ if (SrcEltTy->isFloatingPointTy()) {
unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
const Type *SrcIVTy =
VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
@@ -359,7 +359,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
MapTy = Type::getInt32PtrTy(C->getContext());
else if (LoadTy->isDoubleTy())
MapTy = Type::getInt64PtrTy(C->getContext());
- else if (isa<VectorType>(LoadTy)) {
+ else if (LoadTy->isVectorTy()) {
MapTy = IntegerType::get(C->getContext(),
TD.getTypeAllocSizeInBits(LoadTy));
MapTy = PointerType::getUnqual(MapTy);
@@ -605,7 +605,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
SmallVector<Constant*, 32> NewIdxs;
do {
if (const SequentialType *ATy = dyn_cast<SequentialType>(Ty)) {
- if (isa<PointerType>(ATy)) {
+ if (ATy->isPointerTy()) {
// The only pointer indexing we'll do is on the first index of the GEP.
if (!NewIdxs.empty())
break;
@@ -783,45 +783,12 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
// If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
// the int size is >= the ptr size. This requires knowing the width of a
// pointer, so it can't be done in ConstantExpr::getCast.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
if (TD &&
- TD->getPointerSizeInBits() <=
- CE->getType()->getScalarSizeInBits()) {
- if (CE->getOpcode() == Instruction::PtrToInt)
- return FoldBitCast(CE->getOperand(0), DestTy, *TD);
-
- // If there's a constant offset added to the integer value before
- // it is casted back to a pointer, see if the expression can be
- // converted into a GEP.
- if (CE->getOpcode() == Instruction::Add)
- if (ConstantInt *L = dyn_cast<ConstantInt>(CE->getOperand(0)))
- if (ConstantExpr *R = dyn_cast<ConstantExpr>(CE->getOperand(1)))
- if (R->getOpcode() == Instruction::PtrToInt)
- if (GlobalVariable *GV =
- dyn_cast<GlobalVariable>(R->getOperand(0))) {
- const PointerType *GVTy = cast<PointerType>(GV->getType());
- if (const ArrayType *AT =
- dyn_cast<ArrayType>(GVTy->getElementType())) {
- const Type *ElTy = AT->getElementType();
- uint64_t AllocSize = TD->getTypeAllocSize(ElTy);
- APInt PSA(L->getValue().getBitWidth(), AllocSize);
- if (ElTy == cast<PointerType>(DestTy)->getElementType() &&
- L->getValue().urem(PSA) == 0) {
- APInt ElemIdx = L->getValue().udiv(PSA);
- if (ElemIdx.ult(APInt(ElemIdx.getBitWidth(),
- AT->getNumElements()))) {
- Constant *Index[] = {
- Constant::getNullValue(CE->getType()),
- ConstantInt::get(ElTy->getContext(), ElemIdx)
- };
- return
- ConstantExpr::getGetElementPtr(GV, &Index[0], 2);
- }
- }
- }
- }
- }
- }
+ TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() &&
+ CE->getOpcode() == Instruction::PtrToInt)
+ return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::Trunc:
case Instruction::ZExt:
@@ -1179,6 +1146,12 @@ llvm::ConstantFoldCall(Function *F,
return 0;
}
+ if (isa<UndefValue>(Operands[0])) {
+ if (Name.startswith("llvm.bswap"))
+ return Operands[0];
+ return 0;
+ }
+
return 0;
}
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 258f1db..5cfe666 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -1007,12 +1007,15 @@ DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context,
/// CreateBlock - This creates a descriptor for a lexical block with the
/// specified parent VMContext.
-DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context) {
+DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context,
+ unsigned LineNo, unsigned Col) {
Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_lexical_block),
- Context.getNode()
+ Context.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Col)
};
- return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 2));
+ return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 4));
}
/// CreateNameSpace - This creates new descriptor for a namespace
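A minimal sketch of a front-end call site for the widened factory method; the DIFactory instance, the enclosing scope descriptor, and the source coordinates are hypothetical, while the (Context, LineNo, Col) parameter list comes from the hunk above:

#include "llvm/Analysis/DebugInfo.h"
using namespace llvm;

// Emit a lexical-block descriptor for a `{ ... }` scope that starts at
// line 42, column 7, nested inside an already-created scope descriptor.
static DILexicalBlock emitBlockScope(DIFactory &DIF,
                                     DIDescriptor EnclosingScope) {
  return DIF.CreateLexicalBlock(EnclosingScope, /*LineNo=*/42, /*Col=*/7);
}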
diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp
deleted file mode 100644
index 4180206..0000000
--- a/lib/Analysis/IPA/Andersens.cpp
+++ /dev/null
@@ -1,2868 +0,0 @@
-//===- Andersens.cpp - Andersen's Interprocedural Alias Analysis ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an implementation of Andersen's interprocedural alias
-// analysis
-//
-// In pointer analysis terms, this is a subset-based, flow-insensitive,
-// field-sensitive, and context-insensitive algorithm pointer algorithm.
-//
-// This algorithm is implemented as three stages:
-// 1. Object identification.
-// 2. Inclusion constraint identification.
-// 3. Offline constraint graph optimization
-// 4. Inclusion constraint solving.
-//
-// The object identification stage identifies all of the memory objects in the
-// program, which includes globals, heap allocated objects, and stack allocated
-// objects.
-//
-// The inclusion constraint identification stage finds all inclusion constraints
-// in the program by scanning the program, looking for pointer assignments and
-// other statements that effect the points-to graph. For a statement like "A =
-// B", this statement is processed to indicate that A can point to anything that
-// B can point to. Constraints can handle copies, loads, and stores, and
-// address taking.
-//
-// The offline constraint graph optimization portion includes offline variable
-// substitution algorithms intended to compute pointer and location
-// equivalences. Pointer equivalences are those pointers that will have the
-// same points-to sets, and location equivalences are those variables that
-// always appear together in points-to sets. It also includes an offline
-// cycle detection algorithm that allows cycles to be collapsed sooner
-// during solving.
-//
-// The inclusion constraint solving phase iteratively propagates the inclusion
-// constraints until a fixed point is reached. This is an O(N^3) algorithm.
-//
-// Function constraints are handled as if they were structs with X fields.
-// Thus, an access to argument X of function Y is an access to node index
-// getNode(Y) + X. This representation allows handling of indirect calls
-// without any issues. To wit, an indirect call Y(a,b) is equivalent to
-// *(Y + 1) = a, *(Y + 2) = b.
-// The return node for a function is always located at getNode(F) +
-// CallReturnPos. The arguments start at getNode(F) + CallArgPos.
-//
-// Future Improvements:
-// Use of BDD's.
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "anders-aa"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/InstVisitor.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/System/Atomic.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/ADT/DenseSet.h"
-#include <algorithm>
-#include <set>
-#include <list>
-#include <map>
-#include <stack>
-#include <vector>
-#include <queue>
-
-// Determining the actual set of nodes the universal set can consist of is very
-// expensive because it means propagating around very large sets. We rely on
-// other analysis being able to determine which nodes can never be pointed to in
-// order to disambiguate further than "points-to anything".
-#define FULL_UNIVERSAL 0
-
-using namespace llvm;
-#ifndef NDEBUG
-STATISTIC(NumIters , "Number of iterations to reach convergence");
-#endif
-STATISTIC(NumConstraints, "Number of constraints");
-STATISTIC(NumNodes , "Number of nodes");
-STATISTIC(NumUnified , "Number of variables unified");
-STATISTIC(NumErased , "Number of redundant constraints erased");
-
-static const unsigned SelfRep = (unsigned)-1;
-static const unsigned Unvisited = (unsigned)-1;
-// Position of the function return node relative to the function node.
-static const unsigned CallReturnPos = 1;
-// Position of the function call node relative to the function node.
-static const unsigned CallFirstArgPos = 2;
-
-namespace {
- struct BitmapKeyInfo {
- static inline SparseBitVector<> *getEmptyKey() {
- return reinterpret_cast<SparseBitVector<> *>(-1);
- }
- static inline SparseBitVector<> *getTombstoneKey() {
- return reinterpret_cast<SparseBitVector<> *>(-2);
- }
- static unsigned getHashValue(const SparseBitVector<> *bitmap) {
- return bitmap->getHashValue();
- }
- static bool isEqual(const SparseBitVector<> *LHS,
- const SparseBitVector<> *RHS) {
- if (LHS == RHS)
- return true;
- else if (LHS == getEmptyKey() || RHS == getEmptyKey()
- || LHS == getTombstoneKey() || RHS == getTombstoneKey())
- return false;
-
- return *LHS == *RHS;
- }
- };
-
- class Andersens : public ModulePass, public AliasAnalysis,
- private InstVisitor<Andersens> {
- struct Node;
-
- /// Constraint - Objects of this structure are used to represent the various
- /// constraints identified by the algorithm. The constraints are 'copy',
- /// for statements like "A = B", 'load' for statements like "A = *B",
- /// 'store' for statements like "*A = B", and AddressOf for statements like
- /// A = alloca; The Offset is applied as *(A + K) = B for stores,
- /// A = *(B + K) for loads, and A = B + K for copies. It is
- /// illegal on addressof constraints (because it is statically
- /// resolvable to A = &C where C = B + K)
-
- struct Constraint {
- enum ConstraintType { Copy, Load, Store, AddressOf } Type;
- unsigned Dest;
- unsigned Src;
- unsigned Offset;
-
- Constraint(ConstraintType Ty, unsigned D, unsigned S, unsigned O = 0)
- : Type(Ty), Dest(D), Src(S), Offset(O) {
- assert((Offset == 0 || Ty != AddressOf) &&
- "Offset is illegal on addressof constraints");
- }
-
- bool operator==(const Constraint &RHS) const {
- return RHS.Type == Type
- && RHS.Dest == Dest
- && RHS.Src == Src
- && RHS.Offset == Offset;
- }
-
- bool operator!=(const Constraint &RHS) const {
- return !(*this == RHS);
- }
-
- bool operator<(const Constraint &RHS) const {
- if (RHS.Type != Type)
- return RHS.Type < Type;
- else if (RHS.Dest != Dest)
- return RHS.Dest < Dest;
- else if (RHS.Src != Src)
- return RHS.Src < Src;
- return RHS.Offset < Offset;
- }
- };
-
- // Information DenseSet requires implemented in order to be able to do
- // it's thing
- struct PairKeyInfo {
- static inline std::pair<unsigned, unsigned> getEmptyKey() {
- return std::make_pair(~0U, ~0U);
- }
- static inline std::pair<unsigned, unsigned> getTombstoneKey() {
- return std::make_pair(~0U - 1, ~0U - 1);
- }
- static unsigned getHashValue(const std::pair<unsigned, unsigned> &P) {
- return P.first ^ P.second;
- }
- static unsigned isEqual(const std::pair<unsigned, unsigned> &LHS,
- const std::pair<unsigned, unsigned> &RHS) {
- return LHS == RHS;
- }
- };
-
- struct ConstraintKeyInfo {
- static inline Constraint getEmptyKey() {
- return Constraint(Constraint::Copy, ~0U, ~0U, ~0U);
- }
- static inline Constraint getTombstoneKey() {
- return Constraint(Constraint::Copy, ~0U - 1, ~0U - 1, ~0U - 1);
- }
- static unsigned getHashValue(const Constraint &C) {
- return C.Src ^ C.Dest ^ C.Type ^ C.Offset;
- }
- static bool isEqual(const Constraint &LHS,
- const Constraint &RHS) {
- return LHS.Type == RHS.Type && LHS.Dest == RHS.Dest
- && LHS.Src == RHS.Src && LHS.Offset == RHS.Offset;
- }
- };
-
- // Node class - This class is used to represent a node in the constraint
- // graph. Due to various optimizations, it is not always the case that
- // there is a mapping from a Node to a Value. In particular, we add
- // artificial Node's that represent the set of pointed-to variables shared
- // for each location equivalent Node.
- struct Node {
- private:
- static volatile sys::cas_flag Counter;
-
- public:
- Value *Val;
- SparseBitVector<> *Edges;
- SparseBitVector<> *PointsTo;
- SparseBitVector<> *OldPointsTo;
- std::list<Constraint> Constraints;
-
- // Pointer and location equivalence labels
- unsigned PointerEquivLabel;
- unsigned LocationEquivLabel;
- // Predecessor edges, both real and implicit
- SparseBitVector<> *PredEdges;
- SparseBitVector<> *ImplicitPredEdges;
- // Set of nodes that point to us, only use for location equivalence.
- SparseBitVector<> *PointedToBy;
- // Number of incoming edges, used during variable substitution to early
- // free the points-to sets
- unsigned NumInEdges;
- // True if our points-to set is in the Set2PEClass map
- bool StoredInHash;
- // True if our node has no indirect constraints (complex or otherwise)
- bool Direct;
- // True if the node is address taken, *or* it is part of a group of nodes
- // that must be kept together. This is set to true for functions and
- // their arg nodes, which must be kept at the same position relative to
- // their base function node.
- bool AddressTaken;
-
- // Nodes in cycles (or in equivalence classes) are united together using a
- // standard union-find representation with path compression. NodeRep
- // gives the index into GraphNodes for the representative Node.
- unsigned NodeRep;
-
- // Modification timestamp. Assigned from Counter.
- // Used for work list prioritization.
- unsigned Timestamp;
-
- explicit Node(bool direct = true) :
- Val(0), Edges(0), PointsTo(0), OldPointsTo(0),
- PointerEquivLabel(0), LocationEquivLabel(0), PredEdges(0),
- ImplicitPredEdges(0), PointedToBy(0), NumInEdges(0),
- StoredInHash(false), Direct(direct), AddressTaken(false),
- NodeRep(SelfRep), Timestamp(0) { }
-
- Node *setValue(Value *V) {
- assert(Val == 0 && "Value already set for this node!");
- Val = V;
- return this;
- }
-
- /// getValue - Return the LLVM value corresponding to this node.
- ///
- Value *getValue() const { return Val; }
-
- /// addPointerTo - Add a pointer to the list of pointees of this node,
- /// returning true if this caused a new pointer to be added, or false if
- /// we already knew about the points-to relation.
- bool addPointerTo(unsigned Node) {
- return PointsTo->test_and_set(Node);
- }
-
- /// intersects - Return true if the points-to set of this node intersects
- /// with the points-to set of the specified node.
- bool intersects(Node *N) const;
-
- /// intersectsIgnoring - Return true if the points-to set of this node
- /// intersects with the points-to set of the specified node on any nodes
- /// except for the specified node to ignore.
- bool intersectsIgnoring(Node *N, unsigned) const;
-
- // Timestamp a node (used for work list prioritization)
- void Stamp() {
- Timestamp = sys::AtomicIncrement(&Counter);
- --Timestamp;
- }
-
- bool isRep() const {
- return( (int) NodeRep < 0 );
- }
- };
-
- struct WorkListElement {
- Node* node;
- unsigned Timestamp;
- WorkListElement(Node* n, unsigned t) : node(n), Timestamp(t) {}
-
- // Note that we reverse the sense of the comparison because we
- // actually want to give low timestamps the priority over high,
- // whereas priority is typically interpreted as a greater value is
- // given high priority.
- bool operator<(const WorkListElement& that) const {
- return( this->Timestamp > that.Timestamp );
- }
- };
-
- // Priority-queue based work list specialized for Nodes.
- class WorkList {
- std::priority_queue<WorkListElement> Q;
-
- public:
- void insert(Node* n) {
- Q.push( WorkListElement(n, n->Timestamp) );
- }
-
- // We automatically discard non-representative nodes and nodes
- // that were in the work list twice (we keep a copy of the
- // timestamp in the work list so we can detect this situation by
- // comparing against the node's current timestamp).
- Node* pop() {
- while( !Q.empty() ) {
- WorkListElement x = Q.top(); Q.pop();
- Node* INode = x.node;
-
- if( INode->isRep() &&
- INode->Timestamp == x.Timestamp ) {
- return(x.node);
- }
- }
- return(0);
- }
-
- bool empty() {
- return Q.empty();
- }
- };
-
- /// GraphNodes - This vector is populated as part of the object
- /// identification stage of the analysis, which populates this vector with a
- /// node for each memory object and fills in the ValueNodes map.
- std::vector<Node> GraphNodes;
-
- /// ValueNodes - This map indicates the Node that a particular Value* is
- /// represented by. This contains entries for all pointers.
- DenseMap<Value*, unsigned> ValueNodes;
-
- /// ObjectNodes - This map contains entries for each memory object in the
- /// program: globals, alloca's and mallocs.
- DenseMap<Value*, unsigned> ObjectNodes;
-
- /// ReturnNodes - This map contains an entry for each function in the
- /// program that returns a value.
- DenseMap<Function*, unsigned> ReturnNodes;
-
- /// VarargNodes - This map contains the entry used to represent all pointers
- /// passed through the varargs portion of a function call for a particular
- /// function. An entry is not present in this map for functions that do not
- /// take variable arguments.
- DenseMap<Function*, unsigned> VarargNodes;
-
-
- /// Constraints - This vector contains a list of all of the constraints
- /// identified by the program.
- std::vector<Constraint> Constraints;
-
- // Map from graph node to maximum K value that is allowed (for functions,
- // this is equivalent to the number of arguments + CallFirstArgPos)
- std::map<unsigned, unsigned> MaxK;
-
- /// This enum defines the GraphNodes indices that correspond to important
- /// fixed sets.
- enum {
- UniversalSet = 0,
- NullPtr = 1,
- NullObject = 2,
- NumberSpecialNodes
- };
- // Stack for Tarjan's
- std::stack<unsigned> SCCStack;
- // Map from Graph Node to DFS number
- std::vector<unsigned> Node2DFS;
- // Map from Graph Node to Deleted from graph.
- std::vector<bool> Node2Deleted;
- // Same as Node Maps, but implemented as std::map because it is faster to
- // clear
- std::map<unsigned, unsigned> Tarjan2DFS;
- std::map<unsigned, bool> Tarjan2Deleted;
- // Current DFS number
- unsigned DFSNumber;
-
- // Work lists.
- WorkList w1, w2;
- WorkList *CurrWL, *NextWL; // "current" and "next" work lists
-
- // Offline variable substitution related things
-
- // Temporary rep storage, used because we can't collapse SCC's in the
- // predecessor graph by uniting the variables permanently, we can only do so
- // for the successor graph.
- std::vector<unsigned> VSSCCRep;
- // Mapping from node to whether we have visited it during SCC finding yet.
- std::vector<bool> Node2Visited;
- // During variable substitution, we create unknowns to represent the unknown
- // value that is a dereference of a variable. These nodes are known as
- // "ref" nodes (since they represent the value of dereferences).
- unsigned FirstRefNode;
- // During HVN, we create represent address taken nodes as if they were
- // unknown (since HVN, unlike HU, does not evaluate unions).
- unsigned FirstAdrNode;
- // Current pointer equivalence class number
- unsigned PEClass;
- // Mapping from points-to sets to equivalence classes
- typedef DenseMap<SparseBitVector<> *, unsigned, BitmapKeyInfo> BitVectorMap;
- BitVectorMap Set2PEClass;
- // Mapping from pointer equivalences to the representative node. -1 if we
- // have no representative node for this pointer equivalence class yet.
- std::vector<int> PEClass2Node;
- // Mapping from pointer equivalences to representative node. This includes
- // pointer equivalent but not location equivalent variables. -1 if we have
- // no representative node for this pointer equivalence class yet.
- std::vector<int> PENLEClass2Node;
- // Union/Find for HCD
- std::vector<unsigned> HCDSCCRep;
- // HCD's offline-detected cycles; "Statically DeTected"
- // -1 if not part of such a cycle, otherwise a representative node.
- std::vector<int> SDT;
- // Whether to use SDT (UniteNodes can use it during solving, but not before)
- bool SDTActive;
-
- public:
- static char ID;
- Andersens() : ModulePass(&ID) {}
-
- bool runOnModule(Module &M) {
- InitializeAliasAnalysis(this);
- IdentifyObjects(M);
- CollectConstraints(M);
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa-constraints"
- DEBUG(PrintConstraints());
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa"
- SolveConstraints();
- DEBUG(PrintPointsToGraph());
-
- // Free the constraints list, as we don't need it to respond to alias
- // requests.
- std::vector<Constraint>().swap(Constraints);
- //These are needed for Print() (-analyze in opt)
- //ObjectNodes.clear();
- //ReturnNodes.clear();
- //VarargNodes.clear();
- return false;
- }
-
- void releaseMemory() {
- // FIXME: Until we have transitively required passes working correctly,
- // this cannot be enabled! Otherwise, using -count-aa with the pass
- // causes memory to be freed too early. :(
-#if 0
- // The memory objects and ValueNodes data structures at the only ones that
- // are still live after construction.
- std::vector<Node>().swap(GraphNodes);
- ValueNodes.clear();
-#endif
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AliasAnalysis::getAnalysisUsage(AU);
- AU.setPreservesAll(); // Does not transform code
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&AliasAnalysis::ID))
- return (AliasAnalysis*)this;
- return this;
- }
-
- //------------------------------------------------
- // Implement the AliasAnalysis API
- //
- AliasResult alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size);
- virtual ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
- virtual ModRefResult getModRefInfo(CallSite CS1, CallSite CS2);
- bool pointsToConstantMemory(const Value *P);
-
- virtual void deleteValue(Value *V) {
- ValueNodes.erase(V);
- getAnalysis<AliasAnalysis>().deleteValue(V);
- }
-
- virtual void copyValue(Value *From, Value *To) {
- ValueNodes[To] = ValueNodes[From];
- getAnalysis<AliasAnalysis>().copyValue(From, To);
- }
-
- private:
- /// getNode - Return the node corresponding to the specified pointer scalar.
- ///
- unsigned getNode(Value *V) {
- if (Constant *C = dyn_cast<Constant>(V))
- if (!isa<GlobalValue>(C))
- return getNodeForConstantPointer(C);
-
- DenseMap<Value*, unsigned>::iterator I = ValueNodes.find(V);
- if (I == ValueNodes.end()) {
-#ifndef NDEBUG
- V->dump();
-#endif
- llvm_unreachable("Value does not have a node in the points-to graph!");
- }
- return I->second;
- }
-
- /// getObject - Return the node corresponding to the memory object for the
- /// specified global or allocation instruction.
- unsigned getObject(Value *V) const {
- DenseMap<Value*, unsigned>::const_iterator I = ObjectNodes.find(V);
- assert(I != ObjectNodes.end() &&
- "Value does not have an object in the points-to graph!");
- return I->second;
- }
-
- /// getReturnNode - Return the node representing the return value for the
- /// specified function.
- unsigned getReturnNode(Function *F) const {
- DenseMap<Function*, unsigned>::const_iterator I = ReturnNodes.find(F);
- assert(I != ReturnNodes.end() && "Function does not return a value!");
- return I->second;
- }
-
- /// getVarargNode - Return the node representing the variable arguments
- /// formal for the specified function.
- unsigned getVarargNode(Function *F) const {
- DenseMap<Function*, unsigned>::const_iterator I = VarargNodes.find(F);
- assert(I != VarargNodes.end() && "Function does not take var args!");
- return I->second;
- }
-
- /// getNodeValue - Get the node for the specified LLVM value and set the
- /// value for it to be the specified value.
- unsigned getNodeValue(Value &V) {
- unsigned Index = getNode(&V);
- GraphNodes[Index].setValue(&V);
- return Index;
- }
-
- unsigned UniteNodes(unsigned First, unsigned Second,
- bool UnionByRank = true);
- unsigned FindNode(unsigned Node);
- unsigned FindNode(unsigned Node) const;
-
- void IdentifyObjects(Module &M);
- void CollectConstraints(Module &M);
- bool AnalyzeUsesOfFunction(Value *);
- void CreateConstraintGraph();
- void OptimizeConstraints();
- unsigned FindEquivalentNode(unsigned, unsigned);
- void ClumpAddressTaken();
- void RewriteConstraints();
- void HU();
- void HVN();
- void HCD();
- void Search(unsigned Node);
- void UnitePointerEquivalences();
- void SolveConstraints();
- bool QueryNode(unsigned Node);
- void Condense(unsigned Node);
- void HUValNum(unsigned Node);
- void HVNValNum(unsigned Node);
- unsigned getNodeForConstantPointer(Constant *C);
- unsigned getNodeForConstantPointerTarget(Constant *C);
- void AddGlobalInitializerConstraints(unsigned, Constant *C);
-
- void AddConstraintsForNonInternalLinkage(Function *F);
- void AddConstraintsForCall(CallSite CS, Function *F);
- bool AddConstraintsForExternalCall(CallSite CS, Function *F);
-
-
- void PrintNode(const Node *N) const;
- void PrintConstraints() const ;
- void PrintConstraint(const Constraint &) const;
- void PrintLabels() const;
- void PrintPointsToGraph() const;
-
- //===------------------------------------------------------------------===//
- // Instruction visitation methods for adding constraints
- //
- friend class InstVisitor<Andersens>;
- void visitReturnInst(ReturnInst &RI);
- void visitInvokeInst(InvokeInst &II) { visitCallSite(CallSite(&II)); }
- void visitCallInst(CallInst &CI) {
- if (isMalloc(&CI)) visitAlloc(CI);
- else visitCallSite(CallSite(&CI));
- }
- void visitCallSite(CallSite CS);
- void visitAllocaInst(AllocaInst &I);
- void visitAlloc(Instruction &I);
- void visitLoadInst(LoadInst &LI);
- void visitStoreInst(StoreInst &SI);
- void visitGetElementPtrInst(GetElementPtrInst &GEP);
- void visitPHINode(PHINode &PN);
- void visitCastInst(CastInst &CI);
- void visitICmpInst(ICmpInst &ICI) {} // NOOP!
- void visitFCmpInst(FCmpInst &ICI) {} // NOOP!
- void visitSelectInst(SelectInst &SI);
- void visitVAArg(VAArgInst &I);
- void visitInstruction(Instruction &I);
-
- //===------------------------------------------------------------------===//
- // Implement Analyize interface
- //
- void print(raw_ostream &O, const Module*) const {
- PrintPointsToGraph();
- }
- };
-}
-
-char Andersens::ID = 0;
-static RegisterPass<Andersens>
-X("anders-aa", "Andersen's Interprocedural Alias Analysis (experimental)",
- false, true);
-static RegisterAnalysisGroup<AliasAnalysis> Y(X);
-
-// Initialize Timestamp Counter (static).
-volatile llvm::sys::cas_flag Andersens::Node::Counter = 0;
-
-ModulePass *llvm::createAndersensPass() { return new Andersens(); }
-
-//===----------------------------------------------------------------------===//
-// AliasAnalysis Interface Implementation
-//===----------------------------------------------------------------------===//
-
-AliasAnalysis::AliasResult Andersens::alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size) {
- Node *N1 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V1)))];
- Node *N2 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V2)))];
-
- // Check to see if the two pointers are known to not alias. They don't alias
- // if their points-to sets do not intersect.
- if (!N1->intersectsIgnoring(N2, NullObject))
- return NoAlias;
-
- return AliasAnalysis::alias(V1, V1Size, V2, V2Size);
-}
-
-AliasAnalysis::ModRefResult
-Andersens::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
- // The only thing useful that we can contribute for mod/ref information is
- // when calling external function calls: if we know that memory never escapes
- // from the program, it cannot be modified by an external call.
- //
- // NOTE: This is not really safe, at least not when the entire program is not
- // available. The deal is that the external function could call back into the
- // program and modify stuff. We ignore this technical niggle for now. This
- // is, after all, a "research quality" implementation of Andersen's analysis.
- if (Function *F = CS.getCalledFunction())
- if (F->isDeclaration()) {
- Node *N1 = &GraphNodes[FindNode(getNode(P))];
-
- if (N1->PointsTo->empty())
- return NoModRef;
-#if FULL_UNIVERSAL
- if (!UniversalSet->PointsTo->test(FindNode(getNode(P))))
- return NoModRef; // Universal set does not contain P
-#else
- if (!N1->PointsTo->test(UniversalSet))
- return NoModRef; // P doesn't point to the universal set.
-#endif
- }
-
- return AliasAnalysis::getModRefInfo(CS, P, Size);
-}
-
-AliasAnalysis::ModRefResult
-Andersens::getModRefInfo(CallSite CS1, CallSite CS2) {
- return AliasAnalysis::getModRefInfo(CS1,CS2);
-}
-
-/// pointsToConstantMemory - If we can determine that this pointer only points
-/// to constant memory, return true. In practice, this means that if the
-/// pointer can only point to constant globals, functions, or the null pointer,
-/// return true.
-///
-bool Andersens::pointsToConstantMemory(const Value *P) {
- Node *N = &GraphNodes[FindNode(getNode(const_cast<Value*>(P)))];
- unsigned i;
-
- for (SparseBitVector<>::iterator bi = N->PointsTo->begin();
- bi != N->PointsTo->end();
- ++bi) {
- i = *bi;
- Node *Pointee = &GraphNodes[i];
- if (Value *V = Pointee->getValue()) {
- if (!isa<GlobalValue>(V) || (isa<GlobalVariable>(V) &&
- !cast<GlobalVariable>(V)->isConstant()))
- return AliasAnalysis::pointsToConstantMemory(P);
- } else {
- if (i != NullObject)
- return AliasAnalysis::pointsToConstantMemory(P);
- }
- }
-
- return true;
-}
-
-//===----------------------------------------------------------------------===//
-// Object Identification Phase
-//===----------------------------------------------------------------------===//
-
-/// IdentifyObjects - This stage scans the program, adding an entry to the
-/// GraphNodes list for each memory object in the program (global stack or
-/// heap), and populates the ValueNodes and ObjectNodes maps for these objects.
-///
-void Andersens::IdentifyObjects(Module &M) {
- unsigned NumObjects = 0;
-
- // Object #0 is always the universal set: the object that we don't know
- // anything about.
- assert(NumObjects == UniversalSet && "Something changed!");
- ++NumObjects;
-
- // Object #1 always represents the null pointer.
- assert(NumObjects == NullPtr && "Something changed!");
- ++NumObjects;
-
- // Object #2 always represents the null object (the object pointed to by null)
- assert(NumObjects == NullObject && "Something changed!");
- ++NumObjects;
-
- // Add all the globals first.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- ObjectNodes[I] = NumObjects++;
- ValueNodes[I] = NumObjects++;
- }
-
- // Add nodes for all of the functions and the instructions inside of them.
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- // The function itself is a memory object.
- unsigned First = NumObjects;
- ValueNodes[F] = NumObjects++;
- if (isa<PointerType>(F->getFunctionType()->getReturnType()))
- ReturnNodes[F] = NumObjects++;
- if (F->getFunctionType()->isVarArg())
- VarargNodes[F] = NumObjects++;
-
-
- // Add nodes for all of the incoming pointer arguments.
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I)
- {
- if (isa<PointerType>(I->getType()))
- ValueNodes[I] = NumObjects++;
- }
- MaxK[First] = NumObjects - First;
-
- // Scan the function body, creating a memory object for each heap/stack
- // allocation in the body of the function and a node to represent all
- // pointer values defined by instructions and used as operands.
- for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
-    // If this is a heap or stack allocation, create a node for the memory
- // object.
- if (isa<PointerType>(II->getType())) {
- ValueNodes[&*II] = NumObjects++;
- if (AllocaInst *AI = dyn_cast<AllocaInst>(&*II))
- ObjectNodes[AI] = NumObjects++;
- else if (isMalloc(&*II))
- ObjectNodes[&*II] = NumObjects++;
- }
-
- // Calls to inline asm need to be added as well because the callee isn't
- // referenced anywhere else.
- if (CallInst *CI = dyn_cast<CallInst>(&*II)) {
- Value *Callee = CI->getCalledValue();
- if (isa<InlineAsm>(Callee))
- ValueNodes[Callee] = NumObjects++;
- }
- }
- }
-
- // Now that we know how many objects to create, make them all now!
- GraphNodes.resize(NumObjects);
- NumNodes += NumObjects;
-}
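// A minimal standalone sketch (not part of Andersens.cpp) of the numbering
// scheme used above: the three special nodes come first, then every global
// gets an object node for its memory and a value node for the pointer to it,
// mirroring the order in which IdentifyObjects hands out ids.
#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  enum { UniversalSet = 0, NullPtr = 1, NullObject = 2 };
  unsigned NumObjects = NullObject + 1;          // ids 0..2 are reserved
  std::map<std::string, unsigned> ObjectNodes;   // memory object of a global
  std::map<std::string, unsigned> ValueNodes;    // the global's pointer value

  std::vector<std::string> Globals = {"@g", "@h"};  // hypothetical globals
  for (const std::string &G : Globals) {
    ObjectNodes[G] = NumObjects++;
    ValueNodes[G] = NumObjects++;
  }

  for (const std::string &G : Globals)
    std::printf("%s: object node %u, value node %u\n", G.c_str(),
                ObjectNodes[G], ValueNodes[G]);
  return 0;                                      // prints 3/4 and 5/6
}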
-
-//===----------------------------------------------------------------------===//
-// Constraint Identification Phase
-//===----------------------------------------------------------------------===//
-
-/// getNodeForConstantPointer - Return the node corresponding to the constant
-/// pointer itself.
-unsigned Andersens::getNodeForConstantPointer(Constant *C) {
- assert(isa<PointerType>(C->getType()) && "Not a constant pointer!");
-
- if (isa<ConstantPointerNull>(C) || isa<UndefValue>(C))
- return NullPtr;
- else if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
- return getNode(GV);
- else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- switch (CE->getOpcode()) {
- case Instruction::GetElementPtr:
- return getNodeForConstantPointer(CE->getOperand(0));
- case Instruction::IntToPtr:
- return UniversalSet;
- case Instruction::BitCast:
- return getNodeForConstantPointer(CE->getOperand(0));
- default:
- errs() << "Constant Expr not yet handled: " << *CE << "\n";
- llvm_unreachable(0);
- }
- } else {
- llvm_unreachable("Unknown constant pointer!");
- }
- return 0;
-}
-
-/// getNodeForConstantPointerTarget - Return the node POINTED TO by the
-/// specified constant pointer.
-unsigned Andersens::getNodeForConstantPointerTarget(Constant *C) {
- assert(isa<PointerType>(C->getType()) && "Not a constant pointer!");
-
- if (isa<ConstantPointerNull>(C))
- return NullObject;
- else if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
- return getObject(GV);
- else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- switch (CE->getOpcode()) {
- case Instruction::GetElementPtr:
- return getNodeForConstantPointerTarget(CE->getOperand(0));
- case Instruction::IntToPtr:
- return UniversalSet;
- case Instruction::BitCast:
- return getNodeForConstantPointerTarget(CE->getOperand(0));
- default:
- errs() << "Constant Expr not yet handled: " << *CE << "\n";
- llvm_unreachable(0);
- }
- } else {
- llvm_unreachable("Unknown constant pointer!");
- }
- return 0;
-}
-
-/// AddGlobalInitializerConstraints - Add inclusion constraints for the memory
-/// object N, which contains values indicated by C.
-void Andersens::AddGlobalInitializerConstraints(unsigned NodeIndex,
- Constant *C) {
- if (C->getType()->isSingleValueType()) {
- if (isa<PointerType>(C->getType()))
- Constraints.push_back(Constraint(Constraint::Copy, NodeIndex,
- getNodeForConstantPointer(C)));
- } else if (C->isNullValue()) {
- Constraints.push_back(Constraint(Constraint::Copy, NodeIndex,
- NullObject));
- return;
- } else if (!isa<UndefValue>(C)) {
- // If this is an array or struct, include constraints for each element.
- assert(isa<ConstantArray>(C) || isa<ConstantStruct>(C));
- for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
- AddGlobalInitializerConstraints(NodeIndex,
- cast<Constant>(C->getOperand(i)));
- }
-}
-
-/// AddConstraintsForNonInternalLinkage - If this function does not have
-/// internal linkage, realize that we can't trust anything passed into or
-/// returned by this function.
-void Andersens::AddConstraintsForNonInternalLinkage(Function *F) {
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
- if (isa<PointerType>(I->getType()))
- // If this is an argument of an externally accessible function, the
- // incoming pointer might point to anything.
- Constraints.push_back(Constraint(Constraint::Copy, getNode(I),
- UniversalSet));
-}
-
- /// AddConstraintsForExternalCall - If this is a call to a "known" external
- /// function, add the constraints and return true. If this is a call to an
- /// unknown function, return false.
-bool Andersens::AddConstraintsForExternalCall(CallSite CS, Function *F) {
- assert(F->isDeclaration() && "Not an external function!");
-
- // These functions don't induce any points-to constraints.
- if (F->getName() == "atoi" || F->getName() == "atof" ||
- F->getName() == "atol" || F->getName() == "atoll" ||
- F->getName() == "remove" || F->getName() == "unlink" ||
- F->getName() == "rename" || F->getName() == "memcmp" ||
- F->getName() == "llvm.memset" ||
- F->getName() == "strcmp" || F->getName() == "strncmp" ||
- F->getName() == "execl" || F->getName() == "execlp" ||
- F->getName() == "execle" || F->getName() == "execv" ||
- F->getName() == "execvp" || F->getName() == "chmod" ||
- F->getName() == "puts" || F->getName() == "write" ||
- F->getName() == "open" || F->getName() == "create" ||
- F->getName() == "truncate" || F->getName() == "chdir" ||
- F->getName() == "mkdir" || F->getName() == "rmdir" ||
- F->getName() == "read" || F->getName() == "pipe" ||
- F->getName() == "wait" || F->getName() == "time" ||
- F->getName() == "stat" || F->getName() == "fstat" ||
- F->getName() == "lstat" || F->getName() == "strtod" ||
- F->getName() == "strtof" || F->getName() == "strtold" ||
- F->getName() == "fopen" || F->getName() == "fdopen" ||
- F->getName() == "freopen" ||
- F->getName() == "fflush" || F->getName() == "feof" ||
- F->getName() == "fileno" || F->getName() == "clearerr" ||
- F->getName() == "rewind" || F->getName() == "ftell" ||
- F->getName() == "ferror" || F->getName() == "fgetc" ||
- F->getName() == "fgetc" || F->getName() == "_IO_getc" ||
- F->getName() == "fwrite" || F->getName() == "fread" ||
- F->getName() == "fgets" || F->getName() == "ungetc" ||
- F->getName() == "fputc" ||
- F->getName() == "fputs" || F->getName() == "putc" ||
- F->getName() == "ftell" || F->getName() == "rewind" ||
- F->getName() == "_IO_putc" || F->getName() == "fseek" ||
- F->getName() == "fgetpos" || F->getName() == "fsetpos" ||
- F->getName() == "printf" || F->getName() == "fprintf" ||
- F->getName() == "sprintf" || F->getName() == "vprintf" ||
- F->getName() == "vfprintf" || F->getName() == "vsprintf" ||
- F->getName() == "scanf" || F->getName() == "fscanf" ||
- F->getName() == "sscanf" || F->getName() == "__assert_fail" ||
- F->getName() == "modf")
- return true;
-
-
- // These functions do induce points-to edges.
- if (F->getName() == "llvm.memcpy" ||
- F->getName() == "llvm.memmove" ||
- F->getName() == "memmove") {
-
- const FunctionType *FTy = F->getFunctionType();
- if (FTy->getNumParams() > 1 &&
- isa<PointerType>(FTy->getParamType(0)) &&
- isa<PointerType>(FTy->getParamType(1))) {
-
- // *Dest = *Src, which requires an artificial graph node to represent the
- // constraint. It is broken up into *Dest = temp, temp = *Src
- unsigned FirstArg = getNode(CS.getArgument(0));
- unsigned SecondArg = getNode(CS.getArgument(1));
- unsigned TempArg = GraphNodes.size();
- GraphNodes.push_back(Node());
- Constraints.push_back(Constraint(Constraint::Store,
- FirstArg, TempArg));
- Constraints.push_back(Constraint(Constraint::Load,
- TempArg, SecondArg));
- // In addition, Dest = Src
- Constraints.push_back(Constraint(Constraint::Copy,
- FirstArg, SecondArg));
- return true;
- }
- }
-
- // Result = Arg0
- if (F->getName() == "realloc" || F->getName() == "strchr" ||
- F->getName() == "strrchr" || F->getName() == "strstr" ||
- F->getName() == "strtok") {
- const FunctionType *FTy = F->getFunctionType();
- if (FTy->getNumParams() > 0 &&
- isa<PointerType>(FTy->getParamType(0))) {
- Constraints.push_back(Constraint(Constraint::Copy,
- getNode(CS.getInstruction()),
- getNode(CS.getArgument(0))));
- return true;
- }
- }
-
- return false;
-}
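// A standalone sketch (not from the original file) of the memcpy/memmove
// decomposition above: *Dest = *Src cannot be expressed as a single
// constraint, so it is split through a fresh temporary node into a Store and
// a Load, plus the direct Dest = Src copy.
#include <cstdio>
#include <vector>

struct Constraint {
  enum Kind { Copy, Load, Store, AddressOf } Type;
  unsigned Dest, Src;
};

int main() {
  unsigned Dest = 0, Src = 1, NextNode = 2;   // hypothetical node numbers
  unsigned Temp = NextNode++;                 // the artificial graph node
  std::vector<Constraint> Constraints = {
      {Constraint::Store, Dest, Temp},        // *Dest = temp
      {Constraint::Load, Temp, Src},          // temp  = *Src
      {Constraint::Copy, Dest, Src},          // Dest  = Src
  };
  const char *Names[] = {"Copy", "Load", "Store", "AddressOf"};
  for (const Constraint &C : Constraints)
    std::printf("%s(dest=%u, src=%u)\n", Names[C.Type], C.Dest, C.Src);
  return 0;
}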
-
-
-
-/// AnalyzeUsesOfFunction - Look at all of the users of the specified function.
-/// If this is used by anything complex (i.e., the address escapes), return
-/// true.
-bool Andersens::AnalyzeUsesOfFunction(Value *V) {
-
- if (!isa<PointerType>(V->getType())) return true;
-
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
- if (isa<LoadInst>(*UI)) {
- return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
- if (V == SI->getOperand(1)) {
- return false;
- } else if (SI->getOperand(1)) {
- return true; // Storing the pointer
- }
- } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
- if (AnalyzeUsesOfFunction(GEP)) return true;
- } else if (isFreeCall(*UI)) {
- return false;
- } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
-      // Make sure that this is just the function being called, not that its
-      // address is being passed as an argument.
- for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i)
- if (CI->getOperand(i) == V) return true;
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
-      // Make sure that this is just the function being called, not that its
-      // address is being passed as an argument.
- for (unsigned i = 3, e = II->getNumOperands(); i != e; ++i)
- if (II->getOperand(i) == V) return true;
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) {
- if (CE->getOpcode() == Instruction::GetElementPtr ||
- CE->getOpcode() == Instruction::BitCast) {
- if (AnalyzeUsesOfFunction(CE))
- return true;
- } else {
- return true;
- }
- } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) {
- if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
- return true; // Allow comparison against null.
- } else {
- return true;
- }
- return false;
-}
-
-/// CollectConstraints - This stage scans the program, adding a constraint to
-/// the Constraints list for each instruction in the program that induces a
-/// constraint, and setting up the initial points-to graph.
-///
-void Andersens::CollectConstraints(Module &M) {
- // First, the universal set points to itself.
- Constraints.push_back(Constraint(Constraint::AddressOf, UniversalSet,
- UniversalSet));
- Constraints.push_back(Constraint(Constraint::Store, UniversalSet,
- UniversalSet));
-
- // Next, the null pointer points to the null object.
- Constraints.push_back(Constraint(Constraint::AddressOf, NullPtr, NullObject));
-
- // Next, add any constraints on global variables and their initializers.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- // Associate the address of the global object as pointing to the memory for
- // the global: &G = <G memory>
- unsigned ObjectIndex = getObject(I);
- Node *Object = &GraphNodes[ObjectIndex];
- Object->setValue(I);
- Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(*I),
- ObjectIndex));
-
- if (I->hasDefinitiveInitializer()) {
- AddGlobalInitializerConstraints(ObjectIndex, I->getInitializer());
- } else {
- // If it doesn't have an initializer (i.e. it's defined in another
- // translation unit), it points to the universal set.
- Constraints.push_back(Constraint(Constraint::Copy, ObjectIndex,
- UniversalSet));
- }
- }
-
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- // Set up the return value node.
- if (isa<PointerType>(F->getFunctionType()->getReturnType()))
- GraphNodes[getReturnNode(F)].setValue(F);
- if (F->getFunctionType()->isVarArg())
- GraphNodes[getVarargNode(F)].setValue(F);
-
- // Set up incoming argument nodes.
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I)
- if (isa<PointerType>(I->getType()))
- getNodeValue(*I);
-
- // At some point we should just add constraints for the escaping functions
- // at solve time, but this slows down solving. For now, we simply mark
- // address taken functions as escaping and treat them as external.
- if (!F->hasLocalLinkage() || AnalyzeUsesOfFunction(F))
- AddConstraintsForNonInternalLinkage(F);
-
- if (!F->isDeclaration()) {
- // Scan the function body, creating a memory object for each heap/stack
- // allocation in the body of the function and a node to represent all
- // pointer values defined by instructions and used as operands.
- visit(F);
- } else {
- // External functions that return pointers return the universal set.
- if (isa<PointerType>(F->getFunctionType()->getReturnType()))
- Constraints.push_back(Constraint(Constraint::Copy,
- getReturnNode(F),
- UniversalSet));
-
- // Any pointers that are passed into the function have the universal set
- // stored into them.
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I)
- if (isa<PointerType>(I->getType())) {
- // Pointers passed into external functions could have anything stored
- // through them.
- Constraints.push_back(Constraint(Constraint::Store, getNode(I),
- UniversalSet));
- // Memory objects passed into external function calls can have the
- // universal set point to them.
-#if FULL_UNIVERSAL
- Constraints.push_back(Constraint(Constraint::Copy,
- UniversalSet,
- getNode(I)));
-#else
- Constraints.push_back(Constraint(Constraint::Copy,
- getNode(I),
- UniversalSet));
-#endif
- }
-
- // If this is an external varargs function, it can also store pointers
- // into any pointers passed through the varargs section.
- if (F->getFunctionType()->isVarArg())
- Constraints.push_back(Constraint(Constraint::Store, getVarargNode(F),
- UniversalSet));
- }
- }
- NumConstraints += Constraints.size();
-}
-
-
-void Andersens::visitInstruction(Instruction &I) {
-#ifdef NDEBUG
- return; // This function is just a big assert.
-#endif
- if (isa<BinaryOperator>(I))
- return;
- // Most instructions don't have any effect on pointer values.
- switch (I.getOpcode()) {
- case Instruction::Br:
- case Instruction::Switch:
- case Instruction::Unwind:
- case Instruction::Unreachable:
- case Instruction::ICmp:
- case Instruction::FCmp:
- return;
- default:
- // Is this something we aren't handling yet?
- errs() << "Unknown instruction: " << I;
- llvm_unreachable(0);
- }
-}
-
-void Andersens::visitAllocaInst(AllocaInst &I) {
- visitAlloc(I);
-}
-
-void Andersens::visitAlloc(Instruction &I) {
- unsigned ObjectIndex = getObject(&I);
- GraphNodes[ObjectIndex].setValue(&I);
- Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(I),
- ObjectIndex));
-}
-
-void Andersens::visitReturnInst(ReturnInst &RI) {
- if (RI.getNumOperands() && isa<PointerType>(RI.getOperand(0)->getType()))
- // return V --> <Copy/retval{F}/v>
- Constraints.push_back(Constraint(Constraint::Copy,
- getReturnNode(RI.getParent()->getParent()),
- getNode(RI.getOperand(0))));
-}
-
-void Andersens::visitLoadInst(LoadInst &LI) {
- if (isa<PointerType>(LI.getType()))
- // P1 = load P2 --> <Load/P1/P2>
- Constraints.push_back(Constraint(Constraint::Load, getNodeValue(LI),
- getNode(LI.getOperand(0))));
-}
-
-void Andersens::visitStoreInst(StoreInst &SI) {
- if (isa<PointerType>(SI.getOperand(0)->getType()))
- // store P1, P2 --> <Store/P2/P1>
- Constraints.push_back(Constraint(Constraint::Store,
- getNode(SI.getOperand(1)),
- getNode(SI.getOperand(0))));
-}
-
-void Andersens::visitGetElementPtrInst(GetElementPtrInst &GEP) {
- // P1 = getelementptr P2, ... --> <Copy/P1/P2>
- Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(GEP),
- getNode(GEP.getOperand(0))));
-}
-
-void Andersens::visitPHINode(PHINode &PN) {
- if (isa<PointerType>(PN.getType())) {
- unsigned PNN = getNodeValue(PN);
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
- // P1 = phi P2, P3 --> <Copy/P1/P2>, <Copy/P1/P3>, ...
- Constraints.push_back(Constraint(Constraint::Copy, PNN,
- getNode(PN.getIncomingValue(i))));
- }
-}
-
-void Andersens::visitCastInst(CastInst &CI) {
- Value *Op = CI.getOperand(0);
- if (isa<PointerType>(CI.getType())) {
- if (isa<PointerType>(Op->getType())) {
- // P1 = cast P2 --> <Copy/P1/P2>
- Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI),
- getNode(CI.getOperand(0))));
- } else {
- // P1 = cast int --> <Copy/P1/Univ>
-#if 0
- Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI),
- UniversalSet));
-#else
- getNodeValue(CI);
-#endif
- }
- } else if (isa<PointerType>(Op->getType())) {
- // int = cast P1 --> <Copy/Univ/P1>
-#if 0
- Constraints.push_back(Constraint(Constraint::Copy,
- UniversalSet,
- getNode(CI.getOperand(0))));
-#else
- getNode(CI.getOperand(0));
-#endif
- }
-}
-
-void Andersens::visitSelectInst(SelectInst &SI) {
- if (isa<PointerType>(SI.getType())) {
- unsigned SIN = getNodeValue(SI);
- // P1 = select C, P2, P3 ---> <Copy/P1/P2>, <Copy/P1/P3>
- Constraints.push_back(Constraint(Constraint::Copy, SIN,
- getNode(SI.getOperand(1))));
- Constraints.push_back(Constraint(Constraint::Copy, SIN,
- getNode(SI.getOperand(2))));
- }
-}
-
-void Andersens::visitVAArg(VAArgInst &I) {
- llvm_unreachable("vaarg not handled yet!");
-}
-
-/// AddConstraintsForCall - Add constraints for a call with actual arguments
-/// specified by CS to the function specified by F. Note that the types of
-/// arguments might not match up in the case where this is an indirect call and
- /// the function pointer has been cast. If this is the case, do something
-/// reasonable.
-void Andersens::AddConstraintsForCall(CallSite CS, Function *F) {
- Value *CallValue = CS.getCalledValue();
- bool IsDeref = F == NULL;
-
- // If this is a call to an external function, try to handle it directly to get
- // some taste of context sensitivity.
- if (F && F->isDeclaration() && AddConstraintsForExternalCall(CS, F))
- return;
-
- if (isa<PointerType>(CS.getType())) {
- unsigned CSN = getNode(CS.getInstruction());
- if (!F || isa<PointerType>(F->getFunctionType()->getReturnType())) {
- if (IsDeref)
- Constraints.push_back(Constraint(Constraint::Load, CSN,
- getNode(CallValue), CallReturnPos));
- else
- Constraints.push_back(Constraint(Constraint::Copy, CSN,
- getNode(CallValue) + CallReturnPos));
- } else {
- // If the function returns a non-pointer value, handle this just like we
- // treat a nonpointer cast to pointer.
- Constraints.push_back(Constraint(Constraint::Copy, CSN,
- UniversalSet));
- }
- } else if (F && isa<PointerType>(F->getFunctionType()->getReturnType())) {
-#if FULL_UNIVERSAL
- Constraints.push_back(Constraint(Constraint::Copy,
- UniversalSet,
- getNode(CallValue) + CallReturnPos));
-#else
- Constraints.push_back(Constraint(Constraint::Copy,
- getNode(CallValue) + CallReturnPos,
- UniversalSet));
-#endif
-
-
- }
-
- CallSite::arg_iterator ArgI = CS.arg_begin(), ArgE = CS.arg_end();
- bool external = !F || F->isDeclaration();
- if (F) {
- // Direct Call
- Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- for (; AI != AE && ArgI != ArgE; ++AI, ++ArgI)
- {
-#if !FULL_UNIVERSAL
- if (external && isa<PointerType>((*ArgI)->getType()))
- {
- // Add constraint that ArgI can now point to anything due to
- // escaping, as can everything it points to. The second portion of
- // this should be taken care of by universal = *universal
- Constraints.push_back(Constraint(Constraint::Copy,
- getNode(*ArgI),
- UniversalSet));
- }
-#endif
- if (isa<PointerType>(AI->getType())) {
- if (isa<PointerType>((*ArgI)->getType())) {
- // Copy the actual argument into the formal argument.
- Constraints.push_back(Constraint(Constraint::Copy, getNode(AI),
- getNode(*ArgI)));
- } else {
- Constraints.push_back(Constraint(Constraint::Copy, getNode(AI),
- UniversalSet));
- }
- } else if (isa<PointerType>((*ArgI)->getType())) {
-#if FULL_UNIVERSAL
- Constraints.push_back(Constraint(Constraint::Copy,
- UniversalSet,
- getNode(*ArgI)));
-#else
- Constraints.push_back(Constraint(Constraint::Copy,
- getNode(*ArgI),
- UniversalSet));
-#endif
- }
- }
- } else {
-    // Indirect Call
- unsigned ArgPos = CallFirstArgPos;
- for (; ArgI != ArgE; ++ArgI) {
- if (isa<PointerType>((*ArgI)->getType())) {
- // Copy the actual argument into the formal argument.
- Constraints.push_back(Constraint(Constraint::Store,
- getNode(CallValue),
- getNode(*ArgI), ArgPos++));
- } else {
- Constraints.push_back(Constraint(Constraint::Store,
- getNode (CallValue),
- UniversalSet, ArgPos++));
- }
- }
- }
- // Copy all pointers passed through the varargs section to the varargs node.
- if (F && F->getFunctionType()->isVarArg())
- for (; ArgI != ArgE; ++ArgI)
- if (isa<PointerType>((*ArgI)->getType()))
- Constraints.push_back(Constraint(Constraint::Copy, getVarargNode(F),
- getNode(*ArgI)));
- // If more arguments are passed in than we track, just drop them on the floor.
-}
-
-void Andersens::visitCallSite(CallSite CS) {
- if (isa<PointerType>(CS.getType()))
- getNodeValue(*CS.getInstruction());
-
- if (Function *F = CS.getCalledFunction()) {
- AddConstraintsForCall(CS, F);
- } else {
- AddConstraintsForCall(CS, NULL);
- }
-}
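// A standalone sketch (not from the original file) of what the instruction
// visitors above emit for a tiny function, using hypothetical node numbers:
// an alloca of a pointer yields AddressOf, storing a pointer yields Store,
// and loading a pointer yields Load.
#include <cstdio>
#include <vector>

struct Constraint {
  enum Kind { Copy, Load, Store, AddressOf } Type;
  unsigned Dest, Src;
};

int main() {
  enum { P = 0, PObj = 1, Q = 2, R = 3 };       // hypothetical node numbers
  std::vector<Constraint> Constraints = {
      {Constraint::AddressOf, P, PObj},         // %p = alloca i32*
      {Constraint::Store, P, Q},                // store i32* %q, i32** %p
      {Constraint::Load, R, P},                 // %r = load i32** %p
  };
  const char *Names[] = {"Copy", "Load", "Store", "AddressOf"};
  const char *IR[] = {"%p = alloca i32*", "store i32* %q, i32** %p",
                      "%r = load i32** %p"};
  for (unsigned i = 0; i != Constraints.size(); ++i)
    std::printf("%-24s -> %s(dest=%u, src=%u)\n", IR[i],
                Names[Constraints[i].Type], Constraints[i].Dest,
                Constraints[i].Src);
  return 0;
}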
-
-//===----------------------------------------------------------------------===//
-// Constraint Solving Phase
-//===----------------------------------------------------------------------===//
-
-/// intersects - Return true if the points-to set of this node intersects
-/// with the points-to set of the specified node.
-bool Andersens::Node::intersects(Node *N) const {
- return PointsTo->intersects(N->PointsTo);
-}
-
-/// intersectsIgnoring - Return true if the points-to set of this node
-/// intersects with the points-to set of the specified node on any nodes
-/// except for the specified node to ignore.
-bool Andersens::Node::intersectsIgnoring(Node *N, unsigned Ignoring) const {
- // TODO: If we are only going to call this with the same value for Ignoring,
- // we should move the special values out of the points-to bitmap.
- bool WeHadIt = PointsTo->test(Ignoring);
- bool NHadIt = N->PointsTo->test(Ignoring);
- bool Result = false;
- if (WeHadIt)
- PointsTo->reset(Ignoring);
- if (NHadIt)
- N->PointsTo->reset(Ignoring);
- Result = PointsTo->intersects(N->PointsTo);
- if (WeHadIt)
- PointsTo->set(Ignoring);
- if (NHadIt)
- N->PointsTo->set(Ignoring);
- return Result;
-}
-
-
-/// Clump together address taken variables so that the points-to sets use up
-/// less space and can be operated on faster.
-
-void Andersens::ClumpAddressTaken() {
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa-renumber"
- std::vector<unsigned> Translate;
- std::vector<Node> NewGraphNodes;
-
- Translate.resize(GraphNodes.size());
- unsigned NewPos = 0;
-
- for (unsigned i = 0; i < Constraints.size(); ++i) {
- Constraint &C = Constraints[i];
- if (C.Type == Constraint::AddressOf) {
- GraphNodes[C.Src].AddressTaken = true;
- }
- }
- for (unsigned i = 0; i < NumberSpecialNodes; ++i) {
- unsigned Pos = NewPos++;
- Translate[i] = Pos;
- NewGraphNodes.push_back(GraphNodes[i]);
- DEBUG(dbgs() << "Renumbering node " << i << " to node " << Pos << "\n");
- }
-
- // I believe this ends up being faster than making two vectors and splicing
- // them.
- for (unsigned i = NumberSpecialNodes; i < GraphNodes.size(); ++i) {
- if (GraphNodes[i].AddressTaken) {
- unsigned Pos = NewPos++;
- Translate[i] = Pos;
- NewGraphNodes.push_back(GraphNodes[i]);
- DEBUG(dbgs() << "Renumbering node " << i << " to node " << Pos << "\n");
- }
- }
-
- for (unsigned i = NumberSpecialNodes; i < GraphNodes.size(); ++i) {
- if (!GraphNodes[i].AddressTaken) {
- unsigned Pos = NewPos++;
- Translate[i] = Pos;
- NewGraphNodes.push_back(GraphNodes[i]);
- DEBUG(dbgs() << "Renumbering node " << i << " to node " << Pos << "\n");
- }
- }
-
- for (DenseMap<Value*, unsigned>::iterator Iter = ValueNodes.begin();
- Iter != ValueNodes.end();
- ++Iter)
- Iter->second = Translate[Iter->second];
-
- for (DenseMap<Value*, unsigned>::iterator Iter = ObjectNodes.begin();
- Iter != ObjectNodes.end();
- ++Iter)
- Iter->second = Translate[Iter->second];
-
- for (DenseMap<Function*, unsigned>::iterator Iter = ReturnNodes.begin();
- Iter != ReturnNodes.end();
- ++Iter)
- Iter->second = Translate[Iter->second];
-
- for (DenseMap<Function*, unsigned>::iterator Iter = VarargNodes.begin();
- Iter != VarargNodes.end();
- ++Iter)
- Iter->second = Translate[Iter->second];
-
- for (unsigned i = 0; i < Constraints.size(); ++i) {
- Constraint &C = Constraints[i];
- C.Src = Translate[C.Src];
- C.Dest = Translate[C.Dest];
- }
-
- GraphNodes.swap(NewGraphNodes);
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa"
-}
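// A standalone sketch (not from the original file) of the renumbering that
// ClumpAddressTaken performs: the special nodes keep their slots, the
// address-taken nodes are packed right after them, everything else follows,
// and a Translate table remaps old indices to new ones.
#include <cstdio>
#include <vector>

int main() {
  const unsigned NumberSpecialNodes = 3;
  // true = address taken, for a hypothetical graph of 8 nodes.
  std::vector<bool> AddressTaken = {false, false, false,
                                    false, true, false, true, false};

  std::vector<unsigned> Translate(AddressTaken.size());
  unsigned NewPos = 0;
  for (unsigned i = 0; i != NumberSpecialNodes; ++i)
    Translate[i] = NewPos++;
  for (unsigned i = NumberSpecialNodes; i != AddressTaken.size(); ++i)
    if (AddressTaken[i])
      Translate[i] = NewPos++;
  for (unsigned i = NumberSpecialNodes; i != AddressTaken.size(); ++i)
    if (!AddressTaken[i])
      Translate[i] = NewPos++;

  for (unsigned i = 0; i != Translate.size(); ++i)
    std::printf("node %u -> %u\n", i, Translate[i]);
  return 0;  // nodes 4 and 6 move to slots 3 and 4, the rest follow
}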
-
-/// The technique used here is described in "Exploiting Pointer and Location
-/// Equivalence to Optimize Pointer Analysis. In the 14th International Static
-/// Analysis Symposium (SAS), August 2007." It is known as the "HVN" algorithm,
-/// and is equivalent to value numbering the collapsed constraint graph without
-/// evaluating unions. This is used as a pre-pass to HU in order to resolve
-/// first order pointer dereferences and speed up/reduce memory usage of HU.
- /// Running both is equivalent to HRU without the iteration.
-/// HVN in more detail:
-/// Imagine the set of constraints was simply straight line code with no loops
-/// (we eliminate cycles, so there are no loops), such as:
-/// E = &D
-/// E = &C
-/// E = F
-/// F = G
-/// G = F
-/// Applying value numbering to this code tells us:
-/// G == F == E
-///
-/// For HVN, this is as far as it goes. We assign new value numbers to every
-/// "address node", and every "reference node".
-/// To get the optimal result for this, we use a DFS + SCC (since all nodes in a
-/// cycle must have the same value number since the = operation is really
- /// inclusion, not overwrite), and we value-number the nodes from which we
- /// receive points-to sets before we value our own node.
-/// The advantage of HU over HVN is that HU considers the inclusion property, so
-/// that if you have
-/// E = &D
-/// E = &C
-/// E = F
-/// F = G
-/// F = &D
-/// G = F
-/// HU will determine that G == F == E. HVN will not, because it cannot prove
- /// that the points-to information ends up being the same because they all
-/// receive &D from E anyway.
-
-void Andersens::HVN() {
- DEBUG(dbgs() << "Beginning HVN\n");
- // Build a predecessor graph. This is like our constraint graph with the
- // edges going in the opposite direction, and there are edges for all the
- // constraints, instead of just copy constraints. We also build implicit
-  // edges for constraints that are implied but not explicit, i.e., for the
-  // constraint a = &b we add the implicit edge *a = b. This helps us capture
-  // more cycles.
- for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
- Constraint &C = Constraints[i];
- if (C.Type == Constraint::AddressOf) {
- GraphNodes[C.Src].AddressTaken = true;
- GraphNodes[C.Src].Direct = false;
-
- // Dest = &src edge
- unsigned AdrNode = C.Src + FirstAdrNode;
- if (!GraphNodes[C.Dest].PredEdges)
- GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
- GraphNodes[C.Dest].PredEdges->set(AdrNode);
-
- // *Dest = src edge
- unsigned RefNode = C.Dest + FirstRefNode;
- if (!GraphNodes[RefNode].ImplicitPredEdges)
- GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>;
- GraphNodes[RefNode].ImplicitPredEdges->set(C.Src);
- } else if (C.Type == Constraint::Load) {
- if (C.Offset == 0) {
- // dest = *src edge
- if (!GraphNodes[C.Dest].PredEdges)
- GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
- GraphNodes[C.Dest].PredEdges->set(C.Src + FirstRefNode);
- } else {
- GraphNodes[C.Dest].Direct = false;
- }
- } else if (C.Type == Constraint::Store) {
- if (C.Offset == 0) {
- // *dest = src edge
- unsigned RefNode = C.Dest + FirstRefNode;
- if (!GraphNodes[RefNode].PredEdges)
- GraphNodes[RefNode].PredEdges = new SparseBitVector<>;
- GraphNodes[RefNode].PredEdges->set(C.Src);
- }
- } else {
- // Dest = Src edge and *Dest = *Src edge
- if (!GraphNodes[C.Dest].PredEdges)
- GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
- GraphNodes[C.Dest].PredEdges->set(C.Src);
- unsigned RefNode = C.Dest + FirstRefNode;
- if (!GraphNodes[RefNode].ImplicitPredEdges)
- GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>;
- GraphNodes[RefNode].ImplicitPredEdges->set(C.Src + FirstRefNode);
- }
- }
- PEClass = 1;
- // Do SCC finding first to condense our predecessor graph
- DFSNumber = 0;
- Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0);
- Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false);
- Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false);
-
- for (unsigned i = 0; i < FirstRefNode; ++i) {
- unsigned Node = VSSCCRep[i];
- if (!Node2Visited[Node])
- HVNValNum(Node);
- }
- for (BitVectorMap::iterator Iter = Set2PEClass.begin();
- Iter != Set2PEClass.end();
- ++Iter)
- delete Iter->first;
- Set2PEClass.clear();
- Node2DFS.clear();
- Node2Deleted.clear();
- Node2Visited.clear();
- DEBUG(dbgs() << "Finished HVN\n");
-
-}
-
-/// This is the workhorse of HVN value numbering. We combine SCC finding at the
-/// same time because it's easy.
-void Andersens::HVNValNum(unsigned NodeIndex) {
- unsigned MyDFS = DFSNumber++;
- Node *N = &GraphNodes[NodeIndex];
- Node2Visited[NodeIndex] = true;
- Node2DFS[NodeIndex] = MyDFS;
-
- // First process all our explicit edges
- if (N->PredEdges)
- for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
- Iter != N->PredEdges->end();
- ++Iter) {
- unsigned j = VSSCCRep[*Iter];
- if (!Node2Deleted[j]) {
- if (!Node2Visited[j])
- HVNValNum(j);
- if (Node2DFS[NodeIndex] > Node2DFS[j])
- Node2DFS[NodeIndex] = Node2DFS[j];
- }
- }
-
- // Now process all the implicit edges
- if (N->ImplicitPredEdges)
- for (SparseBitVector<>::iterator Iter = N->ImplicitPredEdges->begin();
- Iter != N->ImplicitPredEdges->end();
- ++Iter) {
- unsigned j = VSSCCRep[*Iter];
- if (!Node2Deleted[j]) {
- if (!Node2Visited[j])
- HVNValNum(j);
- if (Node2DFS[NodeIndex] > Node2DFS[j])
- Node2DFS[NodeIndex] = Node2DFS[j];
- }
- }
-
- // See if we found any cycles
- if (MyDFS == Node2DFS[NodeIndex]) {
- while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) {
- unsigned CycleNodeIndex = SCCStack.top();
- Node *CycleNode = &GraphNodes[CycleNodeIndex];
- VSSCCRep[CycleNodeIndex] = NodeIndex;
- // Unify the nodes
- N->Direct &= CycleNode->Direct;
-
- if (CycleNode->PredEdges) {
- if (!N->PredEdges)
- N->PredEdges = new SparseBitVector<>;
- *(N->PredEdges) |= CycleNode->PredEdges;
- delete CycleNode->PredEdges;
- CycleNode->PredEdges = NULL;
- }
- if (CycleNode->ImplicitPredEdges) {
- if (!N->ImplicitPredEdges)
- N->ImplicitPredEdges = new SparseBitVector<>;
- *(N->ImplicitPredEdges) |= CycleNode->ImplicitPredEdges;
- delete CycleNode->ImplicitPredEdges;
- CycleNode->ImplicitPredEdges = NULL;
- }
-
- SCCStack.pop();
- }
-
- Node2Deleted[NodeIndex] = true;
-
- if (!N->Direct) {
- GraphNodes[NodeIndex].PointerEquivLabel = PEClass++;
- return;
- }
-
- // Collect labels of successor nodes
- bool AllSame = true;
- unsigned First = ~0;
- SparseBitVector<> *Labels = new SparseBitVector<>;
- bool Used = false;
-
- if (N->PredEdges)
- for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
- Iter != N->PredEdges->end();
- ++Iter) {
- unsigned j = VSSCCRep[*Iter];
- unsigned Label = GraphNodes[j].PointerEquivLabel;
- // Ignore labels that are equal to us or non-pointers
- if (j == NodeIndex || Label == 0)
- continue;
- if (First == (unsigned)~0)
- First = Label;
- else if (First != Label)
- AllSame = false;
- Labels->set(Label);
- }
-
- // We either have a non-pointer, a copy of an existing node, or a new node.
- // Assign the appropriate pointer equivalence label.
- if (Labels->empty()) {
- GraphNodes[NodeIndex].PointerEquivLabel = 0;
- } else if (AllSame) {
- GraphNodes[NodeIndex].PointerEquivLabel = First;
- } else {
- GraphNodes[NodeIndex].PointerEquivLabel = Set2PEClass[Labels];
- if (GraphNodes[NodeIndex].PointerEquivLabel == 0) {
- unsigned EquivClass = PEClass++;
- Set2PEClass[Labels] = EquivClass;
- GraphNodes[NodeIndex].PointerEquivLabel = EquivClass;
- Used = true;
- }
- }
- if (!Used)
- delete Labels;
- } else {
- SCCStack.push(NodeIndex);
- }
-}
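// A simplified standalone model (not from the original file) of the
// Set2PEClass hashing at the end of HVNValNum above: a node's pointer
// equivalence class is keyed on the set of labels it receives, so nodes with
// identical incoming label sets share a class and can later be united by
// RewriteConstraints.
#include <cstdio>
#include <map>
#include <set>
#include <vector>

int main() {
  // Hypothetical incoming label sets for three nodes.
  std::vector<std::set<unsigned>> Incoming = {{1, 2}, {1, 2}, {1, 3}};

  std::map<std::set<unsigned>, unsigned> Set2PEClass;  // label set -> class
  unsigned PEClass = 1;
  std::vector<unsigned> PointerEquivLabel;

  for (const std::set<unsigned> &Labels : Incoming) {
    unsigned &Class = Set2PEClass[Labels];   // 0 if this set is new
    if (Class == 0)
      Class = PEClass++;
    PointerEquivLabel.push_back(Class);
  }

  for (unsigned i = 0; i != PointerEquivLabel.size(); ++i)
    std::printf("node %u -> pointer equivalence label %u\n", i,
                PointerEquivLabel[i]);
  return 0;  // the first two nodes share label 1, the third gets label 2
}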
-
-/// The technique used here is described in "Exploiting Pointer and Location
-/// Equivalence to Optimize Pointer Analysis. In the 14th International Static
-/// Analysis Symposium (SAS), August 2007." It is known as the "HU" algorithm,
-/// and is equivalent to value numbering the collapsed constraint graph
-/// including evaluating unions.
-void Andersens::HU() {
- DEBUG(dbgs() << "Beginning HU\n");
- // Build a predecessor graph. This is like our constraint graph with the
- // edges going in the opposite direction, and there are edges for all the
- // constraints, instead of just copy constraints. We also build implicit
-  // edges for constraints that are implied but not explicit, i.e., for the
-  // constraint a = &b we add the implicit edge *a = b. This helps us capture
-  // more cycles.
- for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
- Constraint &C = Constraints[i];
- if (C.Type == Constraint::AddressOf) {
- GraphNodes[C.Src].AddressTaken = true;
- GraphNodes[C.Src].Direct = false;
-
- GraphNodes[C.Dest].PointsTo->set(C.Src);
- // *Dest = src edge
- unsigned RefNode = C.Dest + FirstRefNode;
- if (!GraphNodes[RefNode].ImplicitPredEdges)
- GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>;
- GraphNodes[RefNode].ImplicitPredEdges->set(C.Src);
- GraphNodes[C.Src].PointedToBy->set(C.Dest);
- } else if (C.Type == Constraint::Load) {
- if (C.Offset == 0) {
- // dest = *src edge
- if (!GraphNodes[C.Dest].PredEdges)
- GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
- GraphNodes[C.Dest].PredEdges->set(C.Src + FirstRefNode);
- } else {
- GraphNodes[C.Dest].Direct = false;
- }
- } else if (C.Type == Constraint::Store) {
- if (C.Offset == 0) {
- // *dest = src edge
- unsigned RefNode = C.Dest + FirstRefNode;
- if (!GraphNodes[RefNode].PredEdges)
- GraphNodes[RefNode].PredEdges = new SparseBitVector<>;
- GraphNodes[RefNode].PredEdges->set(C.Src);
- }
- } else {
-      // Dest = Src edge and *Dest = *Src edge
- if (!GraphNodes[C.Dest].PredEdges)
- GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
- GraphNodes[C.Dest].PredEdges->set(C.Src);
- unsigned RefNode = C.Dest + FirstRefNode;
- if (!GraphNodes[RefNode].ImplicitPredEdges)
- GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>;
- GraphNodes[RefNode].ImplicitPredEdges->set(C.Src + FirstRefNode);
- }
- }
- PEClass = 1;
- // Do SCC finding first to condense our predecessor graph
- DFSNumber = 0;
- Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0);
- Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false);
- Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false);
-
- for (unsigned i = 0; i < FirstRefNode; ++i) {
- if (FindNode(i) == i) {
- unsigned Node = VSSCCRep[i];
- if (!Node2Visited[Node])
- Condense(Node);
- }
- }
-
- // Reset tables for actual labeling
- Node2DFS.clear();
- Node2Visited.clear();
- Node2Deleted.clear();
- // Pre-grow our densemap so that we don't get really bad behavior
- Set2PEClass.resize(GraphNodes.size());
-
- // Visit the condensed graph and generate pointer equivalence labels.
- Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false);
- for (unsigned i = 0; i < FirstRefNode; ++i) {
- if (FindNode(i) == i) {
- unsigned Node = VSSCCRep[i];
- if (!Node2Visited[Node])
- HUValNum(Node);
- }
- }
-  // PEClass nodes will be deleted by the deletion of N->PointsTo in our caller.
- Set2PEClass.clear();
- DEBUG(dbgs() << "Finished HU\n");
-}
-
-
- /// Implementation of the standard Tarjan SCC algorithm as modified by Nuutila.
-void Andersens::Condense(unsigned NodeIndex) {
- unsigned MyDFS = DFSNumber++;
- Node *N = &GraphNodes[NodeIndex];
- Node2Visited[NodeIndex] = true;
- Node2DFS[NodeIndex] = MyDFS;
-
- // First process all our explicit edges
- if (N->PredEdges)
- for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
- Iter != N->PredEdges->end();
- ++Iter) {
- unsigned j = VSSCCRep[*Iter];
- if (!Node2Deleted[j]) {
- if (!Node2Visited[j])
- Condense(j);
- if (Node2DFS[NodeIndex] > Node2DFS[j])
- Node2DFS[NodeIndex] = Node2DFS[j];
- }
- }
-
- // Now process all the implicit edges
- if (N->ImplicitPredEdges)
- for (SparseBitVector<>::iterator Iter = N->ImplicitPredEdges->begin();
- Iter != N->ImplicitPredEdges->end();
- ++Iter) {
- unsigned j = VSSCCRep[*Iter];
- if (!Node2Deleted[j]) {
- if (!Node2Visited[j])
- Condense(j);
- if (Node2DFS[NodeIndex] > Node2DFS[j])
- Node2DFS[NodeIndex] = Node2DFS[j];
- }
- }
-
- // See if we found any cycles
- if (MyDFS == Node2DFS[NodeIndex]) {
- while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) {
- unsigned CycleNodeIndex = SCCStack.top();
- Node *CycleNode = &GraphNodes[CycleNodeIndex];
- VSSCCRep[CycleNodeIndex] = NodeIndex;
- // Unify the nodes
- N->Direct &= CycleNode->Direct;
-
- *(N->PointsTo) |= CycleNode->PointsTo;
- delete CycleNode->PointsTo;
- CycleNode->PointsTo = NULL;
- if (CycleNode->PredEdges) {
- if (!N->PredEdges)
- N->PredEdges = new SparseBitVector<>;
- *(N->PredEdges) |= CycleNode->PredEdges;
- delete CycleNode->PredEdges;
- CycleNode->PredEdges = NULL;
- }
- if (CycleNode->ImplicitPredEdges) {
- if (!N->ImplicitPredEdges)
- N->ImplicitPredEdges = new SparseBitVector<>;
- *(N->ImplicitPredEdges) |= CycleNode->ImplicitPredEdges;
- delete CycleNode->ImplicitPredEdges;
- CycleNode->ImplicitPredEdges = NULL;
- }
- SCCStack.pop();
- }
-
- Node2Deleted[NodeIndex] = true;
-
- // Set up number of incoming edges for other nodes
- if (N->PredEdges)
- for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
- Iter != N->PredEdges->end();
- ++Iter)
- ++GraphNodes[VSSCCRep[*Iter]].NumInEdges;
- } else {
- SCCStack.push(NodeIndex);
- }
-}
-
-void Andersens::HUValNum(unsigned NodeIndex) {
- Node *N = &GraphNodes[NodeIndex];
- Node2Visited[NodeIndex] = true;
-
- // Eliminate dereferences of non-pointers for those non-pointers we have
- // already identified. These are ref nodes whose non-ref node:
-  // 1. Has already been visited and determined to point to nothing (and thus,
-  // a dereference of it must point to nothing), or
-  // 2. Is a direct node with no predecessor edges in our graph and with no
-  // points-to set (since it can't point to anything either, as it receives
-  // no points-to sets and has none of its own).
- if (NodeIndex >= FirstRefNode) {
- unsigned j = VSSCCRep[FindNode(NodeIndex - FirstRefNode)];
- if ((Node2Visited[j] && !GraphNodes[j].PointerEquivLabel)
- || (GraphNodes[j].Direct && !GraphNodes[j].PredEdges
- && GraphNodes[j].PointsTo->empty())){
- return;
- }
- }
- // Process all our explicit edges
- if (N->PredEdges)
- for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
- Iter != N->PredEdges->end();
- ++Iter) {
- unsigned j = VSSCCRep[*Iter];
- if (!Node2Visited[j])
- HUValNum(j);
-
- // If this edge turned out to be the same as us, or got no pointer
-      // equivalence label (and thus points to nothing), just decrement our
- // incoming edges and continue.
- if (j == NodeIndex || GraphNodes[j].PointerEquivLabel == 0) {
- --GraphNodes[j].NumInEdges;
- continue;
- }
-
- *(N->PointsTo) |= GraphNodes[j].PointsTo;
-
- // If we didn't end up storing this in the hash, and we're done with all
- // the edges, we don't need the points-to set anymore.
- --GraphNodes[j].NumInEdges;
- if (!GraphNodes[j].NumInEdges && !GraphNodes[j].StoredInHash) {
- delete GraphNodes[j].PointsTo;
- GraphNodes[j].PointsTo = NULL;
- }
- }
- // If this isn't a direct node, generate a fresh variable.
- if (!N->Direct) {
- N->PointsTo->set(FirstRefNode + NodeIndex);
- }
-
-  // See if we have something equivalent to us; if not, generate a new
- // equivalence class.
- if (N->PointsTo->empty()) {
- delete N->PointsTo;
- N->PointsTo = NULL;
- } else {
- if (N->Direct) {
- N->PointerEquivLabel = Set2PEClass[N->PointsTo];
- if (N->PointerEquivLabel == 0) {
- unsigned EquivClass = PEClass++;
- N->StoredInHash = true;
- Set2PEClass[N->PointsTo] = EquivClass;
- N->PointerEquivLabel = EquivClass;
- }
- } else {
- N->PointerEquivLabel = PEClass++;
- }
- }
-}
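// A standalone sketch (not from the original file) of why HU is stronger
// than HVN, using assumed toy constraints X = &a, X = &b, Y = &a, Y = &b,
// Y = X. HVN compares incoming label sets ({La,Lb} for X versus
// {class(X),La,Lb} for Y) and keeps the nodes apart; HU evaluates the unions,
// finds both points-to sets equal to {a,b}, and gives X and Y the same class.
#include <cstdio>
#include <map>
#include <set>

int main() {
  std::set<unsigned> X = {0, 1};          // X = &a, X = &b  (a = 0, b = 1)
  std::set<unsigned> Y = {0, 1};          // Y = &a, Y = &b
  Y.insert(X.begin(), X.end());           // Y = X (inclusion, not overwrite)

  std::map<std::set<unsigned>, unsigned> Set2PEClass;
  unsigned PEClass = 1;
  unsigned &XClass = Set2PEClass[X];
  if (XClass == 0) XClass = PEClass++;
  unsigned &YClass = Set2PEClass[Y];
  if (YClass == 0) YClass = PEClass++;

  std::printf("X class %u, Y class %u -> %s\n", XClass, YClass,
              XClass == YClass ? "unifiable" : "kept apart");
  return 0;                               // both get class 1: unifiable
}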
-
-/// Rewrite our list of constraints so that pointer equivalent nodes are
- /// replaced by their pointer equivalence class representative.
-void Andersens::RewriteConstraints() {
- std::vector<Constraint> NewConstraints;
- DenseSet<Constraint, ConstraintKeyInfo> Seen;
-
- PEClass2Node.clear();
- PENLEClass2Node.clear();
-
-  // We may have from 1 to GraphNodes.size() + 1 equivalence classes.
- PEClass2Node.insert(PEClass2Node.begin(), GraphNodes.size() + 1, -1);
- PENLEClass2Node.insert(PENLEClass2Node.begin(), GraphNodes.size() + 1, -1);
-
- // Rewrite constraints, ignoring non-pointer constraints, uniting equivalent
- // nodes, and rewriting constraints to use the representative nodes.
- for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
- Constraint &C = Constraints[i];
- unsigned RHSNode = FindNode(C.Src);
- unsigned LHSNode = FindNode(C.Dest);
- unsigned RHSLabel = GraphNodes[VSSCCRep[RHSNode]].PointerEquivLabel;
- unsigned LHSLabel = GraphNodes[VSSCCRep[LHSNode]].PointerEquivLabel;
-
- // First we try to eliminate constraints for things we can prove don't point
- // to anything.
- if (LHSLabel == 0) {
- DEBUG(PrintNode(&GraphNodes[LHSNode]));
- DEBUG(dbgs() << " is a non-pointer, ignoring constraint.\n");
- continue;
- }
- if (RHSLabel == 0) {
- DEBUG(PrintNode(&GraphNodes[RHSNode]));
- DEBUG(dbgs() << " is a non-pointer, ignoring constraint.\n");
- continue;
- }
- // This constraint may be useless, and it may become useless as we translate
- // it.
- if (C.Src == C.Dest && C.Type == Constraint::Copy)
- continue;
-
- C.Src = FindEquivalentNode(RHSNode, RHSLabel);
- C.Dest = FindEquivalentNode(FindNode(LHSNode), LHSLabel);
- if ((C.Src == C.Dest && C.Type == Constraint::Copy)
- || Seen.count(C))
- continue;
-
- Seen.insert(C);
- NewConstraints.push_back(C);
- }
- Constraints.swap(NewConstraints);
- PEClass2Node.clear();
-}
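// A standalone sketch (not from the original file) of the rewriting loop
// above: constraints touching a node whose pointer-equivalence label is 0
// (provably pointer-free) are dropped, sources and destinations are replaced
// by class representatives, and duplicates are filtered through a Seen set.
#include <cstdio>
#include <set>
#include <tuple>
#include <vector>

struct Constraint {
  enum Kind { Copy, Load, Store } Type;
  unsigned Dest, Src;
};

int main() {
  // Hypothetical equivalence data: node 2 has label 0 (non-pointer), and
  // nodes 0 and 1 share representative 0.
  std::vector<unsigned> Label = {1, 1, 0, 2};
  std::vector<unsigned> Rep = {0, 0, 2, 3};

  std::vector<Constraint> Constraints = {
      {Constraint::Copy, 3, 1},   // kept, Src rewritten to node 0
      {Constraint::Copy, 3, 0},   // now a duplicate of the first
      {Constraint::Load, 3, 2},   // dropped: Src is a non-pointer
  };

  std::set<std::tuple<int, unsigned, unsigned>> Seen;
  std::vector<Constraint> NewConstraints;
  for (Constraint C : Constraints) {
    C.Src = Rep[C.Src];
    C.Dest = Rep[C.Dest];
    if (Label[C.Src] == 0 || Label[C.Dest] == 0)
      continue;                                  // provably points to nothing
    if (!Seen.insert(std::make_tuple(int(C.Type), C.Dest, C.Src)).second)
      continue;                                  // redundant after rewriting
    NewConstraints.push_back(C);
  }
  std::printf("%zu of %zu constraints survive\n", NewConstraints.size(),
              Constraints.size());
  return 0;                                      // prints "1 of 3"
}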
-
-/// See if we have a node that is pointer equivalent to the one being asked
-/// about, and if so, unite them and return the equivalent node. Otherwise,
-/// return the original node.
-unsigned Andersens::FindEquivalentNode(unsigned NodeIndex,
- unsigned NodeLabel) {
- if (!GraphNodes[NodeIndex].AddressTaken) {
- if (PEClass2Node[NodeLabel] != -1) {
- // We found an existing node with the same pointer label, so unify them.
- // We specifically request that Union-By-Rank not be used so that
- // PEClass2Node[NodeLabel] U= NodeIndex and not the other way around.
- return UniteNodes(PEClass2Node[NodeLabel], NodeIndex, false);
- } else {
- PEClass2Node[NodeLabel] = NodeIndex;
- PENLEClass2Node[NodeLabel] = NodeIndex;
- }
- } else if (PENLEClass2Node[NodeLabel] == -1) {
- PENLEClass2Node[NodeLabel] = NodeIndex;
- }
-
- return NodeIndex;
-}
-
-void Andersens::PrintLabels() const {
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- if (i < FirstRefNode) {
- PrintNode(&GraphNodes[i]);
- } else if (i < FirstAdrNode) {
- DEBUG(dbgs() << "REF(");
- PrintNode(&GraphNodes[i-FirstRefNode]);
- DEBUG(dbgs() <<")");
- } else {
- DEBUG(dbgs() << "ADR(");
- PrintNode(&GraphNodes[i-FirstAdrNode]);
- DEBUG(dbgs() <<")");
- }
-
- DEBUG(dbgs() << " has pointer label " << GraphNodes[i].PointerEquivLabel
- << " and SCC rep " << VSSCCRep[i]
- << " and is " << (GraphNodes[i].Direct ? "Direct" : "Not direct")
- << "\n");
- }
-}
-
-/// The technique used here is described in "The Ant and the
-/// Grasshopper: Fast and Accurate Pointer Analysis for Millions of
-/// Lines of Code. In Programming Language Design and Implementation
-/// (PLDI), June 2007." It is known as the "HCD" (Hybrid Cycle
-/// Detection) algorithm. It is called a hybrid because it performs an
-/// offline analysis and uses its results during the solving (online)
-/// phase. This is just the offline portion; the results of this
- /// operation are stored in SDT and are later used in SolveConstraints()
-/// and UniteNodes().
-void Andersens::HCD() {
- DEBUG(dbgs() << "Starting HCD.\n");
- HCDSCCRep.resize(GraphNodes.size());
-
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- GraphNodes[i].Edges = new SparseBitVector<>;
- HCDSCCRep[i] = i;
- }
-
- for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
- Constraint &C = Constraints[i];
- assert (C.Src < GraphNodes.size() && C.Dest < GraphNodes.size());
- if (C.Type == Constraint::AddressOf) {
- continue;
- } else if (C.Type == Constraint::Load) {
- if( C.Offset == 0 )
- GraphNodes[C.Dest].Edges->set(C.Src + FirstRefNode);
- } else if (C.Type == Constraint::Store) {
- if( C.Offset == 0 )
- GraphNodes[C.Dest + FirstRefNode].Edges->set(C.Src);
- } else {
- GraphNodes[C.Dest].Edges->set(C.Src);
- }
- }
-
- Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0);
- Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false);
- Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false);
- SDT.insert(SDT.begin(), GraphNodes.size() / 2, -1);
-
- DFSNumber = 0;
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- unsigned Node = HCDSCCRep[i];
- if (!Node2Deleted[Node])
- Search(Node);
- }
-
- for (unsigned i = 0; i < GraphNodes.size(); ++i)
- if (GraphNodes[i].Edges != NULL) {
- delete GraphNodes[i].Edges;
- GraphNodes[i].Edges = NULL;
- }
-
- while( !SCCStack.empty() )
- SCCStack.pop();
-
- Node2DFS.clear();
- Node2Visited.clear();
- Node2Deleted.clear();
- HCDSCCRep.clear();
- DEBUG(dbgs() << "HCD complete.\n");
-}
-
-// Component of HCD:
-// Use Nuutila's variant of Tarjan's algorithm to detect
-// Strongly-Connected Components (SCCs). For non-trivial SCCs
-// containing ref nodes, insert the appropriate information in SDT.
-void Andersens::Search(unsigned Node) {
- unsigned MyDFS = DFSNumber++;
-
- Node2Visited[Node] = true;
- Node2DFS[Node] = MyDFS;
-
- for (SparseBitVector<>::iterator Iter = GraphNodes[Node].Edges->begin(),
- End = GraphNodes[Node].Edges->end();
- Iter != End;
- ++Iter) {
- unsigned J = HCDSCCRep[*Iter];
- assert(GraphNodes[J].isRep() && "Debug check; must be representative");
- if (!Node2Deleted[J]) {
- if (!Node2Visited[J])
- Search(J);
- if (Node2DFS[Node] > Node2DFS[J])
- Node2DFS[Node] = Node2DFS[J];
- }
- }
-
- if( MyDFS != Node2DFS[Node] ) {
- SCCStack.push(Node);
- return;
- }
-
-  // This node is the root of an SCC, so process it.
- //
- // If the SCC is "non-trivial" (not a singleton) and contains a reference
- // node, we place this SCC into SDT. We unite the nodes in any case.
- if (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) {
- SparseBitVector<> SCC;
-
- SCC.set(Node);
-
- bool Ref = (Node >= FirstRefNode);
-
- Node2Deleted[Node] = true;
-
- do {
- unsigned P = SCCStack.top(); SCCStack.pop();
- Ref |= (P >= FirstRefNode);
- SCC.set(P);
- HCDSCCRep[P] = Node;
- } while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS);
-
- if (Ref) {
- unsigned Rep = SCC.find_first();
- assert(Rep < FirstRefNode && "The SCC didn't have a non-Ref node!");
-
- SparseBitVector<>::iterator i = SCC.begin();
-
- // Skip over the non-ref nodes
- while( *i < FirstRefNode )
- ++i;
-
- while( i != SCC.end() )
- SDT[ (*i++) - FirstRefNode ] = Rep;
- }
- }
-}
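// A standalone sketch (not from the original file) of how the SDT table
// built by HCD above is meant to be consumed at solve time: if ref node *p
// was found offline to be in a cycle with representative x, then every node
// that later flows into p's points-to set is united with x on the spot.
#include <cstdio>
#include <map>
#include <numeric>
#include <vector>

// Trivial union-find standing in for UniteNodes/FindNode.
struct UnionFind {
  std::vector<unsigned> Parent;
  explicit UnionFind(unsigned N) : Parent(N) {
    std::iota(Parent.begin(), Parent.end(), 0u);
  }
  unsigned find(unsigned V) {
    return Parent[V] == V ? V : Parent[V] = find(Parent[V]);
  }
  unsigned unite(unsigned A, unsigned B) {
    A = find(A);
    B = find(B);
    Parent[B] = A;
    return A;
  }
};

int main() {
  enum { X = 0, Obj1 = 1, Obj2 = 2, P = 3 };     // hypothetical node numbers
  UnionFind UF(4);
  std::map<unsigned, int> SDT = {{P, X}};        // offline: *P cycles with X

  // At solve time, two objects flow into P's points-to set.
  for (unsigned Pointee : {Obj1, Obj2})
    if (SDT.count(P))
      UF.unite(UF.find(SDT[P]), Pointee);        // collapse immediately

  std::printf("rep(Obj1)=%u rep(Obj2)=%u rep(X)=%u\n", UF.find(Obj1),
              UF.find(Obj2), UF.find(X));
  return 0;                                      // all three share rep 0
}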
-
-
-/// Optimize the constraints by performing offline variable substitution and
-/// other optimizations.
-void Andersens::OptimizeConstraints() {
- DEBUG(dbgs() << "Beginning constraint optimization\n");
-
- SDTActive = false;
-
- // Function related nodes need to stay in the same relative position and can't
- // be location equivalent.
- for (std::map<unsigned, unsigned>::iterator Iter = MaxK.begin();
- Iter != MaxK.end();
- ++Iter) {
- for (unsigned i = Iter->first;
- i != Iter->first + Iter->second;
- ++i) {
- GraphNodes[i].AddressTaken = true;
- GraphNodes[i].Direct = false;
- }
- }
-
- ClumpAddressTaken();
- FirstRefNode = GraphNodes.size();
- FirstAdrNode = FirstRefNode + GraphNodes.size();
- GraphNodes.insert(GraphNodes.end(), 2 * GraphNodes.size(),
- Node(false));
- VSSCCRep.resize(GraphNodes.size());
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- VSSCCRep[i] = i;
- }
- HVN();
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- Node *N = &GraphNodes[i];
- delete N->PredEdges;
- N->PredEdges = NULL;
- delete N->ImplicitPredEdges;
- N->ImplicitPredEdges = NULL;
- }
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa-labels"
- DEBUG(PrintLabels());
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa"
- RewriteConstraints();
- // Delete the adr nodes.
- GraphNodes.resize(FirstRefNode * 2);
-
- // Now perform HU
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- Node *N = &GraphNodes[i];
- if (FindNode(i) == i) {
- N->PointsTo = new SparseBitVector<>;
- N->PointedToBy = new SparseBitVector<>;
- // Reset our labels
- }
- VSSCCRep[i] = i;
- N->PointerEquivLabel = 0;
- }
- HU();
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa-labels"
- DEBUG(PrintLabels());
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa"
- RewriteConstraints();
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- if (FindNode(i) == i) {
- Node *N = &GraphNodes[i];
- delete N->PointsTo;
- N->PointsTo = NULL;
- delete N->PredEdges;
- N->PredEdges = NULL;
- delete N->ImplicitPredEdges;
- N->ImplicitPredEdges = NULL;
- delete N->PointedToBy;
- N->PointedToBy = NULL;
- }
- }
-
- // perform Hybrid Cycle Detection (HCD)
- HCD();
- SDTActive = true;
-
- // No longer any need for the upper half of GraphNodes (for ref nodes).
- GraphNodes.erase(GraphNodes.begin() + FirstRefNode, GraphNodes.end());
-
- // HCD complete.
-
- DEBUG(dbgs() << "Finished constraint optimization\n");
- FirstRefNode = 0;
- FirstAdrNode = 0;
-}
-
- /// Unite variables that are pointer equivalent but not location equivalent,
- /// now that the constraint graph is built.
-void Andersens::UnitePointerEquivalences() {
- DEBUG(dbgs() << "Uniting remaining pointer equivalences\n");
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- if (GraphNodes[i].AddressTaken && GraphNodes[i].isRep()) {
- unsigned Label = GraphNodes[i].PointerEquivLabel;
-
- if (Label && PENLEClass2Node[Label] != -1)
- UniteNodes(i, PENLEClass2Node[Label]);
- }
- }
- DEBUG(dbgs() << "Finished remaining pointer equivalences\n");
- PENLEClass2Node.clear();
-}
-
-/// Create the constraint graph used for solving points-to analysis.
-///
-void Andersens::CreateConstraintGraph() {
- for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
- Constraint &C = Constraints[i];
- assert (C.Src < GraphNodes.size() && C.Dest < GraphNodes.size());
- if (C.Type == Constraint::AddressOf)
- GraphNodes[C.Dest].PointsTo->set(C.Src);
- else if (C.Type == Constraint::Load)
- GraphNodes[C.Src].Constraints.push_back(C);
- else if (C.Type == Constraint::Store)
- GraphNodes[C.Dest].Constraints.push_back(C);
- else if (C.Offset != 0)
- GraphNodes[C.Src].Constraints.push_back(C);
- else
- GraphNodes[C.Src].Edges->set(C.Dest);
- }
-}
-
-// Perform DFS and cycle detection.
-bool Andersens::QueryNode(unsigned Node) {
- assert(GraphNodes[Node].isRep() && "Querying a non-rep node");
- unsigned OurDFS = ++DFSNumber;
- SparseBitVector<> ToErase;
- SparseBitVector<> NewEdges;
- Tarjan2DFS[Node] = OurDFS;
-
- // Changed denotes a change from a recursive call that we will bubble up.
- // Merged is set if we actually merge a node ourselves.
- bool Changed = false, Merged = false;
-
- for (SparseBitVector<>::iterator bi = GraphNodes[Node].Edges->begin();
- bi != GraphNodes[Node].Edges->end();
- ++bi) {
- unsigned RepNode = FindNode(*bi);
- // If this edge points to a non-representative node but we are
- // already planning to add an edge to its representative, we have no
- // need for this edge anymore.
- if (RepNode != *bi && NewEdges.test(RepNode)){
- ToErase.set(*bi);
- continue;
- }
-
- // Continue about our DFS.
- if (!Tarjan2Deleted[RepNode]){
- if (Tarjan2DFS[RepNode] == 0) {
- Changed |= QueryNode(RepNode);
- // May have been changed by QueryNode
- RepNode = FindNode(RepNode);
- }
- if (Tarjan2DFS[RepNode] < Tarjan2DFS[Node])
- Tarjan2DFS[Node] = Tarjan2DFS[RepNode];
- }
-
- // We may have just discovered that this node is part of a cycle, in
- // which case we can also erase it.
- if (RepNode != *bi) {
- ToErase.set(*bi);
- NewEdges.set(RepNode);
- }
- }
-
- GraphNodes[Node].Edges->intersectWithComplement(ToErase);
- GraphNodes[Node].Edges |= NewEdges;
-
- // If this node is a root of a non-trivial SCC, place it on our
- // worklist to be processed.
- if (OurDFS == Tarjan2DFS[Node]) {
- while (!SCCStack.empty() && Tarjan2DFS[SCCStack.top()] >= OurDFS) {
- Node = UniteNodes(Node, SCCStack.top());
-
- SCCStack.pop();
- Merged = true;
- }
- Tarjan2Deleted[Node] = true;
-
- if (Merged)
- NextWL->insert(&GraphNodes[Node]);
- } else {
- SCCStack.push(Node);
- }
-
- return(Changed | Merged);
-}
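// A standalone sketch (not from the original file) of the lazy cycle
// detection heuristic described below: if propagating along an edge x -> y
// leaves the two points-to sets identical (and non-empty), x and y are
// suspected to lie on a cycle, so one endpoint is queued and all candidates
// from the iteration are Tarjan-checked together instead of one at a time.
#include <cstdio>
#include <queue>
#include <set>
#include <utility>
#include <vector>

int main() {
  // Hypothetical points-to sets after some propagation.
  std::vector<std::set<unsigned>> PointsTo = {{1, 2}, {1, 2}, {3}};
  std::vector<std::pair<unsigned, unsigned>> Edges = {{0, 1}, {1, 2}};

  std::queue<unsigned> TarjanWL;                        // batched candidates
  for (const auto &E : Edges)
    if (!PointsTo[E.first].empty() &&
        PointsTo[E.first] == PointsTo[E.second])
      TarjanWL.push(E.first);                           // suspected cycle

  std::printf("%zu node(s) queued for batched cycle detection\n",
              TarjanWL.size());
  return 0;  // only the endpoint of 0 -> 1 is queued; 1 -> 2 differs
}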
-
-/// SolveConstraints - This stage iteratively processes the constraints list
-/// propagating constraints (adding edges to the Nodes in the points-to graph)
-/// until a fixed point is reached.
-///
-/// We use a variant of the technique called "Lazy Cycle Detection", which is
-/// described in "The Ant and the Grasshopper: Fast and Accurate Pointer
-/// Analysis for Millions of Lines of Code. In Programming Language Design and
-/// Implementation (PLDI), June 2007."
- /// The paper describes performing cycle detection one node at a time, which
- /// can be expensive when there are no cycles but only long chains of nodes
- /// that the heuristic believes are cycles (because it DFSes from each node
- /// without carrying state over from previous nodes).
- /// Instead, we use the heuristic to build a worklist of nodes to check, then
- /// cycle detect them all at the same time to do this more cheaply. This
- /// catches cycles slightly later than the original technique did, but does so
- /// significantly more cheaply.
-
-void Andersens::SolveConstraints() {
- CurrWL = &w1;
- NextWL = &w2;
-
- OptimizeConstraints();
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa-constraints"
- DEBUG(PrintConstraints());
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa"
-
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- Node *N = &GraphNodes[i];
- N->PointsTo = new SparseBitVector<>;
- N->OldPointsTo = new SparseBitVector<>;
- N->Edges = new SparseBitVector<>;
- }
- CreateConstraintGraph();
- UnitePointerEquivalences();
- assert(SCCStack.empty() && "SCC Stack should be empty by now!");
- Node2DFS.clear();
- Node2Deleted.clear();
- Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0);
- Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false);
- DFSNumber = 0;
- DenseSet<Constraint, ConstraintKeyInfo> Seen;
- DenseSet<std::pair<unsigned,unsigned>, PairKeyInfo> EdgesChecked;
-
- // Order graph and add initial nodes to work list.
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- Node *INode = &GraphNodes[i];
-
- // Add to work list if it's a representative and can contribute to the
- // calculation right now.
- if (INode->isRep() && !INode->PointsTo->empty()
- && (!INode->Edges->empty() || !INode->Constraints.empty())) {
- INode->Stamp();
- CurrWL->insert(INode);
- }
- }
- std::queue<unsigned int> TarjanWL;
-#if !FULL_UNIVERSAL
- // "Rep and special variables" - in order for HCD to maintain conservative
- // results when !FULL_UNIVERSAL, we need to treat the special variables in
- // the same way that the !FULL_UNIVERSAL tweak does throughout the rest of
- // the analysis - it's ok to add edges from the special nodes, but never
- // *to* the special nodes.
- std::vector<unsigned int> RSV;
-#endif
- while( !CurrWL->empty() ) {
- DEBUG(dbgs() << "Starting iteration #" << ++NumIters << "\n");
-
- Node* CurrNode;
- unsigned CurrNodeIndex;
-
- // Actual cycle checking code. We cycle check all of the lazy cycle
- // candidates from the last iteration in one go.
- if (!TarjanWL.empty()) {
- DFSNumber = 0;
-
- Tarjan2DFS.clear();
- Tarjan2Deleted.clear();
- while (!TarjanWL.empty()) {
- unsigned int ToTarjan = TarjanWL.front();
- TarjanWL.pop();
- if (!Tarjan2Deleted[ToTarjan]
- && GraphNodes[ToTarjan].isRep()
- && Tarjan2DFS[ToTarjan] == 0)
- QueryNode(ToTarjan);
- }
- }
-
-    // Process the nodes on the current worklist one at a time.
- while( (CurrNode = CurrWL->pop()) != NULL ) {
- CurrNodeIndex = CurrNode - &GraphNodes[0];
- CurrNode->Stamp();
-
-
-      // Figure out the changed points-to bits.
- SparseBitVector<> CurrPointsTo;
- CurrPointsTo.intersectWithComplement(CurrNode->PointsTo,
- CurrNode->OldPointsTo);
- if (CurrPointsTo.empty())
- continue;
-
- *(CurrNode->OldPointsTo) |= CurrPointsTo;
-
- // Check the offline-computed equivalencies from HCD.
- bool SCC = false;
- unsigned Rep;
-
- if (SDT[CurrNodeIndex] >= 0) {
- SCC = true;
- Rep = FindNode(SDT[CurrNodeIndex]);
-
-#if !FULL_UNIVERSAL
- RSV.clear();
-#endif
- for (SparseBitVector<>::iterator bi = CurrPointsTo.begin();
- bi != CurrPointsTo.end(); ++bi) {
- unsigned Node = FindNode(*bi);
-#if !FULL_UNIVERSAL
- if (Node < NumberSpecialNodes) {
- RSV.push_back(Node);
- continue;
- }
-#endif
- Rep = UniteNodes(Rep,Node);
- }
-#if !FULL_UNIVERSAL
- RSV.push_back(Rep);
-#endif
-
- NextWL->insert(&GraphNodes[Rep]);
-
- if ( ! CurrNode->isRep() )
- continue;
- }
-
- Seen.clear();
-
- /* Now process the constraints for this node. */
- for (std::list<Constraint>::iterator li = CurrNode->Constraints.begin();
- li != CurrNode->Constraints.end(); ) {
- li->Src = FindNode(li->Src);
- li->Dest = FindNode(li->Dest);
-
- // Delete redundant constraints
- if( Seen.count(*li) ) {
- std::list<Constraint>::iterator lk = li; li++;
-
- CurrNode->Constraints.erase(lk);
- ++NumErased;
- continue;
- }
- Seen.insert(*li);
-
- // Src and Dest will be the vars we are going to process.
- // This may look a bit ugly, but what it does is allow us to process
- // both store and load constraints with the same code.
- // Load constraints say that every member of our RHS solution has K
- // added to it, and that variable gets an edge to LHS. We also union
- // RHS+K's solution into the LHS solution.
- // Store constraints say that every member of our LHS solution has K
- // added to it, and that variable gets an edge from RHS. We also union
- // RHS's solution into the LHS+K solution.
- unsigned *Src;
- unsigned *Dest;
- unsigned K = li->Offset;
- unsigned CurrMember;
- if (li->Type == Constraint::Load) {
- Src = &CurrMember;
- Dest = &li->Dest;
- } else if (li->Type == Constraint::Store) {
- Src = &li->Src;
- Dest = &CurrMember;
- } else {
- // TODO Handle offseted copy constraint
- li++;
- continue;
- }
-
- // See if we can use Hybrid Cycle Detection (that is, check
- // if it was a statically detected offline equivalence that
- // involves pointers; if so, remove the redundant constraints).
- if( SCC && K == 0 ) {
-#if FULL_UNIVERSAL
- CurrMember = Rep;
-
- if (GraphNodes[*Src].Edges->test_and_set(*Dest))
- if (GraphNodes[*Dest].PointsTo |= *(GraphNodes[*Src].PointsTo))
- NextWL->insert(&GraphNodes[*Dest]);
-#else
- for (unsigned i=0; i < RSV.size(); ++i) {
- CurrMember = RSV[i];
-
- if (*Dest < NumberSpecialNodes)
- continue;
- if (GraphNodes[*Src].Edges->test_and_set(*Dest))
- if (GraphNodes[*Dest].PointsTo |= *(GraphNodes[*Src].PointsTo))
- NextWL->insert(&GraphNodes[*Dest]);
- }
-#endif
- // since all future elements of the points-to set will be
- // equivalent to the current ones, the complex constraints
- // become redundant.
- //
- std::list<Constraint>::iterator lk = li; li++;
-#if !FULL_UNIVERSAL
- // In this case, we can still erase the constraints when the
- // elements of the points-to sets are referenced by *Dest,
- // but not when they are referenced by *Src (i.e. for a Load
- // constraint). This is because if another special variable is
- // put into the points-to set later, we still need to add the
- // new edge from that special variable.
- if( lk->Type != Constraint::Load)
-#endif
- GraphNodes[CurrNodeIndex].Constraints.erase(lk);
- } else {
- const SparseBitVector<> &Solution = CurrPointsTo;
-
- for (SparseBitVector<>::iterator bi = Solution.begin();
- bi != Solution.end();
- ++bi) {
- CurrMember = *bi;
-
- // Need to increment the member by K since that is where we are
- // supposed to copy to/from. Note that in positive weight cycles,
- // which occur in address taking of fields, K can go past
- // MaxK[CurrMember] elements, even though that is all it could point
- // to.
- if (K > 0 && K > MaxK[CurrMember])
- continue;
- else
- CurrMember = FindNode(CurrMember + K);
-
- // Add an edge to the graph, so we can just do regular
- // bitmap ior next time. It may also let us notice a cycle.
-#if !FULL_UNIVERSAL
- if (*Dest < NumberSpecialNodes)
- continue;
-#endif
- if (GraphNodes[*Src].Edges->test_and_set(*Dest))
- if (GraphNodes[*Dest].PointsTo |= *(GraphNodes[*Src].PointsTo))
- NextWL->insert(&GraphNodes[*Dest]);
-
- }
- li++;
- }
- }
- SparseBitVector<> NewEdges;
- SparseBitVector<> ToErase;
-
- // Now all we have left to do is propagate points-to info along the
- // edges, erasing the redundant edges.
- for (SparseBitVector<>::iterator bi = CurrNode->Edges->begin();
- bi != CurrNode->Edges->end();
- ++bi) {
-
- unsigned DestVar = *bi;
- unsigned Rep = FindNode(DestVar);
-
- // If we ended up with this node as our destination, or we've already
- // got an edge for the representative, delete the current edge.
- if (Rep == CurrNodeIndex ||
- (Rep != DestVar && NewEdges.test(Rep))) {
- ToErase.set(DestVar);
- continue;
- }
-
- std::pair<unsigned,unsigned> edge(CurrNodeIndex,Rep);
-
- // This is where we do lazy cycle detection.
- // If this is a cycle candidate (equal points-to sets and this
- // particular edge has not been cycle-checked previously), add to the
- // list to check for cycles on the next iteration.
- if (!EdgesChecked.count(edge) &&
- *(GraphNodes[Rep].PointsTo) == *(CurrNode->PointsTo)) {
- EdgesChecked.insert(edge);
- TarjanWL.push(Rep);
- }
- // Union the points-to sets into the dest
-#if !FULL_UNIVERSAL
- if (Rep >= NumberSpecialNodes)
-#endif
- if (GraphNodes[Rep].PointsTo |= CurrPointsTo) {
- NextWL->insert(&GraphNodes[Rep]);
- }
- // If this edge's destination was collapsed, rewrite the edge.
- if (Rep != DestVar) {
- ToErase.set(DestVar);
- NewEdges.set(Rep);
- }
- }
- CurrNode->Edges->intersectWithComplement(ToErase);
- CurrNode->Edges |= NewEdges;
- }
-
- // Switch to other work list.
- WorkList* t = CurrWL; CurrWL = NextWL; NextWL = t;
- }
-
-
- Node2DFS.clear();
- Node2Deleted.clear();
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- Node *N = &GraphNodes[i];
- delete N->OldPointsTo;
- delete N->Edges;
- }
- SDTActive = false;
- SDT.clear();
-}
-
-//===----------------------------------------------------------------------===//
-// Union-Find
-//===----------------------------------------------------------------------===//
-
-// Unite nodes First and Second, returning the one which is now the
-// representative node. First and Second are indexes into GraphNodes
-unsigned Andersens::UniteNodes(unsigned First, unsigned Second,
- bool UnionByRank) {
- assert (First < GraphNodes.size() && Second < GraphNodes.size() &&
- "Attempting to merge nodes that don't exist");
-
- Node *FirstNode = &GraphNodes[First];
- Node *SecondNode = &GraphNodes[Second];
-
- assert (SecondNode->isRep() && FirstNode->isRep() &&
- "Trying to unite two non-representative nodes!");
- if (First == Second)
- return First;
-
- if (UnionByRank) {
- int RankFirst = (int) FirstNode ->NodeRep;
- int RankSecond = (int) SecondNode->NodeRep;
-
- // Rank starts at -1 and gets decremented as it increases.
- // Translation: higher rank, lower NodeRep value, which is always negative.
- if (RankFirst > RankSecond) {
- unsigned t = First; First = Second; Second = t;
- Node* tp = FirstNode; FirstNode = SecondNode; SecondNode = tp;
- } else if (RankFirst == RankSecond) {
- FirstNode->NodeRep = (unsigned) (RankFirst - 1);
- }
- }
-
- SecondNode->NodeRep = First;
-#if !FULL_UNIVERSAL
- if (First >= NumberSpecialNodes)
-#endif
- if (FirstNode->PointsTo && SecondNode->PointsTo)
- FirstNode->PointsTo |= *(SecondNode->PointsTo);
- if (FirstNode->Edges && SecondNode->Edges)
- FirstNode->Edges |= *(SecondNode->Edges);
- if (!SecondNode->Constraints.empty())
- FirstNode->Constraints.splice(FirstNode->Constraints.begin(),
- SecondNode->Constraints);
- if (FirstNode->OldPointsTo) {
- delete FirstNode->OldPointsTo;
- FirstNode->OldPointsTo = new SparseBitVector<>;
- }
-
- // Destroy interesting parts of the merged-from node.
- delete SecondNode->OldPointsTo;
- delete SecondNode->Edges;
- delete SecondNode->PointsTo;
- SecondNode->Edges = NULL;
- SecondNode->PointsTo = NULL;
- SecondNode->OldPointsTo = NULL;
-
- NumUnified++;
- DEBUG(dbgs() << "Unified Node ");
- DEBUG(PrintNode(FirstNode));
- DEBUG(dbgs() << " and Node ");
- DEBUG(PrintNode(SecondNode));
- DEBUG(dbgs() << "\n");
-
- if (SDTActive)
- if (SDT[Second] >= 0) {
- if (SDT[First] < 0)
- SDT[First] = SDT[Second];
- else {
- UniteNodes( FindNode(SDT[First]), FindNode(SDT[Second]) );
- First = FindNode(First);
- }
- }
-
- return First;
-}
-
-// Find the index into GraphNodes of the node representing Node, performing
-// path compression along the way
-unsigned Andersens::FindNode(unsigned NodeIndex) {
- assert (NodeIndex < GraphNodes.size()
- && "Attempting to find a node that can't exist");
- Node *N = &GraphNodes[NodeIndex];
- if (N->isRep())
- return NodeIndex;
- else
- return (N->NodeRep = FindNode(N->NodeRep));
-}
-
-// Find the index into GraphNodes of the node representing Node,
-// without performing path compression along the way (for Print)
-unsigned Andersens::FindNode(unsigned NodeIndex) const {
- assert (NodeIndex < GraphNodes.size()
- && "Attempting to find a node that can't exist");
- const Node *N = &GraphNodes[NodeIndex];
- if (N->isRep())
- return NodeIndex;
- else
- return FindNode(N->NodeRep);
-}
-
-//===----------------------------------------------------------------------===//
-// Debugging Output
-//===----------------------------------------------------------------------===//
-
-void Andersens::PrintNode(const Node *N) const {
- if (N == &GraphNodes[UniversalSet]) {
- dbgs() << "<universal>";
- return;
- } else if (N == &GraphNodes[NullPtr]) {
- dbgs() << "<nullptr>";
- return;
- } else if (N == &GraphNodes[NullObject]) {
- dbgs() << "<null>";
- return;
- }
- if (!N->getValue()) {
- dbgs() << "artificial" << (intptr_t) N;
- return;
- }
-
- assert(N->getValue() != 0 && "Never set node label!");
- Value *V = N->getValue();
- if (Function *F = dyn_cast<Function>(V)) {
- if (isa<PointerType>(F->getFunctionType()->getReturnType()) &&
- N == &GraphNodes[getReturnNode(F)]) {
- dbgs() << F->getName() << ":retval";
- return;
- } else if (F->getFunctionType()->isVarArg() &&
- N == &GraphNodes[getVarargNode(F)]) {
- dbgs() << F->getName() << ":vararg";
- return;
- }
- }
-
- if (Instruction *I = dyn_cast<Instruction>(V))
- dbgs() << I->getParent()->getParent()->getName() << ":";
- else if (Argument *Arg = dyn_cast<Argument>(V))
- dbgs() << Arg->getParent()->getName() << ":";
-
- if (V->hasName())
- dbgs() << V->getName();
- else
- dbgs() << "(unnamed)";
-
- if (isa<GlobalValue>(V) || isa<AllocaInst>(V) || isMalloc(V))
- if (N == &GraphNodes[getObject(V)])
- dbgs() << "<mem>";
-}
-void Andersens::PrintConstraint(const Constraint &C) const {
- if (C.Type == Constraint::Store) {
- dbgs() << "*";
- if (C.Offset != 0)
- dbgs() << "(";
- }
- PrintNode(&GraphNodes[C.Dest]);
- if (C.Type == Constraint::Store && C.Offset != 0)
- dbgs() << " + " << C.Offset << ")";
- dbgs() << " = ";
- if (C.Type == Constraint::Load) {
- dbgs() << "*";
- if (C.Offset != 0)
- dbgs() << "(";
- }
- else if (C.Type == Constraint::AddressOf)
- dbgs() << "&";
- PrintNode(&GraphNodes[C.Src]);
- if (C.Offset != 0 && C.Type != Constraint::Store)
- dbgs() << " + " << C.Offset;
- if (C.Type == Constraint::Load && C.Offset != 0)
- dbgs() << ")";
- dbgs() << "\n";
-}
-
-void Andersens::PrintConstraints() const {
- dbgs() << "Constraints:\n";
-
- for (unsigned i = 0, e = Constraints.size(); i != e; ++i)
- PrintConstraint(Constraints[i]);
-}
-
-void Andersens::PrintPointsToGraph() const {
- dbgs() << "Points-to graph:\n";
- for (unsigned i = 0, e = GraphNodes.size(); i != e; ++i) {
- const Node *N = &GraphNodes[i];
- if (FindNode(i) != i) {
- PrintNode(N);
- dbgs() << "\t--> same as ";
- PrintNode(&GraphNodes[FindNode(i)]);
- dbgs() << "\n";
- } else {
- dbgs() << "[" << (N->PointsTo->count()) << "] ";
- PrintNode(N);
- dbgs() << "\t--> ";
-
- bool first = true;
- for (SparseBitVector<>::iterator bi = N->PointsTo->begin();
- bi != N->PointsTo->end();
- ++bi) {
- if (!first)
- dbgs() << ", ";
- PrintNode(&GraphNodes[*bi]);
- first = false;
- }
- dbgs() << "\n";
- }
- }
-}
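For reference, a minimal standalone sketch of the union-find scheme the removed solver relies on: rank kept as a negative NodeRep-style value with union by rank, and path compression in find. Names and layout are illustrative only, not LLVM's.

#include <cassert>
#include <utility>
#include <vector>

// Each slot holds either a negative rank (the node is its own
// representative) or the index of a node closer to the representative.
struct UnionFind {
  std::vector<long> Rep;
  explicit UnionFind(unsigned N) : Rep(N, -1) {}

  // Find with path compression, mirroring Andersens::FindNode above.
  unsigned find(unsigned I) {
    if (Rep[I] < 0)
      return I;
    return Rep[I] = find(static_cast<unsigned>(Rep[I]));
  }

  // Union by rank, mirroring Andersens::UniteNodes: a more negative value
  // means a higher rank, and the higher-ranked node stays representative.
  unsigned unite(unsigned A, unsigned B) {
    A = find(A);
    B = find(B);
    if (A == B)
      return A;
    if (Rep[A] > Rep[B])
      std::swap(A, B);
    else if (Rep[A] == Rep[B])
      --Rep[A];
    Rep[B] = static_cast<long>(A);
    return A;
  }
};

int main() {
  UnionFind UF(5);
  UF.unite(0, 1);
  UF.unite(1, 2);
  assert(UF.find(2) == UF.find(0));
  assert(UF.find(3) != UF.find(0));
  return 0;
}
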
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
index 1ebb0be..007ad22 100644
--- a/lib/Analysis/IPA/CMakeLists.txt
+++ b/lib/Analysis/IPA/CMakeLists.txt
@@ -1,5 +1,4 @@
add_llvm_library(LLVMipa
- Andersens.cpp
CallGraph.cpp
CallGraphSCCPass.cpp
FindUsedTypes.cpp
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index ec94bc8..7b43089 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -213,7 +213,7 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) {
++NumNonAddrTakenGlobalVars;
// If this global holds a pointer type, see if it is an indirect global.
- if (isa<PointerType>(I->getType()->getElementType()) &&
+ if (I->getType()->getElementType()->isPointerTy() &&
AnalyzeIndirectGlobalMemory(I))
++NumIndirectGlobalVars;
}
@@ -231,7 +231,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
std::vector<Function*> &Readers,
std::vector<Function*> &Writers,
GlobalValue *OkayStoreDest) {
- if (!isa<PointerType>(V->getType())) return true;
+ if (!V->getType()->isPointerTy()) return true;
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 9c472ae..98a436f 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Assembly/AsmAnnotationWriter.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -35,42 +36,30 @@ Pass *llvm::createIVUsersPass() {
return new IVUsers();
}
-/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
-/// subexpression that is an AddRec from a loop other than L. An outer loop
-/// of L is OK, but not an inner loop nor a disjoint loop.
-static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
- // This is very common, put it first.
- if (isa<SCEVConstant>(S))
- return false;
- if (const SCEVCommutativeExpr *AE = dyn_cast<SCEVCommutativeExpr>(S)) {
- for (unsigned int i=0; i< AE->getNumOperands(); i++)
- if (containsAddRecFromDifferentLoop(AE->getOperand(i), L))
- return true;
- return false;
- }
- if (const SCEVAddRecExpr *AE = dyn_cast<SCEVAddRecExpr>(S)) {
- if (const Loop *newLoop = AE->getLoop()) {
- if (newLoop == L)
- return false;
- // if newLoop is an outer loop of L, this is OK.
- if (newLoop->contains(L))
- return false;
+/// CollectSubexprs - Split S into subexpressions which can be pulled out into
+/// separate registers.
+static void CollectSubexprs(const SCEV *S,
+ SmallVectorImpl<const SCEV *> &Ops,
+ ScalarEvolution &SE) {
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ // Break out add operands.
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I)
+ CollectSubexprs(*I, Ops, SE);
+ return;
+ } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // Split a non-zero base out of an addrec.
+ if (!AR->getStart()->isZero()) {
+ CollectSubexprs(AR->getStart(), Ops, SE);
+ CollectSubexprs(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()),
+ AR->getStepRecurrence(SE),
+ AR->getLoop()), Ops, SE);
+ return;
}
- return true;
}
- if (const SCEVUDivExpr *DE = dyn_cast<SCEVUDivExpr>(S))
- return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
- containsAddRecFromDifferentLoop(DE->getRHS(), L);
-#if 0
- // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
- // need this when it is.
- if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S))
- return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
- containsAddRecFromDifferentLoop(DE->getRHS(), L);
-#endif
- if (const SCEVCastExpr *CE = dyn_cast<SCEVCastExpr>(S))
- return containsAddRecFromDifferentLoop(CE->getOperand(), L);
- return false;
+
+ // Otherwise use the value itself.
+ Ops.push_back(S);
}
/// getSCEVStartAndStride - Compute the start and stride of this expression,
@@ -89,35 +78,42 @@ static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop,
if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) {
for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i)
if (const SCEVAddRecExpr *AddRec =
- dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) {
- if (AddRec->getLoop() == L)
- TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
- else
- return false; // Nested IV of some sort?
- } else {
+ dyn_cast<SCEVAddRecExpr>(AE->getOperand(i)))
+ TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
+ else
Start = SE->getAddExpr(Start, AE->getOperand(i));
- }
} else if (isa<SCEVAddRecExpr>(SH)) {
TheAddRec = SH;
} else {
return false; // not analyzable.
}
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(TheAddRec);
- if (!AddRec || AddRec->getLoop() != L) return false;
+ // Break down TheAddRec into its component parts.
+ SmallVector<const SCEV *, 4> Subexprs;
+ CollectSubexprs(TheAddRec, Subexprs, *SE);
+
+ // Look for an addrec on the current loop among the parts.
+ const SCEV *AddRecStride = 0;
+ for (SmallVectorImpl<const SCEV *>::iterator I = Subexprs.begin(),
+ E = Subexprs.end(); I != E; ++I) {
+ const SCEV *S = *I;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ if (AR->getLoop() == L) {
+ *I = AR->getStart();
+ AddRecStride = AR->getStepRecurrence(*SE);
+ break;
+ }
+ }
+ if (!AddRecStride)
+ return false;
+
+ // Add up everything else into a start value (which may not be
+ // loop-invariant).
+ const SCEV *AddRecStart = SE->getAddExpr(Subexprs);
// Use getSCEVAtScope to attempt to simplify other loops out of
// the picture.
- const SCEV *AddRecStart = AddRec->getStart();
AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop);
- const SCEV *AddRecStride = AddRec->getStepRecurrence(*SE);
-
- // FIXME: If Start contains an SCEVAddRecExpr from a different loop, other
- // than an outer loop of the current loop, reject it. LSR has no concept of
- // operating on more than one loop at a time so don't confuse it with such
- // expressions.
- if (containsAddRecFromDifferentLoop(AddRecStart, L))
- return false;
Start = SE->getAddExpr(Start, AddRecStart);
@@ -130,7 +126,7 @@ static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop,
DEBUG(dbgs() << "[";
WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
- dbgs() << "] Variable stride: " << *AddRec << "\n");
+ dbgs() << "] Variable stride: " << *AddRecStride << "\n");
}
Stride = AddRecStride;
@@ -146,8 +142,7 @@ static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop,
/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
/// should use the post-inc value).
static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
- Loop *L, LoopInfo *LI, DominatorTree *DT,
- Pass *P) {
+ Loop *L, DominatorTree *DT) {
// If the user is in the loop, use the preinc value.
if (L->contains(User)) return false;
@@ -227,7 +222,7 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
// Descend recursively, but not into PHI nodes outside the current loop.
// It's important to see the entire expression outside the loop to get
// choices that depend on addressing mode use right, although we won't
- // consider references ouside the loop in all cases.
+ // consider references outside the loop in all cases.
// If User is already in Processed, we don't want to recurse into it again,
// but do want to record a second reference in the same instruction.
bool AddUserToIVUsers = false;
@@ -246,42 +241,28 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
}
if (AddUserToIVUsers) {
- IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride];
- if (!StrideUses) { // First occurrence of this stride?
- StrideOrder.push_back(Stride);
- StrideUses = new IVUsersOfOneStride(Stride);
- IVUses.push_back(StrideUses);
- IVUsesByStride[Stride] = StrideUses;
- }
-
// Okay, we found a user that we cannot reduce. Analyze the instruction
// and decide what to do with it. If we are a use inside of the loop, use
// the value before incrementation, otherwise use it after incrementation.
- if (IVUseShouldUsePostIncValue(User, I, L, LI, DT, this)) {
+ if (IVUseShouldUsePostIncValue(User, I, L, DT)) {
// The value used will be incremented by the stride more than we are
// expecting, so subtract this off.
const SCEV *NewStart = SE->getMinusSCEV(Start, Stride);
- StrideUses->addUser(NewStart, User, I);
- StrideUses->Users.back().setIsUseOfPostIncrementedValue(true);
+ IVUses.push_back(new IVStrideUse(this, Stride, NewStart, User, I));
+ IVUses.back().setIsUseOfPostIncrementedValue(true);
DEBUG(dbgs() << " USING POSTINC SCEV, START=" << *NewStart<< "\n");
} else {
- StrideUses->addUser(Start, User, I);
+ IVUses.push_back(new IVStrideUse(this, Stride, Start, User, I));
}
}
}
return true;
}
-void IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset,
- Instruction *User, Value *Operand) {
- IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride];
- if (!StrideUses) { // First occurrence of this stride?
- StrideOrder.push_back(Stride);
- StrideUses = new IVUsersOfOneStride(Stride);
- IVUses.push_back(StrideUses);
- IVUsesByStride[Stride] = StrideUses;
- }
- IVUsesByStride[Stride]->addUser(Offset, User, Operand);
+IVStrideUse &IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset,
+ Instruction *User, Value *Operand) {
+ IVUses.push_back(new IVStrideUse(this, Stride, Offset, User, Operand));
+ return IVUses.back();
}
IVUsers::IVUsers()
@@ -315,15 +296,15 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
/// value of the OperandValToReplace of the given IVStrideUse.
const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const {
// Start with zero.
- const SCEV *RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType());
+ const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType());
// Create the basic add recurrence.
- RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L);
+ RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L);
// Add the offset in a separate step, because it may be loop-variant.
RetVal = SE->getAddExpr(RetVal, U.getOffset());
// For uses of post-incremented values, add an extra stride to compute
// the actual replacement value.
if (U.isUseOfPostIncrementedValue())
- RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride);
+ RetVal = SE->getAddExpr(RetVal, U.getStride());
return RetVal;
}
@@ -332,9 +313,9 @@ const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const {
/// isUseOfPostIncrementedValue flag.
const SCEV *IVUsers::getCanonicalExpr(const IVStrideUse &U) const {
// Start with zero.
- const SCEV *RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType());
+ const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType());
// Create the basic add recurrence.
- RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L);
+ RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L);
// Add the offset in a separate step, because it may be loop-variant.
RetVal = SE->getAddExpr(RetVal, U.getOffset());
return RetVal;
@@ -349,24 +330,20 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
}
OS << ":\n";
- for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) {
- std::map<const SCEV *, IVUsersOfOneStride*>::const_iterator SI =
- IVUsesByStride.find(StrideOrder[Stride]);
- assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
- OS << " Stride " << *SI->first->getType() << " " << *SI->first << ":\n";
-
- for (ilist<IVStrideUse>::const_iterator UI = SI->second->Users.begin(),
- E = SI->second->Users.end(); UI != E; ++UI) {
- OS << " ";
- WriteAsOperand(OS, UI->getOperandValToReplace(), false);
- OS << " = ";
- OS << *getReplacementExpr(*UI);
- if (UI->isUseOfPostIncrementedValue())
- OS << " (post-inc)";
- OS << " in ";
- UI->getUser()->print(OS);
- OS << '\n';
- }
+ // Use a default AssemblyAnnotationWriter to suppress the default info
+ // comments, which aren't relevant here.
+ AssemblyAnnotationWriter Annotator;
+ for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(),
+ E = IVUses.end(); UI != E; ++UI) {
+ OS << " ";
+ WriteAsOperand(OS, UI->getOperandValToReplace(), false);
+ OS << " = "
+ << *getReplacementExpr(*UI);
+ if (UI->isUseOfPostIncrementedValue())
+ OS << " (post-inc)";
+ OS << " in ";
+ UI->getUser()->print(OS, &Annotator);
+ OS << '\n';
}
}
@@ -375,37 +352,12 @@ void IVUsers::dump() const {
}
void IVUsers::releaseMemory() {
- IVUsesByStride.clear();
- StrideOrder.clear();
Processed.clear();
IVUses.clear();
}
void IVStrideUse::deleted() {
// Remove this user from the list.
- Parent->Users.erase(this);
+ Parent->IVUses.erase(this);
// this now dangles!
}
-
-void IVUsersOfOneStride::print(raw_ostream &OS) const {
- OS << "IV Users of one stride:\n";
-
- if (Stride)
- OS << " Stride: " << *Stride << '\n';
-
- OS << " Users:\n";
-
- unsigned Count = 1;
-
- for (ilist<IVStrideUse>::const_iterator
- I = Users.begin(), E = Users.end(); I != E; ++I) {
- const IVStrideUse &SU = *I;
- OS << " " << Count++ << '\n';
- OS << " Offset: " << *SU.getOffset() << '\n';
- OS << " Instr: " << *SU << '\n';
- }
-}
-
-void IVUsersOfOneStride::dump() const {
- print(dbgs());
-}
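To illustrate the splitting that the new CollectSubexprs performs, here is a rough standalone sketch over a toy expression type (not LLVM's SCEV classes; the names are made up). Sums are flattened into their operands, and a recurrence {Start,+,Step} with a non-zero start is split into Start plus a zero-based recurrence, so {A,+,S} + B yields the parts A, {0,+,S}, and B.

#include <cassert>
#include <memory>
#include <string>
#include <vector>

// Toy expression tree: a named leaf, a sum, or a recurrence {Start,+,Step}.
struct Expr {
  enum Kind { Leaf, Add, AddRec } K = Leaf;
  std::string Name;               // Leaf
  std::vector<const Expr *> Ops;  // Add
  const Expr *Start = nullptr;    // AddRec
  const Expr *Step = nullptr;     // AddRec
};

static bool isZero(const Expr *E) {
  return E->K == Expr::Leaf && E->Name == "0";
}

// Sums are flattened; a recurrence with a non-zero start is split into
// its start plus a zero-based recurrence over the same step.
static void collectSubexprs(const Expr *E, std::vector<const Expr *> &Out,
                            std::vector<std::unique_ptr<Expr>> &Pool) {
  if (E->K == Expr::Add) {
    for (const Expr *Op : E->Ops)
      collectSubexprs(Op, Out, Pool);
    return;
  }
  if (E->K == Expr::AddRec && !isZero(E->Start)) {
    collectSubexprs(E->Start, Out, Pool);
    auto Zero = std::make_unique<Expr>();
    Zero->Name = "0";
    auto Rec = std::make_unique<Expr>();
    Rec->K = Expr::AddRec;
    Rec->Start = Zero.get();
    Rec->Step = E->Step;
    Out.push_back(Rec.get());
    Pool.push_back(std::move(Zero));
    Pool.push_back(std::move(Rec));
    return;
  }
  Out.push_back(E);
}

int main() {
  Expr A, B, S;
  A.Name = "A"; B.Name = "B"; S.Name = "S";   // leaves; K defaults to Leaf

  Expr Rec;                                   // {A,+,S}
  Rec.K = Expr::AddRec;
  Rec.Start = &A;
  Rec.Step = &S;

  Expr Sum;                                   // {A,+,S} + B
  Sum.K = Expr::Add;
  Sum.Ops = {&Rec, &B};

  std::vector<const Expr *> Parts;
  std::vector<std::unique_ptr<Expr>> Pool;
  collectSubexprs(&Sum, Parts, Pool);
  assert(Parts.size() == 3);
  assert(Parts[0]->Name == "A");
  assert(Parts[1]->K == Expr::AddRec && isZero(Parts[1]->Start));
  assert(Parts[2]->Name == "B");
  return 0;
}
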
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 972d034..ca50a17 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -84,7 +84,7 @@ unsigned InlineCostAnalyzer::FunctionInfo::
//
unsigned InlineCostAnalyzer::FunctionInfo::
CountCodeReductionForAlloca(Value *V) {
- if (!isa<PointerType>(V->getType())) return 0; // Not a pointer
+ if (!V->getType()->isPointerTy()) return 0; // Not a pointer
unsigned Reduction = 0;
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
Instruction *I = cast<Instruction>(*UI);
@@ -175,7 +175,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
this->usesDynamicAlloca = true;
}
- if (isa<ExtractElementInst>(II) || isa<VectorType>(II->getType()))
+ if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
++NumVectorInsts;
if (const CastInst *CI = dyn_cast<CastInst>(II)) {
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index b53ac13..8288e96 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -194,11 +194,10 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const Type *ITy = GetCompareTy(LHS);
// icmp X, X -> true/false
- if (LHS == RHS)
+ // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false
+ // because X could be 0.
+ if (LHS == RHS || isa<UndefValue>(RHS))
return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
-
- if (isa<UndefValue>(RHS)) // X icmp undef -> undef
- return UndefValue::get(ITy);
// icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
// addresses never equal each other! We already know that Op0 != Op1.
@@ -283,6 +282,32 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// True if unordered.
return ConstantInt::getTrue(CFP->getContext());
}
+ // Check whether the constant is an infinity.
+ if (CFP->getValueAPF().isInfinity()) {
+ if (CFP->getValueAPF().isNegative()) {
+ switch (Pred) {
+ case FCmpInst::FCMP_OLT:
+ // No value is ordered and less than negative infinity.
+ return ConstantInt::getFalse(CFP->getContext());
+ case FCmpInst::FCMP_UGE:
+ // All values are unordered with or at least negative infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
+ }
+ } else {
+ switch (Pred) {
+ case FCmpInst::FCMP_OGT:
+ // No value is ordered and greater than infinity.
+ return ConstantInt::getFalse(CFP->getContext());
+ case FCmpInst::FCMP_ULE:
+ // All values are unordered with and at most infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
+ }
+ }
+ }
}
}
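As a quick sanity check of the reasoning behind the new infinity cases, a small standalone C++ program (independent of the code above, relying on ordinary IEEE-754 comparison semantics) exercises the four facts being used: nothing is ordered and less than -inf, nothing is ordered and greater than +inf, and every value is unordered with or bounded by them from the other side.

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  const double NegInf = -std::numeric_limits<double>::infinity();
  const double PosInf = std::numeric_limits<double>::infinity();
  const double Samples[] = {0.0, -1.5, 42.0, NegInf, PosInf,
                            std::numeric_limits<double>::quiet_NaN()};

  for (double X : Samples) {
    // FCMP_OLT X, -inf: no value is ordered and less than -inf.
    assert(!(X < NegInf));
    // FCMP_UGE X, -inf: every value is unordered with, or >=, -inf.
    assert(std::isnan(X) || X >= NegInf);
    // FCMP_OGT X, +inf: no value is ordered and greater than +inf.
    assert(!(X > PosInf));
    // FCMP_ULE X, +inf: every value is unordered with, or <=, +inf.
    assert(std::isnan(X) || X <= PosInf);
  }
  return 0;
}
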
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 2d74709d..2aa2f17 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -580,7 +580,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
void MemoryDependenceAnalysis::
getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB,
SmallVectorImpl<NonLocalDepResult> &Result) {
- assert(isa<PointerType>(Pointer->getType()) &&
+ assert(Pointer->getType()->isPointerTy() &&
"Can't get pointer deps of a non-pointer!");
Result.clear();
@@ -861,7 +861,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
// Get the PHI translated pointer in this predecessor. This can fail if
// not translatable, in which case the getAddr() returns null.
PHITransAddr PredPointer(Pointer);
- PredPointer.PHITranslateValue(BB, Pred);
+ PredPointer.PHITranslateValue(BB, Pred, 0);
Value *PredPtrVal = PredPointer.getAddr();
@@ -1009,13 +1009,20 @@ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
/// in more places that cached info does not necessarily keep.
void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) {
// If Ptr isn't really a pointer, just ignore it.
- if (!isa<PointerType>(Ptr->getType())) return;
+ if (!Ptr->getType()->isPointerTy()) return;
// Flush store info for the pointer.
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
// Flush load info for the pointer.
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
}
+/// invalidateCachedPredecessors - Clear the PredIteratorCache info.
+/// This needs to be done when the CFG changes, e.g., due to splitting
+/// critical edges.
+void MemoryDependenceAnalysis::invalidateCachedPredecessors() {
+ PredCache->clear();
+}
+
/// removeInstruction - Remove an instruction from the dependence analysis,
/// updating the dependence of instructions that previously depended on it.
/// This method attempts to keep the cache coherent using the reverse map.
@@ -1050,7 +1057,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
// Remove it from both the load info and the store info. The instruction
// can't be in either of these maps if it is non-pointer.
- if (isa<PointerType>(RemInst->getType())) {
+ if (RemInst->getType()->isPointerTy()) {
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
}
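The new invalidateCachedPredecessors() hook exists because cached predecessor lists go stale once the CFG changes, for example when a critical edge is split. A toy illustration of that failure mode, using made-up types rather than LLVM's PredIteratorCache:

#include <cassert>
#include <map>
#include <vector>

// Toy CFG: block id -> successor list.
using Graph = std::map<int, std::vector<int>>;

struct PredCache {
  std::map<int, std::vector<int>> Cached;

  // Memoized predecessor query.
  const std::vector<int> &get(const Graph &G, int BB) {
    auto It = Cached.find(BB);
    if (It != Cached.end())
      return It->second;
    std::vector<int> Preds;
    for (const auto &Entry : G)
      for (int Succ : Entry.second)
        if (Succ == BB)
          Preds.push_back(Entry.first);
    return Cached[BB] = Preds;
  }

  void clear() { Cached.clear(); }
};

int main() {
  Graph G = {{0, {2}}, {1, {2}}, {2, {}}};
  PredCache PC;
  assert(PC.get(G, 2) == (std::vector<int>{0, 1}));

  // Split the edge 1 -> 2 by inserting a new block 3.
  G[1] = {3};
  G[3] = {2};
  assert(PC.get(G, 2) == (std::vector<int>{0, 1}));  // stale cached answer
  PC.clear();  // analogue of invalidateCachedPredecessors()
  assert(PC.get(G, 2) == (std::vector<int>{0, 3}));  // recomputed correctly
  return 0;
}
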
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index 334a188..8e4fa03 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -134,7 +134,8 @@ static void RemoveInstInputs(Value *V,
}
Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
- BasicBlock *PredBB) {
+ BasicBlock *PredBB,
+ const DominatorTree *DT) {
// If this is a non-instruction value, it can't require PHI translation.
Instruction *Inst = dyn_cast<Instruction>(V);
if (Inst == 0) return V;
@@ -177,7 +178,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// operands need to be phi translated, and if so, reconstruct it.
if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
- Value *PHIIn = PHITranslateSubExpr(BC->getOperand(0), CurBB, PredBB);
+ Value *PHIIn = PHITranslateSubExpr(BC->getOperand(0), CurBB, PredBB, DT);
if (PHIIn == 0) return 0;
if (PHIIn == BC->getOperand(0))
return BC;
@@ -193,7 +194,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end();
UI != E; ++UI) {
if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI))
- if (BCI->getType() == BC->getType())
+ if (BCI->getType() == BC->getType() &&
+ (!DT || DT->dominates(BCI->getParent(), PredBB)))
return BCI;
}
return 0;
@@ -204,7 +206,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
SmallVector<Value*, 8> GEPOps;
bool AnyChanged = false;
for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
- Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB);
+ Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT);
if (GEPOp == 0) return 0;
AnyChanged |= GEPOp != GEP->getOperand(i);
@@ -229,7 +231,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI))
if (GEPI->getType() == GEP->getType() &&
GEPI->getNumOperands() == GEPOps.size() &&
- GEPI->getParent()->getParent() == CurBB->getParent()) {
+ GEPI->getParent()->getParent() == CurBB->getParent() &&
+ (!DT || DT->dominates(GEPI->getParent(), PredBB))) {
bool Mismatch = false;
for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
if (GEPI->getOperand(i) != GEPOps[i]) {
@@ -251,7 +254,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap();
bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();
- Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB);
+ Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT);
if (LHS == 0) return 0;
// If the PHI translated LHS is an add of a constant, fold the immediates.
@@ -287,7 +290,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI))
if (BO->getOpcode() == Instruction::Add &&
BO->getOperand(0) == LHS && BO->getOperand(1) == RHS &&
- BO->getParent()->getParent() == CurBB->getParent())
+ BO->getParent()->getParent() == CurBB->getParent() &&
+ (!DT || DT->dominates(BO->getParent(), PredBB)))
return BO;
}
@@ -300,33 +304,24 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
/// PHITranslateValue - PHI translate the current address up the CFG from
-/// CurBB to Pred, updating our state the reflect any needed changes. This
-/// returns true on failure and sets Addr to null.
-bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB) {
+/// CurBB to Pred, updating our state to reflect any needed changes. If the
+/// dominator tree DT is non-null, the translated value must dominate
+/// PredBB. This returns true on failure and sets Addr to null.
+bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB,
+ const DominatorTree *DT) {
assert(Verify() && "Invalid PHITransAddr!");
- Addr = PHITranslateSubExpr(Addr, CurBB, PredBB);
+ Addr = PHITranslateSubExpr(Addr, CurBB, PredBB, DT);
assert(Verify() && "Invalid PHITransAddr!");
- return Addr == 0;
-}
-/// GetAvailablePHITranslatedSubExpr - Return the value computed by
-/// PHITranslateSubExpr if it dominates PredBB, otherwise return null.
-Value *PHITransAddr::
-GetAvailablePHITranslatedSubExpr(Value *V, BasicBlock *CurBB,BasicBlock *PredBB,
- const DominatorTree &DT) const {
- PHITransAddr Tmp(V, TD);
- Tmp.PHITranslateValue(CurBB, PredBB);
-
- // See if PHI translation succeeds.
- V = Tmp.getAddr();
-
- // Make sure the value is live in the predecessor.
- if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
- if (!DT.dominates(Inst->getParent(), PredBB))
- return 0;
- return V;
-}
+ if (DT) {
+ // Make sure the value is live in the predecessor.
+ if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr))
+ if (!DT->dominates(Inst->getParent(), PredBB))
+ Addr = 0;
+ }
+ return Addr == 0;
+}
/// PHITranslateWithInsertion - PHI translate this value into the specified
/// predecessor block, inserting a computation of the value if it is
@@ -365,8 +360,9 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
SmallVectorImpl<Instruction*> &NewInsts) {
// See if we have a version of this value already available and dominating
// PredBB. If so, there is no need to insert a new instance of it.
- if (Value *Res = GetAvailablePHITranslatedSubExpr(InVal, CurBB, PredBB, DT))
- return Res;
+ PHITransAddr Tmp(InVal, TD);
+ if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT))
+ return Tmp.getAddr();
// If we don't have an available version of this value, it must be an
// instruction.
diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp
index 8da07e7..ce7ac89 100644
--- a/lib/Analysis/PointerTracking.cpp
+++ b/lib/Analysis/PointerTracking.cpp
@@ -231,7 +231,7 @@ void PointerTracking::print(raw_ostream &OS, const Module* M) const {
// this should be safe for the same reason its safe for SCEV.
PointerTracking &PT = *const_cast<PointerTracking*>(this);
for (inst_iterator I=inst_begin(*FF), E=inst_end(*FF); I != E; ++I) {
- if (!isa<PointerType>(I->getType()))
+ if (!I->getType()->isPointerTy())
continue;
Value *Base;
const SCEV *Limit, *Offset;
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
index 85531be..66760c6 100644
--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -246,7 +246,7 @@ const BasicBlock *ProfileInfoT<Function,BasicBlock>::
succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
if (Succ == End) {
- P[0] = BB;
+ P[ reinterpret_cast<const llvm::BasicBlock*>(0) ] = BB;
if (Mode & GetPathToExit) {
hasFoundPath = true;
BB = 0;
@@ -753,10 +753,10 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
Succ != End; ++Succ) {
Path P;
GetPath(*Succ, 0, P, GetPathToExit);
- if (Dest && Dest != P[0]) {
+ if (Dest && Dest != P[ reinterpret_cast<const llvm::BasicBlock*>(0) ]) {
AllEdgesHaveSameReturn = false;
}
- Dest = P[0];
+ Dest = P[ reinterpret_cast<const llvm::BasicBlock*>(0) ];
}
if (AllEdgesHaveSameReturn) {
if(EstimateMissingEdges(BB)) {
@@ -928,7 +928,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
Path P;
const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges);
- Dest = P[0];
+ Dest = P[ reinterpret_cast<const llvm::BasicBlock*>(0) ];
if (!Dest) continue;
if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) {
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 82be9cd..b979f33 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -214,8 +214,8 @@ bool SCEVCastExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeID &ID,
const SCEV *op, const Type *ty)
: SCEVCastExpr(ID, scTruncate, op, ty) {
- assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
- (Ty->isInteger() || isa<PointerType>(Ty)) &&
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot truncate non-integer value!");
}
@@ -226,8 +226,8 @@ void SCEVTruncateExpr::print(raw_ostream &OS) const {
SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeID &ID,
const SCEV *op, const Type *ty)
: SCEVCastExpr(ID, scZeroExtend, op, ty) {
- assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
- (Ty->isInteger() || isa<PointerType>(Ty)) &&
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot zero extend non-integer value!");
}
@@ -238,8 +238,8 @@ void SCEVZeroExtendExpr::print(raw_ostream &OS) const {
SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeID &ID,
const SCEV *op, const Type *ty)
: SCEVCastExpr(ID, scSignExtend, op, ty) {
- assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
- (Ty->isInteger() || isa<PointerType>(Ty)) &&
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot sign extend non-integer value!");
}
@@ -312,6 +312,21 @@ bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const {
return true;
}
+bool
+SCEVAddRecExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
+ return DT->dominates(L->getHeader(), BB) &&
+ SCEVNAryExpr::dominates(BB, DT);
+}
+
+bool
+SCEVAddRecExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
+ // This uses a "dominates" query instead of "properly dominates" query because
+ // the instruction which produces the addrec's value is a PHI, and a PHI
+ // effectively properly dominates its entire containing block.
+ return DT->dominates(L->getHeader(), BB) &&
+ SCEVNAryExpr::properlyDominates(BB, DT);
+}
+
void SCEVAddRecExpr::print(raw_ostream &OS) const {
OS << "{" << *Operands[0];
for (unsigned i = 1, e = Operands.size(); i != e; ++i)
@@ -379,7 +394,7 @@ bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const {
if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
if (CI->isOne() &&
STy->getNumElements() == 2 &&
- STy->getElementType(0)->isInteger(1)) {
+ STy->getElementType(0)->isIntegerTy(1)) {
AllocTy = STy->getElementType(1);
return true;
}
@@ -401,7 +416,7 @@ bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const {
cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
// Ignore vector types here so that ScalarEvolutionExpander doesn't
// emit getelementptrs that index into vectors.
- if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) {
+ if (Ty->isStructTy() || Ty->isArrayTy()) {
CTy = Ty;
FieldNo = CE->getOperand(2);
return true;
@@ -503,9 +518,9 @@ namespace {
// Order pointer values after integer values. This helps SCEVExpander
// form GEPs.
- if (isa<PointerType>(LU->getType()) && !isa<PointerType>(RU->getType()))
+ if (LU->getType()->isPointerTy() && !RU->getType()->isPointerTy())
return false;
- if (isa<PointerType>(RU->getType()) && !isa<PointerType>(LU->getType()))
+ if (RU->getType()->isPointerTy() && !LU->getType()->isPointerTy())
return true;
// Compare getValueID values.
@@ -601,7 +616,7 @@ namespace {
/// When this routine is finished, we know that any duplicates in the vector are
/// consecutive and that complexity is monotonically increasing.
///
-/// Note that we go take special precautions to ensure that we get determinstic
+/// Note that we take special precautions to ensure that we get deterministic
/// results from this routine. In other words, we don't want the results of
/// this to depend on where the addresses of various SCEV objects happened to
/// land in memory.
@@ -729,7 +744,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
// We need at least W + T bits for the multiplication step
unsigned CalculationBits = W + T;
- // Calcuate 2^T, at width T+W.
+ // Calculate 2^T, at width T+W.
APInt DivFactor = APInt(CalculationBits, 1).shl(T);
// Calculate the multiplicative inverse of K! / 2^T;
@@ -906,9 +921,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
if (MaxBECount == RecastedMaxBECount) {
const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no unsigned overflow.
- const SCEV *ZMul =
- getMulExpr(CastedMaxBECount,
- getTruncateOrZeroExtend(Step, Start->getType()));
+ const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
const SCEV *Add = getAddExpr(Start, ZMul);
const SCEV *OperandExtendedAdd =
getAddExpr(getZeroExtendExpr(Start, WideTy),
@@ -922,9 +935,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// Similar to above, only this time treat the step value as signed.
// This covers loops that count down.
- const SCEV *SMul =
- getMulExpr(CastedMaxBECount,
- getTruncateOrSignExtend(Step, Start->getType()));
+ const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
Add = getAddExpr(Start, SMul);
OperandExtendedAdd =
getAddExpr(getZeroExtendExpr(Start, WideTy),
@@ -1045,9 +1056,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
if (MaxBECount == RecastedMaxBECount) {
const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no signed overflow.
- const SCEV *SMul =
- getMulExpr(CastedMaxBECount,
- getTruncateOrSignExtend(Step, Start->getType()));
+ const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
const SCEV *Add = getAddExpr(Start, SMul);
const SCEV *OperandExtendedAdd =
getAddExpr(getSignExtendExpr(Start, WideTy),
@@ -1061,9 +1070,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// Similar to above, only this time treat the step value as unsigned.
// This covers loops that count up with an unsigned step.
- const SCEV *UMul =
- getMulExpr(CastedMaxBECount,
- getTruncateOrZeroExtend(Step, Start->getType()));
+ const SCEV *UMul = getMulExpr(CastedMaxBECount, Step);
Add = getAddExpr(Start, UMul);
OperandExtendedAdd =
getAddExpr(getSignExtendExpr(Start, WideTy),
@@ -1403,7 +1410,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// If we deleted at least one add, we added operands to the end of the list,
// and they are not necessarily sorted. Recurse to resort and resimplify
- // any operands we just aquired.
+ // any operands we just acquired.
if (DeletedAdd)
return getAddExpr(Ops);
}
@@ -1710,7 +1717,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// If we deleted at least one mul, we added operands to the end of the list,
// and they are not necessarily sorted. Recurse to resort and resimplify
- // any operands we just aquired.
+ // any operands we just acquired.
if (DeletedMul)
return getMulExpr(Ops);
}
@@ -1958,6 +1965,12 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
return getAddRecExpr(Operands, L, HasNUW, HasNSW); // {X,+,0} --> X
}
+  // It's tempting to call getMaxBackedgeTakenCount here and
+ // use that information to infer NUW and NSW flags. However, computing a
+ // BE count requires calling getAddRecExpr, so we may not yet have a
+ // meaningful BE count at this point (and if we don't, we'd be stuck
+ // with a SCEVCouldNotCompute as the cached BE count).
+
// If HasNSW is true and all the operands are non-negative, infer HasNUW.
if (!HasNUW && HasNSW) {
bool All = true;
@@ -2293,7 +2306,7 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) {
/// has access to target-specific information.
bool ScalarEvolution::isSCEVable(const Type *Ty) const {
// Integers and pointers are always SCEVable.
- return Ty->isInteger() || isa<PointerType>(Ty);
+ return Ty->isIntegerTy() || Ty->isPointerTy();
}
/// getTypeSizeInBits - Return the size in bits of the specified type,
@@ -2306,12 +2319,12 @@ uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
return TD->getTypeSizeInBits(Ty);
// Integer types have fixed sizes.
- if (Ty->isInteger())
+ if (Ty->isIntegerTy())
return Ty->getPrimitiveSizeInBits();
// The only other support type is pointer. Without TargetData, conservatively
// assume pointers are 64-bit.
- assert(isa<PointerType>(Ty) && "isSCEVable permitted a non-SCEVable type!");
+ assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
return 64;
}
@@ -2322,11 +2335,11 @@ uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
- if (Ty->isInteger())
+ if (Ty->isIntegerTy())
return Ty;
// The only other support type is pointer.
- assert(isa<PointerType>(Ty) && "Unexpected non-pointer non-integer type!");
+ assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
if (TD) return TD->getIntPtrType(getContext());
// Without TargetData, conservatively assume pointers are 64-bit.
@@ -2397,8 +2410,8 @@ const SCEV *
ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V,
const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
- (Ty->isInteger() || isa<PointerType>(Ty)) &&
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
@@ -2414,8 +2427,8 @@ const SCEV *
ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
- (Ty->isInteger() || isa<PointerType>(Ty)) &&
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
@@ -2430,8 +2443,8 @@ ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
const SCEV *
ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
- (Ty->isInteger() || isa<PointerType>(Ty)) &&
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot noop or zero extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrZeroExtend cannot truncate!");
@@ -2446,8 +2459,8 @@ ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
const SCEV *
ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
- (Ty->isInteger() || isa<PointerType>(Ty)) &&
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot noop or sign extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrSignExtend cannot truncate!");
@@ -2463,8 +2476,8 @@ ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
const SCEV *
ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
- (Ty->isInteger() || isa<PointerType>(Ty)) &&
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot noop or any extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrAnyExtend cannot truncate!");
@@ -2478,8 +2491,8 @@ ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
const SCEV *
ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
- (Ty->isInteger() || isa<PointerType>(Ty)) &&
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot truncate or noop with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
"getTruncateOrNoop cannot extend!");
@@ -2536,12 +2549,12 @@ PushDefUseChildren(Instruction *I,
/// the Scalars map if they reference SymName. This is used during PHI
/// resolution.
void
-ScalarEvolution::ForgetSymbolicName(Instruction *I, const SCEV *SymName) {
+ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
SmallVector<Instruction *, 16> Worklist;
- PushDefUseChildren(I, Worklist);
+ PushDefUseChildren(PN, Worklist);
SmallPtrSet<Instruction *, 8> Visited;
- Visited.insert(I);
+ Visited.insert(PN);
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
if (!Visited.insert(I)) continue;
@@ -2551,16 +2564,19 @@ ScalarEvolution::ForgetSymbolicName(Instruction *I, const SCEV *SymName) {
if (It != Scalars.end()) {
// Short-circuit the def-use traversal if the symbolic name
// ceases to appear in expressions.
- if (!It->second->hasOperand(SymName))
+ if (It->second != SymName && !It->second->hasOperand(SymName))
continue;
// SCEVUnknown for a PHI either means that it has an unrecognized
- // structure, or it's a PHI that's in the progress of being computed
- // by createNodeForPHI. In the former case, additional loop trip
- // count information isn't going to change anything. In the later
- // case, createNodeForPHI will perform the necessary updates on its
- // own when it gets to that point.
- if (!isa<PHINode>(I) || !isa<SCEVUnknown>(It->second)) {
+      // structure, it's a PHI that's in the process of being computed
+ // by createNodeForPHI, or it's a single-value PHI. In the first case,
+ // additional loop trip count information isn't going to change anything.
+ // In the second case, createNodeForPHI will perform the necessary
+ // updates on its own when it gets to that point. In the third, we do
+ // want to forget the SCEVUnknown.
+ if (!isa<PHINode>(I) ||
+ !isa<SCEVUnknown>(It->second) ||
+ (I != PN && It->second == SymName)) {
ValuesAtScopes.erase(It->second);
Scalars.erase(It);
}
@@ -2683,9 +2699,21 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
return SymbolicName;
}
- // It's tempting to recognize PHIs with a unique incoming value, however
- // this leads passes like indvars to break LCSSA form. Fortunately, such
- // PHIs are rare, as instcombine zaps them.
+ // If the PHI has a single incoming value, follow that value, unless the
+ // PHI's incoming blocks are in a different loop, in which case doing so
+ // risks breaking LCSSA form. Instcombine would normally zap these, but
+ // it doesn't have DominatorTree information, so it may miss cases.
+ if (Value *V = PN->hasConstantValue(DT)) {
+ bool AllSameLoop = true;
+ Loop *PNLoop = LI->getLoopFor(PN->getParent());
+ for (size_t i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (LI->getLoopFor(PN->getIncomingBlock(i)) != PNLoop) {
+ AllSameLoop = false;
+ break;
+ }
+ if (AllSameLoop)
+ return getSCEV(V);
+ }
// If it's not a loop phi, we can't handle it yet.
return getUnknown(PN);
@@ -2718,7 +2746,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
} else {
// For an array, add the element offset, explicitly scaled.
const SCEV *LocalOffset = getSCEV(Index);
- // Getelementptr indicies are signed.
+ // Getelementptr indices are signed.
LocalOffset = getTruncateOrSignExtend(LocalOffset, IntPtrTy);
// Lower "inbounds" GEPs to NSW arithmetic.
LocalOffset = getMulExpr(LocalOffset, getSizeOfExpr(*GTI),
@@ -2921,7 +2949,6 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
- unsigned BitWidth = getTypeSizeInBits(U->getType());
APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
@@ -3053,7 +3080,7 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
- if (!U->getValue()->getType()->isInteger() && !TD)
+ if (!U->getValue()->getType()->isIntegerTy() && !TD)
return ConservativeResult;
unsigned NS = ComputeNumSignBits(U->getValue(), TD);
if (NS == 1)
@@ -3193,7 +3220,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
const Type *Z0Ty = Z0->getType();
unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
- // If C is a low-bits mask, the zero extend is zerving to
+ // If C is a low-bits mask, the zero extend is serving to
// mask off the high bits. Complement the operand and
// re-apply the zext.
if (APIntOps::isMask(Z0TySize, CI->getValue()))
@@ -3378,7 +3405,7 @@ PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
const ScalarEvolution::BackedgeTakenInfo &
ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// Initially insert a CouldNotCompute for this loop. If the insertion
- // succeeds, procede to actually compute a backedge-taken count and
+ // succeeds, proceed to actually compute a backedge-taken count and
// update the value. The temporary CouldNotCompute value tells SCEV
// code elsewhere that it shouldn't attempt to request a new
// backedge-taken count, which could result in infinite recursion.
@@ -3470,6 +3497,35 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
}
}
+/// forgetValue - This method should be called by the client when it has
+/// changed a value in a way that may effect its value, or which may
+/// disconnect it from a def-use chain linking it to a loop.
+void ScalarEvolution::forgetValue(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return;
+
+ // Drop information about expressions based on loop-header PHIs.
+ SmallVector<Instruction *, 16> Worklist;
+ Worklist.push_back(I);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ std::map<SCEVCallbackVH, const SCEV *>::iterator It =
+ Scalars.find(static_cast<Value *>(I));
+ if (It != Scalars.end()) {
+ ValuesAtScopes.erase(It->second);
+ Scalars.erase(It);
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+}
+
/// ComputeBackedgeTakenCount - Compute the number of times the backedge
/// of the specified loop will execute.
ScalarEvolution::BackedgeTakenInfo
@@ -3566,7 +3622,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L,
return getCouldNotCompute();
}
- // Procede to the next level to examine the exit condition expression.
+ // Proceed to the next level to examine the exit condition expression.
return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(),
ExitBr->getSuccessor(0),
ExitBr->getSuccessor(1));
@@ -3655,10 +3711,23 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
}
// With an icmp, it may be feasible to compute an exact backedge-taken count.
- // Procede to the next level to examine the icmp.
+ // Proceed to the next level to examine the icmp.
if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB);
+ // Check for a constant condition. These are normally stripped out by
+ // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
+ // preserve the CFG and is temporarily leaving constant conditions
+ // in place.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
+ if (L->contains(FBB) == !CI->getZExtValue())
+ // The backedge is always taken.
+ return getCouldNotCompute();
+ else
+ // The backedge is never taken.
+ return getIntegerSCEV(0, CI->getType());
+ }
+
// If it's not an integer or pointer comparison then compute it the hard way.
return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
}
@@ -3682,14 +3751,10 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
// Handle common loops like: for (X = "string"; *X; ++X)
if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
- const SCEV *ItCnt =
+ BackedgeTakenInfo ItCnt =
ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond);
- if (!isa<SCEVCouldNotCompute>(ItCnt)) {
- unsigned BitWidth = getTypeSizeInBits(ItCnt->getType());
- return BackedgeTakenInfo(ItCnt,
- isa<SCEVConstant>(ItCnt) ? ItCnt :
- getConstant(APInt::getMaxValue(BitWidth)-1));
- }
+ if (ItCnt.hasAnyInfo())
+ return ItCnt;
}
const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
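As background for this hunk and the HowFarToZero/HowFarToNonZero changes further down, a hedged sketch of the calling convention the helpers now share; the fragment is schematic rather than an exact caller body:

  // Helpers return a BackedgeTakenInfo instead of a bare SCEV*, so a caller
  // can test for an exact and/or a max backedge-taken count in one place.
  BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L);
  if (BTI.hasAnyInfo())
    return BTI;                   // some count (exact or max) is known
  return getCouldNotCompute();    // a plain SCEV* still converts to the Info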
@@ -3723,14 +3788,14 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
switch (Cond) {
case ICmpInst::ICMP_NE: { // while (X != Y)
// Convert to: while (X-Y != 0)
- const SCEV *TC = HowFarToZero(getMinusSCEV(LHS, RHS), L);
- if (!isa<SCEVCouldNotCompute>(TC)) return TC;
+ BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L);
+ if (BTI.hasAnyInfo()) return BTI;
break;
}
case ICmpInst::ICMP_EQ: { // while (X == Y)
// Convert to: while (X-Y == 0)
- const SCEV *TC = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
- if (!isa<SCEVCouldNotCompute>(TC)) return TC;
+ BackedgeTakenInfo BTI = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
+ if (BTI.hasAnyInfo()) return BTI;
break;
}
case ICmpInst::ICMP_SLT: {
@@ -3817,7 +3882,7 @@ GetAddressedElementFromGlobal(GlobalVariable *GV,
/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of
/// 'icmp op load X, cst', try to see if we can compute the backedge
/// execution count.
-const SCEV *
+ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
LoadInst *LI,
Constant *RHS,
@@ -3826,6 +3891,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
if (LI->isVolatile()) return getCouldNotCompute();
// Check to see if the loaded pointer is a getelementptr of a global.
+ // TODO: Use SCEV instead of manually grubbing with GEPs.
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
if (!GEP) return getCouldNotCompute();
@@ -4175,14 +4241,15 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
}
}
- Constant *C;
+ Constant *C = 0;
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
C = ConstantFoldCompareInstOperands(CI->getPredicate(),
Operands[0], Operands[1], TD);
else
C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
&Operands[0], Operands.size(), TD);
- return getSCEV(C);
+ if (C)
+ return getSCEV(C);
}
}
@@ -4390,7 +4457,8 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
/// HowFarToZero - Return the number of times a backedge comparing the specified
/// value to zero will execute. If not computable, return CouldNotCompute.
-const SCEV *ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
// If the value is a constant
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
// If the value is already zero, the branch will execute zero times.
@@ -4435,7 +4503,7 @@ const SCEV *ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
-StartC->getValue()->getValue(),
*this);
}
- } else if (AddRec->isQuadratic() && AddRec->getType()->isInteger()) {
+ } else if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
// If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
// the quadratic equation to solve it.
std::pair<const SCEV *,const SCEV *> Roots = SolveQuadraticEquation(AddRec,
@@ -4470,7 +4538,8 @@ const SCEV *ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
/// HowFarToNonZero - Return the number of times a backedge checking the
/// specified value for nonzero will execute. If not computable, return
/// CouldNotCompute
-const SCEV *ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
// Loops that look like: while (X == 0) are very strange indeed. We don't
// handle them yet except for the trivial case. This could be expanded in the
// future as needed.
@@ -4711,7 +4780,7 @@ bool ScalarEvolution::isImpliedCond(Value *CondValue,
ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
bool Inverse) {
- // Recursivly handle And and Or conditions.
+ // Recursively handle And and Or conditions.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CondValue)) {
if (BO->getOpcode() == Instruction::And) {
if (!Inverse)
@@ -4914,7 +4983,7 @@ bool ScalarEvolution::isImpliedCond(Value *CondValue,
}
/// isImpliedCondOperands - Test whether the condition described by Pred,
-/// LHS, and RHS is true whenever the condition desribed by Pred, FoundLHS,
+/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
/// and FoundRHS is true.
bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
@@ -4929,7 +4998,7 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
}
/// isImpliedCondOperandsHelper - Test whether the condition described by
-/// Pred, LHS, and RHS is true whenever the condition desribed by Pred,
+/// Pred, LHS, and RHS is true whenever the condition described by Pred,
/// FoundLHS, and FoundRHS is true.
bool
ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
@@ -5087,7 +5156,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
// If MaxEnd is within a step of the maximum integer value in its type,
// adjust it down to the minimum value which would produce the same effect.
- // This allows the subsequent ceiling divison of (N+(step-1))/step to
+ // This allows the subsequent ceiling division of (N+(step-1))/step to
// compute the correct value.
const SCEV *StepMinusOne = getMinusSCEV(Step,
getIntegerSCEV(1, Step->getType()));
@@ -5304,8 +5373,8 @@ ScalarEvolution::ScalarEvolution()
bool ScalarEvolution::runOnFunction(Function &F) {
this->F = &F;
LI = &getAnalysis<LoopInfo>();
- DT = &getAnalysis<DominatorTree>();
TD = getAnalysisIfAvailable<TargetData>();
+ DT = &getAnalysis<DominatorTree>();
return false;
}
@@ -5364,7 +5433,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
}
void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
- // ScalarEvolution's implementaiton of the print method is to print
+ // ScalarEvolution's implementation of the print method is to print
// out SCEV values of all instructions that are interesting. Doing
// this potentially causes it to create new SCEV objects though,
// which technically conflicts with the const qualifier. This isn't
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 498c4a8..17b254f 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -10,6 +10,10 @@
// This file defines the ScalarEvolutionAliasAnalysis pass, which implements a
// simple alias analysis implemented in terms of ScalarEvolution queries.
//
+// This differs from traditional loop dependence analysis in that it tests
+// for dependencies within a single iteration of a loop, rather than
+// dependences between different iterations.
+//
// ScalarEvolution has a more complete understanding of pointer arithmetic
// than BasicAliasAnalysis' collection of ad-hoc analyses.
//
@@ -41,7 +45,7 @@ namespace {
return (AliasAnalysis*)this;
return this;
}
-
+
private:
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
virtual bool runOnFunction(Function &F);
@@ -89,7 +93,7 @@ ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
} else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
// If there's a pointer operand, it'll be sorted at the end of the list.
const SCEV *Last = A->getOperand(A->getNumOperands()-1);
- if (isa<PointerType>(Last->getType()))
+ if (Last->getType()->isPointerTy())
return GetBaseValue(Last);
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// This is a leaf node.
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 4310e3c..e27da96 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/STLExtras.h"
@@ -137,6 +138,10 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
if (IP != BlockBegin) {
--IP;
for (; ScanLimit; --IP, --ScanLimit) {
+ // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+ // generated code.
+ if (isa<DbgInfoIntrinsic>(IP))
+ ScanLimit++;
if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
IP->getOperand(1) == RHS)
return IP;
@@ -144,15 +149,34 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
}
}
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ }
+
// If we haven't found this binop, insert it.
Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS, "tmp");
rememberInstruction(BO);
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
return BO;
}
/// FactorOutConstant - Test if S is divisible by Factor, using signed
/// division. If so, update S with Factor divided out and return true.
-/// S need not be evenly divisble if a reasonable remainder can be
+/// S need not be evenly divisible if a reasonable remainder can be
/// computed.
/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
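The save/hoist/restore idiom introduced in this hunk reappears in the expandAddToGEP hunks below; condensed here for reference using the hunk's own identifiers, as a summary sketch rather than a new code path:

  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();          // remember where we were
  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
  // Climb out of every loop in which both operands are invariant, as long as
  // the loop has a preheader to host the hoisted instruction.
  while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
    if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
    BasicBlock *Preheader = L->getLoopPreheader();
    if (!Preheader) break;
    Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
  }
  // ... emit the binop at the (possibly hoisted) point ...
  if (SaveInsertBB)
    restoreInsertPoint(SaveInsertBB, SaveInsertPt);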
@@ -462,7 +486,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
break;
}
- // If none of the operands were convertable to proper GEP indices, cast
+ // If none of the operands were convertible to proper GEP indices, cast
// the base to i8* and do an ugly getelementptr with that. It's still
// better than ptrtoint+arithmetic+inttoptr at least.
if (!AnyNonZeroIndices) {
@@ -486,6 +510,10 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
if (IP != BlockBegin) {
--IP;
for (; ScanLimit; --IP, --ScanLimit) {
+ // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+ // generated code.
+ if (isa<DbgInfoIntrinsic>(IP))
+ ScanLimit++;
if (IP->getOpcode() == Instruction::GetElementPtr &&
IP->getOperand(0) == V && IP->getOperand(1) == Idx)
return IP;
@@ -493,12 +521,56 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
}
}
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ }
+
// Emit a GEP.
Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
rememberInstruction(GEP);
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
return GEP;
}
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(V)) break;
+
+ bool AnyIndexNotLoopInvariant = false;
+ for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(),
+ E = GepIndices.end(); I != E; ++I)
+ if (!L->isLoopInvariant(*I)) {
+ AnyIndexNotLoopInvariant = true;
+ break;
+ }
+ if (AnyIndexNotLoopInvariant)
+ break;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ }
+
// Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
// because ScalarEvolution may have changed the address arithmetic to
// compute a value which is beyond the end of the allocated object.
@@ -511,6 +583,11 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
"scevgep");
Ops.push_back(SE.getUnknown(GEP));
rememberInstruction(GEP);
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
return expand(SE.getAddExpr(Ops));
}
@@ -528,70 +605,179 @@ static bool isNonConstantNegative(const SCEV *F) {
return SC->getValue()->getValue().isNegative();
}
-Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
- int NumOperands = S->getNumOperands();
- const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
+/// SCEV expansion. If they are nested, this is the most nested. If they are
+/// neighboring, pick the later one.
+static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
+ DominatorTree &DT) {
+ if (!A) return B;
+ if (!B) return A;
+ if (A->contains(B)) return B;
+ if (B->contains(A)) return A;
+ if (DT.dominates(A->getHeader(), B->getHeader())) return B;
+ if (DT.dominates(B->getHeader(), A->getHeader())) return A;
+ return A; // Arbitrarily break the tie.
+}
- // Find the index of an operand to start with. Choose the operand with
- // pointer type, if there is one, or the last operand otherwise.
- int PIdx = 0;
- for (; PIdx != NumOperands - 1; ++PIdx)
- if (isa<PointerType>(S->getOperand(PIdx)->getType())) break;
+/// GetRelevantLoop - Get the most relevant loop associated with the given
+/// expression, according to PickMostRelevantLoop.
+static const Loop *GetRelevantLoop(const SCEV *S, LoopInfo &LI,
+ DominatorTree &DT) {
+ if (isa<SCEVConstant>(S))
+ return 0;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
+ return LI.getLoopFor(I->getParent());
+ return 0;
+ }
+ if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
+ const Loop *L = 0;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ L = AR->getLoop();
+ for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
+ I != E; ++I)
+ L = PickMostRelevantLoop(L, GetRelevantLoop(*I, LI, DT), DT);
+ return L;
+ }
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+ return GetRelevantLoop(C->getOperand(), LI, DT);
+ if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S))
+ return PickMostRelevantLoop(GetRelevantLoop(D->getLHS(), LI, DT),
+ GetRelevantLoop(D->getRHS(), LI, DT),
+ DT);
+ llvm_unreachable("Unexpected SCEV type!");
+}
- // Expand code for the operand that we chose.
- Value *V = expand(S->getOperand(PIdx));
+/// LoopCompare - Compare loops by PickMostRelevantLoop.
+class LoopCompare {
+ DominatorTree &DT;
+public:
+ explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
+
+ bool operator()(std::pair<const Loop *, const SCEV *> LHS,
+ std::pair<const Loop *, const SCEV *> RHS) const {
+ // Compare loops with PickMostRelevantLoop.
+ if (LHS.first != RHS.first)
+ return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
+
+ // If one operand is a non-constant negative and the other is not,
+ // put the non-constant negative on the right so that a sub can
+ // be used instead of a negate and add.
+ if (isNonConstantNegative(LHS.second)) {
+ if (!isNonConstantNegative(RHS.second))
+ return false;
+ } else if (isNonConstantNegative(RHS.second))
+ return true;
- // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
- // comments on expandAddToGEP for details.
- if (const PointerType *PTy = dyn_cast<PointerType>(V->getType())) {
- // Take the operand at PIdx out of the list.
- const SmallVectorImpl<const SCEV *> &Ops = S->getOperands();
- SmallVector<const SCEV *, 8> NewOps;
- NewOps.insert(NewOps.end(), Ops.begin(), Ops.begin() + PIdx);
- NewOps.insert(NewOps.end(), Ops.begin() + PIdx + 1, Ops.end());
- // Make a GEP.
- return expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, V);
+ // Otherwise they are equivalent according to this comparison.
+ return false;
}
+};
- // Otherwise, we'll expand the rest of the SCEVAddExpr as plain integer
- // arithmetic.
- V = InsertNoopCastOfTo(V, Ty);
+Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
- // Emit a bunch of add instructions
- for (int i = NumOperands-1; i >= 0; --i) {
- if (i == PIdx) continue;
- const SCEV *Op = S->getOperand(i);
- if (isNonConstantNegative(Op)) {
+ // Collect all the add operands in a loop, along with their associated loops.
+ // Iterate in reverse so that constants are emitted last, all else equal, and
+ // so that pointer operands are inserted first, which the code below relies on
+ // to form more involved GEPs.
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+ for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
+ E(S->op_begin()); I != E; ++I)
+ OpsAndLoops.push_back(std::make_pair(GetRelevantLoop(*I, *SE.LI, *SE.DT),
+ *I));
+
+ // Sort by loop. Use a stable sort so that constants follow non-constants and
+ // pointer operands precede non-pointer operands.
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+
+ // Emit instructions to add all the operands. Hoist as much as possible
+ // out of loops, and form meaningful getelementptrs where possible.
+ Value *Sum = 0;
+ for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
+ I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+ const Loop *CurLoop = I->first;
+ const SCEV *Op = I->second;
+ if (!Sum) {
+ // This is the first operand. Just expand it.
+ Sum = expand(Op);
+ ++I;
+ } else if (const PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
+ // The running sum expression is a pointer. Try to form a getelementptr
+ // at this level with that as the base.
+ SmallVector<const SCEV *, 4> NewOps;
+ for (; I != E && I->first == CurLoop; ++I)
+ NewOps.push_back(I->second);
+ Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
+ } else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
+ // The running sum is an integer, and there's a pointer at this level.
+ // Try to form a getelementptr.
+ SmallVector<const SCEV *, 4> NewOps;
+ NewOps.push_back(SE.getUnknown(Sum));
+ for (++I; I != E && I->first == CurLoop; ++I)
+ NewOps.push_back(I->second);
+ Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
+ } else if (isNonConstantNegative(Op)) {
+ // Instead of doing a negate and add, just do a subtract.
Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
- V = InsertBinop(Instruction::Sub, V, W);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ Sum = InsertBinop(Instruction::Sub, Sum, W);
+ ++I;
} else {
+ // A simple add.
Value *W = expandCodeFor(Op, Ty);
- V = InsertBinop(Instruction::Add, V, W);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ // Canonicalize a constant to the RHS.
+ if (isa<Constant>(Sum)) std::swap(Sum, W);
+ Sum = InsertBinop(Instruction::Add, Sum, W);
+ ++I;
}
}
- return V;
+
+ return Sum;
}
Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
const Type *Ty = SE.getEffectiveSCEVType(S->getType());
- int FirstOp = 0; // Set if we should emit a subtract.
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(0)))
- if (SC->getValue()->isAllOnesValue())
- FirstOp = 1;
-
- int i = S->getNumOperands()-2;
- Value *V = expandCodeFor(S->getOperand(i+1), Ty);
-
- // Emit a bunch of multiply instructions
- for (; i >= FirstOp; --i) {
- Value *W = expandCodeFor(S->getOperand(i), Ty);
- V = InsertBinop(Instruction::Mul, V, W);
+
+ // Collect all the mul operands in a loop, along with their associated loops.
+ // Iterate in reverse so that constants are emitted last, all else equal.
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+ for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
+ E(S->op_begin()); I != E; ++I)
+ OpsAndLoops.push_back(std::make_pair(GetRelevantLoop(*I, *SE.LI, *SE.DT),
+ *I));
+
+ // Sort by loop. Use a stable sort so that constants follow non-constants.
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+
+ // Emit instructions to mul all the operands. Hoist as much as possible
+ // out of loops.
+ Value *Prod = 0;
+ for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
+ I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+ const SCEV *Op = I->second;
+ if (!Prod) {
+ // This is the first operand. Just expand it.
+ Prod = expand(Op);
+ ++I;
+ } else if (Op->isAllOnesValue()) {
+ // Instead of doing a multiply by negative one, just do a negate.
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
+ ++I;
+ } else {
+ // A simple mul.
+ Value *W = expandCodeFor(Op, Ty);
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ // Canonicalize a constant to the RHS.
+ if (isa<Constant>(Prod)) std::swap(Prod, W);
+ Prod = InsertBinop(Instruction::Mul, Prod, W);
+ ++I;
+ }
}
- // -1 * ... ---> 0 - ...
- if (FirstOp == 1)
- V = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), V);
- return V;
+ return Prod;
}
Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
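A small worked example of the ordering PickMostRelevantLoop and LoopCompare impose; the loop names are hypothetical and the rules paraphrase the code above:

  // With Outer containing Inner (DT the dominator tree):
  //   PickMostRelevantLoop(Outer, Inner, DT) == Inner   // most nested wins
  // With disjoint loops A and B where A's header dominates B's header:
  //   PickMostRelevantLoop(A, B, DT) == B               // the later loop wins
  // A null loop (e.g. from a SCEVConstant operand) loses to any real loop.
  // LoopCompare orders a pair before another exactly when its loop is not the
  // more relevant of the two, so after the stable_sort operands of outer (or
  // earlier) loops come first and the most nested loop's operands come last.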
@@ -641,8 +827,65 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// Reuse a previously-inserted PHI, if present.
for (BasicBlock::iterator I = L->getHeader()->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I)
- if (isInsertedInstruction(PN) && SE.getSCEV(PN) == Normalized)
- return PN;
+ if (SE.isSCEVable(PN->getType()) &&
+ (SE.getEffectiveSCEVType(PN->getType()) ==
+ SE.getEffectiveSCEVType(Normalized->getType())) &&
+ SE.getSCEV(PN) == Normalized)
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ Instruction *IncV =
+ cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+
+ // Determine if this is a well-behaved chain of instructions leading
+ // back to the PHI. It probably will be, if we're scanning an inner
+ // loop already visited by LSR for example, but it wouldn't have
+ // to be.
+ do {
+ if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV)) {
+ IncV = 0;
+ break;
+ }
+ // If any of the operands don't dominate the insert position, bail.
+ // Addrec operands are always loop-invariant, so this can only happen
+ // if there are instructions which haven't been hoisted.
+ for (User::op_iterator OI = IncV->op_begin()+1,
+ OE = IncV->op_end(); OI != OE; ++OI)
+ if (Instruction *OInst = dyn_cast<Instruction>(OI))
+ if (!SE.DT->dominates(OInst, IVIncInsertPos)) {
+ IncV = 0;
+ break;
+ }
+ if (!IncV)
+ break;
+ // Advance to the next instruction.
+ IncV = dyn_cast<Instruction>(IncV->getOperand(0));
+ if (!IncV)
+ break;
+ if (IncV->mayHaveSideEffects()) {
+ IncV = 0;
+ break;
+ }
+ } while (IncV != PN);
+
+ if (IncV) {
+ // Ok, the add recurrence looks usable.
+ // Remember this PHI, even in post-inc mode.
+ InsertedValues.insert(PN);
+ // Remember the increment.
+ IncV = cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+ rememberInstruction(IncV);
+ if (L == IVIncInsertLoop)
+ do {
+ if (SE.DT->dominates(IncV, IVIncInsertPos))
+ break;
+ // Make sure the increment is where we want it. But don't move it
+ // down past a potential existing post-inc user.
+ IncV->moveBefore(IVIncInsertPos);
+ IVIncInsertPos = IncV;
+ IncV = cast<Instruction>(IncV->getOperand(0));
+ } while (IncV != PN);
+ return PN;
+ }
+ }
// Save the original insertion point so we can restore it when we're done.
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
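A schematic picture of the increment chain the new reuse check accepts; the IR names are made up for illustration:

  //   %iv      = phi i64 [ %start, %preheader ], [ %iv.next, %latch ]
  //   %iv.next = add i64 %iv, %step
  // Walking operand 0 from the latch value leads straight back to the PHI,
  // every other operand (%step here) dominates IVIncInsertPos, and no step in
  // the chain is an intermediate PHI or may have side effects, so the PHI and
  // its increment are reused. Any violation clears IncV and a fresh PHI is
  // built below instead.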
@@ -658,7 +901,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// negative, insert a sub instead of an add for the increment (unless it's a
// constant, because subtracts of constants are canonicalized to adds).
const SCEV *Step = Normalized->getStepRecurrence(SE);
- bool isPointer = isa<PointerType>(ExpandTy);
+ bool isPointer = ExpandTy->isPointerTy();
bool isNegative = !isPointer && isNonConstantNegative(Step);
if (isNegative)
Step = SE.getNegativeSCEV(Step);
@@ -713,7 +956,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// Restore the original insert point.
if (SaveInsertBB)
- Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
// Remember this PHI, even in post-inc mode.
InsertedValues.insert(PN);
@@ -763,7 +1006,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
const Type *ExpandTy = PostLoopScale ? IntTy : STy;
PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy);
- // Accomodate post-inc mode, if necessary.
+ // Accommodate post-inc mode, if necessary.
Value *Result;
if (L != PostIncLoop)
Result = PN;
@@ -776,6 +1019,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// Re-apply any non-loop-dominating scale.
if (PostLoopScale) {
+ Result = InsertNoopCastOfTo(Result, IntTy);
Result = Builder.CreateMul(Result,
expandCodeFor(PostLoopScale, IntTy));
rememberInstruction(Result);
@@ -787,6 +1031,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
const SCEV *const OffsetArray[1] = { PostLoopOffset };
Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result);
} else {
+ Result = InsertNoopCastOfTo(Result, IntTy);
Result = Builder.CreateAdd(Result,
expandCodeFor(PostLoopOffset, IntTy));
rememberInstruction(Result);
@@ -806,7 +1051,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
PHINode *CanonicalIV = 0;
if (PHINode *PN = L->getCanonicalInductionVariable())
if (SE.isSCEVable(PN->getType()) &&
- isa<IntegerType>(SE.getEffectiveSCEVType(PN->getType())) &&
+ SE.getEffectiveSCEVType(PN->getType())->isIntegerTy() &&
SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
CanonicalIV = PN;
@@ -827,7 +1072,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
while (isa<PHINode>(NewInsertPt)) ++NewInsertPt;
V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
NewInsertPt);
- Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
return V;
}
@@ -1022,13 +1267,24 @@ Value *SCEVExpander::expand(const SCEV *S) {
L = L->getParentLoop())
if (S->isLoopInvariant(L)) {
if (!L) break;
- if (BasicBlock *Preheader = L->getLoopPreheader())
+ if (BasicBlock *Preheader = L->getLoopPreheader()) {
InsertPt = Preheader->getTerminator();
+ BasicBlock::iterator IP = InsertPt;
+ // Back past any debug info instructions. Sometimes we inserted
+ // something earlier before debug info but after any real instructions.
+ // This should behave the same as if debug info was not present.
+ while (IP != Preheader->begin()) {
+ --IP;
+ if (!isa<DbgInfoIntrinsic>(IP))
+ break;
+ InsertPt = IP;
+ }
+ }
} else {
// If the SCEV is computable at this level, insert it into the header
// after the PHIs (and after any other instructions that we've inserted
// there) so that it is guaranteed to dominate any user inside the loop.
- if (L && S->hasComputableLoopEvolution(L))
+ if (L && S->hasComputableLoopEvolution(L) && L != PostIncLoop)
InsertPt = L->getHeader()->getFirstNonPHI();
while (isInsertedInstruction(InsertPt))
InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
@@ -1053,10 +1309,32 @@ Value *SCEVExpander::expand(const SCEV *S) {
if (!PostIncLoop)
InsertedExpressions[std::make_pair(S, InsertPt)] = V;
- Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
return V;
}
+void SCEVExpander::rememberInstruction(Value *I) {
+ if (!PostIncLoop)
+ InsertedValues.insert(I);
+
+ // If we just claimed an existing instruction and that instruction had
+ // been the insert point, adjust the insert point forward so that
+ // subsequently inserted code will be dominated.
+ if (Builder.GetInsertPoint() == I) {
+ BasicBlock::iterator It = cast<Instruction>(I);
+ do { ++It; } while (isInsertedInstruction(It));
+ Builder.SetInsertPoint(Builder.GetInsertBlock(), It);
+ }
+}
+
+void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
+ // If we acquired more instructions since the old insert point was saved,
+ // advance past them.
+ while (isInsertedInstruction(I)) ++I;
+
+ Builder.SetInsertPoint(BB, I);
+}
+
/// getOrInsertCanonicalInductionVariable - This method returns the
/// canonical induction variable of the specified type for the specified
/// loop (inserting one if there is none). A canonical induction variable
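A brief, hedged sketch of the pattern these two helpers support; the expansion call shown is schematic:

  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
  Value *V = expandCodeFor(S, 0, InsertPt);        // may emit or reuse instructions
  // A plain SetInsertPoint(SaveInsertBB, SaveInsertPt) could land on one of
  // the expander's own instructions; restoreInsertPoint advances past anything
  // isInsertedInstruction() reports so later code is emitted after it.
  restoreInsertPoint(SaveInsertBB, SaveInsertPt);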
@@ -1064,13 +1342,13 @@ Value *SCEVExpander::expand(const SCEV *S) {
Value *
SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
const Type *Ty) {
- assert(Ty->isInteger() && "Can only insert integer induction variables!");
+ assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
const SCEV *H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty),
SE.getIntegerSCEV(1, Ty), L);
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
Value *V = expandCodeFor(H, 0, L->getHeader()->begin());
if (SaveInsertBB)
- Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
return V;
}
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index f9331e7..92cbb7c 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -23,6 +23,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include <cstring>
using namespace llvm;
@@ -49,11 +50,11 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
unsigned BitWidth = Mask.getBitWidth();
- assert((V->getType()->isIntOrIntVector() || isa<PointerType>(V->getType())) &&
- "Not integer or pointer type!");
+ assert((V->getType()->isIntOrIntVectorTy() || V->getType()->isPointerTy())
+ && "Not integer or pointer type!");
assert((!TD ||
TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
- (!V->getType()->isIntOrIntVector() ||
+ (!V->getType()->isIntOrIntVectorTy() ||
V->getType()->getScalarSizeInBits() == BitWidth) &&
KnownZero.getBitWidth() == BitWidth &&
KnownOne.getBitWidth() == BitWidth &&
@@ -249,7 +250,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
unsigned SrcBitWidth;
// Note that we handle pointer operands here because of inttoptr/ptrtoint
// which fall through here.
- if (isa<PointerType>(SrcTy))
+ if (SrcTy->isPointerTy())
SrcBitWidth = TD->getTypeSizeInBits(SrcTy);
else
SrcBitWidth = SrcTy->getScalarSizeInBits();
@@ -269,10 +270,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
}
case Instruction::BitCast: {
const Type *SrcTy = I->getOperand(0)->getType();
- if ((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+ if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
// TODO: For now, not handling conversions like:
// (bitcast i64 %x to <2 x i32>)
- !isa<VectorType>(I->getType())) {
+ !I->getType()->isVectorTy()) {
ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD,
Depth+1);
return;
@@ -649,7 +650,7 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
///
unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
unsigned Depth) {
- assert((TD || V->getType()->isIntOrIntVector()) &&
+ assert((TD || V->getType()->isIntOrIntVectorTy()) &&
"ComputeNumSignBits requires a TargetData object to operate "
"on non-integer values!");
const Type *Ty = V->getType();
@@ -823,7 +824,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
- assert(V->getType()->isInteger() && "Not integer or pointer type!");
+ assert(V->getType()->isIntegerTy() && "Not integer or pointer type!");
const Type *T = V->getType();
@@ -980,7 +981,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
/// may not be represented in the result.
static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
const TargetData *TD, unsigned Depth) {
- assert(isa<IntegerType>(V->getType()) && "Not an integer value");
+ assert(V->getType()->isIntegerTy() && "Not an integer value");
// Limit our recursion depth.
if (Depth == 6) {
@@ -1253,7 +1254,7 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
if (idx_begin == idx_end)
return V;
// We have indices, so V should have an indexable type
- assert((isa<StructType>(V->getType()) || isa<ArrayType>(V->getType()))
+ assert((V->getType()->isStructTy() || V->getType()->isArrayTy())
&& "Not looking at a struct or array?");
assert(ExtractValueInst::getIndexedType(V->getType(), idx_begin, idx_end)
&& "Invalid indices for type?");
@@ -1372,7 +1373,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
// Make sure the index-ee is a pointer to array of i8.
const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
- if (AT == 0 || !AT->getElementType()->isInteger(8))
+ if (AT == 0 || !AT->getElementType()->isIntegerTy(8))
return false;
// Check to make sure that the first operand of the GEP is an integer and
@@ -1411,7 +1412,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
// Must be a Constant Array
ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
- if (Array == 0 || !Array->getType()->getElementType()->isInteger(8))
+ if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8))
return false;
// Get the number of elements in the array
@@ -1436,3 +1437,131 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
// The array isn't null terminated, but maybe this is a memcpy, not a strcpy.
return true;
}
+
+// These next two are very similar to the above, but also look through PHI
+// nodes.
+// TODO: See if we can integrate these two together.
+
+/// GetStringLengthH - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'. If we can't, return 0.
+static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
+ // Look through noop bitcast instructions.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+ return GetStringLengthH(BCI->getOperand(0), PHIs);
+
+ // If this is a PHI node, there are two cases: either we have already seen it
+ // or we haven't.
+ if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (!PHIs.insert(PN))
+ return ~0ULL; // already in the set.
+
+ // If it was new, see if all the input strings are the same length.
+ uint64_t LenSoFar = ~0ULL;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs);
+ if (Len == 0) return 0; // Unknown length -> unknown.
+
+ if (Len == ~0ULL) continue;
+
+ if (Len != LenSoFar && LenSoFar != ~0ULL)
+ return 0; // Disagree -> unknown.
+ LenSoFar = Len;
+ }
+
+ // Success, all agree.
+ return LenSoFar;
+ }
+
+ // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
+ if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
+ if (Len1 == 0) return 0;
+ uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
+ if (Len2 == 0) return 0;
+ if (Len1 == ~0ULL) return Len2;
+ if (Len2 == ~0ULL) return Len1;
+ if (Len1 != Len2) return 0;
+ return Len1;
+ }
+
+ // If the value is not a GEP instruction nor a constant expression with a
+ // GEP instruction, then return unknown.
+ User *GEP = 0;
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+ GEP = GEPI;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->getOpcode() != Instruction::GetElementPtr)
+ return 0;
+ GEP = CE;
+ } else {
+ return 0;
+ }
+
+ // Make sure the GEP has exactly three arguments.
+ if (GEP->getNumOperands() != 3)
+ return 0;
+
+ // Check to make sure that the first operand of the GEP is an integer and
+ // has value 0 so that we are sure we're indexing into the initializer.
+ if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
+ if (!Idx->isZero())
+ return 0;
+ } else
+ return 0;
+
+ // If the second index isn't a ConstantInt, then this is a variable index
+ // into the array. If this occurs, we can't say anything meaningful about
+ // the string.
+ uint64_t StartIdx = 0;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+ StartIdx = CI->getZExtValue();
+ else
+ return 0;
+
+ // The GEP, whether an instruction or a constant expression, must reference a global
+ // variable that is a constant and is initialized. The referenced constant
+ // initializer is the array that we'll use for optimization.
+ GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
+ if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
+ GV->mayBeOverridden())
+ return 0;
+ Constant *GlobalInit = GV->getInitializer();
+
+ // Handle the ConstantAggregateZero case, which is a degenerate case. The
+ // initializer is constant zero so the length of the string must be zero.
+ if (isa<ConstantAggregateZero>(GlobalInit))
+ return 1; // Len = 0 offset by 1.
+
+ // Must be a Constant Array
+ ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+ if (!Array || !Array->getType()->getElementType()->isIntegerTy(8))
+ return 0;
+
+ // Get the number of elements in the array
+ uint64_t NumElts = Array->getType()->getNumElements();
+
+ // Traverse the constant array from StartIdx (derived above) which is
+ // the place the GEP refers to in the array.
+ for (unsigned i = StartIdx; i != NumElts; ++i) {
+ Constant *Elt = Array->getOperand(i);
+ ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+ if (!CI) // This array isn't suitable, non-int initializer.
+ return 0;
+ if (CI->isZero())
+ return i-StartIdx+1; // We found end of string, success!
+ }
+
+ return 0; // The array isn't null terminated, conservatively return 'unknown'.
+}
+
+/// GetStringLength - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'. If we can't, return 0.
+uint64_t llvm::GetStringLength(Value *V) {
+ if (!V->getType()->isPointerTy()) return 0;
+
+ SmallPtrSet<PHINode*, 32> PHIs;
+ uint64_t Len = GetStringLengthH(V, PHIs);
+ // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
+ // an empty string as a length.
+ return Len == ~0ULL ? 1 : Len;
+}
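A minimal usage sketch for the new helper; the call site and the StrArg name are hypothetical, while the "len+1, or 0 if unknown" contract comes from the patch:

  uint64_t Len = GetStringLength(StrArg);   // 'len+1', or 0 if unknown
  if (Len != 0) {
    // For a pointer known to address "abc\0" this is 4; a simplifier could
    // now fold a strlen(StrArg) call to the constant Len - 1.
  }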
diff --git a/lib/AsmParser/Android.mk b/lib/AsmParser/Android.mk
new file mode 100644
index 0000000..548f719
--- /dev/null
+++ b/lib/AsmParser/Android.mk
@@ -0,0 +1,28 @@
+LOCAL_PATH:= $(call my-dir)
+
+asm_parser_SRC_FILES := \
+ LLLexer.cpp \
+ LLParser.cpp \
+ Parser.cpp
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(asm_parser_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMAsmParser
+
+include $(LOCAL_PATH)/../../llvm-host-build.mk
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(asm_parser_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMAsmParser
+
+include $(LOCAL_PATH)/../../llvm-device-build.mk
+include $(BUILD_STATIC_LIBRARY)
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 8ad658d..46f3cbc 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -570,6 +570,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(type);
KEYWORD(opaque);
+ KEYWORD(union);
KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 5dd6569..8083a07 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -614,7 +614,7 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
Aliasee = ID.ConstantVal;
}
- if (!isa<PointerType>(Aliasee->getType()))
+ if (!Aliasee->getType()->isPointerTy())
return Error(AliaseeLoc, "alias must have pointer type");
// Okay, create the alias but do not insert it into the module yet.
@@ -685,7 +685,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
return true;
}
- if (isa<FunctionType>(Ty) || Ty->isLabelTy())
+ if (Ty->isFunctionTy() || Ty->isLabelTy())
return Error(TyLoc, "invalid type for global variable");
GlobalVariable *GV = 0;
@@ -791,7 +791,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty,
GlobalValue *FwdVal;
if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) {
// Function types can return opaque but functions can't.
- if (isa<OpaqueType>(FT->getReturnType())) {
+ if (FT->getReturnType()->isOpaqueTy()) {
Error(Loc, "function may not return opaque type");
return 0;
}
@@ -836,7 +836,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) {
GlobalValue *FwdVal;
if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) {
// Function types can return opaque but functions can't.
- if (isa<OpaqueType>(FT->getReturnType())) {
+ if (FT->getReturnType()->isOpaqueTy()) {
Error(Loc, "function may not return opaque type");
return 0;
}
@@ -956,6 +956,14 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break;
case lltok::kw_naked: Attrs |= Attribute::Naked; break;
+ case lltok::kw_alignstack: {
+ unsigned Alignment;
+ if (ParseOptionalStackAlignment(Alignment))
+ return true;
+ Attrs |= Attribute::constructStackAlignmentFromInt(Alignment);
+ continue;
+ }
+
case lltok::kw_align: {
unsigned Alignment;
if (ParseOptionalAlignment(Alignment))
@@ -963,6 +971,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
Attrs |= Attribute::constructAlignmentFromInt(Alignment);
continue;
}
+
}
Lex.Lex();
}
@@ -1131,6 +1140,25 @@ bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment,
return false;
}
+/// ParseOptionalStackAlignment
+/// ::= /* empty */
+/// ::= 'alignstack' '(' 4 ')'
+bool LLParser::ParseOptionalStackAlignment(unsigned &Alignment) {
+ Alignment = 0;
+ if (!EatIfPresent(lltok::kw_alignstack))
+ return false;
+ LocTy ParenLoc = Lex.getLoc();
+ if (!EatIfPresent(lltok::lparen))
+ return Error(ParenLoc, "expected '('");
+ LocTy AlignLoc = Lex.getLoc();
+ if (ParseUInt32(Alignment)) return true;
+ ParenLoc = Lex.getLoc();
+ if (!EatIfPresent(lltok::rparen))
+ return Error(ParenLoc, "expected ')'");
+ if (!isPowerOf2_32(Alignment))
+ return Error(AlignLoc, "stack alignment is not a power of two");
+ return false;
+}
/// ParseIndexList - This parses the index list for an insert/extractvalue
/// instruction. This sets AteExtraComma in the case where we eat an extra
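For orientation, a hedged note on the new keyword; the .ll line is a hypothetical example, while the grammar, the error text, and the attribute constructor come from the hunks above:

  // Grammar: 'alignstack' '(' <power of two> ')', e.g. on a declaration:
  //   declare void @f() alignstack(16)
  // A non-power-of-two operand is rejected with
  //   "stack alignment is not a power of two";
  // an accepted value is folded into the attribute word via
  //   Attrs |= Attribute::constructStackAlignmentFromInt(Alignment);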
@@ -1267,6 +1295,11 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) {
if (ParseStructType(Result, false))
return true;
break;
+ case lltok::kw_union:
+ // TypeRec ::= 'union' '{' ... '}'
+ if (ParseUnionType(Result))
+ return true;
+ break;
case lltok::lsquare:
// TypeRec ::= '[' ... ']'
Lex.Lex(); // eat the lsquare.
@@ -1482,7 +1515,7 @@ bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList,
Name = "";
}
- if (!ArgTy->isFirstClassType() && !isa<OpaqueType>(ArgTy))
+ if (!ArgTy->isFirstClassType() && !ArgTy->isOpaqueTy())
return Error(TypeLoc, "invalid type for function argument");
ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name));
@@ -1576,6 +1609,38 @@ bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) {
return false;
}
+/// ParseUnionType
+/// TypeRec
+/// ::= 'union' '{' TypeRec (',' TypeRec)* '}'
+bool LLParser::ParseUnionType(PATypeHolder &Result) {
+ assert(Lex.getKind() == lltok::kw_union);
+ Lex.Lex(); // Consume the 'union'
+
+ if (ParseToken(lltok::lbrace, "'{' expected after 'union'")) return true;
+
+ SmallVector<PATypeHolder, 8> ParamsList;
+ do {
+ LocTy EltTyLoc = Lex.getLoc();
+ if (ParseTypeRec(Result)) return true;
+ ParamsList.push_back(Result);
+
+ if (Result->isVoidTy())
+ return Error(EltTyLoc, "union element can not have void type");
+ if (!UnionType::isValidElementType(Result))
+ return Error(EltTyLoc, "invalid element type for union");
+
+ } while (EatIfPresent(lltok::comma)) ;
+
+ if (ParseToken(lltok::rbrace, "expected '}' at end of union"))
+ return true;
+
+ SmallVector<const Type*, 8> ParamsListTy;
+ for (unsigned i = 0, e = ParamsList.size(); i != e; ++i)
+ ParamsListTy.push_back(ParamsList[i].get());
+ Result = HandleUpRefs(UnionType::get(&ParamsListTy[0], ParamsListTy.size()));
+ return false;
+}
+
/// ParseArrayVectorType - Parse an array or vector type, assuming the first
/// token has already been consumed.
/// TypeRec
@@ -1720,7 +1785,7 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
}
// Don't make placeholders with invalid type.
- if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) && !Ty->isLabelTy()) {
+ if (!Ty->isFirstClassType() && !Ty->isOpaqueTy() && !Ty->isLabelTy()) {
P.Error(Loc, "invalid use of a non-first-class type");
return 0;
}
@@ -1761,7 +1826,7 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty,
return 0;
}
- if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) && !Ty->isLabelTy()) {
+ if (!Ty->isFirstClassType() && !Ty->isOpaqueTy() && !Ty->isLabelTy()) {
P.Error(Loc, "invalid use of a non-first-class type");
return 0;
}
@@ -1992,8 +2057,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
if (Elts.empty())
return Error(ID.Loc, "constant vector must not be empty");
- if (!Elts[0]->getType()->isInteger() &&
- !Elts[0]->getType()->isFloatingPoint())
+ if (!Elts[0]->getType()->isIntegerTy() &&
+ !Elts[0]->getType()->isFloatingPointTy())
return Error(FirstEltLoc,
"vector elements must have integer or floating point type");
@@ -2135,8 +2200,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
ParseToken(lltok::rparen, "expected ')' in extractvalue constantexpr"))
return true;
- if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType()))
- return Error(ID.Loc, "extractvalue operand must be array or struct");
+ if (!Val->getType()->isAggregateType())
+ return Error(ID.Loc, "extractvalue operand must be aggregate type");
if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(),
Indices.end()))
return Error(ID.Loc, "invalid indices for extractvalue");
@@ -2156,8 +2221,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
ParseIndexList(Indices) ||
ParseToken(lltok::rparen, "expected ')' in insertvalue constantexpr"))
return true;
- if (!isa<StructType>(Val0->getType()) && !isa<ArrayType>(Val0->getType()))
- return Error(ID.Loc, "extractvalue operand must be array or struct");
+ if (!Val0->getType()->isAggregateType())
+ return Error(ID.Loc, "insertvalue operand must be aggregate type");
if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(),
Indices.end()))
return Error(ID.Loc, "invalid indices for insertvalue");
@@ -2185,13 +2250,13 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
CmpInst::Predicate Pred = (CmpInst::Predicate)PredVal;
if (Opc == Instruction::FCmp) {
- if (!Val0->getType()->isFPOrFPVector())
+ if (!Val0->getType()->isFPOrFPVectorTy())
return Error(ID.Loc, "fcmp requires floating point operands");
ID.ConstantVal = ConstantExpr::getFCmp(Pred, Val0, Val1);
} else {
assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!");
- if (!Val0->getType()->isIntOrIntVector() &&
- !isa<PointerType>(Val0->getType()))
+ if (!Val0->getType()->isIntOrIntVectorTy() &&
+ !Val0->getType()->isPointerTy())
return Error(ID.Loc, "icmp requires pointer or integer operands");
ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1);
}
@@ -2241,7 +2306,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return true;
if (Val0->getType() != Val1->getType())
return Error(ID.Loc, "operands of constexpr must have same type");
- if (!Val0->getType()->isIntOrIntVector()) {
+ if (!Val0->getType()->isIntOrIntVectorTy()) {
if (NUW)
return Error(ModifierLoc, "nuw only applies to integer operations");
if (NSW)
@@ -2249,8 +2314,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
}
// API compatibility: Accept either integer or floating-point types with
// add, sub, and mul.
- if (!Val0->getType()->isIntOrIntVector() &&
- !Val0->getType()->isFPOrFPVector())
+ if (!Val0->getType()->isIntOrIntVectorTy() &&
+ !Val0->getType()->isFPOrFPVectorTy())
return Error(ID.Loc,"constexpr requires integer, fp, or vector operands");
unsigned Flags = 0;
if (NUW) Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
@@ -2280,7 +2345,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return true;
if (Val0->getType() != Val1->getType())
return Error(ID.Loc, "operands of constexpr must have same type");
- if (!Val0->getType()->isIntOrIntVector())
+ if (!Val0->getType()->isIntOrIntVectorTy())
return Error(ID.Loc,
"constexpr requires integer or integer vector operands");
ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1);
@@ -2305,7 +2370,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return true;
if (Opc == Instruction::GetElementPtr) {
- if (Elts.size() == 0 || !isa<PointerType>(Elts[0]->getType()))
+ if (Elts.size() == 0 || !Elts[0]->getType()->isPointerTy())
return Error(ID.Loc, "getelementptr requires pointer operand");
if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(),
@@ -2405,7 +2470,7 @@ bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) {
bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
PerFunctionState *PFS) {
- if (isa<FunctionType>(Ty))
+ if (Ty->isFunctionTy())
return Error(ID.Loc, "functions are not values, refer to them as pointers");
switch (ID.Kind) {
@@ -2444,13 +2509,13 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
V = GetGlobalVal(ID.UIntVal, Ty, ID.Loc);
return V == 0;
case ValID::t_APSInt:
- if (!isa<IntegerType>(Ty))
+ if (!Ty->isIntegerTy())
return Error(ID.Loc, "integer constant must have integer type");
ID.APSIntVal.extOrTrunc(Ty->getPrimitiveSizeInBits());
V = ConstantInt::get(Context, ID.APSIntVal);
return false;
case ValID::t_APFloat:
- if (!Ty->isFloatingPoint() ||
+ if (!Ty->isFloatingPointTy() ||
!ConstantFP::isValueValidForType(Ty, ID.APFloatVal))
return Error(ID.Loc, "floating point constant invalid for type");
@@ -2470,19 +2535,19 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
return false;
case ValID::t_Null:
- if (!isa<PointerType>(Ty))
+ if (!Ty->isPointerTy())
return Error(ID.Loc, "null must be a pointer type");
V = ConstantPointerNull::get(cast<PointerType>(Ty));
return false;
case ValID::t_Undef:
// FIXME: LabelTy should not be a first-class type.
if ((!Ty->isFirstClassType() || Ty->isLabelTy()) &&
- !isa<OpaqueType>(Ty))
+ !Ty->isOpaqueTy())
return Error(ID.Loc, "invalid type for undef constant");
V = UndefValue::get(Ty);
return false;
case ValID::t_EmptyArray:
- if (!isa<ArrayType>(Ty) || cast<ArrayType>(Ty)->getNumElements() != 0)
+ if (!Ty->isArrayTy() || cast<ArrayType>(Ty)->getNumElements() != 0)
return Error(ID.Loc, "invalid empty array initializer");
V = UndefValue::get(Ty);
return false;
@@ -2493,8 +2558,17 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
V = Constant::getNullValue(Ty);
return false;
case ValID::t_Constant:
- if (ID.ConstantVal->getType() != Ty)
+ if (ID.ConstantVal->getType() != Ty) {
+ // Allow a constant struct with a single member to be converted
+ // to a union, if the union has a member which is the same type
+ // as the struct member.
+ if (const UnionType* utype = dyn_cast<UnionType>(Ty)) {
+ return ParseUnionValue(utype, ID, V);
+ }
+
return Error(ID.Loc, "constant expression type mismatch");
+ }
+
V = ID.ConstantVal;
return false;
}
@@ -2524,6 +2598,22 @@ bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
return false;
}
+bool LLParser::ParseUnionValue(const UnionType* utype, ValID &ID, Value *&V) {
+ if (const StructType* stype = dyn_cast<StructType>(ID.ConstantVal->getType())) {
+ if (stype->getNumContainedTypes() != 1)
+ return Error(ID.Loc, "constant expression type mismatch");
+ int index = utype->getElementTypeIndex(stype->getContainedType(0));
+ if (index < 0)
+ return Error(ID.Loc, "initializer type is not a member of the union");
+
+ V = ConstantUnion::get(
+ utype, cast<Constant>(ID.ConstantVal->getOperand(0)));
+ return false;
+ }
+
+ return Error(ID.Loc, "constant expression type mismatch");
+}
+
/// FunctionHeader
/// ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs
@@ -2572,7 +2662,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
}
if (!FunctionType::isValidReturnType(RetType) ||
- isa<OpaqueType>(RetType))
+ RetType->isOpaqueTy())
return Error(RetTypeLoc, "invalid function return type");
LocTy NameLoc = Lex.getLoc();
@@ -2873,7 +2963,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
// API compatibility: Accept either integer or floating-point types.
bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 0);
if (!Result) {
- if (!Inst->getType()->isIntOrIntVector()) {
+ if (!Inst->getType()->isIntOrIntVectorTy()) {
if (NUW)
return Error(ModifierLoc, "nuw only applies to integer operations");
if (NSW)
@@ -3096,7 +3186,7 @@ bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
ParseToken(lltok::lsquare, "expected '[' with switch table"))
return true;
- if (!isa<IntegerType>(Cond->getType()))
+ if (!Cond->getType()->isIntegerTy())
return Error(CondLoc, "switch condition must have integer type");
// Parse the jump table pairs.
@@ -3139,7 +3229,7 @@ bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) {
ParseToken(lltok::lsquare, "expected '[' with indirectbr"))
return true;
- if (!isa<PointerType>(Address->getType()))
+ if (!Address->getType()->isPointerTy())
return Error(AddrLoc, "indirectbr address must have pointer type");
// Parse the destination list.
@@ -3292,11 +3382,11 @@ bool LLParser::ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS,
switch (OperandType) {
default: llvm_unreachable("Unknown operand type!");
case 0: // int or FP.
- Valid = LHS->getType()->isIntOrIntVector() ||
- LHS->getType()->isFPOrFPVector();
+ Valid = LHS->getType()->isIntOrIntVectorTy() ||
+ LHS->getType()->isFPOrFPVectorTy();
break;
- case 1: Valid = LHS->getType()->isIntOrIntVector(); break;
- case 2: Valid = LHS->getType()->isFPOrFPVector(); break;
+ case 1: Valid = LHS->getType()->isIntOrIntVectorTy(); break;
+ case 2: Valid = LHS->getType()->isFPOrFPVectorTy(); break;
}
if (!Valid)
@@ -3316,7 +3406,7 @@ bool LLParser::ParseLogical(Instruction *&Inst, PerFunctionState &PFS,
ParseValue(LHS->getType(), RHS, PFS))
return true;
- if (!LHS->getType()->isIntOrIntVector())
+ if (!LHS->getType()->isIntOrIntVectorTy())
return Error(Loc,"instruction requires integer or integer vector operands");
Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
@@ -3340,13 +3430,13 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
return true;
if (Opc == Instruction::FCmp) {
- if (!LHS->getType()->isFPOrFPVector())
+ if (!LHS->getType()->isFPOrFPVectorTy())
return Error(Loc, "fcmp requires floating point operands");
Inst = new FCmpInst(CmpInst::Predicate(Pred), LHS, RHS);
} else {
assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!");
- if (!LHS->getType()->isIntOrIntVector() &&
- !isa<PointerType>(LHS->getType()))
+ if (!LHS->getType()->isIntOrIntVectorTy() &&
+ !LHS->getType()->isPointerTy())
return Error(Loc, "icmp requires integer operands");
Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
}
@@ -3643,7 +3733,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS,
}
}
- if (Size && !Size->getType()->isInteger(32))
+ if (Size && !Size->getType()->isIntegerTy(32))
return Error(SizeLoc, "element count must be i32");
if (isAlloca) {
@@ -3671,7 +3761,7 @@ bool LLParser::ParseFree(Instruction *&Inst, PerFunctionState &PFS,
BasicBlock* BB) {
Value *Val; LocTy Loc;
if (ParseTypeAndValue(Val, Loc, PFS)) return true;
- if (!isa<PointerType>(Val->getType()))
+ if (!Val->getType()->isPointerTy())
return Error(Loc, "operand to free must be a pointer");
Inst = CallInst::CreateFree(Val, BB);
return false;
@@ -3688,7 +3778,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
ParseOptionalCommaAlign(Alignment, AteExtraComma))
return true;
- if (!isa<PointerType>(Val->getType()) ||
+ if (!Val->getType()->isPointerTy() ||
!cast<PointerType>(Val->getType())->getElementType()->isFirstClassType())
return Error(Loc, "load operand must be a pointer to a first class type");
@@ -3709,7 +3799,7 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
ParseOptionalCommaAlign(Alignment, AteExtraComma))
return true;
- if (!isa<PointerType>(Ptr->getType()))
+ if (!Ptr->getType()->isPointerTy())
return Error(PtrLoc, "store operand must be a pointer");
if (!Val->getType()->isFirstClassType())
return Error(Loc, "store operand must be a first class value");
@@ -3731,7 +3821,7 @@ bool LLParser::ParseGetResult(Instruction *&Inst, PerFunctionState &PFS) {
ParseUInt32(Element, EltLoc))
return true;
- if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType()))
+ if (!Val->getType()->isStructTy() && !Val->getType()->isArrayTy())
return Error(ValLoc, "getresult inst requires an aggregate operand");
if (!ExtractValueInst::getIndexedType(Val->getType(), Element))
return Error(EltLoc, "invalid getresult index for value");
@@ -3748,7 +3838,7 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
if (ParseTypeAndValue(Ptr, Loc, PFS)) return true;
- if (!isa<PointerType>(Ptr->getType()))
+ if (!Ptr->getType()->isPointerTy())
return Error(Loc, "base of getelementptr must be a pointer");
SmallVector<Value*, 16> Indices;
@@ -3759,7 +3849,7 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
break;
}
if (ParseTypeAndValue(Val, EltLoc, PFS)) return true;
- if (!isa<IntegerType>(Val->getType()))
+ if (!Val->getType()->isIntegerTy())
return Error(EltLoc, "getelementptr index must be an integer");
Indices.push_back(Val);
}
@@ -3783,8 +3873,8 @@ int LLParser::ParseExtractValue(Instruction *&Inst, PerFunctionState &PFS) {
ParseIndexList(Indices, AteExtraComma))
return true;
- if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType()))
- return Error(Loc, "extractvalue operand must be array or struct");
+ if (!Val->getType()->isAggregateType())
+ return Error(Loc, "extractvalue operand must be aggregate type");
if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(),
Indices.end()))
@@ -3805,8 +3895,8 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
ParseIndexList(Indices, AteExtraComma))
return true;
- if (!isa<StructType>(Val0->getType()) && !isa<ArrayType>(Val0->getType()))
- return Error(Loc0, "extractvalue operand must be array or struct");
+ if (!Val0->getType()->isAggregateType())
+ return Error(Loc0, "insertvalue operand must be aggregate type");
if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(),
Indices.end()))
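The ParseUnionValue hook added above accepts a one-element struct initializer and rewraps it as a union constant when the struct's member type is also a member of the union. A minimal sketch of that conversion, using only the UnionType/ConstantUnion calls that appear in this patch (illustrative; 'Ctx' is an assumed LLVMContext and error handling is elided):

  // Build union { i32, float } and wrap an i32 initializer in it.
  const Type *Elts[] = { Type::getInt32Ty(Ctx), Type::getFloatTy(Ctx) };
  const UnionType *UTy = UnionType::get(&Elts[0], 2);
  Constant *Field = ConstantInt::get(Type::getInt32Ty(Ctx), 7);
  int Idx = UTy->getElementTypeIndex(Field->getType());  // 0 here; negative means not a member
  Constant *U = Idx < 0 ? 0 : ConstantUnion::get(UTy, Field);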
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 85c07ff..9abe404 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -31,6 +31,7 @@ namespace llvm {
class GlobalValue;
class MDString;
class MDNode;
+ class UnionType;
/// ValID - Represents a reference of a definition of some sort with no type.
/// There are several cases where we have to parse the value but where the
@@ -169,6 +170,7 @@ namespace llvm {
bool ParseOptionalVisibility(unsigned &Visibility);
bool ParseOptionalCallingConv(CallingConv::ID &CC);
bool ParseOptionalAlignment(unsigned &Alignment);
+ bool ParseOptionalStackAlignment(unsigned &Alignment);
bool ParseInstructionMetadata(SmallVectorImpl<std::pair<unsigned,
MDNode *> > &);
bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
@@ -211,6 +213,7 @@ namespace llvm {
}
bool ParseTypeRec(PATypeHolder &H);
bool ParseStructType(PATypeHolder &H, bool Packed);
+ bool ParseUnionType(PATypeHolder &H);
bool ParseArrayVectorType(PATypeHolder &H, bool isVector);
bool ParseFunctionType(PATypeHolder &Result);
PATypeHolder HandleUpRefs(const Type *Ty);
@@ -279,6 +282,8 @@ namespace llvm {
return ParseTypeAndBasicBlock(BB, Loc, PFS);
}
+ bool ParseUnionValue(const UnionType* utype, ValID &ID, Value *&V);
+
struct ParamInfo {
LocTy Loc;
Value *V;
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 7f1807c..3ac9169 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -97,6 +97,7 @@ namespace lltok {
kw_type,
kw_opaque,
+ kw_union,
kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule,
kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno,
diff --git a/lib/Bitcode/Reader/Android.mk b/lib/Bitcode/Reader/Android.mk
new file mode 100644
index 0000000..165b0d0
--- /dev/null
+++ b/lib/Bitcode/Reader/Android.mk
@@ -0,0 +1,29 @@
+LOCAL_PATH:= $(call my-dir)
+
+bitcode_reader_SRC_FILES := \
+ BitReader.cpp \
+ BitcodeReader.cpp
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(bitcode_reader_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMBitReader
+
+include $(LLVM_HOST_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(bitcode_reader_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMBitReader
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp
index 1facbc3..15844c0 100644
--- a/lib/Bitcode/Reader/BitReader.cpp
+++ b/lib/Bitcode/Reader/BitReader.cpp
@@ -21,17 +21,8 @@ using namespace llvm;
Optionally returns a human-readable error message via OutMessage. */
LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
LLVMModuleRef *OutModule, char **OutMessage) {
- std::string Message;
-
- *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), getGlobalContext(),
- &Message));
- if (!*OutModule) {
- if (OutMessage)
- *OutMessage = strdup(Message.c_str());
- return 1;
- }
-
- return 0;
+ return LLVMParseBitcodeInContext(wrap(&getGlobalContext()), MemBuf, OutModule,
+ OutMessage);
}
LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
@@ -54,36 +45,44 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
/* Reads a module from the specified memory buffer, returning via the OutM parameter
a module which lazily deserializes function bodies. Returns 0 on success.
Optionally returns a human-readable error message via OutMessage. */
-LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
- LLVMModuleProviderRef *OutMP,
- char **OutMessage) {
+LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutM,
+ char **OutMessage) {
std::string Message;
-
- *OutMP = reinterpret_cast<LLVMModuleProviderRef>(
- getLazyBitcodeModule(unwrap(MemBuf), getGlobalContext(), &Message));
-
- if (!*OutMP) {
+
+ *OutM = wrap(getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef),
+ &Message));
+ if (!*OutM) {
if (OutMessage)
*OutMessage = strdup(Message.c_str());
- return 1;
+ return 1;
}
-
+
return 0;
+
}
+LLVMBool LLVMGetBitcodeModule(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM,
+ char **OutMessage) {
+ return LLVMGetBitcodeModuleInContext(LLVMGetGlobalContext(), MemBuf, OutM,
+ OutMessage);
+}
+
+/* Deprecated: Use LLVMGetBitcodeModuleInContext instead. */
LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef,
LLVMMemoryBufferRef MemBuf,
LLVMModuleProviderRef *OutMP,
char **OutMessage) {
- std::string Message;
-
- *OutMP = reinterpret_cast<LLVMModuleProviderRef>(
- getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef), &Message));
- if (!*OutMP) {
- if (OutMessage)
- *OutMessage = strdup(Message.c_str());
- return 1;
- }
-
- return 0;
+ return LLVMGetBitcodeModuleInContext(ContextRef, MemBuf,
+ reinterpret_cast<LLVMModuleRef*>(OutMP),
+ OutMessage);
+}
+
+/* Deprecated: Use LLVMGetBitcodeModule instead. */
+LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
+ LLVMModuleProviderRef *OutMP,
+ char **OutMessage) {
+ return LLVMGetBitcodeModuleProviderInContext(LLVMGetGlobalContext(), MemBuf,
+ OutMP, OutMessage);
}
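With the shims above, the context-free C entry points are thin forwards to the *InContext variants over the global context. A sketch of a caller, assuming only the LLVM-C declarations visible here plus the standard memory-buffer helper (the error-reporting helper is hypothetical):

  LLVMMemoryBufferRef Buf = 0;
  LLVMModuleRef M = 0;
  char *Err = 0;
  if (LLVMCreateMemoryBufferWithContentsOfFile("input.bc", &Buf, &Err))
    return ReportAndFree(Err);            /* hypothetical error helper */
  if (LLVMParseBitcode(Buf, &M, &Err))    /* eager parse in the global context */
    return ReportAndFree(Err);
  /* ... use M ... */
  LLVMDisposeMemoryBuffer(Buf);           /* assuming the eager parse leaves buffer ownership with the caller */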
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 4dfc6ce..a328837 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -108,17 +108,17 @@ static int GetDecodedBinaryOpcode(unsigned Val, const Type *Ty) {
switch (Val) {
default: return -1;
case bitc::BINOP_ADD:
- return Ty->isFPOrFPVector() ? Instruction::FAdd : Instruction::Add;
+ return Ty->isFPOrFPVectorTy() ? Instruction::FAdd : Instruction::Add;
case bitc::BINOP_SUB:
- return Ty->isFPOrFPVector() ? Instruction::FSub : Instruction::Sub;
+ return Ty->isFPOrFPVectorTy() ? Instruction::FSub : Instruction::Sub;
case bitc::BINOP_MUL:
- return Ty->isFPOrFPVector() ? Instruction::FMul : Instruction::Mul;
+ return Ty->isFPOrFPVectorTy() ? Instruction::FMul : Instruction::Mul;
case bitc::BINOP_UDIV: return Instruction::UDiv;
case bitc::BINOP_SDIV:
- return Ty->isFPOrFPVector() ? Instruction::FDiv : Instruction::SDiv;
+ return Ty->isFPOrFPVectorTy() ? Instruction::FDiv : Instruction::SDiv;
case bitc::BINOP_UREM: return Instruction::URem;
case bitc::BINOP_SREM:
- return Ty->isFPOrFPVector() ? Instruction::FRem : Instruction::SRem;
+ return Ty->isFPOrFPVectorTy() ? Instruction::FRem : Instruction::SRem;
case bitc::BINOP_SHL: return Instruction::Shl;
case bitc::BINOP_LSHR: return Instruction::LShr;
case bitc::BINOP_ASHR: return Instruction::AShr;
@@ -585,6 +585,13 @@ bool BitcodeReader::ParseTypeTable() {
ResultTy = StructType::get(Context, EltTys, Record[0]);
break;
}
+ case bitc::TYPE_CODE_UNION: { // UNION: [eltty x N]
+ SmallVector<const Type*, 8> EltTys;
+ for (unsigned i = 0, e = Record.size(); i != e; ++i)
+ EltTys.push_back(getTypeByID(Record[i], true));
+ ResultTy = UnionType::get(&EltTys[0], EltTys.size());
+ break;
+ }
case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
if (Record.size() < 2)
return Error("Invalid ARRAY type record");
@@ -956,12 +963,12 @@ bool BitcodeReader::ParseConstants() {
V = Constant::getNullValue(CurTy);
break;
case bitc::CST_CODE_INTEGER: // INTEGER: [intval]
- if (!isa<IntegerType>(CurTy) || Record.empty())
+ if (!CurTy->isIntegerTy() || Record.empty())
return Error("Invalid CST_INTEGER record");
V = ConstantInt::get(CurTy, DecodeSignRotatedValue(Record[0]));
break;
case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval]
- if (!isa<IntegerType>(CurTy) || Record.empty())
+ if (!CurTy->isIntegerTy() || Record.empty())
return Error("Invalid WIDE_INTEGER record");
unsigned NumWords = Record.size();
@@ -1168,7 +1175,7 @@ bool BitcodeReader::ParseConstants() {
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
- if (OpTy->isFPOrFPVector())
+ if (OpTy->isFPOrFPVectorTy())
V = ConstantExpr::getFCmp(Record[3], Op0, Op1);
else
V = ConstantExpr::getICmp(Record[3], Op0, Op1);
@@ -1400,7 +1407,7 @@ bool BitcodeReader::ParseModule() {
if (Record.size() < 6)
return Error("Invalid MODULE_CODE_GLOBALVAR record");
const Type *Ty = getTypeByID(Record[0]);
- if (!isa<PointerType>(Ty))
+ if (!Ty->isPointerTy())
return Error("Global not a pointer type!");
unsigned AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
Ty = cast<PointerType>(Ty)->getElementType();
@@ -1443,7 +1450,7 @@ bool BitcodeReader::ParseModule() {
if (Record.size() < 8)
return Error("Invalid MODULE_CODE_FUNCTION record");
const Type *Ty = getTypeByID(Record[0]);
- if (!isa<PointerType>(Ty))
+ if (!Ty->isPointerTy())
return Error("Function not a pointer type!");
const FunctionType *FTy =
dyn_cast<FunctionType>(cast<PointerType>(Ty)->getElementType());
@@ -1484,7 +1491,7 @@ bool BitcodeReader::ParseModule() {
if (Record.size() < 3)
return Error("Invalid MODULE_ALIAS record");
const Type *Ty = getTypeByID(Record[0]);
- if (!isa<PointerType>(Ty))
+ if (!Ty->isPointerTy())
return Error("Function not a pointer type!");
GlobalAlias *NewGA = new GlobalAlias(Ty, GetDecodedLinkage(Record[2]),
@@ -1615,6 +1622,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
return Error("Malformed block record");
+ InstructionList.clear();
unsigned ModuleValueListSize = ValueList.size();
// Add all the function arguments to the value table.
@@ -1885,7 +1893,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
OpNum+1 != Record.size())
return Error("Invalid CMP record");
- if (LHS->getType()->isFPOrFPVector())
+ if (LHS->getType()->isFPOrFPVectorTy())
I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS);
else
I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS);
@@ -1925,7 +1933,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
const Type *ReturnType = F->getReturnType();
if (Vs.size() > 1 ||
- (isa<StructType>(ReturnType) &&
+ (ReturnType->isStructTy() &&
(Vs.empty() || Vs[0]->getType() != ReturnType))) {
Value *RV = UndefValue::get(ReturnType);
for (unsigned i = 0, e = Vs.size(); i != e; ++i) {
diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp
index 7ed651b..4288422 100644
--- a/lib/Bitcode/Writer/BitWriter.cpp
+++ b/lib/Bitcode/Writer/BitWriter.cpp
@@ -27,20 +27,14 @@ int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) {
return 0;
}
-#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR >= 4)
-#include <ext/stdio_filebuf.h>
-
-int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) {
- raw_fd_ostream OS(FileHandle, false);
+int LLVMWriteBitcodeToFD(LLVMModuleRef M, int FD, int ShouldClose,
+ int Unbuffered) {
+ raw_fd_ostream OS(FD, ShouldClose, Unbuffered);
WriteBitcodeToFile(unwrap(M), OS);
return 0;
}
-#else
-
int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) {
- return -1; // Not supported.
+ return LLVMWriteBitcodeToFD(M, FileHandle, true, false);
}
-
-#endif
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index a5bb526..82e73b5 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -181,6 +181,14 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Log2_32_Ceil(VE.getTypes().size()+1)));
unsigned StructAbbrev = Stream.EmitAbbrev(Abbv);
+ // Abbrev for TYPE_CODE_UNION.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_UNION));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ unsigned UnionAbbrev = Stream.EmitAbbrev(Abbv);
+
// Abbrev for TYPE_CODE_ARRAY.
Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
@@ -250,6 +258,17 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
AbbrevToUse = StructAbbrev;
break;
}
+ case Type::UnionTyID: {
+ const UnionType *UT = cast<UnionType>(T);
+ // UNION: [eltty x N]
+ Code = bitc::TYPE_CODE_UNION;
+ // Output all of the element types.
+ for (UnionType::element_iterator I = UT->element_begin(),
+ E = UT->element_end(); I != E; ++I)
+ TypeVals.push_back(VE.getTypeID(*I));
+ AbbrevToUse = UnionAbbrev;
+ break;
+ }
case Type::ArrayTyID: {
const ArrayType *AT = cast<ArrayType>(T);
// ARRAY: [numelts, eltty]
@@ -789,7 +808,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
else if (isCStr7)
AbbrevToUse = CString7Abbrev;
} else if (isa<ConstantArray>(C) || isa<ConstantStruct>(V) ||
- isa<ConstantVector>(V)) {
+ isa<ConstantUnion>(C) || isa<ConstantVector>(V)) {
Code = bitc::CST_CODE_AGGREGATE;
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
Record.push_back(VE.getValueID(C->getOperand(i)));
@@ -1510,16 +1529,50 @@ enum {
DarwinBCHeaderSize = 5*4
};
+/// isARMTriplet - Return true if the triplet looks like:
+/// arm-*, thumb-*, armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*.
+static bool isARMTriplet(const std::string &TT) {
+ size_t Pos = 0;
+ size_t Size = TT.size();
+ if (Size >= 6 &&
+ TT[0] == 't' && TT[1] == 'h' && TT[2] == 'u' &&
+ TT[3] == 'm' && TT[4] == 'b')
+ Pos = 5;
+ else if (Size >= 4 && TT[0] == 'a' && TT[1] == 'r' && TT[2] == 'm')
+ Pos = 3;
+ else
+ return false;
+
+ if (TT[Pos] == '-')
+ return true;
+ else if (TT[Pos] == 'v') {
+ if (Size >= Pos+4 &&
+ TT[Pos+1] == '6' && TT[Pos+2] == 't' && TT[Pos+3] == '2')
+ return true;
+ else if (Size >= Pos+4 &&
+ TT[Pos+1] == '5' && TT[Pos+2] == 't' && TT[Pos+3] == 'e')
+ return true;
+ } else
+ return false;
+ while (++Pos < Size && TT[Pos] != '-') {
+ if (!isdigit(TT[Pos]))
+ return false;
+ }
+ return true;
+}
+
static void EmitDarwinBCHeader(BitstreamWriter &Stream,
const std::string &TT) {
unsigned CPUType = ~0U;
- // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*. The CPUType is a
- // magic number from /usr/include/mach/machine.h. It is ok to reproduce the
+ // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*, arm-*, thumb-*,
+ // armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*. The CPUType is a magic
+ // number from /usr/include/mach/machine.h. It is ok to reproduce the
// specific constants here because they are implicitly part of the Darwin ABI.
enum {
DARWIN_CPU_ARCH_ABI64 = 0x01000000,
DARWIN_CPU_TYPE_X86 = 7,
+ DARWIN_CPU_TYPE_ARM = 12,
DARWIN_CPU_TYPE_POWERPC = 18
};
@@ -1532,6 +1585,8 @@ static void EmitDarwinBCHeader(BitstreamWriter &Stream,
CPUType = DARWIN_CPU_TYPE_POWERPC;
else if (TT.find("powerpc64-") == 0)
CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64;
+ else if (isARMTriplet(TT))
+ CPUType = DARWIN_CPU_TYPE_ARM;
// Traditional Bitcode starts after header.
unsigned BCOffset = DarwinBCHeaderSize;
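For reference, working through the isARMTriplet predicate added above, these inputs illustrate what it accepts and rejects (examples derived from the code, not from a separate spec):

  // isARMTriplet("arm-apple-darwin9")      -> true   (bare "arm-" prefix)
  // isARMTriplet("thumbv7-apple-darwin10") -> true   ("thumbv" followed by digits)
  // isARMTriplet("armv5te-unknown-eabi")   -> true   (explicit armv5te case)
  // isARMTriplet("armv6t2-apple-darwin")   -> true   (explicit armv6t2 case)
  // isARMTriplet("x86_64-apple-darwin10")  -> false  (neither "arm" nor "thumb" prefix)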
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 595497f..aa4c3af 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -27,7 +27,7 @@ static bool isSingleValueType(const std::pair<const llvm::Type*,
}
static bool isIntegerValue(const std::pair<const Value*, unsigned> &V) {
- return isa<IntegerType>(V.first->getType());
+ return V.first->getType()->isIntegerTy();
}
static bool CompareByFrequency(const std::pair<const llvm::Type*,
@@ -39,8 +39,6 @@ static bool CompareByFrequency(const std::pair<const llvm::Type*,
/// ValueEnumerator - Enumerate module-level information.
ValueEnumerator::ValueEnumerator(const Module *M) {
- InstructionCount = 0;
-
// Enumerate the global variables.
for (Module::const_global_iterator I = M->global_begin(),
E = M->global_end(); I != E; ++I)
@@ -377,6 +375,7 @@ void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) {
void ValueEnumerator::incorporateFunction(const Function &F) {
+ InstructionCount = 0;
NumModuleValues = Values.size();
// Adding function arguments to the value table.
diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk
new file mode 100644
index 0000000..9fa2ecd
--- /dev/null
+++ b/lib/CodeGen/Android.mk
@@ -0,0 +1,99 @@
+LOCAL_PATH:= $(call my-dir)
+
+codegen_SRC_FILES := \
+ AggressiveAntiDepBreaker.cpp \
+ BranchFolding.cpp \
+ CalcSpillWeights.cpp \
+ CodePlacementOpt.cpp \
+ CriticalAntiDepBreaker.cpp \
+ DeadMachineInstructionElim.cpp \
+ DwarfEHPrepare.cpp \
+ ELFCodeEmitter.cpp \
+ ELFWriter.cpp \
+ ExactHazardRecognizer.cpp \
+ GCMetadata.cpp \
+ GCMetadataPrinter.cpp \
+ GCStrategy.cpp \
+ IfConversion.cpp \
+ IntrinsicLowering.cpp \
+ LLVMTargetMachine.cpp \
+ LatencyPriorityQueue.cpp \
+ LiveInterval.cpp \
+ LiveIntervalAnalysis.cpp \
+ LiveStackAnalysis.cpp \
+ LiveVariables.cpp \
+ LowerSubregs.cpp \
+ MachineBasicBlock.cpp \
+ MachineCSE.cpp \
+ MachineDominators.cpp \
+ MachineFunction.cpp \
+ MachineFunctionAnalysis.cpp \
+ MachineFunctionPass.cpp \
+ MachineInstr.cpp \
+ MachineLICM.cpp \
+ MachineLoopInfo.cpp \
+ MachineModuleInfo.cpp \
+ MachineModuleInfoImpls.cpp \
+ MachinePassRegistry.cpp \
+ MachineRegisterInfo.cpp \
+ MachineSSAUpdater.cpp \
+ MachineSink.cpp \
+ MachineVerifier.cpp \
+ ObjectCodeEmitter.cpp \
+ OcamlGC.cpp \
+ OptimizeExts.cpp \
+ OptimizePHIs.cpp \
+ PHIElimination.cpp \
+ Passes.cpp \
+ PostRASchedulerList.cpp \
+ PreAllocSplitting.cpp \
+ ProcessImplicitDefs.cpp \
+ PrologEpilogInserter.cpp \
+ PseudoSourceValue.cpp \
+ RegAllocLinearScan.cpp \
+ RegAllocLocal.cpp \
+ RegAllocPBQP.cpp \
+ RegisterCoalescer.cpp \
+ RegisterScavenging.cpp \
+ ScheduleDAG.cpp \
+ ScheduleDAGEmit.cpp \
+ ScheduleDAGInstrs.cpp \
+ ScheduleDAGPrinter.cpp \
+ ShadowStackGC.cpp \
+ ShrinkWrapping.cpp \
+ SimpleRegisterCoalescing.cpp \
+ SjLjEHPrepare.cpp \
+ SlotIndexes.cpp \
+ Spiller.cpp \
+ StackProtector.cpp \
+ StackSlotColoring.cpp \
+ StrongPHIElimination.cpp \
+ TailDuplication.cpp \
+ TargetInstrInfoImpl.cpp \
+ TargetLoweringObjectFileImpl.cpp \
+ TwoAddressInstructionPass.cpp \
+ UnreachableBlockElim.cpp \
+ VirtRegMap.cpp \
+ VirtRegRewriter.cpp
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(codegen_SRC_FILES)
+LOCAL_MODULE:= libLLVMCodeGen
+
+include $(LLVM_HOST_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(codegen_SRC_FILES)
+LOCAL_MODULE:= libLLVMCodeGen
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/lib/CodeGen/AsmPrinter/Android.mk b/lib/CodeGen/AsmPrinter/Android.mk
new file mode 100644
index 0000000..62601f0
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/Android.mk
@@ -0,0 +1,31 @@
+LOCAL_PATH := $(call my-dir)
+
+codegen_asmprinter_SRC_FILES := \
+ AsmPrinter.cpp \
+ DIE.cpp \
+ DwarfDebug.cpp \
+ DwarfException.cpp \
+ DwarfLabel.cpp \
+ DwarfPrinter.cpp \
+ DwarfWriter.cpp \
+ OcamlGCPrinter.cpp
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(codegen_asmprinter_SRC_FILES)
+LOCAL_MODULE:= libLLVMAsmPrinter
+
+include $(LLVM_HOST_BUILD_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(codegen_asmprinter_SRC_FILES)
+LOCAL_MODULE:= libLLVMAsmPrinter
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index fc08384..bbeb026 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -50,6 +50,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
#include <cerrno>
+#include <ctype.h>
using namespace llvm;
STATISTIC(EmittedInsts, "Number of machine instrs printed");
@@ -917,11 +918,10 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV,
if (NumBits == 0) return; // No need to emit alignment.
- unsigned FillValue = 0;
if (getCurrentSection()->getKind().isText())
- FillValue = MAI->getTextAlignFillValue();
-
- OutStreamer.EmitValueToAlignment(1 << NumBits, FillValue, 1, 0);
+ OutStreamer.EmitCodeAlignment(1 << NumBits);
+ else
+ OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0);
}
/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
@@ -1717,7 +1717,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
}
// Print the main label for the block.
- if (MBB->pred_empty() || MBB->isOnlyReachableByFallthrough()) {
+ if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) {
if (VerboseAsm) {
// NOTE: Want this comment at start of line.
O << MAI->getCommentString() << " BB#" << MBB->getNumber() << ':';
@@ -1764,6 +1764,39 @@ void AsmPrinter::printOffset(int64_t Offset) const {
O << Offset;
}
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool AsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+ const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+ // Otherwise, check the last instruction.
+ const MachineInstr &LastInst = Pred->back();
+ return !LastInst.getDesc().isBarrier();
+}
+
+
+
GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
if (!S->usesMetadata())
return 0;
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 349e0ac..63360c0 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -313,6 +313,7 @@ void DIESectionOffset::EmitValue(DwarfPrinter *D, unsigned Form) const {
D->EmitSectionOffset(Label.getTag(), Section.getTag(),
Label.getNumber(), Section.getNumber(),
IsSmall, IsEH, UseSet);
+ D->getAsm()->O << '\n'; // FIXME: Necessary?
}
/// SizeOf - Determine size of delta value in bytes.
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 5093dd9..5ad1e5e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -238,7 +238,18 @@ public:
LIndex = DSI;
}
}
- setLastInsn(LastInsn);
+
+ unsigned CurrentLastInsnIndex = 0;
+ if (const MachineInstr *CL = getLastInsn())
+ CurrentLastInsnIndex = MIIndexMap[CL];
+ unsigned FIndex = MIIndexMap[getFirstInsn()];
+
+ // Set LastInsn as the last instruction for this scope only if
+ // it follows
+ // 1) this scope's first instruction and
+ // 2) current last instruction for this scope, if any.
+ if (LIndex >= CurrentLastInsnIndex && LIndex >= FIndex)
+ setLastInsn(LastInsn);
}
#ifndef NDEBUG
@@ -1166,7 +1177,9 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
return SPDie;
SPDie = new DIE(dwarf::DW_TAG_subprogram);
- addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName());
+ // Constructors and operators for anonymous aggregates do not have names.
+ if (!SP.getName().empty())
+ addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName());
StringRef LinkageName = SP.getLinkageName();
if (!LinkageName.empty())
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index b6801dc..2b08ba4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -50,26 +50,6 @@ DwarfException::~DwarfException() {
delete ExceptionTimer;
}
-/// SizeOfEncodedValue - Return the size of the encoding in bytes.
-unsigned DwarfException::SizeOfEncodedValue(unsigned Encoding) {
- if (Encoding == dwarf::DW_EH_PE_omit)
- return 0;
-
- switch (Encoding & 0x07) {
- case dwarf::DW_EH_PE_absptr:
- return TD->getPointerSize();
- case dwarf::DW_EH_PE_udata2:
- return 2;
- case dwarf::DW_EH_PE_udata4:
- return 4;
- case dwarf::DW_EH_PE_udata8:
- return 8;
- }
-
- assert(0 && "Invalid encoded value.");
- return 0;
-}
-
/// CreateLabelDiff - Emit a label and subtract it from the expression we
/// already have. This is equivalent to emitting "foo - .", but we have to emit
/// the label for "." directly.
@@ -100,7 +80,7 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) {
TD->getPointerSize() : -TD->getPointerSize();
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-
+
// Begin eh frame section.
Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
@@ -128,30 +108,16 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) {
// The personality presence indicates that language specific information will
// show up in the eh frame. Find out how we are supposed to lower the
// personality function reference:
- const MCExpr *PersonalityRef = 0;
- bool IsPersonalityIndirect = false, IsPersonalityPCRel = false;
- if (PersonalityFn) {
- // FIXME: HANDLE STATIC CODEGEN MODEL HERE.
-
- // In non-static mode, ask the object file how to represent this reference.
- PersonalityRef =
- TLOF.getSymbolForDwarfGlobalReference(PersonalityFn, Asm->Mang,
- Asm->MMI,
- IsPersonalityIndirect,
- IsPersonalityPCRel);
- }
-
- unsigned PerEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
- if (IsPersonalityIndirect)
- PerEncoding |= dwarf::DW_EH_PE_indirect;
- unsigned LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
- unsigned FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ unsigned FDEEncoding = TLOF.getFDEEncoding();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
char Augmentation[6] = { 0 };
unsigned AugmentationSize = 0;
char *APtr = Augmentation + 1;
- if (PersonalityRef) {
+ if (PersonalityFn) {
// There is a personality function.
*APtr++ = 'P';
AugmentationSize += 1 + SizeOfEncodedValue(PerEncoding);
@@ -181,20 +147,19 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) {
Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true));
EOL("CIE Return Address Column");
- EmitULEB128(AugmentationSize, "Augmentation Size");
- EmitEncodingByte(PerEncoding, "Personality");
-
- // If there is a personality, we need to indicate the function's location.
- if (PersonalityRef) {
- if (!IsPersonalityPCRel)
- PersonalityRef = CreateLabelDiff(PersonalityRef, "personalityref_addr",
- Index);
+ if (Augmentation[0]) {
+ EmitULEB128(AugmentationSize, "Augmentation Size");
- O << MAI->getData32bitsDirective() << *PersonalityRef;
- EOL("Personality");
-
- EmitEncodingByte(LSDAEncoding, "LSDA");
- EmitEncodingByte(FDEEncoding, "FDE");
+ // If there is a personality, we need to indicate the function's location.
+ if (PersonalityFn) {
+ EmitEncodingByte(PerEncoding, "Personality");
+ EmitReference(PersonalityFn, PerEncoding);
+ EOL("Personality");
+ }
+ if (UsesLSDA[Index])
+ EmitEncodingByte(LSDAEncoding, "LSDA");
+ if (FDEEncoding != dwarf::DW_EH_PE_absptr)
+ EmitEncodingByte(FDEEncoding, "FDE");
}
// Indicate locations of general callee saved registers in frame.
@@ -216,8 +181,12 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
"Should not emit 'available externally' functions at all");
const Function *TheFunc = EHFrameInfo.function;
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
- Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getEHFrameSection());
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ unsigned FDEEncoding = TLOF.getFDEEncoding();
+
+ Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
// Externally visible entry into the functions eh frame info. If the
// corresponding function is static, this should not be externally visible.
@@ -255,7 +224,8 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
// EH frame header.
EmitDifference("eh_frame_end", EHFrameInfo.Number,
- "eh_frame_begin", EHFrameInfo.Number, true);
+ "eh_frame_begin", EHFrameInfo.Number,
+ true);
EOL("Length of Frame Information Entry");
EmitLabel("eh_frame_begin", EHFrameInfo.Number);
@@ -266,33 +236,23 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
EOL("FDE CIE offset");
- EmitReference("eh_func_begin", EHFrameInfo.Number, true, true);
+ EmitReference("eh_func_begin", EHFrameInfo.Number, FDEEncoding);
EOL("FDE initial location");
EmitDifference("eh_func_end", EHFrameInfo.Number,
- "eh_func_begin", EHFrameInfo.Number, true);
+ "eh_func_begin", EHFrameInfo.Number,
+ SizeOfEncodedValue(FDEEncoding) == 4);
EOL("FDE address range");
// If there is a personality and landing pads then point to the language
// specific data area in the exception table.
if (MMI->getPersonalities()[0] != NULL) {
+ unsigned Size = SizeOfEncodedValue(LSDAEncoding);
- if (Asm->TM.getLSDAEncoding() != DwarfLSDAEncoding::EightByte) {
- EmitULEB128(4, "Augmentation size");
-
- if (EHFrameInfo.hasLandingPads)
- EmitReference("exception", EHFrameInfo.Number, true, true);
- else
- Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
- } else {
- EmitULEB128(TD->getPointerSize(), "Augmentation size");
-
- if (EHFrameInfo.hasLandingPads) {
- EmitReference("exception", EHFrameInfo.Number, true, false);
- } else {
- Asm->OutStreamer.EmitIntValue(0, TD->getPointerSize(),
- 0/*addrspace*/);
- }
- }
+ EmitULEB128(Size, "Augmentation size");
+ if (EHFrameInfo.hasLandingPads)
+ EmitReference("exception", EHFrameInfo.Number, LSDAEncoding);
+ else
+ Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/);
EOL("Language Specific Data Area");
} else {
@@ -407,20 +367,22 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
if (NumShared < TypeIds.size()) {
unsigned SizeAction = 0;
- ActionEntry *PrevAction = 0;
+ unsigned PrevAction = (unsigned)-1;
if (NumShared) {
const unsigned SizePrevIds = PrevLPI->TypeIds.size();
assert(Actions.size());
- PrevAction = &Actions.back();
- SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) +
- MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+ PrevAction = Actions.size() - 1;
+ SizeAction =
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].NextAction) +
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+ assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!");
SizeAction -=
- MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
- SizeAction += -PrevAction->NextAction;
- PrevAction = PrevAction->Previous;
+ MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+ SizeAction += -Actions[PrevAction].NextAction;
+ PrevAction = Actions[PrevAction].Previous;
}
}
@@ -437,7 +399,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
ActionEntry Action = { ValueForTypeID, NextAction, PrevAction };
Actions.push_back(Action);
- PrevAction = &Actions.back();
+ PrevAction = Actions.size() - 1;
}
// Record the first action of the landing pad site.
@@ -447,7 +409,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
// Information used when created the call-site table. The action record
// field of the call site record is the offset of the first associated
// action record, relative to the start of the actions table. This value is
- // biased by 1 (1 in dicating the start of the actions table), and 0
+ // biased by 1 (1 indicating the start of the actions table), and 0
// indicates that there are no actions.
FirstActions.push_back(FirstAction);
@@ -648,8 +610,7 @@ void DwarfException::EmitExceptionTable() {
// landing pad site.
SmallVector<ActionEntry, 32> Actions;
SmallVector<unsigned, 64> FirstActions;
- unsigned SizeActions = ComputeActionsTable(LandingPads, Actions,
- FirstActions);
+ unsigned SizeActions=ComputeActionsTable(LandingPads, Actions, FirstActions);
// Invokes and nounwind calls have entries in PadMap (due to being bracketed
// by try-range labels when lowered). Ordinary calls do not, so appropriate
@@ -677,29 +638,29 @@ void DwarfException::EmitExceptionTable() {
const unsigned LandingPadSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4);
bool IsSJLJ = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;
- unsigned SizeSites;
+ unsigned CallSiteTableLength;
if (IsSJLJ)
- SizeSites = 0;
+ CallSiteTableLength = 0;
else
- SizeSites = CallSites.size() *
+ CallSiteTableLength = CallSites.size() *
(SiteStartSize + SiteLengthSize + LandingPadSize);
for (unsigned i = 0, e = CallSites.size(); i < e; ++i) {
- SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action);
+ CallSiteTableLength += MCAsmInfo::getULEB128Size(CallSites[i].Action);
if (IsSJLJ)
- SizeSites += MCAsmInfo::getULEB128Size(i);
+ CallSiteTableLength += MCAsmInfo::getULEB128Size(i);
}
// Type infos.
const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();
- unsigned TTypeFormat;
+ unsigned TTypeEncoding;
unsigned TypeFormatSize;
if (!HaveTTData) {
// For SjLj exceptions, if there is no TypeInfo, then we just explicitly say
// that we're omitting that bit.
- TTypeFormat = dwarf::DW_EH_PE_omit;
+ TTypeEncoding = dwarf::DW_EH_PE_omit;
TypeFormatSize = SizeOfEncodedValue(dwarf::DW_EH_PE_absptr);
} else {
// Okay, we have actual filters or typeinfos to emit. As such, we need to
@@ -729,21 +690,28 @@ void DwarfException::EmitExceptionTable() {
// somewhere. This predicate should be moved to a shared location that is
// in target-independent code.
//
- if (LSDASection->getKind().isWriteable() ||
- Asm->TM.getRelocationModel() == Reloc::Static)
- TTypeFormat = dwarf::DW_EH_PE_absptr;
- else
- TTypeFormat = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
-
- TypeFormatSize = SizeOfEncodedValue(TTypeFormat);
+ TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding();
+ TypeFormatSize = SizeOfEncodedValue(TTypeEncoding);
}
// Begin the exception table.
Asm->OutStreamer.SwitchSection(LSDASection);
Asm->EmitAlignment(2, 0, 0, false);
+ // Emit the LSDA.
O << "GCC_except_table" << SubprogramCount << ":\n";
+ EmitLabel("exception", SubprogramCount);
+
+ if (IsSJLJ) {
+ SmallString<16> LSDAName;
+ raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() <<
+ "_LSDA_" << Asm->getFunctionNumber();
+ O << LSDAName.str() << ":\n";
+ }
+
+ // Emit the LSDA header.
+ EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
+ EmitEncodingByte(TTypeEncoding, "@TType");
// The type infos need to be aligned. GCC does this by inserting padding just
// before the type infos. However, this changes the size of the exception
@@ -752,7 +720,7 @@ void DwarfException::EmitExceptionTable() {
// So by increasing the size by inserting padding, you may increase the number
// of bytes used for writing the size. If it increases, say by one byte, then
// you now need to output one less byte of padding to get the type infos
- // aligned. However this decreases the size of the exception table. This
+ // aligned. However this decreases the size of the exception table. This
// changes the value you have to output for the exception table size. Due to
// the variable length encoding, the number of bytes used for writing the
// length may decrease. If so, you then have to increase the amount of
@@ -761,41 +729,35 @@ void DwarfException::EmitExceptionTable() {
// We chose another solution: don't output padding inside the table like GCC
// does, instead output it before the table.
unsigned SizeTypes = TypeInfos.size() * TypeFormatSize;
- unsigned TyOffset = sizeof(int8_t) + // Call site format
- MCAsmInfo::getULEB128Size(SizeSites) + // Call site table length
- SizeSites + SizeActions + SizeTypes;
- unsigned TotalSize = sizeof(int8_t) + // LPStart format
- sizeof(int8_t) + // TType format
- (HaveTTData ?
- MCAsmInfo::getULEB128Size(TyOffset) : 0) + // TType base offset
- TyOffset;
+ unsigned CallSiteTableLengthSize =
+ MCAsmInfo::getULEB128Size(CallSiteTableLength);
+ unsigned TTypeBaseOffset =
+ sizeof(int8_t) + // Call site format
+ CallSiteTableLengthSize + // Call site table length size
+ CallSiteTableLength + // Call site table length
+ SizeActions + // Actions size
+ SizeTypes;
+ unsigned TTypeBaseOffsetSize = MCAsmInfo::getULEB128Size(TTypeBaseOffset);
+ unsigned TotalSize =
+ sizeof(int8_t) + // LPStart format
+ sizeof(int8_t) + // TType format
+ (HaveTTData ? TTypeBaseOffsetSize : 0) + // TType base offset size
+ TTypeBaseOffset; // TType base offset
unsigned SizeAlign = (4 - TotalSize) & 3;
- for (unsigned i = 0; i != SizeAlign; ++i) {
- Asm->EmitInt8(0);
- EOL("Padding");
- }
-
- EmitLabel("exception", SubprogramCount);
-
- if (IsSJLJ) {
- SmallString<16> LSDAName;
- raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() <<
- "_LSDA_" << Asm->getFunctionNumber();
- O << LSDAName.str() << ":\n";
+ if (HaveTTData) {
+ // Account for any extra padding that will be added to the call site table
+ // length.
+ EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign);
+ SizeAlign = 0;
}
- // Emit the header.
- EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
- EmitEncodingByte(TTypeFormat, "@TType");
-
- if (HaveTTData)
- EmitULEB128(TyOffset, "@TType base offset");
-
// SjLj Exception handling
if (IsSJLJ) {
EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
- EmitULEB128(SizeSites, "Call site table length");
+
+ // Add extra padding if it wasn't added to the TType base offset.
+ EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
// Emit the landing pad site information.
unsigned idx = 0;
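Restating the padding math above with concrete numbers: SizeAlign rounds the emitted header size up to a multiple of four, and the pad bytes are folded into a ULEB128 field via the new PadTo parameter of EmitULEB128 (see DwarfPrinter.cpp below) rather than written separately. A worked case with assumed sizes, for illustration only:

  // Suppose CallSiteTableLength = 26, SizeActions = 8, SizeTypes = 8.
  //   CallSiteTableLengthSize = 1                      (26 fits in one ULEB128 byte)
  //   TTypeBaseOffset         = 1 + 1 + 26 + 8 + 8 = 44
  //   TTypeBaseOffsetSize     = 1
  //   TotalSize               = 1 + 1 + 1 + 44   = 47
  //   SizeAlign               = (4 - 47) & 3     = 1
  // With TType data present, the @TType base offset ULEB128 is emitted with one
  // byte of padding, so the alignment is absorbed inside the encoding instead of
  // being written as separate pad bytes.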
@@ -836,7 +798,9 @@ void DwarfException::EmitExceptionTable() {
// Emit the landing pad call site table.
EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
- EmitULEB128(SizeSites, "Call site table length");
+
+ // Add extra padding if it wasn't added to the TType base offset.
+ EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
for (SmallVectorImpl<CallSiteEntry>::const_iterator
I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
@@ -906,23 +870,23 @@ void DwarfException::EmitExceptionTable() {
}
// Emit the Catch TypeInfos.
- if (TypeInfos.size() != 0) EOL("-- Catch TypeInfos --");
+ if (!TypeInfos.empty()) EOL("-- Catch TypeInfos --");
for (std::vector<GlobalVariable *>::const_reverse_iterator
I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
const GlobalVariable *GV = *I;
- PrintRelDirective();
if (GV) {
- O << *Asm->GetGlobalValueSymbol(GV);
+ EmitReference(GV, TTypeEncoding);
EOL("TypeInfo");
} else {
+ PrintRelDirective(TTypeEncoding);
O << "0x0";
EOL("");
}
}
// Emit the Exception Specifications.
- if (FilterIds.size() != 0) EOL("-- Filter IDs --");
+ if (!FilterIds.empty()) EOL("-- Filter IDs --");
for (std::vector<unsigned>::const_iterator
I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
unsigned TypeID = *I;
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index 06033a1..3db1a00 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -76,9 +76,6 @@ class DwarfException : public DwarfPrinter {
/// ExceptionTimer - Timer for the Dwarf exception writer.
Timer *ExceptionTimer;
- /// SizeOfEncodedValue - Return the size of the encoding in bytes.
- unsigned SizeOfEncodedValue(unsigned Encoding);
-
/// EmitCIE - Emit a Common Information Entry (CIE). This holds information
/// that is shared among many Frame Description Entries. There is at least
/// one CIE in every non-empty .debug_frame section.
@@ -135,7 +132,7 @@ class DwarfException : public DwarfPrinter {
struct ActionEntry {
int ValueForTypeID; // The value to write - may not be equal to the type id.
int NextAction;
- struct ActionEntry *Previous;
+ unsigned Previous;
};
/// CallSiteEntry - Structure describing an entry in the call-site table.
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
index 415390b..28ff0eb 100644
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// Emit general DWARF directives.
-//
+//
//===----------------------------------------------------------------------===//
#include "DwarfPrinter.h"
@@ -18,13 +18,17 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/SmallString.h"
using namespace llvm;
DwarfPrinter::DwarfPrinter(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T,
@@ -33,6 +37,26 @@ DwarfPrinter::DwarfPrinter(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T,
RI(Asm->TM.getRegisterInfo()), M(NULL), MF(NULL), MMI(NULL),
SubprogramCount(0), Flavor(flavor), SetCounter(1) {}
+/// SizeOfEncodedValue - Return the size of the encoding in bytes.
+unsigned DwarfPrinter::SizeOfEncodedValue(unsigned Encoding) const {
+ if (Encoding == dwarf::DW_EH_PE_omit)
+ return 0;
+
+ switch (Encoding & 0x07) {
+ case dwarf::DW_EH_PE_absptr:
+ return TD->getPointerSize();
+ case dwarf::DW_EH_PE_udata2:
+ return 2;
+ case dwarf::DW_EH_PE_udata4:
+ return 4;
+ case dwarf::DW_EH_PE_udata8:
+ return 8;
+ }
+
+ assert(0 && "Invalid encoded value.");
+ return 0;
+}
+
void DwarfPrinter::PrintRelDirective(bool Force32Bit, bool isInSection) const {
if (isInSection && MAI->getDwarfSectionOffsetDirective())
O << MAI->getDwarfSectionOffsetDirective();
@@ -42,6 +66,14 @@ void DwarfPrinter::PrintRelDirective(bool Force32Bit, bool isInSection) const {
O << MAI->getData64bitsDirective();
}
+void DwarfPrinter::PrintRelDirective(unsigned Encoding) const {
+ unsigned Size = SizeOfEncodedValue(Encoding);
+ assert((Size == 4 || Size == 8) && "Do not support other types or rels!");
+
+ O << (Size == 4 ?
+ MAI->getData32bitsDirective() : MAI->getData64bitsDirective());
+}
+
/// EOL - Print a newline character to asm stream. If a comment is present
/// then it will be printed first. Comments should not contain '\n'.
void DwarfPrinter::EOL(const Twine &Comment) const {
@@ -127,29 +159,35 @@ void DwarfPrinter::EmitSLEB128(int Value, const char *Desc) const {
Value >>= 7;
IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
if (IsMore) Byte |= 0x80;
-
Asm->OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
} while (IsMore);
}
/// EmitULEB128 - emit the specified unsigned leb128 value.
-void DwarfPrinter::EmitULEB128(unsigned Value, const char *Desc) const {
+void DwarfPrinter::EmitULEB128(unsigned Value, const char *Desc,
+ unsigned PadTo) const {
if (Asm->VerboseAsm && Desc)
Asm->OutStreamer.AddComment(Desc);
- if (MAI->hasLEB128()) {
+ if (MAI->hasLEB128() && PadTo == 0) {
O << "\t.uleb128\t" << Value;
Asm->OutStreamer.AddBlankLine();
return;
}
- // If we don't have .uleb128, emit as .bytes.
+ // If we don't have .uleb128 or we want to emit padding, emit as .bytes.
do {
unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
Value >>= 7;
- if (Value) Byte |= 0x80;
+ if (Value || PadTo != 0) Byte |= 0x80;
Asm->OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
} while (Value);
+
+ if (PadTo) {
+ if (PadTo > 1)
+ Asm->OutStreamer.EmitFill(PadTo - 1, 0x80/*fillval*/, 0/*addrspace*/);
+ Asm->OutStreamer.EmitFill(1, 0/*fillval*/, 0/*addrspace*/);
+ }
}
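Concretely, in the byte-emission path above PadTo keeps the continuation bit set on every byte of the natural encoding, appends PadTo-1 bytes of 0x80, and finishes with a 0x00, so the padded form decodes to the same value in more bytes. Worked examples read off the code (the Desc argument is omitted):

  // EmitULEB128(1)        -> 0x01               (natural form, 1 byte)
  // EmitULEB128(1, 0, 1)  -> 0x81 0x00           (same value, 2 bytes)
  // EmitULEB128(1, 0, 2)  -> 0x81 0x80 0x00      (same value, 3 bytes)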
@@ -195,6 +233,31 @@ void DwarfPrinter::EmitReference(const MCSymbol *Sym, bool IsPCRelative,
if (IsPCRelative) O << "-" << MAI->getPCSymbol();
}
+void DwarfPrinter::EmitReference(const char *Tag, unsigned Number,
+ unsigned Encoding) const {
+ SmallString<64> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
+ << Tag << Number;
+
+ MCSymbol *Sym = Asm->OutContext.GetOrCreateSymbol(Name.str());
+ EmitReference(Sym, Encoding);
+}
+
+void DwarfPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ PrintRelDirective(Encoding);
+ O << *TLOF.getSymbolForDwarfReference(Sym, Asm->MMI, Encoding);
+}
+
+void DwarfPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ PrintRelDirective(Encoding);
+ O << *TLOF.getSymbolForDwarfGlobalReference(GV, Asm->Mang,
+ Asm->MMI, Encoding);
+}
+
/// EmitDifference - Emit the difference between two labels. If this assembler
/// supports .set, we emit a .set of a temporary and then use it in the .word.
void DwarfPrinter::EmitDifference(const char *TagHi, unsigned NumberHi,
@@ -248,7 +311,6 @@ void DwarfPrinter::EmitSectionOffset(const char* Label, const char* Section,
PrintRelDirective(IsSmall);
PrintLabelName("set", SetCounter, Flavor);
++SetCounter;
- O << "\n";
} else {
PrintRelDirective(IsSmall, true);
PrintLabelName(Label, LabelNumber);
@@ -257,7 +319,6 @@ void DwarfPrinter::EmitSectionOffset(const char* Label, const char* Section,
O << "-";
PrintLabelName(Section, SectionNumber);
}
- O << "\n";
}
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
index 69d9c27..bd715f2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.h
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
@@ -28,6 +28,7 @@ class Module;
class MCAsmInfo;
class TargetData;
class TargetRegisterInfo;
+class GlobalValue;
class MCSymbol;
class Twine;
@@ -85,6 +86,10 @@ public:
const MCAsmInfo *getMCAsmInfo() const { return MAI; }
const TargetData *getTargetData() const { return TD; }
+ /// SizeOfEncodedValue - Return the size of the encoding in bytes.
+ unsigned SizeOfEncodedValue(unsigned Encoding) const;
+
+ void PrintRelDirective(unsigned Encoding) const;
void PrintRelDirective(bool Force32Bit = false,
bool isInSection = false) const;
@@ -106,7 +111,8 @@ public:
void EmitSLEB128(int Value, const char *Desc) const;
/// EmitULEB128 - emit the specified unsigned leb128 value.
- void EmitULEB128(unsigned Value, const char *Desc = 0) const;
+ void EmitULEB128(unsigned Value, const char *Desc = 0,
+ unsigned PadTo = 0) const;
/// PrintLabelName - Print label name in form used by Dwarf writer.
@@ -140,6 +146,10 @@ public:
void EmitReference(const MCSymbol *Sym, bool IsPCRelative = false,
bool Force32Bit = false) const;
+ void EmitReference(const char *Tag, unsigned Number, unsigned Encoding) const;
+ void EmitReference(const MCSymbol *Sym, unsigned Encoding) const;
+ void EmitReference(const GlobalValue *GV, unsigned Encoding) const;
+
/// EmitDifference - Emit the difference between two labels.
void EmitDifference(const DWLabel &LabelHi, const DWLabel &LabelLo,
bool IsSmall = false) {
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 3531ed6..a9502fd 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -22,6 +22,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
+#include <ctype.h>
using namespace llvm;
namespace {
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index faf4d95..d94729a 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -334,7 +334,9 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
unsigned TailLen = 0;
while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
--I1; --I2;
- if (!I1->isIdenticalTo(I2) ||
+ // Don't merge debugging pseudos.
+ if (I1->isDebugValue() || I2->isDebugValue() ||
+ !I1->isIdenticalTo(I2) ||
// FIXME: This check is dubious. It's used to get around a problem where
// people incorrectly expect inline asm directives to remain in the same
// relative order. This is untenable because normal compiler
@@ -412,6 +414,8 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator E) {
unsigned Time = 0;
for (; I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
const TargetInstrDesc &TID = I->getDesc();
if (TID.isCall())
Time += 10;
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 9fcbea9..d385b86 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_library(LLVMCodeGen
LiveVariables.cpp
LowerSubregs.cpp
MachineBasicBlock.cpp
+ MachineCSE.cpp
MachineDominators.cpp
MachineFunction.cpp
MachineFunctionAnalysis.cpp
@@ -39,6 +40,7 @@ add_llvm_library(LLVMCodeGen
ObjectCodeEmitter.cpp
OcamlGC.cpp
OptimizeExts.cpp
+ OptimizePHIs.cpp
PHIElimination.cpp
Passes.cpp
PostRASchedulerList.cpp
@@ -66,6 +68,7 @@ add_llvm_library(LLVMCodeGen
StrongPHIElimination.cpp
TailDuplication.cpp
TargetInstrInfoImpl.cpp
+ TargetLoweringObjectFileImpl.cpp
TwoAddressInstructionPass.cpp
UnreachableBlockElim.cpp
VirtRegMap.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 2bedd04..a328d0e 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -131,10 +131,7 @@ bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) {
if (Hint.first || Hint.second)
li.weight *= 1.01F;
- // Divide the weight of the interval by its size. This encourages
- // spilling of intervals that are large and have few uses, and
- // discourages spilling of small intervals with many uses.
- li.weight /= lis->getApproximateInstructionCount(li) * SlotIndex::NUM;
+ lis->normalizeSpillWeight(li);
}
}
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index 05a57d4..3ff2a04 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -102,22 +102,23 @@ bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) {
// Conservatively ignore EH landing pads.
if (MBB->isLandingPad()) return false;
- // Ignore blocks which look like they might have EH-related control flow.
- // At the time of this writing, there are blocks which AnalyzeBranch
- // thinks end in single uncoditional branches, yet which have two CFG
- // successors. Code in this file is not prepared to reason about such things.
- if (!MBB->empty() && MBB->back().isEHLabel())
- return false;
-
// Aggressively handle return blocks and similar constructs.
if (MBB->succ_empty()) return true;
// Ask the target's AnalyzeBranch if it can handle this block.
MachineBasicBlock *TBB = 0, *FBB = 0;
SmallVector<MachineOperand, 4> Cond;
- // Make the terminator is understood.
+ // Make sure the terminator is understood.
if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
return false;
+ // Ignore blocks which look like they might have EH-related control flow.
+ // AnalyzeBranch thinks it knows how to analyze such things, but it doesn't
+ // recognize the possibility of a control transfer through an unwind.
+ // Such blocks contain EH_LABEL instructions, however they may be in the
+ // middle of the block. Instead of searching for them, just check to see
+ // if the CFG disagrees with AnalyzeBranch.
+ if (1u + !Cond.empty() != MBB->succ_size())
+ return false;
// Make sure we have the option of reversing the condition.
if (!Cond.empty() && TII->ReverseBranchCondition(Cond))
return false;
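
A hedged reading of the new check above: a terminator that AnalyzeBranch understands implies one CFG successor for a fallthrough/unconditional branch and two when a condition is present, so `1u + !Cond.empty()` is the expected successor count; any extra successor points at control flow (typically an EH unwind) that AnalyzeBranch cannot see. A minimal standalone sketch of that arithmetic, with hypothetical names, not LLVM API:

#include <cassert>
#include <cstddef>

// Expected CFG successors for an analyzable terminator:
// 1 for fallthrough/unconditional, 2 when a branch condition is present.
static bool cfgMatchesAnalyzedBranch(bool hasCondition, std::size_t succCount) {
  return 1u + (hasCondition ? 1u : 0u) == succCount;
}

int main() {
  assert(cfgMatchesAnalyzedBranch(false, 1));   // plain fallthrough
  assert(cfgMatchesAnalyzedBranch(true, 2));    // conditional branch
  assert(!cfgMatchesAnalyzedBranch(false, 2));  // extra (EH) successor
  return 0;
}
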
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 056e2d5..7d3de89 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -119,6 +119,8 @@ void CriticalAntiDepBreaker::FinishBlock() {
void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
unsigned InsertPosIndex) {
+ if (MI->isDebugValue())
+ return;
assert(Count < InsertPosIndex && "Instruction index out of expected range!");
// Any register which was defined within the previous scheduling region
@@ -409,6 +411,8 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits,
for (MachineBasicBlock::iterator I = End, E = Begin;
I != E; --Count) {
MachineInstr *MI = --I;
+ if (MI->isDebugValue())
+ continue;
// Check if this instruction has a dependence on the critical path that
// is an anti-dependence that we may be able to break. If it is, set
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index b0cb24d..d69c995 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -55,7 +55,7 @@ FunctionPass *llvm::createDeadMachineInstructionElimPass() {
bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
// Don't delete instructions with side effects.
bool SawStore = false;
- if (!MI->isSafeToMove(TII, SawStore, 0))
+ if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI())
return false;
// Examine each operand.
@@ -64,8 +64,8 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
if (MO.isReg() && MO.isDef()) {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg) ?
- LivePhysRegs[Reg] : !MRI->use_empty(Reg)) {
- // This def has a use. Don't delete the instruction!
+ LivePhysRegs[Reg] : !MRI->use_nodbg_empty(Reg)) {
+ // This def has a non-debug use. Don't delete the instruction!
return false;
}
}
@@ -111,23 +111,31 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
MIE = MBB->rend(); MII != MIE; ) {
MachineInstr *MI = &*MII;
- if (MI->isDebugValue()) {
- // Don't delete the DBG_VALUE itself, but if its Value operand is
- // a vreg and this is the only use, substitute an undef operand;
- // the former operand will then be deleted normally.
- if (MI->getNumOperands()==3 && MI->getOperand(0).isReg()) {
- unsigned Reg = MI->getOperand(0).getReg();
- MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
- assert(I != MRI->use_end());
- if (++I == MRI->use_end())
- // only one use, which must be this DBG_VALUE.
- MI->getOperand(0).setReg(0U);
- }
- }
-
// If the instruction is dead, delete it!
if (isDead(MI)) {
DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
+ // It is possible that some DBG_VALUE instructions refer to this
+ // instruction. Examine each def operand for such references;
+ // if found, mark the DBG_VALUE as undef (but don't delete it).
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ MachineRegisterInfo::use_iterator nextI;
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
+ E = MRI->use_end(); I!=E; I=nextI) {
+ nextI = llvm::next(I); // I is invalidated by the setReg
+ MachineOperand& Use = I.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+ if (UseMI==MI)
+ continue;
+ assert(Use.isDebug());
+ UseMI->getOperand(0).setReg(0U);
+ }
+ }
AnyChanges = true;
MI->eraseFromParent();
++NumDeletes;
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 9997a48..87ab7ef 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -155,7 +155,7 @@ void IntrinsicLowering::AddPrototypes(Module &M) {
/// LowerBSWAP - Emit the code to lower bswap of V before the specified
/// instruction IP.
static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
- assert(V->getType()->isInteger() && "Can't bswap a non-integer type!");
+ assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!");
unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
@@ -251,7 +251,7 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
/// instruction IP.
static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
- assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!");
+ assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!");
static const uint64_t MaskValues[6] = {
0x5555555555555555ULL, 0x3333333333333333ULL,
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 40e0150..5e88865 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -14,6 +14,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/PassManager.h"
#include "llvm/Pass.h"
+#include "llvm/Analysis/Verifier.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/Passes.h"
@@ -66,6 +67,9 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
cl::desc("Verify generated machine code"),
cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+static cl::opt<bool> EnableMachineCSE("enable-machine-cse", cl::Hidden,
+ cl::desc("Enable Machine CSE"));
+
static cl::opt<cl::boolOrDefault>
AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
cl::init(cl::BOU_UNSET));
@@ -114,9 +118,10 @@ LLVMTargetMachine::setCodeModelForStatic() {
bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
// Add common CodeGen passes.
- if (addCommonCodeGenPasses(PM, OptLevel))
+ if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify))
return true;
OwningPtr<MCContext> Context(new MCContext());
@@ -140,7 +145,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
case CGFT_ObjectFile: {
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
- MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this);
+ MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context);
if (MCE == 0)
return true;
@@ -192,12 +197,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
///
bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
JITCodeEmitter &JCE,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
// Make sure the code model is set.
setCodeModelForJIT();
// Add common CodeGen passes.
- if (addCommonCodeGenPasses(PM, OptLevel))
+ if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify))
return true;
addCodeEmitter(PM, OptLevel, JCE);
@@ -206,6 +212,12 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
return false; // success!
}
+static void printNoVerify(PassManagerBase &PM,
+ const char *Banner) {
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+}
+
static void printAndVerify(PassManagerBase &PM,
const char *Banner,
bool allowDoubleDefs = false) {
@@ -220,13 +232,19 @@ static void printAndVerify(PassManagerBase &PM,
/// emitting to assembly files or machine code output.
///
bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
// Standard LLVM-Level Passes.
+ // Before running any passes, run the verifier to determine if the input
+ // coming from the front-end and/or optimizer is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
  // Optionally, run split-GEPs and no-load GVN.
if (EnableSplitGEPGVN) {
PM.add(createGEPSplitterPass());
- PM.add(createGVNPass(/*NoPRE=*/false, /*NoLoads=*/true));
+ PM.add(createGVNPass(/*NoLoads=*/true));
}
// Run loop strength reduction before anything else.
@@ -273,6 +291,11 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
"*** Final LLVM Code input to ISel ***\n",
&dbgs()));
+ // All passes which modify the LLVM IR are now complete; run the verifier
+ // to ensure that the IR is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
// Standard Lower-Level Passes.
// Set up a MachineFunction for the rest of CodeGen to work on.
@@ -291,6 +314,10 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
printAndVerify(PM, "After Instruction Selection",
/* allowDoubleDefs= */ true);
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createOptimizePHIsPass());
// Delete dead machine instructions regardless of optimization level.
PM.add(createDeadMachineInstructionElimPass());
@@ -301,6 +328,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
PM.add(createOptimizeExtsPass());
if (!DisableMachineLICM)
PM.add(createMachineLICMPass());
+ if (EnableMachineCSE)
+ PM.add(createMachineCSEPass());
if (!DisableMachineSink)
PM.add(createMachineSinkingPass());
printAndVerify(PM, "After MachineLICM and MachineSinking",
@@ -355,13 +384,13 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// Branch folding must be run after regalloc and prolog/epilog insertion.
if (OptLevel != CodeGenOpt::None && !DisableBranchFold) {
PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
- printAndVerify(PM, "After BranchFolding");
+ printNoVerify(PM, "After BranchFolding");
}
// Tail duplication.
if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) {
PM.add(createTailDuplicatePass(false));
- printAndVerify(PM, "After TailDuplicate");
+ printNoVerify(PM, "After TailDuplicate");
}
PM.add(createGCMachineCodeAnalysisPass());
@@ -371,11 +400,11 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
if (OptLevel != CodeGenOpt::None && !DisableCodePlace) {
PM.add(createCodePlacementOptPass());
- printAndVerify(PM, "After CodePlacementOpt");
+ printNoVerify(PM, "After CodePlacementOpt");
}
if (addPreEmitPass(PM, OptLevel))
- printAndVerify(PM, "After PreEmit passes");
+ printNoVerify(PM, "After PreEmit passes");
return false;
}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 432409a..ccda66f 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -329,24 +329,43 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
DEBUG(dbgs() << " +" << NewLR);
interval.addRange(NewLR);
- // Iterate over all of the blocks that the variable is completely
- // live in, adding [insrtIndex(begin), instrIndex(end)+4) to the
- // live interval.
- for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
- E = vi.AliveBlocks.end(); I != E; ++I) {
- MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I);
- LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo);
- interval.addRange(LR);
- DEBUG(dbgs() << " +" << LR);
+ bool PHIJoin = lv_->isPHIJoin(interval.reg);
+
+ if (PHIJoin) {
+ // A phi join register is killed at the end of the MBB and revived as a new
+ // valno in the killing blocks.
+ assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks");
+ DEBUG(dbgs() << " phi-join");
+ ValNo->addKill(indexes_->getTerminatorGap(mbb));
+ ValNo->setHasPHIKill(true);
+ } else {
+ // Iterate over all of the blocks that the variable is completely
+ // live in, adding [insrtIndex(begin), instrIndex(end)+4) to the
+ // live interval.
+ for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
+ E = vi.AliveBlocks.end(); I != E; ++I) {
+ MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I);
+ LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo);
+ interval.addRange(LR);
+ DEBUG(dbgs() << " +" << LR);
+ }
}
// Finally, this virtual register is live from the start of any killing
// block to the 'use' slot of the killing instruction.
for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
MachineInstr *Kill = vi.Kills[i];
- SlotIndex killIdx =
- getInstructionIndex(Kill).getDefIndex();
- LiveRange LR(getMBBStartIdx(Kill->getParent()), killIdx, ValNo);
+ SlotIndex Start = getMBBStartIdx(Kill->getParent());
+ SlotIndex killIdx = getInstructionIndex(Kill).getDefIndex();
+
+ // Create the range with a NEW value number. Note that this value
+ // number isn't actually defined by an instruction, weird huh? :)
+ if (PHIJoin) {
+ ValNo = interval.getNextValue(SlotIndex(Start, true), 0, false,
+ VNInfoAllocator);
+ ValNo->setIsPHIDef(true);
+ }
+ LiveRange LR(Start, killIdx, ValNo);
interval.addRange(LR);
ValNo->addKill(killIdx);
DEBUG(dbgs() << " +" << LR);
@@ -409,48 +428,11 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
interval.print(dbgs(), tri_);
});
} else {
- // Otherwise, this must be because of phi elimination. If this is the
- // first redefinition of the vreg that we have seen, go back and change
- // the live range in the PHI block to be a different value number.
- if (interval.containsOneValue()) {
-
- VNInfo *VNI = interval.getValNumInfo(0);
- // Phi elimination may have reused the register for multiple identical
- // phi nodes. There will be a kill per phi. Remove the old ranges that
- // we now know have an incorrect number.
- for (unsigned ki=0, ke=vi.Kills.size(); ki != ke; ++ki) {
- MachineInstr *Killer = vi.Kills[ki];
- SlotIndex Start = getMBBStartIdx(Killer->getParent());
- SlotIndex End = getInstructionIndex(Killer).getDefIndex();
- DEBUG({
- dbgs() << "\n\t\trenaming [" << Start << "," << End << "] in: ";
- interval.print(dbgs(), tri_);
- });
- interval.removeRange(Start, End);
-
- // Replace the interval with one of a NEW value number. Note that
- // this value number isn't actually defined by an instruction, weird
- // huh? :)
- LiveRange LR(Start, End,
- interval.getNextValue(SlotIndex(Start, true),
- 0, false, VNInfoAllocator));
- LR.valno->setIsPHIDef(true);
- interval.addRange(LR);
- LR.valno->addKill(End);
- }
-
- MachineBasicBlock *killMBB = getMBBFromIndex(VNI->def);
- VNI->addKill(indexes_->getTerminatorGap(killMBB));
- VNI->setHasPHIKill(true);
- DEBUG({
- dbgs() << " RESULT: ";
- interval.print(dbgs(), tri_);
- });
- }
-
+ assert(lv_->isPHIJoin(interval.reg) && "Multiply defined register");
// In the case of PHI elimination, each variable definition is only
// live until the end of the block. We've already taken care of the
// rest of the live range.
+
SlotIndex defIndex = MIIdx.getDefIndex();
if (MO.isEarlyClobber())
defIndex = MIIdx.getUseIndex();
@@ -468,7 +450,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
interval.addRange(LR);
ValNo->addKill(indexes_->getTerminatorGap(mbb));
ValNo->setHasPHIKill(true);
- DEBUG(dbgs() << " +" << LR);
+ DEBUG(dbgs() << " phi-join +" << LR);
}
}
@@ -613,6 +595,9 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
while (mi != E) {
if (mi->isDebugValue()) {
++mi;
+ if (mi != E && !mi->isDebugValue()) {
+ baseIndex = indexes_->getNextNonNullIndex(baseIndex);
+ }
continue;
}
if (mi->killsRegister(interval.reg, tri_)) {
@@ -1355,11 +1340,9 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
MachineBasicBlock *MBB = MI->getParent();
if (ImpUse && MI != ReMatDefMI) {
- // Re-matting an instruction with virtual register use. Update the
- // register interval's spill weight to HUGE_VALF to prevent it from
- // being spilled.
- LiveInterval &ImpLi = getInterval(ImpUse);
- ImpLi.weight = HUGE_VALF;
+ // Re-matting an instruction with virtual register use. Prevent interval
+ // from being spilled.
+ getInterval(ImpUse).markNotSpillable();
}
unsigned MBBId = MBB->getNumber();
@@ -1411,7 +1394,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
LiveInterval &nI = getOrCreateInterval(NewVReg);
if (!TrySplit) {
// The spill weight is now infinity as it cannot be spilled again.
- nI.weight = HUGE_VALF;
+ nI.markNotSpillable();
continue;
}
@@ -1559,6 +1542,28 @@ LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm,
}
}
+float
+LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
+ // Limit the loop depth ridiculousness.
+ if (loopDepth > 200)
+ loopDepth = 200;
+
+ // The loop depth is used to roughly estimate the number of times the
+ // instruction is executed. Something like 10^d is simple, but will quickly
+ // overflow a float. This expression behaves like 10^d for small d, but is
+ // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
+ // headroom before overflow.
+ float lc = powf(1 + (100.0f / (loopDepth+10)), (float)loopDepth);
+
+ return (isDef + isUse) * lc;
+}
+
+void
+LiveIntervals::normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
+ for (unsigned i = 0, e = NewLIs.size(); i != e; ++i)
+ normalizeSpillWeight(*NewLIs[i]);
+}
+
std::vector<LiveInterval*> LiveIntervals::
addIntervalsForSpillsFast(const LiveInterval &li,
const MachineLoopInfo *loopInfo,
@@ -1567,8 +1572,7 @@ addIntervalsForSpillsFast(const LiveInterval &li,
std::vector<LiveInterval*> added;
- assert(li.weight != HUGE_VALF &&
- "attempt to spill already spilled interval!");
+ assert(li.isSpillable() && "attempt to spill already spilled interval!");
DEBUG({
dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
@@ -1604,10 +1608,7 @@ addIntervalsForSpillsFast(const LiveInterval &li,
// create a new register for this spill
LiveInterval &nI = getOrCreateInterval(NewVReg);
-
- // the spill weight is now infinity as it
- // cannot be spilled again
- nI.weight = HUGE_VALF;
+ nI.markNotSpillable();
// Rewrite register operands to use the new vreg.
for (SmallVectorImpl<unsigned>::iterator I = Indices.begin(),
@@ -1661,8 +1662,7 @@ addIntervalsForSpills(const LiveInterval &li,
if (EnableFastSpilling)
return addIntervalsForSpillsFast(li, loopInfo, vrm);
- assert(li.weight != HUGE_VALF &&
- "attempt to spill already spilled interval!");
+ assert(li.isSpillable() && "attempt to spill already spilled interval!");
DEBUG({
dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
@@ -1736,6 +1736,7 @@ addIntervalsForSpills(const LiveInterval &li,
}
handleSpilledImpDefs(li, vrm, rc, NewLIs);
+ normalizeSpillWeights(NewLIs);
return NewLIs;
}
@@ -1811,6 +1812,7 @@ addIntervalsForSpills(const LiveInterval &li,
// Insert spills / restores if we are splitting.
if (!TrySplit) {
handleSpilledImpDefs(li, vrm, rc, NewLIs);
+ normalizeSpillWeights(NewLIs);
return NewLIs;
}
@@ -1927,11 +1929,10 @@ addIntervalsForSpills(const LiveInterval &li,
unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI);
if (ImpUse) {
// Re-matting an instruction with virtual register use. Add the
- // register as an implicit use on the use MI and update the register
- // interval's spill weight to HUGE_VALF to prevent it from being
- // spilled.
+ // register as an implicit use on the use MI and mark the register
+ // interval as unspillable.
LiveInterval &ImpLi = getInterval(ImpUse);
- ImpLi.weight = HUGE_VALF;
+ ImpLi.markNotSpillable();
MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
}
}
@@ -1970,6 +1971,7 @@ addIntervalsForSpills(const LiveInterval &li,
}
handleSpilledImpDefs(li, vrm, rc, RetNewLIs);
+ normalizeSpillWeights(RetNewLIs);
return RetNewLIs;
}
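
A side note on the new getSpillWeight above: the loop-depth factor (1 + 100/(d+10))^d tracks 10^d for small d but grows far more slowly for large d, staying inside float range even at the d = 200 clamp (about 6.7e33). A minimal sketch (plain C++, not LLVM code) evaluating the expression for a few depths:

#include <cmath>
#include <cstdio>

int main() {
  const unsigned depths[] = {0, 1, 2, 3, 10, 200};
  for (unsigned d : depths) {
    // Same expression as LiveIntervals::getSpillWeight's loop-depth factor.
    float lc = powf(1 + (100.0f / (d + 10)), (float)d);
    printf("loop depth %3u -> factor %g\n", d, lc);
  }
  return 0;
}
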
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 8a124dc..519990e 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -365,27 +365,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
}
}
- if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
- if (LastPartDef)
- // The last partial def kills the register.
- LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
- true/*IsImp*/, true/*IsKill*/));
- else {
- MachineOperand *MO =
- LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
- bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
- // If the last reference is the last def, then it's not used at all.
- // That is, unless we are currently processing the last reference itself.
- LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
- if (NeedEC) {
- // If we are adding a subreg def and the superreg def is marked early
- // clobber, add an early clobber marker to the subreg def.
- MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
- if (MO)
- MO->setIsEarlyClobber();
- }
- }
- } else if (!PhysRegUse[Reg]) {
+ if (!PhysRegUse[Reg]) {
// Partial uses. Mark register def dead and add implicit def of
// sub-registers which are used.
// EAX<dead> = op AL<imp-def>
@@ -419,6 +399,26 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
PartUses.erase(*SS);
}
+ } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
+ if (LastPartDef)
+ // The last partial def kills the register.
+ LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
+ true/*IsImp*/, true/*IsKill*/));
+ else {
+ MachineOperand *MO =
+ LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
+ bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
+ // If the last reference is the last def, then it's not used at all.
+ // That is, unless we are currently processing the last reference itself.
+ LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
+ if (NeedEC) {
+ // If we are adding a subreg def and the superreg def is marked early
+ // clobber, add an early clobber marker to the subreg def.
+ MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
+ if (MO)
+ MO->setIsEarlyClobber();
+ }
+ }
} else
LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
return true;
@@ -510,6 +510,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+ PHIJoins.clear();
/// Get some space for a respectable number of registers.
VirtRegInfo.resize(64);
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 655a0bf..64134ce 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -143,36 +143,6 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
return I;
}
-/// isOnlyReachableViaFallthough - Return true if this basic block has
-/// exactly one predecessor and the control transfer mechanism between
-/// the predecessor and this block is a fall-through.
-bool MachineBasicBlock::isOnlyReachableByFallthrough() const {
- // If this is a landing pad, it isn't a fall through. If it has no preds,
- // then nothing falls through to it.
- if (isLandingPad() || pred_empty())
- return false;
-
- // If there isn't exactly one predecessor, it can't be a fall through.
- const_pred_iterator PI = pred_begin(), PI2 = PI;
- ++PI2;
- if (PI2 != pred_end())
- return false;
-
- // The predecessor has to be immediately before this block.
- const MachineBasicBlock *Pred = *PI;
-
- if (!Pred->isLayoutSuccessor(this))
- return false;
-
- // If the block is completely empty, then it definitely does fall through.
- if (Pred->empty())
- return true;
-
- // Otherwise, check the last instruction.
- const MachineInstr &LastInst = Pred->back();
- return !LastInst.getDesc().isBarrier();
-}
-
void MachineBasicBlock::dump() const {
print(dbgs());
}
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
new file mode 100644
index 0000000..b376e3d
--- /dev/null
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -0,0 +1,268 @@
+//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs global common subexpression elimination on machine
+// instructions using a scoped hash table based value numbering scheme. It
+// must be run while the machine function is still in SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-cse"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+STATISTIC(NumCoalesces, "Number of copies coalesced");
+STATISTIC(NumCSEs, "Number of common subexpressions eliminated");
+
+namespace {
+ class MachineCSE : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *DT;
+ AliasAnalysis *AA;
+ public:
+ static char ID; // Pass identification
+ MachineCSE() : MachineFunctionPass(&ID), CurrVN(0) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+
+ private:
+ unsigned CurrVN;
+ ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT;
+ SmallVector<MachineInstr*, 64> Exps;
+
+ bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool isPhysDefTriviallyDead(unsigned Reg,
+ MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator E);
+ bool hasLivePhysRegDefUse(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool isCSECandidate(MachineInstr *MI);
+ bool ProcessBlock(MachineDomTreeNode *Node);
+ };
+} // end anonymous namespace
+
+char MachineCSE::ID = 0;
+static RegisterPass<MachineCSE>
+X("machine-cse", "Machine Common Subexpression Elimination");
+
+FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); }
+
+bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ bool Changed = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (!MRI->hasOneUse(Reg))
+ // Only coalesce single use copies. This ensures the copy will be
+ // deleted.
+ continue;
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (DefMI->getParent() != MBB)
+ continue;
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (TII->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ !SrcSubIdx && !DstSubIdx) {
+ MO.setReg(SrcReg);
+ DefMI->eraseFromParent();
+ ++NumCoalesces;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+bool MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
+ MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator E) {
+ unsigned LookAheadLeft = 5;
+ while (LookAheadLeft--) {
+ if (I == E)
+ // Reached end of block, register is obviously dead.
+ return true;
+
+ if (I->isDebugValue())
+ continue;
+ bool SeenDef = false;
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = I->getOperand(i);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (!TRI->regsOverlap(MO.getReg(), Reg))
+ continue;
+ if (MO.isUse())
+ return false;
+ SeenDef = true;
+ }
+ if (SeenDef)
+ // Saw a def of Reg (or an alias) before encountering any use; it's
+ // trivially dead.
+ return true;
+ ++I;
+ }
+ return false;
+}
+
+bool MachineCSE::hasLivePhysRegDefUse(MachineInstr *MI, MachineBasicBlock *MBB){
+ unsigned PhysDef = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse())
+ // Can't touch anything to read a physical register.
+ return true;
+ if (MO.isDead())
+ // If the def is dead, it's ok.
+ continue;
+ // Ok, this is a physical register def that's not marked "dead". That's
+ // common since this pass is run before livevariables. We can scan
+ // forward a few instructions and check if it is obviously dead.
+ if (PhysDef)
+ // Multiple physical register defs. These are rare, forget about it.
+ return true;
+ PhysDef = Reg;
+ }
+ }
+
+ if (PhysDef) {
+ MachineBasicBlock::iterator I = MI; I = llvm::next(I);
+ if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end()))
+ return true;
+ }
+ return false;
+}
+
+bool MachineCSE::isCSECandidate(MachineInstr *MI) {
+ // Ignore copies or instructions that read / write physical registers
+ // (except for dead defs of physical registers).
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) ||
+ MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg())
+ return false;
+
+ // Ignore stuff that we obviously can't move.
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+ TID.hasUnmodeledSideEffects())
+ return false;
+
+ if (TID.mayLoad()) {
+ // Okay, this instruction does a load. As a refinement, we allow the target
+ // to decide whether the loaded value is actually a constant. If so, we can
+ // actually use it as a load.
+ if (!MI->isInvariantLoad(AA))
+ // FIXME: we should be able to hoist loads with no other side effects if
+ // there are no other instructions which can change memory in this loop.
+ // This is a trivial form of alias analysis.
+ return false;
+ }
+ return true;
+}
+
+bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) {
+ bool Changed = false;
+
+ ScopedHashTableScope<MachineInstr*, unsigned,
+ MachineInstrExpressionTrait> VNTS(VNT);
+ MachineBasicBlock *MBB = Node->getBlock();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
+ MachineInstr *MI = &*I;
+ ++I;
+
+ if (!isCSECandidate(MI))
+ continue;
+
+ bool FoundCSE = VNT.count(MI);
+ if (!FoundCSE) {
+ // Look for trivial copy coalescing opportunities.
+ if (PerformTrivialCoalescing(MI, MBB))
+ FoundCSE = VNT.count(MI);
+ }
+ // FIXME: commute commutable instructions?
+
+ // If the instruction defines a physical register and the value *may* be
+ // used, then it's not safe to replace it with a common subexpression.
+ if (FoundCSE && hasLivePhysRegDefUse(MI, MBB))
+ FoundCSE = false;
+
+ if (!FoundCSE) {
+ VNT.insert(MI, CurrVN++);
+ Exps.push_back(MI);
+ continue;
+ }
+
+ // Found a common subexpression, eliminate it.
+ unsigned CSVN = VNT.lookup(MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ DEBUG(dbgs() << "Examining: " << *MI);
+ DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
+ unsigned NumDefs = MI->getDesc().getNumDefs();
+ for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned OldReg = MO.getReg();
+ unsigned NewReg = CSMI->getOperand(i).getReg();
+ if (OldReg == NewReg)
+ continue;
+ assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
+ TargetRegisterInfo::isVirtualRegister(NewReg) &&
+ "Do not CSE physical register defs!");
+ MRI->replaceRegWith(OldReg, NewReg);
+ --NumDefs;
+ }
+ MI->eraseFromParent();
+ ++NumCSEs;
+ }
+
+ // Recursively call ProcessBlock with children.
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ Changed |= ProcessBlock(Children[i]);
+
+ return Changed;
+}
+
+bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+ AA = &getAnalysis<AliasAnalysis>();
+ return ProcessBlock(DT->getRootNode());
+}
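
To illustrate the idea behind the new pass, here is a minimal sketch of expression-keyed value numbering on a toy three-address block (plain C++ with illustrative names, not the MachineInstr/ScopedHashTable machinery): identical right-hand sides map to the same entry, and a later duplicate is rewritten to reuse the earlier result.

#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct Expr { std::string op, lhs, rhs, dst; };

int main() {
  std::vector<Expr> block = {
    {"add", "a", "b", "t1"},
    {"mul", "t1", "c", "t2"},
    {"add", "a", "b", "t3"},   // same expression as the one defining t1
  };
  std::map<std::string, std::string> vnt;   // expression key -> defining name
  for (const Expr &e : block) {
    std::string key = e.op + " " + e.lhs + " " + e.rhs;
    auto it = vnt.find(key);
    if (it == vnt.end()) {
      vnt[key] = e.dst;                     // first sighting: record it
      printf("keep %s = %s\n", e.dst.c_str(), key.c_str());
    } else {
      printf("CSE  %s -> reuse %s\n", e.dst.c_str(), it->second.c_str());
    }
  }
  return 0;
}
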
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index f141c56..4377d5b 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -95,6 +95,9 @@ MachineFunction::MachineFunction(Function *F, const TargetMachine &TM,
MFInfo = 0;
FrameInfo = new (Allocator.Allocate<MachineFrameInfo>())
MachineFrameInfo(*TM.getFrameInfo());
+ if (Fn->hasFnAttr(Attribute::StackAlignment))
+ FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs(
+ Fn->getAttributes().getFnAttributes()));
ConstantPool = new (Allocator.Allocate<MachineConstantPool>())
MachineConstantPool(TM.getTargetData());
Alignment = TM.getTargetLowering()->getFunctionAlignment(F);
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index df61c74..e23670d 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -18,6 +18,7 @@
#include "llvm/Type.h"
#include "llvm/Value.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -305,7 +306,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f,
int64_t o, uint64_t s, unsigned int a)
: Offset(o), Size(s), V(v),
- Flags((f & 7) | ((Log2_32(a) + 1) << 3)) {
+ Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)) {
assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
assert((isLoad() || isStore()) && "Not a load/store!");
}
@@ -327,7 +328,8 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
if (MMO->getBaseAlignment() >= getBaseAlignment()) {
// Update the alignment value.
- Flags = (Flags & 7) | ((Log2_32(MMO->getBaseAlignment()) + 1) << 3);
+ Flags = (Flags & ((1 << MOMaxBits) - 1)) |
+ ((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits);
// Also update the base and offset, because the new alignment may
// not be applicable with the old ones.
V = MMO->getValue();
@@ -700,6 +702,35 @@ void MachineInstr::addMemOperand(MachineFunction &MF,
MemRefsEnd = NewMemRefsEnd;
}
+bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
+ MICheckType Check) const {
+ // If opcodes or number of operands are not the same then the two
+ // instructions are obviously not identical.
+ if (Other->getOpcode() != getOpcode() ||
+ Other->getNumOperands() != getNumOperands())
+ return false;
+
+ // Check operands to make sure they match.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ const MachineOperand &OMO = Other->getOperand(i);
+ // Clients may or may not want to ignore defs when testing for equality.
+ // For example, machine CSE pass only cares about finding common
+ // subexpressions, so it's safe to ignore virtual register defs.
+ if (Check != CheckDefs && MO.isReg() && MO.isDef()) {
+ if (Check == IgnoreDefs)
+ continue;
+ // Check == IgnoreVRegDefs
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+ TargetRegisterInfo::isPhysicalRegister(OMO.getReg()))
+ if (MO.getReg() != OMO.getReg())
+ return false;
+ } else if (!MO.isIdenticalTo(OMO))
+ return false;
+ }
+ return true;
+}
+
/// removeFromParent - This method unlinks 'this' from the containing basic
/// block, and returns it, but does not delete it.
MachineInstr *MachineInstr::removeFromParent() {
@@ -958,8 +989,8 @@ void MachineInstr::copyPredicates(const MachineInstr *MI) {
/// SawStore is set to true, it means that there is a store (or call) between
/// the instruction's location and its intended destination.
bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
- bool &SawStore,
- AliasAnalysis *AA) const {
+ AliasAnalysis *AA,
+ bool &SawStore) const {
// Ignore stuff that we obviously can't move.
if (TID->mayStore() || TID->isCall()) {
SawStore = true;
@@ -984,11 +1015,11 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
/// isSafeToReMat - Return true if it's safe to rematerialize the specified
/// instruction which defined the specified register instead of copying it.
bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
- unsigned DstReg,
- AliasAnalysis *AA) const {
+ AliasAnalysis *AA,
+ unsigned DstReg) const {
bool SawStore = false;
if (!TII->isTriviallyReMaterializable(this, AA) ||
- !isSafeToMove(TII, SawStore, AA))
+ !isSafeToMove(TII, AA, SawStore))
return false;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
@@ -1324,3 +1355,48 @@ void MachineInstr::addRegisterDefined(unsigned IncomingReg,
true /*IsDef*/,
true /*IsImp*/));
}
+
+unsigned
+MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
+ unsigned Hash = MI->getOpcode() * 37;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ uint64_t Key = (uint64_t)MO.getType() << 32;
+ switch (MO.getType()) {
+ default: break;
+ case MachineOperand::MO_Register:
+ if (MO.isDef() && MO.getReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue; // Skip virtual register defs.
+ Key |= MO.getReg();
+ break;
+ case MachineOperand::MO_Immediate:
+ Key |= MO.getImm();
+ break;
+ case MachineOperand::MO_FrameIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ Key |= MO.getIndex();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB());
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal());
+ break;
+ case MachineOperand::MO_BlockAddress:
+ Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress());
+ break;
+ }
+ Key += ~(Key << 32);
+ Key ^= (Key >> 22);
+ Key += ~(Key << 13);
+ Key ^= (Key >> 8);
+ Key += (Key << 3);
+ Key ^= (Key >> 15);
+ Key += ~(Key << 27);
+ Key ^= (Key >> 31);
+ Hash = (unsigned)Key + Hash * 37;
+ }
+ return Hash;
+}
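
A minimal standalone restatement of the per-operand hashing above (hypothetical helper name, plain C++): each operand is reduced to a 64-bit key tagged with its type in the high bits, run through the shift/xor mix, and folded into the running 32-bit hash.

#include <cstdint>
#include <cstdio>

// Fold one 64-bit operand key into a running 32-bit hash using the same
// shift/xor mixing sequence as the trait above.
static unsigned mixKeyIntoHash(unsigned Hash, uint64_t Key) {
  Key += ~(Key << 32);
  Key ^= (Key >> 22);
  Key += ~(Key << 13);
  Key ^= (Key >> 8);
  Key += (Key << 3);
  Key ^= (Key >> 15);
  Key += ~(Key << 27);
  Key ^= (Key >> 31);
  return (unsigned)Key + Hash * 37;
}

int main() {
  unsigned Hash = 42 * 37;                                  // e.g. opcode * 37
  Hash = mixKeyIntoHash(Hash, ((uint64_t)1 << 32) | 5);     // register-like key
  Hash = mixKeyIntoHash(Hash, ((uint64_t)2 << 32) | 100);   // immediate-like key
  printf("hash = %u\n", Hash);
  return 0;
}
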
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 92c84f3..0361694 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -252,32 +252,6 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
return false;
}
- DEBUG({
- dbgs() << "--- Checking if we can hoist " << I;
- if (I.getDesc().getImplicitUses()) {
- dbgs() << " * Instruction has implicit uses:\n";
-
- const TargetRegisterInfo *TRI = TM->getRegisterInfo();
- for (const unsigned *ImpUses = I.getDesc().getImplicitUses();
- *ImpUses; ++ImpUses)
- dbgs() << " -> " << TRI->getName(*ImpUses) << "\n";
- }
-
- if (I.getDesc().getImplicitDefs()) {
- dbgs() << " * Instruction has implicit defines:\n";
-
- const TargetRegisterInfo *TRI = TM->getRegisterInfo();
- for (const unsigned *ImpDefs = I.getDesc().getImplicitDefs();
- *ImpDefs; ++ImpDefs)
- dbgs() << " -> " << TRI->getName(*ImpDefs) << "\n";
- }
- });
-
- if (I.getDesc().getImplicitDefs() || I.getDesc().getImplicitUses()) {
- DEBUG(dbgs() << "Cannot hoist with implicit defines or uses\n");
- return false;
- }
-
// The instruction is loop invariant if all of its operands are.
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = I.getOperand(i);
@@ -311,6 +285,10 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
} else if (!MO.isDead()) {
// A def that isn't dead. We can't move it.
return false;
+ } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
+ // If the reg is live into the loop, we can't hoist an instruction
+ // which would clobber it.
+ return false;
}
}
@@ -467,7 +445,7 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI,
std::vector<const MachineInstr*> &PrevMIs) {
for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
const MachineInstr *PrevMI = PrevMIs[i];
- if (TII->isIdentical(MI, PrevMI, RegInfo))
+ if (TII->produceSameValue(MI, PrevMI))
return PrevMI;
}
return 0;
@@ -480,9 +458,20 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
+
+ // Replace virtual registers defined by MI by their counterparts defined
+ // by Dup.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isDef())
+
+ // Physical registers may not differ here.
+ assert((!MO.isReg() || MO.getReg() == 0 ||
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+ MO.getReg() == Dup->getOperand(i).getReg()) &&
+ "Instructions with different phys regs are not identical!");
+
+ if (MO.isReg() && MO.isDef() &&
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
}
MI->eraseFromParent();
diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp
index 8378906..39d2c75 100644
--- a/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
// Out of line virtual method.
void MachineModuleInfoMachO::Anchor() {}
-
+void MachineModuleInfoELF::Anchor() {}
static int SortSymbolPair(const void *LHS, const void *RHS) {
const MCSymbol *LHSS =
@@ -34,10 +34,11 @@ static int SortSymbolPair(const void *LHS, const void *RHS) {
/// GetSortedStubs - Return the entries from a DenseMap in a deterministic
/// sorted order.
-MachineModuleInfoMachO::SymbolListTy
-MachineModuleInfoMachO::GetSortedStubs(const DenseMap<MCSymbol*,
- MCSymbol*> &Map) {
- MachineModuleInfoMachO::SymbolListTy List(Map.begin(), Map.end());
+MachineModuleInfoImpl::SymbolListTy
+MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*,
+ MCSymbol*> &Map) {
+ MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end());
+
if (!List.empty())
qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair);
return List;
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index b31973e..d9ab677 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -116,6 +116,19 @@ MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
return 0;
}
+bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const {
+ use_iterator UI = use_begin(RegNo);
+ if (UI == use_end())
+ return false;
+ return ++UI == use_end();
+}
+
+bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
+ use_nodbg_iterator UI = use_nodbg_begin(RegNo);
+ if (UI == use_nodbg_end())
+ return false;
+ return ++UI == use_nodbg_end();
+}
#ifndef NDEBUG
void MachineRegisterInfo::dumpUses(unsigned Reg) const {
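
The two new helpers above share one pattern: a range has exactly one element iff it is non-empty and advancing its begin iterator once reaches the end. A minimal generic sketch of that test:

#include <cassert>
#include <list>

template <typename Iterator>
bool hasSingleElement(Iterator begin, Iterator end) {
  if (begin == end)
    return false;           // empty range
  return ++begin == end;    // exactly one element if the next step is the end
}

int main() {
  std::list<int> one;  one.push_back(7);
  std::list<int> two;  two.push_back(7); two.push_back(8);
  std::list<int> none;
  assert(hasSingleElement(one.begin(), one.end()));
  assert(!hasSingleElement(two.begin(), two.end()));
  assert(!hasSingleElement(none.begin(), none.end()));
  return 0;
}
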
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index c391576..e47ba7c 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -72,8 +72,13 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
MachineBasicBlock *MBB) const {
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Only makes sense for vregs");
- for (MachineRegisterInfo::use_iterator I = RegInfo->use_begin(Reg),
- E = RegInfo->use_end(); I != E; ++I) {
+ // Ignoring debug uses is necessary so debug info doesn't affect the code.
+ // This may leave a referencing dbg_value in the original block, before
+ // the definition of the vreg. The DWARF generator handles this, although the
+ // user might not get the right info at runtime.
+ for (MachineRegisterInfo::use_nodbg_iterator I =
+ RegInfo->use_nodbg_begin(Reg),
+ E = RegInfo->use_nodbg_end(); I != E; ++I) {
// Determine the block of the use.
MachineInstr *UseInst = &*I;
MachineBasicBlock *UseBlock = UseInst->getParent();
@@ -135,7 +140,10 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
ProcessedBegin = I == MBB.begin();
if (!ProcessedBegin)
--I;
-
+
+ if (MI->isDebugValue())
+ continue;
+
if (SinkInstruction(MI, SawStore))
++NumSunk, MadeChange = true;
@@ -149,7 +157,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// Check if it's safe to move the instruction.
- if (!MI->isSafeToMove(TII, SawStore, AA))
+ if (!MI->isSafeToMove(TII, AA, SawStore))
return false;
// FIXME: This should include support for sinking instructions within the
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
new file mode 100644
index 0000000..2717d4d
--- /dev/null
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -0,0 +1,189 @@
+//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass optimizes machine instruction PHIs to take advantage of
+// opportunities created during DAG legalization.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "phi-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumPHICycles, "Number of PHI cycles replaced");
+STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles");
+
+namespace {
+ class OptimizePHIs : public MachineFunctionPass {
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+
+ public:
+ static char ID; // Pass identification
+ OptimizePHIs() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ typedef SmallPtrSet<MachineInstr*, 16> InstrSet;
+ typedef SmallPtrSetIterator<MachineInstr*> InstrSetIterator;
+
+ bool IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg,
+ InstrSet &PHIsInCycle);
+ bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle);
+ bool OptimizeBB(MachineBasicBlock &MBB);
+ };
+}
+
+char OptimizePHIs::ID = 0;
+static RegisterPass<OptimizePHIs>
+X("opt-phis", "Optimize machine instruction PHIs");
+
+FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); }
+
+bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
+ MRI = &Fn.getRegInfo();
+ TII = Fn.getTarget().getInstrInfo();
+
+ // Find dead PHI cycles and PHI cycles that can be replaced by a single
+ // value. InstCombine does these optimizations, but DAG legalization may
+ // introduce new opportunities, e.g., when i64 values are split up for
+ // 32-bit targets.
+ bool Changed = false;
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ Changed |= OptimizeBB(*I);
+
+ return Changed;
+}
+
+/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands
+/// are copies of SingleValReg, possibly via copies through other PHIs. If
+/// SingleValReg is zero on entry, it is set to the register with the single
+/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that
+/// have been scanned.
+bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
+ unsigned &SingleValReg,
+ InstrSet &PHIsInCycle) {
+ assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction");
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // See if we already saw this register.
+ if (!PHIsInCycle.insert(MI))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PHIsInCycle.size() == 16)
+ return false;
+
+ // Scan the PHI operands.
+ for (unsigned i = 1; i != MI->getNumOperands(); i += 2) {
+ unsigned SrcReg = MI->getOperand(i).getReg();
+ if (SrcReg == DstReg)
+ continue;
+ MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+
+ // Skip over register-to-register moves.
+ unsigned MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx;
+ if (SrcMI &&
+ TII->isMoveInstr(*SrcMI, MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx) &&
+ SrcSubIdx == 0 && DstSubIdx == 0 &&
+ TargetRegisterInfo::isVirtualRegister(MvSrcReg))
+ SrcMI = MRI->getVRegDef(MvSrcReg);
+ if (!SrcMI)
+ return false;
+
+ if (SrcMI->isPHI()) {
+ if (!IsSingleValuePHICycle(SrcMI, SingleValReg, PHIsInCycle))
+ return false;
+ } else {
+ // Fail if there is more than one non-phi/non-move register.
+ if (SingleValReg != 0)
+ return false;
+ SingleValReg = SrcReg;
+ }
+ }
+ return true;
+}
+
+/// IsDeadPHICycle - Check if the register defined by a PHI is only used by
+/// other PHIs in a cycle.
+bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) {
+ assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction");
+ unsigned DstReg = MI->getOperand(0).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ "PHI destination is not a virtual register");
+
+ // See if we already saw this register.
+ if (!PHIsInCycle.insert(MI))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PHIsInCycle.size() == 16)
+ return false;
+
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DstReg),
+ E = MRI->use_end(); I != E; ++I) {
+ MachineInstr *UseMI = &*I;
+ if (!UseMI->isPHI() || !IsDeadPHICycle(UseMI, PHIsInCycle))
+ return false;
+ }
+
+ return true;
+}
+
+/// OptimizeBB - Remove dead PHI cycles and PHI cycles that can be replaced by
+/// a single value.
+bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator
+ MII = MBB.begin(), E = MBB.end(); MII != E; ) {
+ MachineInstr *MI = &*MII++;
+ if (!MI->isPHI())
+ break;
+
+ // Check for single-value PHI cycles.
+ unsigned SingleValReg = 0;
+ InstrSet PHIsInCycle;
+ if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) &&
+ SingleValReg != 0) {
+ MRI->replaceRegWith(MI->getOperand(0).getReg(), SingleValReg);
+ MI->eraseFromParent();
+ ++NumPHICycles;
+ Changed = true;
+ continue;
+ }
+
+ // Check for dead PHI cycles.
+ PHIsInCycle.clear();
+ if (IsDeadPHICycle(MI, PHIsInCycle)) {
+ for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end();
+ PI != PE; ++PI) {
+ MachineInstr *PhiMI = *PI;
+ if (&*MII == PhiMI)
+ ++MII;
+ PhiMI->eraseFromParent();
+ }
+ ++NumDeadPHICycles;
+ Changed = true;
+ }
+ }
+ return Changed;
+}
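
To make the dead-cycle test above concrete, here is a minimal sketch on toy data (integers for vregs and a flag for "is a PHI"; none of this is MachineInstr API): a def belongs to a dead PHI cycle when every transitive user is itself a PHI already in, or added to, the cycle set.

#include <cstdio>
#include <map>
#include <set>
#include <vector>

struct Def {
  bool isPHI;
  std::vector<int> users;   // ids of defs that use this def's result
};
typedef std::map<int, Def> Graph;

static bool isDeadPHICycle(int def, const Graph &g, std::set<int> &inCycle) {
  const Def &d = g.at(def);
  if (!d.isPHI)
    return false;                       // value escapes to a real instruction
  if (!inCycle.insert(def).second)
    return true;                        // already part of the cycle being built
  for (int user : d.users)
    if (!isDeadPHICycle(user, g, inCycle))
      return false;
  return true;
}

int main() {
  // Defs 1 and 2 are PHIs feeding only each other: a dead cycle.
  // Def 3 is a PHI feeding def 4, a non-PHI: it stays live.
  Graph g;
  g[1].isPHI = true;  g[1].users.push_back(2);
  g[2].isPHI = true;  g[2].users.push_back(1);
  g[3].isPHI = true;  g[3].users.push_back(4);
  g[4].isPHI = false;
  std::set<int> cycle;
  printf("PHI 1 dead: %d\n", (int)isDeadPHICycle(1, g, cycle));  // prints 1
  cycle.clear();
  printf("PHI 3 dead: %d\n", (int)isDeadPHICycle(3, g, cycle));  // prints 0
  return 0;
}
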
diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h
index b48f548..bd18b52 100644
--- a/lib/CodeGen/PBQP/HeuristicSolver.h
+++ b/lib/CodeGen/PBQP/HeuristicSolver.h
@@ -18,7 +18,6 @@
#include "Graph.h"
#include "Solution.h"
-#include "llvm/Support/raw_ostream.h"
#include <vector>
#include <limits>
@@ -230,7 +229,7 @@ namespace PBQP {
}
/// \brief Apply rule R1.
- /// @param nItr Node iterator for node to apply R1 to.
+ /// @param xnItr Node iterator for node to apply R1 to.
///
/// Node will be automatically pushed to the solver stack.
void applyR1(Graph::NodeItr xnItr) {
@@ -278,7 +277,7 @@ namespace PBQP {
}
/// \brief Apply rule R2.
- /// @param nItr Node iterator for node to apply R2 to.
+ /// @param xnItr Node iterator for node to apply R2 to.
///
/// Node will be automatically pushed to the solver stack.
void applyR2(Graph::NodeItr xnItr) {
@@ -494,14 +493,23 @@ namespace PBQP {
bool tryNormaliseEdgeMatrix(Graph::EdgeItr &eItr) {
+ const PBQPNum infinity = std::numeric_limits<PBQPNum>::infinity();
+
Matrix &edgeCosts = g.getEdgeCosts(eItr);
Vector &uCosts = g.getNodeCosts(g.getEdgeNode1(eItr)),
&vCosts = g.getNodeCosts(g.getEdgeNode2(eItr));
for (unsigned r = 0; r < edgeCosts.getRows(); ++r) {
- PBQPNum rowMin = edgeCosts.getRowMin(r);
+ PBQPNum rowMin = infinity;
+
+ for (unsigned c = 0; c < edgeCosts.getCols(); ++c) {
+ if (vCosts[c] != infinity && edgeCosts[r][c] < rowMin)
+ rowMin = edgeCosts[r][c];
+ }
+
uCosts[r] += rowMin;
- if (rowMin != std::numeric_limits<PBQPNum>::infinity()) {
+
+ if (rowMin != infinity) {
edgeCosts.subFromRow(r, rowMin);
}
else {
@@ -510,9 +518,16 @@ namespace PBQP {
}
for (unsigned c = 0; c < edgeCosts.getCols(); ++c) {
- PBQPNum colMin = edgeCosts.getColMin(c);
+ PBQPNum colMin = infinity;
+
+ for (unsigned r = 0; r < edgeCosts.getRows(); ++r) {
+ if (uCosts[r] != infinity && edgeCosts[r][c] < colMin)
+ colMin = edgeCosts[r][c];
+ }
+
vCosts[c] += colMin;
- if (colMin != std::numeric_limits<PBQPNum>::infinity()) {
+
+ if (colMin != infinity) {
edgeCosts.subFromCol(c, colMin);
}
else {
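
A minimal numeric sketch of the normalisation tweak above (plain arrays rather than the PBQP Matrix/Vector types): the row minimum is taken only over columns whose node cost is finite, so an option that can never be chosen cannot drag the subtracted minimum down.

#include <cstdio>
#include <limits>

int main() {
  const double inf = std::numeric_limits<double>::infinity();
  const double vCosts[3]  = {0.0, inf, 2.0};   // node costs of the columns
  const double edgeRow[3] = {5.0, 1.0, 3.0};   // one row of the edge cost matrix

  double rowMin = inf;
  for (int c = 0; c < 3; ++c)
    if (vCosts[c] != inf && edgeRow[c] < rowMin)
      rowMin = edgeRow[c];

  // The unfiltered minimum would be 1.0, but that column can never be chosen
  // because its node cost is infinite; the filtered minimum is 3.0.
  printf("rowMin = %g\n", rowMin);
  return 0;
}
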
diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h
index c09ad74..30d34d9 100644
--- a/lib/CodeGen/PBQP/Heuristics/Briggs.h
+++ b/lib/CodeGen/PBQP/Heuristics/Briggs.h
@@ -128,14 +128,7 @@ namespace PBQP {
/// selected for heuristic reduction instead.
bool shouldOptimallyReduce(Graph::NodeItr nItr) {
if (getSolver().getSolverDegree(nItr) < 3) {
- if (getGraph().getNodeCosts(nItr)[0] !=
- std::numeric_limits<PBQPNum>::infinity()) {
- return true;
- }
- // Otherwise we have an infinite spill cost node.
- initializeNode(nItr);
- NodeData &nd = getHeuristicNodeData(nItr);
- return nd.isAllocable;
+ return true;
}
// else
return false;
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index b740c68..8bbe0a7 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -55,8 +55,6 @@ void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) {
MRI = &Fn.getRegInfo();
- PHIDefs.clear();
- PHIKills.clear();
bool Changed = false;
// Split critical edges to help the coalescer
@@ -215,10 +213,6 @@ void llvm::PHIElimination::LowerAtomicPHINode(
TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC);
}
- // Record PHI def.
- assert(!hasPHIDef(DestReg) && "Vreg has multiple phi-defs?");
- PHIDefs[DestReg] = &MBB;
-
// Update live variable information if there is any.
LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
if (LV) {
@@ -229,6 +223,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
// Increment use count of the newly created virtual register.
VI.NumUses++;
+ LV->setPHIJoin(IncomingReg);
// When we are reusing the incoming register, it may already have been
// killed in this block. The old kill will also have been inserted at
@@ -276,9 +271,6 @@ void llvm::PHIElimination::LowerAtomicPHINode(
// path the PHI.
MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
- // Record the kill.
- PHIKills[SrcReg].insert(&opBlock);
-
// If source is defined by an implicit def, there is no need to insert a
// copy.
MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
@@ -451,34 +443,3 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
return NMBB;
}
-
-unsigned
-PHIElimination::PHINodeTraits::getHashValue(const MachineInstr *MI) {
- if (!MI || MI==getEmptyKey() || MI==getTombstoneKey())
- return DenseMapInfo<MachineInstr*>::getHashValue(MI);
- unsigned hash = 0;
- for (unsigned ni = 1, ne = MI->getNumOperands(); ni != ne; ni += 2)
- hash = hash*37 + DenseMapInfo<BBVRegPair>::
- getHashValue(BBVRegPair(MI->getOperand(ni+1).getMBB()->getNumber(),
- MI->getOperand(ni).getReg()));
- return hash;
-}
-
-bool PHIElimination::PHINodeTraits::isEqual(const MachineInstr *LHS,
- const MachineInstr *RHS) {
- const MachineInstr *EmptyKey = getEmptyKey();
- const MachineInstr *TombstoneKey = getTombstoneKey();
- if (!LHS || !RHS || LHS==EmptyKey || RHS==EmptyKey ||
- LHS==TombstoneKey || RHS==TombstoneKey)
- return LHS==RHS;
-
- unsigned ne = LHS->getNumOperands();
- if (ne != RHS->getNumOperands())
- return false;
- // Ignore operand 0, the defined register.
- for (unsigned ni = 1; ni != ne; ni += 2)
- if (LHS->getOperand(ni).getReg() != RHS->getOperand(ni).getReg() ||
- LHS->getOperand(ni+1).getMBB() != RHS->getOperand(ni+1).getMBB())
- return false;
- return true;
-}
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
index 895aaa4..7dedf03 100644
--- a/lib/CodeGen/PHIElimination.h
+++ b/lib/CodeGen/PHIElimination.h
@@ -22,17 +22,8 @@ namespace llvm {
/// Lower PHI instructions to copies.
class PHIElimination : public MachineFunctionPass {
MachineRegisterInfo *MRI; // Machine register information
- private:
-
- typedef SmallSet<MachineBasicBlock*, 4> PHIKillList;
- typedef DenseMap<unsigned, PHIKillList> PHIKillMap;
- typedef DenseMap<unsigned, MachineBasicBlock*> PHIDefMap;
public:
-
- typedef PHIKillList::iterator phi_kill_iterator;
- typedef PHIKillList::const_iterator const_phi_kill_iterator;
-
static char ID; // Pass identification, replacement for typeid
PHIElimination() : MachineFunctionPass(&ID) {}
@@ -40,38 +31,6 @@ namespace llvm {
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- /// Return true if the given vreg was defined by a PHI instr prior to
- /// lowering.
- bool hasPHIDef(unsigned vreg) const {
- return PHIDefs.count(vreg);
- }
-
- /// Returns the block in which the PHI instruction which defined the
- /// given vreg used to reside.
- MachineBasicBlock* getPHIDefBlock(unsigned vreg) {
- PHIDefMap::iterator phiDefItr = PHIDefs.find(vreg);
- assert(phiDefItr != PHIDefs.end() && "vreg has no phi-def.");
- return phiDefItr->second;
- }
-
- /// Returns true if the given vreg was killed by a PHI instr.
- bool hasPHIKills(unsigned vreg) const {
- return PHIKills.count(vreg);
- }
-
- /// Returns an iterator over the BasicBlocks which contained PHI
- /// kills of this register prior to lowering.
- phi_kill_iterator phiKillsBegin(unsigned vreg) {
- PHIKillMap::iterator phiKillItr = PHIKills.find(vreg);
- assert(phiKillItr != PHIKills.end() && "vreg has no phi-kills.");
- return phiKillItr->second.begin();
- }
- phi_kill_iterator phiKillsEnd(unsigned vreg) {
- PHIKillMap::iterator phiKillItr = PHIKills.find(vreg);
- assert(phiKillItr != PHIKills.end() && "vreg has no phi-kills.");
- return phiKillItr->second.end();
- }
-
private:
/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
/// in predecessor basic blocks.
@@ -109,12 +68,29 @@ namespace llvm {
// SkipPHIsAndLabels - Copies need to be inserted after phi nodes and
// also after any exception handling labels: in landing pads execution
// starts at the label, so any copies placed before it won't be executed!
+ // We also deal with DBG_VALUEs, which are a bit tricky:
+ // PHI
+ // DBG_VALUE
+ // LABEL
+ // Here the DBG_VALUE needs to be skipped, and if it refers to a PHI it
+ // needs to be annulled or, better, moved to follow the label, as well.
+ // PHI
+ // DBG_VALUE
+ // no label
+ // Here it is not a good idea to skip the DBG_VALUE.
+ // FIXME: For now we skip and annul all DBG_VALUEs, maximally simple and
+ // maximally stupid.
MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
// Rather than assuming that EH labels come before other kinds of labels,
// just skip all labels.
- while (I != MBB.end() && (I->isPHI() || I->isLabel()))
+ while (I != MBB.end() &&
+ (I->isPHI() || I->isLabel() || I->isDebugValue())) {
+ if (I->isDebugValue() && I->getNumOperands()==3 &&
+ I->getOperand(0).isReg())
+ I->getOperand(0).setReg(0U);
++I;
+ }
return I;
}
@@ -122,21 +98,13 @@ namespace llvm {
typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
VRegPHIUse VRegPHIUseCount;
- PHIDefMap PHIDefs;
- PHIKillMap PHIKills;
// Defs of PHI sources which are implicit_def.
SmallPtrSet<MachineInstr*, 4> ImpDefs;
- // Lowered PHI nodes may be reused. We provide special DenseMap traits to
- // match PHI nodes with identical arguments.
- struct PHINodeTraits : public DenseMapInfo<MachineInstr*> {
- static unsigned getHashValue(const MachineInstr *PtrVal);
- static bool isEqual(const MachineInstr *LHS, const MachineInstr *RHS);
- };
-
// Map reusable lowered PHI node -> incoming join register.
- typedef DenseMap<MachineInstr*, unsigned, PHINodeTraits> LoweredPHIMap;
+ typedef DenseMap<MachineInstr*, unsigned,
+ MachineInstrExpressionTrait> LoweredPHIMap;
LoweredPHIMap LoweredPHIs;
};
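
The map keyed on lowered PHI instructions now relies on the generic MachineInstrExpressionTrait instead of the hand-written PHINodeTraits removed above. A minimal standalone sketch of the same idea in portable C++ (all names here are illustrative, not LLVM's): key the map on the PHI's (predecessor block number, incoming vreg) pairs, ignoring the defined register, exactly as the old getHashValue/isEqual pair did.

    #include <functional>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    struct PhiKey {
      // One entry per PHI input: (predecessor block number, incoming vreg).
      std::vector<std::pair<int, unsigned>> Incoming;
      bool operator==(const PhiKey &O) const { return Incoming == O.Incoming; }
    };

    struct PhiKeyHash {
      std::size_t operator()(const PhiKey &K) const {
        std::size_t H = 0;
        for (const auto &P : K.Incoming)
          H = H * 37 + std::hash<unsigned long long>()(
                           (static_cast<unsigned long long>(P.first) << 32) | P.second);
        return H;
      }
    };

    // Reusable lowered-PHI "shape" -> the join register it already produced.
    using LoweredPHIMap = std::unordered_map<PhiKey, unsigned, PhiKeyHash>;
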
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index f67eb79..5ea2941 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -34,7 +34,7 @@ static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
RegisterPassParser<RegisterRegAlloc> >
RegAlloc("regalloc",
cl::init(&createLinearScanRegisterAllocator),
- cl::desc("Register allocator to use: (default = linearscan)"));
+ cl::desc("Register allocator to use (default=linearscan)"));
//===---------------------------------------------------------------------===//
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index f43395f..424181c 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -460,6 +460,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
I != E; --Count) {
MachineInstr *MI = --I;
+ if (MI->isDebugValue())
+ continue;
// Update liveness. Registers that are defed but not used in this
// instruction are now dead. Mark register and all subregs as they
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index e3df2e4..d7179b3 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -205,10 +205,9 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
// Process each use instruction once.
for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
UE = mri_->use_end(); UI != UE; ++UI) {
- MachineInstr *RMI = &*UI;
- MachineBasicBlock *RMBB = RMI->getParent();
- if (RMBB == MBB)
+ if (UI.getOperand().isUndef())
continue;
+ MachineInstr *RMI = &*UI;
if (ModInsts.insert(RMI))
RUses.push_back(RMI);
}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 036f59a..138e711 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -175,9 +175,10 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
MachineBasicBlock::iterator I = *i;
// If call frames are not being included as part of the stack frame, and
- // there is no dynamic allocation (therefore referencing frame slots off
- // sp), leave the pseudo ops alone. We'll eliminate them later.
- if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn))
+ // the target doesn't indicate otherwise, remove the call frame pseudos
+ // here. The sub/add sp instruction pairs are still inserted, but we don't
+ // need to track the SP adjustment for frame index elimination.
+ if (RegInfo->canSimplifyCallFramePseudos(Fn))
RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
}
}
@@ -476,8 +477,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Loop over all of the stack objects, assigning sequential addresses...
MachineFrameInfo *FFI = Fn.getFrameInfo();
- unsigned MaxAlign = 1;
-
// Start at the beginning of the local area.
// The Offset is the distance from the stack top in the direction
// of stack growth -- so it's always nonnegative.
@@ -517,9 +516,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
Offset += FFI->getObjectSize(i);
unsigned Align = FFI->getObjectAlignment(i);
- // If the alignment of this object is greater than that of the stack,
- // then increase the stack alignment to match.
- MaxAlign = std::max(MaxAlign, Align);
// Adjust to alignment boundary
Offset = (Offset+Align-1)/Align*Align;
@@ -529,9 +525,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
for (int i = MaxCSFI; i >= MinCSFI ; --i) {
unsigned Align = FFI->getObjectAlignment(i);
- // If the alignment of this object is greater than that of the stack,
- // then increase the stack alignment to match.
- MaxAlign = std::max(MaxAlign, Align);
// Adjust to alignment boundary
Offset = (Offset+Align-1)/Align*Align;
@@ -540,6 +533,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
}
}
+ unsigned MaxAlign = FFI->getMaxAlignment();
+
// Make sure the special register scavenging spill slot is closest to the
// frame pointer if a frame pointer is required.
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
@@ -605,11 +600,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Update frame info to pretend that this is part of the stack...
FFI->setStackSize(Offset - LocalAreaOffset);
-
- // Remember the required stack alignment in case targets need it to perform
- // dynamic stack alignment.
- if (MaxAlign > FFI->getMaxAlignment())
- FFI->setMaxAlignment(MaxAlign);
}
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 7fb3e6e..5e86e5a 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -18,19 +18,38 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Mutex.h"
#include <map>
using namespace llvm;
-static ManagedStatic<PseudoSourceValue[4]> PSVs;
+namespace {
+struct PSVGlobalsTy {
+ // PseudoSourceValues are immutable so don't need locking.
+ const PseudoSourceValue PSVs[4];
+ sys::Mutex Lock; // Guards FSValues, but not the values inside it.
+ std::map<int, const PseudoSourceValue *> FSValues;
+
+ PSVGlobalsTy() : PSVs() {}
+ ~PSVGlobalsTy() {
+ for (std::map<int, const PseudoSourceValue *>::iterator
+ I = FSValues.begin(), E = FSValues.end(); I != E; ++I) {
+ delete I->second;
+ }
+ }
+};
+
+static ManagedStatic<PSVGlobalsTy> PSVGlobals;
+
+} // anonymous namespace
const PseudoSourceValue *PseudoSourceValue::getStack()
-{ return &(*PSVs)[0]; }
+{ return &PSVGlobals->PSVs[0]; }
const PseudoSourceValue *PseudoSourceValue::getGOT()
-{ return &(*PSVs)[1]; }
+{ return &PSVGlobals->PSVs[1]; }
const PseudoSourceValue *PseudoSourceValue::getJumpTable()
-{ return &(*PSVs)[2]; }
+{ return &PSVGlobals->PSVs[2]; }
const PseudoSourceValue *PseudoSourceValue::getConstantPool()
-{ return &(*PSVs)[3]; }
+{ return &PSVGlobals->PSVs[3]; }
static const char *const PSVNames[] = {
"Stack",
@@ -48,13 +67,13 @@ PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) :
Subclass) {}
void PseudoSourceValue::printCustom(raw_ostream &O) const {
- O << PSVNames[this - *PSVs];
+ O << PSVNames[this - PSVGlobals->PSVs];
}
-static ManagedStatic<std::map<int, const PseudoSourceValue *> > FSValues;
-
const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) {
- const PseudoSourceValue *&V = (*FSValues)[FI];
+ PSVGlobalsTy &PG = *PSVGlobals;
+ sys::ScopedLock locked(PG.Lock);
+ const PseudoSourceValue *&V = PG.FSValues[FI];
if (!V)
V = new FixedStackPseudoSourceValue(FI);
return V;
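
The change above merges two separate ManagedStatics into one struct so the lazily-populated fixed-stack map can be guarded by a mutex while the four immutable PSVs stay lock-free. A rough equivalent of the pattern in portable C++, with purely illustrative names (a function-local static stands in for ManagedStatic's lazy construction):

    #include <map>
    #include <mutex>

    struct FixedSlotValue { int FI; };

    struct PSVGlobals {
      std::mutex Lock;                              // guards Slots only
      std::map<int, const FixedSlotValue *> Slots;  // lazily populated
      ~PSVGlobals() {
        for (auto &E : Slots)
          delete E.second;
      }
    };

    static PSVGlobals &globals() { static PSVGlobals G; return G; }

    const FixedSlotValue *getFixedSlot(int FI) {
      PSVGlobals &G = globals();
      std::lock_guard<std::mutex> Locked(G.Lock);   // serialize map lookups/inserts
      const FixedSlotValue *&V = G.Slots[FI];
      if (!V)
        V = new FixedSlotValue{FI};
      return V;
    }
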
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 8e44a57..5c5a394 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -334,10 +334,6 @@ namespace {
SmallVector<unsigned, 256> &inactiveCounts,
bool SkipDGRegs);
- /// assignVirt2StackSlot - assigns this virtual register to a
- /// stack slot. returns the stack slot
- int assignVirt2StackSlot(unsigned virtReg);
-
void ComputeRelatedRegClasses();
template <typename ItTy>
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
index 4d2e3a3..04303cf 100644
--- a/lib/CodeGen/RegAllocLocal.cpp
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -490,9 +490,12 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
// If the virtual register is already available, just update the instruction
// and return.
if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
- MarkPhysRegRecentlyUsed(PR); // Already have this value available!
MI->getOperand(OpNum).setReg(PR); // Assign the input register
- getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
+ if (!MI->isDebugValue()) {
+ // Do not do these for DBG_VALUE as they can affect codegen.
+ MarkPhysRegRecentlyUsed(PR); // Already have this value available!
+ getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
+ }
return MI;
}
@@ -609,6 +612,8 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
MachineOperand& MO = I->getOperand(i);
// Uses don't trigger any flags, but we need to save
@@ -691,7 +696,13 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
bool usedOutsideBlock = isPhysReg ? false :
UsedInMultipleBlocks.test(MO.getReg() -
TargetRegisterInfo::FirstVirtualRegister);
- if (!isPhysReg && !usedOutsideBlock)
+ if (!isPhysReg && !usedOutsideBlock) {
+ // DBG_VALUE complicates this: if the only refs of a register outside
+ // this block are DBG_VALUE, we can't keep the reg live just for that,
+ // as it will cause the reg to be spilled at the end of this block when
+ // it wouldn't have been otherwise. Nullify the DBG_VALUEs when that
+ // happens.
+ bool UsedByDebugValueOnly = false;
for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
UE = MRI.reg_end(); UI != UE; ++UI)
// Two cases:
@@ -699,12 +710,26 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
// - used in the same block before it is defined (loop)
if (UI->getParent() != &MBB ||
(MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI))) {
+ if (UI->isDebugValue()) {
+ UsedByDebugValueOnly = true;
+ continue;
+ }
+ // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone.
UsedInMultipleBlocks.set(MO.getReg() -
TargetRegisterInfo::FirstVirtualRegister);
usedOutsideBlock = true;
+ UsedByDebugValueOnly = false;
break;
}
-
+ if (UsedByDebugValueOnly)
+ for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
+ UE = MRI.reg_end(); UI != UE; ++UI)
+ if (UI->isDebugValue() &&
+ (UI->getParent() != &MBB ||
+ (MO.isDef() && precedes(&*UI, MI))))
+ UI.getOperand().setReg(0U);
+ }
+
// Physical registers and those that are not live-out of the block
// are killed/dead at their last use/def within this block.
if (isPhysReg || !usedOutsideBlock) {
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 2701faf..81cfd8f 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -57,7 +57,7 @@
using namespace llvm;
static RegisterRegAlloc
-registerPBQPRepAlloc("pbqp", "PBQP register allocator.",
+registerPBQPRepAlloc("pbqp", "PBQP register allocator",
llvm::createPBQPRegisterAllocator);
static cl::opt<bool>
@@ -867,10 +867,6 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
// Find the vreg intervals in need of allocation.
findVRegIntervalsToAlloc();
- // If there aren't any then we're done here.
- if (vregIntervalsToAlloc.empty() && emptyVRegIntervals.empty())
- return true;
-
// If there are non-empty intervals allocate them using pbqp.
if (!vregIntervalsToAlloc.empty()) {
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 56dd533..badf34e 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -72,7 +72,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
} else {
return V;
}
- assert(isa<IntegerType>(V->getType()) && "Unexpected operand type!");
+ assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
} while (1);
}
@@ -87,7 +87,7 @@ static const Value *getUnderlyingObject(const Value *V) {
break;
const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
// If that succeeded in finding a pointer, continue the search.
- if (!isa<PointerType>(O->getType()))
+ if (!O->getType()->isPointerTy())
break;
V = O;
} while (1);
diff --git a/lib/CodeGen/SelectionDAG/Android.mk b/lib/CodeGen/SelectionDAG/Android.mk
new file mode 100644
index 0000000..eb15a18
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/Android.mk
@@ -0,0 +1,48 @@
+LOCAL_PATH:= $(call my-dir)
+
+codegen_selectiondag_SRC_FILES := \
+ CallingConvLower.cpp \
+ DAGCombiner.cpp \
+ FastISel.cpp \
+ FunctionLoweringInfo.cpp \
+ InstrEmitter.cpp \
+ LegalizeDAG.cpp \
+ LegalizeFloatTypes.cpp \
+ LegalizeIntegerTypes.cpp \
+ LegalizeTypes.cpp \
+ LegalizeTypesGeneric.cpp \
+ LegalizeVectorOps.cpp \
+ LegalizeVectorTypes.cpp \
+ ScheduleDAGFast.cpp \
+ ScheduleDAGList.cpp \
+ ScheduleDAGRRList.cpp \
+ ScheduleDAGSDNodes.cpp \
+ SelectionDAG.cpp \
+ SelectionDAGBuilder.cpp \
+ SelectionDAGISel.cpp \
+ SelectionDAGPrinter.cpp \
+ TargetLowering.cpp
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(codegen_selectiondag_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMSelectionDAG
+
+include $(LLVM_HOST_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(codegen_selectiondag_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMSelectionDAG
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9189e71..3be6b43 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1064,7 +1064,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (VT.isInteger() && !VT.isVector()) {
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
@@ -1136,7 +1136,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
// fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
@@ -1758,7 +1758,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N1.getValueType();
- unsigned BitWidth = VT.getSizeInBits();
+ unsigned BitWidth = VT.getScalarType().getSizeInBits();
// fold vector ops
if (VT.isVector()) {
@@ -1786,7 +1786,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
if (RAND.getNode() != 0)
return RAND;
- // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF
+ // fold (and (or x, C), D) -> D if (C & D) == D
if (N1C && N0.getOpcode() == ISD::OR)
if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
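
The reworded comment generalizes the old 0xFFFF/0xFF example. A quick standalone check of the identity it states, using arbitrary constants chosen so that (C & D) == D:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C = 0xFFF0, D = 0x0FF0;  // arbitrary, chosen so (C & D) == D
      assert((C & D) == D);
      for (uint32_t x = 0; x < 0x10000; ++x)  // sample a range of x values
        assert(((x | C) & D) == D);           // (and (or x, C), D) folds to D
      return 0;
    }
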
@@ -1872,16 +1872,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getValueSizeInBits();
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getSizeInBits())) &&
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
LN0->getChain(), LN0->getBasePtr(),
LN0->getSrcValue(),
LN0->getSrcValueOffset(), MemVT,
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
AddToWorkList(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -1894,16 +1895,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getValueSizeInBits();
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getSizeInBits())) &&
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(), MemVT,
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
AddToWorkList(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -1935,7 +1937,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
LN0->getChain(), LN0->getBasePtr(),
LN0->getSrcValue(), LN0->getSrcValueOffset(),
- ExtVT, LN0->isVolatile(), LN0->getAlignment());
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
AddToWorkList(N);
CombineTo(LN0, NewLoad, NewLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -1970,7 +1973,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
LN0->getChain(), NewPtr,
LN0->getSrcValue(), LN0->getSrcValueOffset(),
- ExtVT, LN0->isVolatile(), Alignment);
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ Alignment);
AddToWorkList(N);
CombineTo(LN0, Load, Load.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -2021,13 +2025,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (ROR.getNode() != 0)
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+ // iff (c1 & c2) == 0.
if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
- return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
- DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
- N0.getOperand(0), N1),
- DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
+ if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1),
+ DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
}
// fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
@@ -2750,7 +2756,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N1C && N0.getOpcode() == ISD::CTLZ &&
N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
// If any of the input bits are KnownOne, then the input couldn't be all
@@ -3143,7 +3149,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(),
N0.getValueType(),
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
N0.getValueType(), ExtLoad);
@@ -3185,7 +3192,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(), MemVT,
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
@@ -3315,7 +3323,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(),
N0.getValueType(),
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
N0.getValueType(), ExtLoad);
@@ -3357,7 +3366,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(), MemVT,
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
@@ -3471,7 +3481,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(),
N0.getValueType(),
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
N0.getValueType(), ExtLoad);
@@ -3513,7 +3524,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
VT, LN0->getChain(), LN0->getBasePtr(),
LN0->getSrcValue(),
LN0->getSrcValueOffset(), MemVT,
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
@@ -3636,10 +3648,11 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
SDValue Load = (ExtType == ISD::NON_EXTLOAD)
? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
- LN0->isVolatile(), NewAlign)
+ LN0->isVolatile(), LN0->isNonTemporal(), NewAlign)
: DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr,
LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
- ExtVT, LN0->isVolatile(), NewAlign);
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ NewAlign);
// Replace the old load's chain with the new load's chain.
WorkListRemover DeadNodes(*this);
@@ -3726,7 +3739,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(), EVT,
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -3742,7 +3756,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(), EVT,
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -3826,7 +3841,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
LD1->getBasePtr(), LD1->getSrcValue(),
- LD1->getSrcValueOffset(), false, Align);
+ LD1->getSrcValueOffset(), false, false, Align);
}
return SDValue();
@@ -3896,7 +3911,8 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
LN0->getBasePtr(),
LN0->getSrcValue(), LN0->getSrcValueOffset(),
- LN0->isVolatile(), OrigAlign);
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ OrigAlign);
AddToWorkList(N);
CombineTo(N0.getNode(),
DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
@@ -4492,7 +4508,8 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(),
N0.getValueType(),
- LN0->isVolatile(), LN0->getAlignment());
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(),
@@ -4640,7 +4657,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
DAG.DeleteNode(Trunc);
}
// Replace the uses of SRL with SETCC
- DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
removeFromWorkList(N1.getNode());
DAG.DeleteNode(N1.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -4648,6 +4666,56 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
}
}
}
+
+ // Transform br(xor(x, y)) -> br(x != y)
+ // Transform br(xor(xor(x,y), 1)) -> br (x == y)
+ if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
+ SDNode *TheXor = N1.getNode();
+ SDValue Op0 = TheXor->getOperand(0);
+ SDValue Op1 = TheXor->getOperand(1);
+ if (Op0.getOpcode() == Op1.getOpcode()) {
+ // Avoid missing important xor optimizations.
+ SDValue Tmp = visitXOR(TheXor);
+ if (Tmp.getNode()) {
+ DEBUG(dbgs() << "\nReplacing.8 ";
+ TheXor->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Tmp.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes);
+ removeFromWorkList(TheXor);
+ DAG.DeleteNode(TheXor);
+ return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, Tmp, N2);
+ }
+ }
+
+ if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
+ bool Equal = false;
+ if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
+ if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::XOR) {
+ TheXor = Op0.getNode();
+ Equal = true;
+ }
+
+ EVT SetCCVT = N1.getValueType();
+ if (LegalTypes)
+ SetCCVT = TLI.getSetCCResultType(SetCCVT);
+ SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
+ SetCCVT,
+ Op0, Op1,
+ Equal ? ISD::SETEQ : ISD::SETNE);
+ // Replace the uses of XOR with SETCC
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+ removeFromWorkList(N1.getNode());
+ DAG.DeleteNode(N1.getNode());
+ return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, SetCC, N2);
+ }
+ }
return SDValue();
}
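
The two BRCOND transforms introduced above rest on simple i1 identities: the xor of two booleans is their inequality, and xoring that result with 1 flips it to equality. A standalone check of both, with 0/1 standing in for i1 values:

    #include <cassert>

    int main() {
      for (int x = 0; x <= 1; ++x)
        for (int y = 0; y <= 1; ++y) {
          assert(((x ^ y) != 0) == (x != y));        // br(xor(x, y))         ~ br(x != y)
          assert((((x ^ y) ^ 1) != 0) == (x == y));  // br(xor(xor(x, y), 1)) ~ br(x == y)
        }
      return 0;
    }
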
@@ -4960,7 +5028,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
LD->getValueType(0),
Chain, Ptr, LD->getSrcValue(),
LD->getSrcValueOffset(), LD->getMemoryVT(),
- LD->isVolatile(), Align);
+ LD->isVolatile(), LD->isNonTemporal(), Align);
}
}
@@ -4997,7 +5065,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
- DEBUG(dbgs() << "\nReplacing.6 ";
+ DEBUG(dbgs() << "\nReplacing.7 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
Undef.getNode()->dump(&DAG);
@@ -5042,7 +5110,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
BetterChain, Ptr,
LD->getSrcValue(), LD->getSrcValueOffset(),
- LD->isVolatile(), LD->getAlignment());
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
} else {
ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
LD->getValueType(0),
@@ -5050,6 +5119,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
LD->getSrcValueOffset(),
LD->getMemoryVT(),
LD->isVolatile(),
+ LD->isNonTemporal(),
LD->getAlignment());
}
@@ -5149,13 +5219,14 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
LD->getChain(), NewPtr,
LD->getSrcValue(), LD->getSrcValueOffset(),
- LD->isVolatile(), NewAlign);
+ LD->isVolatile(), LD->isNonTemporal(),
+ NewAlign);
SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
DAG.getConstant(NewImm, NewVT));
SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
NewVal, NewPtr,
ST->getSrcValue(), ST->getSrcValueOffset(),
- false, NewAlign);
+ false, false, NewAlign);
AddToWorkList(NewPtr.getNode());
AddToWorkList(NewLD.getNode());
@@ -5184,7 +5255,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
Ptr, ST->getSrcValue(),
ST->getSrcValueOffset(), ST->getMemoryVT(),
- ST->isVolatile(), Align);
+ ST->isVolatile(), ST->isNonTemporal(), Align);
}
}
@@ -5201,7 +5272,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->isVolatile(), OrigAlign);
+ ST->getSrcValueOffset(), ST->isVolatile(),
+ ST->isNonTemporal(), OrigAlign);
}
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
@@ -5227,7 +5299,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
Ptr, ST->getSrcValue(),
ST->getSrcValueOffset(), ST->isVolatile(),
- ST->getAlignment());
+ ST->isNonTemporal(), ST->getAlignment());
}
break;
case MVT::f64:
@@ -5239,7 +5311,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
Ptr, ST->getSrcValue(),
ST->getSrcValueOffset(), ST->isVolatile(),
- ST->getAlignment());
+ ST->isNonTemporal(), ST->getAlignment());
} else if (!ST->isVolatile() &&
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
// Many FP stores are not made apparent until after legalize, e.g. for
@@ -5253,18 +5325,21 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
int SVOffset = ST->getSrcValueOffset();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
Ptr, ST->getSrcValue(),
ST->getSrcValueOffset(),
- isVolatile, ST->getAlignment());
+ isVolatile, isNonTemporal,
+ ST->getAlignment());
Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
DAG.getConstant(4, Ptr.getValueType()));
SVOffset += 4;
Alignment = MinAlign(Alignment, 4U);
SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
Ptr, ST->getSrcValue(),
- SVOffset, isVolatile, Alignment);
+ SVOffset, isVolatile, isNonTemporal,
+ Alignment);
return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
St0, St1);
}
@@ -5286,12 +5361,13 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (ST->isTruncatingStore()) {
ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
ST->getSrcValue(),ST->getSrcValueOffset(),
- ST->getMemoryVT(),
- ST->isVolatile(), ST->getAlignment());
+ ST->getMemoryVT(), ST->isVolatile(),
+ ST->isNonTemporal(), ST->getAlignment());
} else {
ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
ST->getSrcValue(), ST->getSrcValueOffset(),
- ST->isVolatile(), ST->getAlignment());
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
}
// Create token to keep both nodes around.
@@ -5325,7 +5401,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
Ptr, ST->getSrcValue(),
ST->getSrcValueOffset(), ST->getMemoryVT(),
- ST->isVolatile(), ST->getAlignment());
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
// Otherwise, see if we can simplify the operation with
// SimplifyDemandedBits, which only works if the value has a single use.
@@ -5358,7 +5435,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
Ptr, ST->getSrcValue(),
ST->getSrcValueOffset(), ST->getMemoryVT(),
- ST->isVolatile(), ST->getAlignment());
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
}
return ReduceLoadOpStoreWidth(N);
@@ -5503,7 +5581,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
LN0->getSrcValue(), LN0->getSrcValueOffset(),
- LN0->isVolatile(), Align);
+ LN0->isVolatile(), LN0->isNonTemporal(), Align);
}
return SDValue();
@@ -5883,6 +5961,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
LLD->getChain(),
Addr, 0, 0,
LLD->isVolatile(),
+ LLD->isNonTemporal(),
LLD->getAlignment());
} else {
Load = DAG.getExtLoad(LLD->getExtensionType(),
@@ -5891,6 +5970,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
LLD->getChain(), Addr, 0, 0,
LLD->getMemoryVT(),
LLD->isVolatile(),
+ LLD->isNonTemporal(),
LLD->getAlignment());
}
@@ -5998,7 +6078,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
CstOffset);
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0, false,
- Alignment);
+ false, Alignment);
}
}
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 35ef5b7..1d76c7c 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -350,6 +350,34 @@ bool FastISel::SelectCall(User *I) {
(void)TargetSelectInstruction(cast<Instruction>(I));
return true;
}
+ case Intrinsic::dbg_value: {
+ // This requires target support, but right now X86 is the only Fast target.
+ DbgValueInst *DI = cast<DbgValueInst>(I);
+ const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+ Value *V = DI->getValue();
+ if (!V) {
+ // Currently the optimizer can produce this; insert an undef to
+ // help debugging. Probably the optimizer should not do this.
+ BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()).
+ addMetadata(DI->getVariable());
+ } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ BuildMI(MBB, DL, II).addImm(CI->getZExtValue()).addImm(DI->getOffset()).
+ addMetadata(DI->getVariable());
+ } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ BuildMI(MBB, DL, II).addFPImm(CF).addImm(DI->getOffset()).
+ addMetadata(DI->getVariable());
+ } else if (unsigned Reg = lookUpRegForValue(V)) {
+ BuildMI(MBB, DL, II).addReg(Reg, RegState::Debug).addImm(DI->getOffset()).
+ addMetadata(DI->getVariable());
+ } else {
+ // We can't yet handle anything else here because it would require
+ // generating code, thus altering codegen because of debug info.
+ // Insert an undef so we can see what we dropped.
+ BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()).
+ addMetadata(DI->getVariable());
+ }
+ return true;
+ }
case Intrinsic::eh_exception: {
EVT VT = TLI.getValueType(I->getType());
switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) {
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 02fe85d..625de11 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "instr-emitter"
#include "InstrEmitter.h"
+#include "SDDbgValue.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -497,6 +498,56 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
assert(isNew && "Node emitted out of order - early");
}
+/// EmitDbgValue - Generate any debug info that refers to this Node. Constant
+/// dbg_value is not handled here.
+void
+InstrEmitter::EmitDbgValue(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ SDDbgValue *sd) {
+ if (!Node->getHasDebugValue())
+ return;
+ if (!sd)
+ return;
+ unsigned VReg = getVR(SDValue(sd->getSDNode(), sd->getResNo()), VRBaseMap);
+ const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
+ DebugLoc DL = sd->getDebugLoc();
+ MachineInstr *MI;
+ if (VReg) {
+ MI = BuildMI(*MF, DL, II).addReg(VReg, RegState::Debug).
+ addImm(sd->getOffset()).
+ addMetadata(sd->getMDPtr());
+ } else {
+ // Insert an Undef so we can see what we dropped.
+ MI = BuildMI(*MF, DL, II).addReg(0U).addImm(sd->getOffset()).
+ addMetadata(sd->getMDPtr());
+ }
+ MBB->insert(InsertPos, MI);
+}
+
+/// EmitDbgValue - Generate constant debug info. No SDNode is involved.
+void
+InstrEmitter::EmitDbgValue(SDDbgValue *sd) {
+ if (!sd)
+ return;
+ const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
+ DebugLoc DL = sd->getDebugLoc();
+ MachineInstr *MI;
+ Value *V = sd->getConst();
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ MI = BuildMI(*MF, DL, II).addImm(CI->getZExtValue()).
+ addImm(sd->getOffset()).
+ addMetadata(sd->getMDPtr());
+ } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ MI = BuildMI(*MF, DL, II).addFPImm(CF).addImm(sd->getOffset()).
+ addMetadata(sd->getMDPtr());
+ } else {
+ // Insert an Undef so we can see what we dropped.
+ MI = BuildMI(*MF, DL, II).addReg(0U).addImm(sd->getOffset()).
+ addMetadata(sd->getMDPtr());
+ }
+ MBB->insert(InsertPos, MI);
+}
+
/// EmitNode - Generate machine code for a node and needed dependencies.
///
void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 91817e4..4fe9f19 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -23,6 +23,7 @@
namespace llvm {
class TargetInstrDesc;
+class SDDbgValue;
class InstrEmitter {
MachineFunction *MF;
@@ -97,6 +98,16 @@ public:
/// MachineInstr.
static unsigned CountOperands(SDNode *Node);
+ /// EmitDbgValue - Generate any debug info that refers to this Node. Constant
+ /// dbg_value is not handled here.
+ void EmitDbgValue(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ SDDbgValue* sd);
+
+
+ /// EmitDbgValue - Generate a constant DBG_VALUE. No node is involved.
+ void EmitDbgValue(SDDbgValue* sd);
+
/// EmitNode - Generate machine code for a node and needed dependencies.
///
void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 78e6e4e..f498263 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -377,9 +377,10 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
return DAG.getExtLoad(ISD::EXTLOAD, dl,
OrigVT, DAG.getEntryNode(),
CPIdx, PseudoSourceValue::getConstantPool(),
- 0, VT, false, Alignment);
+ 0, VT, false, false, Alignment);
return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0, false, Alignment);
+ PseudoSourceValue::getConstantPool(), 0, false, false,
+ Alignment);
}
/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores.
@@ -402,7 +403,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// FIXME: Does not handle truncating floating point stores!
SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val);
return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(),
- SVOffset, ST->isVolatile(), Alignment);
+ SVOffset, ST->isVolatile(), ST->isNonTemporal(),
+ Alignment);
} else {
// Do a (aligned) store to a stack slot, then copy from the stack slot
// to the final destination using (unaligned) integer loads and stores.
@@ -418,7 +420,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Perform the original store, only redirected to the stack slot.
SDValue Store = DAG.getTruncStore(Chain, dl,
- Val, StackPtr, NULL, 0, StoredVT);
+ Val, StackPtr, NULL, 0, StoredVT,
+ false, false, 0);
SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
SmallVector<SDValue, 8> Stores;
unsigned Offset = 0;
@@ -426,11 +429,12 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Do all but one copies using the full register width.
for (unsigned i = 1; i < NumRegs; i++) {
// Load one integer register's worth from the stack slot.
- SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0);
+ SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0,
+ false, false, 0);
// Store it to the final location. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
ST->getSrcValue(), SVOffset + Offset,
- ST->isVolatile(),
+ ST->isVolatile(), ST->isNonTemporal(),
MinAlign(ST->getAlignment(), Offset)));
// Increment the pointers.
Offset += RegBytes;
@@ -446,11 +450,12 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Load from the stack slot.
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
- NULL, 0, MemVT);
+ NULL, 0, MemVT, false, false, 0);
Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
ST->getSrcValue(), SVOffset + Offset,
MemVT, ST->isVolatile(),
+ ST->isNonTemporal(),
MinAlign(ST->getAlignment(), Offset)));
// The order of the stores doesn't matter - say it with a TokenFactor.
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
@@ -474,13 +479,14 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
SDValue Store1, Store2;
Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
ST->getSrcValue(), SVOffset, NewStoredVT,
- ST->isVolatile(), Alignment);
+ ST->isVolatile(), ST->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
Alignment = MinAlign(Alignment, IncrementSize);
Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
ST->getSrcValue(), SVOffset + IncrementSize,
- NewStoredVT, ST->isVolatile(), Alignment);
+ NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
+ Alignment);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
}
@@ -502,7 +508,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// then bitconvert to floating point or vector.
SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(),
SVOffset, LD->isVolatile(),
- LD->getAlignment());
+ LD->isNonTemporal(), LD->getAlignment());
SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad);
if (VT.isFloatingPoint() && LoadedVT != VT)
Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
@@ -530,10 +536,11 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// Load one integer register's worth from the original location.
SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(),
SVOffset + Offset, LD->isVolatile(),
+ LD->isNonTemporal(),
MinAlign(LD->getAlignment(), Offset));
// Follow the load with a store to the stack slot. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
- NULL, 0));
+ NULL, 0, false, false, 0));
// Increment the pointers.
Offset += RegBytes;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
@@ -546,12 +553,13 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
LD->getSrcValue(), SVOffset + Offset,
MemVT, LD->isVolatile(),
+ LD->isNonTemporal(),
MinAlign(LD->getAlignment(), Offset));
// Follow the load with a store to the stack slot. Remember the store.
// On big-endian machines this requires a truncating store to ensure
// that the bits end up in the right place.
Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
- NULL, 0, MemVT));
+ NULL, 0, MemVT, false, false, 0));
// The order of the stores doesn't matter - say it with a TokenFactor.
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
@@ -559,7 +567,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// Finally, perform the original load only redirected to the stack slot.
Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
- NULL, 0, LoadedVT);
+ NULL, 0, LoadedVT, false, false, 0);
// Callers expect a MERGE_VALUES node.
SDValue Ops[] = { Load, TF };
@@ -588,20 +596,22 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
SDValue Lo, Hi;
if (TLI.isLittleEndian()) {
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
- SVOffset, NewLoadedVT, LD->isVolatile(), Alignment);
+ SVOffset, NewLoadedVT, LD->isVolatile(),
+ LD->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
- MinAlign(Alignment, IncrementSize));
+ LD->isNonTemporal(), MinAlign(Alignment, IncrementSize));
} else {
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
- SVOffset, NewLoadedVT, LD->isVolatile(), Alignment);
+ SVOffset, NewLoadedVT, LD->isVolatile(),
+ LD->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
- MinAlign(Alignment, IncrementSize));
+ LD->isNonTemporal(), MinAlign(Alignment, IncrementSize));
}
// aggregate the two parts
@@ -643,7 +653,8 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
// Store the vector.
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
- PseudoSourceValue::getFixedStack(SPFI), 0);
+ PseudoSourceValue::getFixedStack(SPFI), 0,
+ false, false, 0);
// Truncate or zero extend offset to target pointer type.
unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
@@ -654,10 +665,12 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
// Store the scalar value.
Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2,
- PseudoSourceValue::getFixedStack(SPFI), 0, EltVT);
+ PseudoSourceValue::getFixedStack(SPFI), 0, EltVT,
+ false, false, 0);
// Load the updated vector.
return DAG.getLoad(VT, dl, Ch, StackPtr,
- PseudoSourceValue::getFixedStack(SPFI), 0);
+ PseudoSourceValue::getFixedStack(SPFI), 0,
+ false, false, 0);
}
@@ -702,6 +715,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
int SVOffset = ST->getSrcValueOffset();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
DebugLoc dl = ST->getDebugLoc();
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
if (CFP->getValueType(0) == MVT::f32 &&
@@ -710,14 +724,14 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
bitcastToAPInt().zextOrTrunc(32),
MVT::i32);
return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, Alignment);
+ SVOffset, isVolatile, isNonTemporal, Alignment);
} else if (CFP->getValueType(0) == MVT::f64) {
// If this target supports 64-bit registers, do a single 64-bit store.
if (getTypeAction(MVT::i64) == Legal) {
Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
zextOrTrunc(64), MVT::i64);
return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, Alignment);
+ SVOffset, isVolatile, isNonTemporal, Alignment);
} else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
// Otherwise, if the target supports 32-bit registers, use 2 32-bit
// stores. If the target supports neither 32- nor 64-bits, this
@@ -728,11 +742,11 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
if (TLI.isBigEndian()) std::swap(Lo, Hi);
Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, Alignment);
+ SVOffset, isVolatile, isNonTemporal, Alignment);
Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
DAG.getIntPtrConstant(4));
Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4,
- isVolatile, MinAlign(Alignment, 4U));
+ isVolatile, isNonTemporal, MinAlign(Alignment, 4U));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -1108,7 +1122,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
LD->getSrcValueOffset(),
- LD->isVolatile(), LD->getAlignment());
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1));
Tmp4 = LegalizeOp(Tmp1.getValue(1));
break;
@@ -1125,6 +1140,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
int SVOffset = LD->getSrcValueOffset();
unsigned Alignment = LD->getAlignment();
bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
if (SrcWidth != SrcVT.getStoreSizeInBits() &&
// Some targets pretend to have an i1 loading operation, and actually
@@ -1150,7 +1166,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
Tmp1, Tmp2, LD->getSrcValue(), SVOffset,
- NVT, isVolatile, Alignment);
+ NVT, isVolatile, isNonTemporal, Alignment);
Ch = Result.getValue(1); // The chain.
@@ -1187,7 +1203,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl,
Node->getValueType(0), Tmp1, Tmp2,
LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
- Alignment);
+ isNonTemporal, Alignment);
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
@@ -1195,7 +1211,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
DAG.getIntPtrConstant(IncrementSize));
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
LD->getSrcValue(), SVOffset + IncrementSize,
- ExtraVT, isVolatile,
+ ExtraVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
// Build a factor node to remember that this load is independent of the
@@ -1215,7 +1231,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Load the top RoundWidth bits.
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
- Alignment);
+ isNonTemporal, Alignment);
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
@@ -1224,7 +1240,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl,
Node->getValueType(0), Tmp1, Tmp2,
LD->getSrcValue(), SVOffset + IncrementSize,
- ExtraVT, isVolatile,
+ ExtraVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
// Build a factor node to remember that this load is independent of the
@@ -1284,7 +1300,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
(SrcVT == MVT::f64 && Node->getValueType(0) == MVT::f128)) {
SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
LD->getSrcValueOffset(),
- LD->isVolatile(), LD->getAlignment());
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
Result = DAG.getNode(ISD::FP_EXTEND, dl,
Node->getValueType(0), Load);
Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
@@ -1297,7 +1314,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
Tmp1, Tmp2, LD->getSrcValue(),
LD->getSrcValueOffset(), SrcVT,
- LD->isVolatile(), LD->getAlignment());
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
SDValue ValRes;
if (ExtType == ISD::SEXTLOAD)
ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
@@ -1325,6 +1343,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
int SVOffset = ST->getSrcValueOffset();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
if (!ST->isTruncatingStore()) {
if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
@@ -1361,7 +1380,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
ST->getSrcValue(), SVOffset, isVolatile,
- Alignment);
+ isNonTemporal, Alignment);
break;
}
break;
@@ -1379,7 +1398,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits());
Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, NVT, isVolatile, Alignment);
+ SVOffset, NVT, isVolatile, isNonTemporal,
+ Alignment);
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
assert(!StVT.isVector() && "Unsupported truncstore!");
@@ -1399,7 +1419,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Store the bottom RoundWidth bits.
Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
SVOffset, RoundVT,
- isVolatile, Alignment);
+ isVolatile, isNonTemporal, Alignment);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
@@ -1409,6 +1429,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
SVOffset + IncrementSize, ExtraVT, isVolatile,
+ isNonTemporal,
MinAlign(Alignment, IncrementSize));
} else {
// Big endian - avoid unaligned stores.
@@ -1417,7 +1438,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
- SVOffset, RoundVT, isVolatile, Alignment);
+ SVOffset, RoundVT, isVolatile, isNonTemporal,
+ Alignment);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
@@ -1425,6 +1447,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
DAG.getIntPtrConstant(IncrementSize));
Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
SVOffset + IncrementSize, ExtraVT, isVolatile,
+ isNonTemporal,
MinAlign(Alignment, IncrementSize));
}
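
For reference, the non-power-of-2 truncstore expansion in the hunks above splits StWidth into the largest power-of-two RoundWidth plus the remaining ExtraWidth, and places the second store RoundWidth/8 bytes past the first. A minimal standalone sketch of that width arithmetic (the i24 example and variable layout are illustrative, not taken from the patch):

    #include <cstdio>

    int main() {
      // e.g. a truncating i24 store: split into an i16 piece and an i8 piece.
      unsigned StWidth = 24;
      unsigned RoundWidth = 1;                  // largest power of two <= StWidth
      while (RoundWidth * 2 <= StWidth) RoundWidth *= 2;
      unsigned ExtraWidth = StWidth - RoundWidth;
      unsigned IncrementSize = RoundWidth / 8;  // byte offset of the second store
      std::printf("round=%u extra=%u offset=+%u\n",
                  RoundWidth, ExtraWidth, IncrementSize); // round=16 extra=8 offset=+2
    }
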
@@ -1457,7 +1480,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, Alignment);
+ SVOffset, isVolatile, isNonTemporal,
+ Alignment);
break;
}
}
@@ -1484,7 +1508,8 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
DebugLoc dl = Op.getDebugLoc();
// Store the value to a temporary stack slot, then LOAD the returned part.
SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0);
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0,
+ false, false, 0);
// Add the offset to the index.
unsigned EltSize =
@@ -1500,10 +1525,12 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
if (Op.getValueType().isVector())
- return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0);
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0,
+ false, false, 0);
else
return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
- NULL, 0, Vec.getValueType().getVectorElementType());
+ NULL, 0, Vec.getValueType().getVectorElementType(),
+ false, false, 0);
}
SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
@@ -1512,7 +1539,6 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
// the result as a vector.
// Create the stack frame object.
EVT VT = Node->getValueType(0);
- EVT OpVT = Node->getOperand(0).getValueType();
EVT EltVT = VT.getVectorElementType();
DebugLoc dl = Node->getDebugLoc();
SDValue FIPtr = DAG.CreateStackTemporary(VT);
@@ -1532,13 +1558,16 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
- // If EltVT smaller than OpVT, only store the bits necessary.
- if (!OpVT.isVector() && EltVT.bitsLT(OpVT)) {
+ // If the destination vector element type is narrower than the source
+ // element type, only store the bits necessary.
+ if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
- Node->getOperand(i), Idx, SV, Offset, EltVT));
+ Node->getOperand(i), Idx, SV, Offset,
+ EltVT, false, false, 0));
} else
Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
- Node->getOperand(i), Idx, SV, Offset));
+ Node->getOperand(i), Idx, SV, Offset,
+ false, false, 0));
}
SDValue StoreChain;
@@ -1549,7 +1578,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
StoreChain = DAG.getEntryNode();
// Result is a load from the stack slot.
- return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0);
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0, false, false, 0);
}
SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
@@ -1572,12 +1601,14 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
SDValue StackPtr = DAG.CreateStackTemporary(Tmp2.getValueType());
SDValue StorePtr = StackPtr, LoadPtr = StackPtr;
SDValue Ch =
- DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0);
+ DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0,
+ false, false, 0);
if (Tmp2.getValueType() == MVT::f64 && TLI.isLittleEndian())
LoadPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(),
LoadPtr, DAG.getIntPtrConstant(4));
SignBit = DAG.getExtLoad(ISD::SEXTLOAD, dl, TLI.getPointerTy(),
- Ch, LoadPtr, NULL, 0, MVT::i32);
+ Ch, LoadPtr, NULL, 0, MVT::i32,
+ false, false, 0);
}
SignBit =
DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()),
@@ -1701,20 +1732,21 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
if (SrcSize > SlotSize)
Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
- SV, 0, SlotVT, false, SrcAlign);
+ SV, 0, SlotVT, false, false, SrcAlign);
else {
assert(SrcSize == SlotSize && "Invalid store");
Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
- SV, 0, false, SrcAlign);
+ SV, 0, false, false, SrcAlign);
}
// Result is a load from the stack slot.
if (SlotSize == DestSize)
- return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, DestAlign);
+ return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, false,
+ DestAlign);
assert(SlotSize < DestSize && "Unknown extension!");
return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT,
- false, DestAlign);
+ false, false, DestAlign);
}
SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
@@ -1729,9 +1761,11 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
StackPtr,
PseudoSourceValue::getFixedStack(SPFI), 0,
- Node->getValueType(0).getVectorElementType());
+ Node->getValueType(0).getVectorElementType(),
+ false, false, 0);
return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
- PseudoSourceValue::getFixedStack(SPFI), 0);
+ PseudoSourceValue::getFixedStack(SPFI), 0,
+ false, false, 0);
}
@@ -1805,7 +1839,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0,
- false, Alignment);
+ false, false, Alignment);
}
if (!MoreThanTwoValues) {
@@ -1865,8 +1899,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), false,
/*isReturnValueUsed=*/true,
- Callee, Args, DAG,
- Node->getDebugLoc(), DAG.GetOrdering(Node));
+ Callee, Args, DAG, Node->getDebugLoc());
// Legalize the call sequence, starting with the chain. This will advance
// the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
@@ -1943,13 +1976,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
}
// store the lo of the constructed double - based on integer input
SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
- Op0Mapped, Lo, NULL, 0);
+ Op0Mapped, Lo, NULL, 0,
+ false, false, 0);
// initial hi portion of constructed double
SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
// store the hi of the constructed double - biased exponent
- SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0);
+ SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0,
+ false, false, 0);
// load the constructed double
- SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0);
+ SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0,
+ false, false, 0);
// FP constant to bias correct the final result
SDValue Bias = DAG.getConstantFP(isSigned ?
BitsToDouble(0x4330000080000000ULL) :
@@ -1972,6 +2008,31 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
return Result;
}
assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+
+ // Implementation of unsigned i64 to f64 following the algorithm in
+ // __floatundidf in compiler_rt. This implementation has the advantage
+ // of performing rounding correctly, both in the default rounding mode
+ // and in all alternate rounding modes.
+ // TODO: Generalize this for use with other types.
+ if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) {
+ SDValue TwoP52 =
+ DAG.getConstant(UINT64_C(0x4330000000000000), MVT::i64);
+ SDValue TwoP84PlusTwoP52 =
+ DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), MVT::f64);
+ SDValue TwoP84 =
+ DAG.getConstant(UINT64_C(0x4530000000000000), MVT::i64);
+
+ SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32);
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0,
+ DAG.getConstant(32, MVT::i64));
+ SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);
+ SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
+ SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr);
+ SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr);
+ SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, TwoP84PlusTwoP52);
+ return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
+ }
+
SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()),
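
The new i64-to-f64 path added above is the standard __floatundidf bit trick: pack the low and high 32-bit halves into the mantissas of 2^52 and 2^84, cancel the bias constants, and let a single FP add perform the only rounding. A minimal scalar C++ model of the same sequence (function names here are illustrative, not from the patch):

    #include <cstdint>
    #include <cstring>
    #include <cstdio>

    static double bitsToDouble(uint64_t bits) {  // reinterpret a bit pattern as a double
      double d;
      std::memcpy(&d, &bits, sizeof d);
      return d;
    }

    double u64ToF64(uint64_t x) {
      uint64_t lo = x & 0xffffffffULL;                                   // low 32 bits
      uint64_t hi = x >> 32;                                             // high 32 bits
      double LoFlt = bitsToDouble(lo | UINT64_C(0x4330000000000000));    // 2^52 + lo
      double HiFlt = bitsToDouble(hi | UINT64_C(0x4530000000000000));    // 2^84 + hi*2^32
      double HiSub = HiFlt - bitsToDouble(UINT64_C(0x4530000000100000)); // exact: hi*2^32 - 2^52
      return LoFlt + HiSub;                  // == x, rounded once in the final add
    }

    int main() {
      uint64_t x = UINT64_C(0x8000000000000001);        // above INT64_MAX, so signedness matters
      std::printf("%a\n%a\n", u64ToF64(x), (double)x);  // both print 0x1p+63
    }
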
@@ -2004,13 +2065,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
if (DestVT == MVT::f32)
FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0,
- false, Alignment);
+ false, false, Alignment);
else {
FudgeInReg =
LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0,
- MVT::f32, false, Alignment));
+ MVT::f32, false, false, Alignment));
}
return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
@@ -2271,7 +2332,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
false, false, false, false, 0, CallingConv::C, false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("abort", TLI.getPointerTy()),
- Args, DAG, dl, DAG.GetOrdering(Node));
+ Args, DAG, dl);
Results.push_back(CallResult.second);
break;
}
@@ -2350,16 +2411,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
EVT VT = Node->getValueType(0);
Tmp1 = Node->getOperand(0);
Tmp2 = Node->getOperand(1);
- SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0);
+ SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0,
+ false, false, 0);
// Increment the pointer, VAList, to the next vaarg
Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
DAG.getConstant(TLI.getTargetData()->
getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
TLI.getPointerTy()));
// Store the incremented VAList to the legalized pointer
- Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0);
+ Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0,
+ false, false, 0);
// Load the actual argument out of the pointer VAList
- Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0));
+ Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0,
+ false, false, 0));
Results.push_back(Results[0].getValue(1));
break;
}
@@ -2369,8 +2433,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
- Node->getOperand(2), VS, 0);
- Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0);
+ Node->getOperand(2), VS, 0, false, false, 0);
+ Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0,
+ false, false, 0);
Results.push_back(Tmp1);
break;
}
@@ -2827,7 +2892,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
- PseudoSourceValue::getJumpTable(), 0, MemVT);
+ PseudoSourceValue::getJumpTable(), 0, MemVT,
+ false, false, 0);
Addr = LD;
if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) {
// For PIC, the sequence is:
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 4f0fce7..35a7c7c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -444,7 +444,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(),
NVT, L->getChain(), L->getBasePtr(), L->getOffset(),
L->getSrcValue(), L->getSrcValueOffset(), NVT,
- L->isVolatile(), L->getAlignment());
+ L->isVolatile(), L->isNonTemporal(), L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -456,8 +456,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
L->getMemoryVT(), L->getChain(),
L->getBasePtr(), L->getOffset(),
L->getSrcValue(), L->getSrcValueOffset(),
- L->getMemoryVT(),
- L->isVolatile(), L->getAlignment());
+ L->getMemoryVT(), L->isVolatile(),
+ L->isNonTemporal(), L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -755,7 +755,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
ST->getSrcValue(), ST->getSrcValueOffset(),
- ST->isVolatile(), ST->getAlignment());
+ ST->isVolatile(), ST->isNonTemporal(),
+ ST->getAlignment());
}
@@ -1073,8 +1074,8 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
LD->getSrcValue(), LD->getSrcValueOffset(),
- LD->getMemoryVT(),
- LD->isVolatile(), LD->getAlignment());
+ LD->getMemoryVT(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
// Remember the chain.
Chain = Hi.getValue(1);
@@ -1382,6 +1383,6 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr,
ST->getSrcValue(), ST->getSrcValueOffset(),
- ST->getMemoryVT(),
- ST->isVolatile(), ST->getAlignment());
+ ST->getMemoryVT(), ST->isVolatile(),
+ ST->isNonTemporal(), ST->getAlignment());
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 9932cf4..81f28ad 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -359,7 +359,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
N->getSrcValue(), N->getSrcValueOffset(),
N->getMemoryVT(), N->isVolatile(),
- N->getAlignment());
+ N->isNonTemporal(), N->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -873,6 +873,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
int SVOffset = N->getSrcValueOffset();
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
+ bool isNonTemporal = N->isNonTemporal();
DebugLoc dl = N->getDebugLoc();
SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value.
@@ -880,7 +881,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
// Truncate the value and store the result.
return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(),
SVOffset, N->getMemoryVT(),
- isVolatile, Alignment);
+ isVolatile, isNonTemporal, Alignment);
}
SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
@@ -1079,8 +1080,8 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Amt = N->getOperand(1);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT ShTy = Amt.getValueType();
- unsigned ShBits = ShTy.getSizeInBits();
- unsigned NVTBits = NVT.getSizeInBits();
+ unsigned ShBits = ShTy.getScalarType().getSizeInBits();
+ unsigned NVTBits = NVT.getScalarType().getSizeInBits();
assert(isPowerOf2_32(NVTBits) &&
"Expanded integer type size not a power of two!");
DebugLoc dl = N->getDebugLoc();
@@ -1500,6 +1501,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
int SVOffset = N->getSrcValueOffset();
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
+ bool isNonTemporal = N->isNonTemporal();
DebugLoc dl = N->getDebugLoc();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
@@ -1508,7 +1510,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
EVT MemVT = N->getMemoryVT();
Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset,
- MemVT, isVolatile, Alignment);
+ MemVT, isVolatile, isNonTemporal, Alignment);
// Remember the chain.
Ch = Lo.getValue(1);
@@ -1530,7 +1532,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
} else if (TLI.isLittleEndian()) {
// Little-endian - low bits are at low addresses.
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
- isVolatile, Alignment);
+ isVolatile, isNonTemporal, Alignment);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -1542,7 +1544,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
DAG.getIntPtrConstant(IncrementSize));
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(),
SVOffset+IncrementSize, NEVT,
- isVolatile, MinAlign(Alignment, IncrementSize));
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1560,7 +1563,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset,
EVT::getIntegerVT(*DAG.getContext(),
MemVT.getSizeInBits() - ExcessBits),
- isVolatile, Alignment);
+ isVolatile, isNonTemporal, Alignment);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
@@ -1569,7 +1572,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(),
SVOffset+IncrementSize,
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
- isVolatile, MinAlign(Alignment, IncrementSize));
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
// Build a factor node to remember that this load is independent of the
// other one.
@@ -2212,6 +2216,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
int SVOffset = N->getSrcValueOffset();
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
+ bool isNonTemporal = N->isNonTemporal();
DebugLoc dl = N->getDebugLoc();
SDValue Lo, Hi;
@@ -2220,13 +2225,14 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (N->getMemoryVT().bitsLE(NVT)) {
GetExpandedInteger(N->getValue(), Lo, Hi);
return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
- N->getMemoryVT(), isVolatile, Alignment);
+ N->getMemoryVT(), isVolatile, isNonTemporal,
+ Alignment);
} else if (TLI.isLittleEndian()) {
// Little-endian - low bits are at low addresses.
GetExpandedInteger(N->getValue(), Lo, Hi);
Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
- isVolatile, Alignment);
+ isVolatile, isNonTemporal, Alignment);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -2238,7 +2244,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
DAG.getIntPtrConstant(IncrementSize));
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
SVOffset+IncrementSize, NEVT,
- isVolatile, MinAlign(Alignment, IncrementSize));
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
} else {
// Big-endian - high bits are at low addresses. Favor aligned stores at
@@ -2264,7 +2271,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Store both the high bits and maybe some of the low bits.
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
- SVOffset, HiVT, isVolatile, Alignment);
+ SVOffset, HiVT, isVolatile, isNonTemporal,
+ Alignment);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
@@ -2273,7 +2281,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(),
SVOffset+IncrementSize,
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
- isVolatile, MinAlign(Alignment, IncrementSize));
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
}
@@ -2341,7 +2350,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
// FIXME: Avoid the extend by constructing the right constant pool?
SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
FudgePtr, NULL, 0, MVT::f32,
- false, Alignment);
+ false, false, Alignment);
return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 37f36a3..f3e7ca4 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -871,9 +871,10 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
// the source and destination types.
SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
// Emit a store to the stack slot.
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0,
+ false, false, 0);
// Result is a load from the stack slot.
- return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0);
+ return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0, false, false, 0);
}
/// CustomLowerNode - Replace the node's results with custom code provided
@@ -1033,8 +1034,7 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
false, 0, TLI.getLibcallCallingConv(LC), false,
/*isReturnValueUsed=*/true,
- Callee, Args, DAG, dl,
- DAG.GetOrdering(DAG.getEntryNode().getNode()));
+ Callee, Args, DAG, dl);
return CallInfo.first;
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index a1b6ced..5e83b4b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -122,10 +122,11 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
// Emit a store to the stack slot.
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0,
+ false, false, 0);
// Load the first half from the stack slot.
- Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0);
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0, false, false, 0);
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
@@ -134,7 +135,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
// Load the second half from the stack slot.
Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false,
- MinAlign(Alignment, IncrementSize));
+ false, MinAlign(Alignment, IncrementSize));
// Handle endianness of the load.
if (TLI.isBigEndian())
@@ -205,11 +206,12 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
int SVOffset = LD->getSrcValueOffset();
unsigned Alignment = LD->getAlignment();
bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset,
- isVolatile, Alignment);
+ isVolatile, isNonTemporal, Alignment);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
@@ -217,7 +219,8 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
DAG.getIntPtrConstant(IncrementSize));
Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(),
SVOffset+IncrementSize,
- isVolatile, MinAlign(Alignment, IncrementSize));
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
// Build a factor node to remember that this load is independent of the
// other one.
@@ -383,6 +386,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
int SVOffset = St->getSrcValueOffset();
unsigned Alignment = St->getAlignment();
bool isVolatile = St->isVolatile();
+ bool isNonTemporal = St->isNonTemporal();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
unsigned IncrementSize = NVT.getSizeInBits() / 8;
@@ -394,14 +398,15 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
std::swap(Lo, Hi);
Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset,
- isVolatile, Alignment);
+ isVolatile, isNonTemporal, Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(),
SVOffset + IncrementSize,
- isVolatile, MinAlign(Alignment, IncrementSize));
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
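
Every split load and store in these hunks reuses MinAlign(Alignment, IncrementSize) for the second half's alignment. For reference, a standalone restatement of that helper, equivalent to taking the lowest set bit of A | B (the wrapper name here is illustrative):

    #include <cstdio>

    // Minimum alignment that can be assumed for an A-aligned address plus an
    // offset of B bytes.
    static unsigned minAlign(unsigned A, unsigned B) {
      return (A | B) & (~(A | B) + 1);
    }

    int main() {
      // An 8-byte-aligned i64 expanded into two i32 halves: the half at
      // offset +4 can only be assumed 4-byte aligned.
      std::printf("%u\n", minAlign(8, 4));   // prints 4
    }
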
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index bf95bb5..8363c3a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -172,7 +172,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
DAG.getUNDEF(N->getBasePtr().getValueType()),
N->getSrcValue(), N->getSrcValueOffset(),
N->getMemoryVT().getVectorElementType(),
- N->isVolatile(), N->getOriginalAlignment());
+ N->isVolatile(), N->isNonTemporal(),
+ N->getOriginalAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -366,11 +367,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
N->getBasePtr(),
N->getSrcValue(), N->getSrcValueOffset(),
N->getMemoryVT().getVectorElementType(),
- N->isVolatile(), N->getAlignment());
+ N->isVolatile(), N->isNonTemporal(),
+ N->getAlignment());
return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(),
- N->isVolatile(), N->getOriginalAlignment());
+ N->isVolatile(), N->isNonTemporal(),
+ N->getOriginalAlignment());
}
@@ -696,17 +699,20 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0,
+ false, false, 0);
// Store the new element. This may be larger than the vector element type,
// so use a truncating store.
SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
unsigned Alignment =
TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForEVT(*DAG.getContext()));
- Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT);
+ Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT,
+ false, false, 0);
// Load the Lo part from the stack slot.
- Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0);
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0,
+ false, false, 0);
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
@@ -715,7 +721,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false,
- MinAlign(Alignment, IncrementSize));
+ false, MinAlign(Alignment, IncrementSize));
}
void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
@@ -743,19 +749,20 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
EVT MemoryVT = LD->getMemoryVT();
unsigned Alignment = LD->getOriginalAlignment();
bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
EVT LoMemVT, HiMemVT;
GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset,
- SV, SVOffset, LoMemVT, isVolatile, Alignment);
+ SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
SVOffset += IncrementSize;
Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset,
- SV, SVOffset, HiMemVT, isVolatile, Alignment);
+ SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1086,12 +1093,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0,
+ false, false, 0);
// Load back the required element.
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
- SV, 0, EltVT);
+ SV, 0, EltVT, false, false, 0);
}
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -1106,6 +1114,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
EVT MemoryVT = N->getMemoryVT();
unsigned Alignment = N->getOriginalAlignment();
bool isVol = N->isVolatile();
+ bool isNT = N->isNonTemporal();
SDValue Lo, Hi;
GetSplitVector(N->getOperand(1), Lo, Hi);
@@ -1116,10 +1125,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (isTruncating)
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
- LoMemVT, isVol, Alignment);
+ LoMemVT, isVol, isNT, Alignment);
else
Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
- isVol, Alignment);
+ isVol, isNT, Alignment);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
@@ -1128,10 +1137,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (isTruncating)
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
- HiMemVT, isVol, Alignment);
+ HiMemVT, isVol, isNT, Alignment);
else
Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
- isVol, Alignment);
+ isVol, isNT, Alignment);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -1242,10 +1251,96 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
// Binary op widening.
+ unsigned Opcode = N->getOpcode();
+ DebugLoc dl = N->getDebugLoc();
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue InOp1 = GetWidenedVector(N->getOperand(0));
- SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2);
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ EVT VT = WidenVT;
+ unsigned NumElts = VT.getVectorNumElements();
+ while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ }
+
+ if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
+ // Operation doesn't trap so just widen as normal.
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+ } else if (NumElts == 1) {
+ // No legal vector version so unroll the vector operation and then widen.
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+ } else {
+ // Since the operation can trap, apply operation on the original vector.
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+ SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+ unsigned ConcatEnd = 0; // Current ConcatOps index.
+ unsigned Idx = 0; // Current Idx into input vectors.
+ while (CurNumElts != 0) {
+ while (CurNumElts >= NumElts) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
+ DAG.getIntPtrConstant(Idx));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
+ DAG.getIntPtrConstant(Idx));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
+ Idx += NumElts;
+ CurNumElts -= NumElts;
+ }
+ EVT PrevVecVT = VT;
+ do {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ } while (!TLI.isTypeLegal(VT) && NumElts != 1);
+
+ if (NumElts == 1) {
+ // Since we are using concat vector, build a vector from the scalar ops.
+ SDValue VecOp = DAG.getUNDEF(PrevVecVT);
+ for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp1, DAG.getIntPtrConstant(Idx));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp2, DAG.getIntPtrConstant(Idx));
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, PrevVecVT, VecOp,
+ DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2),
+ DAG.getIntPtrConstant(i));
+ }
+ CurNumElts = 0;
+ ConcatOps[ConcatEnd++] = VecOp;
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // Rebuild vector to one with the widen type
+ Idx = ConcatEnd - 1;
+ while (Idx != 0) {
+ VT = ConcatOps[Idx--].getValueType();
+ while (Idx != 0 && ConcatOps[Idx].getValueType() == VT)
+ --Idx;
+ if (Idx != 0) {
+ VT = ConcatOps[Idx].getValueType();
+ ConcatOps[Idx+1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ &ConcatOps[Idx+1], ConcatEnd - Idx - 1);
+ ConcatEnd = Idx + 2;
+ }
+ }
+
+ unsigned NumOps = WidenVT.getVectorNumElements()/VT.getVectorNumElements();
+ if (NumOps != ConcatEnd) {
+ SDValue UndefVal = DAG.getUNDEF(VT);
+ for (unsigned j = ConcatEnd; j < NumOps; ++j)
+ ConcatOps[j] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps);
+ }
}
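
The rewritten WidenVecRes_Binary above takes the simple GetWidenedVector path only when TLI.canOpTrap says the operation cannot fault; otherwise (e.g. an integer divide, which could trap on zero in the padding lanes widening introduces) it applies the operation to the original lanes in progressively narrower legal pieces and concatenates the results. A toy model of that chunking loop, with made-up element counts and the type-legality checks omitted:

    #include <cstdio>

    int main() {
      unsigned CurNumElts = 3;   // original element count, e.g. a <3 x i32> udiv
      unsigned NumElts = 4;      // widest piece to try first
      unsigned Idx = 0;
      while (CurNumElts != 0 && NumElts != 0) {
        while (CurNumElts >= NumElts) {      // peel off full pieces of this size
          std::printf("apply op to lanes [%u, %u)\n", Idx, Idx + NumElts);
          Idx += NumElts;
          CurNumElts -= NumElts;
        }
        NumElts /= 2;                        // fall back to a narrower piece
      }
      // prints: lanes [0, 2) then lanes [2, 3)
    }
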
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
@@ -2042,6 +2137,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
int SVOffset = LD->getSrcValueOffset();
unsigned Align = LD->getAlignment();
bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
const Value *SV = LD->getSrcValue();
int LdWidth = LdVT.getSizeInBits();
@@ -2052,7 +2148,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
int NewVTWidth = NewVT.getSizeInBits();
SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset,
- isVolatile, Align);
+ isVolatile, isNonTemporal, Align);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction
@@ -2099,7 +2195,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV,
SVOffset+Offset, isVolatile,
- MinAlign(Align, Increment));
+ isNonTemporal, MinAlign(Align, Increment));
LdChain.push_back(LdOp.getValue(1));
LdOps.push_back(LdOp);
@@ -2173,6 +2269,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
int SVOffset = LD->getSrcValueOffset();
unsigned Align = LD->getAlignment();
bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
const Value *SV = LD->getSrcValue();
EVT EltVT = WidenVT.getVectorElementType();
@@ -2184,14 +2281,15 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
SmallVector<SDValue, 16> Ops(WidenNumElts);
unsigned Increment = LdEltVT.getSizeInBits() / 8;
Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset,
- LdEltVT, isVolatile, Align);
+ LdEltVT, isVolatile, isNonTemporal, Align);
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
BasePtr, DAG.getIntPtrConstant(Offset));
Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV,
- SVOffset + Offset, LdEltVT, isVolatile, Align);
+ SVOffset + Offset, LdEltVT, isVolatile,
+ isNonTemporal, Align);
LdChain.push_back(Ops[i].getValue(1));
}
@@ -2215,6 +2313,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
int SVOffset = ST->getSrcValueOffset();
unsigned Align = ST->getAlignment();
bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
SDValue ValOp = GetWidenedVector(ST->getValue());
DebugLoc dl = ST->getDebugLoc();
@@ -2240,6 +2339,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
DAG.getIntPtrConstant(Idx));
StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
SVOffset + Offset, isVolatile,
+ isNonTemporal,
MinAlign(Align, Offset)));
StWidth -= NewVTWidth;
Offset += Increment;
@@ -2258,8 +2358,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
DAG.getIntPtrConstant(Idx++));
StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
- SVOffset + Offset, isVolatile,
- MinAlign(Align, Offset)));
+ SVOffset + Offset, isVolatile,
+ isNonTemporal, MinAlign(Align, Offset)));
StWidth -= NewVTWidth;
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
@@ -2282,6 +2382,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
int SVOffset = ST->getSrcValueOffset();
unsigned Align = ST->getAlignment();
bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
SDValue ValOp = GetWidenedVector(ST->getValue());
DebugLoc dl = ST->getDebugLoc();
@@ -2304,7 +2405,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
DAG.getIntPtrConstant(0));
StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
SVOffset, StEltVT,
- isVolatile, Align));
+ isVolatile, isNonTemporal, Align));
unsigned Offset = Increment;
for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
@@ -2313,7 +2414,8 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
DAG.getIntPtrConstant(0));
StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
SVOffset + Offset, StEltVT,
- isVolatile, MinAlign(Align, Offset)));
+ isVolatile, isNonTemporal,
+ MinAlign(Align, Offset)));
}
}
diff --git a/lib/CodeGen/SelectionDAG/SDDbgValue.h b/lib/CodeGen/SelectionDAG/SDDbgValue.h
new file mode 100644
index 0000000..9e15fc9
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SDDbgValue.h
@@ -0,0 +1,67 @@
+//===-- llvm/CodeGen/SDDbgValue.h - SD dbg_value handling--------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDDbgValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SDDBGVALUE_H
+#define LLVM_CODEGEN_SDDBGVALUE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DebugLoc.h"
+
+namespace llvm {
+
+class MDNode;
+class SDNode;
+class Value;
+
+/// SDDbgValue - Holds the information from a dbg_value node through SDISel.
+/// Either Const or Node is nonzero, but not both.
+/// We do not use SDValue here to avoid including its header.
+
+class SDDbgValue {
+ SDNode *Node; // valid for non-constants
+ unsigned ResNo; // valid for non-constants
+ Value *Const; // valid for constants
+ MDNode *mdPtr;
+ uint64_t Offset;
+ DebugLoc DL;
+public:
+ // Constructor for non-constants.
+ SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl) :
+ Node(N), ResNo(R), Const(0), mdPtr(mdP), Offset(off), DL(dl) {}
+
+ // Constructor for constants.
+ SDDbgValue(MDNode *mdP, Value *C, uint64_t off, DebugLoc dl) : Node(0),
+ ResNo(0), Const(C), mdPtr(mdP), Offset(off), DL(dl) {}
+
+ // Returns the MDNode pointer.
+ MDNode *getMDPtr() { return mdPtr; }
+
+ // Returns the SDNode* (valid for non-constants only).
+ SDNode *getSDNode() { assert (!Const); return Node; }
+
+ // Returns the ResNo (valid for non-constants only).
+ unsigned getResNo() { assert (!Const); return ResNo; }
+
+ // Returns the Value* for a constant (invalid for non-constants).
+ Value *getConst() { assert (!Node); return Const; }
+
+ // Returns the offset.
+ uint64_t getOffset() { return Offset; }
+
+ // Returns the DebugLoc.
+ DebugLoc getDebugLoc() { return DL; }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index b51c61b..06e7b8c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -218,8 +218,20 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// Check to see if the scheduler cares about latencies.
bool UnitLatencies = ForceUnitLatencies();
- for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
- E = DAG->allnodes_end(); NI != E; ++NI) {
+ // Add all nodes in depth first order.
+ SmallVector<SDNode*, 64> Worklist;
+ SmallPtrSet<SDNode*, 64> Visited;
+ Worklist.push_back(DAG->getRoot().getNode());
+ Visited.insert(DAG->getRoot().getNode());
+
+ while (!Worklist.empty()) {
+ SDNode *NI = Worklist.pop_back_val();
+
+ // Add all operands to the worklist unless they've already been added.
+ for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
+ if (Visited.insert(NI->getOperand(i).getNode()))
+ Worklist.push_back(NI->getOperand(i).getNode());
+
if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
continue;
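
The BuildSchedUnits change above stops iterating in allnodes order and instead discovers nodes with a worklist seeded from the DAG root, pushing each operand the first time it is seen. A self-contained toy version of that traversal (the Node type and names are illustrative):

    #include <cstdio>
    #include <set>
    #include <vector>

    struct Node { int id; std::vector<Node*> operands; };

    static void visitFromRoot(Node *root) {
      std::vector<Node*> worklist;
      std::set<Node*> visited;
      worklist.push_back(root);
      visited.insert(root);
      while (!worklist.empty()) {
        Node *n = worklist.back();
        worklist.pop_back();
        for (Node *op : n->operands)        // enqueue unvisited operands
          if (visited.insert(op).second)
            worklist.push_back(op);
        std::printf("build SUnit for node %d\n", n->id);
      }
    }

    int main() {
      Node a{0, {}}, b{1, {&a}}, root{2, {&b, &a}};
      visitFromRoot(&root);                 // reaches nodes 2, 1 and 0 exactly once
    }
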
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 6122a2a..746d4e2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -468,18 +468,20 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
}
/// encodeMemSDNodeFlags - Generic routine for computing a value for use in
-/// the CSE map that carries volatility, indexing mode, and
+/// the CSE map that carries volatility, temporalness, indexing mode, and
/// extension/truncation information.
///
static inline unsigned
-encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile) {
+encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
+ bool isNonTemporal) {
assert((ConvType & 3) == ConvType &&
"ConvType may not require more than 2 bits!");
assert((AM & 7) == AM &&
"AM may not require more than 3 bits!");
return ConvType |
(AM << 2) |
- (isVolatile << 5);
+ (isVolatile << 5) |
+ (isNonTemporal << 6);
}
//===----------------------------------------------------------------------===//
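
With the new bit, encodeMemSDNodeFlags packs the extension kind into bits 0-1, the indexing mode into bits 2-4, volatility into bit 5, and the non-temporal flag into bit 6. A quick standalone check of that packing (function name illustrative):

    #include <cstdio>

    static unsigned encodeFlags(int ConvType, int AM, bool isVolatile,
                                bool isNonTemporal) {
      return ConvType | (AM << 2) | (isVolatile << 5) | (isNonTemporal << 6);
    }

    int main() {
      // conversion kind 2, unindexed (0), non-volatile, non-temporal
      std::printf("0x%02x\n", encodeFlags(2, 0, false, true));  // prints 0x42
    }
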
@@ -829,6 +831,7 @@ void SelectionDAG::clear() {
EntryNode.UseList = 0;
AllNodes.push_back(&EntryNode);
Root = getEntryNode();
+ delete Ordering;
Ordering = new SDNodeOrdering();
}
@@ -859,14 +862,14 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) {
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
///
SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) {
- EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ EVT EltVT = VT.getScalarType();
SDValue NegOne =
getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
return getNode(ISD::XOR, DL, VT, Val, NegOne);
}
SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) {
- EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ EVT EltVT = VT.getScalarType();
assert((EltVT.getSizeInBits() >= 64 ||
(uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
"getConstant with a uint64_t value that doesn't fit in the type!");
@@ -880,7 +883,7 @@ SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) {
SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
assert(VT.isInteger() && "Cannot create FP integer constant!");
- EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ EVT EltVT = VT.getScalarType();
assert(Val.getBitWidth() == EltVT.getSizeInBits() &&
"APInt size does not match type size!");
@@ -923,8 +926,7 @@ SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) {
SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");
- EVT EltVT =
- VT.isVector() ? VT.getVectorElementType() : VT;
+ EVT EltVT = VT.getScalarType();
// Do the map lookup using the actual bit pattern for the floating point
// value, so that we don't have problems with 0.0 comparing equal to -0.0, and
@@ -958,8 +960,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
}
SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
- EVT EltVT =
- VT.isVector() ? VT.getVectorElementType() : VT;
+ EVT EltVT = VT.getScalarType();
if (EltVT==MVT::f32)
return getConstantFP(APFloat((float)Val), VT, isTarget);
else
@@ -1344,7 +1345,7 @@ SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT,
}
SDValue SelectionDAG::getSrcValue(const Value *V) {
- assert((!V || isa<PointerType>(V->getType())) &&
+ assert((!V || V->getType()->isPointerTy()) &&
"SrcValue is not a pointer?");
FoldingSetNodeID ID;
@@ -2232,6 +2233,29 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
return false;
}
+bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+ // If the value is a constant, we can obviously see if it is a zero or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+ return !C->isZero();
+
+ // TODO: Recognize more cases here.
+
+ return false;
+}
+
+bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
+ // Check the obvious case.
+ if (A == B) return true;
+
+ // Check for negative and positive zero.
+ if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A))
+ if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B))
+ if (CA->isZero() && CB->isZero()) return true;
+
+ // Otherwise they may not be equal.
+ return false;
+}
+
bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
if (!GA) return false;
@@ -3080,8 +3104,7 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
/// operand.
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
DebugLoc dl) {
- unsigned NumBits = VT.isVector() ?
- VT.getVectorElementType().getSizeInBits() : VT.getSizeInBits();
+ unsigned NumBits = VT.getScalarType().getSizeInBits();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
APInt Val = APInt(NumBits, C->getZExtValue() & 255);
unsigned Shift = 8;
@@ -3185,7 +3208,7 @@ bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps,
bool isSrcConst = isa<ConstantSDNode>(Src);
EVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG);
bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(VT);
- if (VT != MVT::iAny) {
+ if (VT != MVT::Other) {
const Type *Ty = VT.getTypeForEVT(*DAG.getContext());
unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
// If source is a string constant, this will require an unaligned load.
@@ -3193,14 +3216,14 @@ bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps,
if (Dst.getOpcode() != ISD::FrameIndex) {
// Can't change destination alignment. It requires a unaligned store.
if (AllowUnalign)
- VT = MVT::iAny;
+ VT = MVT::Other;
} else {
int FI = cast<FrameIndexSDNode>(Dst)->getIndex();
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
if (MFI->isFixedObjectIndex(FI)) {
// Can't change destination alignment. It requires a unaligned store.
if (AllowUnalign)
- VT = MVT::iAny;
+ VT = MVT::Other;
} else {
// Give the stack frame object a larger alignment if needed.
if (MFI->getObjectAlignment(FI) < NewAlign)
@@ -3211,7 +3234,7 @@ bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps,
}
}
- if (VT == MVT::iAny) {
+ if (VT == MVT::Other) {
if (TLI.allowsUnalignedMemoryAccesses(MVT::i64)) {
VT = MVT::i64;
} else {
@@ -3299,7 +3322,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, false, DstAlign);
+ DstSV, DstSVOff + DstOff, false, false, DstAlign);
} else {
// The type might not be legal for the target. This should only happen
// if the type is smaller than a legal type, as on PPC, so the right
@@ -3310,10 +3333,11 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
assert(NVT.bitsGE(VT));
Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
- SrcSV, SrcSVOff + SrcOff, VT, false, Align);
+ SrcSV, SrcSVOff + SrcOff, VT, false, false, Align);
Store = DAG.getTruncStore(Chain, dl, Value,
- getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, VT, false, DstAlign);
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstSVOff + DstOff, VT, false, false,
+ DstAlign);
}
OutChains.push_back(Store);
SrcOff += VTSize;
@@ -3358,7 +3382,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
Value = DAG.getLoad(VT, dl, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
- SrcSV, SrcSVOff + SrcOff, false, Align);
+ SrcSV, SrcSVOff + SrcOff, false, false, Align);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
@@ -3373,7 +3397,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
Store = DAG.getStore(Chain, dl, LoadValues[i],
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, false, DstAlign);
+ DstSV, DstSVOff + DstOff, false, false, DstAlign);
OutChains.push_back(Store);
DstOff += VTSize;
}
@@ -3408,7 +3432,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
SDValue Value = getMemsetValue(Src, VT, DAG, dl);
SDValue Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff);
+ DstSV, DstSVOff + DstOff, false, false, 0);
OutChains.push_back(Store);
DstOff += VTSize;
}
@@ -3472,7 +3496,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
/*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY),
TLI.getPointerTy()),
- Args, *this, dl, GetOrdering(Chain.getNode()));
+ Args, *this, dl);
return CallResult.second;
}
@@ -3521,7 +3545,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
/*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE),
TLI.getPointerTy()),
- Args, *this, dl, GetOrdering(Chain.getNode()));
+ Args, *this, dl);
return CallResult.second;
}
@@ -3580,7 +3604,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
/*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
TLI.getPointerTy()),
- Args, *this, dl, GetOrdering(Chain.getNode()));
+ Args, *this, dl);
return CallResult.second;
}
@@ -3788,7 +3812,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
ISD::LoadExtType ExtType, EVT VT, SDValue Chain,
SDValue Ptr, SDValue Offset,
const Value *SV, int SVOffset, EVT MemVT,
- bool isVolatile, unsigned Alignment) {
+ bool isVolatile, bool isNonTemporal,
+ unsigned Alignment) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(VT);
@@ -3802,6 +3827,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
unsigned Flags = MachineMemOperand::MOLoad;
if (isVolatile)
Flags |= MachineMemOperand::MOVolatile;
+ if (isNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
MachineMemOperand *MMO =
MF.getMachineMemOperand(SV, Flags, SVOffset,
MemVT.getStoreSize(), Alignment);
@@ -3840,7 +3867,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
ID.AddInteger(MemVT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile()));
+ ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
+ MMO->isNonTemporal()));
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<LoadSDNode>(E)->refineAlignment(MMO);
@@ -3856,20 +3884,22 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
SDValue Chain, SDValue Ptr,
const Value *SV, int SVOffset,
- bool isVolatile, unsigned Alignment) {
+ bool isVolatile, bool isNonTemporal,
+ unsigned Alignment) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef,
- SV, SVOffset, VT, isVolatile, Alignment);
+ SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment);
}
SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
SDValue Chain, SDValue Ptr,
const Value *SV,
int SVOffset, EVT MemVT,
- bool isVolatile, unsigned Alignment) {
+ bool isVolatile, bool isNonTemporal,
+ unsigned Alignment) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef,
- SV, SVOffset, MemVT, isVolatile, Alignment);
+ SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment);
}
SDValue
@@ -3881,12 +3911,13 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(),
LD->getChain(), Base, Offset, LD->getSrcValue(),
LD->getSrcValueOffset(), LD->getMemoryVT(),
- LD->isVolatile(), LD->getAlignment());
+ LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment());
}
SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
SDValue Ptr, const Value *SV, int SVOffset,
- bool isVolatile, unsigned Alignment) {
+ bool isVolatile, bool isNonTemporal,
+ unsigned Alignment) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(Val.getValueType());
@@ -3900,6 +3931,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
unsigned Flags = MachineMemOperand::MOStore;
if (isVolatile)
Flags |= MachineMemOperand::MOVolatile;
+ if (isNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
MachineMemOperand *MMO =
MF.getMachineMemOperand(SV, Flags, SVOffset,
Val.getValueType().getStoreSize(), Alignment);
@@ -3916,7 +3949,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
ID.AddInteger(VT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile()));
+ ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal()));
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -3932,7 +3966,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
SDValue Ptr, const Value *SV,
int SVOffset, EVT SVT,
- bool isVolatile, unsigned Alignment) {
+ bool isVolatile, bool isNonTemporal,
+ unsigned Alignment) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(SVT);
@@ -3946,6 +3981,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
unsigned Flags = MachineMemOperand::MOStore;
if (isVolatile)
Flags |= MachineMemOperand::MOVolatile;
+ if (isNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
MachineMemOperand *MMO =
MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment);
@@ -3976,7 +4013,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
ID.AddInteger(SVT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile()));
+ ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal()));
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -4535,91 +4573,13 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
SDVTList VTs, const SDValue *Ops,
unsigned NumOps) {
- return MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT) {
- SDVTList VTs = getVTList(VT);
- return MorphNodeTo(N, Opc, VTs, 0, 0);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT, SDValue Op1) {
- SDVTList VTs = getVTList(VT);
- SDValue Ops[] = { Op1 };
- return MorphNodeTo(N, Opc, VTs, Ops, 1);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT, SDValue Op1,
- SDValue Op2) {
- SDVTList VTs = getVTList(VT);
- SDValue Ops[] = { Op1, Op2 };
- return MorphNodeTo(N, Opc, VTs, Ops, 2);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT, SDValue Op1,
- SDValue Op2, SDValue Op3) {
- SDVTList VTs = getVTList(VT);
- SDValue Ops[] = { Op1, Op2, Op3 };
- return MorphNodeTo(N, Opc, VTs, Ops, 3);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT, const SDValue *Ops,
- unsigned NumOps) {
- SDVTList VTs = getVTList(VT);
- return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT1, EVT VT2, const SDValue *Ops,
- unsigned NumOps) {
- SDVTList VTs = getVTList(VT1, VT2);
- return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT1, EVT VT2) {
- SDVTList VTs = getVTList(VT1, VT2);
- return MorphNodeTo(N, Opc, VTs, (SDValue *)0, 0);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT1, EVT VT2, EVT VT3,
- const SDValue *Ops, unsigned NumOps) {
- SDVTList VTs = getVTList(VT1, VT2, VT3);
- return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT1, EVT VT2,
- SDValue Op1) {
- SDVTList VTs = getVTList(VT1, VT2);
- SDValue Ops[] = { Op1 };
- return MorphNodeTo(N, Opc, VTs, Ops, 1);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT1, EVT VT2,
- SDValue Op1, SDValue Op2) {
- SDVTList VTs = getVTList(VT1, VT2);
- SDValue Ops[] = { Op1, Op2 };
- return MorphNodeTo(N, Opc, VTs, Ops, 2);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT1, EVT VT2,
- SDValue Op1, SDValue Op2,
- SDValue Op3) {
- SDVTList VTs = getVTList(VT1, VT2);
- SDValue Ops[] = { Op1, Op2, Op3 };
- return MorphNodeTo(N, Opc, VTs, Ops, 3);
+ N = MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps);
+ // Reset the NodeID to -1.
+ N->setNodeId(-1);
+ return N;
}
-/// MorphNodeTo - These *mutate* the specified node to have the specified
+/// MorphNodeTo - This *mutates* the specified node to have the specified
/// return type, opcode, and operands.
///
/// Note that MorphNodeTo returns the resultant node. If there is already a
@@ -4695,12 +4655,14 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
// Delete any nodes that are still dead after adding the uses for the
// new operands.
- SmallVector<SDNode *, 16> DeadNodes;
- for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(),
- E = DeadNodeSet.end(); I != E; ++I)
- if ((*I)->use_empty())
- DeadNodes.push_back(*I);
- RemoveDeadNodes(DeadNodes);
+ if (!DeadNodeSet.empty()) {
+ SmallVector<SDNode *, 16> DeadNodes;
+ for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(),
+ E = DeadNodeSet.end(); I != E; ++I)
+ if ((*I)->use_empty())
+ DeadNodes.push_back(*I);
+ RemoveDeadNodes(DeadNodes);
+ }
if (IP)
CSEMap.InsertNode(N, IP); // Memoize the new node.
@@ -4907,6 +4869,43 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
return NULL;
}
+namespace {
+
+/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node
+/// pointed to by a use iterator is deleted, increment the use iterator
+/// so that it doesn't dangle.
+///
+/// This class also manages a "downlink" DAGUpdateListener, to forward
+/// messages to ReplaceAllUsesWith's callers.
+///
+class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
+ SelectionDAG::DAGUpdateListener *DownLink;
+ SDNode::use_iterator &UI;
+ SDNode::use_iterator &UE;
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ // Increment the iterator as needed.
+ while (UI != UE && N == *UI)
+ ++UI;
+
+ // Then forward the message.
+ if (DownLink) DownLink->NodeDeleted(N, E);
+ }
+
+ virtual void NodeUpdated(SDNode *N) {
+ // Just forward the message.
+ if (DownLink) DownLink->NodeUpdated(N);
+ }
+
+public:
+ RAUWUpdateListener(SelectionDAG::DAGUpdateListener *dl,
+ SDNode::use_iterator &ui,
+ SDNode::use_iterator &ue)
+ : DownLink(dl), UI(ui), UE(ue) {}
+};
+
+}
+
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
/// This can cause recursive merging of nodes in the DAG.
///
@@ -4927,6 +4926,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
// is replaced by To, we don't want to replace of all its users with To
// too. See PR3018 for more info.
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(UpdateListener, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
@@ -4945,7 +4945,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
- AddModifiedNodeToCSEMaps(User, UpdateListener);
+ AddModifiedNodeToCSEMaps(User, &Listener);
}
}
@@ -4971,6 +4971,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(UpdateListener, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
@@ -4989,7 +4990,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
- AddModifiedNodeToCSEMaps(User, UpdateListener);
+ AddModifiedNodeToCSEMaps(User, &Listener);
}
}
@@ -5007,6 +5008,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(UpdateListener, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
@@ -5026,7 +5028,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
- AddModifiedNodeToCSEMaps(User, UpdateListener);
+ AddModifiedNodeToCSEMaps(User, &Listener);
}
}
@@ -5048,6 +5050,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
// the ReplaceAllUsesWith above.
SDNode::use_iterator UI = From.getNode()->use_begin(),
UE = From.getNode()->use_end();
+ RAUWUpdateListener Listener(UpdateListener, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
bool UserRemovedFromCSEMaps = false;
@@ -5083,7 +5086,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
- AddModifiedNodeToCSEMaps(User, UpdateListener);
+ AddModifiedNodeToCSEMaps(User, &Listener);
}
}
@@ -5280,8 +5283,11 @@ GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA,
MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
MachineMemOperand *mmo)
: SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) {
- SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile());
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal());
assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
+ assert(isNonTemporal() == MMO->isNonTemporal() &&
+ "Non-temporal encoding error!");
assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
}
@@ -5290,7 +5296,8 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
MachineMemOperand *mmo)
: SDNode(Opc, dl, VTs, Ops, NumOps),
MemoryVT(memvt), MMO(mmo) {
- SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile());
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal());
assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
}
@@ -5459,15 +5466,15 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
if (getMachineOpcode() < TII->getNumOpcodes())
return TII->get(getMachineOpcode()).getName();
- return "<<Unknown Machine Node>>";
+ return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
}
if (G) {
const TargetLowering &TLI = G->getTargetLoweringInfo();
const char *Name = TLI.getTargetNodeName(getOpcode());
if (Name) return Name;
- return "<<Unknown Target Node>>";
+ return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
}
- return "<<Unknown Node>>";
+ return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
#ifndef NDEBUG
case ISD::DELETED_NODE:
@@ -5904,6 +5911,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (G)
if (unsigned Order = G->GetOrdering(this))
OS << " [ORD=" << Order << ']';
+
+ if (getNodeId() != -1)
+ OS << " [ID=" << getNodeId() << ']';
}
void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
@@ -6292,31 +6302,37 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
return true;
}
+#ifdef XDEBUG
static void checkForCyclesHelper(const SDNode *N,
- std::set<const SDNode *> &visited) {
- if (visited.find(N) != visited.end()) {
+ SmallPtrSet<const SDNode*, 32> &Visited,
+ SmallPtrSet<const SDNode*, 32> &Checked) {
+ // If this node has already been checked, don't check it again.
+ if (Checked.count(N))
+ return;
+
+ // If a node has already been visited on this depth-first walk, reject it as
+ // a cycle.
+ if (!Visited.insert(N)) {
dbgs() << "Offending node:\n";
N->dumprFull();
- assert(0 && "Detected cycle in SelectionDAG");
+ errs() << "Detected cycle in SelectionDAG\n";
+ abort();
}
-
- std::set<const SDNode*>::iterator i;
- bool inserted;
-
- tie(i, inserted) = visited.insert(N);
- assert(inserted && "Missed cycle");
-
- for(unsigned i = 0; i < N->getNumOperands(); ++i) {
- checkForCyclesHelper(N->getOperand(i).getNode(), visited);
- }
- visited.erase(i);
+
+ for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked);
+
+ Checked.insert(N);
+ Visited.erase(N);
}
+#endif
void llvm::checkForCycles(const llvm::SDNode *N) {
#ifdef XDEBUG
assert(N && "Checking nonexistent SDNode");
- std::set<const SDNode *> visited;
- checkForCyclesHelper(N, visited);
+ SmallPtrSet<const SDNode*, 32> visited;
+ SmallPtrSet<const SDNode*, 32> checked;
+ checkForCyclesHelper(N, visited, checked);
#endif
}
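// Illustrative sketch (editor's addition, not from this patch): the two-set
// DFS used by the new checkForCyclesHelper above, in a self-contained form.
// std::unordered_set stands in for SmallPtrSet; "Node" is a placeholder type,
// not an LLVM class.
#include <cstdlib>
#include <iostream>
#include <unordered_set>
#include <vector>

struct Node { std::vector<Node*> Operands; };

static void checkForCyclesSketch(const Node *N,
                                 std::unordered_set<const Node*> &Visited,
                                 std::unordered_set<const Node*> &Checked) {
  if (Checked.count(N))
    return;                         // already proven cycle-free, skip re-walk
  if (!Visited.insert(N).second) {  // revisited on the current path => cycle
    std::cerr << "Detected cycle\n";
    std::abort();
  }
  for (const Node *Op : N->Operands)
    checkForCyclesSketch(Op, Visited, Checked);
  Checked.insert(N);                // mark as fully checked
  Visited.erase(N);                 // backtrack off the current DFS path
}

int main() {
  Node A, B;
  A.Operands = {&B};
  B.Operands = {&A};                // deliberate A -> B -> A cycle
  std::unordered_set<const Node*> Visited, Checked;
  checkForCyclesSketch(&A, Visited, Checked);   // prints the message and aborts
}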
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index de17f90..05be9a1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -155,7 +155,7 @@ namespace {
/// this value and returns the result as a ValueVTs value. This uses
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
- SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
+ SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
SDValue &Chain, SDValue *Flag) const;
/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
@@ -163,14 +163,14 @@ namespace {
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
- unsigned Order, SDValue &Chain, SDValue *Flag) const;
+ SDValue &Chain, SDValue *Flag) const;
/// AddInlineAsmOperands - Add this value to the specified inlineasm node
/// operand list. This adds the code marker, matching input operand index
/// (if applicable), and includes the number of values added into it.
void AddInlineAsmOperands(unsigned Code,
bool HasMatching, unsigned MatchingIdx,
- SelectionDAG &DAG, unsigned Order,
+ SelectionDAG &DAG,
std::vector<SDValue> &Ops) const;
};
}
@@ -180,7 +180,7 @@ namespace {
/// larger than ValueVT, then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
-static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
const SDValue *Parts,
unsigned NumParts, EVT PartVT, EVT ValueVT,
ISD::NodeType AssertOp = ISD::DELETED_NODE) {
@@ -205,9 +205,9 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
if (RoundParts > 2) {
- Lo = getCopyFromParts(DAG, dl, Order, Parts, RoundParts / 2,
+ Lo = getCopyFromParts(DAG, dl, Parts, RoundParts / 2,
PartVT, HalfVT);
- Hi = getCopyFromParts(DAG, dl, Order, Parts + RoundParts / 2,
+ Hi = getCopyFromParts(DAG, dl, Parts + RoundParts / 2,
RoundParts / 2, PartVT, HalfVT);
} else {
Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]);
@@ -223,7 +223,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
// Assemble the trailing non-power-of-2 part.
unsigned OddParts = NumParts - RoundParts;
EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
- Hi = getCopyFromParts(DAG, dl, Order,
+ Hi = getCopyFromParts(DAG, dl,
Parts + RoundParts, OddParts, PartVT, OddVT);
// Combine the round and odd parts.
@@ -259,7 +259,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
// If the register was not expanded, truncate or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
- Ops[i] = getCopyFromParts(DAG, dl, Order, &Parts[i], 1,
+ Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1,
PartVT, IntermediateVT);
} else if (NumParts > 0) {
// If the intermediate type was expanded, build the intermediate
@@ -268,7 +268,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
- Ops[i] = getCopyFromParts(DAG, dl, Order, &Parts[i * Factor], Factor,
+ Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor,
PartVT, IntermediateVT);
}
@@ -292,7 +292,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
!PartVT.isVector() && "Unexpected split");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
- Val = getCopyFromParts(DAG, dl, Order, Parts, NumParts, PartVT, IntVT);
+ Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT);
}
}
@@ -349,7 +349,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
-static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
+static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
EVT PartVT,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
@@ -417,7 +417,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val,
DAG.getConstant(RoundBits,
TLI.getPointerTy()));
- getCopyToParts(DAG, dl, Order, OddVal, Parts + RoundParts,
+ getCopyToParts(DAG, dl, OddVal, Parts + RoundParts,
OddParts, PartVT);
if (TLI.isBigEndian())
@@ -514,7 +514,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
// If the register was not expanded, promote or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
- getCopyToParts(DAG, dl, Order, Ops[i], &Parts[i], 1, PartVT);
+ getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT);
} else if (NumParts > 0) {
// If the intermediate type was expanded, split each the value into
// legal parts.
@@ -522,7 +522,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
- getCopyToParts(DAG, dl, Order, Ops[i], &Parts[i*Factor], Factor, PartVT);
+ getCopyToParts(DAG, dl, Ops[i], &Parts[i*Factor], Factor, PartVT);
}
}
@@ -680,7 +680,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
getCurDebugLoc());
}
- if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType())) {
+ if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
"Unknown struct or array constant!");
@@ -747,8 +747,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
SDValue Chain = DAG.getEntryNode();
- return RFV.getCopyFromRegs(DAG, getCurDebugLoc(),
- SDNodeOrder, Chain, NULL);
+ return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL);
}
/// Get the EVTs and ArgFlags collections that represent the legalized return
@@ -844,19 +843,17 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) {
Chains[i] =
DAG.getStore(Chain, getCurDebugLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + i),
- Add, NULL, Offsets[i], false, 0);
+ Add, NULL, Offsets[i], false, false, 0);
}
Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
MVT::Other, &Chains[0], NumValues);
- } else {
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
- SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
- unsigned NumValues = ValueVTs.size();
- if (NumValues == 0) continue;
-
- SDValue RetOp = getValue(I.getOperand(i));
+ } else if (I.getNumOperands() != 0) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues) {
+ SDValue RetOp = getValue(I.getOperand(0));
for (unsigned j = 0, f = NumValues; j != f; ++j) {
EVT VT = ValueVTs[j];
@@ -881,7 +878,7 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) {
unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
SmallVector<SDValue, 4> Parts(NumParts);
- getCopyToParts(DAG, getCurDebugLoc(), SDNodeOrder,
+ getCopyToParts(DAG, getCurDebugLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
&Parts[0], NumParts, PartVT, ExtendKind);
@@ -1973,7 +1970,7 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
if (Cases.size() >= 2)
// Must recompute end() each iteration because it may be
// invalidated by erase if we hold on to it
- for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
+ for (CaseItr TmpBegin = Cases.begin(), I = TmpBegin, J = ++TmpBegin; J != Cases.end(); ) {
const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
MachineBasicBlock* nextBB = J->BB;
@@ -2062,9 +2059,15 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) {
}
void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) {
- // Update machine-CFG edges.
+ // Update machine-CFG edges with unique successors.
+ SmallVector<BasicBlock*, 32> succs;
+ succs.reserve(I.getNumSuccessors());
for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
- CurMBB->addSuccessor(FuncInfo.MBBMap[I.getSuccessor(i)]);
+ succs.push_back(I.getSuccessor(i));
+ array_pod_sort(succs.begin(), succs.end());
+ succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
+ for (unsigned i = 0, e = succs.size(); i != e; ++i)
+ CurMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]);
DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
MVT::Other, getControlRoot(),
@@ -2074,7 +2077,7 @@ void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) {
void SelectionDAGBuilder::visitFSub(User &I) {
// -0.0 - X --> fneg
const Type *Ty = I.getType();
- if (isa<VectorType>(Ty)) {
+ if (Ty->isVectorTy()) {
if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
const VectorType *DestTy = cast<VectorType>(I.getType());
const Type *ElTy = DestTy->getElementType();
@@ -2111,7 +2114,7 @@ void SelectionDAGBuilder::visitBinary(User &I, unsigned OpCode) {
void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
- if (!isa<VectorType>(I.getType()) &&
+ if (!I.getType()->isVectorTy() &&
Op2.getValueType() != TLI.getShiftAmountTy()) {
// If the operand is smaller than the shift count type, promote it.
EVT PTy = TLI.getPointerTy();
@@ -2699,7 +2702,9 @@ void SelectionDAGBuilder::visitLoad(LoadInst &I) {
SDValue Ptr = getValue(SV);
const Type *Ty = I.getType();
+
bool isVolatile = I.isVolatile();
+ bool isNonTemporal = I.getMetadata("nontemporal") != 0;
unsigned Alignment = I.getAlignment();
SmallVector<EVT, 4> ValueVTs;
@@ -2731,7 +2736,8 @@ void SelectionDAGBuilder::visitLoad(LoadInst &I) {
PtrVT, Ptr,
DAG.getConstant(Offsets[i], PtrVT));
SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
- A, SV, Offsets[i], isVolatile, Alignment);
+ A, SV, Offsets[i], isVolatile,
+ isNonTemporal, Alignment);
Values[i] = L;
Chains[i] = L.getValue(1);
@@ -2772,6 +2778,7 @@ void SelectionDAGBuilder::visitStore(StoreInst &I) {
SmallVector<SDValue, 4> Chains(NumValues);
EVT PtrVT = Ptr.getValueType();
bool isVolatile = I.isVolatile();
+ bool isNonTemporal = I.getMetadata("nontemporal") != 0;
unsigned Alignment = I.getAlignment();
for (unsigned i = 0; i != NumValues; ++i) {
@@ -2779,7 +2786,8 @@ void SelectionDAGBuilder::visitStore(StoreInst &I) {
DAG.getConstant(Offsets[i], PtrVT));
Chains[i] = DAG.getStore(Root, getCurDebugLoc(),
SDValue(Src.getNode(), Src.getResNo() + i),
- Add, PtrV, Offsets[i], isVolatile, Alignment);
+ Add, PtrV, Offsets[i], isVolatile,
+ isNonTemporal, Alignment);
}
DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
@@ -2879,7 +2887,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I,
///
/// where Op is the hexadecimal representation of the floating point value.
static SDValue
-GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl, unsigned Order) {
+GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
DAG.getConstant(0x007fffff, MVT::i32));
SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
@@ -2894,7 +2902,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl, unsigned Order) {
/// where Op is the hexadecimal representation of the floating point value.
static SDValue
GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
- DebugLoc dl, unsigned Order) {
+ DebugLoc dl) {
SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
DAG.getConstant(0x7f800000, MVT::i32));
SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
@@ -3078,13 +3086,13 @@ SelectionDAGBuilder::visitLog(CallInst &I) {
SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
// Scale the exponent by log(2) [0.69314718f].
- SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder);
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
getF32Constant(DAG, 0x3f317218));
// Get the significand and build it into a floating-point number with
// exponent of 1.
- SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder);
+ SDValue X = GetSignificand(DAG, Op1, dl);
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
@@ -3188,11 +3196,11 @@ SelectionDAGBuilder::visitLog2(CallInst &I) {
SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
// Get the exponent.
- SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder);
+ SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
// Get the significand and build it into a floating-point number with
// exponent of 1.
- SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder);
+ SDValue X = GetSignificand(DAG, Op1, dl);
// Different possible minimax approximations of significand in
// floating-point for various degrees of accuracy over [1,2].
@@ -3297,13 +3305,13 @@ SelectionDAGBuilder::visitLog10(CallInst &I) {
SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
// Scale the exponent by log10(2) [0.30102999f].
- SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder);
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
getF32Constant(DAG, 0x3e9a209a));
// Get the significand and build it into a floating-point number with
// exponent of 1.
- SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder);
+ SDValue X = GetSignificand(DAG, Op1, dl);
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
@@ -4058,7 +4066,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
// Store the stack protector onto the stack.
Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
PseudoSourceValue::getFixedStack(FI),
- 0, true);
+ 0, true, false, 0);
setValue(&I, Res);
DAG.setRoot(Res);
return 0;
@@ -4276,8 +4284,8 @@ isInTailCallPosition(CallSite CS, Attributes CalleeRetAttr,
// Check for a truly no-op bitcast.
if (isa<BitCastInst>(U) &&
(U->getOperand(0)->getType() == U->getType() ||
- (isa<PointerType>(U->getOperand(0)->getType()) &&
- isa<PointerType>(U->getType()))))
+ (U->getOperand(0)->getType()->isPointerTy() &&
+ U->getType()->isPointerTy())))
continue;
// Otherwise it's not a true no-op.
return false;
@@ -4385,7 +4393,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
CS.getCallingConv(),
isTailCall,
!CS.getInstruction()->use_empty(),
- Callee, Args, DAG, getCurDebugLoc(), SDNodeOrder);
+ Callee, Args, DAG, getCurDebugLoc());
assert((isTailCall || Result.second.getNode()) &&
"Non-null chain expected with non-tail call!");
assert((Result.second.getNode() || !Result.first.getNode()) &&
@@ -4410,7 +4418,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
DemoteStackSlot,
DAG.getConstant(Offsets[i], PtrVT));
SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second,
- Add, NULL, Offsets[i], false, 1);
+ Add, NULL, Offsets[i], false, false, 1);
Values[i] = L;
Chains[i] = L.getValue(1);
}
@@ -4433,7 +4441,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
SDValue ReturnValue =
- getCopyFromParts(DAG, getCurDebugLoc(), SDNodeOrder, &Values[CurReg], NumRegs,
+ getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs,
RegisterVT, VT, AssertOp);
ReturnValues.push_back(ReturnValue);
CurReg += NumRegs;
@@ -4512,7 +4520,8 @@ static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy,
SDValue Ptr = Builder.getValue(PtrVal);
SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/,
- false /*volatile*/, 1 /* align=1 */);
+ false /*volatile*/,
+ false /*nontemporal*/, 1 /* align=1 */);
if (!ConstantMemory)
Builder.PendingLoads.push_back(LoadVal.getValue(1));
@@ -4529,9 +4538,9 @@ bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) {
return false;
Value *LHS = I.getOperand(1), *RHS = I.getOperand(2);
- if (!isa<PointerType>(LHS->getType()) || !isa<PointerType>(RHS->getType()) ||
- !isa<IntegerType>(I.getOperand(3)->getType()) ||
- !isa<IntegerType>(I.getType()))
+ if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
+ !I.getOperand(3)->getType()->isIntegerTy() ||
+ !I.getType()->isIntegerTy())
return false;
ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3));
@@ -4625,7 +4634,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) {
StringRef Name = F->getName();
if (Name == "copysign" || Name == "copysignf") {
if (I.getNumOperands() == 3 && // Basic sanity checks.
- I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getOperand(1)->getType()->isFloatingPointTy() &&
I.getType() == I.getOperand(1)->getType() &&
I.getType() == I.getOperand(2)->getType()) {
SDValue LHS = getValue(I.getOperand(1));
@@ -4636,7 +4645,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) {
}
} else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
if (I.getNumOperands() == 2 && // Basic sanity checks.
- I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getOperand(1)->getType()->isFloatingPointTy() &&
I.getType() == I.getOperand(1)->getType()) {
SDValue Tmp = getValue(I.getOperand(1));
setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
@@ -4645,7 +4654,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) {
}
} else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
if (I.getNumOperands() == 2 && // Basic sanity checks.
- I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getOperand(1)->getType()->isFloatingPointTy() &&
I.getType() == I.getOperand(1)->getType() &&
I.onlyReadsMemory()) {
SDValue Tmp = getValue(I.getOperand(1));
@@ -4655,7 +4664,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) {
}
} else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
if (I.getNumOperands() == 2 && // Basic sanity checks.
- I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getOperand(1)->getType()->isFloatingPointTy() &&
I.getType() == I.getOperand(1)->getType() &&
I.onlyReadsMemory()) {
SDValue Tmp = getValue(I.getOperand(1));
@@ -4665,7 +4674,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) {
}
} else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
if (I.getNumOperands() == 2 && // Basic sanity checks.
- I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getOperand(1)->getType()->isFloatingPointTy() &&
I.getType() == I.getOperand(1)->getType() &&
I.onlyReadsMemory()) {
SDValue Tmp = getValue(I.getOperand(1));
@@ -4699,8 +4708,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) {
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
- unsigned Order, SDValue &Chain,
- SDValue *Flag) const {
+ SDValue &Chain, SDValue *Flag) const {
// Assemble the legal parts into the final values.
SmallVector<SDValue, 4> Values(ValueVTs.size());
SmallVector<SDValue, 8> Parts;
@@ -4765,7 +4773,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
Parts[i] = P;
}
- Values[Value] = getCopyFromParts(DAG, dl, Order, Parts.begin(),
+ Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
NumRegs, RegisterVT, ValueVT);
Part += NumRegs;
Parts.clear();
@@ -4781,8 +4789,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
- unsigned Order, SDValue &Chain,
- SDValue *Flag) const {
+ SDValue &Chain, SDValue *Flag) const {
// Get the list of the value's legal parts.
unsigned NumRegs = Regs.size();
SmallVector<SDValue, 8> Parts(NumRegs);
@@ -4791,7 +4798,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
EVT RegisterVT = RegVTs[Value];
- getCopyToParts(DAG, dl, Order,
+ getCopyToParts(DAG, dl,
Val.getValue(Val.getResNo() + Value),
&Parts[Part], NumParts, RegisterVT);
Part += NumParts;
@@ -4832,7 +4839,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
/// values added into it.
void RegsForValue::AddInlineAsmOperands(unsigned Code,
bool HasMatching,unsigned MatchingIdx,
- SelectionDAG &DAG, unsigned Order,
+ SelectionDAG &DAG,
std::vector<SDValue> &Ops) const {
assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!");
unsigned Flag = Code | (Regs.size() << 3);
@@ -5330,7 +5337,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
Chain = DAG.getStore(Chain, getCurDebugLoc(),
- OpInfo.CallOperand, StackSlot, NULL, 0);
+ OpInfo.CallOperand, StackSlot, NULL, 0,
+ false, false, 0);
OpInfo.CallOperand = StackSlot;
}
@@ -5421,7 +5429,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
2 /* REGDEF */ ,
false,
0,
- DAG, SDNodeOrder,
+ DAG,
AsmNodeOperands);
break;
}
@@ -5469,10 +5477,10 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
// Use the produced MatchedRegs object to
MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
- SDNodeOrder, Chain, &Flag);
+ Chain, &Flag);
MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/,
true, OpInfo.getMatchedOperand(),
- DAG, SDNodeOrder, AsmNodeOperands);
+ DAG, AsmNodeOperands);
break;
} else {
assert(((OpFlag & 7) == 4) && "Unknown matching constraint!");
@@ -5533,11 +5541,10 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
}
OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
- SDNodeOrder, Chain, &Flag);
+ Chain, &Flag);
OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0,
- DAG, SDNodeOrder,
- AsmNodeOperands);
+ DAG, AsmNodeOperands);
break;
}
case InlineAsm::isClobber: {
@@ -5545,7 +5552,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
// allocator is aware that the physreg got clobbered.
if (!OpInfo.AssignedRegs.Regs.empty())
OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */,
- false, 0, DAG, SDNodeOrder,
+ false, 0, DAG,
AsmNodeOperands);
break;
}
@@ -5565,7 +5572,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
// and set it as the value of the call.
if (!RetValRegs.Regs.empty()) {
SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
- SDNodeOrder, Chain, &Flag);
+ Chain, &Flag);
// FIXME: Why don't we do this for inline asms with MRVs?
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
@@ -5605,7 +5612,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
Value *Ptr = IndirectStoresToEmit[i].second;
SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
- SDNodeOrder, Chain, &Flag);
+ Chain, &Flag);
StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
}
@@ -5616,7 +5623,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),
StoresToEmit[i].first,
getValue(StoresToEmit[i].second),
- StoresToEmit[i].second, 0);
+ StoresToEmit[i].second, 0,
+ false, false, 0);
OutChains.push_back(Val);
}
@@ -5669,8 +5677,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
CallingConv::ID CallConv, bool isTailCall,
bool isReturnValueUsed,
SDValue Callee,
- ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl,
- unsigned Order) {
+ ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) {
// Handle all of the outgoing arguments.
SmallVector<ISD::OutputArg, 32> Outs;
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
@@ -5721,7 +5728,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
else if (Args[i].isZExt)
ExtendKind = ISD::ZERO_EXTEND;
- getCopyToParts(DAG, dl, Order, Op, &Parts[0], NumParts,
+ getCopyToParts(DAG, dl, Op, &Parts[0], NumParts,
PartVT, ExtendKind);
for (unsigned j = 0; j != NumParts; ++j) {
@@ -5800,7 +5807,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
- ReturnValues.push_back(getCopyFromParts(DAG, dl, Order, &InVals[CurReg],
+ ReturnValues.push_back(getCopyFromParts(DAG, dl, &InVals[CurReg],
NumRegs, RegisterVT, VT,
AssertOp));
CurReg += NumRegs;
@@ -5840,7 +5847,7 @@ void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
SDValue Chain = DAG.getEntryNode();
- RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), SDNodeOrder, Chain, 0);
+ RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
PendingExports.push_back(Chain);
}
@@ -5966,7 +5973,7 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
EVT VT = ValueVTs[0];
EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
ISD::NodeType AssertOp = ISD::DELETED_NODE;
- SDValue ArgValue = getCopyFromParts(DAG, dl, 0, &InVals[0], 1,
+ SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
RegVT, VT, AssertOp);
MachineFunction& MF = SDB->DAG.getMachineFunction();
@@ -6000,7 +6007,7 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
else if (F.paramHasAttr(Idx, Attribute::ZExt))
AssertOp = ISD::AssertZext;
- ArgValues.push_back(getCopyFromParts(DAG, dl, 0, &InVals[i],
+ ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
NumParts, PartVT, VT,
AssertOp));
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index da2e6e4..05f9f1f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -56,9 +56,12 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
#include <algorithm>
using namespace llvm;
+STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+
static cl::opt<bool>
EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
cl::desc("Enable verbose messages in the \"fast\" "
@@ -723,9 +726,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// code to the MachineBasicBlock.
if (TimePassesIsEnabled) {
NamedRegionTimer T("Instruction Selection", GroupName);
- InstructionSelect();
+ DoInstructionSelection();
} else {
- InstructionSelect();
+ DoInstructionSelection();
}
DEBUG(dbgs() << "Selected selection DAG:\n");
@@ -765,6 +768,66 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DEBUG(BB->dump());
}
+void SelectionDAGISel::DoInstructionSelection() {
+ DEBUG(errs() << "===== Instruction selection begins:\n");
+
+ PreprocessISelDAG();
+
+ // Select target instructions for the DAG.
+ {
+ // Number all nodes with a topological order and set DAGSize.
+ DAGSize = CurDAG->AssignTopologicalOrder();
+
+ // Create a dummy node (which is not added to allnodes) that adds
+ // a reference to the root node, preventing it from being deleted,
+ // and tracking any changes of the root.
+ HandleSDNode Dummy(CurDAG->getRoot());
+ ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode());
+ ++ISelPosition;
+
+ // The AllNodes list is now topologically sorted. Visit the
+ // nodes by starting at the end of the list (the root of the
+ // graph) and proceeding back toward the beginning (the entry
+ // node).
+ while (ISelPosition != CurDAG->allnodes_begin()) {
+ SDNode *Node = --ISelPosition;
+ // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes,
+ // but there are currently some corner cases that it misses. Also, this
+ // makes it theoretically possible to disable the DAGCombiner.
+ if (Node->use_empty())
+ continue;
+
+ SDNode *ResNode = Select(Node);
+
+ // FIXME: This is pretty gross. 'Select' should be changed to not return
+ // anything at all and this code should be nuked with a tactical strike.
+
+ // If node should not be replaced, continue with the next one.
+ if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
+ continue;
+ // Replace node.
+ if (ResNode)
+ ReplaceUses(Node, ResNode);
+
+ // If after the replacement this node is not used any more,
+ // remove this dead node.
+ if (Node->use_empty()) { // Don't delete EntryToken, etc.
+ ISelUpdater ISU(ISelPosition);
+ CurDAG->RemoveDeadNode(Node, &ISU);
+ }
+ }
+
+ CurDAG->setRoot(Dummy.getValue());
+ }
+ DEBUG(errs() << "===== Instruction selection ends:\n");
+
+ PostprocessISelDAG();
+
+ // FIXME: This shouldn't be needed, remove it.
+ CurDAG->RemoveDeadNodes();
+}
+
+
void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
MachineFunction &MF,
MachineModuleInfo *MMI,
@@ -870,6 +933,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
// feed PHI nodes in successor blocks.
if (isa<TerminatorInst>(BI))
if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) {
+ ++NumFastIselFailures;
ResetDebugLoc(SDB, FastIS);
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel miss: ";
@@ -894,6 +958,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
// Then handle certain instructions as single-LLVM-Instruction blocks.
if (isa<CallInst>(BI)) {
+ ++NumFastIselFailures;
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel missed call: ";
BI->dump();
@@ -923,6 +988,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
// Otherwise, give up on FastISel for the rest of the block.
// For now, be a little lenient about non-branch terminators.
if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) {
+ ++NumFastIselFailures;
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel miss: ";
BI->dump();
@@ -972,6 +1038,8 @@ SelectionDAGISel::FinishBasicBlock() {
MachineInstr *PHI = SDB->PHINodesToUpdate[i].first;
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
+ if (!BB->isSuccessor(PHI->getParent()))
+ continue;
PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second,
false));
PHI->addOperand(MachineOperand::CreateMBB(BB));
@@ -1316,13 +1384,29 @@ static SDNode *findFlagUse(SDNode *N) {
/// This function recursively traverses up the operand chain, ignoring
/// certain nodes.
static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
- SDNode *Root,
- SmallPtrSet<SDNode*, 16> &Visited) {
- if (Use->getNodeId() < Def->getNodeId() ||
- !Visited.insert(Use))
+ SDNode *Root, SmallPtrSet<SDNode*, 16> &Visited,
+ bool IgnoreChains) {
+ // The NodeIDs are unique IDs where a node's ID is guaranteed to be
+ // greater than all of its (recursive) operands. If we scan to a point where
+ // 'use' is smaller than the node we're scanning for, then we know we will
+ // never find it.
+ //
+ // The Use's node ID may be -1 (unassigned) if it is a newly allocated node.
+ // This can happen because we scan down to newly selected nodes in the case
+ // of flag uses.
+ if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1))
+ return false;
+
+ // Don't revisit a node if we have already scanned it and didn't fail; we
+ // know we won't fail if we scan it again.
+ if (!Visited.insert(Use))
return false;
for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+ // Ignore chain uses, they are validated by HandleMergeInputChains.
+ if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains)
+ continue;
+
SDNode *N = Use->getOperand(i).getNode();
if (N == Def) {
if (Use == ImmedUse || Use == Root)
@@ -1332,32 +1416,24 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
}
// Traverse up the operand chain.
- if (findNonImmUse(N, Def, ImmedUse, Root, Visited))
+ if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains))
return true;
}
return false;
}
-/// isNonImmUse - Start searching from Root up the DAG to check is Def can
-/// be reached. Return true if that's the case. However, ignore direct uses
-/// by ImmedUse (which would be U in the example illustrated in
-/// IsLegalAndProfitableToFold) and by Root (which can happen in the store
-/// case).
-/// FIXME: to be really generic, we should allow direct use by any node
-/// that is being folded. But realisticly since we only fold loads which
-/// have one non-chain use, we only need to watch out for load/op/store
-/// and load/op/cmp case where the root (store / cmp) may reach the load via
-/// its chain operand.
-static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) {
- SmallPtrSet<SDNode*, 16> Visited;
- return findNonImmUse(Root, Def, ImmedUse, Root, Visited);
+/// IsProfitableToFold - Returns true if it's profitable to fold the specific
+/// operand node N of U during instruction selection that starts at Root.
+bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
+ SDNode *Root) const {
+ if (OptLevel == CodeGenOpt::None) return false;
+ return N.hasOneUse();
}
-/// IsLegalAndProfitableToFold - Returns true if the specific operand node N of
-/// U can be folded during instruction selection that starts at Root and
-/// folding N is profitable.
-bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
- SDNode *Root) const {
+/// IsLegalToFold - Returns true if the specific operand node N of
+/// U can be folded during instruction selection that starts at Root.
+bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
+ bool IgnoreChains) const {
if (OptLevel == CodeGenOpt::None) return false;
// If Root use can somehow reach N through a path that doesn't contain
@@ -1402,6 +1478,8 @@ bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
// Fold. But since Fold and FU are flagged together, this will create
// a cycle in the scheduling graph.
+ // If the node has flags, walk down the graph to the "lowest" node in the
+ // flagged set.
EVT VT = Root->getValueType(Root->getNumValues()-1);
while (VT == MVT::Flag) {
SDNode *FU = findFlagUse(Root);
@@ -1409,9 +1487,17 @@ bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
break;
Root = FU;
VT = Root->getValueType(Root->getNumValues()-1);
+
+ // If our query node has a flag result with a use, we've walked up it. If
+ // the user (which has already been selected) has a chain or indirectly uses
+ // the chain, our WalkChainUsers predicate will not consider it. Because of
+ // this, we cannot ignore chains in this predicate.
+ IgnoreChains = false;
}
+
- return !isNonImmUse(Root, N, U);
+ SmallPtrSet<SDNode*, 16> Visited;
+ return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
}
SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
@@ -1423,6 +1509,7 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
VTs.push_back(MVT::Flag);
SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
VTs, &Ops[0], Ops.size());
+ New->setNodeId(-1);
return New.getNode();
}
@@ -1438,25 +1525,1219 @@ SDNode *SelectionDAGISel::Select_EH_LABEL(SDNode *N) {
MVT::Other, Tmp, Chain);
}
+/// GetVBR - decode a vbr encoding whose top bit is set.
+ALWAYS_INLINE static uint64_t
+GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
+ assert(Val >= 128 && "Not a VBR");
+ Val &= 127; // Remove first vbr bit.
+
+ unsigned Shift = 7;
+ uint64_t NextBits;
+ do {
+ NextBits = MatcherTable[Idx++];
+ Val |= (NextBits&127) << Shift;
+ Shift += 7;
+ } while (NextBits & 128);
+
+ return Val;
+}
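// Illustrative sketch (editor's addition, not from this patch): the inverse of
// GetVBR, assuming the same format -- 7 payload bits per byte, little-endian,
// with bit 7 set on every byte that is followed by another byte. The name
// EncodeVBR is hypothetical and used only for this example.
#include <cstdint>
#include <vector>

static void EncodeVBR(uint64_t Val, std::vector<unsigned char> &Table) {
  do {
    unsigned char Byte = Val & 127;   // low 7 bits of the remaining value
    Val >>= 7;
    if (Val)
      Byte |= 128;                    // continuation: more bytes follow
    Table.push_back(Byte);
  } while (Val);
}
// Example: 300 encodes as { 0xAC, 0x02 }; reading 0xAC and then calling
// GetVBR(0xAC, Table, Idx) with Idx at the 0x02 byte yields 300 again.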
+
+
+/// UpdateChainsAndFlags - When a match is complete, this method updates uses of
+/// interior flag and chain results to use the new flag and chain results.
+void SelectionDAGISel::
+UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain,
+ const SmallVectorImpl<SDNode*> &ChainNodesMatched,
+ SDValue InputFlag,
+ const SmallVectorImpl<SDNode*> &FlagResultNodesMatched,
+ bool isMorphNodeTo) {
+ SmallVector<SDNode*, 4> NowDeadNodes;
+
+ ISelUpdater ISU(ISelPosition);
+
+ // Now that all the normal results are replaced, we replace the chain and
+ // flag results if present.
+ if (!ChainNodesMatched.empty()) {
+ assert(InputChain.getNode() != 0 &&
+ "Matched input chains but didn't produce a chain");
+ // Loop over all of the nodes we matched that produced a chain result.
+ // Replace all the chain results with the final chain we ended up with.
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ SDNode *ChainNode = ChainNodesMatched[i];
+
+ // If this node was already deleted, don't look at it.
+ if (ChainNode->getOpcode() == ISD::DELETED_NODE)
+ continue;
+
+ // Don't replace the results of the root node if we're doing a
+ // MorphNodeTo.
+ if (ChainNode == NodeToMatch && isMorphNodeTo)
+ continue;
+
+ SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1);
+ if (ChainVal.getValueType() == MVT::Flag)
+ ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
+ assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
+ CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU);
+
+ // If the node became dead, delete it.
+ if (ChainNode->use_empty())
+ NowDeadNodes.push_back(ChainNode);
+ }
+ }
+
+ // If the result produces a flag, update any flag results in the matched
+ // pattern with the flag result.
+ if (InputFlag.getNode() != 0) {
+ // Handle any interior nodes explicitly marked.
+ for (unsigned i = 0, e = FlagResultNodesMatched.size(); i != e; ++i) {
+ SDNode *FRN = FlagResultNodesMatched[i];
+
+ // If this node was already deleted, don't look at it.
+ if (FRN->getOpcode() == ISD::DELETED_NODE)
+ continue;
+
+ assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Flag &&
+ "Doesn't have a flag result");
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1),
+ InputFlag, &ISU);
+
+ // If the node became dead, delete it.
+ if (FRN->use_empty())
+ NowDeadNodes.push_back(FRN);
+ }
+ }
+
+ if (!NowDeadNodes.empty())
+ CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU);
+
+ DEBUG(errs() << "ISEL: Match complete!\n");
+}
+
+enum ChainResult {
+ CR_Simple,
+ CR_InducesCycle,
+ CR_LeadsToInteriorNode
+};
+
+/// WalkChainUsers - Walk down the users of the specified chained node that is
+/// part of the pattern we're matching, looking at all of the users we find.
+/// This determines whether something is an interior node, whether we have a
+/// non-pattern node in between two pattern nodes (which prevents folding because
+/// it would induce a cycle) and whether we have a TokenFactor node sandwiched
+/// between pattern nodes (in which case the TF becomes part of the pattern).
+///
+/// The walk we do here is guaranteed to be small because we quickly get down to
+/// already selected nodes "below" us.
+static ChainResult
+WalkChainUsers(SDNode *ChainedNode,
+ SmallVectorImpl<SDNode*> &ChainedNodesInPattern,
+ SmallVectorImpl<SDNode*> &InteriorChainedNodes) {
+ ChainResult Result = CR_Simple;
+
+ for (SDNode::use_iterator UI = ChainedNode->use_begin(),
+ E = ChainedNode->use_end(); UI != E; ++UI) {
+ // Make sure the use is of the chain, not some other value we produce.
+ if (UI.getUse().getValueType() != MVT::Other) continue;
+
+ SDNode *User = *UI;
+
+ // If we see an already-selected machine node, then we've gone beyond the
+ // pattern that we're selecting down into the already selected chunk of the
+ // DAG.
+ if (User->isMachineOpcode() ||
+ User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
+ continue;
+
+ if (User->getOpcode() == ISD::CopyToReg ||
+ User->getOpcode() == ISD::CopyFromReg ||
+ User->getOpcode() == ISD::INLINEASM) {
+ // If their node ID got reset to -1 then they've already been selected.
+ // Treat them like a MachineOpcode.
+ if (User->getNodeId() == -1)
+ continue;
+ }
+
+ // If we have a TokenFactor, we handle it specially.
+ if (User->getOpcode() != ISD::TokenFactor) {
+ // If the node isn't a token factor and isn't part of our pattern, then it
+ // must be a random chained node in between two nodes we're selecting.
+ // This happens when we have something like:
+ // x = load ptr
+ // call
+ // y = x+4
+ // store y -> ptr
+ // Because we structurally match the load/store as a read/modify/write,
+ // but the call is chained between them. We cannot fold in this case
+ // because it would induce a cycle in the graph.
+ if (!std::count(ChainedNodesInPattern.begin(),
+ ChainedNodesInPattern.end(), User))
+ return CR_InducesCycle;
+
+ // Otherwise we found a node that is part of our pattern. For example in:
+ // x = load ptr
+ // y = x+4
+ // store y -> ptr
+ // This would happen when we're scanning down from the load and see the
+ // store as a user. Record that there is a use of ChainedNode that is
+ // part of the pattern and keep scanning uses.
+ Result = CR_LeadsToInteriorNode;
+ InteriorChainedNodes.push_back(User);
+ continue;
+ }
+
+ // If we found a TokenFactor, there are two cases to consider: first if the
+ // TokenFactor is just hanging "below" the pattern we're matching (i.e. no
+ // uses of the TF are in our pattern) we just want to ignore it. Second,
+ // the TokenFactor can be sandwiched in between two chained nodes, like so:
+ // [Load chain]
+ // ^
+ // |
+ // [Load]
+ // ^ ^
+ // | \ DAG's like cheese
+ // / \ do you?
+ // / |
+ // [TokenFactor] [Op]
+ // ^ ^
+ // | |
+ // \ /
+ // \ /
+ // [Store]
+ //
+ // In this case, the TokenFactor becomes part of our match and we rewrite it
+ // as a new TokenFactor.
+ //
+ // To distinguish these two cases, do a recursive walk down the uses.
+ switch (WalkChainUsers(User, ChainedNodesInPattern, InteriorChainedNodes)) {
+ case CR_Simple:
+ // If the uses of the TokenFactor are just already-selected nodes, ignore
+ // it, it is "below" our pattern.
+ continue;
+ case CR_InducesCycle:
+ // If the uses of the TokenFactor lead to nodes that are not part of our
+ // pattern that are not selected, folding would turn this into a cycle,
+ // bail out now.
+ return CR_InducesCycle;
+ case CR_LeadsToInteriorNode:
+ break; // Otherwise, keep processing.
+ }
+
+ // Okay, we know we're in the interesting interior case. The TokenFactor
+ // is now going to be considered part of the pattern so that we rewrite its
+ // uses (it may have uses that are not part of the pattern) with the
+ // ultimate chain result of the generated code. We will also add its chain
+ // inputs as inputs to the ultimate TokenFactor we create.
+ Result = CR_LeadsToInteriorNode;
+ ChainedNodesInPattern.push_back(User);
+ InteriorChainedNodes.push_back(User);
+ continue;
+ }
+
+ return Result;
+}
+
+/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
+/// operation for when the pattern matched at least one node with a chain. The
+/// input vector contains a list of all of the chained nodes that we match. We
+/// must determine if this is a valid thing to cover (i.e. matching it won't
+/// induce cycles in the DAG) and, if so, creates a TokenFactor node that will
+/// be used as the input node chain for the generated nodes.
+static SDValue
+HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
+ SelectionDAG *CurDAG) {
+ // Walk all of the chained nodes we've matched, recursively scanning down the
+ // users of the chain result. This adds any TokenFactor nodes that are caught
+ // in between chained nodes to the chained and interior nodes list.
+ SmallVector<SDNode*, 3> InteriorChainedNodes;
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched,
+ InteriorChainedNodes) == CR_InducesCycle)
+ return SDValue(); // Would induce a cycle.
+ }
+
+ // Okay, we have walked all the matched nodes and collected TokenFactor nodes
+ // that we are interested in. Form our input TokenFactor node.
+ SmallVector<SDValue, 3> InputChains;
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ // Add the input chain of this node to the InputChains list (which will be
+ // the operands of the generated TokenFactor) if it's not an interior node.
+ SDNode *N = ChainNodesMatched[i];
+ if (N->getOpcode() != ISD::TokenFactor) {
+ if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
+ continue;
+
+ // Otherwise, add the input chain.
+ SDValue InChain = ChainNodesMatched[i]->getOperand(0);
+ assert(InChain.getValueType() == MVT::Other && "Not a chain");
+ InputChains.push_back(InChain);
+ continue;
+ }
+
+ // If we have a token factor, we want to add all inputs of the token factor
+ // that are not part of the pattern we're matching.
+ for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+ if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(),
+ N->getOperand(op).getNode()))
+ InputChains.push_back(N->getOperand(op));
+ }
+ }
+
+ SDValue Res;
+ if (InputChains.size() == 1)
+ return InputChains[0];
+ return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(),
+ MVT::Other, &InputChains[0], InputChains.size());
+}
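A minimal, self-contained sketch of the skipping rule above (ToyNode and collectExternalChains are illustrative names, not the SDNode/SelectionDAG API): each matched node contributes its incoming chain unless that chain is itself one of the matched nodes, and only the surviving external chains need to be token-factored together.

#include <algorithm>
#include <vector>

struct ToyNode {
  ToyNode *ChainInput;
  ToyNode() : ChainInput(0) {}
};

// Collect the chains feeding the matched group, skipping intra-pattern
// references. One survivor can be used directly; several would need a
// TokenFactor-style join.
std::vector<ToyNode*>
collectExternalChains(const std::vector<ToyNode*> &Matched) {
  std::vector<ToyNode*> Chains;
  for (unsigned i = 0, e = Matched.size(); i != e; ++i) {
    ToyNode *In = Matched[i]->ChainInput;
    if (!In)
      continue;
    if (std::find(Matched.begin(), Matched.end(), In) != Matched.end())
      continue;  // chain points back into the pattern: ignore it
    Chains.push_back(In);
  }
  return Chains;
}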
+
+/// MorphNode - Handle morphing a node in place for the selector.
+SDNode *SelectionDAGISel::
+MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) {
+ // It is possible we're using MorphNodeTo to replace a node with no
+ // normal results with one that has a normal result (or we could be
+ // adding a chain) and the input could have flags and chains as well.
+  // In this case we need to shift the operands down.
+  // FIXME: This is a horrible hack and broken in obscure cases, though it is
+  // no worse than the old isel. We should sink this into MorphNodeTo.
+ int OldFlagResultNo = -1, OldChainResultNo = -1;
+
+ unsigned NTMNumResults = Node->getNumValues();
+ if (Node->getValueType(NTMNumResults-1) == MVT::Flag) {
+ OldFlagResultNo = NTMNumResults-1;
+ if (NTMNumResults != 1 &&
+ Node->getValueType(NTMNumResults-2) == MVT::Other)
+ OldChainResultNo = NTMNumResults-2;
+ } else if (Node->getValueType(NTMNumResults-1) == MVT::Other)
+ OldChainResultNo = NTMNumResults-1;
+
+ // Call the underlying SelectionDAG routine to do the transmogrification. Note
+ // that this deletes operands of the old node that become dead.
+ SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops, NumOps);
+
+ // MorphNodeTo can operate in two ways: if an existing node with the
+ // specified operands exists, it can just return it. Otherwise, it
+ // updates the node in place to have the requested operands.
+ if (Res == Node) {
+ // If we updated the node in place, reset the node ID. To the isel,
+ // this should be just like a newly allocated machine node.
+ Res->setNodeId(-1);
+ }
+
+ unsigned ResNumResults = Res->getNumValues();
+ // Move the flag if needed.
+ if ((EmitNodeInfo & OPFL_FlagOutput) && OldFlagResultNo != -1 &&
+ (unsigned)OldFlagResultNo != ResNumResults-1)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldFlagResultNo),
+ SDValue(Res, ResNumResults-1));
+
+ if ((EmitNodeInfo & OPFL_FlagOutput) != 0)
+ --ResNumResults;
+
+ // Move the chain reference if needed.
+ if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
+ (unsigned)OldChainResultNo != ResNumResults-1)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
+ SDValue(Res, ResNumResults-1));
+
+  // If MorphNodeTo returned a pre-existing node instead of updating Node in
+  // place, replace all uses of the old node with the new one.
+ if (Res != Node)
+ CurDAG->ReplaceAllUsesWith(Node, Res);
+
+ return Res;
+}
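A rough sketch of the bookkeeping above, using toy types rather than the real MVT/SDNode API: find where the old node's chain and flag results sit (when present they are the last results, with the flag after the chain), so that when morphing adds normal results the uses of those values can be redirected to the new last slots.

#include <cstdio>
#include <vector>

enum ToyVT { VT_I32, VT_CHAIN, VT_FLAG };  // illustrative stand-ins

void findChainAndFlag(const std::vector<ToyVT> &Results,
                      int &ChainNo, int &FlagNo) {
  ChainNo = FlagNo = -1;
  unsigned N = Results.size();
  if (Results[N-1] == VT_FLAG) {
    FlagNo = N - 1;
    if (N != 1 && Results[N-2] == VT_CHAIN)
      ChainNo = N - 2;
  } else if (Results[N-1] == VT_CHAIN)
    ChainNo = N - 1;
}

int main() {
  // Old node results: (i32, chain, flag); morphing adds one normal result.
  std::vector<ToyVT> Old, New;
  Old.push_back(VT_I32); Old.push_back(VT_CHAIN); Old.push_back(VT_FLAG);
  New = Old;
  New.insert(New.begin(), VT_I32);   // now (i32, i32, chain, flag)
  int OldCh, OldFl, NewCh, NewFl;
  findChainAndFlag(Old, OldCh, OldFl);
  findChainAndFlag(New, NewCh, NewFl);
  // The chain moved from result 1 to 2 and the flag from 2 to 3, so uses of
  // the old chain/flag values must be redirected to the new positions.
  std::printf("chain %d -> %d, flag %d -> %d\n", OldCh, NewCh, OldFl, NewFl);
  return 0;
}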
+
+/// CheckSame - Implements OP_CheckSame.
+ALWAYS_INLINE static bool
+CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const SmallVectorImpl<SDValue> &RecordedNodes) {
+ // Accept if it is exactly the same as a previously recorded node.
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ return N == RecordedNodes[RecNo];
+}
+
+/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
+ALWAYS_INLINE static bool
+CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SelectionDAGISel &SDISel) {
+ return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
+}
+
+/// CheckNodePredicate - Implements OP_CheckNodePredicate.
+ALWAYS_INLINE static bool
+CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SelectionDAGISel &SDISel, SDNode *N) {
+ return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
+}
+
+ALWAYS_INLINE static bool
+CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDNode *N) {
+ return N->getOpcode() == MatcherTable[MatcherIndex++];
+}
+
+ALWAYS_INLINE static bool
+CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering &TLI) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (N.getValueType() == VT) return true;
+
+ // Handle the case when VT is iPTR.
+ return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy();
+}
+
+ALWAYS_INLINE static bool
+CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering &TLI,
+ unsigned ChildNo) {
+ if (ChildNo >= N.getNumOperands())
+ return false; // Match fails if out of range child #.
+ return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI);
+}
+
+
+ALWAYS_INLINE static bool
+CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ return cast<CondCodeSDNode>(N)->get() ==
+ (ISD::CondCode)MatcherTable[MatcherIndex++];
+}
+
+ALWAYS_INLINE static bool
+CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering &TLI) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (cast<VTSDNode>(N)->getVT() == VT)
+ return true;
+
+ // Handle the case when VT is iPTR.
+ return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy();
+}
+
+ALWAYS_INLINE static bool
+CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+ return C != 0 && C->getSExtValue() == Val;
+}
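The 'Val & 128' test above, and throughout the table walk below, is a variable-byte encoding: each byte carries 7 payload bits and a set high bit means another byte follows. A minimal decoding sketch under that assumption (decodeVBR is an illustrative name, not the real GetVBR helper, though the byte layout is the same):

#include <cstdint>

// Decode the rest of a VBR value whose first byte (with its continuation bit
// set) has already been read by the caller.
uint64_t decodeVBR(uint64_t FirstByte, const unsigned char *Table,
                   unsigned &Idx) {
  uint64_t Val = FirstByte & 127;
  unsigned Shift = 7;
  uint64_t Next;
  do {
    Next = Table[Idx++];
    Val |= (Next & 127) << Shift;
    Shift += 7;
  } while (Next & 128);
  return Val;
}

int main() {
  // 300 = 0b1_0010_1100 -> bytes 0xAC (low 7 bits, continuation set), 0x02.
  const unsigned char Table[] = { 0xAC, 0x02 };
  unsigned Idx = 1;                    // the caller already consumed byte 0
  return decodeVBR(Table[0], Table, Idx) == 300 ? 0 : 1;
}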
+
+ALWAYS_INLINE static bool
+CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, SelectionDAGISel &SDISel) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ if (N->getOpcode() != ISD::AND) return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val);
+}
+
+ALWAYS_INLINE static bool
+CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, SelectionDAGISel &SDISel) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ if (N->getOpcode() != ISD::OR) return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val);
+}
+
+/// IsPredicateKnownToFail - If we know how and can do so without pushing a
+/// scope, evaluate the current node. If the current predicate is known to
+/// fail, set Result=true and return an arbitrary index (the caller ignores
+/// it and moves on to the next alternative). If the current predicate is
+/// known to pass, or cannot be evaluated here, set Result=false and return
+/// the MatcherIndex to continue with.
+static unsigned IsPredicateKnownToFail(const unsigned char *Table,
+ unsigned Index, SDValue N,
+ bool &Result, SelectionDAGISel &SDISel,
+ SmallVectorImpl<SDValue> &RecordedNodes){
+ switch (Table[Index++]) {
+ default:
+ Result = false;
+ return Index-1; // Could not evaluate this predicate.
+ case SelectionDAGISel::OPC_CheckSame:
+ Result = !::CheckSame(Table, Index, N, RecordedNodes);
+ return Index;
+ case SelectionDAGISel::OPC_CheckPatternPredicate:
+ Result = !::CheckPatternPredicate(Table, Index, SDISel);
+ return Index;
+ case SelectionDAGISel::OPC_CheckPredicate:
+ Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode());
+ return Index;
+ case SelectionDAGISel::OPC_CheckOpcode:
+ Result = !::CheckOpcode(Table, Index, N.getNode());
+ return Index;
+ case SelectionDAGISel::OPC_CheckType:
+ Result = !::CheckType(Table, Index, N, SDISel.TLI);
+ return Index;
+ case SelectionDAGISel::OPC_CheckChild0Type:
+ case SelectionDAGISel::OPC_CheckChild1Type:
+ case SelectionDAGISel::OPC_CheckChild2Type:
+ case SelectionDAGISel::OPC_CheckChild3Type:
+ case SelectionDAGISel::OPC_CheckChild4Type:
+ case SelectionDAGISel::OPC_CheckChild5Type:
+ case SelectionDAGISel::OPC_CheckChild6Type:
+ case SelectionDAGISel::OPC_CheckChild7Type:
+ Result = !::CheckChildType(Table, Index, N, SDISel.TLI,
+ Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type);
+ return Index;
+ case SelectionDAGISel::OPC_CheckCondCode:
+ Result = !::CheckCondCode(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckValueType:
+ Result = !::CheckValueType(Table, Index, N, SDISel.TLI);
+ return Index;
+ case SelectionDAGISel::OPC_CheckInteger:
+ Result = !::CheckInteger(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckAndImm:
+ Result = !::CheckAndImm(Table, Index, N, SDISel);
+ return Index;
+ case SelectionDAGISel::OPC_CheckOrImm:
+ Result = !::CheckOrImm(Table, Index, N, SDISel);
+ return Index;
+ }
+}
+
+
+struct MatchScope {
+ /// FailIndex - If this match fails, this is the index to continue with.
+ unsigned FailIndex;
+
+ /// NodeStack - The node stack when the scope was formed.
+ SmallVector<SDValue, 4> NodeStack;
+
+ /// NumRecordedNodes - The number of recorded nodes when the scope was formed.
+ unsigned NumRecordedNodes;
+
+ /// NumMatchedMemRefs - The number of matched memref entries.
+ unsigned NumMatchedMemRefs;
+
+ /// InputChain/InputFlag - The current chain/flag
+ SDValue InputChain, InputFlag;
+
+ /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty.
+ bool HasChainNodesMatched, HasFlagResultNodesMatched;
+};
+
+SDNode *SelectionDAGISel::
+SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
+ unsigned TableSize) {
+ // FIXME: Should these even be selected? Handle these cases in the caller?
+ switch (NodeToMatch->getOpcode()) {
+ default:
+ break;
+ case ISD::EntryToken: // These nodes remain the same.
+ case ISD::BasicBlock:
+ case ISD::Register:
+ case ISD::HANDLENODE:
+ case ISD::TargetConstant:
+ case ISD::TargetConstantFP:
+ case ISD::TargetConstantPool:
+ case ISD::TargetFrameIndex:
+ case ISD::TargetExternalSymbol:
+ case ISD::TargetBlockAddress:
+ case ISD::TargetJumpTable:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::TargetGlobalAddress:
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ NodeToMatch->setNodeId(-1); // Mark selected.
+ return 0;
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0),
+ NodeToMatch->getOperand(0));
+ return 0;
+ case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch);
+ case ISD::EH_LABEL: return Select_EH_LABEL(NodeToMatch);
+ case ISD::UNDEF: return Select_UNDEF(NodeToMatch);
+ }
+
+ assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
+
+ // Set up the node stack with NodeToMatch as the only node on the stack.
+ SmallVector<SDValue, 8> NodeStack;
+ SDValue N = SDValue(NodeToMatch, 0);
+ NodeStack.push_back(N);
+
+ // MatchScopes - Scopes used when matching, if a match failure happens, this
+ // indicates where to continue checking.
+ SmallVector<MatchScope, 8> MatchScopes;
+
+ // RecordedNodes - This is the set of nodes that have been recorded by the
+ // state machine.
+ SmallVector<SDValue, 8> RecordedNodes;
+
+ // MatchedMemRefs - This is the set of MemRef's we've seen in the input
+ // pattern.
+ SmallVector<MachineMemOperand*, 2> MatchedMemRefs;
+
+ // These are the current input chain and flag for use when generating nodes.
+ // Various Emit operations change these. For example, emitting a copytoreg
+ // uses and updates these.
+ SDValue InputChain, InputFlag;
+
+ // ChainNodesMatched - If a pattern matches nodes that have input/output
+ // chains, the OPC_EmitMergeInputChains operation is emitted which indicates
+ // which ones they are. The result is captured into this list so that we can
+ // update the chain results when the pattern is complete.
+ SmallVector<SDNode*, 3> ChainNodesMatched;
+ SmallVector<SDNode*, 3> FlagResultNodesMatched;
+
+ DEBUG(errs() << "ISEL: Starting pattern match on root node: ";
+ NodeToMatch->dump(CurDAG);
+ errs() << '\n');
+
+ // Determine where to start the interpreter. Normally we start at opcode #0,
+ // but if the state machine starts with an OPC_SwitchOpcode, then we
+ // accelerate the first lookup (which is guaranteed to be hot) with the
+ // OpcodeOffset table.
+ unsigned MatcherIndex = 0;
+
+ if (!OpcodeOffset.empty()) {
+ // Already computed the OpcodeOffset table, just index into it.
+ if (N.getOpcode() < OpcodeOffset.size())
+ MatcherIndex = OpcodeOffset[N.getOpcode()];
+ DEBUG(errs() << " Initial Opcode index to " << MatcherIndex << "\n");
+
+ } else if (MatcherTable[0] == OPC_SwitchOpcode) {
+ // Otherwise, the table isn't computed, but the state machine does start
+ // with an OPC_SwitchOpcode instruction. Populate the table now, since this
+ // is the first time we're selecting an instruction.
+ unsigned Idx = 1;
+ while (1) {
+ // Get the size of this case.
+ unsigned CaseSize = MatcherTable[Idx++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, Idx);
+ if (CaseSize == 0) break;
+
+ // Get the opcode, add the index to the table.
+ unsigned Opc = MatcherTable[Idx++];
+ if (Opc >= OpcodeOffset.size())
+ OpcodeOffset.resize((Opc+1)*2);
+ OpcodeOffset[Opc] = Idx;
+ Idx += CaseSize;
+ }
+
+ // Okay, do the lookup for the first opcode.
+ if (N.getOpcode() < OpcodeOffset.size())
+ MatcherIndex = OpcodeOffset[N.getOpcode()];
+ }
+
+ while (1) {
+ assert(MatcherIndex < TableSize && "Invalid index");
+ BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++];
+ switch (Opcode) {
+ case OPC_Scope: {
+ // Okay, the semantics of this operation are that we should push a scope
+ // then evaluate the first child. However, pushing a scope only to have
+ // the first check fail (which then pops it) is inefficient. If we can
+ // determine immediately that the first check (or first several) will
+ // immediately fail, don't even bother pushing a scope for them.
+ unsigned FailIndex;
+
+ while (1) {
+ unsigned NumToSkip = MatcherTable[MatcherIndex++];
+ if (NumToSkip & 128)
+ NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+ // Found the end of the scope with no match.
+ if (NumToSkip == 0) {
+ FailIndex = 0;
+ break;
+ }
+
+ FailIndex = MatcherIndex+NumToSkip;
+
+ // If we can't evaluate this predicate without pushing a scope (e.g. if
+ // it is a 'MoveParent') or if the predicate succeeds on this node, we
+ // push the scope and evaluate the full predicate chain.
+ bool Result;
+ MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N,
+ Result, *this, RecordedNodes);
+ if (!Result)
+ break;
+
+ DEBUG(errs() << " Skipped scope entry at index " << MatcherIndex
+ << " continuing at " << FailIndex << "\n");
+
+
+ // Otherwise, we know that this case of the Scope is guaranteed to fail,
+ // move to the next case.
+ MatcherIndex = FailIndex;
+ }
+
+ // If the whole scope failed to match, bail.
+ if (FailIndex == 0) break;
+
+ // Push a MatchScope which indicates where to go if the first child fails
+ // to match.
+ MatchScope NewEntry;
+ NewEntry.FailIndex = FailIndex;
+ NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end());
+ NewEntry.NumRecordedNodes = RecordedNodes.size();
+ NewEntry.NumMatchedMemRefs = MatchedMemRefs.size();
+ NewEntry.InputChain = InputChain;
+ NewEntry.InputFlag = InputFlag;
+ NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty();
+ NewEntry.HasFlagResultNodesMatched = !FlagResultNodesMatched.empty();
+ MatchScopes.push_back(NewEntry);
+ continue;
+ }
+ case OPC_RecordNode:
+ // Remember this node, it may end up being an operand in the pattern.
+ RecordedNodes.push_back(N);
+ continue;
+
+ case OPC_RecordChild0: case OPC_RecordChild1:
+ case OPC_RecordChild2: case OPC_RecordChild3:
+ case OPC_RecordChild4: case OPC_RecordChild5:
+ case OPC_RecordChild6: case OPC_RecordChild7: {
+ unsigned ChildNo = Opcode-OPC_RecordChild0;
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+
+ RecordedNodes.push_back(N->getOperand(ChildNo));
+ continue;
+ }
+ case OPC_RecordMemRef:
+ MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand());
+ continue;
+
+ case OPC_CaptureFlagInput:
+ // If the current node has an input flag, capture it in InputFlag.
+ if (N->getNumOperands() != 0 &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag)
+ InputFlag = N->getOperand(N->getNumOperands()-1);
+ continue;
+
+ case OPC_MoveChild: {
+ unsigned ChildNo = MatcherTable[MatcherIndex++];
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+ N = N.getOperand(ChildNo);
+ NodeStack.push_back(N);
+ continue;
+ }
+
+ case OPC_MoveParent:
+ // Pop the current node off the NodeStack.
+ NodeStack.pop_back();
+ assert(!NodeStack.empty() && "Node stack imbalance!");
+ N = NodeStack.back();
+ continue;
+
+ case OPC_CheckSame:
+ if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
+ continue;
+ case OPC_CheckPatternPredicate:
+ if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break;
+ continue;
+ case OPC_CheckPredicate:
+ if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this,
+ N.getNode()))
+ break;
+ continue;
+ case OPC_CheckComplexPat: {
+ unsigned CPNum = MatcherTable[MatcherIndex++];
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
+ if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo], CPNum,
+ RecordedNodes))
+ break;
+ continue;
+ }
+ case OPC_CheckOpcode:
+ if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;
+ continue;
+
+ case OPC_CheckType:
+ if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break;
+ continue;
+
+ case OPC_SwitchOpcode: {
+ unsigned CurNodeOpcode = N.getOpcode();
+ unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+ unsigned CaseSize;
+ while (1) {
+ // Get the size of this case.
+ CaseSize = MatcherTable[MatcherIndex++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+ if (CaseSize == 0) break;
+
+ // If the opcode matches, then we will execute this case.
+ if (CurNodeOpcode == MatcherTable[MatcherIndex++])
+ break;
+
+ // Otherwise, skip over this case.
+ MatcherIndex += CaseSize;
+ }
+
+ // If no cases matched, bail out.
+ if (CaseSize == 0) break;
+
+ // Otherwise, execute the case we found.
+ DEBUG(errs() << " OpcodeSwitch from " << SwitchStart
+ << " to " << MatcherIndex << "\n");
+ continue;
+ }
+
+ case OPC_SwitchType: {
+ MVT::SimpleValueType CurNodeVT = N.getValueType().getSimpleVT().SimpleTy;
+ unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+ unsigned CaseSize;
+ while (1) {
+ // Get the size of this case.
+ CaseSize = MatcherTable[MatcherIndex++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+ if (CaseSize == 0) break;
+
+ MVT::SimpleValueType CaseVT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (CaseVT == MVT::iPTR)
+ CaseVT = TLI.getPointerTy().SimpleTy;
+
+ // If the VT matches, then we will execute this case.
+ if (CurNodeVT == CaseVT)
+ break;
+
+ // Otherwise, skip over this case.
+ MatcherIndex += CaseSize;
+ }
+
+ // If no cases matched, bail out.
+ if (CaseSize == 0) break;
+
+ // Otherwise, execute the case we found.
+ DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
+ << "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
+ continue;
+ }
+ case OPC_CheckChild0Type: case OPC_CheckChild1Type:
+ case OPC_CheckChild2Type: case OPC_CheckChild3Type:
+ case OPC_CheckChild4Type: case OPC_CheckChild5Type:
+ case OPC_CheckChild6Type: case OPC_CheckChild7Type:
+ if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI,
+ Opcode-OPC_CheckChild0Type))
+ break;
+ continue;
+ case OPC_CheckCondCode:
+ if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckValueType:
+ if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) break;
+ continue;
+ case OPC_CheckInteger:
+ if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckAndImm:
+ if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break;
+ continue;
+ case OPC_CheckOrImm:
+ if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
+ continue;
+
+ case OPC_CheckFoldableChainNode: {
+ assert(NodeStack.size() != 1 && "No parent node");
+ // Verify that all intermediate nodes between the root and this one have
+ // a single use.
+ bool HasMultipleUses = false;
+ for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i)
+ if (!NodeStack[i].hasOneUse()) {
+ HasMultipleUses = true;
+ break;
+ }
+ if (HasMultipleUses) break;
+
+ // Check to see that the target thinks this is profitable to fold and that
+ // we can fold it without inducing cycles in the graph.
+ if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+ NodeToMatch) ||
+ !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+ NodeToMatch, true/*We validate our own chains*/))
+ break;
+
+ continue;
+ }
+ case OPC_EmitInteger: {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+ RecordedNodes.push_back(CurDAG->getTargetConstant(Val, VT));
+ continue;
+ }
+ case OPC_EmitRegister: {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ unsigned RegNo = MatcherTable[MatcherIndex++];
+ RecordedNodes.push_back(CurDAG->getRegister(RegNo, VT));
+ continue;
+ }
+
+ case OPC_EmitConvertToTarget: {
+ // Convert from IMM/FPIMM to target version.
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+      assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget");
+ SDValue Imm = RecordedNodes[RecNo];
+
+ if (Imm->getOpcode() == ISD::Constant) {
+ int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue();
+ Imm = CurDAG->getTargetConstant(Val, Imm.getValueType());
+ } else if (Imm->getOpcode() == ISD::ConstantFP) {
+ const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
+ Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType());
+ }
+
+ RecordedNodes.push_back(Imm);
+ continue;
+ }
+
+ case OPC_EmitMergeInputChains: {
+ assert(InputChain.getNode() == 0 &&
+ "EmitMergeInputChains should be the first chain producing node");
+ // This node gets a list of nodes we matched in the input that have
+ // chains. We want to token factor all of the input chains to these nodes
+ // together. However, if any of the input chains is actually one of the
+ // nodes matched in this pattern, then we have an intra-match reference.
+ // Ignore these because the newly token factored chain should not refer to
+ // the old nodes.
+ unsigned NumChains = MatcherTable[MatcherIndex++];
+ assert(NumChains != 0 && "Can't TF zero chains");
+
+ assert(ChainNodesMatched.empty() &&
+ "Should only have one EmitMergeInputChains per match");
+
+ // Read all of the chained nodes.
+ for (unsigned i = 0; i != NumChains; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+        assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode());
+
+ // FIXME: What if other value results of the node have uses not matched
+ // by this pattern?
+ if (ChainNodesMatched.back() != NodeToMatch &&
+ !RecordedNodes[RecNo].hasOneUse()) {
+ ChainNodesMatched.clear();
+ break;
+ }
+ }
+
+ // If the inner loop broke out, the match fails.
+ if (ChainNodesMatched.empty())
+ break;
+
+ // Merge the input chains if they are not intra-pattern references.
+ InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+
+ if (InputChain.getNode() == 0)
+ break; // Failed to merge.
+
+ continue;
+ }
+
+ case OPC_EmitCopyToReg: {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+      assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg");
+ unsigned DestPhysReg = MatcherTable[MatcherIndex++];
+
+ if (InputChain.getNode() == 0)
+ InputChain = CurDAG->getEntryNode();
+
+ InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(),
+ DestPhysReg, RecordedNodes[RecNo],
+ InputFlag);
+
+ InputFlag = InputChain.getValue(1);
+ continue;
+ }
+
+ case OPC_EmitNodeXForm: {
+ unsigned XFormNo = MatcherTable[MatcherIndex++];
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+      assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm");
+ RecordedNodes.push_back(RunSDNodeXForm(RecordedNodes[RecNo], XFormNo));
+ continue;
+ }
+
+ case OPC_EmitNode:
+ case OPC_MorphNodeTo: {
+ uint16_t TargetOpc = MatcherTable[MatcherIndex++];
+ TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ unsigned EmitNodeInfo = MatcherTable[MatcherIndex++];
+ // Get the result VT list.
+ unsigned NumVTs = MatcherTable[MatcherIndex++];
+ SmallVector<EVT, 4> VTs;
+ for (unsigned i = 0; i != NumVTs; ++i) {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy;
+ VTs.push_back(VT);
+ }
+
+ if (EmitNodeInfo & OPFL_Chain)
+ VTs.push_back(MVT::Other);
+ if (EmitNodeInfo & OPFL_FlagOutput)
+ VTs.push_back(MVT::Flag);
+
+ // This is hot code, so optimize the two most common cases of 1 and 2
+ // results.
+ SDVTList VTList;
+ if (VTs.size() == 1)
+ VTList = CurDAG->getVTList(VTs[0]);
+ else if (VTs.size() == 2)
+ VTList = CurDAG->getVTList(VTs[0], VTs[1]);
+ else
+ VTList = CurDAG->getVTList(VTs.data(), VTs.size());
+
+ // Get the operand list.
+ unsigned NumOps = MatcherTable[MatcherIndex++];
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ if (RecNo & 128)
+ RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
+ Ops.push_back(RecordedNodes[RecNo]);
+ }
+
+ // If there are variadic operands to add, handle them now.
+ if (EmitNodeInfo & OPFL_VariadicInfo) {
+ // Determine the start index to copy from.
+ unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo);
+ FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0;
+ assert(NodeToMatch->getNumOperands() >= FirstOpToCopy &&
+ "Invalid variadic node");
+ // Copy all of the variadic operands, not including a potential flag
+ // input.
+ for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands();
+ i != e; ++i) {
+ SDValue V = NodeToMatch->getOperand(i);
+ if (V.getValueType() == MVT::Flag) break;
+ Ops.push_back(V);
+ }
+ }
+
+ // If this has chain/flag inputs, add them.
+ if (EmitNodeInfo & OPFL_Chain)
+ Ops.push_back(InputChain);
+ if ((EmitNodeInfo & OPFL_FlagInput) && InputFlag.getNode() != 0)
+ Ops.push_back(InputFlag);
+
+ // Create the node.
+ SDNode *Res = 0;
+ if (Opcode != OPC_MorphNodeTo) {
+ // If this is a normal EmitNode command, just create the new node and
+ // add the results to the RecordedNodes list.
+ Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),
+ VTList, Ops.data(), Ops.size());
+
+ // Add all the non-flag/non-chain results to the RecordedNodes list.
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ if (VTs[i] == MVT::Other || VTs[i] == MVT::Flag) break;
+ RecordedNodes.push_back(SDValue(Res, i));
+ }
+
+ } else {
+ Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(),
+ EmitNodeInfo);
+ }
+
+ // If the node had chain/flag results, update our notion of the current
+ // chain and flag.
+ if (EmitNodeInfo & OPFL_FlagOutput) {
+ InputFlag = SDValue(Res, VTs.size()-1);
+ if (EmitNodeInfo & OPFL_Chain)
+ InputChain = SDValue(Res, VTs.size()-2);
+ } else if (EmitNodeInfo & OPFL_Chain)
+ InputChain = SDValue(Res, VTs.size()-1);
+
+ // If the OPFL_MemRefs flag is set on this node, slap all of the
+ // accumulated memrefs onto it.
+ //
+      // FIXME: This is vastly incorrect for patterns with multiple-output
+      // instructions that access memory and for ComplexPatterns that match
+      // loads.
+ if (EmitNodeInfo & OPFL_MemRefs) {
+ MachineSDNode::mmo_iterator MemRefs =
+ MF->allocateMemRefsArray(MatchedMemRefs.size());
+ std::copy(MatchedMemRefs.begin(), MatchedMemRefs.end(), MemRefs);
+ cast<MachineSDNode>(Res)
+ ->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size());
+ }
+
+ DEBUG(errs() << " "
+ << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")
+ << " node: "; Res->dump(CurDAG); errs() << "\n");
+
+ // If this was a MorphNodeTo then we're completely done!
+ if (Opcode == OPC_MorphNodeTo) {
+ // Update chain and flag uses.
+ UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched,
+ InputFlag, FlagResultNodesMatched, true);
+ return Res;
+ }
+
+ continue;
+ }
+
+ case OPC_MarkFlagResults: {
+ unsigned NumNodes = MatcherTable[MatcherIndex++];
+
+ // Read and remember all the flag-result nodes.
+ for (unsigned i = 0; i != NumNodes; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ if (RecNo & 128)
+ RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+        assert(RecNo < RecordedNodes.size() && "Invalid MarkFlagResults");
+ FlagResultNodesMatched.push_back(RecordedNodes[RecNo].getNode());
+ }
+ continue;
+ }
+
+ case OPC_CompleteMatch: {
+ // The match has been completed, and any new nodes (if any) have been
+ // created. Patch up references to the matched dag to use the newly
+ // created nodes.
+ unsigned NumResults = MatcherTable[MatcherIndex++];
+
+ for (unsigned i = 0; i != NumResults; ++i) {
+ unsigned ResSlot = MatcherTable[MatcherIndex++];
+ if (ResSlot & 128)
+ ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
+
+        assert(ResSlot < RecordedNodes.size() && "Invalid CompleteMatch");
+ SDValue Res = RecordedNodes[ResSlot];
+
+ // FIXME2: Eliminate this horrible hack by fixing the 'Gen' program
+ // after (parallel) on input patterns are removed. This would also
+ // allow us to stop encoding #results in OPC_CompleteMatch's table
+ // entry.
+ if (NodeToMatch->getNumValues() <= i ||
+ NodeToMatch->getValueType(i) == MVT::Other ||
+ NodeToMatch->getValueType(i) == MVT::Flag)
+ break;
+ assert((NodeToMatch->getValueType(i) == Res.getValueType() ||
+ NodeToMatch->getValueType(i) == MVT::iPTR ||
+ Res.getValueType() == MVT::iPTR ||
+ NodeToMatch->getValueType(i).getSizeInBits() ==
+ Res.getValueType().getSizeInBits()) &&
+ "invalid replacement");
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);
+ }
+
+ // If the root node defines a flag, add it to the flag nodes to update
+ // list.
+ if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Flag)
+ FlagResultNodesMatched.push_back(NodeToMatch);
+
+ // Update chain and flag uses.
+ UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched,
+ InputFlag, FlagResultNodesMatched, false);
+
+ assert(NodeToMatch->use_empty() &&
+ "Didn't replace all uses of the node?");
+
+ // FIXME: We just return here, which interacts correctly with SelectRoot
+ // above. We should fix this to not return an SDNode* anymore.
+ return 0;
+ }
+ }
+
+ // If the code reached this point, then the match failed. See if there is
+ // another child to try in the current 'Scope', otherwise pop it until we
+ // find a case to check.
+ while (1) {
+ if (MatchScopes.empty()) {
+ CannotYetSelect(NodeToMatch);
+ return 0;
+ }
+
+ // Restore the interpreter state back to the point where the scope was
+ // formed.
+ MatchScope &LastScope = MatchScopes.back();
+ RecordedNodes.resize(LastScope.NumRecordedNodes);
+ NodeStack.clear();
+ NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end());
+ N = NodeStack.back();
+
+ DEBUG(errs() << " Match failed at index " << MatcherIndex
+ << " continuing at " << LastScope.FailIndex << "\n");
+
+ if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size())
+ MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
+ MatcherIndex = LastScope.FailIndex;
+
+ InputChain = LastScope.InputChain;
+ InputFlag = LastScope.InputFlag;
+ if (!LastScope.HasChainNodesMatched)
+ ChainNodesMatched.clear();
+ if (!LastScope.HasFlagResultNodesMatched)
+ FlagResultNodesMatched.clear();
+
+ // Check to see what the offset is at the new MatcherIndex. If it is zero
+ // we have reached the end of this scope, otherwise we have another child
+ // in the current scope to try.
+ unsigned NumToSkip = MatcherTable[MatcherIndex++];
+ if (NumToSkip & 128)
+ NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+
+ // If we have another child in this scope to match, update FailIndex and
+ // try it.
+ if (NumToSkip != 0) {
+ LastScope.FailIndex = MatcherIndex+NumToSkip;
+ break;
+ }
+
+ // End of this scope, pop it and try the next child in the containing
+ // scope.
+ MatchScopes.pop_back();
+ }
+ }
+}
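As a rough mental model of the interpreter loop above (the opcodes, encoding, and table below are invented for illustration, not the real matcher table format): the matcher is a small bytecode interpreter with backtracking, where a scope records the index to resume at if the current alternative fails, a failed check pops back to that index, and running out of scopes means the node cannot be selected.

#include <cstdio>
#include <vector>

// Toy matcher: ALT <skip> introduces an alternative that resumes <skip> bytes
// ahead on failure, CHECK <byte> compares against the input, ACCEPT matches.
enum ToyOp { OP_ALT, OP_CHECK, OP_ACCEPT };

bool runToyMatcher(const unsigned char *Table, unsigned Size,
                   unsigned char Input) {
  std::vector<unsigned> FailStack;      // plays the role of MatchScopes
  unsigned Idx = 0;
  while (Idx < Size) {
    switch (Table[Idx++]) {
    case OP_ALT: {
      unsigned Skip = Table[Idx++];
      FailStack.push_back(Idx + Skip);  // where to resume if this branch fails
      continue;
    }
    case OP_CHECK:
      if (Table[Idx++] == Input)
        continue;                       // predicate passed, keep going
      break;                            // predicate failed, backtrack below
    case OP_ACCEPT:
      return true;
    }
    if (FailStack.empty())
      return false;                     // no alternative left: no match
    Idx = FailStack.back();             // restore and try the next branch
    FailStack.pop_back();
  }
  return false;
}

int main() {
  // Two alternatives: match input 7, otherwise match input 9.
  const unsigned char Table[] = { OP_ALT, 3, OP_CHECK, 7, OP_ACCEPT,
                                  OP_CHECK, 9, OP_ACCEPT };
  std::printf("%d %d %d\n", runToyMatcher(Table, sizeof(Table), 7),
              runToyMatcher(Table, sizeof(Table), 9),
              runToyMatcher(Table, sizeof(Table), 5));
  return 0;
}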
+
+
+
void SelectionDAGISel::CannotYetSelect(SDNode *N) {
std::string msg;
raw_string_ostream Msg(msg);
Msg << "Cannot yet select: ";
- N->printrFull(Msg, CurDAG);
+
+ if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN &&
+ N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
+ N->getOpcode() != ISD::INTRINSIC_VOID) {
+ N->printrFull(Msg, CurDAG);
+ } else {
+ bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other;
+ unsigned iid =
+ cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue();
+ if (iid < Intrinsic::num_intrinsics)
+ Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid);
+ else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo())
+ Msg << "target intrinsic %" << TII->getName(iid);
+ else
+ Msg << "unknown intrinsic #" << iid;
+ }
llvm_report_error(Msg.str());
}
-void SelectionDAGISel::CannotYetSelectIntrinsic(SDNode *N) {
- dbgs() << "Cannot yet select: ";
- unsigned iid =
- cast<ConstantSDNode>(N->getOperand(N->getOperand(0).getValueType() ==
- MVT::Other))->getZExtValue();
- if (iid < Intrinsic::num_intrinsics)
- llvm_report_error("Cannot yet select: intrinsic %" +
- Intrinsic::getName((Intrinsic::ID)iid));
- else if (const TargetIntrinsicInfo *tii = TM.getIntrinsicInfo())
- llvm_report_error(Twine("Cannot yet select: target intrinsic %") +
- tii->getName(iid));
-}
-
char SelectionDAGISel::ID = 0;
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index d74ec7e..8d0d884 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -28,6 +28,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include <ctype.h>
using namespace llvm;
namespace llvm {
@@ -540,6 +541,24 @@ TargetLowering::~TargetLowering() {
delete &TLOF;
}
+/// canOpTrap - Returns true if the operation can trap for the value type.
+/// VT must be a legal type.
+bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const {
+ assert(isTypeLegal(VT));
+ switch (Op) {
+ default:
+ return false;
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ return true;
+ }
+}
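A hedged usage sketch for the new hook (canSpeculateOp is a made-up helper; only isTypeLegal and canOpTrap are real TargetLowering members): a client that wants to hoist or speculate an operation would typically require the type to be legal and the operation to be trap-free for that type.

#include "llvm/Target/TargetLowering.h"
using namespace llvm;

// Hypothetical helper: integer/FP division and remainder report true from
// canOpTrap, so they would not be speculated here.
static bool canSpeculateOp(const TargetLowering &TLI, unsigned Opc, EVT VT) {
  return TLI.isTypeLegal(VT) && !TLI.canOpTrap(Opc, VT);
}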
+
+
static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
unsigned &NumIntermediates,
EVT &RegisterVT,
@@ -1423,8 +1442,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::TRUNCATE: {
// Simplify the input, using demanded bit information, and compute the known
// zero/one bits live out.
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
APInt TruncMask = NewMask;
- TruncMask.zext(Op.getOperand(0).getValueSizeInBits());
+ TruncMask.zext(OperandBitWidth);
if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
@@ -1435,15 +1456,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// on the known demanded bits.
if (Op.getOperand(0).getNode()->hasOneUse()) {
SDValue In = Op.getOperand(0);
- unsigned InBitWidth = In.getValueSizeInBits();
switch (In.getOpcode()) {
default: break;
case ISD::SRL:
// Shrink SRL by a constant if none of the high bits shifted in are
// demanded.
if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){
- APInt HighBits = APInt::getHighBitsSet(InBitWidth,
- InBitWidth - BitWidth);
+ APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
+ OperandBitWidth - BitWidth);
HighBits = HighBits.lshr(ShAmt->getZExtValue());
HighBits.trunc(BitWidth);
@@ -1589,7 +1609,7 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
// Fall back to ComputeMaskedBits to catch other known cases.
EVT OpVT = Val.getValueType();
- unsigned BitWidth = OpVT.getSizeInBits();
+ unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt KnownZero, KnownOne;
DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
@@ -1698,7 +1718,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
Lod->getSrcValue(),
Lod->getSrcValueOffset() + bestOffset,
- false, NewAlign);
+ false, false, NewAlign);
return DAG.getSetCC(dl, VT,
DAG.getNode(ISD::AND, dl, newVT, NewLoad,
DAG.getConstant(bestMask.trunc(bestWidth),
@@ -1757,7 +1777,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
break; // todo, be more careful with signed comparisons
}
} else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
EVT ExtDstTy = N0.getValueType();
@@ -1791,22 +1811,21 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
Cond);
} else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
-
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
- if (N0.getOpcode() == ISD::SETCC) {
+ if (N0.getOpcode() == ISD::SETCC &&
+ isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
if (TrueWhenTrue)
- return N0;
-
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
// Invert the condition.
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
CC = ISD::getSetCCInverse(CC,
N0.getOperand(0).getValueType().isInteger());
return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
}
-
+
if ((N0.getOpcode() == ISD::XOR ||
- (N0.getOpcode() == ISD::AND &&
+ (N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::XOR &&
N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
isa<ConstantSDNode>(N0.getOperand(1)) &&
@@ -1829,9 +1848,36 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getOperand(0).getOperand(0),
N0.getOperand(1));
}
+
return DAG.getSetCC(dl, VT, Val, N1,
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
+ } else if (N1C->getAPIntValue() == 1 &&
+ (VT == MVT::i1 ||
+ getBooleanContents() == ZeroOrOneBooleanContent)) {
+ SDValue Op0 = N0;
+ if (Op0.getOpcode() == ISD::TRUNCATE)
+ Op0 = Op0.getOperand(0);
+
+ if ((Op0.getOpcode() == ISD::XOR) &&
+ Op0.getOperand(0).getOpcode() == ISD::SETCC &&
+ Op0.getOperand(1).getOpcode() == ISD::SETCC) {
+ // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
+ Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
+ return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
+ Cond);
+ } else if (Op0.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Op0.getOperand(1)) &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
+ // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
+ if (Op0.getValueType() != VT)
+ Op0 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
+ DAG.getConstant(1, VT));
+ return DAG.getSetCC(dl, VT, Op0,
+ DAG.getConstant(0, Op0.getValueType()),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
}
}
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 1d9bda4..ce72b2f 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -662,7 +662,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
if (!tii_->isTriviallyReMaterializable(DefMI, AA))
return false;
bool SawStore = false;
- if (!DefMI->isSafeToMove(tii_, SawStore, AA))
+ if (!DefMI->isSafeToMove(tii_, AA, SawStore))
return false;
if (TID.getNumDefs() != 1)
return false;
@@ -702,7 +702,8 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
for (const unsigned* SR = tri_->getSubRegisters(DstReg); *SR; ++SR) {
if (!li_->hasInterval(*SR))
continue;
- DLR = li_->getInterval(*SR).getLiveRangeContaining(DefIdx);
+ const LiveRange *DLR =
+ li_->getInterval(*SR).getLiveRangeContaining(DefIdx);
if (DLR && DLR->valno->getCopy() == CopyMI)
DLR->valno->setCopy(0);
}
@@ -741,9 +742,21 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
NewMI->addOperand(MO);
if (MO.isDef() && li_->hasInterval(MO.getReg())) {
unsigned Reg = MO.getReg();
- DLR = li_->getInterval(Reg).getLiveRangeContaining(DefIdx);
+ const LiveRange *DLR =
+ li_->getInterval(Reg).getLiveRangeContaining(DefIdx);
if (DLR && DLR->valno->getCopy() == CopyMI)
DLR->valno->setCopy(0);
+ // Handle subregs as well
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (const unsigned* SR = tri_->getSubRegisters(Reg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
+ const LiveRange *DLR =
+ li_->getInterval(*SR).getLiveRangeContaining(DefIdx);
+ if (DLR && DLR->valno->getCopy() == CopyMI)
+ DLR->valno->setCopy(0);
+ }
+ }
}
}
@@ -752,6 +765,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
CopyMI->eraseFromParent();
ReMatCopies.insert(CopyMI);
ReMatDefs.insert(DefMI);
+ DEBUG(dbgs() << "Remat: " << *NewMI);
++NumReMats;
return true;
}
@@ -771,11 +785,16 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
SubIdx = 0;
}
+ // Copy the register use-list before traversing it. We may be adding operands
+ // and invalidating pointers.
+ SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist;
for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg),
- E = mri_->reg_end(); I != E; ) {
- MachineOperand &O = I.getOperand();
- MachineInstr *UseMI = &*I;
- ++I;
+ E = mri_->reg_end(); I != E; ++I)
+ reglist.push_back(std::make_pair(&*I, I.getOperandNo()));
+
+ for (unsigned N=0; N != reglist.size(); ++N) {
+ MachineInstr *UseMI = reglist[N].first;
+ MachineOperand &O = UseMI->getOperand(reglist[N].second);
unsigned OldSubIdx = O.getSubReg();
if (DstIsPhys) {
unsigned UseDstReg = DstReg;
@@ -796,6 +815,19 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
O.setReg(UseDstReg);
O.setSubReg(0);
+ if (OldSubIdx) {
+ // Def and kill of subregister of a virtual register actually defs and
+ // kills the whole register. Add imp-defs and imp-kills as needed.
+ if (O.isDef()) {
+ if(O.isDead())
+ UseMI->addRegisterDead(DstReg, tri_, true);
+ else
+ UseMI->addRegisterDefined(DstReg, tri_);
+ } else if (!O.isUndef() &&
+ (O.isKill() ||
+ UseMI->isRegTiedToDefOperand(&O-&UseMI->getOperand(0))))
+ UseMI->addRegisterKilled(DstReg, tri_, true);
+ }
continue;
}
@@ -1148,12 +1180,14 @@ SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg,
LiveInterval &SmallInt = li_->getInterval(SmallReg);
unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt);
unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt);
- if (SmallSize > Threshold || LargeSize > Threshold)
- if ((float)std::distance(mri_->use_nodbg_begin(SmallReg),
- mri_->use_nodbg_end()) / SmallSize <
- (float)std::distance(mri_->use_nodbg_begin(LargeReg),
- mri_->use_nodbg_end()) / LargeSize)
+ if (LargeSize > Threshold) {
+ unsigned SmallUses = std::distance(mri_->use_nodbg_begin(SmallReg),
+ mri_->use_nodbg_end());
+ unsigned LargeUses = std::distance(mri_->use_nodbg_begin(LargeReg),
+ mri_->use_nodbg_end());
+ if (SmallUses*LargeSize < LargeUses*SmallSize)
return false;
+ }
return true;
}
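The rewritten comparison above trades two floating-point divisions for an integer cross-multiplication: for positive sizes, SmallUses/SmallSize < LargeUses/LargeSize holds exactly when SmallUses*LargeSize < LargeUses*SmallSize, so the density test is preserved without float rounding (the surrounding guard also changed to check only LargeSize). A tiny check with made-up numbers:

#include <cassert>

int main() {
  unsigned SmallUses = 3, SmallSize = 10;   // density 0.3
  unsigned LargeUses = 12, LargeSize = 30;  // density 0.4
  bool FloatCmp = (float)SmallUses / SmallSize < (float)LargeUses / LargeSize;
  bool IntCmp = SmallUses * LargeSize < LargeUses * SmallSize;  // 90 < 120
  assert(FloatCmp == IntCmp);
  return 0;
}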
@@ -1173,6 +1207,8 @@ SimpleRegisterCoalescing::HasIncompatibleSubRegDefUse(MachineInstr *CopyMI,
for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(VirtReg),
E = mri_->reg_end(); I != E; ++I) {
MachineOperand &O = I.getOperand();
+ if (O.isDebug())
+ continue;
MachineInstr *MI = &*I;
if (MI == CopyMI || JoinedCopies.count(MI))
continue;
@@ -1559,7 +1595,10 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
(isExtSubReg || DstRC->isASubClass()) &&
!isWinToJoinCrossClass(LargeReg, SmallReg,
allocatableRCRegs_[NewRC].count())) {
- DEBUG(dbgs() << "\tSrc/Dest are different register classes.\n");
+ DEBUG(dbgs() << "\tSrc/Dest are different register classes: "
+ << SrcRC->getName() << "/"
+ << DstRC->getName() << " -> "
+ << NewRC->getName() << ".\n");
// Allow the coalescer to try again in case either side gets coalesced to
// a physical register that's compatible with the other side. e.g.
// r1024 = MOV32to32_ r1025
@@ -1680,6 +1719,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
(AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) ||
RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) {
JoinedCopies.insert(CopyMI);
+ DEBUG(dbgs() << "Trivial!\n");
return true;
}
@@ -1839,7 +1879,7 @@ static unsigned ComputeUltimateVN(VNInfo *VNI,
// If the VN has already been computed, just return it.
if (ThisValNoAssignments[VN] >= 0)
return ThisValNoAssignments[VN];
-// assert(ThisValNoAssignments[VN] != -2 && "Cyclic case?");
+ assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers");
// If this val is not a copy from the other val, then it must be a new value
// number in the destination.
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 8d4d1b2..059e8d6 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -44,7 +44,6 @@ namespace {
const Type *FunctionContextTy;
Constant *RegisterFn;
Constant *UnregisterFn;
- Constant *ResumeFn;
Constant *BuiltinSetjmpFn;
Constant *FrameAddrFn;
Constant *LSDAAddrFn;
@@ -67,8 +66,8 @@ namespace {
}
private:
- void markInvokeCallSite(InvokeInst *II, unsigned InvokeNo,
- Value *CallSite,
+ void insertCallSiteStore(Instruction *I, int Number, Value *CallSite);
+ void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite,
SwitchInst *CatchSwitch);
void splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes);
bool insertSjLjEHSupport(Function &F);
@@ -107,11 +106,6 @@ bool SjLjEHPass::doInitialization(Module &M) {
Type::getVoidTy(M.getContext()),
PointerType::getUnqual(FunctionContextTy),
(Type *)0);
- ResumeFn =
- M.getOrInsertFunction("_Unwind_SjLj_Resume",
- Type::getVoidTy(M.getContext()),
- VoidPtrTy,
- (Type *)0);
FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp);
LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
@@ -123,12 +117,22 @@ bool SjLjEHPass::doInitialization(Module &M) {
return true;
}
+/// insertCallSiteStore - Insert a store of the call-site value to the
+/// function context
+void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number,
+ Value *CallSite) {
+ ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()),
+ Number);
+ // Insert a store of the call-site number
+ new StoreInst(CallSiteNoC, CallSite, true, I); // volatile
+}
+
/// markInvokeCallSite - Insert code to mark the call_site for this invoke
-void SjLjEHPass::markInvokeCallSite(InvokeInst *II, unsigned InvokeNo,
+void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo,
Value *CallSite,
SwitchInst *CatchSwitch) {
ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()),
- InvokeNo);
+ InvokeNo);
// The runtime comes back to the dispatcher with the call_site - 1 in
// the context. Odd, but there it is.
ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
@@ -145,8 +149,11 @@ void SjLjEHPass::markInvokeCallSite(InvokeInst *II, unsigned InvokeNo,
}
}
- // Insert a store of the invoke num before the invoke
- new StoreInst(CallSiteNoC, CallSite, true, II); // volatile
+ // Insert the store of the call site value
+ insertCallSiteStore(II, InvokeNo, CallSite);
+
+ // Record the call site value for the back end so it stays associated with
+ // the invoke.
CallInst::Create(CallSiteFn, CallSiteNoC, "", II);
// Add a switch case to our unwind block.
@@ -272,8 +279,8 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
SmallVector<InvokeInst*,16> Invokes;
// Look through the terminators of the basic blocks to find invokes, returns
- // and unwinds
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ // and unwinds.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
// Remember all return instructions in case we insert an invoke into this
// function.
@@ -283,6 +290,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
} else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
Unwinds.push_back(UI);
}
+ }
// If we don't have any invokes or unwinds, there's nothing to do.
if (Unwinds.empty() && Invokes.empty()) return false;
@@ -478,24 +486,21 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch);
- // The front end has likely added calls to _Unwind_Resume. We need
- // to find those calls and mark the call_site as -1 immediately prior.
- // resume is a noreturn function, so any block that has a call to it
- // should end in an 'unreachable' instruction with the call immediately
- // prior. That's how we'll search.
- // ??? There's got to be a better way. this is fugly.
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- if ((dyn_cast<UnreachableInst>(BB->getTerminator()))) {
- BasicBlock::iterator I = BB->getTerminator();
- // Check the previous instruction and see if it's a resume call
- if (I == BB->begin()) continue;
- if (CallInst *CI = dyn_cast<CallInst>(--I)) {
- if (CI->getCalledFunction() == ResumeFn) {
- Value *NegativeOne = Constant::getAllOnesValue(Int32Ty);
- new StoreInst(NegativeOne, CallSite, true, I); // volatile
- }
+  // Mark call instructions that aren't nounwind as no-action
+  // (call_site == -1). Skip the entry block: before the function context has
+  // been set up for this function, any unexpected exception that is thrown
+  // unwinds directly to the caller's context, which is what we want anyway,
+  // so there is nothing to do there.
+ for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
+ for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ // Ignore calls to the EH builtins (eh.selector, eh.exception)
+ Constant *Callee = CI->getCalledFunction();
+ if (Callee != SelectorFn && Callee != ExceptionFn
+ && !CI->doesNotThrow())
+ insertCallSiteStore(CI, -1, CallSite);
}
- }
+ }
// Replace all unwinds with a branch to the unwind handler.
// ??? Should this ever happen with sjlj exceptions?
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 48bb5af..8a6a727 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -113,7 +113,7 @@ bool StackProtector::RequiresStackProtector() const {
if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) {
// We apparently only care about character arrays.
- if (!AT->getElementType()->isInteger(8))
+ if (!AT->getElementType()->isIntegerTy(8))
continue;
// If an array has more than SSPBufferSize bytes of allocated space,
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 9ab4058..3223e53 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -403,26 +403,45 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
II->RemoveOperand(i);
}
}
- II->RemoveOperand(Idx+1);
- II->RemoveOperand(Idx);
- }
+ } else
+ Idx = 0;
+
+ // If Idx is set, the operands at Idx and Idx+1 must be removed.
+ // We reuse the location to avoid expensive RemoveOperand calls.
+
DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg);
if (LI != SSAUpdateVals.end()) {
// This register is defined in the tail block.
for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
MachineBasicBlock *SrcBB = LI->second[j].first;
unsigned SrcReg = LI->second[j].second;
- II->addOperand(MachineOperand::CreateReg(SrcReg, false));
- II->addOperand(MachineOperand::CreateMBB(SrcBB));
+ if (Idx != 0) {
+ II->getOperand(Idx).setReg(SrcReg);
+ II->getOperand(Idx+1).setMBB(SrcBB);
+ Idx = 0;
+ } else {
+ II->addOperand(MachineOperand::CreateReg(SrcReg, false));
+ II->addOperand(MachineOperand::CreateMBB(SrcBB));
+ }
}
} else {
// Live in tail block, must also be live in predecessors.
for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
MachineBasicBlock *SrcBB = TDBBs[j];
- II->addOperand(MachineOperand::CreateReg(Reg, false));
- II->addOperand(MachineOperand::CreateMBB(SrcBB));
+ if (Idx != 0) {
+ II->getOperand(Idx).setReg(Reg);
+ II->getOperand(Idx+1).setMBB(SrcBB);
+ Idx = 0;
+ } else {
+ II->addOperand(MachineOperand::CreateReg(Reg, false));
+ II->addOperand(MachineOperand::CreateMBB(SrcBB));
+ }
}
}
+ if (Idx != 0) {
+ II->RemoveOperand(Idx+1);
+ II->RemoveOperand(Idx);
+ }
}
}
}
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index a0fccab..e9e998f 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -150,6 +150,11 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
MBB.insert(I, MI);
}
+bool TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1) const {
+ return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
+}
+
MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
MachineFunction &MF) const {
assert(!Orig->getDesc().isNotDuplicable() &&
@@ -157,37 +162,6 @@ MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
return MF.CloneMachineInstr(Orig);
}
-bool
-TargetInstrInfoImpl::isIdentical(const MachineInstr *MI,
- const MachineInstr *Other,
- const MachineRegisterInfo *MRI) const {
- if (MI->getOpcode() != Other->getOpcode() ||
- MI->getNumOperands() != Other->getNumOperands())
- return false;
-
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- const MachineOperand &OMO = Other->getOperand(i);
- if (MO.isReg() && MO.isDef()) {
- assert(OMO.isReg() && OMO.isDef());
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- if (Reg != OMO.getReg())
- return false;
- } else if (MRI->getRegClass(MO.getReg()) !=
- MRI->getRegClass(OMO.getReg()))
- return false;
-
- continue;
- }
-
- if (!MO.isIdenticalTo(OMO))
- return false;
- }
-
- return true;
-}
-
unsigned
TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const {
unsigned FnSize = 0;
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
new file mode 100644
index 0000000..d127f53
--- /dev/null
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -0,0 +1,902 @@
+//===-- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes used to handle lowerings specific to common
+// object file formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+using namespace dwarf;
+
+//===----------------------------------------------------------------------===//
+// ELF
+//===----------------------------------------------------------------------===//
+typedef StringMap<const MCSectionELF*> ELFUniqueMapTy;
+
+TargetLoweringObjectFileELF::~TargetLoweringObjectFileELF() {
+ // If we have the section uniquing map, free it.
+ delete (ELFUniqueMapTy*)UniquingMap;
+}
+
+const MCSection *TargetLoweringObjectFileELF::
+getELFSection(StringRef Section, unsigned Type, unsigned Flags,
+ SectionKind Kind, bool IsExplicit) const {
+ if (UniquingMap == 0)
+ UniquingMap = new ELFUniqueMapTy();
+ ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)UniquingMap;
+
+ // Do the lookup, if we have a hit, return it.
+ const MCSectionELF *&Entry = Map[Section];
+ if (Entry) return Entry;
+
+ return Entry = MCSectionELF::Create(Section, Type, Flags, Kind, IsExplicit,
+ getContext());
+}
+
+void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ if (UniquingMap != 0)
+ ((ELFUniqueMapTy*)UniquingMap)->clear();
+ TargetLoweringObjectFile::Initialize(Ctx, TM);
+
+ BSSSection =
+ getELFSection(".bss", MCSectionELF::SHT_NOBITS,
+ MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
+ SectionKind::getBSS());
+
+ TextSection =
+ getELFSection(".text", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_EXECINSTR | MCSectionELF::SHF_ALLOC,
+ SectionKind::getText());
+
+ DataSection =
+ getELFSection(".data", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+
+ ReadOnlySection =
+ getELFSection(".rodata", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+
+ TLSDataSection =
+ getELFSection(".tdata", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
+ MCSectionELF::SHF_WRITE, SectionKind::getThreadData());
+
+ TLSBSSSection =
+ getELFSection(".tbss", MCSectionELF::SHT_NOBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
+ MCSectionELF::SHF_WRITE, SectionKind::getThreadBSS());
+
+ DataRelSection =
+ getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ DataRelLocalSection =
+ getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
+ SectionKind::getDataRelLocal());
+
+ DataRelROSection =
+ getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRel());
+
+ DataRelROLocalSection =
+ getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRelLocal());
+
+ MergeableConst4Section =
+ getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE,
+ SectionKind::getMergeableConst4());
+
+ MergeableConst8Section =
+ getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE,
+ SectionKind::getMergeableConst8());
+
+ MergeableConst16Section =
+ getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE,
+ SectionKind::getMergeableConst16());
+
+ StaticCtorSection =
+ getELFSection(".ctors", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ StaticDtorSection =
+ getELFSection(".dtors", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ // Exception Handling Sections.
+
+ // FIXME: We're emitting LSDA info into a readonly section on ELF, even though
+ // it contains relocatable pointers. In PIC mode, this is probably a big
+ // runtime hit for C++ apps. Either the contents of the LSDA need to be
+ // adjusted or this should be a data section.
+ LSDASection =
+ getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC, SectionKind::getReadOnly());
+ EHFrameSection =
+ getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
+ SectionKind::getDataRel());
+
+ // Debug Info Sections.
+ DwarfAbbrevSection =
+ getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfInfoSection =
+ getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfLineSection =
+ getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfFrameSection =
+ getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfStrSection =
+ getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfLocSection =
+ getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfARangesSection =
+ getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfRangesSection =
+ getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+}
+
+
+static SectionKind
+getELFKindForNamedSection(StringRef Name, SectionKind K) {
+ if (Name.empty() || Name[0] != '.') return K;
+
+ // Some lame default implementation based on some magic section names.
+ if (Name == ".bss" ||
+ Name.startswith(".bss.") ||
+ Name.startswith(".gnu.linkonce.b.") ||
+ Name.startswith(".llvm.linkonce.b.") ||
+ Name == ".sbss" ||
+ Name.startswith(".sbss.") ||
+ Name.startswith(".gnu.linkonce.sb.") ||
+ Name.startswith(".llvm.linkonce.sb."))
+ return SectionKind::getBSS();
+
+ if (Name == ".tdata" ||
+ Name.startswith(".tdata.") ||
+ Name.startswith(".gnu.linkonce.td.") ||
+ Name.startswith(".llvm.linkonce.td."))
+ return SectionKind::getThreadData();
+
+ if (Name == ".tbss" ||
+ Name.startswith(".tbss.") ||
+ Name.startswith(".gnu.linkonce.tb.") ||
+ Name.startswith(".llvm.linkonce.tb."))
+ return SectionKind::getThreadBSS();
+
+ return K;
+}
+
+
+static unsigned getELFSectionType(StringRef Name, SectionKind K) {
+
+ if (Name == ".init_array")
+ return MCSectionELF::SHT_INIT_ARRAY;
+
+ if (Name == ".fini_array")
+ return MCSectionELF::SHT_FINI_ARRAY;
+
+ if (Name == ".preinit_array")
+ return MCSectionELF::SHT_PREINIT_ARRAY;
+
+ if (K.isBSS() || K.isThreadBSS())
+ return MCSectionELF::SHT_NOBITS;
+
+ return MCSectionELF::SHT_PROGBITS;
+}
+
+
+static unsigned
+getELFSectionFlags(SectionKind K) {
+ unsigned Flags = 0;
+
+ if (!K.isMetadata())
+ Flags |= MCSectionELF::SHF_ALLOC;
+
+ if (K.isText())
+ Flags |= MCSectionELF::SHF_EXECINSTR;
+
+ if (K.isWriteable())
+ Flags |= MCSectionELF::SHF_WRITE;
+
+ if (K.isThreadLocal())
+ Flags |= MCSectionELF::SHF_TLS;
+
+ // K.isMergeableConst() is left out to honour PR4650
+ if (K.isMergeableCString() || K.isMergeableConst4() ||
+ K.isMergeableConst8() || K.isMergeableConst16())
+ Flags |= MCSectionELF::SHF_MERGE;
+
+ if (K.isMergeableCString())
+ Flags |= MCSectionELF::SHF_STRINGS;
+
+ return Flags;
+}
+
+
+const MCSection *TargetLoweringObjectFileELF::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ StringRef SectionName = GV->getSection();
+
+ // Infer section flags from the section name if we can.
+ Kind = getELFKindForNamedSection(SectionName, Kind);
+
+ return getELFSection(SectionName,
+ getELFSectionType(SectionName, Kind),
+ getELFSectionFlags(Kind), Kind, true);
+}
+
+static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) {
+ if (Kind.isText()) return ".gnu.linkonce.t.";
+ if (Kind.isReadOnly()) return ".gnu.linkonce.r.";
+
+ if (Kind.isThreadData()) return ".gnu.linkonce.td.";
+ if (Kind.isThreadBSS()) return ".gnu.linkonce.tb.";
+
+ if (Kind.isDataNoRel()) return ".gnu.linkonce.d.";
+ if (Kind.isDataRelLocal()) return ".gnu.linkonce.d.rel.local.";
+ if (Kind.isDataRel()) return ".gnu.linkonce.d.rel.";
+ if (Kind.isReadOnlyWithRelLocal()) return ".gnu.linkonce.d.rel.ro.local.";
+
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return ".gnu.linkonce.d.rel.ro.";
+}
+
+const MCSection *TargetLoweringObjectFileELF::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+
+ // If this global is linkonce/weak and the target handles this by emitting it
+ // into a 'uniqued' section name, create and return the section now.
+ if (GV->isWeakForLinker() && !Kind.isCommon() && !Kind.isBSS()) {
+ const char *Prefix = getSectionPrefixForUniqueGlobal(Kind);
+ SmallString<128> Name;
+ Name.append(Prefix, Prefix+strlen(Prefix));
+ Mang->getNameWithPrefix(Name, GV, false);
+ return getELFSection(Name.str(), getELFSectionType(Name.str(), Kind),
+ getELFSectionFlags(Kind), Kind);
+ }
+
+ if (Kind.isText()) return TextSection;
+
+ if (Kind.isMergeable1ByteCString() ||
+ Kind.isMergeable2ByteCString() ||
+ Kind.isMergeable4ByteCString()) {
+
+ // We also need alignment here.
+ // FIXME: this is getting the alignment of the character, not the
+ // alignment of the global!
+ unsigned Align =
+ TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV));
+
+ const char *SizeSpec = ".rodata.str1.";
+ if (Kind.isMergeable2ByteCString())
+ SizeSpec = ".rodata.str2.";
+ else if (Kind.isMergeable4ByteCString())
+ SizeSpec = ".rodata.str4.";
+ else
+ assert(Kind.isMergeable1ByteCString() && "unknown string width");
+
+
+ std::string Name = SizeSpec + utostr(Align);
+ return getELFSection(Name, MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |
+ MCSectionELF::SHF_MERGE |
+ MCSectionELF::SHF_STRINGS,
+ Kind);
+ }
+
+ if (Kind.isMergeableConst()) {
+ if (Kind.isMergeableConst4() && MergeableConst4Section)
+ return MergeableConst4Section;
+ if (Kind.isMergeableConst8() && MergeableConst8Section)
+ return MergeableConst8Section;
+ if (Kind.isMergeableConst16() && MergeableConst16Section)
+ return MergeableConst16Section;
+ return ReadOnlySection; // .const
+ }
+
+ if (Kind.isReadOnly()) return ReadOnlySection;
+
+ if (Kind.isThreadData()) return TLSDataSection;
+ if (Kind.isThreadBSS()) return TLSBSSSection;
+
+ // Note: we claim that common symbols are put in BSSSection, but they are
+ // really emitted with the magic .comm directive, which creates a symbol table
+ // entry but not a section.
+ if (Kind.isBSS() || Kind.isCommon()) return BSSSection;
+
+ if (Kind.isDataNoRel()) return DataSection;
+ if (Kind.isDataRelLocal()) return DataRelLocalSection;
+ if (Kind.isDataRel()) return DataRelSection;
+ if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
+
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return DataRelROSection;
+}
+
+/// getSectionForConstant - Given a mergeable constant with the
+/// specified size and relocation information, return a section that it
+/// should be placed in.
+const MCSection *TargetLoweringObjectFileELF::
+getSectionForConstant(SectionKind Kind) const {
+ if (Kind.isMergeableConst4() && MergeableConst4Section)
+ return MergeableConst4Section;
+ if (Kind.isMergeableConst8() && MergeableConst8Section)
+ return MergeableConst8Section;
+ if (Kind.isMergeableConst16() && MergeableConst16Section)
+ return MergeableConst16Section;
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
+ if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return DataRelROSection;
+}
+
+const MCExpr *TargetLoweringObjectFileELF::
+getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding) const {
+
+ if (Encoding & dwarf::DW_EH_PE_indirect) {
+ MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += ".DW.stub";
+
+ // Add information about the stub reference to ELFMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str());
+ MCSymbol *&StubSym = ELFMMI.getGVStubEntry(Sym);
+ if (StubSym == 0) {
+ Name.clear();
+ Mang->getNameWithPrefix(Name, GV, false);
+ StubSym = getContext().GetOrCreateSymbol(Name.str());
+ }
+
+ return TargetLoweringObjectFile::
+ getSymbolForDwarfReference(Sym, MMI,
+ Encoding & ~dwarf::DW_EH_PE_indirect);
+ }
+
+ return TargetLoweringObjectFile::
+ getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding);
+}
+
+//===----------------------------------------------------------------------===//
+// MachO
+//===----------------------------------------------------------------------===//
+
+typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
+
+TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() {
+ // If we have the MachO uniquing map, free it.
+ delete (MachOUniqueMapTy*)UniquingMap;
+}
+
+
+const MCSectionMachO *TargetLoweringObjectFileMachO::
+getMachOSection(StringRef Segment, StringRef Section,
+ unsigned TypeAndAttributes,
+ unsigned Reserved2, SectionKind Kind) const {
+ // We unique sections by their segment/section pair. The returned section
+ // may not have the same flags as the requested section, if so this should be
+ // diagnosed by the client as an error.
+
+ // Create the map if it doesn't already exist.
+ if (UniquingMap == 0)
+ UniquingMap = new MachOUniqueMapTy();
+ MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)UniquingMap;
+
+ // Form the name to look up.
+ SmallString<64> Name;
+ Name += Segment;
+ Name.push_back(',');
+ Name += Section;
+
+ // Do the lookup, if we have a hit, return it.
+ const MCSectionMachO *&Entry = Map[Name.str()];
+ if (Entry) return Entry;
+
+ // Otherwise, return a new section.
+ return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes,
+ Reserved2, Kind, getContext());
+}
+
+
+void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ if (UniquingMap != 0)
+ ((MachOUniqueMapTy*)UniquingMap)->clear();
+ TargetLoweringObjectFile::Initialize(Ctx, TM);
+
+ TextSection // .text
+ = getMachOSection("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ DataSection // .data
+ = getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel());
+
+ CStringSection // .cstring
+ = getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS,
+ SectionKind::getMergeable1ByteCString());
+ UStringSection
+ = getMachOSection("__TEXT","__ustring", 0,
+ SectionKind::getMergeable2ByteCString());
+ FourByteConstantSection // .literal4
+ = getMachOSection("__TEXT", "__literal4", MCSectionMachO::S_4BYTE_LITERALS,
+ SectionKind::getMergeableConst4());
+ EightByteConstantSection // .literal8
+ = getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS,
+ SectionKind::getMergeableConst8());
+
+ // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back
+ // to using it in -static mode.
+ SixteenByteConstantSection = 0;
+ if (TM.getRelocationModel() != Reloc::Static &&
+ TM.getTargetData()->getPointerSize() == 32)
+ SixteenByteConstantSection = // .literal16
+ getMachOSection("__TEXT", "__literal16",MCSectionMachO::S_16BYTE_LITERALS,
+ SectionKind::getMergeableConst16());
+
+ ReadOnlySection // .const
+ = getMachOSection("__TEXT", "__const", 0, SectionKind::getReadOnly());
+
+ TextCoalSection
+ = getMachOSection("__TEXT", "__textcoal_nt",
+ MCSectionMachO::S_COALESCED |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ ConstTextCoalSection
+ = getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED,
+ SectionKind::getText());
+ ConstDataCoalSection
+ = getMachOSection("__DATA","__const_coal", MCSectionMachO::S_COALESCED,
+ SectionKind::getText());
+ ConstDataSection // .const_data
+ = getMachOSection("__DATA", "__const", 0,
+ SectionKind::getReadOnlyWithRel());
+ DataCoalSection
+ = getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED,
+ SectionKind::getDataRel());
+ DataCommonSection
+ = getMachOSection("__DATA","__common", MCSectionMachO::S_ZEROFILL,
+ SectionKind::getBSS());
+ DataBSSSection
+ = getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL,
+ SectionKind::getBSS());
+
+
+ LazySymbolPointerSection
+ = getMachOSection("__DATA", "__la_symbol_ptr",
+ MCSectionMachO::S_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+ NonLazySymbolPointerSection
+ = getMachOSection("__DATA", "__nl_symbol_ptr",
+ MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ StaticCtorSection
+ = getMachOSection("__TEXT", "__constructor", 0,SectionKind::getDataRel());
+ StaticDtorSection
+ = getMachOSection("__TEXT", "__destructor", 0, SectionKind::getDataRel());
+ } else {
+ StaticCtorSection
+ = getMachOSection("__DATA", "__mod_init_func",
+ MCSectionMachO::S_MOD_INIT_FUNC_POINTERS,
+ SectionKind::getDataRel());
+ StaticDtorSection
+ = getMachOSection("__DATA", "__mod_term_func",
+ MCSectionMachO::S_MOD_TERM_FUNC_POINTERS,
+ SectionKind::getDataRel());
+ }
+
+ // Exception Handling.
+ LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0,
+ SectionKind::getDataRel());
+ EHFrameSection =
+ getMachOSection("__TEXT", "__eh_frame",
+ MCSectionMachO::S_COALESCED |
+ MCSectionMachO::S_ATTR_NO_TOC |
+ MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS |
+ MCSectionMachO::S_ATTR_LIVE_SUPPORT,
+ SectionKind::getReadOnly());
+
+ // Debug Information.
+ DwarfAbbrevSection =
+ getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfInfoSection =
+ getMachOSection("__DWARF", "__debug_info", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfLineSection =
+ getMachOSection("__DWARF", "__debug_line", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfFrameSection =
+ getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ getMachOSection("__DWARF", "__debug_pubnames", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfStrSection =
+ getMachOSection("__DWARF", "__debug_str", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfLocSection =
+ getMachOSection("__DWARF", "__debug_loc", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfARangesSection =
+ getMachOSection("__DWARF", "__debug_aranges", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfRangesSection =
+ getMachOSection("__DWARF", "__debug_ranges", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ getMachOSection("__DWARF", "__debug_macinfo", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfDebugInlineSection =
+ getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+}
+
+const MCSection *TargetLoweringObjectFileMachO::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // Parse the section specifier and create it if valid.
+ StringRef Segment, Section;
+ unsigned TAA, StubSize;
+ std::string ErrorCode =
+ MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section,
+ TAA, StubSize);
+ if (!ErrorCode.empty()) {
+ // If invalid, report the error with llvm_report_error.
+ llvm_report_error("Global variable '" + GV->getNameStr() +
+ "' has an invalid section specifier '" + GV->getSection()+
+ "': " + ErrorCode + ".");
+ // Fall back to dropping it into the data section.
+ return DataSection;
+ }
+
+ // Get the section.
+ const MCSectionMachO *S =
+ getMachOSection(Segment, Section, TAA, StubSize, Kind);
+
+ // Okay, now that we got the section, verify that the TAA & StubSize agree.
+ // If the user declared multiple globals with different section flags, we need
+ // to reject it here.
+ if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) {
+ // If invalid, report the error with llvm_report_error.
+ llvm_report_error("Global variable '" + GV->getNameStr() +
+ "' section type or attributes does not match previous"
+ " section specifier");
+ }
+
+ return S;
+}
+
+const MCSection *TargetLoweringObjectFileMachO::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ assert(!Kind.isThreadLocal() && "Darwin doesn't support TLS");
+
+ if (Kind.isText())
+ return GV->isWeakForLinker() ? TextCoalSection : TextSection;
+
+ // If this is weak/linkonce, put this in a coalescable section, either in text
+ // or data depending on if it is writable.
+ if (GV->isWeakForLinker()) {
+ if (Kind.isReadOnly())
+ return ConstTextCoalSection;
+ return DataCoalSection;
+ }
+
+ // FIXME: Alignment check should be handled by section classifier.
+ if (Kind.isMergeable1ByteCString() &&
+ TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ return CStringSection;
+
+ // Do not put 16-bit arrays in the UString section if they have an
+ // externally visible label, this runs into issues with certain linker
+ // versions.
+ if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() &&
+ TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ return UStringSection;
+
+ if (Kind.isMergeableConst()) {
+ if (Kind.isMergeableConst4())
+ return FourByteConstantSection;
+ if (Kind.isMergeableConst8())
+ return EightByteConstantSection;
+ if (Kind.isMergeableConst16() && SixteenByteConstantSection)
+ return SixteenByteConstantSection;
+ }
+
+ // Otherwise, if it is readonly, but not something we can specially optimize,
+ // just drop it in .const.
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
+ // If this is marked const, put it into a const section. But if the dynamic
+ // linker needs to write to it, put it in the data segment.
+ if (Kind.isReadOnlyWithRel())
+ return ConstDataSection;
+
+ // Put zero initialized globals with strong external linkage in the
+ // DATA, __common section with the .zerofill directive.
+ if (Kind.isBSSExtern())
+ return DataCommonSection;
+
+ // Put zero initialized globals with local linkage in __DATA,__bss directive
+ // with the .zerofill directive (aka .lcomm).
+ if (Kind.isBSSLocal())
+ return DataBSSSection;
+
+ // Otherwise, just drop the variable in the normal data section.
+ return DataSection;
+}
+
+const MCSection *
+TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const {
+ // If this constant requires a relocation, we have to put it in the data
+ // segment, not in the text segment.
+ if (Kind.isDataRel() || Kind.isReadOnlyWithRel())
+ return ConstDataSection;
+
+ if (Kind.isMergeableConst4())
+ return FourByteConstantSection;
+ if (Kind.isMergeableConst8())
+ return EightByteConstantSection;
+ if (Kind.isMergeableConst16() && SixteenByteConstantSection)
+ return SixteenByteConstantSection;
+ return ReadOnlySection; // .const
+}
+
+/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide
+/// not to emit the UsedDirective for some symbols in llvm.used.
+// FIXME: REMOVE this (rdar://7071300)
+bool TargetLoweringObjectFileMachO::
+shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const {
+ /// On Darwin, internally linked data beginning with "L" or "l" does not have
+ /// the directive emitted (this occurs in ObjC metadata).
+ if (!GV) return false;
+
+ // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix.
+ if (GV->hasLocalLinkage() && !isa<Function>(GV)) {
+ // FIXME: ObjC metadata is currently emitted as internal symbols that have
+ // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and
+ // this horrible hack can go away.
+ SmallString<64> Name;
+ Mang->getNameWithPrefix(Name, GV, false);
+ if (Name[0] == 'L' || Name[0] == 'l')
+ return false;
+ }
+
+ return true;
+}
+
+const MCExpr *TargetLoweringObjectFileMachO::
+getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding) const {
+ // The mach-o version of this method defaults to returning a stub reference.
+
+ if (Encoding & DW_EH_PE_indirect) {
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += "$non_lazy_ptr";
+
+ // Add information about the stub reference to MachOMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str());
+ MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym);
+ if (StubSym == 0) {
+ Name.clear();
+ Mang->getNameWithPrefix(Name, GV, false);
+ StubSym = getContext().GetOrCreateSymbol(Name.str());
+ }
+
+ return TargetLoweringObjectFile::
+ getSymbolForDwarfReference(Sym, MMI,
+ Encoding & ~dwarf::DW_EH_PE_indirect);
+ }
+
+ return TargetLoweringObjectFile::
+ getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding);
+}
+
+unsigned TargetLoweringObjectFileMachO::getPersonalityEncoding() const {
+ return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+}
+
+unsigned TargetLoweringObjectFileMachO::getLSDAEncoding() const {
+ return DW_EH_PE_pcrel;
+}
+
+unsigned TargetLoweringObjectFileMachO::getFDEEncoding() const {
+ return DW_EH_PE_pcrel;
+}
+
+unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const {
+ return DW_EH_PE_absptr;
+}
+
+//===----------------------------------------------------------------------===//
+// COFF
+//===----------------------------------------------------------------------===//
+
+typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
+
+TargetLoweringObjectFileCOFF::~TargetLoweringObjectFileCOFF() {
+ delete (COFFUniqueMapTy*)UniquingMap;
+}
+
+
+const MCSection *TargetLoweringObjectFileCOFF::
+getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const {
+ // Create the map if it doesn't already exist.
+ if (UniquingMap == 0)
+ UniquingMap = new MachOUniqueMapTy();
+ COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap;
+
+ // Do the lookup, if we have a hit, return it.
+ const MCSectionCOFF *&Entry = Map[Name];
+ if (Entry) return Entry;
+
+ return Entry = MCSectionCOFF::Create(Name, isDirective, Kind, getContext());
+}
+
+void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ if (UniquingMap != 0)
+ ((COFFUniqueMapTy*)UniquingMap)->clear();
+ TargetLoweringObjectFile::Initialize(Ctx, TM);
+ TextSection = getCOFFSection("\t.text", true, SectionKind::getText());
+ DataSection = getCOFFSection("\t.data", true, SectionKind::getDataRel());
+ StaticCtorSection =
+ getCOFFSection(".ctors", false, SectionKind::getDataRel());
+ StaticDtorSection =
+ getCOFFSection(".dtors", false, SectionKind::getDataRel());
+
+ // FIXME: We're emitting LSDA info into a readonly section on COFF, even
+ // though it contains relocatable pointers. In PIC mode, this is probably a
+ // big runtime hit for C++ apps. Either the contents of the LSDA need to be
+ // adjusted or this should be a data section.
+ LSDASection =
+ getCOFFSection(".gcc_except_table", false, SectionKind::getReadOnly());
+ EHFrameSection =
+ getCOFFSection(".eh_frame", false, SectionKind::getDataRel());
+
+ // Debug info.
+ // FIXME: Don't use 'directive' mode here.
+ DwarfAbbrevSection =
+ getCOFFSection("\t.section\t.debug_abbrev,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfInfoSection =
+ getCOFFSection("\t.section\t.debug_info,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfLineSection =
+ getCOFFSection("\t.section\t.debug_line,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfFrameSection =
+ getCOFFSection("\t.section\t.debug_frame,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ getCOFFSection("\t.section\t.debug_pubnames,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ getCOFFSection("\t.section\t.debug_pubtypes,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfStrSection =
+ getCOFFSection("\t.section\t.debug_str,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfLocSection =
+ getCOFFSection("\t.section\t.debug_loc,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfARangesSection =
+ getCOFFSection("\t.section\t.debug_aranges,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfRangesSection =
+ getCOFFSection("\t.section\t.debug_ranges,\"dr\"",
+ true, SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ getCOFFSection("\t.section\t.debug_macinfo,\"dr\"",
+ true, SectionKind::getMetadata());
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ return getCOFFSection(GV->getSection(), false, Kind);
+}
+
+static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
+ if (Kind.isText())
+ return ".text$linkonce";
+ if (Kind.isWriteable())
+ return ".data$linkonce";
+ return ".rdata$linkonce";
+}
+
+
+const MCSection *TargetLoweringObjectFileCOFF::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ assert(!Kind.isThreadLocal() && "Doesn't support TLS");
+
+ // If this global is linkonce/weak and the target handles this by emitting it
+ // into a 'uniqued' section name, create and return the section now.
+ if (GV->isWeakForLinker()) {
+ const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind);
+ SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+ Mang->getNameWithPrefix(Name, GV, false);
+ return getCOFFSection(Name.str(), false, Kind);
+ }
+
+ if (Kind.isText())
+ return getTextSection();
+
+ return getDataSection();
+}
+
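All three object-file flavours in the new file above share the same lazily built uniquing cache: a map keyed by section name that always hands back the one MCSection object created for that name. A minimal sketch of the pattern follows, assuming nothing beyond the standard library; Section and SectionUniquer are stand-ins, not the LLVM classes.

#include <memory>
#include <string>
#include <unordered_map>

struct Section {
  std::string Name;
  unsigned Type;
  unsigned Flags;
};

class SectionUniquer {
  // Built on first use, so a target that never asks for a section pays nothing;
  // mutable because lookups happen from const accessors, as in the real code.
  mutable std::unique_ptr<std::unordered_map<std::string, Section>> Map;
public:
  const Section &get(const std::string &Name, unsigned Type,
                     unsigned Flags) const {
    if (!Map)
      Map.reset(new std::unordered_map<std::string, Section>());
    auto It = Map->find(Name);
    if (It != Map->end())
      return It->second;              // hit: every caller sees the same object
    return Map->emplace(Name, Section{Name, Type, Flags}).first->second;
  }
};

Creating the map lazily also lets the destructor simply delete whatever was allocated, mirroring the delete of UniquingMap in the destructors above.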
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 6c7c1a1..c840b39 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -160,7 +160,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
MachineBasicBlock::iterator OldPos) {
// Check if it's safe to move this instruction.
bool SeenStore = true; // Be conservative.
- if (!MI->isSafeToMove(TII, SeenStore, AA))
+ if (!MI->isSafeToMove(TII, AA, SeenStore))
return false;
unsigned DefReg = 0;
@@ -213,6 +213,9 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
unsigned NumVisited = 0;
for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) {
MachineInstr *OtherMI = I;
+ // DBG_VALUE cannot be counted against the limit.
+ if (OtherMI->isDebugValue())
+ continue;
if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost.
return false;
++NumVisited;
@@ -451,13 +454,10 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
const TargetInstrInfo *TII,
bool &IsCopy,
unsigned &DstReg, bool &IsDstPhys) {
- MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg);
- if (UI == MRI->use_end())
- return 0;
- MachineInstr &UseMI = *UI;
- if (++UI != MRI->use_end())
- // More than one use.
+ if (!MRI->hasOneNonDBGUse(Reg))
+ // None or more than one use.
return 0;
+ MachineInstr &UseMI = *MRI->use_nodbg_begin(Reg);
if (UseMI.getParent() != MBB)
return 0;
unsigned SrcReg;
@@ -923,6 +923,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
mi != me; ) {
MachineBasicBlock::iterator nmi = llvm::next(mi);
+ if (mi->isDebugValue()) {
+ mi = nmi;
+ continue;
+ }
const TargetInstrDesc &TID = mi->getDesc();
bool FirstTied = true;
@@ -1021,7 +1025,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
// copying it.
if (DefMI &&
DefMI->getDesc().isAsCheapAsAMove() &&
- DefMI->isSafeToReMat(TII, regB, AA) &&
+ DefMI->isSafeToReMat(TII, AA, regB) &&
isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
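The hunks above apply the same rule in several places: DBG_VALUE instructions must not influence codegen decisions, so they are skipped when counting instructions against the scan limit, when walking the block in runOnMachineFunction, and when asking whether a register has a single (non-debug) use. A small sketch of the counting side only, with simplified types rather than MachineInstr:

#include <cstddef>
#include <vector>

struct Inst { bool IsDebugValue = false; };

// Returns true when no more than Limit *real* instructions lie in [Begin, End).
static bool withinScanLimit(const std::vector<Inst> &Block,
                            std::size_t Begin, std::size_t End,
                            unsigned Limit) {
  unsigned NumVisited = 0;
  for (std::size_t i = Begin; i != End; ++i) {
    if (Block[i].IsDebugValue)
      continue;             // presence of debug info must not change the answer
    if (++NumVisited > Limit)
      return false;
  }
  return true;
}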
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 5956b61..ed02696 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -261,19 +261,21 @@ bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) {
void VirtRegMap::print(raw_ostream &OS, const Module* M) const {
const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
OS << "********** REGISTER MAP **********\n";
for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) {
if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG)
OS << "[reg" << i << " -> " << TRI->getName(Virt2PhysMap[i])
- << "]\n";
+ << "] " << MRI.getRegClass(i)->getName() << "\n";
}
for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
if (Virt2StackSlotMap[i] != VirtRegMap::NO_STACK_SLOT)
- OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i] << "]\n";
+ OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i]
+ << "] " << MRI.getRegClass(i)->getName() << "\n";
OS << '\n';
}
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index ce62594..7aa0a91 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -46,7 +46,7 @@ namespace {
static cl::opt<RewriterName>
RewriterOpt("rewriter",
- cl::desc("Rewriter to use: (default: local)"),
+ cl::desc("Rewriter to use (default=local)"),
cl::Prefix,
cl::values(clEnumVal(local, "local rewriter"),
clEnumVal(trivial, "trivial rewriter"),
@@ -62,6 +62,7 @@ VirtRegRewriter::~VirtRegRewriter() {}
/// substitutePhysReg - Replace virtual register in MachineOperand with a
/// physical register. Do the right thing with the sub-register index.
+/// Note that operands may be added, so the MO reference is no longer valid.
static void substitutePhysReg(MachineOperand &MO, unsigned Reg,
const TargetRegisterInfo &TRI) {
if (unsigned SubIdx = MO.getSubReg()) {
@@ -123,14 +124,15 @@ struct TrivialRewriter : public VirtRegRewriter {
continue;
unsigned pReg = VRM.getPhys(reg);
mri->setPhysRegUsed(pReg);
- for (MachineRegisterInfo::reg_iterator regItr = mri->reg_begin(reg),
- regEnd = mri->reg_end(); regItr != regEnd;) {
- MachineOperand &mop = regItr.getOperand();
- assert(mop.isReg() && mop.getReg() == reg && "reg_iterator broken?");
- ++regItr;
- substitutePhysReg(mop, pReg, *tri);
- changed = true;
- }
+ // Copy the register use-list before traversing it.
+ SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist;
+ for (MachineRegisterInfo::reg_iterator I = mri->reg_begin(reg),
+ E = mri->reg_end(); I != E; ++I)
+ reglist.push_back(std::make_pair(&*I, I.getOperandNo()));
+ for (unsigned N=0; N != reglist.size(); ++N)
+ substitutePhysReg(reglist[N].first->getOperand(reglist[N].second),
+ pReg, *tri);
+ changed |= !reglist.empty();
}
}
@@ -1850,19 +1852,18 @@ private:
KilledMIRegs.clear();
for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
unsigned i = VirtUseOps[j];
- MachineOperand &MO = MI.getOperand(i);
- unsigned VirtReg = MO.getReg();
+ unsigned VirtReg = MI.getOperand(i).getReg();
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Not a virtual register?");
- unsigned SubIdx = MO.getSubReg();
+ unsigned SubIdx = MI.getOperand(i).getSubReg();
if (VRM.isAssignedReg(VirtReg)) {
// This virtual register was assigned a physreg!
unsigned Phys = VRM.getPhys(VirtReg);
RegInfo->setPhysRegUsed(Phys);
- if (MO.isDef())
+ if (MI.getOperand(i).isDef())
ReusedOperands.markClobbered(Phys);
- substitutePhysReg(MO, Phys, *TRI);
+ substitutePhysReg(MI.getOperand(i), Phys, *TRI);
if (VRM.isImplicitlyDefined(VirtReg))
// FIXME: Is this needed?
BuildMI(MBB, &MI, MI.getDebugLoc(),
@@ -1871,10 +1872,10 @@ private:
}
// This virtual register is now known to be a spilled value.
- if (!MO.isUse())
+ if (!MI.getOperand(i).isUse())
continue; // Handle defs in the loop below (handle use&def here though)
- bool AvoidReload = MO.isUndef();
+ bool AvoidReload = MI.getOperand(i).isUndef();
// Check if it is defined by an implicit def. It should not be spilled.
// Note, this is for correctness reason. e.g.
// 8 %reg1024<def> = IMPLICIT_DEF
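The TrivialRewriter change above fixes an iterator-invalidation hazard: substitutePhysReg() may add implicit operands, which grows the very use-list being walked, so the fix snapshots (instruction, operand index) pairs first and rewrites afterwards. Below is a self-contained sketch of that two-pass shape, using plain vectors instead of MachineRegisterInfo's use lists.

#include <cstddef>
#include <utility>
#include <vector>

struct Operand { unsigned Reg; unsigned SubIdx; };
struct Instr   { std::vector<Operand> Ops; };

// Hypothetical rewrite that, like substitutePhysReg, may append extra operands.
static void substituteReg(Instr &MI, unsigned OpNo, unsigned PhysReg) {
  MI.Ops[OpNo].Reg = PhysReg;
  if (MI.Ops[OpNo].SubIdx)
    MI.Ops.push_back(Operand{PhysReg, 0});     // growth invalidates iterators
}

static void rewriteVirtReg(std::vector<Instr> &Block, unsigned VirtReg,
                           unsigned PhysReg) {
  // Pass 1: record every (instruction, operand index) that mentions VirtReg.
  std::vector<std::pair<Instr *, unsigned>> Worklist;
  for (Instr &MI : Block)
    for (unsigned i = 0; i != MI.Ops.size(); ++i)
      if (MI.Ops[i].Reg == VirtReg)
        Worklist.push_back({&MI, i});
  // Pass 2: rewrite from the stable copy; operands appended during the rewrite
  // are never revisited, and no live iterator spans the mutation.
  for (const auto &Use : Worklist)
    substituteReg(*Use.first, Use.second, PhysReg);
}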
diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp
index 7bcd30a..9d07811 100644
--- a/lib/CompilerDriver/Action.cpp
+++ b/lib/CompilerDriver/Action.cpp
@@ -15,6 +15,7 @@
#include "llvm/CompilerDriver/BuiltinOptions.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SystemUtils.h"
#include "llvm/System/Program.h"
#include "llvm/System/TimeValue.h"
@@ -24,13 +25,23 @@
using namespace llvm;
using namespace llvmc;
+namespace llvmc {
+
+extern int Main(int argc, char** argv);
+extern const char* ProgramName;
+
+}
+
namespace {
int ExecuteProgram(const std::string& name,
const StrVector& args) {
sys::Path prog = sys::Program::FindProgramByName(name);
- if (prog.isEmpty())
- throw std::runtime_error("Can't find program '" + name + "'");
+ if (prog.isEmpty()) {
+ prog = FindExecutable(name, ProgramName, (void *)(intptr_t)&Main);
+ if (prog.isEmpty())
+ throw std::runtime_error("Can't find program '" + name + "'");
+ }
if (!prog.canExecute())
throw std::runtime_error("Program '" + name + "' is not executable.");
diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp
index 524607b..7d1c7fe 100644
--- a/lib/CompilerDriver/CompilationGraph.cpp
+++ b/lib/CompilerDriver/CompilationGraph.cpp
@@ -34,7 +34,8 @@ namespace llvmc {
const std::string& LanguageMap::GetLanguage(const sys::Path& File) const {
StringRef suf = File.getSuffix();
- LanguageMap::const_iterator Lang = this->find(suf);
+ LanguageMap::const_iterator Lang =
+ this->find(suf.empty() ? "*empty*" : suf);
if (Lang == this->end())
throw std::runtime_error("File '" + File.str() +
"' has unknown suffix '" + suf.str() + '\'');
@@ -313,7 +314,7 @@ int CompilationGraph::Build (const sys::Path& TempDir,
JoinTool* JT = &dynamic_cast<JoinTool&>(*CurNode->ToolPtr.getPtr());
// Are there any files in the join list?
- if (JT->JoinListEmpty())
+ if (JT->JoinListEmpty() && !(JT->WorksOnEmpty() && InputFilenames.empty()))
continue;
Action CurAction = JT->GenerateAction(CurNode->HasChildren(),
diff --git a/lib/CompilerDriver/Main.cpp b/lib/CompilerDriver/Main.cpp
index 3a3487a..b5e507d 100644
--- a/lib/CompilerDriver/Main.cpp
+++ b/lib/CompilerDriver/Main.cpp
@@ -100,7 +100,8 @@ int Main(int argc, char** argv) {
ProgramName = argv[0];
cl::ParseCommandLineOptions
- (argc, argv, "LLVM Compiler Driver (Work In Progress)", true);
+ (argc, argv, "LLVM Compiler Driver (Work In Progress)",
+ /* ReadResponseFiles = */ false);
PluginLoader Plugins;
Plugins.RunInitialization(langMap, graph);
@@ -126,10 +127,6 @@ int Main(int argc, char** argv) {
return 0;
}
- if (InputFilenames.empty()) {
- throw std::runtime_error("no input files");
- }
-
if (Time) {
GlobalTimeLog = new std::stringstream;
GlobalTimeLog->precision(2);
diff --git a/lib/CompilerDriver/Tool.cpp b/lib/CompilerDriver/Tool.cpp
index 9f4ab49..5e558ca 100644
--- a/lib/CompilerDriver/Tool.cpp
+++ b/lib/CompilerDriver/Tool.cpp
@@ -17,6 +17,8 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/System/Path.h"
+#include <algorithm>
+
using namespace llvm;
using namespace llvmc;
@@ -71,3 +73,22 @@ sys::Path Tool::OutFilename(const sys::Path& In,
}
return Out;
}
+
+namespace {
+ template <class A, class B>
+ bool CompareFirst (std::pair<A,B> p1, std::pair<A,B> p2) {
+ return std::less<A>()(p1.first, p2.first);
+ }
+}
+
+StrVector Tool::SortArgs(ArgsVector& Args) const {
+ StrVector Out;
+
+ // HACK: this won't be needed when we'll migrate away from CommandLine.
+ std::stable_sort(Args.begin(), Args.end(), &CompareFirst<unsigned, std::string>);
+ for (ArgsVector::iterator B = Args.begin(), E = Args.end(); B != E; ++B) {
+ Out.push_back(B->second);
+ }
+
+ return Out;
+}
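Tool::SortArgs above leans on a property of std::stable_sort: arguments that share the same position key keep their original relative order, so options attached to one slot are emitted in the order the user gave them. A tiny standalone illustration of that behaviour (not llvmc code):

#include <algorithm>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<unsigned, std::string>> Args = {
      {2, "-o"}, {2, "out.o"}, {1, "-c"}, {2, "-v"}};
  std::stable_sort(Args.begin(), Args.end(),
                   [](const std::pair<unsigned, std::string> &A,
                      const std::pair<unsigned, std::string> &B) {
                     return A.first < B.first;
                   });
  for (const auto &P : Args)
    std::cout << P.second << ' ';    // prints: -c -o out.o -v
  std::cout << '\n';
  return 0;
}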
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 3e684e1..b2e2a04 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -339,12 +339,12 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn,
}
// FALLS THROUGH
case 1:
- if (!FTy->getParamType(0)->isInteger(32)) {
+ if (!FTy->getParamType(0)->isIntegerTy(32)) {
llvm_report_error("Invalid type for first argument of main() supplied");
}
// FALLS THROUGH
case 0:
- if (!isa<IntegerType>(FTy->getReturnType()) &&
+ if (!FTy->getReturnType()->isIntegerTy() &&
!FTy->getReturnType()->isVoidTy()) {
llvm_report_error("Invalid return type of main() supplied");
}
@@ -599,22 +599,22 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
switch (Op0->getType()->getTypeID()) {
default: llvm_unreachable("Invalid bitcast operand");
case Type::IntegerTyID:
- assert(DestTy->isFloatingPoint() && "invalid bitcast");
+ assert(DestTy->isFloatingPointTy() && "invalid bitcast");
if (DestTy->isFloatTy())
GV.FloatVal = GV.IntVal.bitsToFloat();
else if (DestTy->isDoubleTy())
GV.DoubleVal = GV.IntVal.bitsToDouble();
break;
case Type::FloatTyID:
- assert(DestTy->isInteger(32) && "Invalid bitcast");
+ assert(DestTy->isIntegerTy(32) && "Invalid bitcast");
GV.IntVal.floatToBits(GV.FloatVal);
break;
case Type::DoubleTyID:
- assert(DestTy->isInteger(64) && "Invalid bitcast");
+ assert(DestTy->isIntegerTy(64) && "Invalid bitcast");
GV.IntVal.doubleToBits(GV.DoubleVal);
break;
case Type::PointerTyID:
- assert(isa<PointerType>(DestTy) && "Invalid bitcast");
+ assert(DestTy->isPointerTy() && "Invalid bitcast");
break; // getConstantValue(Op0) above already converted it
}
return GV;
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index 141cb27..c7495d4 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -87,11 +87,11 @@ void LLVMDisposeGenericValue(LLVMGenericValueRef GenVal) {
/*===-- Operations on execution engines -----------------------------------===*/
-LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
- LLVMModuleProviderRef MP,
- char **OutError) {
+LLVMBool LLVMCreateExecutionEngineForModule(LLVMExecutionEngineRef *OutEE,
+ LLVMModuleRef M,
+ char **OutError) {
std::string Error;
- EngineBuilder builder(unwrap(MP));
+ EngineBuilder builder(unwrap(M));
builder.setEngineKind(EngineKind::Either)
.setErrorStr(&Error);
if (ExecutionEngine *EE = builder.create()){
@@ -102,11 +102,11 @@ LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
return 1;
}
-LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp,
- LLVMModuleProviderRef MP,
- char **OutError) {
+LLVMBool LLVMCreateInterpreterForModule(LLVMExecutionEngineRef *OutInterp,
+ LLVMModuleRef M,
+ char **OutError) {
std::string Error;
- EngineBuilder builder(unwrap(MP));
+ EngineBuilder builder(unwrap(M));
builder.setEngineKind(EngineKind::Interpreter)
.setErrorStr(&Error);
if (ExecutionEngine *Interp = builder.create()) {
@@ -117,12 +117,12 @@ LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp,
return 1;
}
-LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT,
- LLVMModuleProviderRef MP,
- unsigned OptLevel,
- char **OutError) {
+LLVMBool LLVMCreateJITCompilerForModule(LLVMExecutionEngineRef *OutJIT,
+ LLVMModuleRef M,
+ unsigned OptLevel,
+ char **OutError) {
std::string Error;
- EngineBuilder builder(unwrap(MP));
+ EngineBuilder builder(unwrap(M));
builder.setEngineKind(EngineKind::JIT)
.setErrorStr(&Error)
.setOptLevel((CodeGenOpt::Level)OptLevel);
@@ -134,6 +134,35 @@ LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT,
return 1;
}
+LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
+ LLVMModuleProviderRef MP,
+ char **OutError) {
+ /* The module provider is now actually a module. */
+ return LLVMCreateExecutionEngineForModule(OutEE,
+ reinterpret_cast<LLVMModuleRef>(MP),
+ OutError);
+}
+
+LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp,
+ LLVMModuleProviderRef MP,
+ char **OutError) {
+ /* The module provider is now actually a module. */
+ return LLVMCreateInterpreterForModule(OutInterp,
+ reinterpret_cast<LLVMModuleRef>(MP),
+ OutError);
+}
+
+LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT,
+ LLVMModuleProviderRef MP,
+ unsigned OptLevel,
+ char **OutError) {
+ /* The module provider is now actually a module. */
+ return LLVMCreateJITCompilerForModule(OutJIT,
+ reinterpret_cast<LLVMModuleRef>(MP),
+ OptLevel, OutError);
+}
+
+
void LLVMDisposeExecutionEngine(LLVMExecutionEngineRef EE) {
delete unwrap(EE);
}
@@ -173,17 +202,29 @@ void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F) {
unwrap(EE)->freeMachineCodeForFunction(unwrap<Function>(F));
}
+void LLVMAddModule(LLVMExecutionEngineRef EE, LLVMModuleRef M){
+ unwrap(EE)->addModule(unwrap(M));
+}
+
void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){
- unwrap(EE)->addModule(unwrap(MP));
+ /* The module provider is now actually a module. */
+ LLVMAddModule(EE, reinterpret_cast<LLVMModuleRef>(MP));
+}
+
+LLVMBool LLVMRemoveModule(LLVMExecutionEngineRef EE, LLVMModuleRef M,
+ LLVMModuleRef *OutMod, char **OutError) {
+ Module *Mod = unwrap(M);
+ unwrap(EE)->removeModule(Mod);
+ *OutMod = wrap(Mod);
+ return 0;
}
LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE,
LLVMModuleProviderRef MP,
LLVMModuleRef *OutMod, char **OutError) {
- Module *M = unwrap(MP);
- unwrap(EE)->removeModule(M);
- *OutMod = wrap(M);
- return 0;
+ /* The module provider is now actually a module. */
+ return LLVMRemoveModule(EE, reinterpret_cast<LLVMModuleRef>(MP), OutMod,
+ OutError);
}
LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
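The bindings change above keeps the old ModuleProvider-based C entry points alive as thin forwarders: since a module provider is now just a module, the opaque handle is reinterpreted and passed to the new *ForModule function. A stripped-down sketch of that compatibility shim, using stand-in handle types rather than the real LLVM-C ones:

typedef struct OpaqueModule *ModuleRef;
typedef struct OpaqueModuleProvider *ModuleProviderRef;   // legacy handle

// New, preferred entry point (body is a stand-in for real engine construction).
static int CreateEngineForModule(void **OutEE, ModuleRef M, char **OutError) {
  (void)OutError;
  *OutEE = M;
  return 0;
}

// Deprecated entry point kept for source/ABI compatibility: forward the handle.
static int CreateEngine(void **OutEE, ModuleProviderRef MP, char **OutError) {
  return CreateEngineForModule(OutEE, reinterpret_cast<ModuleRef>(MP), OutError);
}

int main() {
  void *EE = nullptr;
  char *Err = nullptr;
  return CreateEngine(&EE, static_cast<ModuleProviderRef>(nullptr), &Err);
}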
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 73f5558..a2aad5a 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -591,7 +591,7 @@ void Interpreter::popStackAndReturnValueToCaller(const Type *RetTy,
ECStack.pop_back();
if (ECStack.empty()) { // Finished main. Put result into exit code...
- if (RetTy && RetTy->isInteger()) { // Nonvoid return type?
+ if (RetTy && RetTy->isIntegerTy()) { // Nonvoid return type?
ExitValue = Result; // Capture the exit value of the program
} else {
memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped));
@@ -761,7 +761,7 @@ void Interpreter::visitAllocaInst(AllocaInst &I) {
GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
gep_type_iterator E,
ExecutionContext &SF) {
- assert(isa<PointerType>(Ptr->getType()) &&
+ assert(Ptr->getType()->isPointerTy() &&
"Cannot getElementOffset of a nonpointer type!");
uint64_t Total = 0;
@@ -979,7 +979,7 @@ GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, const Type *DstTy,
const Type *SrcTy = SrcVal->getType();
uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
- assert(SrcTy->isFloatingPoint() && "Invalid FPToUI instruction");
+ assert(SrcTy->isFloatingPointTy() && "Invalid FPToUI instruction");
if (SrcTy->getTypeID() == Type::FloatTyID)
Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth);
@@ -993,7 +993,7 @@ GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, const Type *DstTy,
const Type *SrcTy = SrcVal->getType();
uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
- assert(SrcTy->isFloatingPoint() && "Invalid FPToSI instruction");
+ assert(SrcTy->isFloatingPointTy() && "Invalid FPToSI instruction");
if (SrcTy->getTypeID() == Type::FloatTyID)
Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth);
@@ -1005,7 +1005,7 @@ GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, const Type *DstTy,
GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, const Type *DstTy,
ExecutionContext &SF) {
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
- assert(DstTy->isFloatingPoint() && "Invalid UIToFP instruction");
+ assert(DstTy->isFloatingPointTy() && "Invalid UIToFP instruction");
if (DstTy->getTypeID() == Type::FloatTyID)
Dest.FloatVal = APIntOps::RoundAPIntToFloat(Src.IntVal);
@@ -1017,7 +1017,7 @@ GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, const Type *DstTy,
GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, const Type *DstTy,
ExecutionContext &SF) {
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
- assert(DstTy->isFloatingPoint() && "Invalid SIToFP instruction");
+ assert(DstTy->isFloatingPointTy() && "Invalid SIToFP instruction");
if (DstTy->getTypeID() == Type::FloatTyID)
Dest.FloatVal = APIntOps::RoundSignedAPIntToFloat(Src.IntVal);
@@ -1031,7 +1031,7 @@ GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, const Type *DstTy,
ExecutionContext &SF) {
uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
- assert(isa<PointerType>(SrcVal->getType()) && "Invalid PtrToInt instruction");
+ assert(SrcVal->getType()->isPointerTy() && "Invalid PtrToInt instruction");
Dest.IntVal = APInt(DBitWidth, (intptr_t) Src.PointerVal);
return Dest;
@@ -1040,7 +1040,7 @@ GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, const Type *DstTy,
GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, const Type *DstTy,
ExecutionContext &SF) {
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
- assert(isa<PointerType>(DstTy) && "Invalid PtrToInt instruction");
+ assert(DstTy->isPointerTy() && "Invalid PtrToInt instruction");
uint32_t PtrSize = TD.getPointerSizeInBits();
if (PtrSize != Src.IntVal.getBitWidth())
@@ -1055,27 +1055,27 @@ GenericValue Interpreter::executeBitCastInst(Value *SrcVal, const Type *DstTy,
const Type *SrcTy = SrcVal->getType();
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
- if (isa<PointerType>(DstTy)) {
- assert(isa<PointerType>(SrcTy) && "Invalid BitCast");
+ if (DstTy->isPointerTy()) {
+ assert(SrcTy->isPointerTy() && "Invalid BitCast");
Dest.PointerVal = Src.PointerVal;
- } else if (DstTy->isInteger()) {
+ } else if (DstTy->isIntegerTy()) {
if (SrcTy->isFloatTy()) {
Dest.IntVal.zext(sizeof(Src.FloatVal) * CHAR_BIT);
Dest.IntVal.floatToBits(Src.FloatVal);
} else if (SrcTy->isDoubleTy()) {
Dest.IntVal.zext(sizeof(Src.DoubleVal) * CHAR_BIT);
Dest.IntVal.doubleToBits(Src.DoubleVal);
- } else if (SrcTy->isInteger()) {
+ } else if (SrcTy->isIntegerTy()) {
Dest.IntVal = Src.IntVal;
} else
llvm_unreachable("Invalid BitCast");
} else if (DstTy->isFloatTy()) {
- if (SrcTy->isInteger())
+ if (SrcTy->isIntegerTy())
Dest.FloatVal = Src.IntVal.bitsToFloat();
else
Dest.FloatVal = Src.FloatVal;
} else if (DstTy->isDoubleTy()) {
- if (SrcTy->isInteger())
+ if (SrcTy->isIntegerTy())
Dest.DoubleVal = Src.IntVal.bitsToDouble();
else
Dest.DoubleVal = Src.DoubleVal;
diff --git a/lib/ExecutionEngine/JIT/Android.mk b/lib/ExecutionEngine/JIT/Android.mk
new file mode 100644
index 0000000..1c7e27f
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/Android.mk
@@ -0,0 +1,32 @@
+LOCAL_PATH:= $(call my-dir)
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+ Intercept.cpp \
+ JIT.cpp \
+ JITDebugRegisterer.cpp \
+ JITDwarfEmitter.cpp \
+ JITEmitter.cpp \
+ JITMemoryManager.cpp \
+ OProfileJITEventListener.cpp \
+ TargetSelect.cpp
+
+LOCAL_MODULE:= libLLVMJIT
+
+include $(LLVM_HOST_BUILD_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+ JITMemoryManager.cpp
+
+LOCAL_MODULE:= libLLVMJIT
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index 616a66e..dd74d73 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -18,6 +18,7 @@
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineCodeInfo.h"
#include "llvm/ExecutionEngine/GenericValue.h"
@@ -27,6 +28,7 @@
#include "llvm/Target/TargetJITInfo.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MutexGuard.h"
#include "llvm/System/DynamicLibrary.h"
#include "llvm/Config/config.h"
@@ -237,9 +239,53 @@ ExecutionEngine *JIT::createJIT(Module *M,
}
}
+namespace {
+/// This class supports the global getPointerToNamedFunction(), which allows
+/// bugpoint or gdb users to search for a function by name without any context.
+class JitPool {
+ SmallPtrSet<JIT*, 1> JITs; // Optimize for process containing just 1 JIT.
+ mutable sys::Mutex Lock;
+public:
+ void Add(JIT *jit) {
+ MutexGuard guard(Lock);
+ JITs.insert(jit);
+ }
+ void Remove(JIT *jit) {
+ MutexGuard guard(Lock);
+ JITs.erase(jit);
+ }
+ void *getPointerToNamedFunction(const char *Name) const {
+ MutexGuard guard(Lock);
+ assert(JITs.size() != 0 && "No Jit registered");
+ //search function in every instance of JIT
+ for (SmallPtrSet<JIT*, 1>::const_iterator Jit = JITs.begin(),
+ end = JITs.end();
+ Jit != end; ++Jit) {
+ if (Function *F = (*Jit)->FindFunctionNamed(Name))
+ return (*Jit)->getPointerToFunction(F);
+ }
+ // The function is not available : fallback on the first created (will
+ // search in symbol of the current program/library)
+ return (*JITs.begin())->getPointerToNamedFunction(Name);
+ }
+};
+ManagedStatic<JitPool> AllJits;
+}
+extern "C" {
+ // getPointerToNamedFunction - This function is used as a global wrapper to
+ // JIT::getPointerToNamedFunction for the purpose of resolving symbols when
+ // bugpoint is debugging the JIT. In that scenario, we are loading an .so and
+ // need to resolve function(s) that are being mis-codegenerated, so we need to
+ // resolve their addresses at runtime, and this is the way to do it.
+ void *getPointerToNamedFunction(const char *Name) {
+ return AllJits->getPointerToNamedFunction(Name);
+ }
+}
+
JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode)
- : ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode) {
+ : ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode),
+ isAlreadyCodeGenerating(false) {
setTargetData(TM.getTargetData());
jitstate = new JITState(M);
@@ -247,6 +293,9 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
// Initialize JCE
JCE = createEmitter(*this, JMM, TM);
+ // Register in global list of all JITs.
+ AllJits->Add(this);
+
// Add target data
MutexGuard locked(lock);
FunctionPassManager &PM = jitstate->getPM(locked);
@@ -281,6 +330,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
}
JIT::~JIT() {
+ AllJits->Remove(this);
delete jitstate;
delete JCE;
delete &TM;
@@ -361,12 +411,12 @@ GenericValue JIT::runFunction(Function *F,
// Handle some common cases first. These cases correspond to common `main'
// prototypes.
- if (RetTy->isInteger(32) || RetTy->isVoidTy()) {
+ if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) {
switch (ArgValues.size()) {
case 3:
- if (FTy->getParamType(0)->isInteger(32) &&
- isa<PointerType>(FTy->getParamType(1)) &&
- isa<PointerType>(FTy->getParamType(2))) {
+ if (FTy->getParamType(0)->isIntegerTy(32) &&
+ FTy->getParamType(1)->isPointerTy() &&
+ FTy->getParamType(2)->isPointerTy()) {
int (*PF)(int, char **, const char **) =
(int(*)(int, char **, const char **))(intptr_t)FPtr;
@@ -379,8 +429,8 @@ GenericValue JIT::runFunction(Function *F,
}
break;
case 2:
- if (FTy->getParamType(0)->isInteger(32) &&
- isa<PointerType>(FTy->getParamType(1))) {
+ if (FTy->getParamType(0)->isIntegerTy(32) &&
+ FTy->getParamType(1)->isPointerTy()) {
int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr;
// Call the function.
@@ -392,7 +442,7 @@ GenericValue JIT::runFunction(Function *F,
break;
case 1:
if (FTy->getNumParams() == 1 &&
- FTy->getParamType(0)->isInteger(32)) {
+ FTy->getParamType(0)->isIntegerTy(32)) {
GenericValue rv;
int (*PF)(int) = (int(*)(int))(intptr_t)FPtr;
rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue()));
@@ -503,8 +553,12 @@ GenericValue JIT::runFunction(Function *F,
else
ReturnInst::Create(F->getContext(), StubBB); // Just return void.
- // Finally, return the value returned by our nullary stub function.
- return runFunction(Stub, std::vector<GenericValue>());
+ // Finally, call our nullary stub function.
+ GenericValue Result = runFunction(Stub, std::vector<GenericValue>());
+ // Erase it, since no other function can have a reference to it.
+ Stub->eraseFromParent();
+ // And return the result.
+ return Result;
}
void JIT::RegisterJITEventListener(JITEventListener *L) {
@@ -570,7 +624,6 @@ void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) {
}
void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) {
- static bool isAlreadyCodeGenerating = false;
assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!");
// JIT the function
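
The JitPool added above keeps every live JIT instance in a ManagedStatic set so the global getPointerToNamedFunction wrapper can ask each instance in turn before deferring to the first one's external-symbol lookup. A minimal standalone sketch of that pool lookup, using simplified stand-in types (Engine, EnginePool, and the names below are illustrative only, not part of the patch):

#include <iostream>
#include <map>
#include <set>
#include <string>

struct Engine {                       // stand-in for a single JIT instance
  std::map<std::string, void*> Compiled;
  void *find(const std::string &Name) {
    std::map<std::string, void*>::iterator I = Compiled.find(Name);
    return I == Compiled.end() ? 0 : I->second;
  }
};

struct EnginePool {                   // stand-in for the JitPool above
  std::set<Engine*> Engines;
  void Add(Engine *E)    { Engines.insert(E); }
  void Remove(Engine *E) { Engines.erase(E); }
  void *lookup(const std::string &Name) {
    // Ask every engine in turn for a function it has compiled.
    for (std::set<Engine*>::iterator I = Engines.begin(),
         E = Engines.end(); I != E; ++I)
      if (void *P = (*I)->find(Name))
        return P;
    // Nobody owns it: defer to the first engine (stand-in for the external
    // program/library symbol lookup the real code falls back on).
    return Engines.empty() ? 0 : (*Engines.begin())->find(Name);
  }
};

int main() {
  Engine A, B;
  int Target = 0;
  B.Compiled["callee"] = &Target;
  EnginePool Pool;
  Pool.Add(&A);
  Pool.Add(&B);
  std::cout << (Pool.lookup("callee") == &Target) << "\n";  // prints 1
  return 0;
}
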
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index bb8f851..edae719 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -61,6 +61,10 @@ class JIT : public ExecutionEngine {
/// should be set to true. Doing so breaks freeMachineCodeForFunction.
bool AllocateGVsWithCode;
+ /// True while the JIT is generating code. Used to assert against recursive
+ /// entry.
+ bool isAlreadyCodeGenerating;
+
JITState *jitstate;
JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index c1051a9..946351b 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -522,7 +522,11 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
JCE->emitInt64(((intptr_t)Jit.getPointerToGlobal(Personality)));
}
- JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ // LSDA encoding: This must match the encoding used in EmitEHFrame().
+ if (PointerSize == 4)
+ JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+ else
+ JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8);
JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
} else {
JCE->emitULEB128Bytes(1);
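
The hunk above picks the LSDA pointer encoding from the target pointer size so it agrees with what EmitEHFrame writes: pc-relative sdata4 on 32-bit targets, sdata8 on 64-bit. A small sketch of that selection, using the standard DWARF EH encoding constants (the helper name is made up for illustration):

#include <cstdint>
#include <cstdio>

namespace dwarf {
  enum {
    DW_EH_PE_pcrel  = 0x10,   // standard DWARF EH pointer-encoding values
    DW_EH_PE_sdata4 = 0x0B,
    DW_EH_PE_sdata8 = 0x0C
  };
}

// Hypothetical helper: pick the LSDA encoding from the target pointer size.
static uint8_t LSDAEncoding(unsigned PointerSize) {
  return PointerSize == 4
             ? uint8_t(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4)
             : uint8_t(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8);
}

int main() {
  std::printf("0x%02x 0x%02x\n", LSDAEncoding(4), LSDAEncoding(8)); // 0x1b 0x1c
  return 0;
}
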
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 34a9938..783ebb4 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -37,6 +37,7 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
@@ -57,7 +58,6 @@ using namespace llvm;
STATISTIC(NumBytes, "Number of bytes of machine code compiled");
STATISTIC(NumRelos, "Number of relocations applied");
STATISTIC(NumRetries, "Number of retries with more memory");
-static JIT *TheJIT = 0;
// A declaration may stop being a declaration once it's fully read from bitcode.
@@ -109,9 +109,13 @@ namespace {
/// particular GlobalVariable so that we can reuse them if necessary.
GlobalToIndirectSymMapTy GlobalToIndirectSymMap;
+ /// Instance of the JIT this ResolverState serves.
+ JIT *TheJIT;
+
public:
- JITResolverState() : FunctionToLazyStubMap(this),
- FunctionToCallSitesMap(this) {}
+ JITResolverState(JIT *jit) : FunctionToLazyStubMap(this),
+ FunctionToCallSitesMap(this),
+ TheJIT(jit) {}
FunctionToLazyStubMapTy& getFunctionToLazyStubMap(
const MutexGuard& locked) {
@@ -152,53 +156,18 @@ namespace {
// was no stub. This function uses the call-site->function map to find a
// relevant function, but asserts that only stubs and not other call sites
// will be passed in.
- Function *EraseStub(const MutexGuard &locked, void *Stub) {
- CallSiteToFunctionMapTy::iterator C2F_I =
- CallSiteToFunctionMap.find(Stub);
- if (C2F_I == CallSiteToFunctionMap.end()) {
- // Not a stub.
- return NULL;
- }
-
- Function *const F = C2F_I->second;
-#ifndef NDEBUG
- void *RealStub = FunctionToLazyStubMap.lookup(F);
- assert(RealStub == Stub &&
- "Call-site that wasn't a stub pass in to EraseStub");
-#endif
- FunctionToLazyStubMap.erase(F);
- CallSiteToFunctionMap.erase(C2F_I);
-
- // Remove the stub from the function->call-sites map, and remove the whole
- // entry from the map if that was the last call site.
- FunctionToCallSitesMapTy::iterator F2C_I = FunctionToCallSitesMap.find(F);
- assert(F2C_I != FunctionToCallSitesMap.end() &&
- "FunctionToCallSitesMap broken");
- bool Erased = F2C_I->second.erase(Stub);
- (void)Erased;
- assert(Erased && "FunctionToCallSitesMap broken");
- if (F2C_I->second.empty())
- FunctionToCallSitesMap.erase(F2C_I);
-
- return F;
- }
+ Function *EraseStub(const MutexGuard &locked, void *Stub);
- void EraseAllCallSites(const MutexGuard &locked, Function *F) {
+ void EraseAllCallSitesFor(const MutexGuard &locked, Function *F) {
assert(locked.holds(TheJIT->lock));
- EraseAllCallSitesPrelocked(F);
- }
- void EraseAllCallSitesPrelocked(Function *F) {
- FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F);
- if (F2C == FunctionToCallSitesMap.end())
- return;
- for (SmallPtrSet<void*, 1>::const_iterator I = F2C->second.begin(),
- E = F2C->second.end(); I != E; ++I) {
- bool Erased = CallSiteToFunctionMap.erase(*I);
- (void)Erased;
- assert(Erased && "Missing call site->function mapping");
- }
- FunctionToCallSitesMap.erase(F2C);
+ EraseAllCallSitesForPrelocked(F);
}
+ void EraseAllCallSitesForPrelocked(Function *F);
+
+ // Erases _all_ call sites regardless of their function. This is used to
+ // unregister the stub addresses from the StubToResolverMap in
+ // ~JITResolver().
+ void EraseAllCallSitesPrelocked();
};
/// JITResolver - Keep track of, and resolve, call sites for functions that
@@ -227,19 +196,16 @@ namespace {
JITEmitter &JE;
- static JITResolver *TheJITResolver;
- public:
- explicit JITResolver(JIT &jit, JITEmitter &je) : nextGOTIndex(0), JE(je) {
- TheJIT = &jit;
+ /// Instance of JIT corresponding to this Resolver.
+ JIT *TheJIT;
+ public:
+ explicit JITResolver(JIT &jit, JITEmitter &je)
+ : state(&jit), nextGOTIndex(0), JE(je), TheJIT(&jit) {
LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn);
- assert(TheJITResolver == 0 && "Multiple JIT resolvers?");
- TheJITResolver = this;
}
- ~JITResolver() {
- TheJITResolver = 0;
- }
+ ~JITResolver();
/// getLazyFunctionStubIfAvailable - This returns a pointer to a function's
/// lazy-compilation stub if it has already been created.
@@ -260,8 +226,6 @@ namespace {
void getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs,
SmallVectorImpl<void*> &Ptrs);
- GlobalValue *invalidateStub(void *Stub);
-
/// getGOTIndexForAddress - Return a new or existing index in the GOT for
/// an address. This function only manages slots, it does not manage the
/// contents of the slots or the memory associated with the GOT.
@@ -273,6 +237,55 @@ namespace {
static void *JITCompilerFn(void *Stub);
};
+ class StubToResolverMapTy {
+ /// Map a stub address to a specific instance of a JITResolver so that
+ /// lazily-compiled functions can find the right resolver to use.
+ ///
+ /// Guarded by Lock.
+ std::map<void*, JITResolver*> Map;
+
+ /// Guards Map from concurrent accesses.
+ mutable sys::Mutex Lock;
+
+ public:
+ /// Registers a Stub to be resolved by Resolver.
+ void RegisterStubResolver(void *Stub, JITResolver *Resolver) {
+ MutexGuard guard(Lock);
+ Map.insert(std::make_pair(Stub, Resolver));
+ }
+ /// Unregisters the Stub when it's invalidated.
+ void UnregisterStubResolver(void *Stub) {
+ MutexGuard guard(Lock);
+ Map.erase(Stub);
+ }
+ /// Returns the JITResolver instance that owns the Stub.
+ JITResolver *getResolverFromStub(void *Stub) const {
+ MutexGuard guard(Lock);
+ // The address given to us for the stub may not be exactly right, it might
+ // be a little bit after the stub. As such, use upper_bound to find it.
+ // This is the same trick as in LookupFunctionFromCallSite from
+ // JITResolverState.
+ std::map<void*, JITResolver*>::const_iterator I = Map.upper_bound(Stub);
+ assert(I != Map.begin() && "This is not a known stub!");
+ --I;
+ return I->second;
+ }
+ /// True if any stubs refer to the given resolver. Only used in an assert().
+ /// O(N)
+ bool ResolverHasStubs(JITResolver* Resolver) const {
+ MutexGuard guard(Lock);
+ for (std::map<void*, JITResolver*>::const_iterator I = Map.begin(),
+ E = Map.end(); I != E; ++I) {
+ if (I->second == Resolver)
+ return true;
+ }
+ return false;
+ }
+ };
+ /// This needs to be static so that a lazy call stub can access it with no
+ /// context except the address of the stub.
+ ManagedStatic<StubToResolverMapTy> StubToResolverMap;
+
/// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is
/// used to output functions to memory for execution.
class JITEmitter : public JITCodeEmitter {
@@ -333,9 +346,6 @@ namespace {
/// MMI - Machine module info for exception information
MachineModuleInfo* MMI;
- // GVSet - a set to keep track of which globals have been seen
- SmallPtrSet<const GlobalVariable*, 8> GVSet;
-
// CurFn - The llvm function being emitted. Only valid during
// finishFunction().
const Function *CurFn;
@@ -359,22 +369,15 @@ namespace {
ValueMap<const Function *, EmittedCode,
EmittedFunctionConfig> EmittedFunctions;
- // CurFnStubUses - For a given Function, a vector of stubs that it
- // references. This facilitates the JIT detecting that a stub is no
- // longer used, so that it may be deallocated.
- DenseMap<AssertingVH<const Function>, SmallVector<void*, 1> > CurFnStubUses;
-
- // StubFnRefs - For a given pointer to a stub, a set of Functions which
- // reference the stub. When the count of a stub's references drops to zero,
- // the stub is unused.
- DenseMap<void *, SmallPtrSet<const Function*, 1> > StubFnRefs;
-
DILocation PrevDLT;
+ /// Instance of the JIT
+ JIT *TheJIT;
+
public:
JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM)
: SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0),
- EmittedFunctions(this), PrevDLT(NULL) {
+ EmittedFunctions(this), PrevDLT(NULL), TheJIT(&jit) {
MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
if (jit.getJITInfo().needsGOT()) {
MemMgr->AllocateGOT();
@@ -454,11 +457,6 @@ namespace {
/// function body.
void deallocateMemForFunction(const Function *F);
- /// AddStubToCurrentFunction - Mark the current function being JIT'd as
- /// using the stub at the specified address. Allows
- /// deallocateMemForFunction to also remove stubs no longer referenced.
- void AddStubToCurrentFunction(void *Stub);
-
virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn);
virtual void emitLabel(uint64_t LabelID) {
@@ -489,16 +487,86 @@ namespace {
bool MayNeedFarStub);
void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference);
unsigned addSizeOfGlobal(const GlobalVariable *GV, unsigned Size);
- unsigned addSizeOfGlobalsInConstantVal(const Constant *C, unsigned Size);
- unsigned addSizeOfGlobalsInInitializer(const Constant *Init, unsigned Size);
+ unsigned addSizeOfGlobalsInConstantVal(
+ const Constant *C, unsigned Size,
+ SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals,
+ SmallVectorImpl<const GlobalVariable*> &Worklist);
+ unsigned addSizeOfGlobalsInInitializer(
+ const Constant *Init, unsigned Size,
+ SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals,
+ SmallVectorImpl<const GlobalVariable*> &Worklist);
unsigned GetSizeOfGlobalsInBytes(MachineFunction &MF);
};
}
-JITResolver *JITResolver::TheJITResolver = 0;
-
void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) {
- JRS->EraseAllCallSitesPrelocked(F);
+ JRS->EraseAllCallSitesForPrelocked(F);
+}
+
+Function *JITResolverState::EraseStub(const MutexGuard &locked, void *Stub) {
+ CallSiteToFunctionMapTy::iterator C2F_I =
+ CallSiteToFunctionMap.find(Stub);
+ if (C2F_I == CallSiteToFunctionMap.end()) {
+ // Not a stub.
+ return NULL;
+ }
+
+ StubToResolverMap->UnregisterStubResolver(Stub);
+
+ Function *const F = C2F_I->second;
+#ifndef NDEBUG
+ void *RealStub = FunctionToLazyStubMap.lookup(F);
+ assert(RealStub == Stub &&
+ "Call-site that wasn't a stub passed in to EraseStub");
+#endif
+ FunctionToLazyStubMap.erase(F);
+ CallSiteToFunctionMap.erase(C2F_I);
+
+ // Remove the stub from the function->call-sites map, and remove the whole
+ // entry from the map if that was the last call site.
+ FunctionToCallSitesMapTy::iterator F2C_I = FunctionToCallSitesMap.find(F);
+ assert(F2C_I != FunctionToCallSitesMap.end() &&
+ "FunctionToCallSitesMap broken");
+ bool Erased = F2C_I->second.erase(Stub);
+ (void)Erased;
+ assert(Erased && "FunctionToCallSitesMap broken");
+ if (F2C_I->second.empty())
+ FunctionToCallSitesMap.erase(F2C_I);
+
+ return F;
+}
+
+void JITResolverState::EraseAllCallSitesForPrelocked(Function *F) {
+ FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F);
+ if (F2C == FunctionToCallSitesMap.end())
+ return;
+ StubToResolverMapTy &S2RMap = *StubToResolverMap;
+ for (SmallPtrSet<void*, 1>::const_iterator I = F2C->second.begin(),
+ E = F2C->second.end(); I != E; ++I) {
+ S2RMap.UnregisterStubResolver(*I);
+ bool Erased = CallSiteToFunctionMap.erase(*I);
+ (void)Erased;
+ assert(Erased && "Missing call site->function mapping");
+ }
+ FunctionToCallSitesMap.erase(F2C);
+}
+
+void JITResolverState::EraseAllCallSitesPrelocked() {
+ StubToResolverMapTy &S2RMap = *StubToResolverMap;
+ for (CallSiteToFunctionMapTy::const_iterator
+ I = CallSiteToFunctionMap.begin(),
+ E = CallSiteToFunctionMap.end(); I != E; ++I) {
+ S2RMap.UnregisterStubResolver(I->first);
+ }
+ CallSiteToFunctionMap.clear();
+ FunctionToCallSitesMap.clear();
+}
+
+JITResolver::~JITResolver() {
+ // No need to lock because we're in the destructor, and state isn't shared.
+ state.EraseAllCallSitesPrelocked();
+ assert(!StubToResolverMap->ResolverHasStubs(this) &&
+ "Resolver destroyed with stubs still alive.");
}
/// getLazyFunctionStubIfAvailable - This returns a pointer to a function stub
@@ -551,16 +619,22 @@ void *JITResolver::getLazyFunctionStub(Function *F) {
DEBUG(dbgs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '"
<< F->getName() << "'\n");
- // Finally, keep track of the stub-to-Function mapping so that the
- // JITCompilerFn knows which function to compile!
- state.AddCallSite(locked, Stub, F);
-
- // If we are JIT'ing non-lazily but need to call a function that does not
- // exist yet, add it to the JIT's work list so that we can fill in the stub
- // address later.
- if (!Actual && !TheJIT->isCompilingLazily())
- if (!isNonGhostDeclaration(F) && !F->hasAvailableExternallyLinkage())
- TheJIT->addPendingFunction(F);
+ if (TheJIT->isCompilingLazily()) {
+ // Register this JITResolver as the one corresponding to this call site so
+ // JITCompilerFn will be able to find it.
+ StubToResolverMap->RegisterStubResolver(Stub, this);
+
+ // Finally, keep track of the stub-to-Function mapping so that the
+ // JITCompilerFn knows which function to compile!
+ state.AddCallSite(locked, Stub, F);
+ } else if (!Actual) {
+ // If we are JIT'ing non-lazily but need to call a function that does not
+ // exist yet, add it to the JIT's work list so that we can fill in the
+ // stub address later.
+ assert(!isNonGhostDeclaration(F) && !F->hasAvailableExternallyLinkage() &&
+ "'Actual' should have been set above.");
+ TheJIT->addPendingFunction(F);
+ }
return Stub;
}
@@ -634,44 +708,12 @@ void JITResolver::getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs,
}
}
-GlobalValue *JITResolver::invalidateStub(void *Stub) {
- MutexGuard locked(TheJIT->lock);
-
- GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked);
-
- // Look up the cheap way first, to see if it's a function stub we are
- // invalidating. If so, remove it from both the forward and reverse maps.
- if (Function *F = state.EraseStub(locked, Stub)) {
- return F;
- }
-
- // Otherwise, it might be an indirect symbol stub. Find it and remove it.
- for (GlobalToIndirectSymMapTy::iterator i = GM.begin(), e = GM.end();
- i != e; ++i) {
- if (i->second != Stub)
- continue;
- GlobalValue *GV = i->first;
- GM.erase(i);
- return GV;
- }
-
- // Lastly, check to see if it's in the ExternalFnToStubMap.
- for (std::map<void *, void *>::iterator i = ExternalFnToStubMap.begin(),
- e = ExternalFnToStubMap.end(); i != e; ++i) {
- if (i->second != Stub)
- continue;
- ExternalFnToStubMap.erase(i);
- break;
- }
-
- return 0;
-}
-
/// JITCompilerFn - This function is called when a lazy compilation stub has
/// been entered. It looks up which function this stub corresponds to, compiles
/// it if necessary, then returns the resultant function pointer.
void *JITResolver::JITCompilerFn(void *Stub) {
- JITResolver &JR = *TheJITResolver;
+ JITResolver *JR = StubToResolverMap->getResolverFromStub(Stub);
+ assert(JR && "Unable to find the corresponding JITResolver to the call site");
Function* F = 0;
void* ActualPtr = 0;
@@ -680,24 +722,24 @@ void *JITResolver::JITCompilerFn(void *Stub) {
// Only lock for getting the Function. The call getPointerToFunction made
// in this function might trigger function materializing, which requires
// JIT lock to be unlocked.
- MutexGuard locked(TheJIT->lock);
+ MutexGuard locked(JR->TheJIT->lock);
// The address given to us for the stub may not be exactly right, it might
// be a little bit after the stub. As such, use upper_bound to find it.
pair<void*, Function*> I =
- JR.state.LookupFunctionFromCallSite(locked, Stub);
+ JR->state.LookupFunctionFromCallSite(locked, Stub);
F = I.second;
ActualPtr = I.first;
}
// If we have already code generated the function, just return the address.
- void *Result = TheJIT->getPointerToGlobalIfAvailable(F);
+ void *Result = JR->TheJIT->getPointerToGlobalIfAvailable(F);
if (!Result) {
// Otherwise we don't have it, do lazy compilation now.
// If lazy compilation is disabled, emit a useful error message and abort.
- if (!TheJIT->isCompilingLazily()) {
+ if (!JR->TheJIT->isCompilingLazily()) {
llvm_report_error("LLVM JIT requested to do lazy compilation of function '"
+ F->getName() + "' when lazy compiles are disabled!");
}
@@ -706,11 +748,11 @@ void *JITResolver::JITCompilerFn(void *Stub) {
<< "' In stub ptr = " << Stub << " actual ptr = "
<< ActualPtr << "\n");
- Result = TheJIT->getPointerToFunction(F);
+ Result = JR->TheJIT->getPointerToFunction(F);
}
// Reacquire the lock to update the GOT map.
- MutexGuard locked(TheJIT->lock);
+ MutexGuard locked(JR->TheJIT->lock);
// We might like to remove the call site from the CallSiteToFunction map, but
// we can't do that! Multiple threads could be stuck, waiting to acquire the
@@ -725,8 +767,8 @@ void *JITResolver::JITCompilerFn(void *Stub) {
// if they see it still using the stub address.
// Note: this is done so the Resolver doesn't have to manage GOT memory
// Do this without allocating map space if the target isn't using a GOT
- if(JR.revGOTMap.find(Stub) != JR.revGOTMap.end())
- JR.revGOTMap[Result] = JR.revGOTMap[Stub];
+ if(JR->revGOTMap.find(Stub) != JR->revGOTMap.end())
+ JR->revGOTMap[Result] = JR->revGOTMap[Stub];
return Result;
}
@@ -751,7 +793,6 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
// that we're returning the same address for the function as any previous
// call. TODO: Yes, this is wrong. The lazy stub isn't guaranteed to be
// close enough to call.
- AddStubToCurrentFunction(FnStub);
return FnStub;
}
@@ -768,18 +809,10 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
return TheJIT->getPointerToFunction(F);
}
- // Otherwise, we may need a to emit a stub, and, conservatively, we
- // always do so.
- void *StubAddr = Resolver.getLazyFunctionStub(F);
-
- // Add the stub to the current function's list of referenced stubs, so we can
- // deallocate them if the current function is ever freed. It's possible to
- // return null from getLazyFunctionStub in the case of a weak extern that
- // fails to resolve.
- if (StubAddr)
- AddStubToCurrentFunction(StubAddr);
-
- return StubAddr;
+ // Otherwise, we may need to emit a stub, and, conservatively, we always do
+ // so. Note that it's possible to return null from getLazyFunctionStub in the
+ // case of a weak extern that fails to resolve.
+ return Resolver.getLazyFunctionStub(F);
}
void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) {
@@ -787,24 +820,9 @@ void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) {
// resolved address.
void *GVAddress = getPointerToGlobal(V, Reference, false);
void *StubAddr = Resolver.getGlobalValueIndirectSym(V, GVAddress);
-
- // Add the stub to the current function's list of referenced stubs, so we can
- // deallocate them if the current function is ever freed.
- AddStubToCurrentFunction(StubAddr);
-
return StubAddr;
}
-void JITEmitter::AddStubToCurrentFunction(void *StubAddr) {
- assert(CurFn && "Stub added to current function, but current function is 0!");
-
- SmallVectorImpl<void*> &StubsUsed = CurFnStubUses[CurFn];
- StubsUsed.push_back(StubAddr);
-
- SmallPtrSet<const Function *, 1> &FnRefs = StubFnRefs[StubAddr];
- FnRefs.insert(CurFn);
-}
-
void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) {
if (!DL.isUnknown()) {
DILocation CurDLT = EmissionDetails.MF->getDILocation(DL);
@@ -839,7 +857,7 @@ static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP,
return Size;
}
-static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI) {
+static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI, JIT *jit) {
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
if (JT.empty()) return 0;
@@ -847,7 +865,7 @@ static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI) {
for (unsigned i = 0, e = JT.size(); i != e; ++i)
NumEntries += JT[i].MBBs.size();
- return NumEntries * MJTI->getEntrySize(*TheJIT->getTargetData());
+ return NumEntries * MJTI->getEntrySize(*jit->getTargetData());
}
static uintptr_t RoundUpToAlign(uintptr_t Size, unsigned Alignment) {
@@ -876,11 +894,14 @@ unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) {
}
/// addSizeOfGlobalsInConstantVal - find any globals that we haven't seen yet
-/// but are referenced from the constant; put them in GVSet and add their
-/// size into the running total Size.
-
-unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
- unsigned Size) {
+/// but are referenced from the constant; put them in SeenGlobals and the
+/// Worklist, and add their size into the running total Size.
+
+unsigned JITEmitter::addSizeOfGlobalsInConstantVal(
+ const Constant *C,
+ unsigned Size,
+ SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals,
+ SmallVectorImpl<const GlobalVariable*> &Worklist) {
// If it's undefined, return the garbage.
if (isa<UndefValue>(C))
return Size;
@@ -902,7 +923,7 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::BitCast: {
- Size = addSizeOfGlobalsInConstantVal(Op0, Size);
+ Size = addSizeOfGlobalsInConstantVal(Op0, Size, SeenGlobals, Worklist);
break;
}
case Instruction::Add:
@@ -918,8 +939,9 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- Size = addSizeOfGlobalsInConstantVal(Op0, Size);
- Size = addSizeOfGlobalsInConstantVal(CE->getOperand(1), Size);
+ Size = addSizeOfGlobalsInConstantVal(Op0, Size, SeenGlobals, Worklist);
+ Size = addSizeOfGlobalsInConstantVal(CE->getOperand(1), Size,
+ SeenGlobals, Worklist);
break;
}
default: {
@@ -933,8 +955,10 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
if (C->getType()->getTypeID() == Type::PointerTyID)
if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(C))
- if (GVSet.insert(GV))
+ if (SeenGlobals.insert(GV)) {
+ Worklist.push_back(GV);
Size = addSizeOfGlobal(GV, Size);
+ }
return Size;
}
@@ -942,15 +966,18 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
/// addSizeOfGlobalsInInitializer - handle any globals that we haven't seen yet
/// but are referenced from the given initializer.
-unsigned JITEmitter::addSizeOfGlobalsInInitializer(const Constant *Init,
- unsigned Size) {
+unsigned JITEmitter::addSizeOfGlobalsInInitializer(
+ const Constant *Init,
+ unsigned Size,
+ SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals,
+ SmallVectorImpl<const GlobalVariable*> &Worklist) {
if (!isa<UndefValue>(Init) &&
!isa<ConstantVector>(Init) &&
!isa<ConstantAggregateZero>(Init) &&
!isa<ConstantArray>(Init) &&
!isa<ConstantStruct>(Init) &&
Init->getType()->isFirstClassType())
- Size = addSizeOfGlobalsInConstantVal(Init, Size);
+ Size = addSizeOfGlobalsInConstantVal(Init, Size, SeenGlobals, Worklist);
return Size;
}
@@ -961,7 +988,7 @@ unsigned JITEmitter::addSizeOfGlobalsInInitializer(const Constant *Init,
unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
unsigned Size = 0;
- GVSet.clear();
+ SmallPtrSet<const GlobalVariable*, 8> SeenGlobals;
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
@@ -985,7 +1012,7 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
// assuming the addresses of the new globals in this module
// start at 0 (or something) and adjusting them after codegen
// complete. Another possibility is to grab a marker bit in GV.
- if (GVSet.insert(GV))
+ if (SeenGlobals.insert(GV))
// A variable as yet unseen. Add in its size.
Size = addSizeOfGlobal(GV, Size);
}
@@ -994,12 +1021,14 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
}
DEBUG(dbgs() << "JIT: About to look through initializers\n");
// Look for more globals that are referenced only from initializers.
- // GVSet.end is computed each time because the set can grow as we go.
- for (SmallPtrSet<const GlobalVariable *, 8>::iterator I = GVSet.begin();
- I != GVSet.end(); I++) {
- const GlobalVariable* GV = *I;
+ SmallVector<const GlobalVariable*, 8> Worklist(
+ SeenGlobals.begin(), SeenGlobals.end());
+ while (!Worklist.empty()) {
+ const GlobalVariable* GV = Worklist.back();
+ Worklist.pop_back();
if (GV->hasInitializer())
- Size = addSizeOfGlobalsInInitializer(GV->getInitializer(), Size);
+ Size = addSizeOfGlobalsInInitializer(GV->getInitializer(), Size,
+ SeenGlobals, Worklist);
}
return Size;
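
GetSizeOfGlobalsInBytes above used to iterate GVSet while addSizeOfGlobalsInInitializer was still inserting into it; the rewrite drains an explicit worklist instead, so newly discovered globals are processed without iterating a container that is growing underneath the loop. A generic sketch of that worklist pattern (the toy "references the next global" rule is purely illustrative):

#include <iostream>
#include <set>
#include <vector>

int main() {
  // Toy rule: global n "references" global n + 1, up to 4.
  std::set<int> Seen;                 // stand-in for SeenGlobals
  std::vector<int> Worklist;          // globals still to be scanned
  Seen.insert(0);
  Worklist.push_back(0);

  unsigned Size = 0;
  while (!Worklist.empty()) {
    int GV = Worklist.back();
    Worklist.pop_back();
    Size += 8;                        // stand-in for addSizeOfGlobal
    int Ref = GV + 1;                 // stand-in for scanning the initializer
    if (Ref <= 4 && Seen.insert(Ref).second)
      Worklist.push_back(Ref);        // discovered a new global: queue it
  }
  std::cout << Size << "\n";          // 40: five 8-byte globals
  return 0;
}
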
@@ -1032,7 +1061,7 @@ void JITEmitter::startFunction(MachineFunction &F) {
MJTI->getEntryAlignment(*TheJIT->getTargetData()));
// Add the jump table size
- ActualSize += GetJumpTableSizeInBytes(MJTI);
+ ActualSize += GetJumpTableSizeInBytes(MJTI, TheJIT);
}
// Add the alignment for the function
@@ -1301,40 +1330,6 @@ void JITEmitter::deallocateMemForFunction(const Function *F) {
if (JITEmitDebugInfo) {
DR->UnregisterFunction(F);
}
-
- // If the function did not reference any stubs, return.
- if (CurFnStubUses.find(F) == CurFnStubUses.end())
- return;
-
- // For each referenced stub, erase the reference to this function, and then
- // erase the list of referenced stubs.
- SmallVectorImpl<void *> &StubList = CurFnStubUses[F];
- for (unsigned i = 0, e = StubList.size(); i != e; ++i) {
- void *Stub = StubList[i];
-
- // If we already invalidated this stub for this function, continue.
- if (StubFnRefs.count(Stub) == 0)
- continue;
-
- SmallPtrSet<const Function *, 1> &FnRefs = StubFnRefs[Stub];
- FnRefs.erase(F);
-
- // If this function was the last reference to the stub, invalidate the stub
- // in the JITResolver. Were there a memory manager deallocateStub routine,
- // we could call that at this point too.
- if (FnRefs.empty()) {
- DEBUG(dbgs() << "\nJIT: Invalidated Stub at [" << Stub << "]\n");
- StubFnRefs.erase(Stub);
-
- // Invalidate the stub. If it is a GV stub, update the JIT's global
- // mapping for that GV to zero.
- GlobalValue *GV = Resolver.invalidateStub(Stub);
- if (GV) {
- TheJIT->updateGlobalMapping(GV, 0);
- }
- }
- }
- CurFnStubUses.erase(F);
}
@@ -1552,19 +1547,6 @@ JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM,
return new JITEmitter(jit, JMM, tm);
}
-// getPointerToNamedFunction - This function is used as a global wrapper to
-// JIT::getPointerToNamedFunction for the purpose of resolving symbols when
-// bugpoint is debugging the JIT. In that scenario, we are loading an .so and
-// need to resolve function(s) that are being mis-codegenerated, so we need to
-// resolve their addresses at runtime, and this is the way to do it.
-extern "C" {
- void *getPointerToNamedFunction(const char *Name) {
- if (Function *F = TheJIT->FindFunctionNamed(Name))
- return TheJIT->getPointerToFunction(F);
- return TheJIT->getPointerToNamedFunction(Name);
- }
-}
-
// getPointerToFunctionOrStub - If the specified function has been
// code-gen'd, return a pointer to the function. If not, compile it, or use
// a stub to implement lazy compilation if available.
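
StubToResolverMapTy::getResolverFromStub above maps a return address that may sit a few bytes past a stub's start back to the JITResolver that owns it, by keying the map on stub start addresses and using upper_bound followed by a step back. A standalone sketch of that lookup (the Resolver type and resolverForAddress helper are stand-ins):

#include <cassert>
#include <map>

struct Resolver {};                              // stand-in for JITResolver

std::map<void*, Resolver*> StubMap;              // keyed by stub start address

Resolver *resolverForAddress(void *Addr) {
  // upper_bound finds the first entry strictly greater than Addr; stepping
  // back one gives the entry with the largest start address <= Addr, i.e.
  // the stub that Addr falls inside.
  std::map<void*, Resolver*>::iterator I = StubMap.upper_bound(Addr);
  assert(I != StubMap.begin() && "address precedes every known stub");
  --I;
  return I->second;
}

int main() {
  static char Stub[16];
  Resolver R;
  StubMap[Stub] = &R;
  assert(resolverForAddress(Stub + 5) == &R);    // an address inside the stub
  return 0;
}
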
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 7f441b0..8487c83 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -159,7 +159,7 @@ static bool RecursiveResolveTypesI(const Type *DstTy, const Type *SrcTy,
if (DstTy == SrcTy) return false; // If already equal, noop
// If we found our opaque type, resolve it now!
- if (isa<OpaqueType>(DstTy) || isa<OpaqueType>(SrcTy))
+ if (DstTy->isOpaqueTy() || SrcTy->isOpaqueTy())
return ResolveTypes(DstTy, SrcTy);
// Two types cannot be resolved together if they are of different primitive
diff --git a/lib/MC/Android.mk b/lib/MC/Android.mk
new file mode 100644
index 0000000..dac5a54
--- /dev/null
+++ b/lib/MC/Android.mk
@@ -0,0 +1,45 @@
+LOCAL_PATH:= $(call my-dir)
+
+mc_SRC_FILES := \
+ MCAsmInfo.cpp \
+ MCAsmInfoCOFF.cpp \
+ MCAsmInfoDarwin.cpp \
+ MCAsmStreamer.cpp \
+ MCAssembler.cpp \
+ MCCodeEmitter.cpp \
+ MCContext.cpp \
+ MCDisassembler.cpp \
+ MCExpr.cpp \
+ MCInst.cpp \
+ MCInstPrinter.cpp \
+ MCMachOStreamer.cpp \
+ MCNullStreamer.cpp \
+ MCSection.cpp \
+ MCSectionELF.cpp \
+ MCSectionMachO.cpp \
+ MCStreamer.cpp \
+ MCSymbol.cpp \
+ MCValue.cpp \
+ TargetAsmBackend.cpp
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(mc_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMMC
+
+include $(LLVM_HOST_BUILD_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(mc_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMMC
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 9ead33b..4cf71dc 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -18,4 +18,5 @@ add_llvm_library(LLVMMC
MCStreamer.cpp
MCSymbol.cpp
MCValue.cpp
+ TargetAsmBackend.cpp
)
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 828377f..1b66900 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -22,6 +22,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
+#include <ctype.h>
using namespace llvm;
namespace {
@@ -134,6 +135,9 @@ public:
unsigned ValueSize = 1,
unsigned MaxBytesToEmit = 0);
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+
virtual void EmitValueToOffset(const MCExpr *Offset,
unsigned char Value = 0);
@@ -513,6 +517,13 @@ void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
EmitEOL();
}
+void MCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ // Emit with a text fill value.
+ EmitValueToAlignment(ByteAlignment, MAI.getTextAlignFillValue(),
+ 1, MaxBytesToEmit);
+}
+
void MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
unsigned char Value) {
// FIXME: Verify that Offset is associated with the current section.
@@ -552,7 +563,7 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
MCFixup &F = Fixups[i];
- MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind());
+ const MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind());
for (unsigned j = 0; j != Info.TargetSize; ++j) {
unsigned Index = F.getOffset() * 8 + Info.TargetOffset + j;
assert(Index < Code.size() * 8 && "Invalid offset in fixup!");
@@ -599,7 +610,7 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
MCFixup &F = Fixups[i];
- MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind());
+ const MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind());
OS << " fixup " << char('A' + i) << " - " << "offset: " << F.getOffset()
<< ", value: " << *F.getValue() << ", kind: " << Info.Name << "\n";
}
@@ -617,6 +628,12 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
raw_ostream &OS = GetCommentOS();
OS << "<MCInst #" << Inst.getOpcode();
+ StringRef InstName;
+ if (InstPrinter)
+ InstName = InstPrinter->getOpcodeName(Inst.getOpcode());
+ if (!InstName.empty())
+ OS << ' ' << InstName;
+
for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) {
OS << "\n ";
Inst.getOperand(i).print(OS, &MAI);
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index bdc886b..96227db 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -16,11 +16,17 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachO.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+
+// FIXME: Gross.
+#include "../Target/X86/X86FixupKinds.h"
+
#include <vector>
using namespace llvm;
@@ -36,6 +42,8 @@ STATISTIC(EmittedFragments, "Number of emitted assembler fragments");
static void WriteFileData(raw_ostream &OS, const MCSectionData &SD,
MachObjectWriter &MOW);
+static uint64_t WriteNopData(uint64_t Count, MachObjectWriter &MOW);
+
/// isVirtualSection - Check if this is a section which does not actually exist
/// in the object file.
static bool isVirtualSection(const MCSection &Section) {
@@ -45,6 +53,30 @@ static bool isVirtualSection(const MCSection &Section) {
return (Type == MCSectionMachO::S_ZEROFILL);
}
+static unsigned getFixupKindLog2Size(unsigned Kind) {
+ switch (Kind) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case X86::reloc_pcrel_1byte:
+ case FK_Data_1: return 0;
+ case FK_Data_2: return 1;
+ case X86::reloc_pcrel_4byte:
+ case X86::reloc_riprel_4byte:
+ case FK_Data_4: return 2;
+ case FK_Data_8: return 3;
+ }
+}
+
+static bool isFixupKindPCRel(unsigned Kind) {
+ switch (Kind) {
+ default:
+ return false;
+ case X86::reloc_pcrel_1byte:
+ case X86::reloc_pcrel_4byte:
+ case X86::reloc_riprel_4byte:
+ return true;
+ }
+}
+
class MachObjectWriter {
// See <mach-o/loader.h>.
enum {
@@ -402,13 +434,14 @@ public:
uint32_t Word0;
uint32_t Word1;
};
- void ComputeScatteredRelocationInfo(MCAssembler &Asm,
- MCSectionData::Fixup &Fixup,
+ void ComputeScatteredRelocationInfo(MCAssembler &Asm, MCFragment &Fragment,
+ MCAsmFixup &Fixup,
const MCValue &Target,
DenseMap<const MCSymbol*,MCSymbolData*> &SymbolMap,
std::vector<MachRelocationEntry> &Relocs) {
- uint32_t Address = Fixup.Fragment->getOffset() + Fixup.Offset;
+ uint32_t Address = Fragment.getOffset() + Fixup.Offset;
unsigned IsPCRel = 0;
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
unsigned Type = RIT_Vanilla;
// See <reloc.h>.
@@ -424,11 +457,12 @@ public:
Value2 = SD->getFragment()->getAddress() + SD->getOffset();
}
- unsigned Log2Size = Log2_32(Fixup.Size);
- assert((1U << Log2Size) == Fixup.Size && "Invalid fixup size!");
-
// The value which goes in the fixup is current value of the expression.
Fixup.FixedValue = Value - Value2 + Target.getConstant();
+ if (isFixupKindPCRel(Fixup.Kind)) {
+ Fixup.FixedValue -= Address;
+ IsPCRel = 1;
+ }
MachRelocationEntry MRE;
MRE.Word0 = ((Address << 0) |
@@ -453,8 +487,8 @@ public:
}
}
- void ComputeRelocationInfo(MCAssembler &Asm,
- MCSectionData::Fixup &Fixup,
+ void ComputeRelocationInfo(MCAssembler &Asm, MCDataFragment &Fragment,
+ MCAsmFixup &Fixup,
DenseMap<const MCSymbol*,MCSymbolData*> &SymbolMap,
std::vector<MachRelocationEntry> &Relocs) {
MCValue Target;
@@ -466,19 +500,22 @@ public:
if (Target.getSymB() ||
(Target.getSymA() && !Target.getSymA()->isUndefined() &&
Target.getConstant()))
- return ComputeScatteredRelocationInfo(Asm, Fixup, Target,
+ return ComputeScatteredRelocationInfo(Asm, Fragment, Fixup, Target,
SymbolMap, Relocs);
// See <reloc.h>.
- uint32_t Address = Fixup.Fragment->getOffset() + Fixup.Offset;
+ uint32_t Address = Fragment.getOffset() + Fixup.Offset;
uint32_t Value = 0;
unsigned Index = 0;
unsigned IsPCRel = 0;
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
unsigned IsExtern = 0;
unsigned Type = 0;
if (Target.isAbsolute()) { // constant
// SymbolNum of 0 indicates the absolute section.
+ //
+ // FIXME: When is this generated?
Type = RIT_Vanilla;
Value = 0;
llvm_unreachable("FIXME: Not yet implemented!");
@@ -495,10 +532,11 @@ public:
//
// FIXME: O(N)
Index = 1;
- for (MCAssembler::iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it, ++Index)
+ MCAssembler::iterator it = Asm.begin(), ie = Asm.end();
+ for (; it != ie; ++it, ++Index)
if (&*it == SD->getFragment()->getParent())
break;
+ assert(it != ie && "Unable to find section index!");
Value = SD->getFragment()->getAddress() + SD->getOffset();
}
@@ -508,8 +546,10 @@ public:
// The value which goes in the fixup is current value of the expression.
Fixup.FixedValue = Value + Target.getConstant();
- unsigned Log2Size = Log2_32(Fixup.Size);
- assert((1U << Log2Size) == Fixup.Size && "Invalid fixup size!");
+ if (isFixupKindPCRel(Fixup.Kind)) {
+ Fixup.FixedValue -= Address;
+ IsPCRel = 1;
+ }
// struct relocation_info (8 bytes)
MachRelocationEntry MRE;
@@ -766,17 +806,20 @@ public:
// is written.
std::vector<MachRelocationEntry> RelocInfos;
uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
- for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie;
- ++it) {
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
MCSectionData &SD = *it;
// The assembler writes relocations in the reverse order they were seen.
//
// FIXME: It is probably more complicated than this.
unsigned NumRelocsStart = RelocInfos.size();
- for (unsigned i = 0, e = SD.fixup_size(); i != e; ++i)
- ComputeRelocationInfo(Asm, SD.getFixups()[e - i - 1], SymbolMap,
- RelocInfos);
+ for (MCSectionData::reverse_iterator it2 = SD.rbegin(),
+ ie2 = SD.rend(); it2 != ie2; ++it2)
+ if (MCDataFragment *DF = dyn_cast<MCDataFragment>(&*it2))
+ for (unsigned i = 0, e = DF->fixup_size(); i != e; ++i)
+ ComputeRelocationInfo(Asm, *DF, DF->getFixups()[e - i - 1],
+ SymbolMap, RelocInfos);
unsigned NumRelocs = RelocInfos.size() - NumRelocsStart;
uint64_t SectionStart = SectionDataStart + SD.getAddress();
@@ -871,6 +914,16 @@ public:
OS << StringTable.str();
}
}
+
+ void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &DF) {
+ unsigned Size = 1 << getFixupKindLog2Size(Fixup.Kind);
+
+ // FIXME: Endianness assumption.
+ assert(Fixup.Offset + Size <= DF.getContents().size() &&
+ "Invalid fixup offset!");
+ for (unsigned i = 0; i != Size; ++i)
+ DF.getContents()[Fixup.Offset + i] = uint8_t(Fixup.FixedValue >> (i * 8));
+ }
};
/* *** */
@@ -905,35 +958,12 @@ MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A)
Address(~UINT64_C(0)),
Size(~UINT64_C(0)),
FileSize(~UINT64_C(0)),
- LastFixupLookup(~0),
HasInstructions(false)
{
if (A)
A->getSectionList().push_back(this);
}
-const MCSectionData::Fixup *
-MCSectionData::LookupFixup(const MCFragment *Fragment, uint64_t Offset) const {
- // Use a one level cache to turn the common case of accessing the fixups in
- // order into O(1) instead of O(N).
- unsigned i = LastFixupLookup, Count = Fixups.size(), End = Fixups.size();
- if (i >= End)
- i = 0;
- while (Count--) {
- const Fixup &F = Fixups[i];
- if (F.Fragment == Fragment && F.Offset == Offset) {
- LastFixupLookup = i;
- return &F;
- }
-
- ++i;
- if (i == End)
- i = 0;
- }
-
- return 0;
-}
-
/* *** */
MCSymbolData::MCSymbolData() : Symbol(0) {}
@@ -980,31 +1010,10 @@ void MCAssembler::LayoutSection(MCSectionData &SD) {
}
case MCFragment::FT_Data:
+ case MCFragment::FT_Fill:
F.setFileSize(F.getMaxFileSize());
break;
- case MCFragment::FT_Fill: {
- MCFillFragment &FF = cast<MCFillFragment>(F);
-
- F.setFileSize(F.getMaxFileSize());
-
- MCValue Target;
- if (!FF.getValue().EvaluateAsRelocatable(Target))
- llvm_report_error("expected relocatable expression");
-
- // If the fill value is constant, thats it.
- if (Target.isAbsolute())
- break;
-
- // Otherwise, add fixups for the values.
- for (uint64_t i = 0, e = FF.getCount(); i != e; ++i) {
- MCSectionData::Fixup Fix(F, i * FF.getValueSize(),
- FF.getValue(),FF.getValueSize());
- SD.getFixups().push_back(Fix);
- }
- break;
- }
-
case MCFragment::FT_Org: {
MCOrgFragment &OF = cast<MCOrgFragment>(F);
@@ -1051,6 +1060,64 @@ void MCAssembler::LayoutSection(MCSectionData &SD) {
SD.setFileSize(Address - SD.getAddress());
}
+/// WriteNopData - Write optimal nops to the output file for the \arg Count
+/// bytes. This returns the number of bytes written. It may return 0 if
+/// the \arg Count is more than the maximum optimal nops.
+///
+/// FIXME this is X86 32-bit specific and should move to a better place.
+static uint64_t WriteNopData(uint64_t Count, MachObjectWriter &MOW) {
+ static const uint8_t Nops[16][16] = {
+ // nop
+ {0x90},
+ // xchg %ax,%ax
+ {0x66, 0x90},
+ // nopl (%[re]ax)
+ {0x0f, 0x1f, 0x00},
+ // nopl 0(%[re]ax)
+ {0x0f, 0x1f, 0x40, 0x00},
+ // nopl 0(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopw 0(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopl 0L(%[re]ax)
+ {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0L(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopw 0L(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopw %cs:0L(%[re]ax,%[re]ax,1)
+ {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0(%[re]ax,%[re]ax,1)
+ // nopw 0(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x44, 0x00, 0x00,
+ 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopw 0(%[re]ax,%[re]ax,1)
+ // nopw 0(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
+ 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopw 0(%[re]ax,%[re]ax,1)
+ // nopl 0L(%[re]ax)
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
+ 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0L(%[re]ax)
+ // nopl 0L(%[re]ax)
+ {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
+ 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0L(%[re]ax)
+ // nopl 0L(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
+ 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
+ };
+
+ if (Count > 15)
+ return 0;
+
+ for (uint64_t i = 0; i < Count; i++)
+ MOW.Write8 (uint8_t(Nops[Count - 1][i]));
+
+ return Count;
+}
+
/// WriteFileData - Write the \arg F data to the output file.
static void WriteFileData(raw_ostream &OS, const MCFragment &F,
MachObjectWriter &MOW) {
@@ -1074,6 +1141,14 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F,
"' is not a divisor of padding size '" +
Twine(AF.getFileSize()) + "'");
+ // See if we are aligning with nops, and if so do that first to try to fill
+ // the Count bytes. Then if that did not fill any bytes or there are any
+ // bytes left to fill, use the Value and ValueSize to fill the rest.
+ if (AF.getEmitNops()) {
+ uint64_t NopByteCount = WriteNopData(Count, MOW);
+ Count -= NopByteCount;
+ }
+
for (uint64_t i = 0; i != Count; ++i) {
switch (AF.getValueSize()) {
default:
@@ -1087,39 +1162,30 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F,
break;
}
- case MCFragment::FT_Data:
+ case MCFragment::FT_Data: {
+ MCDataFragment &DF = cast<MCDataFragment>(F);
+
+ // Apply the fixups.
+ //
+ // FIXME: Move elsewhere.
+ for (MCDataFragment::const_fixup_iterator it = DF.fixup_begin(),
+ ie = DF.fixup_end(); it != ie; ++it)
+ MOW.ApplyFixup(*it, DF);
+
OS << cast<MCDataFragment>(F).getContents().str();
break;
+ }
case MCFragment::FT_Fill: {
MCFillFragment &FF = cast<MCFillFragment>(F);
-
- int64_t Value = 0;
-
- MCValue Target;
- if (!FF.getValue().EvaluateAsRelocatable(Target))
- llvm_report_error("expected relocatable expression");
-
- if (Target.isAbsolute())
- Value = Target.getConstant();
for (uint64_t i = 0, e = FF.getCount(); i != e; ++i) {
- if (!Target.isAbsolute()) {
- // Find the fixup.
- //
- // FIXME: Find a better way to write in the fixes.
- const MCSectionData::Fixup *Fixup =
- F.getParent()->LookupFixup(&F, i * FF.getValueSize());
- assert(Fixup && "Missing fixup for fill value!");
- Value = Fixup->FixedValue;
- }
-
switch (FF.getValueSize()) {
default:
assert(0 && "Invalid size!");
- case 1: MOW.Write8 (uint8_t (Value)); break;
- case 2: MOW.Write16(uint16_t(Value)); break;
- case 4: MOW.Write32(uint32_t(Value)); break;
- case 8: MOW.Write64(uint64_t(Value)); break;
+ case 1: MOW.Write8 (uint8_t (FF.getValue())); break;
+ case 2: MOW.Write16(uint16_t(FF.getValue())); break;
+ case 4: MOW.Write32(uint32_t(FF.getValue())); break;
+ case 8: MOW.Write64(uint64_t(FF.getValue())); break;
}
}
break;
@@ -1167,6 +1233,10 @@ static void WriteFileData(raw_ostream &OS, const MCSectionData &SD,
}
void MCAssembler::Finish() {
+ DEBUG_WITH_TYPE("mc-dump", {
+ llvm::errs() << "assembler backend - pre-layout\n--\n";
+ dump(); });
+
// Layout the concrete sections and fragments.
uint64_t Address = 0;
MCSectionData *Prev = 0;
@@ -1205,9 +1275,149 @@ void MCAssembler::Finish() {
Address += SD.getSize();
}
+ DEBUG_WITH_TYPE("mc-dump", {
+ llvm::errs() << "assembler backend - post-layout\n--\n";
+ dump(); });
+
// Write the object file.
MachObjectWriter MOW(OS);
MOW.WriteObject(*this);
OS.flush();
}
+
+
+// Debugging methods
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const MCAsmFixup &AF) {
+ OS << "<MCAsmFixup" << " Offset:" << AF.Offset << " Value:" << *AF.Value
+ << " Kind:" << AF.Kind << ">";
+ return OS;
+}
+
+}
+
+void MCFragment::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCFragment " << (void*) this << " Offset:" << Offset
+ << " FileSize:" << FileSize;
+
+ OS << ">";
+}
+
+void MCAlignFragment::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCAlignFragment ";
+ this->MCFragment::dump();
+ OS << "\n ";
+ OS << " Alignment:" << getAlignment()
+ << " Value:" << getValue() << " ValueSize:" << getValueSize()
+ << " MaxBytesToEmit:" << getMaxBytesToEmit() << ">";
+}
+
+void MCDataFragment::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCDataFragment ";
+ this->MCFragment::dump();
+ OS << "\n ";
+ OS << " Contents:[";
+ for (unsigned i = 0, e = getContents().size(); i != e; ++i) {
+ if (i) OS << ",";
+ OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF);
+ }
+ OS << "] (" << getContents().size() << " bytes)";
+
+ if (!getFixups().empty()) {
+ OS << ",\n ";
+ OS << " Fixups:[";
+ for (fixup_iterator it = fixup_begin(), ie = fixup_end(); it != ie; ++it) {
+ if (it != fixup_begin()) OS << ",\n ";
+ OS << *it;
+ }
+ OS << "]";
+ }
+
+ OS << ">";
+}
+
+void MCFillFragment::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCFillFragment ";
+ this->MCFragment::dump();
+ OS << "\n ";
+ OS << " Value:" << getValue() << " ValueSize:" << getValueSize()
+ << " Count:" << getCount() << ">";
+}
+
+void MCOrgFragment::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCOrgFragment ";
+ this->MCFragment::dump();
+ OS << "\n ";
+ OS << " Offset:" << getOffset() << " Value:" << getValue() << ">";
+}
+
+void MCZeroFillFragment::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCZeroFillFragment ";
+ this->MCFragment::dump();
+ OS << "\n ";
+ OS << " Size:" << getSize() << " Alignment:" << getAlignment() << ">";
+}
+
+void MCSectionData::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCSectionData";
+ OS << " Alignment:" << getAlignment() << " Address:" << Address
+ << " Size:" << Size << " FileSize:" << FileSize
+ << " Fragments:[";
+ for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ if (it != begin()) OS << ",\n ";
+ it->dump();
+ }
+ OS << "]>";
+}
+
+void MCSymbolData::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCSymbolData Symbol:" << getSymbol()
+ << " Fragment:" << getFragment() << " Offset:" << getOffset()
+ << " Flags:" << getFlags() << " Index:" << getIndex();
+ if (isCommon())
+ OS << " (common, size:" << getCommonSize()
+ << " align: " << getCommonAlignment() << ")";
+ if (isExternal())
+ OS << " (external)";
+ if (isPrivateExtern())
+ OS << " (private extern)";
+ OS << ">";
+}
+
+void MCAssembler::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCAssembler\n";
+ OS << " Sections:[";
+ for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ if (it != begin()) OS << ",\n ";
+ it->dump();
+ }
+ OS << "],\n";
+ OS << " Symbols:[";
+
+ for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) {
+ if (it != symbol_begin()) OS << ",\n ";
+ it->dump();
+ }
+ OS << "]>\n";
+}
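
MachObjectWriter::ApplyFixup above patches the already-encoded instruction bytes in a data fragment: the fixed-up value is written least-significant byte first at the fixup's offset (the FIXME notes the endianness assumption). A small sketch of that byte-level patching with plain standard-library types:

#include <cassert>
#include <cstdint>
#include <vector>

// Hypothetical helper mirroring the byte-level patch: write FixedValue into
// the fragment contents at Offset, least-significant byte first.
static void applyFixup(std::vector<uint8_t> &Contents, unsigned Offset,
                       unsigned Size, uint64_t FixedValue) {
  assert(Offset + Size <= Contents.size() && "Invalid fixup offset!");
  for (unsigned i = 0; i != Size; ++i)
    Contents[Offset + i] = uint8_t(FixedValue >> (i * 8));
}

int main() {
  std::vector<uint8_t> Data(8, 0);
  applyFixup(Data, 2, 4, 0x11223344);     // patch a 4-byte field at offset 2
  assert(Data[2] == 0x44 && Data[5] == 0x11);
  return 0;
}
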
diff --git a/lib/MC/MCCodeEmitter.cpp b/lib/MC/MCCodeEmitter.cpp
index c122763..accb06c 100644
--- a/lib/MC/MCCodeEmitter.cpp
+++ b/lib/MC/MCCodeEmitter.cpp
@@ -16,3 +16,15 @@ MCCodeEmitter::MCCodeEmitter() {
MCCodeEmitter::~MCCodeEmitter() {
}
+
+const MCFixupKindInfo &MCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const {
+ static const MCFixupKindInfo Builtins[] = {
+ { "FK_Data_1", 0, 8 },
+ { "FK_Data_2", 0, 16 },
+ { "FK_Data_4", 0, 32 },
+ { "FK_Data_8", 0, 64 }
+ };
+
+ assert(Kind <= 3 && "Unknown fixup kind");
+ return Builtins[Kind];
+}
diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp
index e90c03c..92a7154 100644
--- a/lib/MC/MCInstPrinter.cpp
+++ b/lib/MC/MCInstPrinter.cpp
@@ -8,7 +8,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/ADT/StringRef.h"
using namespace llvm;
MCInstPrinter::~MCInstPrinter() {
}
+
+/// getOpcodeName - Return the name of the specified opcode enum (e.g.
+/// "MOV32ri") or empty if we can't resolve it.
+StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return "";
+}
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 99a819f..a7a8a5d 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -137,6 +137,8 @@ public:
virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
unsigned ValueSize = 1,
unsigned MaxBytesToEmit = 0);
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
virtual void EmitValueToOffset(const MCExpr *Offset,
unsigned char Value = 0);
@@ -333,7 +335,22 @@ void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size,
unsigned AddrSpace) {
- new MCFillFragment(*AddValueSymbols(Value), Size, 1, CurSectionData);
+ MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+ if (!DF)
+ DF = new MCDataFragment(CurSectionData);
+
+ // Avoid fixups when possible.
+ int64_t AbsValue;
+ if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) {
+ // FIXME: Endianness assumption.
+ for (unsigned i = 0; i != Size; ++i)
+ DF->getContents().push_back(uint8_t(AbsValue >> (i * 8)));
+ } else {
+ DF->getFixups().push_back(MCAsmFixup(DF->getContents().size(),
+ *AddValueSymbols(Value),
+ MCFixup::getKindForSize(Size)));
+ DF->getContents().resize(DF->getContents().size() + Size, 0);
+ }
}
void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment,
@@ -342,7 +359,20 @@ void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment,
if (MaxBytesToEmit == 0)
MaxBytesToEmit = ByteAlignment;
new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
- CurSectionData);
+ false /* EmitNops */, CurSectionData);
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > CurSectionData->getAlignment())
+ CurSectionData->setAlignment(ByteAlignment);
+}
+
+void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ // FIXME: 0x90 is the default x86 1-byte nop opcode.
+ new MCAlignFragment(ByteAlignment, 0x90, 1, MaxBytesToEmit,
+ true /* EmitNops */, CurSectionData);
// Update the maximum alignment on the current section if necessary.
if (ByteAlignment > CurSectionData->getAlignment())
@@ -365,12 +395,23 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
CurSectionData->setHasInstructions(true);
- // FIXME: Relocations!
SmallVector<MCFixup, 4> Fixups;
SmallString<256> Code;
raw_svector_ostream VecOS(Code);
Emitter->EncodeInstruction(Inst, VecOS, Fixups);
- EmitBytes(VecOS.str(), 0);
+ VecOS.flush();
+
+ // Add the fixups and data.
+ MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+ if (!DF)
+ DF = new MCDataFragment(CurSectionData);
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ MCFixup &F = Fixups[i];
+ DF->getFixups().push_back(MCAsmFixup(DF->getContents().size()+F.getOffset(),
+ *F.getValue(),
+ F.getKind()));
+ }
+ DF->getContents().append(Code.begin(), Code.end());
}
void MCMachOStreamer::Finish() {
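
MCMachOStreamer::EmitValue above now appends to the current data fragment: expressions that evaluate to an absolute constant are emitted as raw little-endian bytes immediately, while anything symbolic reserves zeroed space and records an MCAsmFixup to be resolved at layout time. A simplified sketch of that decision (Fragment and Fixup here are stand-ins, not the MC types):

#include <cstdint>
#include <vector>

struct Fixup { unsigned Offset; unsigned Size; };   // stand-in for MCAsmFixup

struct Fragment {                                   // stand-in for MCDataFragment
  std::vector<uint8_t> Contents;
  std::vector<Fixup>   Fixups;
};

static void emitValue(Fragment &F, bool IsAbsolute, int64_t AbsValue,
                      unsigned Size) {
  if (IsAbsolute) {
    // Value is known now: append it as raw little-endian bytes.
    for (unsigned i = 0; i != Size; ++i)
      F.Contents.push_back(uint8_t(AbsValue >> (i * 8)));
  } else {
    // Value is symbolic: record a fixup at the current offset and reserve
    // zeroed space for the bytes to be patched at layout time.
    Fixup Fx = { unsigned(F.Contents.size()), Size };
    F.Fixups.push_back(Fx);
    F.Contents.resize(F.Contents.size() + Size, 0);
  }
}

int main() {
  Fragment F;
  emitValue(F, true, 0x0102, 2);      // constant: appends bytes 02 01
  emitValue(F, false, 0, 4);          // symbolic: 4 zero bytes plus one fixup
  return (F.Fixups.size() == 1 && F.Contents.size() == 6) ? 0 : 1;
}
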
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index 46e9ebf..ab61799 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -55,6 +55,9 @@ namespace {
unsigned ValueSize = 1,
unsigned MaxBytesToEmit = 0) {}
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0) {}
+
virtual void EmitValueToOffset(const MCExpr *Offset,
unsigned char Value = 0) {}
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index d5bc396..6185c30 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -146,7 +146,7 @@ bool AsmParser::Run() {
// FIXME: Target hook & command line option for initial section.
Out.SwitchSection(getMachOSection("__TEXT", "__text",
MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- 0, SectionKind()));
+ 0, SectionKind::getText()));
// Prime the lexer.
@@ -325,9 +325,17 @@ bool AsmParser::ParseExpression(const MCExpr *&Res) {
/// expr ::= primaryexpr
///
bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
+ // Parse the expression.
Res = 0;
- return ParsePrimaryExpr(Res, EndLoc) ||
- ParseBinOpRHS(1, Res, EndLoc);
+ if (ParsePrimaryExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc))
+ return true;
+
+ // Try to constant fold it up front, if possible.
+ int64_t Value;
+ if (Res->EvaluateAsAbsolute(Value))
+ Res = MCConstantExpr::Create(Value, getContext());
+
+ return false;
}
bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
@@ -906,8 +914,10 @@ bool AsmParser::ParseDirectiveDarwinSection() {
return Error(Loc, ErrorStr.c_str());
// FIXME: Arch specific.
+ bool isText = Segment == "__TEXT"; // FIXME: Hack.
Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize,
- SectionKind()));
+ isText ? SectionKind::getText()
+ : SectionKind::getDataRel()));
return false;
}
@@ -921,8 +931,10 @@ bool AsmParser::ParseDirectiveSectionSwitch(const char *Segment,
Lex();
// FIXME: Arch specific.
+ bool isText = StringRef(Segment) == "__TEXT"; // FIXME: Hack.
Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize,
- SectionKind()));
+ isText ? SectionKind::getText()
+ : SectionKind::getDataRel()));
// Set the implicit alignment, if any.
//
@@ -1229,8 +1241,14 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
}
}
- // FIXME: Target specific behavior about how the "extra" bytes are filled.
- Out.EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill);
+ // FIXME: hard code the parser to use EmitCodeAlignment for text when using
+ // the TextAlignFillValue.
+ if(Out.getCurrentSection()->getKind().isText() &&
+ Lexer.getMAI().getTextAlignFillValue() == FillExpr)
+ Out.EmitCodeAlignment(Alignment, MaxBytesToFill);
+ else
+ // FIXME: Target specific behavior about how the "extra" bytes are filled.
+ Out.EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill);
return false;
}
@@ -1354,7 +1372,7 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) {
if (IsLocal) {
Out.EmitZerofill(getMachOSection("__DATA", "__bss",
MCSectionMachO::S_ZEROFILL, 0,
- SectionKind()),
+ SectionKind::getBSS()),
Sym, Size, 1 << Pow2Alignment);
return false;
}
@@ -1390,7 +1408,7 @@ bool AsmParser::ParseDirectiveDarwinZerofill() {
// Create the zerofill section but no symbol
Out.EmitZerofill(getMachOSection(Segment, Section,
MCSectionMachO::S_ZEROFILL, 0,
- SectionKind()));
+ SectionKind::getBSS()));
return false;
}
@@ -1448,7 +1466,7 @@ bool AsmParser::ParseDirectiveDarwinZerofill() {
// FIXME: Arch specific.
Out.EmitZerofill(getMachOSection(Segment, Section,
MCSectionMachO::S_ZEROFILL, 0,
- SectionKind()),
+ SectionKind::getBSS()),
Sym, Size, 1 << Pow2Alignment);
return false;
diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp
index 6cc67a2..370aad1 100644
--- a/lib/MC/MCSectionMachO.cpp
+++ b/lib/MC/MCSectionMachO.cpp
@@ -10,6 +10,7 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/raw_ostream.h"
+#include <ctype.h>
using namespace llvm;
/// SectionTypeDescriptors - These are strings that describe the various section
diff --git a/lib/MC/TargetAsmBackend.cpp b/lib/MC/TargetAsmBackend.cpp
new file mode 100644
index 0000000..918d272
--- /dev/null
+++ b/lib/MC/TargetAsmBackend.cpp
@@ -0,0 +1,19 @@
+//===-- TargetAsmBackend.cpp - Target Assembly Backend ---------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+
+TargetAsmBackend::TargetAsmBackend(const Target &T)
+ : TheTarget(T)
+{
+}
+
+TargetAsmBackend::~TargetAsmBackend() {
+}
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 1e6d22f..8f860a6 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include <limits.h>
#include <cstring>
using namespace llvm;
@@ -625,17 +626,58 @@ APFloat::copySignificand(const APFloat &rhs)
/* Make this number a NaN, with an arbitrary but deterministic value
for the significand. If double or longer, this is a signalling NaN,
which may not be ideal. If float, this is QNaN(0). */
-void
-APFloat::makeNaN(unsigned type)
+void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
{
category = fcNaN;
- // FIXME: Add double and long double support for QNaN(0).
- if (semantics->precision == 24 && semantics->maxExponent == 127) {
- type |= 0x7fc00000U;
- type &= ~0x80000000U;
- } else
- type = ~0U;
- APInt::tcSet(significandParts(), type, partCount());
+ sign = Negative;
+
+ integerPart *significand = significandParts();
+ unsigned numParts = partCount();
+
+ // Set the significand bits to the fill.
+ if (!fill || fill->getNumWords() < numParts)
+ APInt::tcSet(significand, 0, numParts);
+ if (fill) {
+ APInt::tcAssign(significand, fill->getRawData(),
+ std::min(fill->getNumWords(), numParts));
+
+ // Zero out the excess bits of the significand.
+ unsigned bitsToPreserve = semantics->precision - 1;
+ unsigned part = bitsToPreserve / 64;
+ bitsToPreserve %= 64;
+ significand[part] &= ((1ULL << bitsToPreserve) - 1);
+ for (part++; part != numParts; ++part)
+ significand[part] = 0;
+ }
+
+ unsigned QNaNBit = semantics->precision - 2;
+
+ if (SNaN) {
+ // We always have to clear the QNaN bit to make it an SNaN.
+ APInt::tcClearBit(significand, QNaNBit);
+
+ // If there are no bits set in the payload, we have to set
+ // *something* to make it a NaN instead of an infinity;
+ // conventionally, this is the next bit down from the QNaN bit.
+ if (APInt::tcIsZero(significand, numParts))
+ APInt::tcSetBit(significand, QNaNBit - 1);
+ } else {
+ // We always have to set the QNaN bit to make it a QNaN.
+ APInt::tcSetBit(significand, QNaNBit);
+ }
+
+ // For x87 extended precision, we want to make a NaN, not a
+ // pseudo-NaN. Maybe we should expose the ability to make
+ // pseudo-NaNs?
+ if (semantics == &APFloat::x87DoubleExtended)
+ APInt::tcSetBit(significand, QNaNBit + 1);
+}
+
+APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
+ const APInt *fill) {
+ APFloat value(Sem, uninitialized);
+ value.makeNaN(SNaN, Negative, fill);
+ return value;
}
APFloat &
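[Editor's note: the makeNaN rewrite above builds a NaN with an explicit payload, sets or clears the quiet bit, and makes sure a signalling NaN with an all-zero payload does not collapse into an infinity encoding. A standalone sketch of the same bit manipulation for IEEE single precision only; the helper below is not LLVM API, just an illustration of the encoding:]

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Build a single-precision NaN bit pattern with the given payload.
    uint32_t makeNaNBits(bool SNaN, bool Negative, uint32_t Payload) {
      const uint32_t SignBit  = 0x80000000u;
      const uint32_t ExpBits  = 0x7f800000u; // all-ones exponent
      const uint32_t QuietBit = 0x00400000u; // MSB of the stored significand
      uint32_t Bits = ExpBits | (Payload & 0x003fffffu);
      if (SNaN) {
        Bits &= ~QuietBit;              // clear the quiet bit
        if ((Bits & 0x007fffffu) == 0)  // avoid an infinity encoding:
          Bits |= QuietBit >> 1;        // set the next bit down instead
      } else {
        Bits |= QuietBit;               // set the quiet bit
      }
      if (Negative) Bits |= SignBit;
      return Bits;
    }

    int main() {
      uint32_t Bits = makeNaNBits(/*SNaN=*/false, /*Negative=*/false, /*Payload=*/1);
      float F;
      std::memcpy(&F, &Bits, sizeof F);
      std::printf("0x%08x is NaN: %d\n", Bits, F != F); // 0x7fc00001 is NaN: 1
    }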
@@ -700,9 +742,14 @@ APFloat::APFloat(const fltSemantics &ourSemantics) {
sign = false;
}
+APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
+ assertArithmeticOK(ourSemantics);
+ // Allocates storage if necessary but does not initialize it.
+ initialize(&ourSemantics);
+}
APFloat::APFloat(const fltSemantics &ourSemantics,
- fltCategory ourCategory, bool negative, unsigned type)
+ fltCategory ourCategory, bool negative)
{
assertArithmeticOK(ourSemantics);
initialize(&ourSemantics);
@@ -711,7 +758,7 @@ APFloat::APFloat(const fltSemantics &ourSemantics,
if (category == fcNormal)
category = fcZero;
else if (ourCategory == fcNaN)
- makeNaN(type);
+ makeNaN();
}
APFloat::APFloat(const fltSemantics &ourSemantics, const StringRef& text)
@@ -2345,11 +2392,24 @@ APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mo
if (decDigitValue(*D.firstSigDigit) >= 10U) {
category = fcZero;
fs = opOK;
- } else if ((D.normalizedExponent + 1) * 28738
- <= 8651 * (semantics->minExponent - (int) semantics->precision)) {
+
+ /* Check whether the normalized exponent is high enough to overflow
+ max during the log-rebasing in the max-exponent check below. */
+ } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
+ fs = handleOverflow(rounding_mode);
+
+ /* If it wasn't, then it also wasn't high enough to overflow max
+ during the log-rebasing in the min-exponent check. Check that it
+ won't overflow min in either check, then perform the min-exponent
+ check. */
+ } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
+ (D.normalizedExponent + 1) * 28738 <=
+ 8651 * (semantics->minExponent - (int) semantics->precision)) {
/* Underflow to zero and round. */
zeroSignificand();
fs = normalize(rounding_mode, lfLessThanHalf);
+
+ /* We can finally safely perform the max-exponent check. */
} else if ((D.normalizedExponent - 1) * 42039
>= 12655 * semantics->maxExponent) {
/* Overflow and round. */
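[Editor's note: the exponent guards above rely on integer ratios that approximate log2(10): both 42039/12655 and 28738/8651 are about 3.3219, so "(exp - 1) * 42039 >= 12655 * maxExponent" is roughly "exp * log2(10) >= maxExponent". The new checks make sure that multiplication cannot itself overflow a signed int for absurdly large decimal exponents. A standalone sketch of the overflow-check half, with illustrative values (maxExponent 127 is IEEE single):]

    #include <climits>
    #include <cstdio>

    // Returns true if 10^NormalizedExponent is certainly too large for a
    // format whose binary max exponent is MaxExponent, guarding the
    // multiplication against signed overflow first.
    bool decimalExponentOverflows(int NormalizedExponent, int MaxExponent) {
      if (NormalizedExponent - 1 > INT_MAX / 42039)
        return true; // the product would overflow; certainly too large
      return (NormalizedExponent - 1) * 42039 >= 12655 * MaxExponent;
    }

    int main() {
      std::printf("%d\n", decimalExponentOverflows(40, 127));      // 1: ~1e40 overflows float
      std::printf("%d\n", decimalExponentOverflows(30, 127));      // 0: ~1e30 fits
      std::printf("%d\n", decimalExponentOverflows(INT_MAX, 127)); // 1, and no UB
    }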
@@ -3306,7 +3366,7 @@ namespace {
void APFloat::toString(SmallVectorImpl<char> &Str,
unsigned FormatPrecision,
- unsigned FormatMaxPadding) {
+ unsigned FormatMaxPadding) const {
switch (category) {
case fcInfinity:
if (isNegative())
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 3bce3f3..6a6384a 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -2344,13 +2344,21 @@ APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
& ((integerPart) 1 << bit % integerPartWidth)) != 0;
}
-/* Set the given bit of a bignum. */
+/* Set the given bit of a bignum. */
void
APInt::tcSetBit(integerPart *parts, unsigned int bit)
{
parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
}
+/* Clears the given bit of a bignum. */
+void
+APInt::tcClearBit(integerPart *parts, unsigned int bit)
+{
+ parts[bit / integerPartWidth] &=
+ ~((integerPart) 1 << (bit % integerPartWidth));
+}
+
/* Returns the bit number of the least significant set bit of a
number. If the input number has no bits set -1U is returned. */
unsigned int
diff --git a/lib/Support/Android.mk b/lib/Support/Android.mk
new file mode 100644
index 0000000..e972753
--- /dev/null
+++ b/lib/Support/Android.mk
@@ -0,0 +1,72 @@
+LOCAL_PATH:= $(call my-dir)
+
+support_SRC_FILES := \
+ APFloat.cpp \
+ APInt.cpp \
+ APSInt.cpp \
+ Allocator.cpp \
+ CommandLine.cpp \
+ ConstantRange.cpp \
+ Debug.cpp \
+ DeltaAlgorithm.cpp \
+ Dwarf.cpp \
+ ErrorHandling.cpp \
+ FileUtilities.cpp \
+ FoldingSet.cpp \
+ FormattedStream.cpp \
+ GraphWriter.cpp \
+ IsInf.cpp \
+ IsNAN.cpp \
+ ManagedStatic.cpp \
+ MemoryBuffer.cpp \
+ MemoryObject.cpp \
+ PluginLoader.cpp \
+ PrettyStackTrace.cpp \
+ Regex.cpp \
+ SlowOperationInformer.cpp \
+ SmallPtrSet.cpp \
+ SmallVector.cpp \
+ SourceMgr.cpp \
+ Statistic.cpp \
+ StringExtras.cpp \
+ StringMap.cpp \
+ StringPool.cpp \
+ StringRef.cpp \
+ SystemUtils.cpp \
+ TargetRegistry.cpp \
+ Timer.cpp \
+ Triple.cpp \
+ Twine.cpp \
+ circular_raw_ostream.cpp \
+ raw_os_ostream.cpp \
+ raw_ostream.cpp \
+ regcomp.c \
+ regerror.c \
+ regexec.c \
+ regfree.c \
+ regstrlcpy.c
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+# FIXME: This only requires RTTI because tblgen uses it. Fix that.
+REQUIRES_RTTI := 1
+
+LOCAL_SRC_FILES := $(support_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMSupport
+
+include $(LLVM_HOST_BUILD_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(support_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMSupport
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 961dc1f..2ab4103 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -650,7 +650,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
if (Handler == 0) {
if (SinkOpts.empty()) {
errs() << ProgramName << ": Unknown command line argument '"
- << argv[i] << "'. Try: '" << argv[0] << " --help'\n";
+ << argv[i] << "'. Try: '" << argv[0] << " -help'\n";
ErrorParsing = true;
} else {
for (SmallVectorImpl<Option*>::iterator I = SinkOpts.begin(),
@@ -673,7 +673,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
errs() << ProgramName
<< ": Not enough positional command line arguments specified!\n"
<< "Must specify at least " << NumPositionalRequired
- << " positional arguments: See: " << argv[0] << " --help\n";
+ << " positional arguments: See: " << argv[0] << " -help\n";
ErrorParsing = true;
} else if (!HasUnlimitedPositionals
@@ -681,7 +681,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
errs() << ProgramName
<< ": Too many positional arguments specified!\n"
<< "Can specify at most " << PositionalOpts.size()
- << " positional arguments: See: " << argv[0] << " --help\n";
+ << " positional arguments: See: " << argv[0] << " -help\n";
ErrorParsing = true;
} else if (ConsumeAfterOpt == 0) {
@@ -1029,7 +1029,7 @@ void generic_parser_base::printOptionInfo(const Option &O,
//===----------------------------------------------------------------------===//
-// --help and --help-hidden option implementation
+// -help and -help-hidden option implementation
//
static int OptNameCompare(const void *LHS, const void *RHS) {
@@ -1134,7 +1134,7 @@ static HelpPrinter NormalPrinter(false);
static HelpPrinter HiddenPrinter(true);
static cl::opt<HelpPrinter, true, parser<bool> >
-HOp("help", cl::desc("Display available options (--help-hidden for more)"),
+HOp("help", cl::desc("Display available options (-help-hidden for more)"),
cl::location(NormalPrinter), cl::ValueDisallowed);
static cl::opt<HelpPrinter, true, parser<bool> >
@@ -1222,8 +1222,8 @@ void cl::PrintHelpMessage() {
// NormalPrinter variable is a HelpPrinter and the help gets printed when
// its operator= is invoked. That's because the "normal" usages of the
// help printer is to be assigned true/false depending on whether the
- // --help option was given or not. Since we're circumventing that we have
- // to make it look like --help was given, so we assign true.
+ // -help option was given or not. Since we're circumventing that we have
+ // to make it look like -help was given, so we assign true.
NormalPrinter = true;
}
diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp
index 9ab3666..c72b5a1 100644
--- a/lib/Support/FormattedStream.cpp
+++ b/lib/Support/FormattedStream.cpp
@@ -56,15 +56,14 @@ void formatted_raw_ostream::ComputeColumn(const char *Ptr, size_t Size) {
/// PadToColumn - Align the output to some column number.
///
/// \param NewCol - The column to move to.
-/// \param MinPad - The minimum space to give after the most recent
-/// I/O, even if the current column + minpad > newcol.
///
-void formatted_raw_ostream::PadToColumn(unsigned NewCol) {
+formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) {
// Figure out what's in the buffer and add it to the column count.
ComputeColumn(getBufferStart(), GetNumBytesInBuffer());
// Output spaces until we reach the desired column.
indent(std::max(int(NewCol - ColumnScanned), 1));
+ return *this;
}
void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) {
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index c8bca6e..ec84f9b 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -137,7 +137,7 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
args.clear();
args.push_back(gv.c_str());
args.push_back(PSFilename.c_str());
- args.push_back("-spartan");
+ args.push_back("--spartan");
args.push_back(0);
ErrMsg.clear();
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 9253b01..eb046d0 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -174,7 +174,8 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
#ifdef O_BINARY
OpenFlags |= O_BINARY; // Open input file in binary mode on win32.
#endif
- int FD = ::open(Filename.str().c_str(), O_RDONLY|OpenFlags);
+ SmallString<256> PathBuf(Filename.begin(), Filename.end());
+ int FD = ::open(PathBuf.c_str(), O_RDONLY|OpenFlags);
if (FD == -1) {
if (ErrStr) *ErrStr = strerror(errno);
return 0;
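[Editor's note: the MemoryBuffer::getFile hunk above copies the filename into a SmallString before calling ::open because a StringRef is a (pointer, length) slice that is not guaranteed to be NUL-terminated. A standalone sketch of the same precaution, using std::string in place of SmallString and a printf in place of the real ::open call:]

    #include <cstdio>
    #include <string>

    // Open a file named by a length-prefixed (non NUL-terminated) slice.
    int openByLengthPrefixedName(const char *Data, size_t Len) {
      std::string PathBuf(Data, Len);   // copy adds the terminating NUL
      std::printf("would open '%s'\n", PathBuf.c_str());
      return -1; // placeholder for ::open(PathBuf.c_str(), O_RDONLY)
    }

    int main() {
      const char Joined[] = "input.llfile2.ll"; // two names packed back to back
      openByLengthPrefixedName(Joined, 8);      // only "input.ll"
    }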
diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp
index 618ca05..a7631de 100644
--- a/lib/Support/Regex.cpp
+++ b/lib/Support/Regex.cpp
@@ -90,3 +90,79 @@ bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){
return true;
}
+
+std::string Regex::sub(StringRef Repl, StringRef String,
+ std::string *Error) {
+ SmallVector<StringRef, 8> Matches;
+
+ // Reset error, if given.
+ if (Error && !Error->empty()) *Error = "";
+
+ // Return the input if there was no match.
+ if (!match(String, &Matches))
+ return String;
+
+ // Otherwise splice in the replacement string, starting with the prefix before
+ // the match.
+ std::string Res(String.begin(), Matches[0].begin());
+
+ // Then the replacement string, honoring possible substitutions.
+ while (!Repl.empty()) {
+ // Skip to the next escape.
+ std::pair<StringRef, StringRef> Split = Repl.split('\\');
+
+ // Add the skipped substring.
+ Res += Split.first;
+
+ // Check for termination and trailing backslash.
+ if (Split.second.empty()) {
+ if (Repl.size() != Split.first.size() &&
+ Error && Error->empty())
+ *Error = "replacement string contained trailing backslash";
+ break;
+ }
+
+ // Otherwise update the replacement string and interpret escapes.
+ Repl = Split.second;
+
+ // FIXME: We should have a StringExtras function for mapping C99 escapes.
+ switch (Repl[0]) {
+ // Treat all unrecognized characters as self-quoting.
+ default:
+ Res += Repl[0];
+ Repl = Repl.substr(1);
+ break;
+
+ // Single character escapes.
+ case 't':
+ Res += '\t';
+ Repl = Repl.substr(1);
+ break;
+ case 'n':
+ Res += '\n';
+ Repl = Repl.substr(1);
+ break;
+
+ // Decimal escapes are backreferences.
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9': {
+ // Extract the backreference number.
+ StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789"));
+ Repl = Repl.substr(Ref.size());
+
+ unsigned RefValue;
+ if (!Ref.getAsInteger(10, RefValue) &&
+ RefValue < Matches.size())
+ Res += Matches[RefValue];
+ else if (Error && Error->empty())
+ *Error = "invalid backreference string '" + Ref.str() + "'";
+ break;
+ }
+ }
+ }
+
+ // And finally the suffix.
+ Res += StringRef(Matches[0].end(), String.end() - Matches[0].end());
+
+ return Res;
+}
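[Editor's note: a hypothetical usage sketch of the Regex::sub method added above, assuming only the semantics visible in the patch itself (\t and \n escapes, \0-\9 backreferences, Error set on a trailing backslash or bad backreference). It needs to be linked against LLVMSupport; the pattern and input strings are invented for illustration:]

    #include "llvm/Support/Regex.h"
    #include <iostream>
    #include <string>

    int main() {
      llvm::Regex R("([a-z]+)-([0-9]+)");
      std::string Err;
      // "\2" and "\1" refer to the captured groups; "\0" would be the whole
      // match. On a malformed replacement string Err is set.
      std::string Out = R.sub("\\2:\\1", "item-42", &Err);
      std::cout << Out << "\n"; // 42:item
    }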
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index ae2640b..2b262dc 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/APInt.h"
using namespace llvm;
@@ -172,23 +173,28 @@ size_t StringRef::count(StringRef Str) const {
return Count;
}
+static unsigned GetAutoSenseRadix(StringRef &Str) {
+ if (Str.startswith("0x")) {
+ Str = Str.substr(2);
+ return 16;
+ } else if (Str.startswith("0b")) {
+ Str = Str.substr(2);
+ return 2;
+ } else if (Str.startswith("0")) {
+ return 8;
+ } else {
+ return 10;
+ }
+}
+
+
/// GetAsUnsignedInteger - Workhorse method that converts an integer character
/// sequence of radix up to 36 to an unsigned long long value.
static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
unsigned long long &Result) {
// Autosense radix if not specified.
- if (Radix == 0) {
- if (Str.startswith("0x")) {
- Str = Str.substr(2);
- Radix = 16;
- } else if (Str.startswith("0b")) {
- Str = Str.substr(2);
- Radix = 2;
- } else if (Str.startswith("0"))
- Radix = 8;
- else
- Radix = 10;
- }
+ if (Radix == 0)
+ Radix = GetAutoSenseRadix(Str);
// Empty strings (after the radix autosense) are invalid.
if (Str.empty()) return true;
@@ -272,3 +278,78 @@ bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const {
Result = Val;
return false;
}
+
+bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
+ StringRef Str = *this;
+
+ // Autosense radix if not specified.
+ if (Radix == 0)
+ Radix = GetAutoSenseRadix(Str);
+
+ assert(Radix > 1 && Radix <= 36);
+
+ // Empty strings (after the radix autosense) are invalid.
+ if (Str.empty()) return true;
+
+ // Skip leading zeroes. This can be a significant improvement if
+ // it means we don't need > 64 bits.
+ while (!Str.empty() && Str.front() == '0')
+ Str = Str.substr(1);
+
+ // If it was nothing but zeroes....
+ if (Str.empty()) {
+ Result = APInt(64, 0);
+ return false;
+ }
+
+ // (Over-)estimate the required number of bits.
+ unsigned Log2Radix = 0;
+ while ((1U << Log2Radix) < Radix) Log2Radix++;
+ bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
+
+ unsigned BitWidth = Log2Radix * Str.size();
+ if (BitWidth < Result.getBitWidth())
+ BitWidth = Result.getBitWidth(); // don't shrink the result
+ else
+ Result.zext(BitWidth);
+
+ APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
+ if (!IsPowerOf2Radix) {
+ // These must have the same bit-width as Result.
+ RadixAP = APInt(BitWidth, Radix);
+ CharAP = APInt(BitWidth, 0);
+ }
+
+ // Parse all the bytes of the string given this radix.
+ Result = 0;
+ while (!Str.empty()) {
+ unsigned CharVal;
+ if (Str[0] >= '0' && Str[0] <= '9')
+ CharVal = Str[0]-'0';
+ else if (Str[0] >= 'a' && Str[0] <= 'z')
+ CharVal = Str[0]-'a'+10;
+ else if (Str[0] >= 'A' && Str[0] <= 'Z')
+ CharVal = Str[0]-'A'+10;
+ else
+ return true;
+
+ // If the parsed value is larger than the integer radix, the string is
+ // invalid.
+ if (CharVal >= Radix)
+ return true;
+
+ // Add in this character.
+ if (IsPowerOf2Radix) {
+ Result <<= Log2Radix;
+ Result |= CharVal;
+ } else {
+ Result *= RadixAP;
+ CharAP = CharVal;
+ Result += CharAP;
+ }
+
+ Str = Str.substr(1);
+ }
+
+ return false;
+}
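[Editor's note: the APInt overload above first autosenses the radix ("0x" -> 16, "0b" -> 2, leading "0" -> 8, otherwise 10) and then over-estimates the needed bit width as ceil(log2(radix)) * digit count. A standalone sketch of just the autosense step, with std::string standing in for StringRef:]

    #include <cstdio>
    #include <string>

    // Autosense the radix and strip the "0x"/"0b" prefix if present.
    unsigned autoSenseRadix(std::string &S) {
      if (S.compare(0, 2, "0x") == 0) { S.erase(0, 2); return 16; }
      if (S.compare(0, 2, "0b") == 0) { S.erase(0, 2); return 2; }
      if (!S.empty() && S[0] == '0') return 8;
      return 10;
    }

    int main() {
      std::string A = "0x1f", B = "0b101", C = "017", D = "42";
      std::printf("%u %u %u %u\n",
                  autoSenseRadix(A), autoSenseRadix(B),
                  autoSenseRadix(C), autoSenseRadix(D)); // 16 2 8 10
    }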
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 5a76184..61bf0a7 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -40,6 +40,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case x86: return "i386";
case x86_64: return "x86_64";
case xcore: return "xcore";
+ case mblaze: return "mblaze";
}
return "<invalid>";
@@ -62,6 +63,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case ppc64:
case ppc: return "ppc";
+ case mblaze: return "mblaze";
+
case sparcv9:
case sparc: return "sparc";
@@ -127,6 +130,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
return ppc64;
if (Name == "ppc")
return ppc;
+ if (Name == "mblaze")
+ return mblaze;
if (Name == "sparc")
return sparc;
if (Name == "sparcv9")
@@ -198,6 +203,8 @@ const char *Triple::getArchNameForAssembler() {
return "ppc";
if (Str == "powerpc64")
return "ppc64";
+ if (Str == "mblaze" || Str == "microblaze")
+ return "mblaze";
if (Str == "arm")
return "arm";
if (Str == "armv4t" || Str == "thumbv4t")
@@ -234,6 +241,8 @@ void Triple::Parse() const {
Arch = ppc;
else if ((ArchName == "powerpc64") || (ArchName == "ppu"))
Arch = ppc64;
+ else if (ArchName == "mblaze")
+ Arch = mblaze;
else if (ArchName == "arm" ||
ArchName.startswith("armv") ||
ArchName == "xscale")
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index af6dc7c..071c924 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -368,6 +368,7 @@ void format_object_base::home() {
/// if no error occurred.
raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
unsigned Flags) : pos(0) {
+ assert(Filename != 0 && "Filename is null");
// Verify that we don't have both "append" and "excl".
assert((!(Flags & F_Excl) || !(Flags & F_Append)) &&
"Cannot specify both 'excl' and 'append' file creation flags!");
@@ -574,12 +575,18 @@ void raw_svector_ostream::resync() {
}
void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) {
- assert(Ptr == OS.end() && OS.size() + Size <= OS.capacity() &&
- "Invalid write_impl() call!");
-
- // We don't need to copy the bytes, just commit the bytes to the
- // SmallVector.
- OS.set_size(OS.size() + Size);
+ // If we're writing bytes from the end of the buffer into the smallvector, we
+ // don't need to copy the bytes, just commit the bytes because they are
+ // already in the right place.
+ if (Ptr == OS.end()) {
+ assert(OS.size() + Size <= OS.capacity() && "Invalid write_impl() call!");
+ OS.set_size(OS.size() + Size);
+ } else {
+ assert(GetNumBytesInBuffer() == 0 &&
+ "Should be writing from buffer if some bytes in it");
+ // Otherwise, do copy the bytes.
+ OS.append(Ptr, Ptr+Size);
+ }
// Grow the vector if necessary.
if (OS.capacity() - OS.size() < 64)
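[Editor's note: the write_impl change above distinguishes two cases: the bytes being flushed already live at the end of the backing SmallVector (the stream wrote directly into its spare capacity, so only the size needs bumping) versus bytes coming from elsewhere, which must be copied. A standalone sketch of the same distinction with a plain fixed-size buffer instead of a SmallVector:]

    #include <cassert>
    #include <cstdio>
    #include <cstring>

    struct VecStream {
      char Buf[64];
      size_t Size = 0;
      void write(const char *Ptr, size_t N) {
        if (Ptr == Buf + Size) {
          assert(Size + N <= sizeof(Buf) && "bytes must already be in place");
          Size += N;                        // commit in place, no copy
        } else {
          assert(Size + N <= sizeof(Buf));
          std::memcpy(Buf + Size, Ptr, N);  // external data: copy it
          Size += N;
        }
      }
    };

    int main() {
      VecStream S;
      S.write("abc", 3);                    // copy path
      std::memcpy(S.Buf + S.Size, "de", 2); // pretend 2 bytes were produced in place
      S.write(S.Buf + S.Size, 2);           // commit path
      std::printf("%.*s\n", (int)S.Size, S.Buf); // abcde
    }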
diff --git a/lib/System/Android.mk b/lib/System/Android.mk
new file mode 100644
index 0000000..3f11fc7
--- /dev/null
+++ b/lib/System/Android.mk
@@ -0,0 +1,46 @@
+LOCAL_PATH:= $(call my-dir)
+
+system_SRC_FILES := \
+ Alarm.cpp \
+ Atomic.cpp \
+ Disassembler.cpp \
+ Errno.cpp \
+ Host.cpp \
+ IncludeFile.cpp \
+ Memory.cpp \
+ Mutex.cpp \
+ Path.cpp \
+ Process.cpp \
+ Program.cpp \
+ RWMutex.cpp \
+ Signals.cpp \
+ ThreadLocal.cpp \
+ Threading.cpp \
+ TimeValue.cpp
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+REQUIRES_RTTI := 1
+
+LOCAL_SRC_FILES := $(system_SRC_FILES)
+LOCAL_CFLAGS += -march=i686
+
+LOCAL_MODULE:= libLLVMSystem
+
+include $(LLVM_HOST_BUILD_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+
+REQUIRES_RTTI := 1
+
+LOCAL_SRC_FILES := $(system_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMSystem
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/lib/System/Unix/Host.inc b/lib/System/Unix/Host.inc
index c76d6a4..5b11876 100644
--- a/lib/System/Unix/Host.inc
+++ b/lib/System/Unix/Host.inc
@@ -21,6 +21,7 @@
#include "Unix.h"
#include <sys/utsname.h>
#include <string>
+#include <ctype.h>
using namespace llvm;
diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc
index e8c2806..c10498a 100644
--- a/lib/System/Unix/Program.inc
+++ b/lib/System/Unix/Program.inc
@@ -126,7 +126,7 @@ static void TimeOutHandler(int Sig) {
static void SetMemoryLimits (unsigned size)
{
-#if HAVE_SYS_RESOURCE_H
+#if HAVE_SYS_RESOURCE_H && HAVE_GETRLIMIT && HAVE_SETRLIMIT
struct rlimit r;
__typeof__ (r.rlim_cur) limit = (__typeof__ (r.rlim_cur)) (size) * 1048576;
diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc
index 676e1e5..c8ec68a 100644
--- a/lib/System/Unix/Signals.inc
+++ b/lib/System/Unix/Signals.inc
@@ -52,7 +52,16 @@ static const int *const IntSigsEnd =
// KillSigs - Signals that are synchronous with the program that will cause it
// to die.
static const int KillSigs[] = {
- SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV, SIGSYS, SIGXCPU, SIGXFSZ
+ SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV
+#ifdef SIGSYS
+ , SIGSYS
+#endif
+#ifdef SIGXCPU
+ , SIGXCPU
+#endif
+#ifdef SIGXFSZ
+ , SIGXFSZ
+#endif
#ifdef SIGEMT
, SIGEMT
#endif
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 6fe7c2c..8e537d8 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -643,6 +643,13 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
+ // tGPR is used sometimes in ARM instructions that need to avoid using
+ // certain registers. Just treat it as GPR here.
+ if (DestRC == ARM::tGPRRegisterClass)
+ DestRC = ARM::GPRRegisterClass;
+ if (SrcRC == ARM::tGPRRegisterClass)
+ SrcRC = ARM::GPRRegisterClass;
+
if (DestRC != SrcRC) {
if (DestRC->getSize() != SrcRC->getSize())
return false;
@@ -697,6 +704,11 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MFI.getObjectSize(FI),
Align);
+ // tGPR is used sometimes in ARM instructions that need to avoid using
+ // certain registers. Just treat it as GPR here.
+ if (RC == ARM::tGPRRegisterClass)
+ RC = ARM::GPRRegisterClass;
+
if (RC == ARM::GPRRegisterClass) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
.addReg(SrcReg, getKillRegState(isKill))
@@ -745,6 +757,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MFI.getObjectSize(FI),
Align);
+ // tGPR is used sometimes in ARM instructions that need to avoid using
+ // certain registers. Just treat it as GPR here.
+ if (RC == ARM::tGPRRegisterClass)
+ RC = ARM::GPRRegisterClass;
+
if (RC == ARM::GPRRegisterClass) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
@@ -1020,9 +1037,8 @@ ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
return MI;
}
-bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0,
- const MachineInstr *MI1,
- const MachineRegisterInfo *MRI) const {
+bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1) const {
int Opcode = MI0->getOpcode();
if (Opcode == ARM::t2LDRpci ||
Opcode == ARM::t2LDRpci_pic ||
@@ -1051,7 +1067,7 @@ bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0,
return ACPV0->hasSameValue(ACPV1);
}
- return TargetInstrInfoImpl::isIdentical(MI0, MI1, MRI);
+ return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
/// getInstrPredicate - If instruction is predicated, returns its predicate
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 0d9d4a7..0194231 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -289,8 +289,8 @@ public:
MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
- virtual bool isIdentical(const MachineInstr *MI, const MachineInstr *Other,
- const MachineRegisterInfo *MRI) const;
+ virtual bool produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1) const;
};
static inline
@@ -332,7 +332,7 @@ bool isJumpTableBranchOpcode(int Opc) {
static inline
bool isIndirectBranchOpcode(int Opc) {
- return Opc == ARM::BRIND || Opc == ARM::tBRIND;
+ return Opc == ARM::BRIND || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND;
}
/// getInstrPredicate - If instruction is predicated, returns its predicate
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index cb0bd1d..577c363 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -513,7 +513,7 @@ cannotEliminateFrame(const MachineFunction &MF) const {
}
/// estimateStackSize - Estimate and return the size of the frame.
-static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) {
+static unsigned estimateStackSize(MachineFunction &MF) {
const MachineFrameInfo *FFI = MF.getFrameInfo();
int Offset = 0;
for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
@@ -583,14 +583,6 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
SmallVector<unsigned, 4> UnspilledCS2GPRs;
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-
- // Calculate and set max stack object alignment early, so we can decide
- // whether we will need stack realignment (and thus FP).
- if (RealignStack) {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->calculateMaxStackAlignment();
- }
-
// Spill R4 if Thumb2 function requires stack realignment - it will be used as
// scratch register.
// FIXME: It will be better just to find spare register here.
@@ -679,8 +671,16 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
}
+ // If any of the stack slot references may be out of range of an immediate
+ // offset, make sure a register (or a spill slot) is available for the
+ // register scavenger. Note that if we're indexing off the frame pointer, the
+ // effective stack size is 4 bytes larger since the FP points to the stack
+ // slot of the previous FP.
+ bool BigStack = RS &&
+ estimateStackSize(MF) + (hasFP(MF) ? 4 : 0) >= estimateRSStackSizeLimit(MF);
+
bool ExtraCSSpill = false;
- if (!CanEliminateFrame || cannotEliminateFrame(MF)) {
+ if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) {
AFI->setHasStackFrame(true);
// If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
@@ -735,51 +735,43 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// callee-saved register or reserve a special spill slot to facilitate
// register scavenging. Thumb1 needs a spill slot for stack pointer
// adjustments also, even when the frame itself is small.
- if (RS && !ExtraCSSpill) {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- // If any of the stack slot references may be out of range of an
- // immediate offset, make sure a register (or a spill slot) is
- // available for the register scavenger. Note that if we're indexing
- // off the frame pointer, the effective stack size is 4 bytes larger
- // since the FP points to the stack slot of the previous FP.
- if (estimateStackSize(MF, MFI) + (hasFP(MF) ? 4 : 0)
- >= estimateRSStackSizeLimit(MF)) {
- // If any non-reserved CS register isn't spilled, just spill one or two
- // extra. That should take care of it!
- unsigned NumExtras = TargetAlign / 4;
- SmallVector<unsigned, 2> Extras;
- while (NumExtras && !UnspilledCS1GPRs.empty()) {
- unsigned Reg = UnspilledCS1GPRs.back();
- UnspilledCS1GPRs.pop_back();
+ if (BigStack && !ExtraCSSpill) {
+ // If any non-reserved CS register isn't spilled, just spill one or two
+ // extra. That should take care of it!
+ unsigned NumExtras = TargetAlign / 4;
+ SmallVector<unsigned, 2> Extras;
+ while (NumExtras && !UnspilledCS1GPRs.empty()) {
+ unsigned Reg = UnspilledCS1GPRs.back();
+ UnspilledCS1GPRs.pop_back();
+ if (!isReservedReg(MF, Reg)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ // For non-Thumb1 functions, also check for hi-reg CS registers
+ if (!AFI->isThumb1OnlyFunction()) {
+ while (NumExtras && !UnspilledCS2GPRs.empty()) {
+ unsigned Reg = UnspilledCS2GPRs.back();
+ UnspilledCS2GPRs.pop_back();
if (!isReservedReg(MF, Reg)) {
Extras.push_back(Reg);
NumExtras--;
}
}
- // For non-Thumb1 functions, also check for hi-reg CS registers
- if (!AFI->isThumb1OnlyFunction()) {
- while (NumExtras && !UnspilledCS2GPRs.empty()) {
- unsigned Reg = UnspilledCS2GPRs.back();
- UnspilledCS2GPRs.pop_back();
- if (!isReservedReg(MF, Reg)) {
- Extras.push_back(Reg);
- NumExtras--;
- }
- }
- }
- if (Extras.size() && NumExtras == 0) {
- for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
- MF.getRegInfo().setPhysRegUsed(Extras[i]);
- AFI->setCSRegisterIsSpilled(Extras[i]);
- }
- } else if (!AFI->isThumb1OnlyFunction()) {
- // note: Thumb1 functions spill to R12, not the stack.
- // Reserve a slot closest to SP or frame pointer.
- const TargetRegisterClass *RC = ARM::GPRRegisterClass;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
+ }
+ if (Extras.size() && NumExtras == 0) {
+ for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
+ MF.getRegInfo().setPhysRegUsed(Extras[i]);
+ AFI->setCSRegisterIsSpilled(Extras[i]);
}
+ } else if (!AFI->isThumb1OnlyFunction()) {
+ // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
+ // closest to SP or frame pointer.
+ const TargetRegisterClass *RC = ARM::GPRRegisterClass;
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
}
}
}
@@ -1093,6 +1085,15 @@ hasReservedCallFrame(MachineFunction &MF) const {
return !MF.getFrameInfo()->hasVarSizedObjects();
}
+// canSimplifyCallFramePseudos - If there is a reserved call frame, the
+// call frame pseudos can be simplified. Unlike most targets, having a FP
+// is not sufficient here since we still may reference some objects via SP
+// even when FP is available in Thumb2 mode.
+bool ARMBaseRegisterInfo::
+canSimplifyCallFramePseudos(MachineFunction &MF) const {
+ return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
static void
emitSPUpdate(bool isARM,
MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
@@ -1127,13 +1128,14 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
assert(!AFI->isThumb1OnlyFunction() &&
- "This eliminateCallFramePseudoInstr does not suppor Thumb1!");
+ "This eliminateCallFramePseudoInstr does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
// Replace the pseudo instruction with a new instruction...
unsigned Opc = Old->getOpcode();
- ARMCC::CondCodes Pred = (ARMCC::CondCodes)Old->getOperand(1).getImm();
- // FIXME: Thumb2 version of ADJCALLSTACKUP and ADJCALLSTACKDOWN?
+ int PIdx = Old->findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
// Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
unsigned PredReg = Old->getOperand(2).getReg();
@@ -1157,7 +1159,6 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
assert(!AFI->isThumb1OnlyFunction() &&
"This eliminateFrameIndex does not support Thumb1!");
@@ -1168,12 +1169,12 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
int FrameIndex = MI.getOperand(i).getIndex();
- int Offset = MFI->getObjectOffset(FrameIndex) + MFI->getStackSize() + SPAdj;
unsigned FrameReg;
- Offset = getFrameIndexReference(MF, FrameIndex, FrameReg);
+ int Offset = getFrameIndexReference(MF, FrameIndex, FrameReg);
if (FrameReg != ARM::SP)
SPAdj = 0;
+ Offset += SPAdj;
// Modify MI as necessary to handle as much of 'Offset' as possible
bool Done = false;
@@ -1264,7 +1265,7 @@ emitPrologue(MachineFunction &MF) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
assert(!AFI->isThumb1OnlyFunction() &&
- "This emitPrologue does not suppor Thumb1!");
+ "This emitPrologue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
unsigned NumBytes = MFI->getStackSize();
@@ -1349,7 +1350,9 @@ emitPrologue(MachineFunction &MF) const {
unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
- AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+ if (STI.isTargetDarwin() || hasFP(MF))
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+ NumBytes);
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
@@ -1425,7 +1428,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
assert(!AFI->isThumb1OnlyFunction() &&
- "This emitEpilogue does not suppor Thumb1!");
+ "This emitEpilogue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 33ba21d..64f6ff1 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -138,6 +138,7 @@ public:
virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
virtual bool hasReservedCallFrame(MachineFunction &MF) const;
+ virtual bool canSimplifyCallFramePseudos(MachineFunction &MF) const;
virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index bd703f4..21c6cb3 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -86,6 +86,7 @@ namespace {
void emitWordLE(unsigned Binary);
void emitDWordLE(uint64_t Binary);
void emitConstPoolInstruction(const MachineInstr &MI);
+ void emitMOVi32immInstruction(const MachineInstr &MI);
void emitMOVi2piecesInstruction(const MachineInstr &MI);
void emitLEApcrelJTInstruction(const MachineInstr &MI);
void emitPseudoMoveInstruction(const MachineInstr &MI);
@@ -143,6 +144,15 @@ namespace {
return getMachineOpValue(MI, MI.getOperand(OpIdx));
}
+ /// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+ /// machine operand requires relocation, record the relocation and return zero.
+ unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO,
+ unsigned Reloc);
+ unsigned getMovi32Value(const MachineInstr &MI, unsigned OpIdx,
+ unsigned Reloc) {
+ return getMovi32Value(MI, MI.getOperand(OpIdx), Reloc);
+ }
+
/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
///
unsigned getShiftOp(unsigned Imm) const ;
@@ -214,6 +224,54 @@ unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const {
return 0;
}
+/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+/// machine operand requires relocation, record the relocation and return zero.
+unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
+ const MachineOperand &MO,
+ unsigned Reloc) {
+ assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw))
+ && "Relocation to this function should be for movt or movw");
+ switch(MO.getType()) {
+ case MachineOperand::MO_Register:
+ return ARMRegisterInfo::getRegisterNumbering(MO.getReg());
+ break;
+
+ case MachineOperand::MO_Immediate:
+ return static_cast<unsigned>(MO.getImm());
+ break;
+
+ case MachineOperand::MO_FPImmediate:
+ return static_cast<unsigned>(
+ MO.getFPImm()->getValueAPF().bitcastToAPInt().getLimitedValue());
+ break;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ emitMachineBasicBlock(MO.getMBB(), Reloc);
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ emitConstPoolAddress(MO.getIndex(), Reloc);
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ emitJumpTableAddress(MO.getIndex(), Reloc);
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ emitExternalSymbolAddress(MO.getSymbolName(), Reloc);
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ emitGlobalAddress(MO.getGlobal(), Reloc, true, false);
+ break;
+
+ default:
+ llvm_unreachable("Unsupported immediate operand type for movw/movt");
+ break;
+ }
+ return 0;
+}
+
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
@@ -433,6 +491,41 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
}
}
+void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) {
+ const MachineOperand &MO0 = MI.getOperand(0);
+ const MachineOperand &MO1 = MI.getOperand(1);
+
+ unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF;
+
+ // Emit the 'mov' instruction.
+ unsigned Binary = 0x30 << 20; // mov: Insts{27-20} = 0b00110000
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode imm.
+ Binary |= Lo16 & 0xFFF;
+ Binary |= ((Lo16 >> 12) & 0xF) << 16; // imm4:imm12, Insts[19-16] = imm4, Insts[11-0] = imm12
+ emitWordLE(Binary);
+
+ unsigned Hi16 = (getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16) & 0xFFFF;
+ // Emit the 'mov' instruction.
+ Binary = 0x34 << 20; // movt: Insts[27-20] = 0b00110100
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ Binary |= Hi16 & 0xFFF;
+ Binary |= ((Hi16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+}
+
void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) {
const MachineOperand &MO0 = MI.getOperand(0);
const MachineOperand &MO1 = MI.getOperand(1);
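[Editor's note: emitMOVi32immInstruction above materializes a 32-bit constant as a movw (low half) followed by a movt (high half), each encoding its 16-bit immediate as imm4:imm12, with imm4 in bits 19-16 and imm12 in bits 11-0. A standalone sketch of just that immediate packing; the opcode, Rd, and predicate fields handled by the emitter are omitted, and the constant is an arbitrary example:]

    #include <cstdint>
    #include <cstdio>

    // Pack a 16-bit immediate into the imm4:imm12 fields of movw/movt.
    uint32_t packImm16(uint32_t Imm16) {
      return (Imm16 & 0xFFF) | (((Imm16 >> 12) & 0xF) << 16);
    }

    int main() {
      uint32_t Value = 0xDEADBEEF;
      uint32_t Lo16 = Value & 0xFFFF;          // movw immediate
      uint32_t Hi16 = (Value >> 16) & 0xFFFF;  // movt immediate
      std::printf("movw fields: 0x%08x\n", packImm16(Lo16)); // 0x000b0eef
      std::printf("movt fields: 0x%08x\n", packImm16(Hi16)); // 0x000d0ead
    }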
@@ -552,7 +645,6 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
switch (Opcode) {
default:
llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
- // FIXME: Add support for MOVimm32.
case TargetOpcode::INLINEASM: {
// We allow inline assembler nodes with empty bodies - they can
// implicitly define registers, which is ok for JIT.
@@ -599,6 +691,11 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
emitMiscLoadStoreInstruction(MI, ARM::PC);
break;
}
+
+ case ARM::MOVi32imm:
+ emitMOVi32immInstruction(MI);
+ break;
+
case ARM::MOVi2pieces:
// Two instructions to materialize a constant.
emitMOVi2piecesInstruction(MI);
@@ -1138,7 +1235,7 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
// Set the conditional execution predicate
Binary |= II->getPredicate(&MI) << ARMII::CondShift;
- if (TID.Opcode == ARM::BX_RET)
+ if (TID.Opcode == ARM::BX_RET || TID.Opcode == ARM::MOVPCLR)
// The return register is LR.
Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::LR);
else
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index a458269..013e00a 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -65,7 +65,7 @@ public:
}
SDNode *Select(SDNode *N);
- virtual void InstructionSelect();
+
bool SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &A,
SDValue &B, SDValue &C);
bool SelectAddrMode2(SDNode *Op, SDValue N, SDValue &Base,
@@ -201,11 +201,6 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
}
-void ARMDAGToDAGISel::InstructionSelect() {
- SelectRoot(*CurDAG);
- CurDAG->RemoveDeadNodes();
-}
-
bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op,
SDValue N,
SDValue &BaseReg,
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 614e684..6a2c6bb 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -294,6 +294,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
+ setTargetDAGCombine(ISD::SELECT_CC);
}
computeRegisterProperties();
@@ -544,6 +545,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::FMAX: return "ARMISD::FMAX";
+ case ARMISD::FMIN: return "ARMISD::FMIN";
}
}
@@ -863,7 +866,8 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
}
return DAG.getStore(Chain, dl, Arg, PtrOff,
- PseudoSourceValue::getStack(), LocMemOffset);
+ PseudoSourceValue::getStack(), LocMemOffset,
+ false, false, 0);
}
void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
@@ -920,7 +924,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// These operations are automatically eliminated by the prolog/epilog pass
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
- SDValue StackPtr = DAG.getRegister(ARM::SP, MVT::i32);
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
RegsToPassVector RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
@@ -969,8 +973,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
} else {
assert(VA.isMemLoc());
- if (StackPtr.getNode() == 0)
- StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
dl, DAG, VA, Flags));
@@ -983,8 +985,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
assert(VA.isMemLoc());
- if (StackPtr.getNode() == 0)
- StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
dl, DAG, VA, Flags));
@@ -1031,7 +1031,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
@@ -1052,7 +1053,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
@@ -1238,7 +1240,8 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
}
CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
if (RelocM == Reloc::Static)
return Result;
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -1261,7 +1264,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
SDValue Chain = Argument.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -1278,8 +1282,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
false, false, false, false,
0, CallingConv::C, false, /*isReturnValueUsed=*/true,
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl,
- DAG.GetOrdering(Chain.getNode()));
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
return CallResult.first;
}
@@ -1308,21 +1311,24 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
Chain = Offset.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
} else {
// local exec model
ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff");
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
}
// The address of the thread local variable is the add of the thread
@@ -1358,13 +1364,15 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
CPAddr,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
SDValue Chain = Result.getValue(1);
SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
if (!UseGOTOFF)
Result = DAG.getLoad(PtrVT, dl, Chain, Result,
- PseudoSourceValue::getGOT(), 0);
+ PseudoSourceValue::getGOT(), 0,
+ false, false, 0);
return Result;
} else {
// If we have T2 ops, we can materialize the address directly via movt/movw
@@ -1376,7 +1384,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
}
}
}
@@ -1403,7 +1412,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
@@ -1413,7 +1423,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
Result = DAG.getLoad(PtrVT, dl, Chain, Result,
- PseudoSourceValue::getGOT(), 0);
+ PseudoSourceValue::getGOT(), 0,
+ false, false, 0);
return Result;
}
@@ -1434,7 +1445,8 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
@@ -1467,7 +1479,8 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result =
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 0);
SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
@@ -1515,7 +1528,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
+ false, false, 0);
}
SDValue
@@ -1592,7 +1606,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
// Create load node to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
- PseudoSourceValue::getFixedStack(FI), 0);
+ PseudoSourceValue::getFixedStack(FI), 0,
+ false, false, 0);
} else {
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
@@ -1707,7 +1722,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
- PseudoSourceValue::getFixedStack(FI), 0));
+ PseudoSourceValue::getFixedStack(FI), 0,
+ false, false, 0));
}
}
@@ -1745,7 +1761,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0);
+ PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0,
+ false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
DAG.getConstant(4, getPointerTy()));
@@ -1939,13 +1956,14 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) {
}
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
- PseudoSourceValue::getJumpTable(), 0);
+ PseudoSourceValue::getJumpTable(), 0,
+ false, false, 0);
Chain = Addr.getValue(1);
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
} else {
Addr = DAG.getLoad(PTy, dl, Chain, Addr,
- PseudoSourceValue::getJumpTable(), 0);
+ PseudoSourceValue::getJumpTable(), 0, false, false, 0);
Chain = Addr.getValue(1);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
}
@@ -1993,7 +2011,8 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
? ARM::R7 : ARM::R11;
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
- FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0);
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
+ false, false, 0);
return FrameAddr;
}
@@ -2038,7 +2057,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
Loads[i] = DAG.getLoad(VT, dl, Chain,
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
DAG.getConstant(SrcOff, MVT::i32)),
- SrcSV, SrcSVOff + SrcOff);
+ SrcSV, SrcSVOff + SrcOff, false, false, 0);
TFOps[i] = Loads[i].getValue(1);
SrcOff += VTSize;
}
@@ -2047,9 +2066,9 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
for (i = 0;
i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
- DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
- DAG.getConstant(DstOff, MVT::i32)),
- DstSV, DstSVOff + DstOff);
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstSV, DstSVOff + DstOff, false, false, 0);
DstOff += VTSize;
}
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
@@ -2075,7 +2094,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
Loads[i] = DAG.getLoad(VT, dl, Chain,
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
DAG.getConstant(SrcOff, MVT::i32)),
- SrcSV, SrcSVOff + SrcOff);
+ SrcSV, SrcSVOff + SrcOff, false, false, 0);
TFOps[i] = Loads[i].getValue(1);
++i;
SrcOff += VTSize;
@@ -2097,7 +2116,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
DAG.getConstant(DstOff, MVT::i32)),
- DstSV, DstSVOff + DstOff);
+ DstSV, DstSVOff + DstOff, false, false, 0);
++i;
DstOff += VTSize;
BytesLeft -= VTSize;
@@ -3835,23 +3854,106 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
+/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
+static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ // If the target supports NEON, try to use vmax/vmin instructions for f32
+ // selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set,
+ // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
+ // a NaN; only do the transformation when it matches that behavior.
+
+ // For now only do this when using NEON for FP operations; if using VFP, it
+ // is not obvious that the benefit outweighs the cost of switching to the
+ // NEON pipeline.
+ if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
+ N->getValueType(0) != MVT::f32)
+ return SDValue();
+
+ SDValue CondLHS = N->getOperand(0);
+ SDValue CondRHS = N->getOperand(1);
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ unsigned Opcode = 0;
+ bool IsReversed;
+ if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
+ IsReversed = false; // x CC y ? x : y
+ } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
+    IsReversed = true; // x CC y ? y : x
+ } else {
+ return SDValue();
+ }
+
+ bool IsUnordered;
+ switch (CC) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If LHS is NaN, an ordered comparison will be false and the result will
+ // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS
+ // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+ IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
+ if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+ break;
+ // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
+ // will return -0, so vmin can only be used for unsafe math or if one of
+ // the operands is known to be nonzero.
+ if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
+ !UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
+ break;
+
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ // If LHS is NaN, an ordered comparison will be false and the result will
+ // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS
+ // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+ IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
+ if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+ break;
+ // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
+ // will return +0, so vmax can only be used for unsafe math or if one of
+ // the operands is known to be nonzero.
+ if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
+ !UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
+ break;
+ }
+
+ if (!Opcode)
+ return SDValue();
+ return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
+}
+
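To make the NaN and signed-zero guards above concrete, here is a small standalone C++ sketch (illustrative only; hypothetical helper names, no LLVM APIs): an ordered "x < y ? x : y" select falls through to the RHS when a NaN makes the compare false, whereas a NEON-style vmin propagates the NaN, and for zeros of opposite sign "+0 <= -0" is true yet vmin yields -0. This is exactly why the combine requires isKnownNeverNaN and, for the <=/>= forms, UnsafeFPMath or a known-nonzero operand.

#include <cmath>
#include <cstdio>
#include <limits>

// What the ISD::SELECT_CC form "x < y ? x : y" computes.
static float select_lt(float x, float y) { return x < y ? x : y; }

// NEON-style vmin.f32 semantics (sketch): NaN in, NaN out; prefers -0 over +0.
static float neon_like_vmin(float x, float y) {
  if (std::isnan(x) || std::isnan(y))
    return std::numeric_limits<float>::quiet_NaN();
  if (x == 0.0f && y == 0.0f)
    return std::signbit(x) ? x : y;  // pick the negative zero
  return x < y ? x : y;
}

int main() {
  float nan = std::nanf("");
  // NaN case: the select yields 1.0 (the ordered compare is false), vmin yields NaN.
  std::printf("select_lt(NaN, 1) = %f   vmin(NaN, 1) = %f\n",
              select_lt(nan, 1.0f), neon_like_vmin(nan, 1.0f));
  // Signed-zero case: "+0 <= -0" is true, so an "x <= y ? x : y" select keeps +0,
  // but vmin returns -0 (sign bit set).
  std::printf("signbit(vmin(+0, -0)) = %d\n",
              (int)std::signbit(neon_like_vmin(0.0f, -0.0f)));
  return 0;
}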
SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
- case ISD::ADD: return PerformADDCombine(N, DCI);
- case ISD::SUB: return PerformSUBCombine(N, DCI);
+ case ISD::ADD: return PerformADDCombine(N, DCI);
+ case ISD::SUB: return PerformSUBCombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
- case ISD::INTRINSIC_WO_CHAIN:
- return PerformIntrinsicCombine(N, DCI.DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
case ISD::SRA:
- case ISD::SRL:
- return PerformShiftCombine(N, DCI.DAG, Subtarget);
+ case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
- return PerformExtendCombine(N, DCI.DAG, Subtarget);
+ case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
+ case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
}
return SDValue();
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 3c5df45..f8f8adc 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -131,7 +131,11 @@ namespace llvm {
VREV16, // reverse elements within 16-bit halfwords
VZIP, // zip (interleave)
VUZP, // unzip (deinterleave)
- VTRN // transpose
+ VTRN, // transpose
+
+ // Floating-point max and min:
+ FMAX,
+ FMIN
};
}
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 169eeed..76595fa 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -56,6 +56,9 @@ def NEONGetLnFrm : Format<25>;
def NEONSetLnFrm : Format<26>;
def NEONDupFrm : Format<27>;
+def MiscFrm : Format<29>;
+def ThumbMiscFrm : Format<30>;
+
// Misc flags.
// the instruction has a Rn register operand.
@@ -705,6 +708,20 @@ class AI3ldsbpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{24} = 1; // P bit
let Inst{27-25} = 0b000;
}
+class AI3lddpr<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
+ opc, asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 0; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
+}
+
// Pre-indexed stores
class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin,
@@ -720,6 +737,19 @@ class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{24} = 1; // P bit
let Inst{27-25} = 0b000;
}
+class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
+ opc, asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
+}
// Post-indexed loads
class AI3ldhpo<dag oops, dag iops, Format f, InstrItinClass itin,
@@ -731,7 +761,7 @@ class AI3ldhpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{6} = 0; // S bit
let Inst{7} = 1;
let Inst{20} = 1; // L bit
- let Inst{21} = 1; // W bit
+ let Inst{21} = 0; // W bit
let Inst{24} = 0; // P bit
let Inst{27-25} = 0b000;
}
@@ -744,7 +774,7 @@ class AI3ldshpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{6} = 1; // S bit
let Inst{7} = 1;
let Inst{20} = 1; // L bit
- let Inst{21} = 1; // W bit
+ let Inst{21} = 0; // W bit
let Inst{24} = 0; // P bit
let Inst{27-25} = 0b000;
}
@@ -757,7 +787,20 @@ class AI3ldsbpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{6} = 1; // S bit
let Inst{7} = 1;
let Inst{20} = 1; // L bit
- let Inst{21} = 1; // W bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 0; // P bit
+ let Inst{27-25} = 0b000;
+}
+class AI3lddpo<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
+ opc, asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 0; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
let Inst{24} = 0; // P bit
let Inst{27-25} = 0b000;
}
@@ -772,7 +815,20 @@ class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{6} = 0; // S bit
let Inst{7} = 1;
let Inst{20} = 0; // L bit
- let Inst{21} = 1; // W bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 0; // P bit
+ let Inst{27-25} = 0b000;
+}
+class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
+ opc, asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
let Inst{24} = 0; // P bit
let Inst{27-25} = 0b000;
}
@@ -1147,6 +1203,19 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
let Inst{8} = 1; // The W bit.
}
+// Helper class for disassembly only
+// A6.3.16 & A6.3.17
+// T2Imac - Thumb2 multiply [accumulate, and absolute difference] instructions.
+class T2I_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b011;
+ let Inst{23} = long;
+ let Inst{22-20} = op22_20;
+ let Inst{7-4} = op7_4;
+}
+
// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsThumb1Only, HasV5T];
@@ -1324,6 +1393,15 @@ class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
let Inst{4} = 0;
}
+// VFP conversion between floating-point and fixed-point
+class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> {
+ // size (fixed-point number): sx == 0 ? 16 : 32
+ let Inst{7} = op5; // sx
+}
+
// VFP conversion instructions, if no NEON
class AVConv1In<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
dag oops, dag iops, InstrItinClass itin,
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 852c74e..3812aba 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -113,6 +113,8 @@ def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
+def HasV4T : Predicate<"Subtarget->hasV4TOps()">;
+def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
@@ -130,8 +132,6 @@ def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
def IsARM : Predicate<"!Subtarget->isThumb()">;
def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
-def CarryDefIsUnused : Predicate<"!N->hasAnyUseOfValue(1)">;
-def CarryDefIsUsed : Predicate<"N->hasAnyUseOfValue(1)">;
// FIXME: Eventually this will be just "hasV6T2Ops".
def UseMovt : Predicate<"Subtarget->useMovt()">;
@@ -176,7 +176,7 @@ def imm16_31 : PatLeaf<(i32 imm), [{
return (int32_t)N->getZExtValue() >= 16 && (int32_t)N->getZExtValue() < 32;
}]>;
-def so_imm_neg :
+def so_imm_neg :
PatLeaf<(imm), [{
return ARM_AM::getSOImmVal(-(int)N->getZExtValue()) != -1;
}], so_imm_neg_XFORM>;
@@ -194,7 +194,7 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{
/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield
/// e.g., 0xf000ffff
def bf_inv_mask_imm : Operand<i32>,
- PatLeaf<(imm), [{
+ PatLeaf<(imm), [{
uint32_t v = (uint32_t)N->getZExtValue();
if (v == 0xffffffff)
return 0;
@@ -227,7 +227,7 @@ def lo16AllZero : PatLeaf<(i32 imm), [{
return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
}], hi16>;
-/// imm0_65535 predicate - True if the 32-bit immediate is in the range
+/// imm0_65535 predicate - True if the 32-bit immediate is in the range
/// [0,65535].
def imm0_65535 : PatLeaf<(i32 imm), [{
return (uint32_t)N->getZExtValue() < 65536;
@@ -236,6 +236,21 @@ def imm0_65535 : PatLeaf<(i32 imm), [{
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
+/// adde and sube predicates - True based on whether the carry flag output
+/// will be needed or not.
+def adde_dead_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS),
+ [{return !N->hasAnyUseOfValue(1);}]>;
+def sube_dead_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS),
+ [{return !N->hasAnyUseOfValue(1);}]>;
+def adde_live_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS),
+ [{return N->hasAnyUseOfValue(1);}]>;
+def sube_live_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS),
+ [{return N->hasAnyUseOfValue(1);}]>;
+
//===----------------------------------------------------------------------===//
// Operand Definitions.
//
@@ -501,6 +516,22 @@ multiclass AI_unary_rrot<bits<8> opcod, string opc, PatFrag opnode> {
}
}
+multiclass AI_unary_rrot_np<bits<8> opcod, string opc> {
+ def r : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src),
+ IIC_iUNAr, opc, "\t$dst, $src",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{11-10} = 0b00;
+ let Inst{19-16} = 0b1111;
+ }
+ def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src, i32imm:$rot),
+ IIC_iUNAsi, opc, "\t$dst, $src, ror $rot",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{19-16} = 0b1111;
+ }
+}
+
/// AI_bin_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> {
@@ -510,13 +541,29 @@ multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> {
Requires<[IsARM, HasV6]> {
let Inst{11-10} = 0b00;
}
- def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
+ def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS,
+ i32imm:$rot),
IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot",
[(set GPR:$dst, (opnode GPR:$LHS,
(rotr GPR:$RHS, rot_imm:$rot)))]>,
Requires<[IsARM, HasV6]>;
}
+// For disassembly only.
+multiclass AI_bin_rrot_np<bits<8> opcod, string opc> {
+ def rr : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS),
+ IIC_iALUr, opc, "\t$dst, $LHS, $RHS",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{11-10} = 0b00;
+ }
+ def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS,
+ i32imm:$rot),
+ IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]>;
+}
+
/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
let Uses = [CPSR] in {
multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
@@ -524,13 +571,13 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
DPFrm, IIC_iALUi, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
- Requires<[IsARM, CarryDefIsUnused]> {
+ Requires<[IsARM]> {
let Inst{25} = 1;
}
def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
DPFrm, IIC_iALUr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
- Requires<[IsARM, CarryDefIsUnused]> {
+ Requires<[IsARM]> {
let isCommutable = Commutable;
let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
@@ -538,7 +585,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
- Requires<[IsARM, CarryDefIsUnused]> {
+ Requires<[IsARM]> {
let Inst{25} = 0;
}
}
@@ -549,16 +596,14 @@ multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
def Sri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
DPFrm, IIC_iALUi, !strconcat(opc, "\t$dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
- Requires<[IsARM, CarryDefIsUsed]> {
- let Defs = [CPSR];
+ Requires<[IsARM]> {
let Inst{20} = 1;
let Inst{25} = 1;
}
def Srr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
DPFrm, IIC_iALUr, !strconcat(opc, "\t$dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
- Requires<[IsARM, CarryDefIsUsed]> {
- let Defs = [CPSR];
+ Requires<[IsARM]> {
let Inst{11-4} = 0b00000000;
let Inst{20} = 1;
let Inst{25} = 0;
@@ -566,8 +611,7 @@ multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
- Requires<[IsARM, CarryDefIsUsed]> {
- let Defs = [CPSR];
+ Requires<[IsARM]> {
let Inst{20} = 1;
let Inst{25} = 0;
}
@@ -593,18 +637,153 @@ PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
i32imm:$size), NoItinerary,
"${instid:label} ${cpidx:cpentry}", []>;
-let Defs = [SP], Uses = [SP] in {
+// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
+// from removing one half of the matched pairs. That breaks PEI, which assumes
+// these will always be in pairs, and asserts if it finds otherwise. Better way?
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
def ADJCALLSTACKUP :
PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary,
"@ ADJCALLSTACKUP $amt1",
[(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
-def ADJCALLSTACKDOWN :
+def ADJCALLSTACKDOWN :
PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
"@ ADJCALLSTACKDOWN $amt",
[(ARMcallseq_start timm:$amt)]>;
}
+def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{7-0} = 0b00000000;
+}
+
+def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{7-0} = 0b00000001;
+}
+
+def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{7-0} = 0b00000010;
+}
+
+def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{7-0} = 0b00000011;
+}
+
+def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, NoItinerary, "sel",
+ "\t$dst, $a, $b",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{27-20} = 0b01101000;
+ let Inst{7-4} = 0b1011;
+}
+
+def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{7-0} = 0b00000100;
+}
+
+// The i32imm operand $val can be used by a debugger to store more information
+// about the breakpoint.
+def BKPT : AI<(outs), (ins i32imm:$val), MiscFrm, NoItinerary, "bkpt", "\t$val",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM]> {
+ let Inst{27-20} = 0b00010010;
+ let Inst{7-4} = 0b0111;
+}
+
+// Change Processor State is a system instruction -- for disassembly only.
+// The singleton $opt operand contains the following information:
+// opt{4-0} = mode from Inst{4-0}
+// opt{5} = changemode from Inst{17}
+// opt{8-6} = AIF from Inst{8-6}
+// opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable
+def CPS : AXI<(outs),(ins i32imm:$opt), MiscFrm, NoItinerary, "cps${opt:cps}",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{27-20} = 0b00010000;
+ let Inst{16} = 0;
+ let Inst{5} = 0;
+}
+
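As a purely illustrative aside (the bit positions come only from the comment above, not from any API added by this patch), the singleton $opt immediate can be unpacked like so:

#include <cstdint>
#include <cstdio>

// Hypothetical decoder for the CPS $opt operand, mirroring the documented layout.
struct CPSOptFields {
  unsigned Mode;        // opt{4-0}
  unsigned ChangeMode;  // opt{5}
  unsigned AIF;         // opt{8-6}
  unsigned IMod;        // opt{10-9}: 0b10 = enable, 0b11 = disable
};

static CPSOptFields decodeCPSOpt(uint32_t Opt) {
  CPSOptFields F;
  F.Mode       = Opt & 0x1F;
  F.ChangeMode = (Opt >> 5) & 0x1;
  F.AIF        = (Opt >> 6) & 0x7;
  F.IMod       = (Opt >> 9) & 0x3;
  return F;
}

int main() {
  // Sample value only: imod = 0b11 (disable), AIF = 0b111, changemode = 0, mode = 0.
  CPSOptFields F = decodeCPSOpt((0x3u << 9) | (0x7u << 6));
  std::printf("mode=%u changemode=%u aif=%u imod=%u\n",
              F.Mode, F.ChangeMode, F.AIF, F.IMod);
  return 0;
}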
+// Preload signals the memory system of possible future data/instruction access.
+// These are for disassembly only.
+multiclass APreLoad<bit data, bit read, string opc> {
+
+ def i : AXI<(outs), (ins GPR:$base, i32imm:$imm), MiscFrm, NoItinerary,
+ !strconcat(opc, "\t[$base, $imm]"), []> {
+ let Inst{31-26} = 0b111101;
+ let Inst{25} = 0; // 0 for immediate form
+ let Inst{24} = data;
+ let Inst{22} = read;
+ let Inst{21-20} = 0b01;
+ }
+
+ def r : AXI<(outs), (ins addrmode2:$addr), MiscFrm, NoItinerary,
+ !strconcat(opc, "\t$addr"), []> {
+ let Inst{31-26} = 0b111101;
+ let Inst{25} = 1; // 1 for register form
+ let Inst{24} = data;
+ let Inst{22} = read;
+ let Inst{21-20} = 0b01;
+ let Inst{4} = 0;
+ }
+}
+
+defm PLD : APreLoad<1, 1, "pld">;
+defm PLDW : APreLoad<1, 0, "pldw">;
+defm PLI : APreLoad<0, 1, "pli">;
+
+def SETENDBE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tbe",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{27-20} = 0b00010000;
+ let Inst{16} = 1;
+ let Inst{9} = 1;
+ let Inst{7-4} = 0b0000;
+}
+
+def SETENDLE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tle",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{27-20} = 0b00010000;
+ let Inst{16} = 1;
+ let Inst{9} = 0;
+ let Inst{7-4} = 0b0000;
+}
+
+def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV7]> {
+ let Inst{27-16} = 0b001100100000;
+ let Inst{7-4} = 0b1111;
+}
+
+// A5.4 Permanently UNDEFINED instructions.
+def TRAP : AI<(outs), (ins), MiscFrm, NoItinerary, "trap", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM]> {
+ let Inst{27-25} = 0b011;
+ let Inst{24-20} = 0b11111;
+ let Inst{7-5} = 0b111;
+ let Inst{4} = 0b1;
+}
+
// Address computation and loads and stores in PIC mode.
let isNotDuplicable = 1 in {
def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
@@ -665,7 +844,7 @@ def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
"(${label}_${id}-(",
"${:private}PCRELL${:uid}+8))\n"),
!strconcat("${:private}PCRELL${:uid}:\n\t",
- "add$p\t$dst, pc, #${:private}PCRELV${:uid}")),
+ "add$p\t$dst, pc, #${:private}PCRELV${:uid}")),
[]> {
let Inst{25} = 1;
}
@@ -674,24 +853,50 @@ def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
// Control Flow Instructions.
//
-let isReturn = 1, isTerminator = 1, isBarrier = 1 in
- def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
- "bx", "\tlr", [(ARMretflag)]> {
- let Inst{3-0} = 0b1110;
- let Inst{7-4} = 0b0001;
- let Inst{19-8} = 0b111111111111;
- let Inst{27-20} = 0b00010010;
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ // ARMV4T and above
+ def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
+ "bx", "\tlr", [(ARMretflag)]>,
+ Requires<[IsARM, HasV4T]> {
+ let Inst{3-0} = 0b1110;
+ let Inst{7-4} = 0b0001;
+ let Inst{19-8} = 0b111111111111;
+ let Inst{27-20} = 0b00010010;
+ }
+
+ // ARMV4 only
+ def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br,
+ "mov", "\tpc, lr", [(ARMretflag)]>,
+ Requires<[IsARM, NoV4T]> {
+ let Inst{11-0} = 0b000000001110;
+ let Inst{15-12} = 0b1111;
+ let Inst{19-16} = 0b0000;
+ let Inst{27-20} = 0b00011010;
+ }
}
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ // ARMV4T and above
def BRIND : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst",
- [(brind GPR:$dst)]> {
+ [(brind GPR:$dst)]>,
+ Requires<[IsARM, HasV4T]> {
let Inst{7-4} = 0b0001;
let Inst{19-8} = 0b111111111111;
let Inst{27-20} = 0b00010010;
let Inst{31-28} = 0b1110;
}
+
+ // ARMV4 only
+ def MOVPCRX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "mov\tpc, $dst",
+ [(brind GPR:$dst)]>,
+ Requires<[IsARM, NoV4T]> {
+ let Inst{11-4} = 0b00000000;
+ let Inst{15-12} = 0b1111;
+ let Inst{19-16} = 0b0000;
+ let Inst{27-20} = 0b00011010;
+ let Inst{31-28} = 0b1110;
+ }
}
// FIXME: remove when we have a way to marking a MI with these properties.
@@ -732,14 +937,26 @@ let isCall = 1,
}
// ARMv4T
- def BX : ABXIx2<(outs), (ins GPR:$func, variable_ops),
+ // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
+ def BX : ABXIx2<(outs), (ins tGPR:$func, variable_ops),
IIC_Br, "mov\tlr, pc\n\tbx\t$func",
- [(ARMcall_nolink GPR:$func)]>,
- Requires<[IsARM, IsNotDarwin]> {
+ [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, HasV4T, IsNotDarwin]> {
let Inst{7-4} = 0b0001;
let Inst{19-8} = 0b111111111111;
let Inst{27-20} = 0b00010010;
}
+
+ // ARMv4
+ def BMOVPCRX : ABXIx2<(outs), (ins tGPR:$func, variable_ops),
+ IIC_Br, "mov\tlr, pc\n\tmov\tpc, $func",
+ [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, NoV4T, IsNotDarwin]> {
+ let Inst{11-4} = 0b00000000;
+ let Inst{15-12} = 0b1111;
+ let Inst{19-16} = 0b0000;
+ let Inst{27-20} = 0b00011010;
+ }
}
// On Darwin R9 is call-clobbered.
@@ -769,13 +986,26 @@ let isCall = 1,
}
// ARMv4T
- def BXr9 : ABXIx2<(outs), (ins GPR:$func, variable_ops),
+ // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
+ def BXr9 : ABXIx2<(outs), (ins tGPR:$func, variable_ops),
IIC_Br, "mov\tlr, pc\n\tbx\t$func",
- [(ARMcall_nolink GPR:$func)]>, Requires<[IsARM, IsDarwin]> {
+ [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, HasV4T, IsDarwin]> {
let Inst{7-4} = 0b0001;
let Inst{19-8} = 0b111111111111;
let Inst{27-20} = 0b00010010;
}
+
+ // ARMv4
+ def BMOVPCRXr9 : ABXIx2<(outs), (ins tGPR:$func, variable_ops),
+ IIC_Br, "mov\tlr, pc\n\tmov\tpc, $func",
+ [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, NoV4T, IsDarwin]> {
+ let Inst{11-4} = 0b00000000;
+ let Inst{15-12} = 0b1111;
+ let Inst{19-16} = 0b0000;
+ let Inst{27-20} = 0b00011010;
+ }
}
let isBranch = 1, isTerminator = 1 in {
@@ -821,25 +1051,75 @@ let isBranch = 1, isTerminator = 1 in {
} // isBarrier = 1
// FIXME: should be able to write a pattern for ARMBrcond, but can't use
- // a two-value operand where a dag node expects two operands. :(
+ // a two-value operand where a dag node expects two operands. :(
def Bcc : ABI<0b1010, (outs), (ins brtarget:$target),
IIC_Br, "b", "\t$target",
[/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>;
}
+// Branch and Exchange Jazelle -- for disassembly only
+def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{23-20} = 0b0010;
+ //let Inst{19-8} = 0xfff;
+ let Inst{7-4} = 0b0010;
+}
+
+// Secure Monitor Call is a system instruction -- for disassembly only
+def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{23-20} = 0b0110;
+ let Inst{7-4} = 0b0111;
+}
+
+// Supervisor Call (Software Interrupt) -- for disassembly only
+let isCall = 1 in {
+def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc",
+ [/* For disassembly only; pattern left blank */]>;
+}
+
+// Store Return State is a system instruction -- for disassembly only
+def SRSW : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, i32imm:$mode),
+ NoItinerary, "srs${addr:submode}\tsp!, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b110; // W = 1
+}
+
+def SRS : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, i32imm:$mode),
+ NoItinerary, "srs${addr:submode}\tsp, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b100; // W = 0
+}
+
+// Return From Exception is a system instruction -- for disassembly only
+def RFEW : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, GPR:$base),
+ NoItinerary, "rfe${addr:submode}\t$base!",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b011; // W = 1
+}
+
+def RFE : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, GPR:$base),
+ NoItinerary, "rfe${addr:submode}\t$base",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b001; // W = 0
+}
+
//===----------------------------------------------------------------------===//
// Load / store Instructions.
//
// Load
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
"ldr", "\t$dst, $addr",
[(set GPR:$dst, (load addrmode2:$addr))]>;
// Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
- mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
"ldr", "\t$dst, $addr", []>;
@@ -848,7 +1128,7 @@ def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
IIC_iLoadr, "ldrh", "\t$dst, $addr",
[(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>;
-def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
+def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
IIC_iLoadr, "ldrb", "\t$dst, $addr",
[(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>;
@@ -907,6 +1187,51 @@ def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb),
def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
"ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
+
+// For disassembly only
+def LDRD_PRE : AI3lddpr<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb),
+ (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadr,
+ "ldrd", "\t$dst1, $dst2, $addr!", "$addr.base = $base_wb", []>,
+ Requires<[IsARM, HasV5TE]>;
+
+// For disassembly only
+def LDRD_POST : AI3lddpo<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb),
+ (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadr,
+ "ldrd", "\t$dst1, $dst2, [$base], $offset", "$base = $base_wb", []>,
+ Requires<[IsARM, HasV5TE]>;
+
+}
+
+// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only.
+
+def LDRT : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru,
+ "ldrt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+ let Inst{21} = 1; // overwrite
+}
+
+def LDRBT : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru,
+ "ldrbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+ let Inst{21} = 1; // overwrite
+}
+
+def LDRSBT : AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base,am2offset:$offset), LdMiscFrm, IIC_iLoadru,
+ "ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+ let Inst{21} = 1; // overwrite
+}
+
+def LDRHT : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base, am3offset:$offset), LdMiscFrm, IIC_iLoadru,
+ "ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+ let Inst{21} = 1; // overwrite
+}
+
+def LDRSHT : AI3ldshpo<(outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
+ "ldrsht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+ let Inst{21} = 1; // overwrite
}
// Store
@@ -915,8 +1240,8 @@ def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
[(store GPR:$src, addrmode2:$addr)]>;
// Stores with truncate
-def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, IIC_iStorer,
- "strh", "\t$src, $addr",
+def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm,
+ IIC_iStorer, "strh", "\t$src, $addr",
[(truncstorei16 GPR:$src, addrmode3:$addr)]>;
def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
@@ -931,47 +1256,87 @@ def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
// Indexed stores
def STR_PRE : AI2stwpr<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base, am2offset:$offset),
+ (ins GPR:$src, GPR:$base, am2offset:$offset),
StFrm, IIC_iStoreru,
"str", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_store GPR:$src, GPR:$base, am2offset:$offset))]>;
def STR_POST : AI2stwpo<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am2offset:$offset),
+ (ins GPR:$src, GPR:$base,am2offset:$offset),
StFrm, IIC_iStoreru,
"str", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb,
(post_store GPR:$src, GPR:$base, am2offset:$offset))]>;
def STRH_PRE : AI3sthpr<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am3offset:$offset),
+ (ins GPR:$src, GPR:$base,am3offset:$offset),
StMiscFrm, IIC_iStoreru,
"strh", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>;
def STRH_POST: AI3sthpo<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am3offset:$offset),
+ (ins GPR:$src, GPR:$base,am3offset:$offset),
StMiscFrm, IIC_iStoreru,
"strh", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb, (post_truncsti16 GPR:$src,
GPR:$base, am3offset:$offset))]>;
def STRB_PRE : AI2stbpr<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am2offset:$offset),
+ (ins GPR:$src, GPR:$base,am2offset:$offset),
StFrm, IIC_iStoreru,
"strb", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb, (pre_truncsti8 GPR:$src,
GPR:$base, am2offset:$offset))]>;
def STRB_POST: AI2stbpo<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am2offset:$offset),
+ (ins GPR:$src, GPR:$base,am2offset:$offset),
StFrm, IIC_iStoreru,
"strb", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb, (post_truncsti8 GPR:$src,
GPR:$base, am2offset:$offset))]>;
+// For disassembly only
+def STRD_PRE : AI3stdpr<(outs GPR:$base_wb),
+ (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
+ StMiscFrm, IIC_iStoreru,
+ "strd", "\t$src1, $src2, [$base, $offset]!",
+ "$base = $base_wb", []>;
+
+// For disassembly only
+def STRD_POST: AI3stdpo<(outs GPR:$base_wb),
+ (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
+ StMiscFrm, IIC_iStoreru,
+ "strd", "\t$src1, $src2, [$base], $offset",
+ "$base = $base_wb", []>;
+
+// STRT, STRBT, and STRHT are for disassembly only.
+
+def STRT : AI2stwpo<(outs GPR:$base_wb),
+ (ins GPR:$src, GPR:$base,am2offset:$offset),
+ StFrm, IIC_iStoreru,
+ "strt", "\t$src, [$base], $offset", "$base = $base_wb",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{21} = 1; // overwrite
+}
+
+def STRBT : AI2stbpo<(outs GPR:$base_wb),
+ (ins GPR:$src, GPR:$base,am2offset:$offset),
+ StFrm, IIC_iStoreru,
+ "strbt", "\t$src, [$base], $offset", "$base = $base_wb",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{21} = 1; // overwrite
+}
+
+def STRHT: AI3sthpo<(outs GPR:$base_wb),
+ (ins GPR:$src, GPR:$base,am3offset:$offset),
+ StMiscFrm, IIC_iStoreru,
+ "strht", "\t$src, [$base], $offset", "$base = $base_wb",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{21} = 1; // overwrite
+}
+
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
@@ -999,7 +1364,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
let Inst{25} = 0;
}
-def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src),
+def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src),
DPSoRegFrm, IIC_iMOVsr,
"mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP {
let Inst{25} = 0;
@@ -1012,7 +1377,7 @@ def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, IIC_iMOVi,
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src),
+def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src),
DPFrm, IIC_iMOVi,
"movw", "\t$dst, $src",
[(set GPR:$dst, imm0_65535:$src)]>,
@@ -1026,7 +1391,7 @@ def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
DPFrm, IIC_iMOVi,
"movt", "\t$dst, $imm",
[(set GPR:$dst,
- (or (and GPR:$src, 0xffff),
+ (or (and GPR:$src, 0xffff),
lo16AllZero:$imm))]>, UnaryDP,
Requires<[IsARM, HasV6T2]> {
let Inst{20} = 0;
@@ -1045,7 +1410,7 @@ def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi,
// due to flag operands.
let Defs = [CPSR] in {
-def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
+def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
IIC_iMOVsi, "movs", "\t$dst, $src, lsr #1",
[(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP;
def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
@@ -1069,7 +1434,11 @@ defm SXTAB : AI_bin_rrot<0b01101010,
defm SXTAH : AI_bin_rrot<0b01101011,
"sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
-// TODO: SXT(A){B|H}16
+// For disassembly only
+defm SXTB16 : AI_unary_rrot_np<0b01101000, "sxtb16">;
+
+// For disassembly only
+defm SXTAB16 : AI_bin_rrot_np<0b01101000, "sxtab16">;
// Zero extenders
@@ -1093,9 +1462,9 @@ defm UXTAH : AI_bin_rrot<0b01101111, "uxtah",
}
// This isn't safe in general, the add is two 16-bit units, not a 32-bit add.
-//defm UXTAB16 : xxx<"uxtab16", 0xff00ff>;
+// For disassembly only
+defm UXTAB16 : AI_bin_rrot_np<0b01101100, "uxtab16">;
-// TODO: UXT(A){B|H}16
def SBFX : I<(outs GPR:$dst),
(ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
@@ -1131,13 +1500,13 @@ defm SUBS : AI1_bin_s_irs<0b0010, "subs",
BinOpFrag<(subc node:$LHS, node:$RHS)>>;
defm ADC : AI1_adde_sube_irs<0b0101, "adc",
- BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>;
+ BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
- BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+ BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs",
- BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>;
+ BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs",
- BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+ BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>;
// These don't define reg/reg forms, because they are handled above.
def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
@@ -1171,14 +1540,14 @@ def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
let Uses = [CPSR] in {
def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
DPFrm, IIC_iALUi, "rsc", "\t$dst, $a, $b",
- [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>,
- Requires<[IsARM, CarryDefIsUnused]> {
+ [(set GPR:$dst, (sube_dead_carry so_imm:$b, GPR:$a))]>,
+ Requires<[IsARM]> {
let Inst{25} = 1;
}
def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b",
- [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>,
- Requires<[IsARM, CarryDefIsUnused]> {
+ [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>,
+ Requires<[IsARM]> {
let Inst{25} = 0;
}
}
@@ -1187,15 +1556,15 @@ def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
let Defs = [CPSR], Uses = [CPSR] in {
def RSCSri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
DPFrm, IIC_iALUi, "rscs\t$dst, $a, $b",
- [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>,
- Requires<[IsARM, CarryDefIsUnused]> {
+ [(set GPR:$dst, (sube_dead_carry so_imm:$b, GPR:$a))]>,
+ Requires<[IsARM]> {
let Inst{20} = 1;
let Inst{25} = 1;
}
def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, "rscs\t$dst, $a, $b",
- [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>,
- Requires<[IsARM, CarryDefIsUnused]> {
+ [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>,
+ Requires<[IsARM]> {
let Inst{20} = 1;
let Inst{25} = 0;
}
@@ -1216,6 +1585,126 @@ def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
// (mul X, 2^n+1) -> (add (X << n), X)
// (mul X, 2^n-1) -> (rsb X, (X << n))
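The two identities above are easy to sanity-check numerically (illustrative only; rsb computes operand2 - operand1, so (rsb X, (X << n)) is (X << n) - X):

#include <cassert>
#include <cstdint>

// (mul X, 2^n+1) -> (add (X << n), X)
static uint32_t mul_pow2_plus_1(uint32_t X, unsigned n)  { return (X << n) + X; }
// (mul X, 2^n-1) -> (rsb X, (X << n)), i.e. (X << n) - X
static uint32_t mul_pow2_minus_1(uint32_t X, unsigned n) { return (X << n) - X; }

int main() {
  for (uint32_t X = 0; X < 1000; ++X) {
    assert(mul_pow2_plus_1(X, 3)  == X * 9);  // 2^3 + 1
    assert(mul_pow2_minus_1(X, 3) == X * 7);  // 2^3 - 1
  }
  return 0;
}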
+// ARM Arithmetic Instruction -- for disassembly only
+// GPR:$dst = GPR:$a op GPR:$b
+class AAI<bits<8> op27_20, bits<4> op7_4, string opc>
+ : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr,
+ opc, "\t$dst, $a, $b",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-20} = op27_20;
+ let Inst{7-4} = op7_4;
+}
+
+// Saturating add/subtract -- for disassembly only
+
+def QADD : AAI<0b00010000, 0b0101, "qadd">;
+def QADD16 : AAI<0b01100010, 0b0001, "qadd16">;
+def QADD8 : AAI<0b01100010, 0b1001, "qadd8">;
+def QASX : AAI<0b01100010, 0b0011, "qasx">;
+def QDADD : AAI<0b00010100, 0b0101, "qdadd">;
+def QDSUB : AAI<0b00010110, 0b0101, "qdsub">;
+def QSAX : AAI<0b01100010, 0b0101, "qsax">;
+def QSUB : AAI<0b00010010, 0b0101, "qsub">;
+def QSUB16 : AAI<0b01100010, 0b0111, "qsub16">;
+def QSUB8 : AAI<0b01100010, 0b1111, "qsub8">;
+def UQADD16 : AAI<0b01100110, 0b0001, "uqadd16">;
+def UQADD8 : AAI<0b01100110, 0b1001, "uqadd8">;
+def UQASX : AAI<0b01100110, 0b0011, "uqasx">;
+def UQSAX : AAI<0b01100110, 0b0101, "uqsax">;
+def UQSUB16 : AAI<0b01100110, 0b0111, "uqsub16">;
+def UQSUB8 : AAI<0b01100110, 0b1111, "uqsub8">;
+
+// Signed/Unsigned add/subtract -- for disassembly only
+
+def SASX : AAI<0b01100001, 0b0011, "sasx">;
+def SADD16 : AAI<0b01100001, 0b0001, "sadd16">;
+def SADD8 : AAI<0b01100001, 0b1001, "sadd8">;
+def SSAX : AAI<0b01100001, 0b0101, "ssax">;
+def SSUB16 : AAI<0b01100001, 0b0111, "ssub16">;
+def SSUB8 : AAI<0b01100001, 0b1111, "ssub8">;
+def UASX : AAI<0b01100101, 0b0011, "uasx">;
+def UADD16 : AAI<0b01100101, 0b0001, "uadd16">;
+def UADD8 : AAI<0b01100101, 0b1001, "uadd8">;
+def USAX : AAI<0b01100101, 0b0101, "usax">;
+def USUB16 : AAI<0b01100101, 0b0111, "usub16">;
+def USUB8 : AAI<0b01100101, 0b1111, "usub8">;
+
+// Signed/Unsigned halving add/subtract -- for disassembly only
+
+def SHASX : AAI<0b01100011, 0b0011, "shasx">;
+def SHADD16 : AAI<0b01100011, 0b0001, "shadd16">;
+def SHADD8 : AAI<0b01100011, 0b1001, "shadd8">;
+def SHSAX : AAI<0b01100011, 0b0101, "shsax">;
+def SHSUB16 : AAI<0b01100011, 0b0111, "shsub16">;
+def SHSUB8 : AAI<0b01100011, 0b1111, "shsub8">;
+def UHASX : AAI<0b01100111, 0b0011, "uhasx">;
+def UHADD16 : AAI<0b01100111, 0b0001, "uhadd16">;
+def UHADD8 : AAI<0b01100111, 0b1001, "uhadd8">;
+def UHSAX : AAI<0b01100111, 0b0101, "uhsax">;
+def UHSUB16 : AAI<0b01100111, 0b0111, "uhsub16">;
+def UHSUB8 : AAI<0b01100111, 0b1111, "uhsub8">;
+
+// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
+
+def USAD8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ MulFrm /* for convenience */, NoItinerary, "usad8",
+ "\t$dst, $a, $b", []>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{27-20} = 0b01111000;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b0001;
+}
+def USADA8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+ MulFrm /* for convenience */, NoItinerary, "usada8",
+ "\t$dst, $a, $b, $acc", []>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{27-20} = 0b01111000;
+ let Inst{7-4} = 0b0001;
+}
+
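For reference, a standalone sketch of the usual usad8/usada8 semantics (illustrative only, not code from this patch): the instruction sums the absolute differences of the four byte lanes of its operands, and the accumulating form adds that sum to a third operand.

#include <cstdint>
#include <cstdio>
#include <cstdlib>

// Byte-wise unsigned sum of absolute differences, as usad8 is commonly described.
static uint32_t usad8_ref(uint32_t a, uint32_t b) {
  uint32_t sum = 0;
  for (int lane = 0; lane < 4; ++lane) {
    int32_t ab = (a >> (8 * lane)) & 0xFF;
    int32_t bb = (b >> (8 * lane)) & 0xFF;
    sum += static_cast<uint32_t>(std::abs(ab - bb));
  }
  return sum;
}

// usada8: the same sum, accumulated into a third operand.
static uint32_t usada8_ref(uint32_t a, uint32_t b, uint32_t acc) {
  return acc + usad8_ref(a, b);
}

int main() {
  // |0x10-0x01| + |0x20-0x02| + |0x30-0x03| + |0x40-0x04| = 0x96
  std::printf("usad8 = 0x%x  usada8 = 0x%x\n",
              usad8_ref(0x40302010u, 0x04030201u),
              usada8_ref(0x40302010u, 0x04030201u, 100u));
  return 0;
}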
+// Signed/Unsigned saturate -- for disassembly only
+
+def SSATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
+ DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-21} = 0b0110101;
+ let Inst{6-4} = 0b001;
+}
+
+def SSATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
+ DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-21} = 0b0110101;
+ let Inst{6-4} = 0b101;
+}
+
+def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm,
+ NoItinerary, "ssat16", "\t$dst, $bit_pos, $a",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-20} = 0b01101010;
+ let Inst{7-4} = 0b0011;
+}
+
+def USATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
+ DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-21} = 0b0110111;
+ let Inst{6-4} = 0b001;
+}
+
+def USATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
+ DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-21} = 0b0110111;
+ let Inst{6-4} = 0b101;
+}
+
+def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm,
+ NoItinerary, "usat16", "\t$dst, $bit_pos, $a",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-20} = 0b01101110;
+ let Inst{7-4} = 0b0011;
+}
//===----------------------------------------------------------------------===//
// Bitwise Instructions.
@@ -1239,6 +1728,17 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
let Inst{6-0} = 0b0011111;
}
+// A8.6.18 BFI - Bitfield insert (Encoding A1)
+// Added for disassembler with the pattern field purposely left blank.
+def BFI : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
+ AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfi", "\t$dst, $src, $imm", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-21} = 0b0111110;
+ let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
+}
+
def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
"mvn", "\t$dst, $src",
[(set GPR:$dst, (not GPR:$src))]>, UnaryDP {
@@ -1251,7 +1751,7 @@ def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
let Inst{25} = 0;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm,
+def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm,
IIC_iMOVi, "mvn", "\t$dst, $imm",
[(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP {
let Inst{25} = 1;
@@ -1314,6 +1814,14 @@ def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
let Inst{15-12} = 0b1111;
}
+def SMMULR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ IIC_iMUL32, "smmulr", "\t$dst, $a, $b",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{7-4} = 0b0011; // R = 1
+ let Inst{15-12} = 0b1111;
+}
+
def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
IIC_iMAC32, "smmla", "\t$dst, $a, $b, $c",
[(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>,
@@ -1321,6 +1829,12 @@ def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
let Inst{7-4} = 0b0001;
}
+def SMMLAR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
+ IIC_iMAC32, "smmlar", "\t$dst, $a, $b, $c",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{7-4} = 0b0011; // R = 1
+}
def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
IIC_iMAC32, "smmls", "\t$dst, $a, $b, $c",
@@ -1329,6 +1843,13 @@ def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
let Inst{7-4} = 0b1101;
}
+def SMMLSR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
+ IIC_iMAC32, "smmlsr", "\t$dst, $a, $b, $c",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{7-4} = 0b1111; // R = 1
+}
+
multiclass AI_smul<string opc, PatFrag opnode> {
def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
IIC_iMUL32, !strconcat(opc, "bb"), "\t$dst, $a, $b",
@@ -1400,7 +1921,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
IIC_iMAC16, !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
- (sra GPR:$b, (i32 16)))))]>,
+ (sra GPR:$b, (i32 16)))))]>,
Requires<[IsARM, HasV5TE]> {
let Inst{5} = 0;
let Inst{6} = 1;
@@ -1446,8 +1967,87 @@ multiclass AI_smla<string opc, PatFrag opnode> {
defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
-// TODO: Halfword multiple accumulate long: SMLAL<x><y>
-// TODO: Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
+// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only
+def SMLALBB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
+ IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 0;
+ let Inst{6} = 0;
+}
+
+def SMLALBT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
+ IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 0;
+ let Inst{6} = 1;
+}
+
+def SMLALTB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
+ IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 1;
+ let Inst{6} = 0;
+}
+
+def SMLALTT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
+ IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 1;
+ let Inst{6} = 1;
+}
+
+// Helper class for AI_smld -- for disassembly only
+class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> {
+ let Inst{4} = 1;
+ let Inst{5} = swap;
+ let Inst{6} = sub;
+ let Inst{7} = 0;
+ let Inst{21-20} = 0b00;
+ let Inst{22} = long;
+ let Inst{27-23} = 0b01110;
+}
+
+multiclass AI_smld<bit sub, string opc> {
+
+ def D : AMulDualI<0, sub, 0, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+ NoItinerary, !strconcat(opc, "d"), "\t$dst, $a, $b, $acc">;
+
+ def DX : AMulDualI<0, sub, 1, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+ NoItinerary, !strconcat(opc, "dx"), "\t$dst, $a, $b, $acc">;
+
+ def LD : AMulDualI<1, sub, 0, (outs GPR:$ldst,GPR:$hdst), (ins GPR:$a,GPR:$b),
+ NoItinerary, !strconcat(opc, "ld"), "\t$ldst, $hdst, $a, $b">;
+
+ def LDX : AMulDualI<1, sub, 1, (outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
+ NoItinerary, !strconcat(opc, "ldx"),"\t$ldst, $hdst, $a, $b">;
+
+}
+
+defm SMLA : AI_smld<0, "smla">;
+defm SMLS : AI_smld<1, "smls">;
+
+multiclass AI_sdml<bit sub, string opc> {
+
+ def D : AMulDualI<0, sub, 0, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ NoItinerary, !strconcat(opc, "d"), "\t$dst, $a, $b"> {
+ let Inst{15-12} = 0b1111;
+ }
+
+ def DX : AMulDualI<0, sub, 1, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ NoItinerary, !strconcat(opc, "dx"), "\t$dst, $a, $b"> {
+ let Inst{15-12} = 0b1111;
+ }
+
+}
+
+defm SMUA : AI_sdml<0, "smua">;
+defm SMUS : AI_sdml<1, "smus">;
//===----------------------------------------------------------------------===//
// Misc. Arithmetic Instructions.
@@ -1505,7 +2105,7 @@ def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst),
(ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, LSL $shamt",
+ IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
(and (shl GPR:$src2, (i32 imm:$shamt)),
0xFFFF0000)))]>,
@@ -1522,7 +2122,7 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst),
(ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, ASR $shamt",
+ IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
(and (sra GPR:$src2, imm16_31:$shamt),
0xFFFF)))]>, Requires<[IsARM, HasV6]> {
@@ -1568,7 +2168,7 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
-// a two-value operand where a dag node expects two operands. :(
+// a two-value operand where a dag node expects two operands. :(
def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
IIC_iCMOVr, "mov", "\t$dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
@@ -1606,6 +2206,7 @@ def Int_MemBarrierV7 : AInoP<(outs), (ins),
Requires<[IsARM, HasV7]> {
let Inst{31-4} = 0xf57ff05;
// FIXME: add support for options other than a full system DMB
+ // See DMB disassembly-only variants below.
let Inst{3-0} = 0b1111;
}
@@ -1616,6 +2217,7 @@ def Int_SyncBarrierV7 : AInoP<(outs), (ins),
Requires<[IsARM, HasV7]> {
let Inst{31-4} = 0xf57ff04;
// FIXME: add support for options other than a full system DSB
+ // See DSB disassembly-only variants below.
let Inst{3-0} = 0b1111;
}
@@ -1638,6 +2240,64 @@ def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero),
}
}
+// Helper class for multiclass MemB -- for disassembly only
+class AMBI<string opc, string asm>
+ : AInoP<(outs), (ins), MiscFrm, NoItinerary, opc, asm,
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV7]> {
+ let Inst{31-20} = 0xf57;
+}
+
+multiclass MemB<bits<4> op7_4, string opc> {
+
+ def st : AMBI<opc, "\tst"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b1110;
+ }
+
+ def ish : AMBI<opc, "\tish"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b1011;
+ }
+
+ def ishst : AMBI<opc, "\tishst"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b1010;
+ }
+
+ def nsh : AMBI<opc, "\tnsh"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b0111;
+ }
+
+ def nshst : AMBI<opc, "\tnshst"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b0110;
+ }
+
+ def osh : AMBI<opc, "\tosh"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b0011;
+ }
+
+ def oshst : AMBI<opc, "\toshst"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b0010;
+ }
+}
+
+// These DMB variants are for disassembly only.
+defm DMB : MemB<0b0101, "dmb">;
+
+// These DSB variants are for disassembly only.
+defm DSB : MemB<0b0100, "dsb">;
+
+// ISB has only full system option -- for disassembly only
+def ISBsy : AMBI<"isb", ""> {
+ let Inst{7-4} = 0b0110;
+ let Inst{3-0} = 0b1111;
+}
+
let usesCustomInserter = 1 in {
let Uses = [CPSR] in {
def ATOMIC_LOAD_ADD_I8 : PseudoInst<
@@ -1777,6 +2437,35 @@ def STREXD : AIstrex<0b01, (outs GPR:$success),
[]>;
}
+// Clear-Exclusive is for disassembly only.
+def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV7]> {
+ let Inst{31-20} = 0xf57;
+ let Inst{7-4} = 0b0001;
+}
+
+// SWP/SWPB are deprecated in V6/V7 and for disassembly only.
+let mayLoad = 1 in {
+def SWP : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary,
+ "swp", "\t$dst, $src, [$ptr]",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-23} = 0b00010;
+ let Inst{22} = 0; // B = 0
+ let Inst{21-20} = 0b00;
+ let Inst{7-4} = 0b1001;
+}
+
+def SWPB : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary,
+ "swpb", "\t$dst, $src, [$ptr]",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-23} = 0b00010;
+ let Inst{22} = 1; // B = 1
+ let Inst{21-20} = 0b00;
+ let Inst{7-4} = 0b1001;
+}
+}
+
//===----------------------------------------------------------------------===//
// TLS Instructions
//
@@ -1827,7 +2516,7 @@ let Defs =
// Two piece so_imms.
let isReMaterializable = 1 in
-def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src),
+def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src),
Pseudo, IIC_iMOVi,
"mov", "\t$dst, $src",
[(set GPR:$dst, so_imm2part:$src)]>,
@@ -1852,7 +2541,7 @@ def : ARMPat<(add GPR:$LHS, so_neg_imm2part:$RHS),
// FIXME: Remove this when we can do generalized remat.
let isReMaterializable = 1 in
def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi,
- "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
+ "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
[(set GPR:$dst, (i32 imm:$src))]>,
Requires<[IsARM, HasV6T2]>;
@@ -1959,3 +2648,226 @@ include "ARMInstrVFP.td"
//
include "ARMInstrNEON.td"
+
+//===----------------------------------------------------------------------===//
+// Coprocessor Instructions. For disassembly only.
+//
+
+def CDP : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
+ nohash_imm:$CRd, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2),
+ NoItinerary, "cdp", "\tp$cop, $opc1, cr$CRd, cr$CRn, cr$CRm, $opc2",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{4} = 0;
+}
+
+def CDP2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
+ nohash_imm:$CRd, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2),
+ NoItinerary, "cdp2\tp$cop, $opc1, cr$CRd, cr$CRn, cr$CRm, $opc2",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{4} = 0;
+}
+
+class ACI<dag oops, dag iops, string opc, string asm>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, NoItinerary,
+ opc, asm, "", [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-25} = 0b110;
+}
+
+multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
+
+ def _OFFSET : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
+ opc, "\tp$cop, cr$CRd, $addr"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def _PRE : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
+ opc, "\tp$cop, cr$CRd, $addr!"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def _POST : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset),
+ opc, "\tp$cop, cr$CRd, [$base], $offset"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def _OPTION : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, i32imm:$option),
+ opc, "\tp$cop, cr$CRd, [$base], $option"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def L_OFFSET : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
+ opc, "l\tp$cop, cr$CRd, $addr"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+
+ def L_PRE : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
+ opc, "l\tp$cop, cr$CRd, $addr!"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+
+ def L_POST : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset),
+ opc, "l\tp$cop, cr$CRd, [$base], $offset"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+
+ def L_OPTION : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, nohash_imm:$option),
+ opc, "l\tp$cop, cr$CRd, [$base], $option"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+}
+
+defm LDC : LdStCop<{?,?,?,?}, 1, "ldc">;
+defm LDC2 : LdStCop<0b1111, 1, "ldc2">;
+defm STC : LdStCop<{?,?,?,?}, 0, "stc">;
+defm STC2 : LdStCop<0b1111, 0, "stc2">;
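// A sketch of what the defm instantiations above expand to (record names are
// assumptions, following TableGen's usual defm-name prefixing, and are not
// part of this patch): LDC_OFFSET, LDC_PRE, LDC_POST, LDC_OPTION,
// LDCL_OFFSET, ... and likewise for LDC2, STC and STC2. Given the asm
// strings in LdStCop, the pre-indexed "long" load would print roughly as
//   ldcl p15, cr1, [r0, #8]!
// (the coprocessor, register and offset operands are illustrative only).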
+
+def MCR : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
+ GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2),
+ NoItinerary, "mcr", "\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{20} = 0;
+ let Inst{4} = 1;
+}
+
+def MCR2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
+ GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2),
+ NoItinerary, "mcr2\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{20} = 0;
+ let Inst{4} = 1;
+}
+
+def MRC : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
+ GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2),
+ NoItinerary, "mrc", "\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{20} = 1;
+ let Inst{4} = 1;
+}
+
+def MRC2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
+ GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2),
+ NoItinerary, "mrc2\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{20} = 1;
+ let Inst{4} = 1;
+}
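// For reference, the asm strings above produce coprocessor moves of the
// form below (operand values are illustrative, not taken from this patch):
//   mcr p15, 0, r0, cr1, cr0, 0    @ ARM core register to coprocessor
//   mrc p15, 0, r0, cr1, cr0, 0    @ coprocessor to ARM core register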
+
+def MCRR : ABI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc,
+ GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm),
+ NoItinerary, "mcrr", "\tp$cop, $opc, $Rt, $Rt2, cr$CRm",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{23-20} = 0b0100;
+}
+
+def MCRR2 : ABXI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc,
+ GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm),
+ NoItinerary, "mcrr2\tp$cop, $opc, $Rt, $Rt2, cr$CRm",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{23-20} = 0b0100;
+}
+
+def MRRC : ABI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc,
+ GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm),
+ NoItinerary, "mrrc", "\tp$cop, $opc, $Rt, $Rt2, cr$CRm",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{23-20} = 0b0101;
+}
+
+def MRRC2 : ABXI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc,
+ GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm),
+ NoItinerary, "mrrc2\tp$cop, $opc, $Rt, $Rt2, cr$CRm",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{23-20} = 0b0101;
+}
+
+//===----------------------------------------------------------------------===//
+// Move between special register and ARM core register -- for disassembly only
+//
+
+def MRS : ABI<0b0001,(outs GPR:$dst),(ins), NoItinerary, "mrs", "\t$dst, cpsr",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{23-20} = 0b0000;
+ let Inst{7-4} = 0b0000;
+}
+
+def MRSsys : ABI<0b0001,(outs GPR:$dst),(ins), NoItinerary,"mrs","\t$dst, spsr",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{23-20} = 0b0100;
+ let Inst{7-4} = 0b0000;
+}
+
+// FIXME: mask is ignored for the time being.
+def MSR : ABI<0b0001,(outs),(ins GPR:$src), NoItinerary, "msr", "\tcpsr, $src",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{23-20} = 0b0010;
+ let Inst{7-4} = 0b0000;
+}
+
+// FIXME: mask is ignored for the time being.
+def MSRi : ABI<0b0011,(outs),(ins so_imm:$a), NoItinerary, "msr", "\tcpsr, $a",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{23-20} = 0b0010;
+ let Inst{7-4} = 0b0000;
+}
+
+// FIXME: mask is ignored for the time being.
+def MSRsys : ABI<0b0001,(outs),(ins GPR:$src),NoItinerary,"msr","\tspsr, $src",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{23-20} = 0b0110;
+ let Inst{7-4} = 0b0000;
+}
+
+// FIXME: mask is ignored for the time being.
+def MSRsysi : ABI<0b0011,(outs),(ins so_imm:$a),NoItinerary,"msr","\tspsr, $a",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{23-20} = 0b0110;
+ let Inst{7-4} = 0b0000;
+}
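// Based on the asm strings above, these print roughly as (operands are
// illustrative): "mrs r0, cpsr", "mrs r0, spsr", "msr cpsr, r0" and
// "msr spsr, #imm". The encodings differ only in Inst{23-20}, where bit 22
// appears to select CPSR (0) versus SPSR (1).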
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index e2be7ba..3aa0810 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -83,11 +83,17 @@ def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>;
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>;
def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
+def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
+def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
+def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
+
//===----------------------------------------------------------------------===//
// NEON operand definitions
//===----------------------------------------------------------------------===//
@@ -123,9 +129,7 @@ def h64imm : Operand<i64> {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
def VLDMD : NI<(outs),
(ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
- IIC_fpLoadm,
- "vldm", "${addr:submode} ${addr:base}, $dst1",
- []> {
+ IIC_fpLoadm, "vldm", "${addr:submode} ${addr:base}, $dst1", []> {
let Inst{27-25} = 0b110;
let Inst{20} = 1;
let Inst{11-9} = 0b101;
@@ -133,9 +137,7 @@ def VLDMD : NI<(outs),
def VLDMS : NI<(outs),
(ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
- IIC_fpLoadm,
- "vldm", "${addr:submode} ${addr:base}, $dst1",
- []> {
+ IIC_fpLoadm, "vldm", "${addr:submode} ${addr:base}, $dst1", []> {
let Inst{27-25} = 0b110;
let Inst{20} = 1;
let Inst{11-9} = 0b101;
@@ -144,10 +146,9 @@ def VLDMS : NI<(outs),
*/
// Use vldmia to load a Q register as a D register pair.
-def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
- IIC_fpLoadm,
- "vldmia", "$addr, ${dst:dregpair}",
- [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> {
+def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoadm,
+ "vldmia", "$addr, ${dst:dregpair}",
+ [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> {
let Inst{27-25} = 0b110;
let Inst{24} = 0; // P bit
let Inst{23} = 1; // U bit
@@ -156,10 +157,9 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
}
// Use vstmia to store a Q register as a D register pair.
-def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr),
- IIC_fpStorem,
- "vstmia", "$addr, ${src:dregpair}",
- [(store (v2f64 QPR:$src), addrmode4:$addr)]> {
+def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem,
+ "vstmia", "$addr, ${src:dregpair}",
+ [(store (v2f64 QPR:$src), addrmode4:$addr)]> {
let Inst{27-25} = 0b110;
let Inst{24} = 0; // P bit
let Inst{23} = 1; // U bit
@@ -191,6 +191,29 @@ def VLD1q32 : VLD1Q<0b1000, "vld1", "32", v4i32, int_arm_neon_vld1>;
def VLD1qf : VLD1Q<0b1000, "vld1", "32", v4f32, int_arm_neon_vld1>;
def VLD1q64 : VLD1Q<0b1100, "vld1", "64", v2i64, int_arm_neon_vld1>;
+// These (dreg triple/quadruple) are for disassembly only.
+class VLD1D3<bits<4> op7_4, string OpcodeStr, string Dt>
+ : NLdSt<0, 0b10, 0b0110, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt,
+ "\\{$dst1, $dst2, $dst3\\}, $addr", "",
+ [/* For disassembly only; pattern left blank */]>;
+class VLD1D4<bits<4> op7_4, string OpcodeStr, string Dt>
+ : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt,
+ "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VLD1d8T : VLD1D3<0b0000, "vld1", "8">;
+def VLD1d16T : VLD1D3<0b0100, "vld1", "16">;
+def VLD1d32T : VLD1D3<0b1000, "vld1", "32">;
+//def VLD1d64T : VLD1D3<0b1100, "vld1", "64">;
+
+def VLD1d8Q : VLD1D4<0b0000, "vld1", "8">;
+def VLD1d16Q : VLD1D4<0b0100, "vld1", "16">;
+def VLD1d32Q : VLD1D4<0b1000, "vld1", "32">;
+//def VLD1d64Q : VLD1D4<0b1100, "vld1", "64">;
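// Given the VLD1D3/VLD1D4 asm strings above, these disassembly-only forms
// would print roughly as (register and address operands are illustrative):
//   vld1.8  {d0, d1, d2}, [r0]
//   vld1.16 {d0, d1, d2, d3}, [r0]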
+
+
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
// VLD2 : Vector Load (multiple 2-element structures)
@@ -216,6 +239,16 @@ def VLD2q8 : VLD2Q<0b0000, "vld2", "8">;
def VLD2q16 : VLD2Q<0b0100, "vld2", "16">;
def VLD2q32 : VLD2Q<0b1000, "vld2", "32">;
+// These (double-spaced dreg pair) are for disassembly only.
+class VLD2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt>
+ : NLdSt<0,0b10,0b1001,op7_4, (outs DPR:$dst1, DPR:$dst2),
+ (ins addrmode6:$addr), IIC_VLD2,
+ OpcodeStr, Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
+
+def VLD2d8D : VLD2Ddbl<0b0000, "vld2", "8">;
+def VLD2d16D : VLD2Ddbl<0b0100, "vld2", "16">;
+def VLD2d32D : VLD2Ddbl<0b1000, "vld2", "32">;
+
// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
@@ -285,105 +318,64 @@ def VLD4q32b : VLD4WB<0b1000, "vld4", "32">;
class VLD2LN<bits<4> op11_8, string OpcodeStr, string Dt>
: NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
- IIC_VLD2,
- OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
+ IIC_VLD2, OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
"$src1 = $dst1, $src2 = $dst2", []>;
// vld2 to single-spaced registers.
def VLD2LNd8 : VLD2LN<0b0001, "vld2", "8">;
-def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> {
- let Inst{5} = 0;
-}
-def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> {
- let Inst{6} = 0;
-}
+def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 0; }
+def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 0; }
// vld2 to double-spaced even registers.
-def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> {
- let Inst{5} = 1;
-}
-def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> {
- let Inst{6} = 1;
-}
+def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; }
+def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; }
// vld2 to double-spaced odd registers.
-def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> {
- let Inst{5} = 1;
-}
-def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> {
- let Inst{6} = 1;
-}
+def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; }
+def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; }
// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, string OpcodeStr, string Dt>
: NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
- nohash_imm:$lane), IIC_VLD3,
- OpcodeStr, Dt,
+ nohash_imm:$lane), IIC_VLD3, OpcodeStr, Dt,
"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr",
"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
// vld3 to single-spaced registers.
-def VLD3LNd8 : VLD3LN<0b0010, "vld3", "8"> {
- let Inst{4} = 0;
-}
-def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> {
- let Inst{5-4} = 0b00;
-}
-def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> {
- let Inst{6-4} = 0b000;
-}
+def VLD3LNd8 : VLD3LN<0b0010, "vld3", "8"> { let Inst{4} = 0; }
+def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b00; }
+def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b000; }
// vld3 to double-spaced even registers.
-def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> {
- let Inst{5-4} = 0b10;
-}
-def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> {
- let Inst{6-4} = 0b100;
-}
+def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; }
+def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; }
// vld3 to double-spaced odd registers.
-def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> {
- let Inst{5-4} = 0b10;
-}
-def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> {
- let Inst{6-4} = 0b100;
-}
+def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; }
+def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; }
// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, string OpcodeStr, string Dt>
: NLdSt<1,0b10,op11_8,{?,?,?,?},
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
- nohash_imm:$lane), IIC_VLD4,
- OpcodeStr, Dt,
+ nohash_imm:$lane), IIC_VLD4, OpcodeStr, Dt,
"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr",
"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
// vld4 to single-spaced registers.
def VLD4LNd8 : VLD4LN<0b0011, "vld4", "8">;
-def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> {
- let Inst{5} = 0;
-}
-def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> {
- let Inst{6} = 0;
-}
+def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 0; }
+def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 0; }
// vld4 to double-spaced even registers.
-def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> {
- let Inst{5} = 1;
-}
-def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> {
- let Inst{6} = 1;
-}
+def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; }
+def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; }
// vld4 to double-spaced odd registers.
-def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> {
- let Inst{5} = 1;
-}
-def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> {
- let Inst{6} = 1;
-}
+def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; }
+def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; }
// VLD1DUP : Vector Load (single element to all lanes)
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
@@ -418,6 +410,31 @@ def VST1qf : VST1Q<0b1000, "vst1", "32", v4f32, int_arm_neon_vst1>;
def VST1q64 : VST1Q<0b1100, "vst1", "64", v2i64, int_arm_neon_vst1>;
} // hasExtraSrcRegAllocReq
+// These (dreg triple/quadruple) are for disassembly only.
+class VST1D3<bits<4> op7_4, string OpcodeStr, string Dt>
+ : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
+ OpcodeStr, Dt,
+ "\\{$src1, $src2, $src3\\}, $addr", "",
+ [/* For disassembly only; pattern left blank */]>;
+class VST1D4<bits<4> op7_4, string OpcodeStr, string Dt>
+ : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST, OpcodeStr, Dt,
+ "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VST1d8T : VST1D3<0b0000, "vst1", "8">;
+def VST1d16T : VST1D3<0b0100, "vst1", "16">;
+def VST1d32T : VST1D3<0b1000, "vst1", "32">;
+//def VST1d64T : VST1D3<0b1100, "vst1", "64">;
+
+def VST1d8Q : VST1D4<0b0000, "vst1", "8">;
+def VST1d16Q : VST1D4<0b0100, "vst1", "16">;
+def VST1d32Q : VST1D4<0b1000, "vst1", "32">;
+//def VST1d64Q : VST1D4<0b1100, "vst1", "64">;
+
+
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
// VST2 : Vector Store (multiple 2-element structures)
@@ -428,8 +445,7 @@ class VST2D<bits<4> op7_4, string OpcodeStr, string Dt>
class VST2Q<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b0011,op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST,
- OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
+ IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
"", []>;
def VST2d8 : VST2D<0b0000, "vst2", "8">;
@@ -443,6 +459,16 @@ def VST2q8 : VST2Q<0b0000, "vst2", "8">;
def VST2q16 : VST2Q<0b0100, "vst2", "16">;
def VST2q32 : VST2Q<0b1000, "vst2", "32">;
+// These (double-spaced dreg pair) are for disassembly only.
+class VST2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt>
+ : NLdSt<0, 0b00, 0b1001, op7_4, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
+ OpcodeStr, Dt, "\\{$src1, $src2\\}, $addr", "", []>;
+
+def VST2d8D : VST2Ddbl<0b0000, "vst2", "8">;
+def VST2d16D : VST2Ddbl<0b0100, "vst2", "16">;
+def VST2d32D : VST2Ddbl<0b1000, "vst2", "32">;
+
// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b0100,op7_4, (outs),
@@ -476,14 +502,12 @@ def VST3q32b : VST3WB<0b1000, "vst3", "32">;
class VST4D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b0000,op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST,
- OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
+ IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
"", []>;
class VST4WB<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST,
- OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
+ IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
"$addr.addr = $wb", []>;
def VST4d8 : VST4D<0b0000, "vst4", "8">;
@@ -511,104 +535,63 @@ def VST4q32b : VST4WB<0b1000, "vst4", "32">;
// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, string OpcodeStr, string Dt>
: NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
- IIC_VST,
- OpcodeStr, Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr",
- "", []>;
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+ IIC_VST, OpcodeStr, Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr",
+ "", []>;
// vst2 to single-spaced registers.
def VST2LNd8 : VST2LN<0b0001, "vst2", "8">;
-def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> {
- let Inst{5} = 0;
-}
-def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> {
- let Inst{6} = 0;
-}
+def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 0; }
+def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 0; }
// vst2 to double-spaced even registers.
-def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> {
- let Inst{5} = 1;
-}
-def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> {
- let Inst{6} = 1;
-}
+def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; }
+def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; }
// vst2 to double-spaced odd registers.
-def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> {
- let Inst{5} = 1;
-}
-def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> {
- let Inst{6} = 1;
-}
+def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; }
+def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; }
// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, string OpcodeStr, string Dt>
: NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
- nohash_imm:$lane), IIC_VST,
- OpcodeStr, Dt,
- "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>;
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VST, OpcodeStr, Dt,
+ "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>;
// vst3 to single-spaced registers.
-def VST3LNd8 : VST3LN<0b0010, "vst3", "8"> {
- let Inst{4} = 0;
-}
-def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> {
- let Inst{5-4} = 0b00;
-}
-def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> {
- let Inst{6-4} = 0b000;
-}
+def VST3LNd8 : VST3LN<0b0010, "vst3", "8"> { let Inst{4} = 0; }
+def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b00; }
+def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b000; }
// vst3 to double-spaced even registers.
-def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> {
- let Inst{5-4} = 0b10;
-}
-def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> {
- let Inst{6-4} = 0b100;
-}
+def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; }
+def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; }
// vst3 to double-spaced odd registers.
-def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> {
- let Inst{5-4} = 0b10;
-}
-def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> {
- let Inst{6-4} = 0b100;
-}
+def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; }
+def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; }
// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, string OpcodeStr, string Dt>
: NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
- nohash_imm:$lane), IIC_VST,
- OpcodeStr, Dt,
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VST, OpcodeStr, Dt,
"\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr",
- "", []>;
+ "", []>;
// vst4 to single-spaced registers.
def VST4LNd8 : VST4LN<0b0011, "vst4", "8">;
-def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> {
- let Inst{5} = 0;
-}
-def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> {
- let Inst{6} = 0;
-}
+def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 0; }
+def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 0; }
// vst4 to double-spaced even registers.
-def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> {
- let Inst{5} = 1;
-}
-def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> {
- let Inst{6} = 1;
-}
+def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; }
+def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; }
// vst4 to double-spaced odd registers.
-def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> {
- let Inst{5} = 1;
-}
-def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> {
- let Inst{6} = 1;
-}
+def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; }
+def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; }
} // mayStore = 1, hasExtraSrcRegAllocReq = 1
@@ -656,34 +639,26 @@ def SubReg_i32_lane : SDNodeXForm<imm, [{
// Instruction Classes
//===----------------------------------------------------------------------===//
-// Basic 2-register operations, both double- and quad-register.
+// Basic 2-register operations: single-, double- and quad-register.
+class N2VS<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
+ (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src),
+ IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>;
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt,
- ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
(ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt,
- ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
(ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;
-// Basic 2-register operations, scalar single-precision.
-class N2VDs<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt,
- ValueType ResTy, ValueType OpTy, SDNode OpNode>
- : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
- (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src),
- IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>;
-
-class N2VDsPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
- : NEONFPPat<(ResTy (OpNode SPR:$a)),
- (EXTRACT_SUBREG
- (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)),
- arm_ssubreg_0)>;
-
// Basic 2-register intrinsics, both double- and quad-register.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
@@ -700,21 +675,6 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
(ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
-// Basic 2-register intrinsics, scalar single-precision
-class N2VDInts<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
- : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
- (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), itin,
- OpcodeStr, Dt, "$dst, $src", "", []>;
-
-class N2VDIntsPat<SDNode OpNode, NeonI Inst>
- : NEONFPPat<(f32 (OpNode SPR:$a)),
- (EXTRACT_SUBREG
- (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)),
- arm_ssubreg_0)>;
-
// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
@@ -742,15 +702,22 @@ class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
InstrItinClass itin, string OpcodeStr, string Dt>
: N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2),
- (ins QPR:$src1, QPR:$src2), itin,
- OpcodeStr, Dt, "$dst1, $dst2",
+ (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst1, $dst2",
"$src1 = $dst1, $src2 = $dst2", []>;
-// Basic 3-register operations, both double- and quad-register.
+// Basic 3-register operations: single-, double- and quad-register.
+class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
+ OpcodeStr, Dt, "$dst, $src1, $src2", "", []> {
+ let isCommutable = Commutable;
+}
+
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy,
- SDNode OpNode, bit Commutable>
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
@@ -763,9 +730,9 @@ class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
ValueType ResTy, ValueType OpTy,
SDNode OpNode, bit Commutable>
: N3VX<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
- OpcodeStr, "$dst, $src1, $src2", "",
- [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
+ OpcodeStr, "$dst, $src1, $src2", "",
+ [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]>{
let isCommutable = Commutable;
}
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
@@ -776,27 +743,23 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8,
itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
- (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
- imm:$lane)))))]> {
+ (Ty (NEONvduplane (Ty DPR_VFP2:$src2), imm:$lane)))))]>{
let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- IIC_VMULi16D,
- OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ IIC_VMULi16D, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
- (Ty (NEONvduplane (Ty DPR_8:$src2),
- imm:$lane)))))]> {
+ (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> {
let isCommutable = 0;
}
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy,
- SDNode OpNode, bit Commutable>
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
@@ -805,12 +768,11 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr,
- ValueType ResTy, ValueType OpTy,
- SDNode OpNode, bit Commutable>
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3VX<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
- OpcodeStr, "$dst, $src1, $src2", "",
- [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
+ OpcodeStr, "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]>{
let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
@@ -825,13 +787,11 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8,
imm:$lane)))))]> {
let isCommutable = 0;
}
-class N3VQSL16<bits<2> op21_20, bits<4> op11_8,
- string OpcodeStr, string Dt,
+class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- IIC_VMULi16Q,
- OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ IIC_VMULi16Q, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (NEONvduplane (OpTy DPR_8:$src2),
@@ -839,27 +799,10 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8,
let isCommutable = 0;
}
-// Basic 3-register operations, scalar single-precision
-class N3VDs<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
- SDNode OpNode, bit Commutable>
- : N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
- OpcodeStr, Dt, "$dst, $src1, $src2", "", []> {
- let isCommutable = Commutable;
-}
-class N3VDsPat<SDNode OpNode, NeonI Inst>
- : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
- (EXTRACT_SUBREG
- (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
- (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
- arm_ssubreg_0)>;
-
// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy,
- Intrinsic IntOp, bit Commutable>
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
@@ -891,8 +834,7 @@ class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy,
- Intrinsic IntOp, bit Commutable>
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
@@ -924,7 +866,15 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
let isCommutable = 0;
}
-// Multiply-Add/Sub operations, both double- and quad-register.
+// Multiply-Add/Sub operations: single-, double- and quad-register.
+class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR_VFP2:$dst),
+ (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>;
+
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode MulOp, SDNode OpNode>
@@ -976,8 +926,8 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (MulOp QPR:$src2,
- (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
- imm:$lane)))))))]>;
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
+ imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy,
@@ -989,25 +939,8 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (MulOp QPR:$src2,
- (ResTy (NEONvduplane (OpTy DPR_8:$src3),
- imm:$lane)))))))]>;
-
-// Multiply-Add/Sub operations, scalar single-precision
-class N3VDMulOps<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType Ty, SDNode MulOp, SDNode OpNode>
- : N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR_VFP2:$dst),
- (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin,
- OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>;
-
-class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
- : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
- (EXTRACT_SUBREG
- (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$acc, arm_ssubreg_0),
- (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
- (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
- arm_ssubreg_0)>;
+ (ResTy (NEONvduplane (OpTy DPR_8:$src3),
+ imm:$lane)))))))]>;
// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
@@ -1050,9 +983,9 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
(OpTy DPR:$src2),
(OpTy (NEONvduplane (OpTy DPR_VFP2:$src3),
imm:$lane)))))]>;
-class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
- Intrinsic IntOp>
+class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst),
(ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
@@ -1063,7 +996,6 @@ class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass iti
(OpTy (NEONvduplane (OpTy DPR_8:$src3),
imm:$lane)))))]>;
-
// Narrowing 3-register intrinsics.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
@@ -1095,9 +1027,9 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
(ResTy (IntOp (OpTy DPR:$src1),
(OpTy (NEONvduplane (OpTy DPR_VFP2:$src2),
imm:$lane)))))]>;
-class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
- Intrinsic IntOp>
+class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
@@ -1249,6 +1181,45 @@ class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
// S = single int (32 bit) elements
// D = double int (64 bit) elements
+// Neon 2-register vector operations -- for disassembly only.
+
+// First with only element sizes of 8, 16 and 32 bits:
+multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4, string opc, string Dt,
+ string asm> {
+ // 64-bit vector types.
+ def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
+ (outs DPR:$dst), (ins DPR:$src), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "", []>;
+ def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
+ (outs DPR:$dst), (ins DPR:$src), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "", []>;
+ def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
+ (outs DPR:$dst), (ins DPR:$src), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "", []>;
+ def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
+ (outs DPR:$dst), (ins DPR:$src), NoItinerary,
+ opc, "f32", asm, "", []> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
+
+ // 128-bit vector types.
+ def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
+ (outs QPR:$dst), (ins QPR:$src), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "", []>;
+ def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
+ (outs QPR:$dst), (ins QPR:$src), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "", []>;
+ def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
+ (outs QPR:$dst), (ins QPR:$src), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "", []>;
+ def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
+ (outs QPR:$dst), (ins QPR:$src), NoItinerary,
+ opc, "f32", asm, "", []> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
+}
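// A sketch of how this multiclass expands (assumed, following TableGen's
// defm naming): an instantiation such as "defm VCEQz" later in this patch
// yields eight records -- VCEQzv8i8, VCEQzv4i16, VCEQzv2i32, VCEQzv2f32,
// VCEQzv16i8, VCEQzv8i16, VCEQzv4i32 and VCEQzv4f32 -- each printing
// roughly as
//   vceq.i8 d0, d1, #0
// per the "$dst, $src, #0" asm operand string (registers illustrative).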
+
// Neon 3-register vector operations.
// First with only element sizes of 8, 16 and 32 bits:
@@ -1262,22 +1233,22 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
OpcodeStr, !strconcat(Dt, "8"),
v8i8, v8i8, OpNode, Commutable>;
def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
- OpcodeStr, !strconcat(Dt, "16"),
- v4i16, v4i16, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, OpNode, Commutable>;
def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
- OpcodeStr, !strconcat(Dt, "32"),
- v2i32, v2i32, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, OpNode, Commutable>;
// 128-bit vector types.
def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
- OpcodeStr, !strconcat(Dt, "8"),
- v16i8, v16i8, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, OpNode, Commutable>;
def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
- OpcodeStr, !strconcat(Dt, "16"),
- v8i16, v8i16, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, OpNode, Commutable>;
def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
- OpcodeStr, !strconcat(Dt, "32"),
- v4i32, v4i32, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, OpNode, Commutable>;
}
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
@@ -1372,7 +1343,7 @@ multiclass N3VIntSL_HS<bits<4> op11_8,
def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
- OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}
@@ -1386,8 +1357,8 @@ multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
: N3VInt_HS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp, Commutable> {
def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, itinD16,
- OpcodeStr, !strconcat(Dt, "8"),
- v8i8, v8i8, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, IntOp, Commutable>;
def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, itinQ16,
OpcodeStr, !strconcat(Dt, "8"),
v16i8, v16i8, IntOp, Commutable>;
@@ -1402,11 +1373,11 @@ multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
: N3VInt_QHS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp, Commutable> {
def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, itinD32,
- OpcodeStr, !strconcat(Dt, "64"),
- v1i64, v1i64, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, IntOp, Commutable>;
def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, itinQ32,
- OpcodeStr, !strconcat(Dt, "64"),
- v2i64, v2i64, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, IntOp, Commutable>;
}
@@ -1511,9 +1482,11 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8,
def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
- OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, mul, ShOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
+ mul, ShOp>;
def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
- OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, mul, ShOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
+ mul, ShOp>;
}
// Neon 3-argument intrinsics,
@@ -1522,19 +1495,19 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt, Intrinsic IntOp> {
// 64-bit vector types.
def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D,
- OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
- OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D,
- OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
// 128-bit vector types.
def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16Q,
- OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16Q,
- OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32Q,
- OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}
@@ -1576,17 +1549,17 @@ multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- itinD, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
+ itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- itinD, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
+ itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
// 128-bit vector types.
def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
- itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
+ itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- itinQ, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
+ itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- itinQ, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
+ itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
}
@@ -1846,29 +1819,31 @@ def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul", "p8",
def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul", "p8",
v16i8, v16i8, int_arm_neon_vmulp, 1>;
def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32",
- v2f32, v2f32, fmul, 1>;
+ v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32",
- v4f32, v4f32, fmul, 1>;
-defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
-def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
-def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, v2f32, fmul>;
+ v4f32, v4f32, fmul, 1>;
+defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
+def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
+def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
+ v2f32, fmul>;
+
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
(v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
(v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
(v4i16 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i16_reg imm:$lane))),
+ (DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
(v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
(v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
(v2i32 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
(v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
(v4f32 (VMULslfq (v4f32 QPR:$src1),
(v2f32 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
@@ -1883,14 +1858,14 @@ def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
imm:$lane)))),
(v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
(v4i16 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i16_reg imm:$lane))),
+ (DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
(v4i32 (NEONvduplane (v4i32 QPR:$src2),
imm:$lane)))),
(v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
(v2i32 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
@@ -1905,14 +1880,14 @@ def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
imm:$lane)))),
(v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
(v4i16 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i16_reg imm:$lane))),
+ (DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
(v4i32 (NEONvduplane (v4i32 QPR:$src2),
imm:$lane)))),
(v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
(v2i32 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
@@ -1950,30 +1925,28 @@ def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
v4f32, v2f32, fmul, fadd>;
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
- (mul (v8i16 QPR:$src2),
- (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
- (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1),
- (v8i16 QPR:$src2),
+ (mul (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
(v4i16 (EXTRACT_SUBREG QPR:$src3,
- (DSubReg_i16_reg imm:$lane))),
+ (DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (add (v4i32 QPR:$src1),
- (mul (v4i32 QPR:$src2),
- (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
- (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1),
- (v4i32 QPR:$src2),
+ (mul (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
(v2i32 (EXTRACT_SUBREG QPR:$src3,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
- (fmul (v4f32 QPR:$src2),
- (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (fmul (v4f32 QPR:$src2),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
(v4f32 (VMLAslfq (v4f32 QPR:$src1),
(v4f32 QPR:$src2),
(v2f32 (EXTRACT_SUBREG QPR:$src3,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
@@ -2003,30 +1976,27 @@ def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
v4f32, v2f32, fmul, fsub>;
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
- (mul (v8i16 QPR:$src2),
- (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
- (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1),
- (v8i16 QPR:$src2),
+ (mul (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
(v4i16 (EXTRACT_SUBREG QPR:$src3,
- (DSubReg_i16_reg imm:$lane))),
+ (DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
- (mul (v4i32 QPR:$src2),
- (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
- (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1),
- (v4i32 QPR:$src2),
+ (mul (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
(v2i32 (EXTRACT_SUBREG QPR:$src3,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
- (fmul (v4f32 QPR:$src2),
- (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
- (v4f32 (VMLSslfq (v4f32 QPR:$src1),
- (v4f32 QPR:$src2),
+ (fmul (v4f32 QPR:$src2),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
(v2f32 (EXTRACT_SUBREG QPR:$src3,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
@@ -2088,6 +2058,10 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
NEONvceq, 1>;
def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
NEONvceq, 1>;
+// For disassembly only.
+defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
+ "$dst, $src, #0">;
+
// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>;
@@ -2097,6 +2071,13 @@ def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32",
v2i32, v2f32, NEONvcge, 0>;
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
NEONvcge, 0>;
+// For disassembly only.
+defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
+ "$dst, $src, #0">;
+// For disassembly only.
+defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
+ "$dst, $src, #0">;
+
// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vcgt", "s", NEONvcgt, 0>;
@@ -2106,6 +2087,13 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
NEONvcgt, 0>;
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
NEONvcgt, 0>;
+// For disassembly only.
+defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
+ "$dst, $src, #0">;
+// For disassembly only.
+defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
+ "$dst, $src, #0">;
+
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge", "f32",
v2i32, v2f32, int_arm_neon_vacged, 0>;
@@ -2247,9 +2235,9 @@ defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>;
// Vector Maximum and Minimum.
// VMAX : Vector Maximum
-defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+defm VMAXs : N3VInt_QHS<0,0,0b0110,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>;
-defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+defm VMAXu : N3VInt_QHS<1,0,0b0110,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>;
def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax", "f32",
v2f32, v2f32, int_arm_neon_vmaxs, 1>;
@@ -2257,9 +2245,9 @@ def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax", "f32",
v4f32, v4f32, int_arm_neon_vmaxs, 1>;
// VMIN : Vector Minimum
-defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+defm VMINs : N3VInt_QHS<0,0,0b0110,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vmin", "s", int_arm_neon_vmins, 1>;
-defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+defm VMINu : N3VInt_QHS<1,0,0b0110,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vmin", "u", int_arm_neon_vminu, 1>;
def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin", "f32",
v2f32, v2f32, int_arm_neon_vmins, 1>;
@@ -2401,16 +2389,17 @@ def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
v2i64, v2i32, NEONvshlli>;
// VSHRN : Vector Shift Right and Narrow
-defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", NEONvshrn>;
+defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
+ NEONvshrn>;
// VRSHL : Vector Rounding Shift
defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vrshl", "s", int_arm_neon_vrshifts, 0>;
+ IIC_VSHLi4Q, "vrshl", "s", int_arm_neon_vrshifts,0>;
defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu, 0>;
+ IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu,0>;
// VRSHR : Vector Rounding Shift Right
-defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>;
-defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>;
+defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>;
+defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>;
// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
@@ -2418,14 +2407,14 @@ defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
// VQSHL : Vector Saturating Shift
defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqshl", "s", int_arm_neon_vqshifts, 0>;
+ IIC_VSHLi4Q, "vqshl", "s", int_arm_neon_vqshifts,0>;
defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu, 0>;
+ IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu,0>;
// VQSHL : Vector Saturating Shift Left (Immediate)
-defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>;
-defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>;
+defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>;
+defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>;
// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
-defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu", "s", NEONvqshlsu>;
+defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D, "vqshlu","s",NEONvqshlsu>;
// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
@@ -2438,10 +2427,10 @@ defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
NEONvqshrnsu>;
// VQRSHL : Vector Saturating Rounding Shift
-defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
+defm VQRSHLs : N3VInt_QHSD<0,0,0b0101,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
IIC_VSHLi4Q, "vqrshl", "s",
int_arm_neon_vqrshifts, 0>;
-defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
+defm VQRSHLu : N3VInt_QHSD<1,0,0b0101,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
IIC_VSHLi4Q, "vqrshl", "u",
int_arm_neon_vqrshiftu, 0>;
@@ -2508,7 +2497,7 @@ def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
// VNEG : Vector Negate (floating-point)
-def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
+def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
(outs DPR:$dst), (ins DPR:$src), IIC_VUNAD,
"vneg", "f32", "$dst, $src", "",
[(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
@@ -2547,6 +2536,14 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
IIC_VCNTiQ, "vcnt", "8",
v16i8, v16i8, int_arm_neon_vcnt>;
+// Vector Swap -- for disassembly only.
+def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
+ (outs DPR:$dst), (ins DPR:$src), NoItinerary,
+ "vswp", "$dst, $src", "", []>;
+def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
+ (outs QPR:$dst), (ins QPR:$src), NoItinerary,
+ "vswp", "$dst, $src", "", []>;
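// For reference, these would print roughly as "vswp d0, d1" / "vswp q0, q1"
// (registers illustrative). The real instruction exchanges the contents of
// both registers, which the bare $dst/$src operand list above does not
// express -- acceptable here since the definitions are for disassembly only.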
+
// Vector Move Operations.
// VMOV : Vector Move (Register)
@@ -2678,10 +2675,10 @@ def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane))>;
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
- (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1), DPR_VFP2)),
+ (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
(SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
- (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1), QPR_VFP2)),
+ (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
(SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
// (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
@@ -2849,11 +2846,13 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)),
(INSERT_SUBREG QPR:$src,
- (i64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))),
+ (i64 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_f64_reg imm:$lane))),
(DSubReg_f64_other_reg imm:$lane))>;
def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)),
(INSERT_SUBREG QPR:$src,
- (f64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))),
+ (f64 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_f64_reg imm:$lane))),
(DSubReg_f64_other_reg imm:$lane))>;
// VMOVN : Vector Narrowing Move
@@ -3092,70 +3091,110 @@ def VTBX4
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//
+class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
+ : NEONFPPat<(ResTy (OpNode SPR:$a)),
+ (EXTRACT_SUBREG (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)),
+ SPR:$a, arm_ssubreg_0)),
+ arm_ssubreg_0)>;
+
+class N3VSPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
+ (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$a, arm_ssubreg_0),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$b, arm_ssubreg_0)),
+ arm_ssubreg_0)>;
+
+class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
+ (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$acc, arm_ssubreg_0),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$a, arm_ssubreg_0),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$b, arm_ssubreg_0)),
+ arm_ssubreg_0)>;
+
// These need separate instructions because they must use DPR_VFP2 register
// class which have SPR sub-registers.
// Vector Add Operations used for single-precision FP
let neverHasSideEffects = 1 in
-def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd", "f32", v2f32, v2f32, fadd,1>;
-def : N3VDsPat<fadd, VADDfd_sfp>;
+def VADDfd_sfp : N3VS<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>;
+def : N3VSPat<fadd, VADDfd_sfp>;
// Vector Sub Operations used for single-precision FP
let neverHasSideEffects = 1 in
-def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub", "f32", v2f32, v2f32, fsub,0>;
-def : N3VDsPat<fsub, VSUBfd_sfp>;
+def VSUBfd_sfp : N3VS<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>;
+def : N3VSPat<fsub, VSUBfd_sfp>;
// Vector Multiply Operations used for single-precision FP
let neverHasSideEffects = 1 in
-def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul", "f32", v2f32, v2f32, fmul,1>;
-def : N3VDsPat<fmul, VMULfd_sfp>;
+def VMULfd_sfp : N3VS<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>;
+def : N3VSPat<fmul, VMULfd_sfp>;
// Vector Multiply-Accumulate/Subtract used for single-precision FP
// vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so
// we want to avoid them for now. e.g., alternating vmla/vadd instructions.
//let neverHasSideEffects = 1 in
-//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32,fmul,fadd>;
-//def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>;
+//def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32",
+// v2f32, fmul, fadd>;
+//def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>;
//let neverHasSideEffects = 1 in
-//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32,fmul,fsub>;
-//def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;
+//def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32",
+// v2f32, fmul, fsub>;
+//def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>;
// Vector Absolute used for single-precision FP
let neverHasSideEffects = 1 in
-def VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
- IIC_VUNAD, "vabs", "f32",
- v2f32, v2f32, int_arm_neon_vabs>;
-def : N2VDIntsPat<fabs, VABSfd_sfp>;
+def VABSfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 0,
+ (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
+ "vabs", "f32", "$dst, $src", "", []>;
+def : N2VSPat<fabs, f32, v2f32, VABSfd_sfp>;
// Vector Negate used for single-precision FP
let neverHasSideEffects = 1 in
-def VNEGf32d_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
- (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
- "vneg", "f32", "$dst, $src", "", []>;
-def : N2VDIntsPat<fneg, VNEGf32d_sfp>;
+def VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
+ (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
+ "vneg", "f32", "$dst, $src", "", []>;
+def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>;
+
+// Vector Maximum used for single-precision FP
+let neverHasSideEffects = 1 in
+def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
+ (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
+ "vmax", "f32", "$dst, $src1, $src2", "", []>;
+def : N3VSPat<NEONfmax, VMAXfd_sfp>;
+
+// Vector Minimum used for single-precision FP
+let neverHasSideEffects = 1 in
+def VMINfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
+ (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
+ "vmin", "f32", "$dst, $src1, $src2", "", []>;
+def : N3VSPat<NEONfmin, VMINfd_sfp>;
// Vector Convert between single-precision FP and integer
let neverHasSideEffects = 1 in
-def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
- v2i32, v2f32, fp_to_sint>;
-def : N2VDsPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;
+def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
+ v2i32, v2f32, fp_to_sint>;
+def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;
let neverHasSideEffects = 1 in
-def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
- v2i32, v2f32, fp_to_uint>;
-def : N2VDsPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;
+def VCVTf2ud_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
+ v2i32, v2f32, fp_to_uint>;
+def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;
let neverHasSideEffects = 1 in
-def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
- v2f32, v2i32, sint_to_fp>;
-def : N2VDsPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;
+def VCVTs2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
+ v2f32, v2i32, sint_to_fp>;
+def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;
let neverHasSideEffects = 1 in
-def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
- v2f32, v2i32, uint_to_fp>;
-def : N2VDsPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;
+def VCVTu2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
+ v2f32, v2i32, uint_to_fp>;
+def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 1dcea26..786dd65 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -120,7 +120,10 @@ def t_addrmode_sp : Operand<i32>,
// Miscellaneous Instructions.
//
-let Defs = [SP], Uses = [SP] in {
+// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
+// from removing one half of the matched pairs. That breaks PEI, which assumes
+// these will always be in pairs, and asserts if it finds otherwise. Better way?
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
def tADJCALLSTACKUP :
PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary,
"@ tADJCALLSTACKUP $amt1",
@@ -132,6 +135,76 @@ PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
[(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>;
}
+def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101111> {
+ let Inst{9-8} = 0b11;
+ let Inst{7-0} = 0b00000000;
+}
+
+def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101111> {
+ let Inst{9-8} = 0b11;
+ let Inst{7-0} = 0b00010000;
+}
+
+def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101111> {
+ let Inst{9-8} = 0b11;
+ let Inst{7-0} = 0b00100000;
+}
+
+def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101111> {
+ let Inst{9-8} = 0b11;
+ let Inst{7-0} = 0b00110000;
+}
+
+def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101111> {
+ let Inst{9-8} = 0b11;
+ let Inst{7-0} = 0b01000000;
+}
+
+def tSETENDBE : T1I<(outs), (ins), NoItinerary, "setend\tbe",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101101> {
+ let Inst{9-5} = 0b10010;
+ let Inst{3} = 1;
+}
+
+def tSETENDLE : T1I<(outs), (ins), NoItinerary, "setend\tle",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101101> {
+ let Inst{9-5} = 0b10010;
+ let Inst{3} = 0;
+}
+
+// The i32imm operand $val can be used by a debugger to store more information
+// about the breakpoint.
+def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101111> {
+ let Inst{9-8} = 0b10;
+}
+
+// Change Processor State is a system instruction -- for disassembly only.
+// The singleton $opt operand contains the following information:
+// opt{4-0} = mode ==> don't care
+// opt{5} = changemode ==> 0 (false for 16-bit Thumb instr)
+// opt{8-6} = AIF from Inst{2-0}
+// opt{10-9} = 1:imod from Inst{4} with 0b10 as enable and 0b11 as disable
+//
+// The opt{4-0} and opt{5} sub-fields are to accommodate 32-bit Thumb and ARM
+// CPS which has more options.
+def tCPS : T1I<(outs), (ins i32imm:$opt), NoItinerary, "cps${opt:cps}",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Misc<0b0110011>;
+
// For both thumb1 and thumb2.
let isNotDuplicable = 1 in
def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr,
@@ -200,7 +273,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
let Inst{6-3} = 0b1110; // Rm = lr
}
// Alternative return instruction used by vararg functions.
- def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx\t$target", []>,
+ def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx\t$target",[]>,
T1Special<{1,1,0,?}>; // A6.2.3 & A8.6.25
}
@@ -228,20 +301,20 @@ let isCall = 1,
D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
// Also used for Thumb2
def tBL : TIx2<0b11110, 0b11, 1,
- (outs), (ins i32imm:$func, variable_ops), IIC_Br,
+ (outs), (ins i32imm:$func, variable_ops), IIC_Br,
"bl\t${func:call}",
[(ARMtcall tglobaladdr:$func)]>,
Requires<[IsThumb, IsNotDarwin]>;
// ARMv5T and above, also used for Thumb2
def tBLXi : TIx2<0b11110, 0b11, 0,
- (outs), (ins i32imm:$func, variable_ops), IIC_Br,
+ (outs), (ins i32imm:$func, variable_ops), IIC_Br,
"blx\t${func:call}",
[(ARMcall tglobaladdr:$func)]>,
Requires<[IsThumb, HasV5T, IsNotDarwin]>;
// Also used for Thumb2
- def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
+ def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
"blx\t$func",
[(ARMtcall GPR:$func)]>,
Requires<[IsThumb, HasV5T, IsNotDarwin]>,
@@ -249,7 +322,7 @@ let isCall = 1,
// ARMv4T
def tBX : TIx2<{?,?,?,?,?}, {?,?}, ?,
- (outs), (ins tGPR:$func, variable_ops), IIC_Br,
+ (outs), (ins tGPR:$func, variable_ops), IIC_Br,
"mov\tlr, pc\n\tbx\t$func",
[(ARMcall_nolink tGPR:$func)]>,
Requires<[IsThumb1Only, IsNotDarwin]>;
@@ -263,20 +336,20 @@ let isCall = 1,
D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
// Also used for Thumb2
def tBLr9 : TIx2<0b11110, 0b11, 1,
- (outs), (ins i32imm:$func, variable_ops), IIC_Br,
+ (outs), (ins i32imm:$func, variable_ops), IIC_Br,
"bl\t${func:call}",
[(ARMtcall tglobaladdr:$func)]>,
Requires<[IsThumb, IsDarwin]>;
// ARMv5T and above, also used for Thumb2
def tBLXi_r9 : TIx2<0b11110, 0b11, 0,
- (outs), (ins i32imm:$func, variable_ops), IIC_Br,
+ (outs), (ins i32imm:$func, variable_ops), IIC_Br,
"blx\t${func:call}",
[(ARMcall tglobaladdr:$func)]>,
Requires<[IsThumb, HasV5T, IsDarwin]>;
// Also used for Thumb2
- def tBLXr_r9 : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
+ def tBLXr_r9 : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
"blx\t$func",
[(ARMtcall GPR:$func)]>,
Requires<[IsThumb, HasV5T, IsDarwin]>,
@@ -284,7 +357,7 @@ let isCall = 1,
// ARMv4T
def tBXr9 : TIx2<{?,?,?,?,?}, {?,?}, ?,
- (outs), (ins tGPR:$func, variable_ops), IIC_Br,
+ (outs), (ins tGPR:$func, variable_ops), IIC_Br,
"mov\tlr, pc\n\tbx\t$func",
[(ARMcall_nolink tGPR:$func)]>,
Requires<[IsThumb1Only, IsDarwin]>;
@@ -299,7 +372,7 @@ let isBranch = 1, isTerminator = 1 in {
// Far jump
let Defs = [LR] in
- def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br,
+ def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br,
"bl\t$target\t@ far jump",[]>;
def tBR_JTr : T1JTI<(outs),
@@ -332,16 +405,34 @@ let isBranch = 1, isTerminator = 1 in {
T1Misc<{1,0,?,1,?,?,?}>;
}
+// A8.6.218 Supervisor Call (Software Interrupt) -- for disassembly only
+// A8.6.16 B: Encoding T1
+// If Inst{11-8} == 0b1111 then SEE SVC
+let isCall = 1 in {
+def tSVC : T1pI<(outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", []>,
+ Encoding16 {
+ let Inst{15-12} = 0b1101;
+ let Inst{11-8} = 0b1111;
+}
+}
+
+// A8.6.16 B: Encoding T1 -- for disassembly only
+// If Inst{11-8} == 0b1110 then UNDEFINED
+def tTRAP : T1I<(outs), (ins), IIC_Br, "trap", []>, Encoding16 {
+ let Inst{15-12} = 0b1101;
+ let Inst{11-8} = 0b1110;
+}
+
//===----------------------------------------------------------------------===//
// Load Store Instructions.
//
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
-def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
"ldr", "\t$dst, $addr",
[(set tGPR:$dst, (load t_addrmode_s4:$addr))]>,
T1LdSt<0b100>;
-def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
+def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
"ldr", "\t$dst, $addr",
[]>,
T1LdSt4Imm<{1,?,?}>;
@@ -391,15 +482,14 @@ def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
// Load tconstpool
// FIXME: Use ldr.n to work around a Darwin assembler bug.
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
"ldr", ".n\t$dst, $addr",
[(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>,
T1Encoding<{0,1,0,0,1,?}>; // A6.2 & A8.6.59
// Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
- mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
"ldr", "\t$dst, $addr", []>,
T1LdStSP<{1,?,?}>;
@@ -644,7 +734,7 @@ def tMOVgpr2gpr : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
// multiply register
let isCommutable = 1 in
def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMUL32,
- "mul", "\t$dst, $rhs",
+ "mul", "\t$dst, $rhs, $dst", /* A8.6.105 MUL Encoding T1 */
[(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>,
T1DataProcessing<0b1101>;
@@ -761,7 +851,7 @@ def tUXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
T1Misc<{0,0,1,0,1,0,?}>;
-// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC DAG operation.
+// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC operation.
// Expanded after instruction selection into a branch sequence.
let usesCustomInserter = 1 in // Expanded after instruction selection.
def tMOVCCr_pseudo :
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 55c7aa2..6241766 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -13,7 +13,7 @@
// IT block predicate field
def it_pred : Operand<i32> {
- let PrintMethod = "printPredicateOperand";
+ let PrintMethod = "printMandatoryPredicateOperand";
}
// IT block condition mask
@@ -53,10 +53,10 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
// bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11].
def t2_so_imm : Operand<i32>,
PatLeaf<(imm), [{
- return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1;
+ return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1;
}]>;
-// t2_so_imm_not - Match an immediate that is a complement
+// t2_so_imm_not - Match an immediate that is a complement
// of a t2_so_imm.
def t2_so_imm_not : Operand<i32>,
PatLeaf<(imm), [{
@@ -114,13 +114,13 @@ def imm0_4095 : Operand<i32>,
return (uint32_t)N->getZExtValue() < 4096;
}]>;
-def imm0_4095_neg : PatLeaf<(i32 imm), [{
- return (uint32_t)(-N->getZExtValue()) < 4096;
-}], imm_neg_XFORM>;
+def imm0_4095_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)(-N->getZExtValue()) < 4096;
+}], imm_neg_XFORM>;
def imm0_255_neg : PatLeaf<(i32 imm), [{
return (uint32_t)(-N->getZExtValue()) < 255;
-}], imm_neg_XFORM>;
+}], imm_neg_XFORM>;
// Define Thumb2 specific addressing modes.
@@ -131,7 +131,7 @@ def t2addrmode_imm12 : Operand<i32>,
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
-// t2addrmode_imm8 := reg - imm8
+// t2addrmode_imm8 := reg +/- imm8
def t2addrmode_imm8 : Operand<i32>,
ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
let PrintMethod = "printT2AddrModeImm8Operand";
@@ -208,7 +208,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
// binary operation that produces a value. These are predicable and can be
/// changed to modify CPSR.
-multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
+multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0, string wide =""> {
// shifted imm
def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
@@ -368,15 +368,16 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
}
/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns
-/// for a binary operation that produces a value and use and define the carry
+/// for a binary operation that produces a value and use the carry
/// bit. It's not predicable.
let Uses = [CPSR] in {
-multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> {
+multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+ bit Commutable = 0> {
// shifted imm
def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, "\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
- Requires<[IsThumb2, CarryDefIsUnused]> {
+ Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -387,7 +388,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm
def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
opc, ".w\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
- Requires<[IsThumb2, CarryDefIsUnused]> {
+ Requires<[IsThumb2]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -401,19 +402,23 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm
def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
opc, ".w\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
- Requires<[IsThumb2, CarryDefIsUnused]> {
+ Requires<[IsThumb2]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
let Inst{20} = 0; // The S bit.
}
- // Carry setting variants
+}
+
+// Carry setting variants
+let Defs = [CPSR] in {
+multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
+ bit Commutable = 0> {
// shifted imm
- def Sri : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
- !strconcat(opc, "s\t$dst, $lhs, $rhs"),
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
- Requires<[IsThumb2, CarryDefIsUsed]> {
- let Defs = [CPSR];
+ def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ opc, "\t$dst, $lhs, $rhs",
+ [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+ Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -421,11 +426,10 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm
let Inst{15} = 0;
}
// register
- def Srr : T2XI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
- !strconcat(opc, "s.w\t$dst, $lhs, $rhs"),
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
- Requires<[IsThumb2, CarryDefIsUsed]> {
- let Defs = [CPSR];
+ def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ opc, ".w\t$dst, $lhs, $rhs",
+ [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+ Requires<[IsThumb2]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -436,11 +440,10 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm
let Inst{5-4} = 0b00; // type
}
// shifted register
- def Srs : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
- !strconcat(opc, "s.w\t$dst, $lhs, $rhs"),
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
- Requires<[IsThumb2, CarryDefIsUsed]> {
- let Defs = [CPSR];
+ def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ opc, ".w\t$dst, $lhs, $rhs",
+ [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+ Requires<[IsThumb2]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -448,6 +451,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm
}
}
}
+}
/// T2I_rbin_s_is - Same as T2I_rbin_is except sets 's' bit.
let Defs = [CPSR] in {
@@ -626,19 +630,6 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
}
}
-/// T2I_picld - Defines the PIC load pattern.
-class T2I_picld<string opc, PatFrag opnode> :
- T2I<(outs GPR:$dst), (ins addrmodepc:$addr), IIC_iLoadi,
- !strconcat("\n${addr:label}:\n\t", opc), "\t$dst, $addr",
- [(set GPR:$dst, (opnode addrmodepc:$addr))]>;
-
-/// T2I_picst - Defines the PIC store pattern.
-class T2I_picst<string opc, PatFrag opnode> :
- T2I<(outs), (ins GPR:$src, addrmodepc:$addr), IIC_iStorer,
- !strconcat("\n${addr:label}:\n\t", opc), "\t$src, $addr",
- [(opnode GPR:$src, addrmodepc:$addr)]>;
-
-
/// T2I_unary_rrot - A unary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
@@ -666,6 +657,57 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
}
}
+// SXTB16 and UXTB16 do not need the .w qualifier.
+multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> {
+ def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ opc, "\t$dst, $src",
+ [(set GPR:$dst, (opnode GPR:$src))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
+ opc, "\t$dst, $src, ror $rot",
+ [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = {?,?}; // rotate
+ }
+}
+
+// DO variant - disassembly only, no pattern
+
+multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> {
+ def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ opc, "\t$dst, $src", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
+ opc, "\t$dst, $src, ror $rot", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = {?,?}; // rotate
+ }
+}
+
/// T2I_bin_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
@@ -692,6 +734,29 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
}
}
+// DO variant - disassembly only, no pattern
+
+multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
+ def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr,
+ opc, "\t$dst, $LHS, $RHS", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
+ IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = {?,?}; // rotate
+ }
+}
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -734,7 +799,7 @@ def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
let Inst{19-16} = 0b1101; // Rn = sp
let Inst{15} = 0;
}
-def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
+def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
IIC_iALUi, "addw", "\t$dst, $sp, $imm", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
@@ -787,6 +852,25 @@ def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
let Inst{15} = 0;
}
+// Signed and unsigned division, for disassembly only
+def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
+ "sdiv", "\t$dst, $a, $b", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011100;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
+
+def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
+ "udiv", "\t$dst, $a, $b", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011101;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
+
// Pseudo instruction that will expand into a t2SUBrSPi + a copy.
let usesCustomInserter = 1 in { // Expanded after instruction selection.
def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
@@ -803,7 +887,7 @@ def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
//
// Load
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
defm t2LDR : T2I_ld<0, 0b10, "ldr", UnOpFrag<(load node:$Src)>>;
// Loads with zero extension
@@ -925,10 +1009,32 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb),
[]>;
}
+// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are
+// for disassembly only.
+// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
+class T2IldT<bit signed, bits<2> type, string opc>
+ : T2Ii8<(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc,
+ "\t$dst, $addr", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
+ let Inst{22-21} = type;
+ let Inst{20} = 1; // load
+ let Inst{11} = 1;
+ let Inst{10-8} = 0b110; // PUW.
+}
+
+def t2LDRT : T2IldT<0, 0b10, "ldrt">;
+def t2LDRBT : T2IldT<0, 0b00, "ldrbt">;
+def t2LDRHT : T2IldT<0, 0b01, "ldrht">;
+def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt">;
+def t2LDRSHT : T2IldT<1, 0b01, "ldrsht">;
+
// Store
-defm t2STR : T2I_st<0b10, "str", BinOpFrag<(store node:$LHS, node:$RHS)>>;
-defm t2STRB : T2I_st<0b00, "strb", BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
-defm t2STRH : T2I_st<0b01, "strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+defm t2STR :T2I_st<0b10,"str", BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm t2STRB:T2I_st<0b00,"strb",BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+defm t2STRH:T2I_st<0b01,"strh",BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
// Store doubleword
let mayLoad = 1, hasExtraSrcRegAllocReq = 1 in
@@ -979,9 +1085,98 @@ def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb),
[(set GPR:$base_wb,
(post_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
+// only.
+// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
+class T2IstT<bits<2> type, string opc>
+ : T2Ii8<(outs GPR:$src), (ins t2addrmode_imm8:$addr), IIC_iStorei, opc,
+ "\t$src, $addr", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-25} = 0b00;
+ let Inst{24} = 0; // not signed
+ let Inst{23} = 0;
+ let Inst{22-21} = type;
+ let Inst{20} = 0; // store
+ let Inst{11} = 1;
+ let Inst{10-8} = 0b110; // PUW
+}
+
+def t2STRT : T2IstT<0b10, "strt">;
+def t2STRBT : T2IstT<0b00, "strbt">;
+def t2STRHT : T2IstT<0b01, "strht">;
// FIXME: ldrd / strd pre / post variants
+// T2Ipl (Preload Data/Instruction) signals the memory system of possible future
+// data/instruction access. These are for disassembly only.
+multiclass T2Ipl<bit instr, bit write, string opc> {
+
+ def i12 : T2I<(outs), (ins t2addrmode_imm12:$addr), IIC_iLoadi, opc,
+ "\t$addr", []> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{23} = 1; // U = 1
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+ }
+
+ def i8 : T2I<(outs), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc,
+ "\t$addr", []> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{23} = 0; // U = 0
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = 0b1100;
+ }
+
+ // A8.6.118 #0 and #-0 differs. Translates -0 to -1, -1 to -2, ..., etc.
+ def pci : T2I<(outs), (ins GPR:$base, i32imm:$imm), IIC_iLoadi, opc,
+ "\t[pc, ${imm:negzero}]", []> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{23} = ?; // add = (U == 1)
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{19-16} = 0b1111; // Rn = 0b1111
+ let Inst{15-12} = 0b1111;
+ }
+
+ def r : T2I<(outs), (ins GPR:$base, GPR:$a), IIC_iLoadi, opc,
+ "\t[$base, $a]", []> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{23} = 0; // add = TRUE for T1
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-6} = 0000000;
+ let Inst{5-4} = 0b00; // no shift is applied
+ }
+
+ def s : T2I<(outs), (ins GPR:$base, GPR:$a, i32imm:$shamt), IIC_iLoadi, opc,
+ "\t[$base, $a, lsl $shamt]", []> {
+ let Inst{31-25} = 0b1111100;
+ let Inst{24} = instr;
+ let Inst{23} = 0; // add = TRUE for T1
+ let Inst{22} = 0;
+ let Inst{21} = write;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-6} = 0000000;
+ }
+}
+
+defm t2PLD : T2Ipl<0, 0, "pld">;
+defm t2PLDW : T2Ipl<0, 1, "pldw">;
+defm t2PLI : T2Ipl<1, 0, "pli">;
+
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
@@ -989,7 +1184,7 @@ def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb),
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
def t2LDM : T2XI<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
- IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> {
+ IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b00;
let Inst{24-23} = {?, ?}; // IA: '01', DB: '10'
@@ -1001,7 +1196,7 @@ def t2LDM : T2XI<(outs),
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
def t2STM : T2XI<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
- IIC_iStorem, "stm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> {
+ IIC_iStorem, "stm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b00;
let Inst{24-23} = {?, ?}; // IA: '01', DB: '10'
@@ -1074,13 +1269,15 @@ defm t2SXTB : T2I_unary_rrot<0b100, "sxtb",
UnOpFrag<(sext_inreg node:$Src, i8)>>;
defm t2SXTH : T2I_unary_rrot<0b000, "sxth",
UnOpFrag<(sext_inreg node:$Src, i16)>>;
+defm t2SXTB16 : T2I_unary_rrot_DO<0b010, "sxtb16">;
defm t2SXTAB : T2I_bin_rrot<0b100, "sxtab",
BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
defm t2SXTAH : T2I_bin_rrot<0b000, "sxtah",
BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+defm t2SXTAB16 : T2I_bin_rrot_DO<0b010, "sxtab16">;
-// TODO: SXT(A){B|H}16
+// TODO: SXT(A){B|H}16 - done for disassembly only
// Zero extenders
@@ -1089,7 +1286,7 @@ defm t2UXTB : T2I_unary_rrot<0b101, "uxtb",
UnOpFrag<(and node:$Src, 0x000000FF)>>;
defm t2UXTH : T2I_unary_rrot<0b001, "uxth",
UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
-defm t2UXTB16 : T2I_unary_rrot<0b011, "uxtb16",
+defm t2UXTB16 : T2I_unary_rrot_nw<0b011, "uxtb16",
UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
@@ -1101,6 +1298,7 @@ defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
defm t2UXTAH : T2I_bin_rrot<0b001, "uxtah",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+defm t2UXTAB16 : T2I_bin_rrot_DO<0b011, "uxtab16">;
}
//===----------------------------------------------------------------------===//
@@ -1119,9 +1317,13 @@ defm t2SUBS : T2I_bin_s_irs <0b1101, "sub",
BinOpFrag<(subc node:$LHS, node:$RHS)>>;
defm t2ADC : T2I_adde_sube_irs<0b1010, "adc",
- BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>;
+ BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc",
- BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+ BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
+defm t2ADCS : T2I_adde_sube_s_irs<0b1010, "adc",
+ BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
+defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc",
+ BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
// RSB
defm t2RSB : T2I_rbin_is <0b1110, "rsb",
@@ -1138,6 +1340,155 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
(t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
+// Select Bytes -- for disassembly only
+
+def t2SEL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, "sel",
+ "\t$dst, $a, $b", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b010;
+ let Inst{23} = 0b1;
+ let Inst{22-20} = 0b010;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 0b1;
+ let Inst{6-4} = 0b000;
+}
+
+// A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned)
+// And Miscellaneous operations -- for disassembly only
+class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc>
+ : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, opc,
+ "\t$dst, $a, $b", [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0101;
+ let Inst{22-20} = op22_20;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = op7_4;
+}
+
+// Saturating add/subtract -- for disassembly only
+
+def t2QADD : T2I_pam<0b000, 0b1000, "qadd">;
+def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">;
+def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">;
+def t2QASX : T2I_pam<0b010, 0b0001, "qasx">;
+def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd">;
+def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub">;
+def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">;
+def t2QSUB : T2I_pam<0b000, 0b1010, "qsub">;
+def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">;
+def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">;
+def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">;
+def t2UQADD8 : T2I_pam<0b000, 0b0101, "uqadd8">;
+def t2UQASX : T2I_pam<0b010, 0b0101, "uqasx">;
+def t2UQSAX : T2I_pam<0b110, 0b0101, "uqsax">;
+def t2UQSUB16 : T2I_pam<0b101, 0b0101, "uqsub16">;
+def t2UQSUB8 : T2I_pam<0b100, 0b0101, "uqsub8">;
+
+// Signed/Unsigned add/subtract -- for disassembly only
+
+def t2SASX : T2I_pam<0b010, 0b0000, "sasx">;
+def t2SADD16 : T2I_pam<0b001, 0b0000, "sadd16">;
+def t2SADD8 : T2I_pam<0b000, 0b0000, "sadd8">;
+def t2SSAX : T2I_pam<0b110, 0b0000, "ssax">;
+def t2SSUB16 : T2I_pam<0b101, 0b0000, "ssub16">;
+def t2SSUB8 : T2I_pam<0b100, 0b0000, "ssub8">;
+def t2UASX : T2I_pam<0b010, 0b0100, "uasx">;
+def t2UADD16 : T2I_pam<0b001, 0b0100, "uadd16">;
+def t2UADD8 : T2I_pam<0b000, 0b0100, "uadd8">;
+def t2USAX : T2I_pam<0b110, 0b0100, "usax">;
+def t2USUB16 : T2I_pam<0b101, 0b0100, "usub16">;
+def t2USUB8 : T2I_pam<0b100, 0b0100, "usub8">;
+
+// Signed/Unsigned halving add/subtract -- for disassembly only
+
+def t2SHASX : T2I_pam<0b010, 0b0010, "shasx">;
+def t2SHADD16 : T2I_pam<0b001, 0b0010, "shadd16">;
+def t2SHADD8 : T2I_pam<0b000, 0b0010, "shadd8">;
+def t2SHSAX : T2I_pam<0b110, 0b0010, "shsax">;
+def t2SHSUB16 : T2I_pam<0b101, 0b0010, "shsub16">;
+def t2SHSUB8 : T2I_pam<0b100, 0b0010, "shsub8">;
+def t2UHASX : T2I_pam<0b010, 0b0110, "uhasx">;
+def t2UHADD16 : T2I_pam<0b001, 0b0110, "uhadd16">;
+def t2UHADD8 : T2I_pam<0b000, 0b0110, "uhadd8">;
+def t2UHSAX : T2I_pam<0b110, 0b0110, "uhsax">;
+def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">;
+def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">;
+
+// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
+
+def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ NoItinerary, "usad8", "\t$dst, $a, $b", []> {
+ let Inst{15-12} = 0b1111;
+}
+def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst),
+ (ins GPR:$a, GPR:$b, GPR:$acc), NoItinerary, "usada8",
+ "\t$dst, $a, $b, $acc", []>;
+
+// Signed/Unsigned saturate -- for disassembly only
+
+def t2SSATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
+ NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1100;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 0; // sh = '0'
+}
+
+def t2SSATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
+ NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1100;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+}
+
+def t2SSAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
+ "ssat16", "\t$dst, $bit_pos, $a",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1100;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+ let Inst{14-12} = 0b000; // imm3 = '000'
+ let Inst{7-6} = 0b00; // imm2 = '00'
+}
+
+def t2USATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
+ NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1110;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 0; // sh = '0'
+}
+
+def t2USATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
+ NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1110;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+}
+
+def t2USAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
+ "usat16", "\t$dst, $bit_pos, $a",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1110;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+ let Inst{14-12} = 0b000; // imm3 = '000'
+ let Inst{7-6} = 0b00; // imm2 = '00'
+}
//===----------------------------------------------------------------------===//
// Shift and rotate Instructions.
@@ -1342,6 +1693,8 @@ def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
}
} // neverHasSideEffects
+// Rounding variants of the below included for disassembly only
+
// Most significant word multiply
def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
"smmul", "\t$dst, $a, $b",
@@ -1353,6 +1706,15 @@ def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
+def t2SMMULR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ "smmulr", "\t$dst, $a, $b", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
"smmla", "\t$dst, $a, $b, $c",
[(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]> {
@@ -1363,6 +1725,14 @@ def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
+def t2SMMLAR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+ "smmlar", "\t$dst, $a, $b, $c", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{15-12} = {?, ?, ?, ?}; // Ra
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
"smmls", "\t$dst, $a, $b, $c",
@@ -1374,6 +1744,15 @@ def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
+def t2SMMLSR : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+ "smmlsr", "\t$dst, $a, $b, $c", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b110;
+ let Inst{15-12} = {?, ?, ?, ?}; // Ra
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
multiclass T2I_smul<string opc, PatFrag opnode> {
def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
!strconcat(opc, "bb"), "\t$dst, $a, $b",
@@ -1466,7 +1845,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
!strconcat(opc, "bt"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
- (sra GPR:$b, (i32 16)))))]> {
+ (sra GPR:$b, (i32 16)))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1490,7 +1869,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
!strconcat(opc, "tt"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
- (sra GPR:$b, (i32 16)))))]> {
+ (sra GPR:$b, (i32 16)))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1502,7 +1881,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
!strconcat(opc, "wb"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
- (sext_inreg GPR:$b, i16)), (i32 16))))]> {
+ (sext_inreg GPR:$b, i16)), (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1514,7 +1893,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
!strconcat(opc, "wt"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
- (sra GPR:$b, (i32 16))), (i32 16))))]> {
+ (sra GPR:$b, (i32 16))), (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1527,16 +1906,70 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
-// TODO: Halfword multiple accumulate long: SMLAL<x><y>
-// TODO: Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
-
+// Halfword multiple accumulate long: SMLAL<x><y> -- for disassembly only
+def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs GPR:$ldst,GPR:$hdst),
+ (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs GPR:$ldst,GPR:$hdst),
+ (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs GPR:$ldst,GPR:$hdst),
+ (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs GPR:$ldst,GPR:$hdst),
+ (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b",
+ [/* For disassembly only; pattern left blank */]>;
+
+// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
+// These are for disassembly only.
+
+def t2SMUAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst),
+ (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlad",
+ "\t$dst, $a, $b, $acc", []>;
+def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst),
+ (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smladx",
+ "\t$dst, $a, $b, $acc", []>;
+def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst),
+ (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsd",
+ "\t$dst, $a, $b, $acc", []>;
+def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst),
+ (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsdx",
+ "\t$dst, $a, $b, $acc", []>;
+def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs GPR:$ldst,GPR:$hdst),
+ (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlald",
+ "\t$ldst, $hdst, $a, $b", []>;
+def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs GPR:$ldst,GPR:$hdst),
+ (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaldx",
+ "\t$ldst, $hdst, $a, $b", []>;
+def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs GPR:$ldst,GPR:$hdst),
+ (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsld",
+ "\t$ldst, $hdst, $a, $b", []>;
+def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs GPR:$ldst,GPR:$hdst),
+ (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsldx",
+ "\t$ldst, $hdst, $a, $b", []>;
//===----------------------------------------------------------------------===//
// Misc. Arithmetic Instructions.
//
-class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
: T2I<oops, iops, itin, opc, asm, pattern> {
let Inst{31-27} = 0b11111;
let Inst{26-22} = 0b01010;
@@ -1572,7 +2005,7 @@ def t2REVSH : T2I_misc<0b01, 0b11, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
(shl GPR:$src, (i32 8))), i16))]>;
def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, LSL $shamt",
+ IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
(and (shl GPR:$src2, (i32 imm:$shamt)),
0xFFFF0000)))]> {
@@ -1590,7 +2023,7 @@ def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
(t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>;
def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, ASR $shamt",
+ IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
(and (sra GPR:$src2, imm16_31:$shamt),
0xFFFF)))]> {
@@ -1643,7 +2076,7 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
-// a two-value operand where a dag node expects two operands. :(
+// a two-value operand where a dag node expects two operands. :(
def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr,
"mov", ".w\t$dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
@@ -1723,6 +2156,66 @@ def t2Int_SyncBarrierV7 : AInoP<(outs), (ins),
}
}
+// Helper class for multiclass T2MemB -- for disassembly only
+class T2I_memb<string opc, string asm>
+ : T2I<(outs), (ins), NoItinerary, opc, asm,
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasV7]> {
+ let Inst{31-20} = 0xf3b;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+}
+
+multiclass T2MemB<bits<4> op7_4, string opc> {
+
+ def st : T2I_memb<opc, "\tst"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b1110;
+ }
+
+ def ish : T2I_memb<opc, "\tish"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b1011;
+ }
+
+ def ishst : T2I_memb<opc, "\tishst"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b1010;
+ }
+
+ def nsh : T2I_memb<opc, "\tnsh"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b0111;
+ }
+
+ def nshst : T2I_memb<opc, "\tnshst"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b0110;
+ }
+
+ def osh : T2I_memb<opc, "\tosh"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b0011;
+ }
+
+ def oshst : T2I_memb<opc, "\toshst"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b0010;
+ }
+}
+
+// These DMB variants are for disassembly only.
+defm t2DMB : T2MemB<0b0101, "dmb">;
+
+// These DSB variants are for disassembly only.
+defm t2DSB : T2MemB<0b0100, "dsb">;
+
+// ISB has only full system option -- for disassembly only
+def t2ISBsy : T2I_memb<"isb", ""> {
+ let Inst{7-4} = 0b0110;
+ let Inst{3-0} = 0b1111;
+}
+
class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
InstrItinClass itin, string opc, string asm, string cstr,
list<dag> pattern, bits<4> rt2 = 0b1111>
@@ -1789,6 +2282,16 @@ def t2STREXD : T2I_strex<0b11, (outs GPR:$success),
{?, ?, ?, ?}>;
}
+// Clear-Exclusive is for disassembly only.
+def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV7]> {
+ let Inst{31-20} = 0xf3b;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{7-4} = 0b0010;
+}
+
//===----------------------------------------------------------------------===//
// TLS Instructions
//
@@ -1906,6 +2409,24 @@ def t2TBH :
let Inst{15-8} = 0b11110000;
let Inst{7-4} = 0b0001; // H form
}
+
+// Generic versions of the above two instructions, for disassembly only
+
+def t2TBBgen : T2I<(outs), (ins GPR:$a, GPR:$b), IIC_Br,
+ "tbb", "\t[$a, $b]", []>{
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001101;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-4} = 0b0000; // B form
+}
+
+def t2TBHgen : T2I<(outs), (ins GPR:$a, GPR:$b), IIC_Br,
+ "tbh", "\t[$a, $b, lsl #1]", []> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001101;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-4} = 0b0001; // H form
+}
} // isNotDuplicable, isIndirectBranch
} // isBranch, isTerminator, isBarrier
@@ -1931,6 +2452,119 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
let Inst{15-8} = 0b10111111;
}
+// Branch and Exchange Jazelle -- for disassembly only
+// Rm = Inst{19-16}
+def t2BXJ : T2I<(outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-20} = 0b111100;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+}
+
+// Change Processor State is a system instruction -- for disassembly only.
+// The singleton $opt operand contains the following information:
+// opt{4-0} = mode from Inst{4-0}
+// opt{5} = changemode from Inst{17}
+// opt{8-6} = AIF from Inst{8-6}
+// opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable
+def t2CPS : T2XI<(outs),(ins i32imm:$opt), NoItinerary, "cps${opt:cps}",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-20} = 0b111010;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+}
+
+// A6.3.4 Branches and miscellaneous control
+// Table A6-14 Change Processor State, and hint instructions
+// Helper class for disassembly only.
+class T2I_hint<bits<8> op7_0, string opc, string asm>
+ : T2I<(outs), (ins), NoItinerary, opc, asm,
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-20} = 0xf3a;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{10-8} = 0b000;
+ let Inst{7-0} = op7_0;
+}
+
+def t2NOP : T2I_hint<0b00000000, "nop", ".w">;
+def t2YIELD : T2I_hint<0b00000001, "yield", ".w">;
+def t2WFE : T2I_hint<0b00000010, "wfe", ".w">;
+def t2WFI : T2I_hint<0b00000011, "wfi", ".w">;
+def t2SEV : T2I_hint<0b00000100, "sev", ".w">;
+
+def t2DBG : T2I<(outs),(ins i32imm:$opt), NoItinerary, "dbg", "\t$opt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-20} = 0xf3a;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{10-8} = 0b000;
+ let Inst{7-4} = 0b1111;
+}
+
+// Secure Monitor Call is a system instruction -- for disassembly only
+// Option = Inst{19-16}
+def t2SMC : T2I<(outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26-20} = 0b1111111;
+ let Inst{15-12} = 0b1000;
+}
+
+// Store Return State is a system instruction -- for disassembly only
+def t2SRSDBW : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp!, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000010; // W = 1
+}
+
+def t2SRSDB : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000000; // W = 0
+}
+
+def t2SRSIAW : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsia","\tsp!, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0011010; // W = 1
+}
+
+def t2SRSIA : T2I<(outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0011000; // W = 0
+}
+
+// Return From Exception is a system instruction -- for disassembly only
+def t2RFEDBW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfedb", "\t$base!",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000011; // W = 1
+}
+
+def t2RFEDB : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeab", "\t$base",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000001; // W = 0
+}
+
+def t2RFEIAW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base!",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0011011; // W = 1
+}
+
+def t2RFEIA : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0011001; // W = 0
+}
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
@@ -1970,9 +2604,59 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// Pseudo instruction that combines ldr from constpool and add pc. This should
// be expanded into two instructions late to allow if-conversion and
// scheduling.
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb2]>;
+
+//===----------------------------------------------------------------------===//
+// Move between special register and ARM core register -- for disassembly only
+//
+
+// Rd = Inst{11-8}
+def t2MRS : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-21} = 0b11111;
+ let Inst{20} = 0; // The R bit.
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+}
+
+// Rd = Inst{11-8}
+def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-21} = 0b11111;
+ let Inst{20} = 1; // The R bit.
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+}
+
+// FIXME: mask is ignored for the time being.
+// Rn = Inst{19-16}
+def t2MSR : T2I<(outs), (ins GPR:$src), NoItinerary, "msr", "\tcpsr, $src",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-21} = 0b11100;
+ let Inst{20} = 0; // The R bit.
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+}
+
+// FIXME: mask is ignored for the time being.
+// Rn = Inst{19-16}
+def t2MSRsys : T2I<(outs), (ins GPR:$src), NoItinerary, "msr", "\tspsr, $src",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-21} = 0b11100;
+ let Inst{20} = 1; // The R bit.
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+}
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 365e1e3..7c117ed 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -54,7 +54,7 @@ def vfp_f64imm : Operand<f64>,
// Load / store Instructions.
//
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr),
IIC_fpLoad64, "vldr", ".64\t$dst, $addr",
[(set DPR:$dst, (load addrmode5:$addr))]>;
@@ -412,6 +412,101 @@ def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
let Inst{7} = 0; // Z bit
}
+// Convert between floating-point and fixed-point
+// Data type for fixed-point naming convention:
+// S16 (U=0, sx=0) -> SH
+// U16 (U=1, sx=0) -> UH
+// S32 (U=0, sx=1) -> SL
+// U32 (U=1, sx=1) -> UL
+
+let Constraints = "$a = $dst" in {
+
+// FP to Fixed-Point:
+
+def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+// Fixed-Point to FP:
+
+def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits",
+ [/* For disassembly only; pattern left blank */]>;
+
+} // End of 'let Constraints = "$a = $dst" in'
+
//===----------------------------------------------------------------------===//
// FP FMA Operations.
//
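
For context on the fixed-point conversions added above: the U and sx encoding bits select the S16/U16/S32/U32 data type, and the $fbits operand gives the number of fractional bits, so each conversion scales by 2^fbits. A minimal host-side sketch of that arithmetic (illustrative only -- the real instructions round and saturate per the ARM ARM, which this does not model):

#include <cmath>
#include <cstdint>
#include <cstdio>

// Illustrative: float -> S16 fixed point with 'fbits' fractional bits,
// the scaling performed by vcvt.s16.f32 (VTOSHS above).
static int16_t toFixedS16(float x, int fbits) {
  return static_cast<int16_t>(std::lround(x * std::ldexp(1.0f, fbits)));
}

// The inverse scaling, as performed by vcvt.f32.s16 (VSHTOS above).
static float fromFixedS16(int16_t v, int fbits) {
  return static_cast<float>(v) * std::ldexp(1.0f, -fbits);
}

int main() {
  int16_t f = toFixedS16(1.25f, 8);                  // 1.25 * 2^8 = 320
  std::printf("%d -> %f\n", f, fromFixedS16(f, 8));  // prints 320 -> 1.250000
  return 0;
}
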
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index bef5a06..6db6ba4 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -48,7 +48,13 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction;
// write our own wrapper, which does things our way, so we have complete
// control over register saving and restoring.
extern "C" {
-#if defined(__arm__)
+  // This is not needed on Android (or hand-held devices in general). The
+  // function exists to support lazy symbol lookup (resolving undefined
+  // symbols at runtime). Note that removing the !defined(ANDROID) guard
+  // causes a compile error, because Android's toolchain targets armv5te,
+  // which does not support the 'stmdb' and 'ldmia' instructions used in
+  // this function.
+#if defined(__arm__) && !defined(ANDROID)
void ARMCompilationCallback();
asm(
".text\n"
@@ -60,7 +66,7 @@ extern "C" {
// whole compilation callback doesn't exist as far as the caller is
// concerned, so we can't just preserve the callee saved regs.
"stmdb sp!, {r0, r1, r2, r3, lr}\n"
-#ifndef __SOFTFP__
+#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
"fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
#endif
// The LR contains the address of the stub function on entry.
@@ -83,7 +89,7 @@ extern "C" {
// 6-20 | D0..D7 | Saved VFP registers
// +--------+
//
-#ifndef __SOFTFP__
+#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
// Restore VFP caller-saved registers.
"fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
#endif
@@ -318,6 +324,18 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
*((intptr_t*)RelocPos) |= ResultPtr;
break;
}
+ case ARM::reloc_arm_movw: {
+ ResultPtr = ResultPtr & 0xFFFF;
+ *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
+ *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; // imm4:imm12, Insts[19-16] = imm4, Insts[11-0] = imm12
+ break;
+ }
+ case ARM::reloc_arm_movt: {
+ ResultPtr = (ResultPtr >> 16) & 0xFFFF;
+ *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
+ *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; // imm4:imm12, Insts[19-16] = imm4, Insts[11-0] = imm12
+ break;
+ }
}
}
}
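
Both new relocation cases split a 16-bit value across the MOVW/MOVT imm4:imm12 fields (instruction bits 19-16 and 11-0); for reloc_arm_movt the address is shifted right by 16 first. A standalone restatement of that bit packing, using a hypothetical helper name:

#include <cstdint>
#include <cstdio>

// Illustrative: merge a 16-bit value into the imm4:imm12 fields of an ARM
// MOVW/MOVT encoding, mirroring the OR operations in ARMJITInfo::relocate.
static uint32_t packMovImm16(uint32_t Insn, uint32_t Value) {
  Value &= 0xFFFF;
  Insn |= Value & 0xFFF;                 // imm12 -> Inst{11-0}
  Insn |= ((Value >> 12) & 0xF) << 16;   // imm4  -> Inst{19-16}
  return Insn;
}

int main() {
  uint32_t Addr = 0x12345678;
  uint32_t MovwBits = packMovImm16(0, Addr & 0xFFFF);  // reloc_arm_movw path
  uint32_t MovtBits = packMovImm16(0, Addr >> 16);     // reloc_arm_movt path
  std::printf("movw: %#x  movt: %#x\n", MovwBits, MovtBits);
  return 0;
}
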
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index b78b95b..19f1e3b 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -350,7 +350,8 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
: ARMRegisterInfo::getRegisterNumbering(Reg);
// AM4 - register numbers in ascending order.
// AM5 - consecutive register numbers in ascending order.
- if (NewOffset == Offset + (int)Size &&
+ if (Reg != ARM::SP &&
+ NewOffset == Offset + (int)Size &&
((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
Offset += Size;
PRegNum = RegNum;
@@ -747,11 +748,24 @@ static bool isMemoryOp(const MachineInstr *MI) {
if (MMO->isVolatile())
return false;
- // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is not.
+ // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
+ // not.
if (MMO->getAlignment() < 4)
return false;
}
+ // str <undef> could probably be eliminated entirely, but for now we just want
+ // to avoid making a mess of it.
+ // FIXME: Use str <undef> as a wildcard to enable better stm folding.
+ if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
+ MI->getOperand(0).isUndef())
+ return false;
+
+ // Likewise don't mess with references to undefined addresses.
+ if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
+ MI->getOperand(1).isUndef())
+ return false;
+
int Opcode = MI->getOpcode();
switch (Opcode) {
default: break;
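
The merged condition in MergeLDR_STR now also refuses to fold SP into a load/store-multiple, in addition to the existing requirements that the offset advance by exactly the access size and the register numbers stay ascending (strictly consecutive for addressing mode 5). A toy restatement of that predicate, with illustrative names rather than the pass's real API:

#include <cstdio>

// Illustrative: can the next (register, offset) pair extend the current
// ldm/stm run?  Mirrors the guarded condition in ARMLoadStoreOpt::MergeLDR_STR.
static bool canExtendRun(bool IsAM4, int Offset, int NewOffset, int Size,
                         unsigned RegNum, unsigned PRegNum, bool RegIsSP) {
  if (RegIsSP)
    return false;                        // never fold SP into ldm/stm
  if (NewOffset != Offset + Size)
    return false;                        // accesses must stay contiguous
  // AM4: ascending register numbers; AM5: strictly consecutive numbers.
  return (IsAM4 && RegNum > PRegNum) || RegNum == PRegNum + 1;
}

int main() {
  std::printf("%d\n", canExtendRun(true, 0, 4, 4, 3, 2, false));   // 1
  std::printf("%d\n", canExtendRun(true, 0, 4, 4, 13, 12, true));  // 0 (SP)
  return 0;
}
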
diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h
index 2cc2950..86e7206 100644
--- a/lib/Target/ARM/ARMRelocations.h
+++ b/lib/Target/ARM/ARMRelocations.h
@@ -47,7 +47,13 @@ namespace llvm {
reloc_arm_pic_jt,
// reloc_arm_branch - Branch address relocation.
- reloc_arm_branch
+ reloc_arm_branch,
+
+ // reloc_arm_movt - MOVT immediate relocation.
+ reloc_arm_movt,
+
+ // reloc_arm_movw - MOVW immediate relocation.
+ reloc_arm_movw
};
}
}
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 426862c..622034b 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -33,7 +33,7 @@ UseMOVT("arm-use-movt",
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
bool isT)
- : ARMArchVersion(V4T)
+ : ARMArchVersion(V4)
, ARMFPUType(None)
, UseNEONForSinglePrecisionFP(UseNEONFP)
, IsThumb(isT)
@@ -54,6 +54,11 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
// Parse features string.
CPUString = ParseSubtargetFeatures(FS, CPUString);
+ // When no arch is specified either by CPU or by attributes, make the default
+ // ARMv4T.
+ if (CPUString == "generic" && (FS.empty() || FS == "generic"))
+ ARMArchVersion = V4T;
+
// Set the boolean corresponding to the current target triple, or the default
// if one cannot be determined, to true.
unsigned Len = TT.length();
@@ -68,25 +73,28 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
}
if (Idx) {
unsigned SubVer = TT[Idx];
- if (SubVer > '4' && SubVer <= '9') {
- if (SubVer >= '7') {
- ARMArchVersion = V7A;
- } else if (SubVer == '6') {
- ARMArchVersion = V6;
- if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
- ARMArchVersion = V6T2;
- } else if (SubVer == '5') {
- ARMArchVersion = V5T;
- if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e')
- ARMArchVersion = V5TE;
- }
- if (ARMArchVersion >= V6T2)
- ThumbMode = Thumb2;
+ if (SubVer >= '7' && SubVer <= '9') {
+ ARMArchVersion = V7A;
+ } else if (SubVer == '6') {
+ ARMArchVersion = V6;
+ if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
+ ARMArchVersion = V6T2;
+ } else if (SubVer == '5') {
+ ARMArchVersion = V5T;
+ if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e')
+ ARMArchVersion = V5TE;
+ } else if (SubVer == '4') {
+ if (Len >= Idx+2 && TT[Idx+1] == 't')
+ ARMArchVersion = V4T;
+ else
+ ARMArchVersion = V4;
}
}
// Thumb2 implies at least V6T2.
- if (ARMArchVersion < V6T2 && ThumbMode >= Thumb2)
+ if (ARMArchVersion >= V6T2)
+ ThumbMode = Thumb2;
+ else if (ThumbMode >= Thumb2)
ARMArchVersion = V6T2;
if (Len >= 10) {
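
The rewritten triple scan treats the digit after "armv" as the major architecture version and then peeks at the following letters to refine it (v6t2, v5te, v4t), falling back to plain V4 for a bare "armv4". A simplified standalone sketch of the same decision tree (enum values reused purely for illustration):

#include <string>
#include <cstdio>

enum ArchVersion { V4, V4T, V5T, V5TE, V6, V6T2, V7A };

// Illustrative restatement of the subtarget's triple scan: Idx points at the
// architecture digit in the target triple (e.g. the '5' in "armv5te-...").
static ArchVersion parseArchVersion(const std::string &TT, unsigned Idx) {
  unsigned Len = TT.length();
  char SubVer = TT[Idx];
  if (SubVer >= '7' && SubVer <= '9')
    return V7A;
  if (SubVer == '6')
    return (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') ? V6T2 : V6;
  if (SubVer == '5')
    return (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') ? V5TE : V5T;
  if (SubVer == '4')
    return (Len >= Idx+2 && TT[Idx+1] == 't') ? V4T : V4;
  return V4; // fall back to the constructor default
}

int main() {
  std::printf("%d %d\n", parseArchVersion("armv5te-none-eabi", 4),
                         parseArchVersion("armv4-none-eabi", 4));
  return 0;
}
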
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 3f06b7b..6980851 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -26,7 +26,7 @@ class GlobalValue;
class ARMSubtarget : public TargetSubtarget {
protected:
enum ARMArchEnum {
- V4T, V5T, V5TE, V6, V6T2, V7A
+ V4, V4T, V5T, V5TE, V6, V6T2, V7A
};
enum ARMFPEnum {
@@ -38,7 +38,7 @@ protected:
Thumb2
};
- /// ARMArchVersion - ARM architecture version: V4T (base), V5T, V5TE,
+ /// ARMArchVersion - ARM architecture version: V4, V4T (base), V5T, V5TE,
/// V6, V6T2, V7A.
ARMArchEnum ARMArchVersion;
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index 9703403..a488c0a 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -10,7 +10,7 @@
#ifndef LLVM_TARGET_ARM_TARGETOBJECTFILE_H
#define LLVM_TARGET_ARM_TARGETOBJECTFILE_H
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/MC/MCSectionELF.h"
namespace llvm {
@@ -24,7 +24,7 @@ namespace llvm {
if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) {
StaticCtorSection =
- getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY,
+ getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY,
MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
SectionKind::getDataRel());
StaticDtorSection =
diff --git a/lib/Target/ARM/Android.mk b/lib/Target/ARM/Android.mk
new file mode 100644
index 0000000..ea796af
--- /dev/null
+++ b/lib/Target/ARM/Android.mk
@@ -0,0 +1,49 @@
+LOCAL_PATH := $(call my-dir)
+
+# For the device only
+# =====================================================
+include $(CLEAR_VARS)
+include $(CLEAR_TBLGEN_VARS)
+
+TBLGEN_TABLES := \
+ ARMGenRegisterInfo.h.inc \
+ ARMGenRegisterNames.inc \
+ ARMGenRegisterInfo.inc \
+ ARMGenInstrNames.inc \
+ ARMGenInstrInfo.inc \
+ ARMGenDAGISel.inc \
+ ARMGenSubtarget.inc \
+ ARMGenCodeEmitter.inc \
+ ARMGenCallingConv.inc
+
+LOCAL_SRC_FILES := \
+ ARMBaseInstrInfo.cpp \
+ ARMBaseRegisterInfo.cpp \
+ ARMCodeEmitter.cpp \
+ ARMConstantIslandPass.cpp \
+ ARMConstantPoolValue.cpp \
+ ARMExpandPseudoInsts.cpp \
+ ARMISelDAGToDAG.cpp \
+ ARMISelLowering.cpp \
+ ARMInstrInfo.cpp \
+ ARMJITInfo.cpp \
+ ARMLoadStoreOptimizer.cpp \
+ ARMMCAsmInfo.cpp \
+ ARMRegisterInfo.cpp \
+ ARMSubtarget.cpp \
+ ARMTargetMachine.cpp \
+ NEONMoveFix.cpp \
+ NEONPreAllocPass.cpp \
+ Thumb1InstrInfo.cpp \
+ Thumb1RegisterInfo.cpp \
+ Thumb2ITBlockPass.cpp \
+ Thumb2InstrInfo.cpp \
+ Thumb2RegisterInfo.cpp \
+ Thumb2SizeReduction.cpp
+
+LOCAL_MODULE:= libLLVMARMCodeGen
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(LLVM_TBLGEN_RULES_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 0a75c09..d6d595c 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
@@ -37,7 +38,6 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
@@ -122,6 +122,7 @@ namespace {
void printT2AddrModeSoRegOperand(const MachineInstr *MI, int OpNum);
void printPredicateOperand(const MachineInstr *MI, int OpNum);
+ void printMandatoryPredicateOperand(const MachineInstr *MI, int OpNum);
void printSBitModifierOperand(const MachineInstr *MI, int OpNum);
void printPCLabel(const MachineInstr *MI, int OpNum);
void printRegisterList(const MachineInstr *MI, int OpNum);
@@ -786,6 +787,12 @@ void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int OpNum) {
O << ARMCondCodeToString(CC);
}
+void ARMAsmPrinter::printMandatoryPredicateOperand(const MachineInstr *MI,
+ int OpNum) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+ O << ARMCondCodeToString(CC);
+}
+
void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int OpNum){
unsigned Reg = MI->getOperand(OpNum).getReg();
if (Reg) {
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index d7d8e09..a2084b0 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -325,6 +325,12 @@ void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum) {
O << ARMCondCodeToString(CC);
}
+void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
+ unsigned OpNum) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+ O << ARMCondCodeToString(CC);
+}
+
void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum){
if (MI->getOperand(OpNum).getReg()) {
assert(MI->getOperand(OpNum).getReg() == ARM::CPSR &&
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
index 23a7f05..b7964c9 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
@@ -71,6 +71,7 @@ public:
void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum) {}
void printPredicateOperand(const MCInst *MI, unsigned OpNum);
+ void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum);
void printSBitModifierOperand(const MCInst *MI, unsigned OpNum);
void printRegisterList(const MCInst *MI, unsigned OpNum);
void printCPInstOperand(const MCInst *MI, unsigned OpNum,
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 9efb5a1..57b65cf 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -10,6 +10,8 @@ Reimplement 'select' in terms of 'SEL'.
* Implement pre/post increment support. (e.g. PR935)
* Implement smarter constant generation for binops with large immediates.
+A few ARMv6T2 ops should be pattern matched: BFI, SBFX, and UBFX
+
//===---------------------------------------------------------------------===//
Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the
diff --git a/lib/Target/ARM/TargetInfo/Android.mk b/lib/Target/ARM/TargetInfo/Android.mk
new file mode 100644
index 0000000..c1998a1
--- /dev/null
+++ b/lib/Target/ARM/TargetInfo/Android.mk
@@ -0,0 +1,24 @@
+LOCAL_PATH := $(call my-dir)
+
+# For the device only
+# =====================================================
+include $(CLEAR_VARS)
+include $(CLEAR_TBLGEN_VARS)
+
+TBLGEN_TABLES := \
+ ARMGenRegisterNames.inc \
+ ARMGenInstrNames.inc
+
+TBLGEN_TD_DIR := $(LOCAL_PATH)/..
+
+LOCAL_SRC_FILES := \
+ ARMTargetInfo.cpp
+
+LOCAL_C_INCLUDES += \
+ $(LOCAL_PATH)/..
+
+LOCAL_MODULE:= libLLVMARMInfo
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(LLVM_TBLGEN_RULES_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index d6630ce..163d1e9 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -450,9 +450,9 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset -= AFI->getGPRCalleeSavedArea1Offset();
else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
Offset -= AFI->getGPRCalleeSavedArea2Offset();
- else if (hasFP(MF)) {
- assert(SPAdj == 0 && "Unexpected");
- // There is alloca()'s in this function, must reference off the frame
+ else if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ assert(SPAdj == 0 && hasFP(MF) && "Unexpected");
+ // There are alloca()'s in this function, must reference off the frame
// pointer instead.
FrameReg = getFrameRegister(MF);
Offset -= AFI->getFramePtrSpillOffset();
@@ -778,9 +778,19 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
}
static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
- return (MI->getOpcode() == ARM::tRestore &&
- MI->getOperand(1).isFI() &&
- isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs));
+ if (MI->getOpcode() == ARM::tRestore &&
+ MI->getOperand(1).isFI() &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
+ return true;
+ else if (MI->getOpcode() == ARM::tPOP) {
+ // The first three operands are predicates and such. The last two are
+ // imp-def and imp-use of SP. Check everything in between.
+ for (int i = 3, e = MI->getNumOperands() - 2; i != e; ++i)
+ if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+ return false;
+ return true;
+ }
+ return false;
}
void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
@@ -794,13 +804,13 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
int NumBytes = (int)MFI->getStackSize();
+ const unsigned *CSRegs = getCalleeSavedRegs();
if (!AFI->hasStackFrame()) {
if (NumBytes != 0)
emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes);
} else {
// Unwind MBBI to point to first LDR / VLDRD.
- const unsigned *CSRegs = getCalleeSavedRegs();
if (MBBI != MBB.begin()) {
do
--MBBI;
@@ -836,6 +846,9 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
}
if (VARegSaveSize) {
+ // Move back past the callee-saved register restoration
+ while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs))
+ ++MBBI;
// Epilogue for vararg functions: pop LR to R3 and branch off it.
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
.addReg(0) // No write back.
@@ -845,6 +858,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
.addReg(ARM::R3, RegState::Kill);
+ // erase the old tBX_RET instruction
MBB.erase(MBBI);
}
}
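
The new tPOP case in isCSRestore skips the leading predicate operands and the trailing implicit SP operands, then accepts the pop only if every remaining register is callee-saved. A self-contained sketch of that membership check over a plain register list (hypothetical register numbers; the real code walks MachineOperands):

#include <cstdio>

// Illustrative: CSRegs is a zero-terminated list, as returned by
// getCalleeSavedRegs() in the patch.
static bool isCalleeSaved(unsigned Reg, const unsigned *CSRegs) {
  for (unsigned i = 0; CSRegs[i]; ++i)
    if (Reg == CSRegs[i])
      return true;
  return false;
}

// Accept a pop only if every popped register is callee-saved, mirroring the
// tPOP branch of isCSRestore(). Regs holds just the popped registers.
static bool isCSRestorePop(const unsigned *Regs, unsigned NumRegs,
                           const unsigned *CSRegs) {
  for (unsigned i = 0; i != NumRegs; ++i)
    if (!isCalleeSaved(Regs[i], CSRegs))
      return false;
  return true;
}

int main() {
  unsigned CSRegs[] = {4, 5, 6, 7, 8, 10, 11, 14, 0}; // illustrative numbers
  unsigned Pop[]    = {4, 5, 6};
  std::printf("%d\n", isCSRestorePop(Pop, 3, CSRegs)); // prints 1
  return 0;
}
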
diff --git a/lib/Target/Alpha/AlphaCallingConv.td b/lib/Target/Alpha/AlphaCallingConv.td
index 38ada69..bde8819 100644
--- a/lib/Target/Alpha/AlphaCallingConv.td
+++ b/lib/Target/Alpha/AlphaCallingConv.td
@@ -14,7 +14,8 @@
//===----------------------------------------------------------------------===//
def RetCC_Alpha : CallingConv<[
// i64 is returned in register R0
- CCIfType<[i64], CCAssignToReg<[R0]>>,
+  // R1 is an LLVM extension; it is unclear what gcc does here.
+ CCIfType<[i64], CCAssignToReg<[R0,R1]>>,
// f32 / f64 are returned in F0/F1
CCIfType<[f32, f64], CCAssignToReg<[F0, F1]>>
diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
index eaefef9..5303d85 100644
--- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
+++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -64,7 +64,7 @@ namespace {
/// that the bits 1-7 of LHS are already zero. If LHS is non-null, we are
/// in checking mode. If LHS is null, we assume that the mask has already
/// been validated before.
- uint64_t get_zapImm(SDValue LHS, uint64_t Constant) {
+ uint64_t get_zapImm(SDValue LHS, uint64_t Constant) const {
uint64_t BitsToCheck = 0;
unsigned Result = 0;
for (unsigned i = 0; i != 8; ++i) {
@@ -159,10 +159,6 @@ namespace {
// target-specific node if it hasn't already been changed.
SDNode *Select(SDNode *N);
- /// InstructionSelect - This callback is invoked by
- /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
- virtual void InstructionSelect();
-
virtual const char *getPassName() const {
return "Alpha DAG->DAG Pattern Instruction Selection";
}
@@ -222,20 +218,11 @@ SDNode *AlphaDAGToDAGISel::getGlobalRetAddr() {
return CurDAG->getRegister(GlobalRetAddr, TLI.getPointerTy()).getNode();
}
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void AlphaDAGToDAGISel::InstructionSelect() {
- // Select target instructions for the DAG.
- SelectRoot(*CurDAG);
- CurDAG->RemoveDeadNodes();
-}
-
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
SDNode *AlphaDAGToDAGISel::Select(SDNode *N) {
- if (N->isMachineOpcode()) {
+ if (N->isMachineOpcode())
return NULL; // Already selected.
- }
DebugLoc dl = N->getDebugLoc();
switch (N->getOpcode()) {
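
The only change to get_zapImm is marking it const, but for context: Alpha's ZAPNOT immediate is an 8-bit mask in which bit i keeps byte i of the 64-bit operand, and the helper derives that mask from an AND constant whose bytes must each be all-ones or all-zeros. A rough sketch of that derivation (an assumption about the exact semantics, not code from this patch):

#include <cstdint>
#include <cstdio>

// Illustrative: derive an 8-bit ZAPNOT byte mask from a 64-bit AND mask whose
// bytes are each 0xFF or 0x00; return 0 if the mask is not expressible.
static unsigned getZapMask(uint64_t Constant) {
  unsigned Result = 0;
  for (unsigned i = 0; i != 8; ++i) {
    uint64_t Byte = (Constant >> (i * 8)) & 0xFF;
    if (Byte == 0xFF)
      Result |= 1u << i;   // keep this byte
    else if (Byte != 0x00)
      return 0;            // not expressible as a ZAPNOT
  }
  return Result;
}

int main() {
  std::printf("%#x\n", getZapMask(0x00000000FFFFFFFFULL)); // prints 0xf
  return 0;
}
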
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index 0bbe567..5d8310e 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -21,7 +21,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Module.h"
@@ -282,7 +282,8 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
DAG.getIntPtrConstant(VA.getLocMemOffset()));
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- PseudoSourceValue::getStack(), 0));
+ PseudoSourceValue::getStack(), 0,
+ false, false, 0));
}
}
@@ -426,7 +427,8 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
// Create the SelectionDAG nodes corresponding to a load
//from this parameter
SDValue FIN = DAG.getFrameIndex(FI, MVT::i64);
- ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0,
+ false, false, 0);
}
InVals.push_back(ArgVal);
}
@@ -442,14 +444,16 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true, false);
if (i == 0) VarArgsBase = FI;
SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
- LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0));
+ LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0,
+ false, false, 0));
if (TargetRegisterInfo::isPhysicalRegister(args_float[i]))
args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass);
argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64);
FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true, false);
SDFI = DAG.getFrameIndex(FI, MVT::i64);
- LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0));
+ LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0,
+ false, false, 0));
}
//Set up a token factor with all the stack traffic
@@ -528,11 +532,12 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue();
DebugLoc dl = N->getDebugLoc();
- SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, VAListS, 0);
+ SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, VAListS, 0,
+ false, false, 0);
SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
DAG.getConstant(8, MVT::i64));
SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1),
- Tmp, NULL, 0, MVT::i32);
+ Tmp, NULL, 0, MVT::i32, false, false, 0);
DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset);
if (N->getValueType(0).isFloatingPoint())
{
@@ -547,7 +552,7 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
SDValue NewOffset = DAG.getNode(ISD::ADD, dl, MVT::i64, Offset,
DAG.getConstant(8, MVT::i64));
Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp, NULL, 0,
- MVT::i32);
+ MVT::i32, false, false, 0);
}
/// LowerOperation - Provide custom lowering hooks for some operations.
@@ -694,9 +699,10 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
SDValue Result;
if (Op.getValueType() == MVT::i32)
Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr,
- NULL, 0, MVT::i32);
+ NULL, 0, MVT::i32, false, false, 0);
else
- Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0);
+ Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0,
+ false, false, 0);
return Result;
}
case ISD::VACOPY: {
@@ -706,15 +712,18 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
const Value *DestS = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcS = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0);
- SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0);
+ SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0,
+ false, false, 0);
+ SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0,
+ false, false, 0);
SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP,
DAG.getConstant(8, MVT::i64));
Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result,
- NP, NULL,0, MVT::i32);
+ NP, NULL,0, MVT::i32, false, false, 0);
SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP,
DAG.getConstant(8, MVT::i64));
- return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, NULL, 0, MVT::i32);
+ return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, NULL, 0, MVT::i32,
+ false, false, 0);
}
case ISD::VASTART: {
SDValue Chain = Op.getOperand(0);
@@ -723,11 +732,12 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
// vastart stores the address of the VarArgsBase and VarArgsOffset
SDValue FR = DAG.getFrameIndex(VarArgsBase, MVT::i64);
- SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0);
+ SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0,
+ false, false, 0);
SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
DAG.getConstant(8, MVT::i64));
return DAG.getTruncStore(S1, dl, DAG.getConstant(VarArgsOffset, MVT::i64),
- SA2, NULL, 0, MVT::i32);
+ SA2, NULL, 0, MVT::i32, false, false, 0);
}
case ISD::RETURNADDR:
return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc::getUnknownLoc(),
@@ -749,7 +759,8 @@ void AlphaTargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Chain, DataPtr;
LowerVAARG(N, Chain, DataPtr, DAG);
- SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, NULL, 0);
+ SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, NULL, 0,
+ false, false, 0);
Results.push_back(Res);
Results.push_back(SDValue(Res.getNode(), 1));
}
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
index 8917e86..341c4a7 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -92,7 +92,7 @@ def immSExt16int : PatLeaf<(imm), [{ //(int)imm fits in a 16 bit sign extended
((int64_t)N->getZExtValue() << 32) >> 32;
}], SExt16>;
-def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm:$L), [{
+def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm), [{
ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!RHS) return 0;
uint64_t build = get_zapImm(N->getOperand(0), (uint64_t)RHS->getZExtValue());
@@ -602,9 +602,9 @@ def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle count
def MB : MfcPForm<0x18, 0x4000, "mb", s_imisc>; //memory barrier
def WMB : MfcPForm<0x18, 0x4400, "wmb", s_imisc>; //write memory barrier
-def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 1), (i64 imm:$dev)),
+def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 1), (i64 imm)),
(WMB)>;
-def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 imm:$ss), (i64 imm:$dev)),
+def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 imm), (i64 imm)),
(MB)>;
//Basic Floating point ops
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
index 64bdd62..ba662fb 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -251,7 +251,7 @@ void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const {
} else {
std::string msg;
raw_string_ostream Msg(msg);
- Msg << "Too big a stack frame at " + NumBytes;
+ Msg << "Too big a stack frame at " << NumBytes;
llvm_report_error(Msg.str());
}
@@ -303,15 +303,14 @@ void AlphaRegisterInfo::emitEpilogue(MachineFunction &MF,
} else {
std::string msg;
raw_string_ostream Msg(msg);
- Msg << "Too big a stack frame at " + NumBytes;
+ Msg << "Too big a stack frame at " << NumBytes;
llvm_report_error(Msg.str());
}
}
}
unsigned AlphaRegisterInfo::getRARegister() const {
- llvm_unreachable("What is the return address register");
- return 0;
+ return Alpha::R26;
}
unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
diff --git a/lib/Target/Android.mk b/lib/Target/Android.mk
new file mode 100644
index 0000000..8bf4340
--- /dev/null
+++ b/lib/Target/Android.mk
@@ -0,0 +1,38 @@
+LOCAL_PATH:= $(call my-dir)
+
+target_SRC_FILES := \
+ Mangler.cpp \
+ SubtargetFeature.cpp \
+ Target.cpp \
+ TargetAsmLexer.cpp \
+ TargetData.cpp \
+ TargetELFWriterInfo.cpp \
+ TargetFrameInfo.cpp \
+ TargetInstrInfo.cpp \
+ TargetIntrinsicInfo.cpp \
+ TargetLoweringObjectFile.cpp \
+ TargetMachine.cpp \
+ TargetRegisterInfo.cpp \
+ TargetSubtarget.cpp
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(target_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMTarget
+
+include $(LLVM_HOST_BUILD_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(target_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMTarget
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
index 2c9cc60..c8d71aa 100644
--- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
+++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
@@ -41,7 +41,7 @@ namespace {
BlackfinDAGToDAGISel(BlackfinTargetMachine &TM, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(TM, OptLevel) {}
- virtual void InstructionSelect();
+ virtual void PostprocessISelDAG();
virtual const char *getPassName() const {
return "Blackfin DAG->DAG Pattern Instruction Selection";
@@ -72,13 +72,7 @@ FunctionPass *llvm::createBlackfinISelDag(BlackfinTargetMachine &TM,
return new BlackfinDAGToDAGISel(TM, OptLevel);
}
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void BlackfinDAGToDAGISel::InstructionSelect() {
- // Select target instructions for the DAG.
- SelectRoot(*CurDAG);
- DEBUG(errs() << "Selected selection DAG before regclass fixup:\n");
- DEBUG(CurDAG->dump());
+void BlackfinDAGToDAGISel::PostprocessISelDAG() {
FixRegisterClasses(*CurDAG);
}
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
index 269707a..5ce2013 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -22,7 +22,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -206,7 +206,8 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain,
int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(),
true, false);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
- InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0));
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
+ false, false, 0));
}
}
@@ -329,7 +330,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN);
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN,
PseudoSourceValue::getStack(),
- Offset));
+ Offset, false, false, 0));
}
}
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index fd4c4e7..10f873f 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -49,7 +49,6 @@
#include "llvm/System/Host.h"
#include "llvm/Config/config.h"
#include <algorithm>
-#include <sstream>
using namespace llvm;
extern "C" void LLVMInitializeCBackendTarget() {
@@ -153,26 +152,16 @@ namespace {
return false;
}
- raw_ostream &printType(formatted_raw_ostream &Out,
- const Type *Ty,
+ raw_ostream &printType(raw_ostream &Out, const Type *Ty,
bool isSigned = false,
const std::string &VariableName = "",
bool IgnoreName = false,
const AttrListPtr &PAL = AttrListPtr());
- std::ostream &printType(std::ostream &Out, const Type *Ty,
- bool isSigned = false,
- const std::string &VariableName = "",
- bool IgnoreName = false,
- const AttrListPtr &PAL = AttrListPtr());
- raw_ostream &printSimpleType(formatted_raw_ostream &Out,
- const Type *Ty,
- bool isSigned,
- const std::string &NameSoFar = "");
- std::ostream &printSimpleType(std::ostream &Out, const Type *Ty,
- bool isSigned,
+ raw_ostream &printSimpleType(raw_ostream &Out, const Type *Ty,
+ bool isSigned,
const std::string &NameSoFar = "");
- void printStructReturnPointerFunctionType(formatted_raw_ostream &Out,
+ void printStructReturnPointerFunctionType(raw_ostream &Out,
const AttrListPtr &PAL,
const PointerType *Ty);
@@ -385,8 +374,8 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) {
// If this isn't a struct or array type, remove it from our set of types
// to name. This simplifies emission later.
- if (!isa<StructType>(I->second) && !isa<OpaqueType>(I->second) &&
- !isa<ArrayType>(I->second)) {
+ if (!I->second->isStructTy() && !I->second->isOpaqueTy() &&
+ !I->second->isArrayTy()) {
TST.remove(I);
} else {
// If this is not used, remove it from the symbol table.
@@ -405,7 +394,7 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) {
unsigned RenameCounter = 0;
for (std::set<const Type *>::const_iterator I = UT.begin(), E = UT.end();
I != E; ++I)
- if (isa<StructType>(*I) || isa<ArrayType>(*I)) {
+ if ((*I)->isStructTy() || (*I)->isArrayTy()) {
while (M.addTypeName("unnamed"+utostr(RenameCounter), *I))
++RenameCounter;
Changed = true;
@@ -454,11 +443,12 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) {
/// printStructReturnPointerFunctionType - This is like printType for a struct
/// return type, except, instead of printing the type as void (*)(Struct*, ...)
/// print it as "Struct (*)(...)", for struct return functions.
-void CWriter::printStructReturnPointerFunctionType(formatted_raw_ostream &Out,
+void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
const AttrListPtr &PAL,
const PointerType *TheTy) {
const FunctionType *FTy = cast<FunctionType>(TheTy->getElementType());
- std::stringstream FunctionInnards;
+ std::string tstr;
+ raw_string_ostream FunctionInnards(tstr);
FunctionInnards << " (*) (";
bool PrintedType = false;
@@ -470,7 +460,7 @@ void CWriter::printStructReturnPointerFunctionType(formatted_raw_ostream &Out,
FunctionInnards << ", ";
const Type *ArgTy = *I;
if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
- assert(isa<PointerType>(ArgTy));
+ assert(ArgTy->isPointerTy());
ArgTy = cast<PointerType>(ArgTy)->getElementType();
}
printType(FunctionInnards, ArgTy,
@@ -484,63 +474,14 @@ void CWriter::printStructReturnPointerFunctionType(formatted_raw_ostream &Out,
FunctionInnards << "void";
}
FunctionInnards << ')';
- std::string tstr = FunctionInnards.str();
printType(Out, RetTy,
- /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr);
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
}
raw_ostream &
-CWriter::printSimpleType(formatted_raw_ostream &Out, const Type *Ty,
- bool isSigned,
+CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
const std::string &NameSoFar) {
- assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) &&
- "Invalid type for printSimpleType");
- switch (Ty->getTypeID()) {
- case Type::VoidTyID: return Out << "void " << NameSoFar;
- case Type::IntegerTyID: {
- unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
- if (NumBits == 1)
- return Out << "bool " << NameSoFar;
- else if (NumBits <= 8)
- return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar;
- else if (NumBits <= 16)
- return Out << (isSigned?"signed":"unsigned") << " short " << NameSoFar;
- else if (NumBits <= 32)
- return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar;
- else if (NumBits <= 64)
- return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar;
- else {
- assert(NumBits <= 128 && "Bit widths > 128 not implemented yet");
- return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar;
- }
- }
- case Type::FloatTyID: return Out << "float " << NameSoFar;
- case Type::DoubleTyID: return Out << "double " << NameSoFar;
- // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is
- // present matches host 'long double'.
- case Type::X86_FP80TyID:
- case Type::PPC_FP128TyID:
- case Type::FP128TyID: return Out << "long double " << NameSoFar;
-
- case Type::VectorTyID: {
- const VectorType *VTy = cast<VectorType>(Ty);
- return printSimpleType(Out, VTy->getElementType(), isSigned,
- " __attribute__((vector_size(" +
- utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar);
- }
-
- default:
-#ifndef NDEBUG
- errs() << "Unknown primitive type: " << *Ty << "\n";
-#endif
- llvm_unreachable(0);
- }
-}
-
-std::ostream &
-CWriter::printSimpleType(std::ostream &Out, const Type *Ty, bool isSigned,
- const std::string &NameSoFar) {
- assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) &&
+ assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) &&
"Invalid type for printSimpleType");
switch (Ty->getTypeID()) {
case Type::VoidTyID: return Out << "void " << NameSoFar;
@@ -587,120 +528,16 @@ CWriter::printSimpleType(std::ostream &Out, const Type *Ty, bool isSigned,
// Pass the Type* and the variable name and this prints out the variable
// declaration.
//
-raw_ostream &CWriter::printType(formatted_raw_ostream &Out,
- const Type *Ty,
+raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
bool isSigned, const std::string &NameSoFar,
bool IgnoreName, const AttrListPtr &PAL) {
- if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) {
- printSimpleType(Out, Ty, isSigned, NameSoFar);
- return Out;
- }
-
- // Check to see if the type is named.
- if (!IgnoreName || isa<OpaqueType>(Ty)) {
- std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty);
- if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar;
- }
-
- switch (Ty->getTypeID()) {
- case Type::FunctionTyID: {
- const FunctionType *FTy = cast<FunctionType>(Ty);
- std::stringstream FunctionInnards;
- FunctionInnards << " (" << NameSoFar << ") (";
- unsigned Idx = 1;
- for (FunctionType::param_iterator I = FTy->param_begin(),
- E = FTy->param_end(); I != E; ++I) {
- const Type *ArgTy = *I;
- if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
- assert(isa<PointerType>(ArgTy));
- ArgTy = cast<PointerType>(ArgTy)->getElementType();
- }
- if (I != FTy->param_begin())
- FunctionInnards << ", ";
- printType(FunctionInnards, ArgTy,
- /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), "");
- ++Idx;
- }
- if (FTy->isVarArg()) {
- if (FTy->getNumParams())
- FunctionInnards << ", ...";
- } else if (!FTy->getNumParams()) {
- FunctionInnards << "void";
- }
- FunctionInnards << ')';
- std::string tstr = FunctionInnards.str();
- printType(Out, FTy->getReturnType(),
- /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr);
- return Out;
- }
- case Type::StructTyID: {
- const StructType *STy = cast<StructType>(Ty);
- Out << NameSoFar + " {\n";
- unsigned Idx = 0;
- for (StructType::element_iterator I = STy->element_begin(),
- E = STy->element_end(); I != E; ++I) {
- Out << " ";
- printType(Out, *I, false, "field" + utostr(Idx++));
- Out << ";\n";
- }
- Out << '}';
- if (STy->isPacked())
- Out << " __attribute__ ((packed))";
- return Out;
- }
-
- case Type::PointerTyID: {
- const PointerType *PTy = cast<PointerType>(Ty);
- std::string ptrName = "*" + NameSoFar;
-
- if (isa<ArrayType>(PTy->getElementType()) ||
- isa<VectorType>(PTy->getElementType()))
- ptrName = "(" + ptrName + ")";
-
- if (!PAL.isEmpty())
- // Must be a function ptr cast!
- return printType(Out, PTy->getElementType(), false, ptrName, true, PAL);
- return printType(Out, PTy->getElementType(), false, ptrName);
- }
-
- case Type::ArrayTyID: {
- const ArrayType *ATy = cast<ArrayType>(Ty);
- unsigned NumElements = ATy->getNumElements();
- if (NumElements == 0) NumElements = 1;
- // Arrays are wrapped in structs to allow them to have normal
- // value semantics (avoiding the array "decay").
- Out << NameSoFar << " { ";
- printType(Out, ATy->getElementType(), false,
- "array[" + utostr(NumElements) + "]");
- return Out << "; }";
- }
-
- case Type::OpaqueTyID: {
- std::string TyName = "struct opaque_" + itostr(OpaqueCounter++);
- assert(TypeNames.find(Ty) == TypeNames.end());
- TypeNames[Ty] = TyName;
- return Out << TyName << ' ' << NameSoFar;
- }
- default:
- llvm_unreachable("Unhandled case in getTypeProps!");
- }
-
- return Out;
-}
-
-// Pass the Type* and the variable name and this prints out the variable
-// declaration.
-//
-std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty,
- bool isSigned, const std::string &NameSoFar,
- bool IgnoreName, const AttrListPtr &PAL) {
- if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) {
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) {
printSimpleType(Out, Ty, isSigned, NameSoFar);
return Out;
}
// Check to see if the type is named.
- if (!IgnoreName || isa<OpaqueType>(Ty)) {
+ if (!IgnoreName || Ty->isOpaqueTy()) {
std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty);
if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar;
}
@@ -708,14 +545,15 @@ std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty,
switch (Ty->getTypeID()) {
case Type::FunctionTyID: {
const FunctionType *FTy = cast<FunctionType>(Ty);
- std::stringstream FunctionInnards;
+ std::string tstr;
+ raw_string_ostream FunctionInnards(tstr);
FunctionInnards << " (" << NameSoFar << ") (";
unsigned Idx = 1;
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I) {
const Type *ArgTy = *I;
if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
- assert(isa<PointerType>(ArgTy));
+ assert(ArgTy->isPointerTy());
ArgTy = cast<PointerType>(ArgTy)->getElementType();
}
if (I != FTy->param_begin())
@@ -731,9 +569,8 @@ std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty,
FunctionInnards << "void";
}
FunctionInnards << ')';
- std::string tstr = FunctionInnards.str();
printType(Out, FTy->getReturnType(),
- /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr);
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
return Out;
}
case Type::StructTyID: {
@@ -756,8 +593,8 @@ std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty,
const PointerType *PTy = cast<PointerType>(Ty);
std::string ptrName = "*" + NameSoFar;
- if (isa<ArrayType>(PTy->getElementType()) ||
- isa<VectorType>(PTy->getElementType()))
+ if (PTy->getElementType()->isArrayTy() ||
+ PTy->getElementType()->isVectorTy())
ptrName = "(" + ptrName + ")";
if (!PAL.isEmpty())
@@ -1144,7 +981,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
Out << "((";
printType(Out, CPV->getType()); // sign doesn't matter
Out << ")/*UNDEF*/";
- if (!isa<VectorType>(CPV->getType())) {
+ if (!CPV->getType()->isVectorTy()) {
Out << "0)";
} else {
Out << "{})";
@@ -1396,7 +1233,7 @@ bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) {
}
if (NeedsExplicitCast) {
Out << "((";
- if (Ty->isInteger() && Ty != Type::getInt1Ty(Ty->getContext()))
+ if (Ty->isIntegerTy() && Ty != Type::getInt1Ty(Ty->getContext()))
printSimpleType(Out, Ty, TypeIsSigned);
else
printType(Out, Ty); // not integer, sign doesn't matter
@@ -1497,7 +1334,7 @@ void CWriter::writeInstComputationInline(Instruction &I) {
// We can't currently support integer types other than 1, 8, 16, 32, 64.
// Validate this.
const Type *Ty = I.getType();
- if (Ty->isInteger() && (Ty!=Type::getInt1Ty(I.getContext()) &&
+ if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) &&
Ty!=Type::getInt8Ty(I.getContext()) &&
Ty!=Type::getInt16Ty(I.getContext()) &&
Ty!=Type::getInt32Ty(I.getContext()) &&
@@ -1660,7 +1497,7 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
// If the operand was a pointer, convert to a large integer type.
const Type* OpTy = Operand->getType();
- if (isa<PointerType>(OpTy))
+ if (OpTy->isPointerTy())
OpTy = TD->getIntPtrType(Operand->getContext());
Out << "((";
@@ -2102,10 +1939,10 @@ bool CWriter::doInitialization(Module &M) {
// complete. If the value is an aggregate, print out { 0 }, and let
// the compiler figure out the rest of the zeros.
Out << " = " ;
- if (isa<StructType>(I->getInitializer()->getType()) ||
- isa<VectorType>(I->getInitializer()->getType())) {
+ if (I->getInitializer()->getType()->isStructTy() ||
+ I->getInitializer()->getType()->isVectorTy()) {
Out << "{ 0 }";
- } else if (isa<ArrayType>(I->getInitializer()->getType())) {
+ } else if (I->getInitializer()->getType()->isArrayTy()) {
// As with structs and vectors, but with an extra set of braces
// because arrays are wrapped in structs.
Out << "{ { 0 } }";
@@ -2274,7 +2111,7 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) {
//
Out << "/* Structure contents */\n";
for (I = TST.begin(); I != End; ++I)
- if (isa<StructType>(I->second) || isa<ArrayType>(I->second))
+ if (I->second->isStructTy() || I->second->isArrayTy())
// Only print out used types!
printContainedStructs(I->second, StructPrinted);
}
@@ -2287,14 +2124,15 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) {
void CWriter::printContainedStructs(const Type *Ty,
std::set<const Type*> &StructPrinted) {
// Don't walk through pointers.
- if (isa<PointerType>(Ty) || Ty->isPrimitiveType() || Ty->isInteger()) return;
+ if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy())
+ return;
// Print all contained types first.
for (Type::subtype_iterator I = Ty->subtype_begin(),
E = Ty->subtype_end(); I != E; ++I)
printContainedStructs(*I, StructPrinted);
- if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) {
+ if (Ty->isStructTy() || Ty->isArrayTy()) {
// Check to see if we have already printed this struct.
if (StructPrinted.insert(Ty).second) {
// Print structure type out.
@@ -2327,7 +2165,8 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
const FunctionType *FT = cast<FunctionType>(F->getFunctionType());
const AttrListPtr &PAL = F->getAttributes();
- std::stringstream FunctionInnards;
+ std::string tstr;
+ raw_string_ostream FunctionInnards(tstr);
// Print out the name...
FunctionInnards << GetValueName(F) << '(';
@@ -2382,7 +2221,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
if (PrintedArg) FunctionInnards << ", ";
const Type *ArgTy = *I;
if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
- assert(isa<PointerType>(ArgTy));
+ assert(ArgTy->isPointerTy());
ArgTy = cast<PointerType>(ArgTy)->getElementType();
}
printType(FunctionInnards, ArgTy,
@@ -2423,8 +2262,8 @@ static inline bool isFPIntBitCast(const Instruction &I) {
return false;
const Type *SrcTy = I.getOperand(0)->getType();
const Type *DstTy = I.getType();
- return (SrcTy->isFloatingPoint() && DstTy->isInteger()) ||
- (DstTy->isFloatingPoint() && SrcTy->isInteger());
+ return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) ||
+ (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy());
}
void CWriter::printFunction(Function &F) {
@@ -2713,7 +2552,7 @@ void CWriter::visitPHINode(PHINode &I) {
void CWriter::visitBinaryOperator(Instruction &I) {
// binary instructions, shift instructions, setCond instructions.
- assert(!isa<PointerType>(I.getType()));
+ assert(!I.getType()->isPointerTy());
// We must cast the results of binary operations which might be promoted.
bool needsCast = false;
@@ -3489,7 +3328,7 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
// exposed, like a global, avoid emitting (&foo)[0], just emit foo instead.
if (isAddressExposed(Ptr)) {
writeOperandInternal(Ptr, Static);
- } else if (I != E && isa<StructType>(*I)) {
+ } else if (I != E && (*I)->isStructTy()) {
// If we didn't already emit the first operand, see if we can print it as
// P->f instead of "P[0].f"
writeOperand(Ptr);
@@ -3504,13 +3343,13 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
}
for (; I != E; ++I) {
- if (isa<StructType>(*I)) {
+ if ((*I)->isStructTy()) {
Out << ".field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
- } else if (isa<ArrayType>(*I)) {
+ } else if ((*I)->isArrayTy()) {
Out << ".array[";
writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
Out << ']';
- } else if (!isa<VectorType>(*I)) {
+ } else if (!(*I)->isVectorTy()) {
Out << '[';
writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
Out << ']';
@@ -3668,7 +3507,7 @@ void CWriter::visitInsertValueInst(InsertValueInst &IVI) {
i != e; ++i) {
const Type *IndexedTy =
ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(), b, i+1);
- if (isa<ArrayType>(IndexedTy))
+ if (IndexedTy->isArrayTy())
Out << ".array[" << *i << "]";
else
Out << ".field" << *i;
@@ -3689,7 +3528,7 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
i != e; ++i) {
const Type *IndexedTy =
ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(), b, i+1);
- if (isa<ArrayType>(IndexedTy))
+ if (IndexedTy->isArrayTy())
Out << ".array[" << *i << "]";
else
Out << ".field" << *i;
@@ -3705,7 +3544,8 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM,
formatted_raw_ostream &o,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
PM.add(createGCLoweringPass());
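
The CBackend rewrite drops the std::stringstream/std::ostream overloads and instead buffers nested output in a raw_string_ostream before feeding it to the single raw_ostream printType path. A minimal sketch of that idiom, assuming only llvm/Support/raw_ostream.h:

#include "llvm/Support/raw_ostream.h"
#include <string>

// Illustrative: build an inner string with raw_string_ostream, then emit it
// through the outer raw_ostream, as printType() now does for FunctionInnards.
static void emitWrapped(llvm::raw_ostream &Out, const std::string &Name) {
  std::string Buf;
  llvm::raw_string_ostream Inner(Buf);
  Inner << " (" << Name << ") (void)";
  Out << "int" << Inner.str() << ";\n";  // str() flushes the buffered text
}

int main() {
  emitWrapped(llvm::outs(), "callback");  // prints: int (callback) (void);
  return 0;
}
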
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
index 715bbda..d178e7f 100644
--- a/lib/Target/CBackend/CTargetMachine.h
+++ b/lib/Target/CBackend/CTargetMachine.h
@@ -27,7 +27,8 @@ struct CTargetMachine : public TargetMachine {
virtual bool addPassesToEmitWholeFile(PassManager &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel);
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify);
virtual const TargetData *getTargetData() const { return 0; }
};
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
index 06eb149..47cb579 100644
--- a/lib/Target/CellSPU/SPU64InstrInfo.td
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -123,8 +123,8 @@ multiclass CompareLogicalGreaterThan64 {
defm I64LGT: CompareLogicalGreaterThan64;
def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
-def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
- I64LGTv2i64.Fragment>;
+//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+// I64LGTv2i64.Fragment>;
// i64 setult:
def : I64SETCCNegCond<setule, I64LGTr64>;
@@ -201,8 +201,8 @@ multiclass CompareGreaterThan64 {
defm I64GT: CompareLogicalGreaterThan64;
def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
-def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
- I64GTv2i64.Fragment>;
+//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+// I64GTv2i64.Fragment>;
// i64 setult:
def : I64SETCCNegCond<setle, I64GTr64>;
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 80693e1..396a921 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -294,15 +294,19 @@ namespace {
((vecVT == MVT::v2i64) &&
((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
(SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
- (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0))))
- return Select(bvNode);
+ (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) {
+ HandleSDNode Dummy(SDValue(bvNode, 0));
+ if (SDNode *N = Select(bvNode))
+ return N;
+ return Dummy.getValue().getNode();
+ }
// No, need to emit a constant pool spill:
std::vector<Constant*> CV;
for (size_t i = 0; i < bvNode->getNumOperands(); ++i) {
ConstantSDNode *V = dyn_cast<ConstantSDNode > (bvNode->getOperand(i));
- CV.push_back(const_cast<ConstantInt *> (V->getConstantIntValue()));
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
}
Constant *CP = ConstantVector::get(CV);
@@ -311,10 +315,15 @@ namespace {
SDValue CGPoolOffset =
SPU::LowerConstantPool(CPIdx, *CurDAG,
SPUtli.getSPUTargetMachine());
- return SelectCode(CurDAG->getLoad(vecVT, dl,
- CurDAG->getEntryNode(), CGPoolOffset,
- PseudoSourceValue::getConstantPool(), 0,
- false, Alignment).getNode());
+
+ HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl,
+ CurDAG->getEntryNode(), CGPoolOffset,
+ PseudoSourceValue::getConstantPool(),0,
+ false, false, Alignment));
+ CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
}
/// Select - Convert the specified operand from a target-independent to a
@@ -390,10 +399,6 @@ namespace {
return false;
}
- /// InstructionSelect - This callback is invoked by
- /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
- virtual void InstructionSelect();
-
virtual const char *getPassName() const {
return "Cell SPU DAG->DAG Pattern Instruction Selection";
}
@@ -411,16 +416,6 @@ namespace {
};
}
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void
-SPUDAGToDAGISel::InstructionSelect()
-{
- // Select target instructions for the DAG.
- SelectRoot(*CurDAG);
- CurDAG->RemoveDeadNodes();
-}
-
/*!
\arg Op The ISD instruction operand
\arg N The address to be tested
@@ -692,9 +687,8 @@ SPUDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[8];
DebugLoc dl = N->getDebugLoc();
- if (N->isMachineOpcode()) {
+ if (N->isMachineOpcode())
return NULL; // Already selected.
- }
if (Opc == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
@@ -759,43 +753,67 @@ SPUDAGToDAGISel::Select(SDNode *N) {
}
SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode());
- SDNode *PromoteScalar =
- SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
- Op0VecVT, Op0).getNode());
-
+
+ HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
+ Op0VecVT, Op0));
+
+ SDValue PromScalar;
+ if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode()))
+ PromScalar = SDValue(N, 0);
+ else
+ PromScalar = PromoteScalar.getValue();
+
SDValue zextShuffle =
CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
- SDValue(PromoteScalar, 0),
- SDValue(PromoteScalar, 0),
+ PromScalar, PromScalar,
SDValue(shufMaskLoad, 0));
- // N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we
- // re-use it in the VEC2PREFSLOT selection without needing to explicitly
- // call SelectCode (it's already done for us.)
- SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, dl, OpVecVT, zextShuffle).getNode());
- return SelectCode(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
- zextShuffle).getNode());
+ HandleSDNode Dummy2(zextShuffle);
+ if (SDNode *N = SelectCode(Dummy2.getValue().getNode()))
+ zextShuffle = SDValue(N, 0);
+ else
+ zextShuffle = Dummy2.getValue();
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
+ zextShuffle));
+
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ SelectCode(Dummy.getValue().getNode());
+ return Dummy.getValue().getNode();
} else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
- return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
- N->getOperand(0), N->getOperand(1),
- SDValue(CGLoad, 0)).getNode());
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)));
+
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
} else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode());
- return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
- N->getOperand(0), N->getOperand(1),
- SDValue(CGLoad, 0)).getNode());
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)));
+
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
} else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
- return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
- N->getOperand(0), N->getOperand(1),
- SDValue(CGLoad, 0)).getNode());
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)));
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
} else if (Opc == ISD::TRUNCATE) {
SDValue Op0 = N->getOperand(0);
if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL)
@@ -832,17 +850,14 @@ SPUDAGToDAGISel::Select(SDNode *N) {
}
}
} else if (Opc == ISD::SHL) {
- if (OpVT == MVT::i64) {
+ if (OpVT == MVT::i64)
return SelectSHLi64(N, OpVT);
- }
} else if (Opc == ISD::SRL) {
- if (OpVT == MVT::i64) {
+ if (OpVT == MVT::i64)
return SelectSRLi64(N, OpVT);
- }
} else if (Opc == ISD::SRA) {
- if (OpVT == MVT::i64) {
+ if (OpVT == MVT::i64)
return SelectSRAi64(N, OpVT);
- }
} else if (Opc == ISD::FNEG
&& (OpVT == MVT::f64 || OpVT == MVT::v2f64)) {
DebugLoc dl = N->getDebugLoc();
@@ -1224,13 +1239,15 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
? shufmask.getNode()
: emitBuildVector(shufmask.getNode()));
- SDNode *shufNode =
- Select(CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
+ SDValue shufNode =
+ CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
SDValue(lhsNode, 0), SDValue(rhsNode, 0),
- SDValue(shufMaskNode, 0)).getNode());
-
- return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT,
- SDValue(shufNode, 0));
+ SDValue(shufMaskNode, 0));
+ HandleSDNode Dummy(shufNode);
+ SDNode *SN = SelectCode(Dummy.getValue().getNode());
+ if (SN == 0) SN = Dummy.getValue().getNode();
+
+ return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(SN, 0));
} else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT,
SDValue(emitBuildVector(i64vec.getNode()), 0));
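The SPUISelDAGToDAG.cpp hunks above all converge on one idiom: wrap the freshly built SDValue in a HandleSDNode so it survives any ReplaceAllUsesWith the selector performs, then fall back to the handle when SelectCode returns null (meaning the node was selected in place). A minimal sketch of that idiom, assuming it sits inside SPUDAGToDAGISel next to the code above; the member name is hypothetical:

    SDNode *SPUDAGToDAGISel::selectOrKeep(SDValue NewVal) {
      HandleSDNode Dummy(NewVal);          // keeps NewVal alive across RAUW
      if (SDNode *Sel = SelectCode(Dummy.getValue().getNode()))
        return Sel;                        // selector produced a replacement node
      return Dummy.getValue().getNode();   // otherwise return the surviving node
    }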
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index fe0f019..e863ee3 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -25,7 +25,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
@@ -118,8 +118,7 @@ namespace {
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), false,
/*isReturnValueUsed=*/true,
- Callee, Args, DAG, Op.getDebugLoc(),
- DAG.GetOrdering(InChain.getNode()));
+ Callee, Args, DAG, Op.getDebugLoc());
return CallInfo.first;
}
@@ -669,7 +668,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
// Re-emit as a v16i8 vector load
result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
LN->getSrcValue(), LN->getSrcValueOffset(),
- LN->isVolatile(), 16);
+ LN->isVolatile(), LN->isNonTemporal(), 16);
// Update the chain
the_chain = result.getValue(1);
@@ -820,7 +819,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
// Re-emit as a v16i8 vector load
alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
SN->getSrcValue(), SN->getSrcValueOffset(),
- SN->isVolatile(), 16);
+ SN->isVolatile(), SN->isNonTemporal(), 16);
// Update the chain
the_chain = alignLoadVec.getValue(1);
@@ -861,7 +860,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
result = DAG.getStore(the_chain, dl, result, basePtr,
LN->getSrcValue(), LN->getSrcValueOffset(),
- LN->isVolatile(), LN->getAlignment());
+ LN->isVolatile(), LN->isNonTemporal(),
+ LN->getAlignment());
#if 0 && !defined(NDEBUG)
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
@@ -1086,7 +1086,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
// or we're forced to do vararg
int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0);
ArgOffset += StackSlotSize;
}
@@ -1108,7 +1108,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
true, false);
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
- SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
+ SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
+ false, false, 0);
Chain = Store.getOperand(0);
MemOps.push_back(Store);
@@ -1190,7 +1191,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (ArgRegIdx != NumArgRegs) {
RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
} else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+ false, false, 0));
ArgOffset += StackSlotSize;
}
break;
@@ -1199,7 +1201,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (ArgRegIdx != NumArgRegs) {
RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
} else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+ false, false, 0));
ArgOffset += StackSlotSize;
}
break;
@@ -1212,7 +1215,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (ArgRegIdx != NumArgRegs) {
RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
} else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+ false, false, 0));
ArgOffset += StackSlotSize;
}
break;
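The SPUISelLowering.cpp hunks above track a SelectionDAG API change: getLoad and getStore now take an isNonTemporal flag between isVolatile and the alignment argument, which is why each call gains an extra false. A minimal sketch of one such call, assuming the DAG, dl, Chain and Ptr values visible in the surrounding code:

    SDValue Ld = DAG.getLoad(MVT::i32, dl, Chain, Ptr,
                             NULL, 0,   // source Value* and offset
                             false,     // isVolatile
                             false,     // isNonTemporal (the new parameter)
                             4);        // alignment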
diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp
index 5ef3c6b..3e17a51 100644
--- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp
+++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp
@@ -34,5 +34,8 @@ SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) {
// Exception handling is not supported on CellSPU (think about it: you only
// have 256K for code+data. Would you support exception handling?)
ExceptionsType = ExceptionHandling::None;
+
+ // SPU assembly requires ".section" before ".bss"
+ UsesELFSectionDirectiveForBSS = true;
}
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 3dd8ca7..9c5893c 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -221,7 +221,7 @@ namespace {
APFloat APF = APFloat(CFP->getValueAPF()); // copy
if (CFP->getType() == Type::getFloatTy(CFP->getContext()))
APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
- Out << "ConstantFP::get(getGlobalContext(), ";
+ Out << "ConstantFP::get(mod->getContext(), ";
Out << "APFloat(";
#if HAVE_PRINTF_A
char Buffer[100];
@@ -344,23 +344,23 @@ namespace {
std::string CppWriter::getCppName(const Type* Ty) {
// First, handle the primitive types .. easy
- if (Ty->isPrimitiveType() || Ty->isInteger()) {
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
switch (Ty->getTypeID()) {
- case Type::VoidTyID: return "Type::getVoidTy(getGlobalContext())";
+ case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())";
case Type::IntegerTyID: {
unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth();
- return "IntegerType::get(getGlobalContext(), " + utostr(BitWidth) + ")";
+ return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")";
}
- case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(getGlobalContext())";
- case Type::FloatTyID: return "Type::getFloatTy(getGlobalContext())";
- case Type::DoubleTyID: return "Type::getDoubleTy(getGlobalContext())";
- case Type::LabelTyID: return "Type::getLabelTy(getGlobalContext())";
+ case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())";
+ case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())";
+ case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())";
+ case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())";
default:
error("Invalid primitive type");
break;
}
// shouldn't be returned, but make it sensible
- return "Type::getVoidTy(getGlobalContext())";
+ return "Type::getVoidTy(mod->getContext())";
}
// Now, see if we've seen the type before and return that
@@ -493,7 +493,7 @@ namespace {
bool CppWriter::printTypeInternal(const Type* Ty) {
// We don't print definitions for primitive types
- if (Ty->isPrimitiveType() || Ty->isInteger())
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy())
return false;
// If we already defined this type, we don't need to define it again.
@@ -514,7 +514,7 @@ namespace {
TypeMap::const_iterator I = UnresolvedTypes.find(Ty);
if (I == UnresolvedTypes.end()) {
Out << "PATypeHolder " << typeName;
- Out << "_fwd = OpaqueType::get(getGlobalContext());";
+ Out << "_fwd = OpaqueType::get(mod->getContext());";
nl(Out);
UnresolvedTypes[Ty] = typeName;
}
@@ -615,7 +615,7 @@ namespace {
}
case Type::OpaqueTyID: {
Out << "OpaqueType* " << typeName;
- Out << " = OpaqueType::get(getGlobalContext());";
+ Out << " = OpaqueType::get(mod->getContext());";
nl(Out);
break;
}
@@ -686,7 +686,7 @@ namespace {
// For primitive types and types already defined, just add a name
TypeMap::const_iterator TNI = TypeNames.find(TI->second);
- if (TI->second->isInteger() || TI->second->isPrimitiveType() ||
+ if (TI->second->isIntegerTy() || TI->second->isPrimitiveType() ||
TNI != TypeNames.end()) {
Out << "mod->addTypeName(\"";
printEscapedString(TI->first);
@@ -751,7 +751,7 @@ namespace {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
std::string constValue = CI->getValue().toString(10, true);
Out << "ConstantInt* " << constName
- << " = ConstantInt::get(getGlobalContext(), APInt("
+ << " = ConstantInt::get(mod->getContext(), APInt("
<< cast<IntegerType>(CI->getType())->getBitWidth()
<< ", StringRef(\"" << constValue << "\"), 10));";
} else if (isa<ConstantAggregateZero>(CV)) {
@@ -769,7 +769,7 @@ namespace {
CA->getType()->getElementType() ==
Type::getInt8Ty(CA->getContext())) {
Out << "Constant* " << constName <<
- " = ConstantArray::get(getGlobalContext(), \"";
+ " = ConstantArray::get(mod->getContext(), \"";
std::string tmp = CA->getAsString();
bool nullTerminate = false;
if (tmp[tmp.length()-1] == 0) {
@@ -995,7 +995,7 @@ namespace {
void CppWriter::printVariableHead(const GlobalVariable *GV) {
nl(Out) << "GlobalVariable* " << getCppName(GV);
if (is_inline) {
- Out << " = mod->getGlobalVariable(getGlobalContext(), ";
+ Out << " = mod->getGlobalVariable(mod->getContext(), ";
printEscapedString(GV->getName());
Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)";
nl(Out) << "if (!" << getCppName(GV) << ") {";
@@ -1094,7 +1094,7 @@ namespace {
case Instruction::Ret: {
const ReturnInst* ret = cast<ReturnInst>(I);
- Out << "ReturnInst::Create(getGlobalContext(), "
+ Out << "ReturnInst::Create(mod->getContext(), "
<< (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");";
break;
}
@@ -1171,7 +1171,7 @@ namespace {
}
case Instruction::Unreachable: {
Out << "new UnreachableInst("
- << "getGlobalContext(), "
+ << "mod->getContext(), "
<< bbname << ");";
break;
}
@@ -1673,7 +1673,7 @@ namespace {
BI != BE; ++BI) {
std::string bbname(getCppName(BI));
Out << "BasicBlock* " << bbname <<
- " = BasicBlock::Create(getGlobalContext(), \"";
+ " = BasicBlock::Create(mod->getContext(), \"";
if (BI->hasName())
printEscapedString(BI->getName());
Out << "\"," << getCppName(BI->getParent()) << ",0);";
@@ -2009,7 +2009,8 @@ char CppWriter::ID = 0;
bool CPPTargetMachine::addPassesToEmitWholeFile(PassManager &PM,
formatted_raw_ostream &o,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
PM.add(new CppWriter(o));
return false;
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 1f74f76..b7aae91 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -30,7 +30,8 @@ struct CPPTargetMachine : public TargetMachine {
virtual bool addPassesToEmitWholeFile(PassManager &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel);
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify);
virtual const TargetData *getTargetData() const { return 0; }
};
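The CppBackend hunks above also switch the C++ they emit from getGlobalContext() to mod->getContext(), so generated types and constants are created in the context of the module being built rather than the process-wide global context. A minimal sketch of the generated-code difference, assuming a Module pointer named mod as in the emitted code:

    // Before: IntegerType::get(getGlobalContext(), 32)
    // After, as emitted by the patched CppWriter:
    llvm::LLVMContext &Ctx = mod->getContext();
    const llvm::IntegerType *I32 = llvm::IntegerType::get(Ctx, 32);
    llvm::Constant *C42 = llvm::ConstantInt::get(Ctx, llvm::APInt(32, 42));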
diff --git a/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt
new file mode 100644
index 0000000..cfb2fc8
--- /dev/null
+++ b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,9 @@
+include_directories(
+ ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/..
+ )
+
+add_llvm_library(LLVMMBlazeAsmPrinter
+ MBlazeAsmPrinter.cpp
+ )
+add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen)
\ No newline at end of file
diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
new file mode 100644
index 0000000..6fe1026
--- /dev/null
+++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
@@ -0,0 +1,302 @@
+//===-- MBlazeAsmPrinter.cpp - MBlaze LLVM assembly writer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format MBlaze assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-asm-printer"
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeTargetMachine.h"
+#include "MBlazeMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/MathExtras.h"
+#include <cctype>
+
+using namespace llvm;
+
+namespace {
+ class MBlazeAsmPrinter : public AsmPrinter {
+ const MBlazeSubtarget *Subtarget;
+ public:
+ explicit MBlazeAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
+ MCContext &Ctx, MCStreamer &Streamer,
+ const MCAsmInfo *T )
+ : AsmPrinter(O, TM, Ctx, Streamer, T) {
+ Subtarget = &TM.getSubtarget<MBlazeSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "MBlaze Assembly Printer";
+ }
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ void printOperand(const MachineInstr *MI, int opNum);
+ void printUnsignedImm(const MachineInstr *MI, int opNum);
+ void printFSLImm(const MachineInstr *MI, int opNum);
+ void printMemOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier = 0);
+ void printFCCOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier = 0);
+ void printSavedRegsBitmask();
+ void printHex32(unsigned int Value);
+
+ const char *emitCurrentABIString();
+ void emitFrameDirective();
+
+ void printInstruction(const MachineInstr *MI); // autogenerated.
+ void EmitInstruction(const MachineInstr *MI) {
+ printInstruction(MI);
+ O << '\n';
+ }
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ static const char *getRegisterName(unsigned RegNo);
+
+ virtual void EmitFunctionEntryLabel();
+ void EmitStartOfAsmFile(Module &M);
+ };
+} // end of anonymous namespace
+
+#include "MBlazeGenAsmWriter.inc"
+
+//===----------------------------------------------------------------------===//
+//
+// MBlaze Asm Directives
+//
+// -- Frame directive "frame Stackpointer, Stacksize, RARegister"
+// Describe the stack frame.
+//
+// -- Mask directives "mask bitmask, offset"
+// Tells the assembler which registers are saved and where.
+// bitmask - contains a little endian bitset indicating which registers are
+// saved on function prologue (e.g. with a 0x80000000 mask, the
+// assembler knows that register 31 (RA) is saved at prologue).
+// offset - the position before stack pointer subtraction indicating where
+// the first saved register on prologue is located (see the example below).
+//
+// Consider the following function prologue:
+//
+// .frame R19,48,R15
+// .mask 0xc0000000,-8
+// addiu R1, R1, -48
+// sw R15, 40(R1)
+// sw R19, 36(R1)
+//
+// With a 0xc0000000 mask, the assembler knows the register 15 (R15) and
+// 19 (R19) are saved at prologue. As the save order on prologue is from
+// left to right, R15 is saved first. A -8 offset means that after the
+// stack pointer subtraction, the first register in the mask (R15) will be
+// saved at address 48-8=40.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Mask directives
+//===----------------------------------------------------------------------===//
+
+// Create a bitmask with all callee saved registers for CPU or Floating Point
+// registers. For CPU registers consider RA, GP and FP for saving if necessary.
+void MBlazeAsmPrinter::printSavedRegsBitmask() {
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+ const MBlazeFunctionInfo *MBlazeFI = MF->getInfo<MBlazeFunctionInfo>();
+
+ // CPU Saved Registers Bitmasks
+ unsigned int CPUBitmask = 0;
+
+ // Set the CPU Bitmasks
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(CSI[i].getReg());
+ if (CSI[i].getRegClass() == MBlaze::CPURegsRegisterClass)
+ CPUBitmask |= (1 << RegNum);
+ }
+
+ // Return Address and Frame registers must also be set in CPUBitmask.
+ if (RI.hasFP(*MF))
+ CPUBitmask |= (1 << MBlazeRegisterInfo::
+ getRegisterNumbering(RI.getFrameRegister(*MF)));
+
+ if (MFI->hasCalls())
+ CPUBitmask |= (1 << MBlazeRegisterInfo::
+ getRegisterNumbering(RI.getRARegister()));
+
+ // Print CPUBitmask
+ O << "\t.mask \t"; printHex32(CPUBitmask); O << ','
+ << MBlazeFI->getCPUTopSavedRegOff() << '\n';
+}
+
+// Print a 32-bit hex number, including leading zeros.
+void MBlazeAsmPrinter::printHex32(unsigned int Value) {
+ O << "0x";
+ for (int i = 7; i >= 0; i--)
+ O << utohexstr( (Value & (0xF << (i*4))) >> (i*4) );
+}
+
+//===----------------------------------------------------------------------===//
+// Frame and Set directives
+//===----------------------------------------------------------------------===//
+
+/// Frame Directive
+void MBlazeAsmPrinter::emitFrameDirective() {
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+
+ unsigned stackReg = RI.getFrameRegister(*MF);
+ unsigned returnReg = RI.getRARegister();
+ unsigned stackSize = MF->getFrameInfo()->getStackSize();
+
+
+ O << "\t.frame\t" << getRegisterName(stackReg)
+ << ',' << stackSize << ','
+ << getRegisterName(returnReg)
+ << '\n';
+}
+
+void MBlazeAsmPrinter::EmitFunctionEntryLabel() {
+ O << "\t.ent\t" << *CurrentFnSym << '\n';
+ OutStreamer.EmitLabel(CurrentFnSym);
+}
+
+/// EmitFunctionBodyStart - Targets can override this to emit stuff before
+/// the first basic block in the function.
+void MBlazeAsmPrinter::EmitFunctionBodyStart() {
+ emitFrameDirective();
+ printSavedRegsBitmask();
+}
+
+/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
+/// the last basic block in the function.
+void MBlazeAsmPrinter::EmitFunctionBodyEnd() {
+ O << "\t.end\t" << *CurrentFnSym << '\n';
+}
+
+// Print out an operand for an inline asm expression.
+bool MBlazeAsmPrinter::
+PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,const char *ExtraCode){
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << getRegisterName(MO.getReg());
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << (int)MO.getImm();
+ break;
+
+ case MachineOperand::MO_FPImmediate: {
+ const ConstantFP* fp = MO.getFPImm();
+ printHex32(fp->getValueAPF().bitcastToAPInt().getZExtValue());
+ O << ";\t# immediate = " << *fp;
+ break;
+ }
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol(OutContext);
+ return;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *GetGlobalValueSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI"
+ << getFunctionNumber() << "_" << MO.getIndex();
+ if (MO.getOffset())
+ O << "+" << MO.getOffset();
+ break;
+
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+}
+
+void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.getType() == MachineOperand::MO_Immediate)
+ O << (unsigned int)MO.getImm();
+ else
+ printOperand(MI, opNum);
+}
+
+void MBlazeAsmPrinter::printFSLImm(const MachineInstr *MI, int opNum) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.getType() == MachineOperand::MO_Immediate)
+ O << "rfsl" << (unsigned int)MO.getImm();
+ else
+ printOperand(MI, opNum);
+}
+
+void MBlazeAsmPrinter::
+printMemOperand(const MachineInstr *MI, int opNum, const char *Modifier) {
+ printOperand(MI, opNum+1);
+ O << ", ";
+ printOperand(MI, opNum);
+}
+
+void MBlazeAsmPrinter::
+printFCCOperand(const MachineInstr *MI, int opNum, const char *Modifier) {
+ const MachineOperand& MO = MI->getOperand(opNum);
+ O << MBlaze::MBlazeFCCToString((MBlaze::CondCode)MO.getImm());
+}
+
+void MBlazeAsmPrinter::EmitStartOfAsmFile(Module &M) {
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMBlazeAsmPrinter() {
+ RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget);
+}
diff --git a/lib/Target/MBlaze/AsmPrinter/Makefile b/lib/Target/MBlaze/AsmPrinter/Makefile
new file mode 100644
index 0000000..c8e4d8f
--- /dev/null
+++ b/lib/Target/MBlaze/AsmPrinter/Makefile
@@ -0,0 +1,17 @@
+##===- lib/Target/MBlaze/AsmPrinter/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeAsmPrinter
+
+# Hack: we need to include 'main' MBlaze target directory to grab
+# private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt
new file mode 100644
index 0000000..c93e3df
--- /dev/null
+++ b/lib/Target/MBlaze/CMakeLists.txt
@@ -0,0 +1,27 @@
+set(LLVM_TARGET_DEFINITIONS MBlaze.td)
+
+tablegen(MBlazeGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(MBlazeGenRegisterNames.inc -gen-register-enums)
+tablegen(MBlazeGenRegisterInfo.inc -gen-register-desc)
+tablegen(MBlazeGenInstrNames.inc -gen-instr-enums)
+tablegen(MBlazeGenInstrInfo.inc -gen-instr-desc)
+tablegen(MBlazeGenAsmWriter.inc -gen-asm-writer)
+tablegen(MBlazeGenDAGISel.inc -gen-dag-isel)
+tablegen(MBlazeGenCallingConv.inc -gen-callingconv)
+tablegen(MBlazeGenSubtarget.inc -gen-subtarget)
+tablegen(MBlazeGenIntrinsics.inc -gen-tgt-intrinsic)
+
+add_llvm_target(MBlazeCodeGen
+ MBlazeDelaySlotFiller.cpp
+ MBlazeInstrInfo.cpp
+ MBlazeISelDAGToDAG.cpp
+ MBlazeISelLowering.cpp
+ MBlazeMCAsmInfo.cpp
+ MBlazeRegisterInfo.cpp
+ MBlazeSubtarget.cpp
+ MBlazeTargetMachine.cpp
+ MBlazeTargetObjectFile.cpp
+ MBlazeIntrinsicInfo.cpp
+ )
+
+target_link_libraries (LLVMMBlazeCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/MBlaze/MBlaze.h b/lib/Target/MBlaze/MBlaze.h
new file mode 100644
index 0000000..f9d828b
--- /dev/null
+++ b/lib/Target/MBlaze/MBlaze.h
@@ -0,0 +1,39 @@
+//===-- MBlaze.h - Top-level interface for MBlaze ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM MBlaze back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_MBLAZE_H
+#define TARGET_MBLAZE_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class MBlazeTargetMachine;
+ class FunctionPass;
+ class MachineCodeEmitter;
+ class formatted_raw_ostream;
+
+ FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM);
+ FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM);
+
+ extern Target TheMBlazeTarget;
+} // end namespace llvm;
+
+// Defines symbolic names for MBlaze registers. This defines a mapping from
+// register name to register number.
+#include "MBlazeGenRegisterNames.inc"
+
+// Defines symbolic names for the MBlaze instructions.
+#include "MBlazeGenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td
new file mode 100644
index 0000000..1679752
--- /dev/null
+++ b/lib/Target/MBlaze/MBlaze.td
@@ -0,0 +1,85 @@
+//===- MBlaze.td - Describe the MBlaze Target Machine -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the MBlaze target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MBlazeRegisterInfo.td"
+include "MBlazeSchedule.td"
+include "MBlazeIntrinsics.td"
+include "MBlazeInstrInfo.td"
+include "MBlazeCallingConv.td"
+
+def MBlazeInstrInfo : InstrInfo {
+ let TSFlagsFields = [];
+ let TSFlagsShifts = [];
+}
+
+
+//===----------------------------------------------------------------------===//
+// Microblaze Subtarget features //
+//===----------------------------------------------------------------------===//
+
+def FeaturePipe3 : SubtargetFeature<"pipe3", "HasPipe3", "true",
+ "Implements 3-stage pipeline.">;
+def FeatureBarrel : SubtargetFeature<"barrel", "HasBarrel", "true",
+ "Implements barrel shifter.">;
+def FeatureDiv : SubtargetFeature<"div", "HasDiv", "true",
+ "Implements hardware divider.">;
+def FeatureMul : SubtargetFeature<"mul", "HasMul", "true",
+ "Implements hardware multiplier.">;
+def FeatureFSL : SubtargetFeature<"fsl", "HasFSL", "true",
+ "Implements FSL instructions.">;
+def FeatureEFSL : SubtargetFeature<"efsl", "HasEFSL", "true",
+ "Implements extended FSL instructions.">;
+def FeatureMSRSet : SubtargetFeature<"msrset", "HasMSRSet", "true",
+ "Implements MSR register set and clear.">;
+def FeatureException : SubtargetFeature<"exception", "HasException", "true",
+ "Implements hardware exception support.">;
+def FeaturePatCmp : SubtargetFeature<"patcmp", "HasPatCmp", "true",
+ "Implements pattern compare instruction.">;
+def FeatureFPU : SubtargetFeature<"fpu", "HasFPU", "true",
+ "Implements floating point unit.">;
+def FeatureESR : SubtargetFeature<"esr", "HasESR", "true",
+ "Implements ESR and EAR registers">;
+def FeaturePVR : SubtargetFeature<"pvr", "HasPVR", "true",
+ "Implements processor version register.">;
+def FeatureMul64 : SubtargetFeature<"mul64", "HasMul64", "true",
+ "Implements multiplier with 64-bit result">;
+def FeatureSqrt : SubtargetFeature<"sqrt", "HasSqrt", "true",
+ "Implements sqrt and floating point convert.">;
+def FeatureMMU : SubtargetFeature<"mmu", "HasMMU", "true",
+ "Implements memory management unit.">;
+
+//===----------------------------------------------------------------------===//
+// MBlaze processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, MBlazeGenericItineraries, Features>;
+
+
+def : Proc<"v400", []>;
+def : Proc<"v500", []>;
+def : Proc<"v600", []>;
+def : Proc<"v700", []>;
+def : Proc<"v710", []>;
+
+def MBlaze : Target {
+ let InstructionSet = MBlazeInstrInfo;
+}
diff --git a/lib/Target/MBlaze/MBlazeCallingConv.td b/lib/Target/MBlaze/MBlazeCallingConv.td
new file mode 100644
index 0000000..ddd4998
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeCallingConv.td
@@ -0,0 +1,26 @@
+//===- MBlazeCallingConv.td - Calling Conventions for MBlaze ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for MBlaze architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+ CCIf<!strconcat("State.getTarget().getSubtarget<MBlazeSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze ABI Calling Convention
+//===----------------------------------------------------------------------===//
+
+def RetCC_MBlaze : CallingConv<[
+ // i32 are returned in registers R3, R4
+ CCIfType<[i32], CCAssignToReg<[R3, R4]>>,
+
+ // f32 are returned in registers F3, F4
+ CCIfType<[f32], CCAssignToReg<[F3, F4]>>
+]>;
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
new file mode 100644
index 0000000..42fea25
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -0,0 +1,75 @@
+//===-- DelaySlotFiller.cpp - MBlaze delay slot filler --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Simple pass to fill delay slots with NOPs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delay-slot-filler"
+
+#include "MBlaze.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+
+namespace {
+ struct Filler : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Filler(TargetMachine &tm)
+ : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "MBlaze Delay Slot Filler";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ };
+ char Filler::ID = 0;
+} // end of anonymous namespace
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// Currently, we fill delay slots with NOPs. We assume there is only one
+/// delay slot per delayed instruction.
+bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ if (I->getDesc().hasDelaySlot()) {
+ MachineBasicBlock::iterator J = I;
+ ++J;
+ BuildMI(MBB, J, I->getDebugLoc(), TII->get(MBlaze::NOP));
+ ++FilledSlots;
+ Changed = true;
+ }
+ return Changed;
+}
+
+/// createMBlazeDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in MBlaze MachineFunctions
+FunctionPass *llvm::createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &tm) {
+ return new Filler(tm);
+}
+
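MBlazeTargetMachine.cpp is listed in the CMake hunk above but not shown in this part of the diff; as an assumption only, a pass like this is normally scheduled from the target machine's addPreEmitPass hook:

    // Sketch, not taken from this diff: hypothetical wiring of the filler pass.
    bool MBlazeTargetMachine::addPreEmitPass(PassManagerBase &PM,
                                             CodeGenOpt::Level OptLevel) {
      PM.add(createMBlazeDelaySlotFillerPass(*this));  // append NOPs after delays
      return true;
    }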
diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
new file mode 100644
index 0000000..7e59c4a
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
@@ -0,0 +1,339 @@
+//===-- MBlazeISelDAGToDAG.cpp - A dag to dag inst selector for MBlaze ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MBlaze target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-isel"
+#include "MBlaze.h"
+#include "MBlazeISelLowering.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeRegisterInfo.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlazeDAGToDAGISel - MBlaze specific code to select MBlaze machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace {
+
+class MBlazeDAGToDAGISel : public SelectionDAGISel {
+
+ /// TM - Keep a reference to MBlazeTargetMachine.
+ MBlazeTargetMachine &TM;
+
+ /// Subtarget - Keep a pointer to the MBlazeSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const MBlazeSubtarget &Subtarget;
+
+public:
+ explicit MBlazeDAGToDAGISel(MBlazeTargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm), Subtarget(tm.getSubtarget<MBlazeSubtarget>()) {}
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MBlaze DAG->DAG Pattern Instruction Selection";
+ }
+private:
+ // Include the pieces autogenerated from the target description.
+ #include "MBlazeGenDAGISel.inc"
+
+ /// getTargetMachine - Return a reference to the TargetMachine, casted
+ /// to the target-specific type.
+ const MBlazeTargetMachine &getTargetMachine() {
+ return static_cast<const MBlazeTargetMachine &>(TM);
+ }
+
+ /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
+ /// to the target-specific type.
+ const MBlazeInstrInfo *getInstrInfo() {
+ return getTargetMachine().getInstrInfo();
+ }
+
+ SDNode *getGlobalBaseReg();
+ SDNode *Select(SDNode *N);
+
+ // Complex Pattern.
+ bool SelectAddr(SDNode *Op, SDValue N,
+ SDValue &Base, SDValue &Offset);
+
+ // Address Selection
+ bool SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index);
+ bool SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base);
+
+ // getI32Imm - Return a target constant with the specified value, of type i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+};
+
+}
+
+/// isIntS32Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 32-bit value. If so, this returns true and the
+/// immediate.
+static bool isIntS32Immediate(SDNode *N, int32_t &Imm) {
+ unsigned Opc = N->getOpcode();
+ if (Opc != ISD::Constant)
+ return false;
+
+ Imm = (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+
+static bool isIntS32Immediate(SDValue Op, int32_t &Imm) {
+ return isIntS32Immediate(Op.getNode(), Imm);
+}
+
+
+/// SelectAddrRegReg - Given the specified address, check to see if it
+/// can be represented as an indexed [r+r] operation. Returns false if it
+/// can be more efficiently represented with [r+imm].
+bool MBlazeDAGToDAGISel::
+SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) {
+ if (N.getOpcode() == ISD::FrameIndex) return false;
+ if (N.getOpcode() == ISD::TargetExternalSymbol ||
+ N.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ int32_t imm = 0;
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+ if (isIntS32Immediate(N.getOperand(1), imm))
+ return false; // r+i
+
+ if (N.getOperand(0).getOpcode() == ISD::TargetJumpTable ||
+ N.getOperand(1).getOpcode() == ISD::TargetJumpTable)
+ return false; // jump tables.
+
+ Base = N.getOperand(1);
+ Index = N.getOperand(0);
+ return true;
+ }
+
+ return false;
+}
+
+/// Returns true if the address N can be represented by a base register plus
+/// a signed 32-bit displacement [r+imm], and if it is not better
+/// represented as reg+reg.
+bool MBlazeDAGToDAGISel::
+SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) {
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddrRegReg(Op, N, Disp, Base))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+ int32_t imm = 0;
+ if (isIntS32Immediate(N.getOperand(1), imm)) {
+ Disp = CurDAG->getTargetConstant(imm, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ DEBUG( errs() << "WESLEY: Using Operand Immediate\n" );
+ return true; // [r+i]
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address.
+ uint32_t Imm = CN->getZExtValue();
+ Disp = CurDAG->getTargetConstant(Imm, CN->getValueType(0));
+ Base = CurDAG->getRegister(MBlaze::R0, CN->getValueType(0));
+ DEBUG( errs() << "WESLEY: Using Constant Node\n" );
+ return true;
+ }
+
+ Disp = CurDAG->getTargetConstant(0, TM.getTargetLowering()->getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// GOT address into a register.
+SDNode *MBlazeDAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+/// ComplexPattern used on MBlazeInstrInfo
+/// Used on MBlaze Load/Store instructions
+bool MBlazeDAGToDAGISel::
+SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base) {
+ // if Address is FI, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ // on PIC code Load GA
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if ((Addr.getOpcode() == ISD::TargetGlobalAddress) ||
+ (Addr.getOpcode() == ISD::TargetConstantPool) ||
+ (Addr.getOpcode() == ISD::TargetJumpTable)){
+ Base = CurDAG->getRegister(MBlaze::R15, MVT::i32);
+ Offset = Addr;
+ return true;
+ }
+ } else {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ }
+
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ if (Predicate_immSExt16(CN)) {
+
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ } else {
+ Base = Addr.getOperand(0);
+ }
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+/// Select instructions not customized! Used for
+/// expanded, promoted and normal instructions
+SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ // Dump information about the Node being selected
+ DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+ return NULL;
+ }
+
+ ///
+ // Instruction Selection not handled by the auto-generated
+ // tablegen selection should be handled here.
+ ///
+ switch(Opcode) {
+ default: break;
+
+ // Get target GOT address.
+ case ISD::GLOBAL_OFFSET_TABLE:
+ return getGlobalBaseReg();
+
+ case ISD::FrameIndex: {
+ SDValue imm = CurDAG->getTargetConstant(0, MVT::i32);
+ int FI = dyn_cast<FrameIndexSDNode>(Node)->getIndex();
+ EVT VT = Node->getValueType(0);
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
+ unsigned Opc = MBlaze::ADDI;
+ if (Node->hasOneUse())
+ return CurDAG->SelectNodeTo(Node, Opc, VT, TFI, imm);
+ return CurDAG->getMachineNode(Opc, dl, VT, TFI, imm);
+ }
+
+
+ /// Handle direct and indirect calls when using PIC. On PIC, when
+ /// GOT is smaller than about 64k (small code) the GA target is
+ /// loaded with only one instruction. Otherwise GA's target must
+ /// be loaded with 3 instructions.
+ case MBlazeISD::JmpLink: {
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Callee = Node->getOperand(1);
+ SDValue R20Reg = CurDAG->getRegister(MBlaze::R20, MVT::i32);
+ SDValue InFlag(0, 0);
+
+ if ( (isa<GlobalAddressSDNode>(Callee)) ||
+ (isa<ExternalSymbolSDNode>(Callee)) )
+ {
+ /// Direct call for global addresses and external symbols
+ SDValue GPReg = CurDAG->getRegister(MBlaze::R15, MVT::i32);
+
+ // Use load to get GOT target
+ SDValue Ops[] = { Callee, GPReg, Chain };
+ SDValue Load = SDValue(CurDAG->getMachineNode(MBlaze::LW, dl,
+ MVT::i32, MVT::Other, Ops, 3), 0);
+ Chain = Load.getValue(1);
+
+ // Call target must be on R20
+ Chain = CurDAG->getCopyToReg(Chain, dl, R20Reg, Load, InFlag);
+ } else
+ /// Indirect call
+ Chain = CurDAG->getCopyToReg(Chain, dl, R20Reg, Callee, InFlag);
+
+ // Emit Jump and Link Register
+ SDNode *ResNode = CurDAG->getMachineNode(MBlaze::BRLID, dl, MVT::Other,
+ MVT::Flag, R20Reg, Chain);
+ Chain = SDValue(ResNode, 0);
+ InFlag = SDValue(ResNode, 1);
+ ReplaceUses(SDValue(Node, 0), Chain);
+ ReplaceUses(SDValue(Node, 1), InFlag);
+ return ResNode;
+ }
+ }
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ");
+ if (ResNode == NULL || ResNode == Node)
+ DEBUG(Node->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(errs() << "\n");
+ return ResNode;
+}
+
+/// createMBlazeISelDag - This pass converts a legalized DAG into a
+/// MBlaze-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createMBlazeISelDag(MBlazeTargetMachine &TM) {
+ return new MBlazeDAGToDAGISel(TM);
+}
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
new file mode 100644
index 0000000..f0864d0
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -0,0 +1,975 @@
+//===-- MBlazeISelLowering.cpp - MBlaze DAG Lowering Implementation -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that MBlaze uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-lower"
+#include "MBlazeISelLowering.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeTargetMachine.h"
+#include "MBlazeTargetObjectFile.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+const char *MBlazeTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ case MBlazeISD::JmpLink : return "MBlazeISD::JmpLink";
+ case MBlazeISD::GPRel : return "MBlazeISD::GPRel";
+ case MBlazeISD::Wrap : return "MBlazeISD::Wrap";
+ case MBlazeISD::ICmp : return "MBlazeISD::ICmp";
+ case MBlazeISD::Ret : return "MBlazeISD::Ret";
+ case MBlazeISD::Select_CC : return "MBlazeISD::Select_CC";
+ default : return NULL;
+ }
+}
+
+MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
+ : TargetLowering(TM, new MBlazeTargetObjectFile()) {
+ Subtarget = &TM.getSubtarget<MBlazeSubtarget>();
+
+ // MBlaze does not have i1 type, so use i32 for
+ // setcc operations results (slt, sgt, ...).
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ // Set up the register classes
+ addRegisterClass(MVT::i32, MBlaze::CPURegsRegisterClass);
+ if (Subtarget->hasFPU()) {
+ addRegisterClass(MVT::f32, MBlaze::FGR32RegisterClass);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ }
+
+ // Floating point operations which are not supported
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::f32, Expand);
+
+ // Load extended operations for i1 types must be promoted
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // MBlaze has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ // If the processor doesn't support multiply then expand it
+ if (!Subtarget->hasMul()) {
+ setOperationAction(ISD::MUL, MVT::i32, Expand);
+ }
+
+ // If the processor doesn't support 64-bit multiply then expand
+ if (!Subtarget->hasMul() || !Subtarget->hasMul64()) {
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ }
+
+ // If the processor doesn't support division then expand
+ if (!Subtarget->hasDiv()) {
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ }
+
+ // Expand unsupported conversions
+ setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
+
+ // Expand SELECT_CC
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ // MBlaze doesn't have MUL_LOHI
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+
+ // Used by legalize types to correctly generate the setcc result.
+ // Without this, every float setcc comes with an AND/OR with the result;
+ // we don't want this, since the fpcmp result goes to a flag register,
+ // which is used implicitly by brcond and select operations.
+ AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
+ AddPromotedToType(ISD::SELECT, MVT::i1, MVT::i32);
+ AddPromotedToType(ISD::SELECT_CC, MVT::i1, MVT::i32);
+
+ // MBlaze Custom Operations
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+
+ // Variable Argument support
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+
+
+ // Operations not directly supported by MBlaze.
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ // We don't have EH label support yet.
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // Use the default for now
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
+ // MBlaze doesn't have extending float->double load/store
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ setStackPointerRegisterToSaveRestore(MBlaze::R1);
+ computeRegisterProperties();
+}
+
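+/// getSetCCResultType - MBlaze produces all setcc results in a full
+/// 32-bit general purpose register, so always return MVT::i32.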
+MVT::SimpleValueType MBlazeTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i32;
+}
+
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned MBlazeTargetLowering::getFunctionAlignment(const Function *) const {
+ return 2;
+}
+
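+/// LowerOperation - Dispatch the nodes marked Custom in the constructor
+/// to their target-specific lowering routines.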
+SDValue MBlazeTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode())
+ {
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ }
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Lower helper functions
+//===----------------------------------------------------------------------===//
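+/// EmitInstrWithCustomInserter - Expand the shift and select pseudo
+/// instructions into explicit control flow after instruction selection.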
+MachineBasicBlock* MBlazeTargetLowering::
+EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*,
+ MachineBasicBlock*> *EM) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ switch (MI->getOpcode()) {
+ default: assert(false && "Unexpected instr type to insert");
+ case MBlaze::ShiftRL:
+ case MBlaze::ShiftRA:
+ case MBlaze::ShiftL: {
+ // To "insert" a shift left instruction, we actually have to insert a
+ // simple loop. The incoming instruction knows the destination vreg to
+ // set, the source vreg to operate over and the shift amount.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // start:
+ // andi samt, samt, 31
+ // beqid samt, finish
+ // add dst, src, r0
+ // loop:
+ // addik samt, samt, -1
+ // <shift> dst, dst (add, sra or srl depending on the opcode)
+ // bneid samt, loop
+ // nop
+ // finish:
+ MachineFunction *F = BB->getParent();
+ MachineRegisterInfo &R = F->getRegInfo();
+ MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB);
+
+ unsigned IAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+ BuildMI(BB, dl, TII->get(MBlaze::ANDI), IAMT)
+ .addReg(MI->getOperand(2).getReg())
+ .addImm(31);
+
+ unsigned IVAL = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+ BuildMI(BB, dl, TII->get(MBlaze::ADDI), IVAL)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0);
+
+ BuildMI(BB, dl, TII->get(MBlaze::BEQID))
+ .addReg(IAMT)
+ .addMBB(finish);
+
+ F->insert(It, loop);
+ F->insert(It, finish);
+
+ // Update machine-CFG edges by first adding all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ // Also inform sdisel of the edge changes.
+ for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
+ e = BB->succ_end(); i != e; ++i) {
+ EM->insert(std::make_pair(*i, finish));
+ finish->addSuccessor(*i);
+ }
+
+ // Next, remove all successors of the current block, and add the true
+ // and fallthrough blocks as its successors.
+ while(!BB->succ_empty())
+ BB->removeSuccessor(BB->succ_begin());
+ BB->addSuccessor(loop);
+ BB->addSuccessor(finish);
+
+ // Next, add the finish block as a successor of the loop block
+ loop->addSuccessor(finish);
+ loop->addSuccessor(loop);
+
+ unsigned DST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+ unsigned NDST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+ BuildMI(loop, dl, TII->get(MBlaze::PHI), DST)
+ .addReg(IVAL).addMBB(BB)
+ .addReg(NDST).addMBB(loop);
+
+ unsigned SAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+ unsigned NAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+ BuildMI(loop, dl, TII->get(MBlaze::PHI), SAMT)
+ .addReg(IAMT).addMBB(BB)
+ .addReg(NAMT).addMBB(loop);
+
+ if (MI->getOpcode() == MBlaze::ShiftL)
+ BuildMI(loop, dl, TII->get(MBlaze::ADD), NDST).addReg(DST).addReg(DST);
+ else if (MI->getOpcode() == MBlaze::ShiftRA)
+ BuildMI(loop, dl, TII->get(MBlaze::SRA), NDST).addReg(DST);
+ else if (MI->getOpcode() == MBlaze::ShiftRL)
+ BuildMI(loop, dl, TII->get(MBlaze::SRL), NDST).addReg(DST);
+ else
+ llvm_unreachable( "Cannot lower unknown shift instruction" );
+
+ BuildMI(loop, dl, TII->get(MBlaze::ADDI), NAMT)
+ .addReg(SAMT)
+ .addImm(-1);
+
+ BuildMI(loop, dl, TII->get(MBlaze::BNEID))
+ .addReg(NAMT)
+ .addMBB(loop);
+
+ BuildMI(finish, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ .addReg(IVAL).addMBB(BB)
+ .addReg(NDST).addMBB(loop);
+
+ // The pseudo instruction is no longer needed so remove it
+ F->DeleteMachineInstr(MI);
+ return finish;
+ }
+
+ case MBlaze::Select_FCC:
+ case MBlaze::Select_CC: {
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // BB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bCC r1, r0, dneBB
+ // fallthrough --> flsBB
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *flsBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *dneBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ unsigned Opc;
+ switch (MI->getOperand(4).getImm()) {
+ default: llvm_unreachable( "Unknown branch condition" );
+ case MBlazeCC::EQ: Opc = MBlaze::BNEID; break;
+ case MBlazeCC::NE: Opc = MBlaze::BEQID; break;
+ case MBlazeCC::GT: Opc = MBlaze::BLEID; break;
+ case MBlazeCC::LT: Opc = MBlaze::BGEID; break;
+ case MBlazeCC::GE: Opc = MBlaze::BLTID; break;
+ case MBlazeCC::LE: Opc = MBlaze::BGTID; break;
+ }
+
+ BuildMI(BB, dl, TII->get(Opc))
+ .addReg(MI->getOperand(3).getReg())
+ .addMBB(dneBB);
+
+ F->insert(It, flsBB);
+ F->insert(It, dneBB);
+
+ // Update machine-CFG edges by first adding all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ // Also inform sdisel of the edge changes.
+ for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
+ e = BB->succ_end(); i != e; ++i) {
+ EM->insert(std::make_pair(*i, dneBB));
+ dneBB->addSuccessor(*i);
+ }
+
+ // Next, remove all successors of the current block, and add the true
+ // and fallthrough blocks as its successors.
+ while(!BB->succ_empty())
+ BB->removeSuccessor(BB->succ_begin());
+ BB->addSuccessor(flsBB);
+ BB->addSuccessor(dneBB);
+ flsBB->addSuccessor(dneBB);
+
+ // dneBB:
+ // %Result = phi [ %FalseValue, flsBB ], [ %TrueValue, BB ]
+ // ...
+ //BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ // .addReg(MI->getOperand(1).getReg()).addMBB(flsBB)
+ // .addReg(MI->getOperand(2).getReg()).addMBB(BB);
+
+ BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(flsBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(BB);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return dneBB;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+//
+
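+/// LowerSELECT_CC - Lower a select_cc node into an MBlazeISD::Select_CC
+/// node driven by the flag result of an MBlazeISD::ICmp on the operands.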
+SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc;
+
+ SDValue CompareFlag;
+ if (LHS.getValueType() == MVT::i32) {
+ Opc = MBlazeISD::Select_CC;
+ CompareFlag = DAG.getNode(MBlazeISD::ICmp, dl, MVT::i32, LHS, RHS)
+ .getValue(1);
+ } else {
+ llvm_unreachable( "Cannot lower select_cc with unknown type" );
+ }
+
+ return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
+ CompareFlag);
+}
+
+SDValue MBlazeTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+
+ return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, GA);
+}
+
+SDValue MBlazeTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
+ llvm_unreachable("TLS not implemented for MicroBlaze.");
+ return SDValue(); // Not reached
+}
+
+SDValue MBlazeTargetLowering::
+LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+ SDValue ResNode;
+ SDValue HiPart;
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ unsigned char OpFlag = IsPIC ? MBlazeII::MO_GOT : MBlazeII::MO_ABS_HILO;
+
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
+ return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, JTI);
+ //return JTI;
+}
+
+SDValue MBlazeTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+ SDValue ResNode;
+ EVT PtrVT = Op.getValueType();
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+ Constant *C = N->getConstVal();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MBlazeII::MO_ABS_HILO);
+ return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, CP);
+}
+
+SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FI = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0,
+ false, false, 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeGenCallingConv.inc"
+
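+// CC_MBlaze2 - Custom calling convention handler: the first six i32 or f32
+// arguments are assigned to registers R5-R10 or F5-F10; any remaining
+// arguments are passed on the stack. Stack space is also reserved for
+// arguments that are passed in registers.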
+static bool CC_MBlaze2(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ static const unsigned RegsSize=6;
+ static const unsigned IntRegs[] = {
+ MBlaze::R5, MBlaze::R6, MBlaze::R7,
+ MBlaze::R8, MBlaze::R9, MBlaze::R10
+ };
+
+ static const unsigned FltRegs[] = {
+ MBlaze::F5, MBlaze::F6, MBlaze::F7,
+ MBlaze::F8, MBlaze::F9, MBlaze::F10
+ };
+
+ unsigned Reg=0;
+
+ // Promote i8 and i16
+ if (LocVT == MVT::i8 || LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ if (ValVT == MVT::i32) {
+ Reg = State.AllocateReg(IntRegs, RegsSize);
+ LocVT = MVT::i32;
+ } else if (ValVT == MVT::f32) {
+ Reg = State.AllocateReg(FltRegs, RegsSize);
+ LocVT = MVT::f32;
+ }
+
+ if (!Reg) {
+ unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
+ unsigned Offset = State.AllocateStack(SizeInBytes, SizeInBytes);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ } else {
+ unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
+ State.AllocateStack(SizeInBytes, SizeInBytes);
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ }
+
+ return false; // CC must always match
+}
+
+//===----------------------------------------------------------------------===//
+// Call Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerCall - Function arguments are copied from virtual registers to
+/// physical registers or the stack frame; CALLSEQ_START and CALLSEQ_END
+/// nodes are emitted. TODO: isVarArg, isTailCall.
+SDValue MBlazeTargetLowering::
+LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+ // MBlaze does not yet support tail call optimization
+ isTailCall = false;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
+ *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs, CC_MBlaze2);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // First/LastArgStackLoc contains the first/last
+ // "at stack" argument location.
+ int LastArgStackLoc = 0;
+ unsigned FirstStackArgLoc = 0;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ EVT RegVT = VA.getLocVT();
+ SDValue Arg = Outs[i].Val;
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
+ break;
+ }
+
+ // Arguments that can be passed in a register are kept in the
+ // RegsToPass vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ // Registers can't reach this point; the argument must be in memory.
+ assert(VA.isMemLoc());
+
+ // Create the frame index object for this incoming parameter
+ LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset());
+ int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ LastArgStackLoc, true, false);
+
+ SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
+
+ // Emit an ISD::STORE that stores the
+ // parameter value to a stack location.
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+ false, false, 0));
+ }
+ }
+
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ unsigned char OpFlag = MBlazeII::MO_NO_FLAG;
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(),
+ getPointerTy(), 0, OpFlag);
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
+ getPointerTy(), OpFlag);
+
+ // MBlazeJmpLink = #chain, #target_address, #opt_in_flags...
+ // = Chain, Callee, Reg#1, Reg#2, ...
+ //
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+ }
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(MBlazeISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue MBlazeTargetLowering::
+LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv,
+ bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_MBlaze);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Formal Arguments Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerFormalArguments - Transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments placed on the stack.
+SDValue MBlazeTargetLowering::
+LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+
+ unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF);
+ VarArgsFrameIndex = 0;
+
+ // Used with varargs to accumulate store chains.
+ std::vector<SDValue> OutChains;
+
+ // Keep track of the last register used for arguments
+ unsigned ArgRegEnd = 0;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_MBlaze2);
+ SDValue StackPtr;
+
+ unsigned FirstStackArgLoc = 0;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored on registers
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ ArgRegEnd = VA.getLocReg();
+ TargetRegisterClass *RC = 0;
+
+ if (RegVT == MVT::i32)
+ RC = MBlaze::CPURegsRegisterClass;
+ else if (RegVT == MVT::f32)
+ RC = MBlaze::FGR32RegisterClass;
+ else
+ llvm_unreachable("RegVT not supported by LowerFormalArguments");
+
+ // Transform the arguments stored in
+ // physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(ArgRegEnd, RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it has been passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size. If it is a floating point value
+ // then convert to the correct type.
+ if (VA.getLocInfo() != CCValAssign::Full) {
+ unsigned Opcode = 0;
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ Opcode = ISD::AssertSext;
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ Opcode = ISD::AssertZext;
+ if (Opcode)
+ ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ }
+
+ InVals.push_back(ArgValue);
+
+ } else { // VA.isRegLoc()
+
+ // sanity check
+ assert(VA.isMemLoc());
+
+ // The last argument is not a register
+ ArgRegEnd = 0;
+
+ // The stack pointer offset is relative to the caller stack frame.
+ // Since the real stack size is unknown here, a negative SPOffset
+ // is used so there's a way to adjust these offsets when the stack
+ // size gets known (in EliminateFrameIndex). A dummy SPOffset is
+ // used instead of a direct negative address (which is recorded to
+ // be used in emitPrologue) to avoid miscalculating the first stack
+ // offset in PEI::calculateFrameObjectOffsets.
+ // Arguments are always 32-bit.
+ unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+ int FI = MFI->CreateFixedObject(ArgSize, 0, true, false);
+ MBlazeFI->recordLoadArgsFI(FI, -(ArgSize+
+ (FirstStackArgLoc + VA.getLocMemOffset())));
+
+ // Create load nodes to retrieve arguments from the stack
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
+ false, false, 0));
+ }
+ }
+
+ // To meet the ABI, when varargs are passed in registers, the registers
+ // must have their values written to the caller's stack frame. If the last
+ // argument was placed on the stack, there's no need to save any register.
+ if ((isVarArg) && ArgRegEnd) {
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getRegister(StackReg, getPointerTy());
+
+ // The last register argument that must be saved is MBlaze::R10
+ TargetRegisterClass *RC = MBlaze::CPURegsRegisterClass;
+
+ unsigned Begin = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R5);
+ unsigned Start = MBlazeRegisterInfo::getRegisterNumbering(ArgRegEnd+1);
+ unsigned End = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R10);
+ unsigned StackLoc = ArgLocs.size()-1 + (Start - Begin);
+
+ for (; Start <= End; ++Start, ++StackLoc) {
+ unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(Start);
+ unsigned LiveReg = MF.addLiveIn(Reg, RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32);
+
+ int FI = MFI->CreateFixedObject(4, 0, true, false);
+ MBlazeFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4)));
+ SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
+ OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0,
+ false, false, 0));
+
+ // Record the frame index of the first variable argument
+ // which is a value necessary to VASTART.
+ if (!VarArgsFrameIndex)
+ VarArgsFrameIndex = FI;
+ }
+ }
+
+ // All stores are grouped in one node to allow the matching between
+ // the size of Ins and InVals. This only happens for vararg functions.
+ if (!OutChains.empty()) {
+ OutChains.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue MBlazeTargetLowering::
+LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG) {
+ // CCValAssign - represent the assignment of
+ // the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_MBlaze);
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ Outs[i].Val, Flag);
+
+ // Guarantee that all emitted copies are chained together through the
+ // flag, so they cannot be reordered or separated.
+ Flag = Chain.getValue(1);
+ }
+
+ // Return on MBlaze is always a "rtsd R15, 8"
+ if (Flag.getNode())
+ return DAG.getNode(MBlazeISD::Ret, dl, MVT::Other,
+ Chain, DAG.getRegister(MBlaze::R15, MVT::i32), Flag);
+ else // Return Void
+ return DAG.getNode(MBlazeISD::Ret, dl, MVT::Other,
+ Chain, DAG.getRegister(MBlaze::R15, MVT::i32));
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+MBlazeTargetLowering::ConstraintType MBlazeTargetLowering::
+getConstraintType(const std::string &Constraint) const
+{
+ // MBlaze-specific constraints:
+ //
+ // 'd' : An address register. Equivalent to r.
+ // 'y' : Equivalent to r; retained for
+ // backwards compatibility.
+ // 'f' : Floating Point registers.
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default : break;
+ case 'd':
+ case 'y':
+ case 'f':
+ return C_RegisterClass;
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// getRegForInlineAsmConstraint - Given a constraint letter (e.g. "r"),
+/// return the register number and register class that best represent it.
+/// This should only be used for C_RegisterClass constraints.
+std::pair<unsigned, const TargetRegisterClass*> MBlazeTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return std::make_pair(0U, MBlaze::CPURegsRegisterClass);
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, MBlaze::FGR32RegisterClass);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+/// getRegClassForInlineAsmConstraint - Given a register class constraint
+/// letter, like 'r', return a list of registers that can be used to satisfy
+/// the constraint.
+std::vector<unsigned> MBlazeTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
+ if (Constraint.size() != 1)
+ return std::vector<unsigned>();
+
+ switch (Constraint[0]) {
+ default : break;
+ case 'r':
+ // GCC MBlaze Constraint Letters
+ case 'd':
+ case 'y':
+ return make_vector<unsigned>(
+ MBlaze::R3, MBlaze::R4, MBlaze::R5, MBlaze::R6,
+ MBlaze::R7, MBlaze::R9, MBlaze::R10, MBlaze::R11,
+ MBlaze::R12, MBlaze::R19, MBlaze::R20, MBlaze::R21,
+ MBlaze::R22, MBlaze::R23, MBlaze::R24, MBlaze::R25,
+ MBlaze::R26, MBlaze::R27, MBlaze::R28, MBlaze::R29,
+ MBlaze::R30, MBlaze::R31, 0);
+
+ case 'f':
+ return make_vector<unsigned>(
+ MBlaze::F3, MBlaze::F4, MBlaze::F5, MBlaze::F6,
+ MBlaze::F7, MBlaze::F9, MBlaze::F10, MBlaze::F11,
+ MBlaze::F12, MBlaze::F19, MBlaze::F20, MBlaze::F21,
+ MBlaze::F22, MBlaze::F23, MBlaze::F24, MBlaze::F25,
+ MBlaze::F26, MBlaze::F27, MBlaze::F28, MBlaze::F29,
+ MBlaze::F30, MBlaze::F31, 0);
+ }
+ return std::vector<unsigned>();
+}
+
+bool MBlazeTargetLowering::
+isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The MBlaze target isn't yet aware of offsets.
+ return false;
+}
+
+bool MBlazeTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ return VT != MVT::f32;
+}
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
new file mode 100644
index 0000000..f8b1470
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -0,0 +1,149 @@
+//===-- MBlazeISelLowering.h - MBlaze DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that MBlaze uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBlazeISELLOWERING_H
+#define MBlazeISELLOWERING_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+
+namespace llvm {
+ namespace MBlazeCC {
+ enum CC {
+ FIRST = 0,
+ EQ,
+ NE,
+ GT,
+ LT,
+ GE,
+ LE
+ };
+ }
+
+ namespace MBlazeISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Jump and link (call)
+ JmpLink,
+
+ // Handle gp_rel (small data/bss sections) relocation.
+ GPRel,
+
+ // Select CC Pseudo Instruction
+ Select_CC,
+
+ // Wrap up multiple types of instructions
+ Wrap,
+
+ // Integer Compare
+ ICmp,
+
+ // Return
+ Ret
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+
+ class MBlazeTargetLowering : public TargetLowering {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+
+ public:
+
+ explicit MBlazeTargetLowering(MBlazeTargetMachine &TM);
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ /// getTargetNodeName - This method returns the name of a target-specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - get the ISD::SETCC result ValueType
+ MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+
+ virtual unsigned getFunctionAlignment(const Function *F) const;
+ private:
+ // Subtarget Info
+ const MBlazeSubtarget *Subtarget;
+
+
+ // Lower Operand helpers
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ // Lower Operand specifics
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG);
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG);
+
+ virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+
+ // Inline asm support
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ };
+}
+
+#endif // MBlazeISELLOWERING_H
diff --git a/lib/Target/MBlaze/MBlazeInstrFPU.td b/lib/Target/MBlaze/MBlazeInstrFPU.td
new file mode 100644
index 0000000..a48a8c9
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeInstrFPU.td
@@ -0,0 +1,223 @@
+//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze profiles and nodes
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Memory Access Instructions
+//===----------------------------------------------------------------------===//
+class LoadFM<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TA<op, 0x000, (outs FGR32:$dst), (ins memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set FGR32:$dst, (OpNode xaddr:$addr))], IILoad>;
+
+class LoadFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TAI<op, (outs FGR32:$dst), (ins memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set FGR32:$dst, (OpNode iaddr:$addr))], IILoad>;
+
+class StoreFM<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TA<op, 0x000, (outs), (ins FGR32:$dst, memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode FGR32:$dst, xaddr:$addr)], IIStore>;
+
+class StoreFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TAI<op, (outs), (ins FGR32:$dst, memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode FGR32:$dst, iaddr:$addr)], IIStore>;
+
+class ArithF<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TA<op, flags, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set FGR32:$dst, (OpNode FGR32:$b, FGR32:$c))], itin>;
+
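+// Floating point compare instructions write their result to a general
+// purpose register; they carry no selection patterns here and are matched
+// through the setcc patterns at the end of this file.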
+class CmpFN<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs CPURegs:$dst), (ins FGR32:$b, FGR32:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], itin>;
+
+class ArithFR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TA<op, flags, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set FGR32:$dst, (OpNode FGR32:$b, FGR32:$c))], itin>;
+
+class ArithF2<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TF<op, flags, (outs FGR32:$dst), (ins FGR32:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+
+class ArithIF<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TF<op, flags, (outs FGR32:$dst), (ins CPURegs:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+
+class ArithFI<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TF<op, flags, (outs CPURegs:$dst), (ins FGR32:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+
+class LogicF<bits<6> op, string instr_asm> :
+ TAI<op, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [],
+ IIAlu>;
+
+class LogicFI<bits<6> op, string instr_asm> :
+ TAI<op, (outs FGR32:$dst), (ins FGR32:$b, fimm:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [],
+ IIAlu>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FPU Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+let Predicates=[HasFPU] in {
+ def FOR : LogicF<0x28, "or ">;
+ def FORI : LogicFI<0x28, "ori ">;
+ def FADD : ArithF<0x16, 0x000, "fadd ", fadd, IIAlu>;
+ def FRSUB : ArithFR<0x16, 0x080, "frsub ", fsub, IIAlu>;
+ def FMUL : ArithF<0x16, 0x100, "fmul ", fmul, IIAlu>;
+ def FDIV : ArithF<0x16, 0x180, "fdiv ", fdiv, IIAlu>;
+
+ def LWF : LoadFM<0x32, "lw ", load>;
+ def LWFI : LoadFMI<0x32, "lwi ", load>;
+
+ def SWF : StoreFM<0x32, "sw ", store>;
+ def SWFI : StoreFMI<0x32, "swi ", store>;
+}
+
+let Predicates=[HasFPU,HasSqrt] in {
+ def FLT : ArithIF<0x16, 0x280, "flt ", IIAlu>;
+ def FINT : ArithFI<0x16, 0x300, "fint ", IIAlu>;
+ def FSQRT : ArithF2<0x16, 0x300, "fsqrt ", IIAlu>;
+}
+
+let isAsCheapAsAMove = 1 in {
+ def FCMP_UN : CmpFN<0x16, 0x200, "fcmp.un", IIAlu>;
+ def FCMP_LT : CmpFN<0x16, 0x210, "fcmp.lt", IIAlu>;
+ def FCMP_EQ : CmpFN<0x16, 0x220, "fcmp.eq", IIAlu>;
+ def FCMP_LE : CmpFN<0x16, 0x230, "fcmp.le", IIAlu>;
+ def FCMP_GT : CmpFN<0x16, 0x240, "fcmp.gt", IIAlu>;
+ def FCMP_NE : CmpFN<0x16, 0x250, "fcmp.ne", IIAlu>;
+ def FCMP_GE : CmpFN<0x16, 0x260, "fcmp.ge", IIAlu>;
+}
+
+
+let usesCustomInserter = 1 in {
+ def Select_FCC : MBlazePseudo<(outs FGR32:$dst),
+ (ins FGR32:$T, FGR32:$F, CPURegs:$CMP, i32imm:$CC),
+ "; SELECT_FCC PSEUDO!",
+ []>;
+}
+
+// Floating point conversions
+let Predicates=[HasFPU] in {
+ def : Pat<(sint_to_fp CPURegs:$V), (FLT CPURegs:$V)>;
+ def : Pat<(fp_to_sint FGR32:$V), (FINT FGR32:$V)>;
+ def : Pat<(fsqrt FGR32:$V), (FSQRT FGR32:$V)>;
+}
+
+// SET_CC operations
+let Predicates=[HasFPU] in {
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETEQ),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_EQ FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETNE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_EQ FGR32:$L, FGR32:$R), 1)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETOEQ),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_EQ FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETONE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (XOR (FCMP_UN FGR32:$L, FGR32:$R),
+ (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETONE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (OR (FCMP_UN FGR32:$L, FGR32:$R),
+ (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETGT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_GT FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETLT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_LT FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETGE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_GE FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETLE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_LE FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETOGT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_GT FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETOLT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_LT FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETOGE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_GE FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETOLE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_LE FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETUEQ),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (OR (FCMP_UN FGR32:$L, FGR32:$R),
+ (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETUNE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_NE FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETUGT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (OR (FCMP_UN FGR32:$L, FGR32:$R),
+ (FCMP_GT FGR32:$L, FGR32:$R)), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETULT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (OR (FCMP_UN FGR32:$L, FGR32:$R),
+ (FCMP_LT FGR32:$L, FGR32:$R)), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETUGE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (OR (FCMP_UN FGR32:$L, FGR32:$R),
+ (FCMP_GE FGR32:$L, FGR32:$R)), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETULE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (OR (FCMP_UN FGR32:$L, FGR32:$R),
+ (FCMP_LE FGR32:$L, FGR32:$R)), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETO),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_UN FGR32:$L, FGR32:$R), 1)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETUO),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_UN FGR32:$L, FGR32:$R), 2)>;
+}
+
+// SELECT operations
+def : Pat<(select CPURegs:$C, FGR32:$T, FGR32:$F),
+ (Select_FCC FGR32:$T, FGR32:$F, CPURegs:$C, 2)>;
+
+//===----------------------------------------------------------------------===//
+// Patterns for Floating Point Instructions
+//===----------------------------------------------------------------------===//
+def : Pat<(f32 fpimm:$imm), (FORI F0, fpimm:$imm)>;
diff --git a/lib/Target/MBlaze/MBlazeInstrFSL.td b/lib/Target/MBlaze/MBlazeInstrFSL.td
new file mode 100644
index 0000000..b59999e
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeInstrFSL.td
@@ -0,0 +1,153 @@
+//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FSL Instruction Formats
+//===----------------------------------------------------------------------===//
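+// The FSL (Fast Simplex Link) instructions move data between general
+// purpose registers and the processor's FSL stream ports, addressing the
+// port either with a 4-bit immediate or dynamically through a register.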
+class FSLGetD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> :
+ TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b))], IIAlu>;
+
+class FSLGet<bits<6> op, string instr_asm, Intrinsic OpNode> :
+ TAI<op, (outs CPURegs:$dst), (ins fslimm:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [(set CPURegs:$dst, (OpNode immZExt4:$b))], IIAlu>;
+
+class FSLPutD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> :
+ TA<op, flags, (outs), (ins CPURegs:$v, CPURegs:$b),
+ !strconcat(instr_asm, " $v, $b"),
+ [(OpNode CPURegs:$v, CPURegs:$b)], IIAlu>;
+
+class FSLPut<bits<6> op, string instr_asm, Intrinsic OpNode> :
+ TAI<op, (outs), (ins CPURegs:$v, fslimm:$b),
+ !strconcat(instr_asm, " $v, $b"),
+ [(OpNode CPURegs:$v, immZExt4:$b)], IIAlu>;
+
+class FSLPutTD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> :
+ TA<op, flags, (outs), (ins CPURegs:$b),
+ !strconcat(instr_asm, " $b"),
+ [(OpNode CPURegs:$b)], IIAlu>;
+
+class FSLPutT<bits<6> op, string instr_asm, Intrinsic OpNode> :
+ TAI<op, (outs), (ins fslimm:$b),
+ !strconcat(instr_asm, " $b"),
+ [(OpNode immZExt4:$b)], IIAlu>;
+
+//===----------------------------------------------------------------------===//
+// FSL Get Instructions
+//===----------------------------------------------------------------------===//
+def GET : FSLGet<0x1B, "get ", int_mblaze_fsl_get>;
+def AGET : FSLGet<0x1B, "aget ", int_mblaze_fsl_aget>;
+def CGET : FSLGet<0x1B, "cget ", int_mblaze_fsl_cget>;
+def CAGET : FSLGet<0x1B, "caget ", int_mblaze_fsl_caget>;
+def EGET : FSLGet<0x1B, "eget ", int_mblaze_fsl_eget>;
+def EAGET : FSLGet<0x1B, "eaget ", int_mblaze_fsl_eaget>;
+def ECGET : FSLGet<0x1B, "ecget ", int_mblaze_fsl_ecget>;
+def ECAGET : FSLGet<0x1B, "ecaget ", int_mblaze_fsl_ecaget>;
+def NGET : FSLGet<0x1B, "nget ", int_mblaze_fsl_nget>;
+def NAGET : FSLGet<0x1B, "naget ", int_mblaze_fsl_naget>;
+def NCGET : FSLGet<0x1B, "ncget ", int_mblaze_fsl_ncget>;
+def NCAGET : FSLGet<0x1B, "ncaget ", int_mblaze_fsl_ncaget>;
+def NEGET : FSLGet<0x1B, "neget ", int_mblaze_fsl_neget>;
+def NEAGET : FSLGet<0x1B, "neaget ", int_mblaze_fsl_neaget>;
+def NECGET : FSLGet<0x1B, "necget ", int_mblaze_fsl_necget>;
+def NECAGET : FSLGet<0x1B, "necaget ", int_mblaze_fsl_necaget>;
+def TGET : FSLGet<0x1B, "tget ", int_mblaze_fsl_tget>;
+def TAGET : FSLGet<0x1B, "taget ", int_mblaze_fsl_taget>;
+def TCGET : FSLGet<0x1B, "tcget ", int_mblaze_fsl_tcget>;
+def TCAGET : FSLGet<0x1B, "tcaget ", int_mblaze_fsl_tcaget>;
+def TEGET : FSLGet<0x1B, "teget ", int_mblaze_fsl_teget>;
+def TEAGET : FSLGet<0x1B, "teaget ", int_mblaze_fsl_teaget>;
+def TECGET : FSLGet<0x1B, "tecget ", int_mblaze_fsl_tecget>;
+def TECAGET : FSLGet<0x1B, "tecaget ", int_mblaze_fsl_tecaget>;
+def TNGET : FSLGet<0x1B, "tnget ", int_mblaze_fsl_tnget>;
+def TNAGET : FSLGet<0x1B, "tnaget ", int_mblaze_fsl_tnaget>;
+def TNCGET : FSLGet<0x1B, "tncget ", int_mblaze_fsl_tncget>;
+def TNCAGET : FSLGet<0x1B, "tncaget ", int_mblaze_fsl_tncaget>;
+def TNEGET : FSLGet<0x1B, "tneget ", int_mblaze_fsl_tneget>;
+def TNEAGET : FSLGet<0x1B, "tneaget ", int_mblaze_fsl_tneaget>;
+def TNECGET : FSLGet<0x1B, "tnecget ", int_mblaze_fsl_tnecget>;
+def TNECAGET : FSLGet<0x1B, "tnecaget ", int_mblaze_fsl_tnecaget>;
+
+//===----------------------------------------------------------------------===//
+// FSL Dynamic Get Instructions
+//===----------------------------------------------------------------------===//
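+// The dynamic variants take the FSL port number from a register instead of
+// an immediate; they currently map to the same intrinsics as the immediate
+// forms.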
+def GETD : FSLGetD<0x1B, 0x00, "getd ", int_mblaze_fsl_get>;
+def AGETD : FSLGetD<0x1B, 0x00, "agetd ", int_mblaze_fsl_aget>;
+def CGETD : FSLGetD<0x1B, 0x00, "cgetd ", int_mblaze_fsl_cget>;
+def CAGETD : FSLGetD<0x1B, 0x00, "cagetd ", int_mblaze_fsl_caget>;
+def EGETD : FSLGetD<0x1B, 0x00, "egetd ", int_mblaze_fsl_eget>;
+def EAGETD : FSLGetD<0x1B, 0x00, "eagetd ", int_mblaze_fsl_eaget>;
+def ECGETD : FSLGetD<0x1B, 0x00, "ecgetd ", int_mblaze_fsl_ecget>;
+def ECAGETD : FSLGetD<0x1B, 0x00, "ecagetd ", int_mblaze_fsl_ecaget>;
+def NGETD : FSLGetD<0x1B, 0x00, "ngetd ", int_mblaze_fsl_nget>;
+def NAGETD : FSLGetD<0x1B, 0x00, "nagetd ", int_mblaze_fsl_naget>;
+def NCGETD : FSLGetD<0x1B, 0x00, "ncgetd ", int_mblaze_fsl_ncget>;
+def NCAGETD : FSLGetD<0x1B, 0x00, "ncagetd ", int_mblaze_fsl_ncaget>;
+def NEGETD : FSLGetD<0x1B, 0x00, "negetd ", int_mblaze_fsl_neget>;
+def NEAGETD : FSLGetD<0x1B, 0x00, "neagetd ", int_mblaze_fsl_neaget>;
+def NECGETD : FSLGetD<0x1B, 0x00, "necgetd ", int_mblaze_fsl_necget>;
+def NECAGETD : FSLGetD<0x1B, 0x00, "necagetd ", int_mblaze_fsl_necaget>;
+def TGETD : FSLGetD<0x1B, 0x00, "tgetd ", int_mblaze_fsl_tget>;
+def TAGETD : FSLGetD<0x1B, 0x00, "tagetd ", int_mblaze_fsl_taget>;
+def TCGETD : FSLGetD<0x1B, 0x00, "tcgetd ", int_mblaze_fsl_tcget>;
+def TCAGETD : FSLGetD<0x1B, 0x00, "tcagetd ", int_mblaze_fsl_tcaget>;
+def TEGETD : FSLGetD<0x1B, 0x00, "tegetd ", int_mblaze_fsl_teget>;
+def TEAGETD : FSLGetD<0x1B, 0x00, "teagetd ", int_mblaze_fsl_teaget>;
+def TECGETD : FSLGetD<0x1B, 0x00, "tecgetd ", int_mblaze_fsl_tecget>;
+def TECAGETD : FSLGetD<0x1B, 0x00, "tecagetd ", int_mblaze_fsl_tecaget>;
+def TNGETD : FSLGetD<0x1B, 0x00, "tngetd ", int_mblaze_fsl_tnget>;
+def TNAGETD : FSLGetD<0x1B, 0x00, "tnagetd ", int_mblaze_fsl_tnaget>;
+def TNCGETD : FSLGetD<0x1B, 0x00, "tncgetd ", int_mblaze_fsl_tncget>;
+def TNCAGETD : FSLGetD<0x1B, 0x00, "tncagetd ", int_mblaze_fsl_tncaget>;
+def TNEGETD : FSLGetD<0x1B, 0x00, "tnegetd ", int_mblaze_fsl_tneget>;
+def TNEAGETD : FSLGetD<0x1B, 0x00, "tneagetd ", int_mblaze_fsl_tneaget>;
+def TNECGETD : FSLGetD<0x1B, 0x00, "tnecgetd ", int_mblaze_fsl_tnecget>;
+def TNECAGETD : FSLGetD<0x1B, 0x00, "tnecagetd", int_mblaze_fsl_tnecaget>;
+
+//===----------------------------------------------------------------------===//
+// FSL Put Instructions
+//===----------------------------------------------------------------------===//
+def PUT : FSLPut<0x1B, "put ", int_mblaze_fsl_put>;
+def APUT : FSLPut<0x1B, "aput ", int_mblaze_fsl_aput>;
+def CPUT : FSLPut<0x1B, "cput ", int_mblaze_fsl_cput>;
+def CAPUT : FSLPut<0x1B, "caput ", int_mblaze_fsl_caput>;
+def NPUT : FSLPut<0x1B, "nput ", int_mblaze_fsl_nput>;
+def NAPUT : FSLPut<0x1B, "naput ", int_mblaze_fsl_naput>;
+def NCPUT : FSLPut<0x1B, "ncput ", int_mblaze_fsl_ncput>;
+def NCAPUT : FSLPut<0x1B, "ncaput ", int_mblaze_fsl_ncaput>;
+def TPUT : FSLPutT<0x1B, "tput ", int_mblaze_fsl_tput>;
+def TAPUT : FSLPutT<0x1B, "taput ", int_mblaze_fsl_taput>;
+def TCPUT : FSLPutT<0x1B, "tcput ", int_mblaze_fsl_tcput>;
+def TCAPUT : FSLPutT<0x1B, "tcaput ", int_mblaze_fsl_tcaput>;
+def TNPUT : FSLPutT<0x1B, "tnput ", int_mblaze_fsl_tnput>;
+def TNAPUT : FSLPutT<0x1B, "tnaput ", int_mblaze_fsl_tnaput>;
+def TNCPUT : FSLPutT<0x1B, "tncput ", int_mblaze_fsl_tncput>;
+def TNCAPUT : FSLPutT<0x1B, "tncaput ", int_mblaze_fsl_tncaput>;
+
+//===----------------------------------------------------------------------===//
+// FSL Dynamic Put Instructions
+//===----------------------------------------------------------------------===//
+def PUTD : FSLPutD<0x1B, 0x00, "putd ", int_mblaze_fsl_put>;
+def APUTD : FSLPutD<0x1B, 0x00, "aputd ", int_mblaze_fsl_aput>;
+def CPUTD : FSLPutD<0x1B, 0x00, "cputd ", int_mblaze_fsl_cput>;
+def CAPUTD : FSLPutD<0x1B, 0x00, "caputd ", int_mblaze_fsl_caput>;
+def NPUTD : FSLPutD<0x1B, 0x00, "nputd ", int_mblaze_fsl_nput>;
+def NAPUTD : FSLPutD<0x1B, 0x00, "naputd ", int_mblaze_fsl_naput>;
+def NCPUTD : FSLPutD<0x1B, 0x00, "ncputd ", int_mblaze_fsl_ncput>;
+def NCAPUTD : FSLPutD<0x1B, 0x00, "ncaputd ", int_mblaze_fsl_ncaput>;
+def TPUTD : FSLPutTD<0x1B, 0x00, "tputd ", int_mblaze_fsl_tput>;
+def TAPUTD : FSLPutTD<0x1B, 0x00, "taputd ", int_mblaze_fsl_taput>;
+def TCPUTD : FSLPutTD<0x1B, 0x00, "tcputd ", int_mblaze_fsl_tcput>;
+def TCAPUTD : FSLPutTD<0x1B, 0x00, "tcaputd ", int_mblaze_fsl_tcaput>;
+def TNPUTD : FSLPutTD<0x1B, 0x00, "tnputd ", int_mblaze_fsl_tnput>;
+def TNAPUTD : FSLPutTD<0x1B, 0x00, "tnaputd ", int_mblaze_fsl_tnaput>;
+def TNCPUTD : FSLPutTD<0x1B, 0x00, "tncputd ", int_mblaze_fsl_tncput>;
+def TNCAPUTD : FSLPutTD<0x1B, 0x00, "tncaputd ", int_mblaze_fsl_tncaput>;
diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td
new file mode 100644
index 0000000..7d65543
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeInstrFormats.td
@@ -0,0 +1,246 @@
+//===- MBlazeInstrFormats.td - MB Instruction defs --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe MBlaze instructions format
+//
+// CPU INSTRUCTION FORMATS
+//
+// opcode - operation code.
+// rd - dst reg.
+// ra - first src. reg.
+// rb - second src. reg.
+// imm16 - 16-bit immediate value.
+//
+//===----------------------------------------------------------------------===//
+
+// Generic MBlaze Format
+class MBlazeInst<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin> : Instruction
+{
+ field bits<32> Inst;
+
+ let Namespace = "MBlaze";
+
+ bits<6> opcode;
+
+ // Top 6 bits are the 'opcode' field
+ let Inst{0-5} = opcode;
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Itinerary = itin;
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instruction class
+//===----------------------------------------------------------------------===//
+class MBlazePseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MBlazeInst<outs, ins, asmstr, pattern, IIPseudo>;
+
+//===----------------------------------------------------------------------===//
+// Type A instruction class in MBlaze : <|opcode|rd|ra|rb|flags|>
+//===----------------------------------------------------------------------===//
+
+class TA<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> ra;
+ bits<5> rb;
+
+ let opcode = op;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = rb;
+ let Inst{21-31} = flags;
+}
+
+class TAI<bits<6> op, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> ra;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-31} = imm16;
+}
+
+class TIMM<bits<6> op, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> ra;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{6-15} = 0;
+ let Inst{16-31} = imm16;
+}
+
+class TADDR<bits<6> op, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<26> addr;
+
+ let opcode = op;
+
+ let Inst{6-31} = addr;
+}
+
+//===----------------------------------------------------------------------===//
+// Type B instruction class in MBlaze : <|opcode|rd|ra|immediate|>
+//===----------------------------------------------------------------------===//
+
+class TB<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> ra;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-31} = imm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Float instruction class in MBlaze : <|opcode|rd|ra|flags|>
+//===----------------------------------------------------------------------===//
+
+class TF<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> ra;
+
+ let opcode = op;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = flags;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch instruction class in MBlaze : <|opcode|rd|br|ra|flags|>
+//===----------------------------------------------------------------------===//
+
+class TBR<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins,
+ string asmstr, list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> ra;
+
+ let opcode = op;
+
+ let Inst{6-10} = 0;
+ let Inst{11-15} = br;
+ let Inst{16-20} = ra;
+ let Inst{21-31} = flags;
+}
+
+class TBRC<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins,
+ string asmstr, list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> ra;
+ bits<5> rb;
+
+ let opcode = op;
+
+ let Inst{6-10} = br;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = rb;
+ let Inst{21-31} = flags;
+}
+
+class TBRL<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins,
+ string asmstr, list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> ra;
+
+ let opcode = op;
+
+ let Inst{6-10} = 0xF;
+ let Inst{11-15} = br;
+ let Inst{16-20} = ra;
+ let Inst{21-31} = flags;
+}
+
+class TBRI<bits<6> op, bits<5> br, dag outs, dag ins,
+ string asmstr, list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{6-10} = 0;
+ let Inst{11-15} = br;
+ let Inst{16-31} = imm16;
+}
+
+class TBRLI<bits<6> op, bits<5> br, dag outs, dag ins,
+ string asmstr, list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{6-10} = 0xF;
+ let Inst{11-15} = br;
+ let Inst{16-31} = imm16;
+}
+
+class TBRCI<bits<6> op, bits<5> br, dag outs, dag ins,
+ string asmstr, list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> ra;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{6-10} = br;
+ let Inst{11-15} = ra;
+ let Inst{16-31} = imm16;
+}
+
+class TRET<bits<6> op, dag outs, dag ins,
+ string asmstr, list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> ra;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{6-10} = 0x10;
+ let Inst{11-15} = ra;
+ let Inst{16-31} = imm16;
+}
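+
+// TRET is used by the RTSD return instruction in MBlazeInstrInfo.td, where
+// imm16 is fixed to 8 to match the "rtsd $target, 8" assembly form.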
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
new file mode 100644
index 0000000..a7e8eb7
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -0,0 +1,222 @@
+//===- MBlazeInstrInfo.cpp - MBlaze Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeInstrInfo.h"
+#include "MBlazeTargetMachine.h"
+#include "MBlazeMachineFunction.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "MBlazeGenInstrInfo.inc"
+
+using namespace llvm;
+
+MBlazeInstrInfo::MBlazeInstrInfo(MBlazeTargetMachine &tm)
+ : TargetInstrInfoImpl(MBlazeInsts, array_lengthof(MBlazeInsts)),
+ TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// Return true if the instruction is a register to register move and
+/// leave the source and dest operands in the passed parameters.
+bool MBlazeInstrInfo::
+isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+ SrcSubIdx = DstSubIdx = 0; // No sub-registers.
+
+ // add $dst, $src, $zero || add $dst, $zero, $src
+ // or  $dst, $src, $zero || or  $dst, $zero, $src
+ if ((MI.getOpcode() == MBlaze::ADD) || (MI.getOpcode() == MBlaze::OR)) {
+ if (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == MBlaze::R0) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ return true;
+ } else if (MI.getOperand(2).isReg() &&
+ MI.getOperand(2).getReg() == MBlaze::R0) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+ }
+
+ // addi $dst, $src, 0
+ // ori $dst, $src, 0
+ if ((MI.getOpcode() == MBlaze::ADDI) || (MI.getOpcode() == MBlaze::ORI)) {
+ if ((MI.getOperand(1).isReg()) && (isZeroImm(MI.getOperand(2)))) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned MBlazeInstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const {
+ if (MI->getOpcode() == MBlaze::LWI) {
+ if ((MI->getOperand(2).isFI()) &&     // is a stack slot
+ (MI->getOperand(1).isImm()) &&    // with an immediate offset
+ (isZeroImm(MI->getOperand(1)))) { // that is zero
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stored stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned MBlazeInstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const {
+ if (MI->getOpcode() == MBlaze::SWI) {
+ if ((MI->getOperand(2).isFI()) &&     // is a stack slot
+ (MI->getOperand(1).isImm()) &&    // with an immediate offset
+ (isZeroImm(MI->getOperand(1)))) { // that is zero
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// insertNoop - Insert the target nop instruction when a data hazard
+/// condition is found.
+void MBlazeInstrInfo::
+insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ BuildMI(MBB, MI, DL, get(MBlaze::NOP));
+}
+
+bool MBlazeInstrInfo::
+copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ llvm::BuildMI(MBB, I, dl, get(MBlaze::ADD), DestReg)
+ .addReg(SrcReg).addReg(MBlaze::R0);
+ return true;
+}
+
+void MBlazeInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC) const {
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ BuildMI(MBB, I, dl, get(MBlaze::SWI)).addReg(SrcReg,getKillRegState(isKill))
+ .addImm(0).addFrameIndex(FI);
+}
+
+void MBlazeInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC) const {
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ BuildMI(MBB, I, dl, get(MBlaze::LWI), DestReg)
+ .addImm(0).addFrameIndex(FI);
+}
+
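+// For example, folding operand 0 of the register copy "add rD, rS, r0" into a
+// stack slot turns the copy into a store of rS to that slot, while folding
+// operand 1 turns it into a load into rD (see the COPY -> STORE and
+// COPY -> LOAD cases below).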
+MachineInstr *MBlazeInstrInfo::
+foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops, int FI) const {
+ if (Ops.size() != 1) return NULL;
+
+ MachineInstr *NewMI = NULL;
+
+ switch (MI->getOpcode()) {
+ case MBlaze::OR:
+ case MBlaze::ADD:
+ if ((MI->getOperand(0).isReg()) &&
+ (MI->getOperand(2).isReg()) &&
+ (MI->getOperand(2).getReg() == MBlaze::R0) &&
+ (MI->getOperand(1).isReg())) {
+ if (Ops[0] == 0) { // COPY -> STORE
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ bool isUndef = MI->getOperand(1).isUndef();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::SW))
+ .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
+ .addImm(0).addFrameIndex(FI);
+ } else { // COPY -> LOAD
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isUndef = MI->getOperand(0).isUndef();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::LW))
+ .addReg(DstReg, RegState::Define | getDeadRegState(isDead) |
+ getUndefRegState(isUndef))
+ .addImm(0).addFrameIndex(FI);
+ }
+ }
+ break;
+ }
+
+ return NewMI;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+unsigned MBlazeInstrInfo::
+InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+
+ // Can only insert uncond branches so far.
+ assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
+ BuildMI(&MBB, dl, get(MBlaze::BRI)).addMBB(TBB);
+ return 1;
+}
+
+/// getGlobalBaseReg - Return a virtual register initialized with the
+/// global base register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ MBlazeFunctionInfo *MBlazeFI = MF->getInfo<MBlazeFunctionInfo>();
+ unsigned GlobalBaseReg = MBlazeFI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+ bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, MBlaze::R20,
+ MBlaze::CPURegsRegisterClass,
+ MBlaze::CPURegsRegisterClass);
+ assert(Ok && "Couldn't assign to global base register!");
+ Ok = Ok; // Silence warning when assertions are turned off.
+ RegInfo.addLiveIn(MBlaze::R20);
+
+ MBlazeFI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
new file mode 100644
index 0000000..4f79f1c
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -0,0 +1,242 @@
+//===- MBlazeInstrInfo.h - MBlaze Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEINSTRUCTIONINFO_H
+#define MBLAZEINSTRUCTIONINFO_H
+
+#include "MBlaze.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "MBlazeRegisterInfo.h"
+
+namespace llvm {
+
+namespace MBlaze {
+
+ // MBlaze Branch Codes
+ enum FPBranchCode {
+ BRANCH_F,
+ BRANCH_T,
+ BRANCH_FL,
+ BRANCH_TL,
+ BRANCH_INVALID
+ };
+
+ // MBlaze Condition Codes
+ enum CondCode {
+ // To be used with float branch True
+ FCOND_F,
+ FCOND_UN,
+ FCOND_EQ,
+ FCOND_UEQ,
+ FCOND_OLT,
+ FCOND_ULT,
+ FCOND_OLE,
+ FCOND_ULE,
+ FCOND_SF,
+ FCOND_NGLE,
+ FCOND_SEQ,
+ FCOND_NGL,
+ FCOND_LT,
+ FCOND_NGE,
+ FCOND_LE,
+ FCOND_NGT,
+
+ // To be used with float branch False
+ // These conditions have the same mnemonics as the
+ // ones above, but are used with a branch on false.
+ FCOND_T,
+ FCOND_OR,
+ FCOND_NEQ,
+ FCOND_OGL,
+ FCOND_UGE,
+ FCOND_OGE,
+ FCOND_UGT,
+ FCOND_OGT,
+ FCOND_ST,
+ FCOND_GLE,
+ FCOND_SNE,
+ FCOND_GL,
+ FCOND_NLT,
+ FCOND_GE,
+ FCOND_NLE,
+ FCOND_GT,
+
+ // Only integer conditions
+ COND_E,
+ COND_GZ,
+ COND_GEZ,
+ COND_LZ,
+ COND_LEZ,
+ COND_NE,
+ COND_INVALID
+ };
+
+ // Turn condition code into conditional branch opcode.
+ unsigned GetCondBranchFromCond(CondCode CC);
+
+ /// GetOppositeBranchCondition - Return the inverse of the specified cond,
+ /// e.g. turning COND_E to COND_NE.
+ CondCode GetOppositeBranchCondition(MBlaze::CondCode CC);
+
+ /// MBlazeFCCToString - Map each FP condition code to its string
+ inline static const char *MBlazeFCCToString(MBlaze::CondCode CC)
+ {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case FCOND_F:
+ case FCOND_T: return "f";
+ case FCOND_UN:
+ case FCOND_OR: return "un";
+ case FCOND_EQ:
+ case FCOND_NEQ: return "eq";
+ case FCOND_UEQ:
+ case FCOND_OGL: return "ueq";
+ case FCOND_OLT:
+ case FCOND_UGE: return "olt";
+ case FCOND_ULT:
+ case FCOND_OGE: return "ult";
+ case FCOND_OLE:
+ case FCOND_UGT: return "ole";
+ case FCOND_ULE:
+ case FCOND_OGT: return "ule";
+ case FCOND_SF:
+ case FCOND_ST: return "sf";
+ case FCOND_NGLE:
+ case FCOND_GLE: return "ngle";
+ case FCOND_SEQ:
+ case FCOND_SNE: return "seq";
+ case FCOND_NGL:
+ case FCOND_GL: return "ngl";
+ case FCOND_LT:
+ case FCOND_NLT: return "lt";
+ case FCOND_NGE:
+ case FCOND_GE: return "ge";
+ case FCOND_LE:
+ case FCOND_NLE: return "nle";
+ case FCOND_NGT:
+ case FCOND_GT: return "gt";
+ }
+ }
+}
+
+/// MBlazeII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace MBlazeII {
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // MBlaze Specific MachineOperand flags.
+ MO_NO_FLAG,
+
+ /// MO_GOT - Represents the offset into the global offset table at which
+ /// the address of the relocation entry symbol resides during execution.
+ MO_GOT,
+
+ /// MO_GOT_CALL - Represents the offset into the global offset table at
+ /// which the address of a call site relocation entry symbol resides
+ /// during execution. This is different from the above since this flag
+ /// can only be present in call instructions.
+ MO_GOT_CALL,
+
+ /// MO_GPREL - Represents the offset from the current gp value to be used
+ /// for the relocatable object file being produced.
+ MO_GPREL,
+
+ /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol
+ /// address.
+ MO_ABS_HILO
+
+ };
+}
+
+class MBlazeInstrInfo : public TargetInstrInfoImpl {
+ MBlazeTargetMachine &TM;
+ const MBlazeRegisterInfo RI;
+public:
+ explicit MBlazeInstrInfo(MBlazeTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MBlazeRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the stored stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// Branch Analysis
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+
+ /// Insert nop instruction when hazard condition is found
+ virtual void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ /// getGlobalBaseReg - Return a virtual register initialized with the
+ /// global base register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td
new file mode 100644
index 0000000..3c406dd
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -0,0 +1,672 @@
+//===- MBlazeInstrInfo.td - MBlaze Instruction defs --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+include "MBlazeInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// MBlaze profiles and nodes
+//===----------------------------------------------------------------------===//
+def SDT_MBlazeRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_MBlazeJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+// Call
+def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink,
+ [SDNPHasChain,SDNPOptInFlag,SDNPOutFlag]>;
+
+// Return
+def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+// Hi and Lo nodes are used to handle global addresses. They are used in
+// MBlazeISelLowering to lower nodes such as GlobalAddress and ExternalSymbol
+// in the static code model.
+def MBWrapper : SDNode<"MBlazeISD::Wrap", SDTIntUnaryOp>;
+def MBlazeGPRel : SDNode<"MBlazeISD::GPRel", SDTIntUnaryOp>;
+
+def SDT_MBCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_MBCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MBCallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def SDTMBlazeSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>]>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasPipe3 : Predicate<"Subtarget.hasPipe3()">;
+def HasBarrel : Predicate<"Subtarget.hasBarrel()">;
+def NoBarrel : Predicate<"!Subtarget.hasBarrel()">;
+def HasDiv : Predicate<"Subtarget.hasDiv()">;
+def HasMul : Predicate<"Subtarget.hasMul()">;
+def HasFSL : Predicate<"Subtarget.hasFSL()">;
+def HasEFSL : Predicate<"Subtarget.hasEFSL()">;
+def HasMSRSet : Predicate<"Subtarget.hasMSRSet()">;
+def HasException : Predicate<"Subtarget.hasException()">;
+def HasPatCmp : Predicate<"Subtarget.hasPatCmp()">;
+def HasFPU : Predicate<"Subtarget.hasFPU()">;
+def HasESR : Predicate<"Subtarget.hasESR()">;
+def HasPVR : Predicate<"Subtarget.hasPVR()">;
+def HasMul64 : Predicate<"Subtarget.hasMul64()">;
+def HasSqrt : Predicate<"Subtarget.hasSqrt()">;
+def HasMMU : Predicate<"Subtarget.hasMMU()">;
+
+//===----------------------------------------------------------------------===//
+// MBlaze Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+// Instruction operand types
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+def simm16 : Operand<i32>;
+def uimm5 : Operand<i32>;
+def fimm : Operand<f32>;
+
+// Unsigned Operand
+def uimm16 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+// FSL Operand
+def fslimm : Operand<i32> {
+ let PrintMethod = "printFSLImm";
+}
+
+// Address operand
+def memri : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops simm16, CPURegs);
+}
+
+def memrr : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops CPURegs, CPURegs);
+}
+
+// Transformation Function - get the lower 16 bits.
+def LO16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF);
+}]>;
+
+// Transformation Function - get the higher 16 bits.
+def HI16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
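+
+// For example, for an immediate of 0x12345678, LO16 yields 0x5678 and HI16
+// yields 0x1234.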
+
+// Node immediate fits as a 16-bit sign-extended target immediate.
+// e.g. addi, muli
+def immSExt16 : PatLeaf<(imm), [{
+ return (N->getZExtValue() >> 16) == 0;
+}]>;
+
+// Node immediate fits as a 16-bit zero-extended target immediate.
+// The LO16 param means that only the lower 16 bits of the node
+// immediate are used.
+// e.g. andi, ori
+def immZExt16 : PatLeaf<(imm), [{
+ return (N->getZExtValue() >> 16) == 0;
+}], LO16>;
+
+// FSL immediate field must fit in 4 bits.
+def immZExt4 : PatLeaf<(imm), [{
+ return N->getZExtValue() == ((N->getZExtValue()) & 0xf) ;
+}]>;
+
+// shamt field must fit in 5 bits.
+def immZExt5 : PatLeaf<(imm), [{
+ return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ;
+}]>;
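+
+// For example, 0x0000FFFF satisfies immZExt16 (its upper 16 bits are zero),
+// while immZExt4 and immZExt5 only accept values up to 0xF and 0x1F
+// respectively.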
+
+// MBlaze addressing modes. The SDNode frameindex can be a match
+// since loads and stores from the stack use it.
+def iaddr : ComplexPattern<i32, 2, "SelectAddrRegImm", [frameindex], []>;
+def xaddr : ComplexPattern<i32, 2, "SelectAddrRegReg", [], []>;
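+
+// iaddr selects register+immediate addresses (including frame-index accesses),
+// which feed the immediate-form loads and stores such as LWI/SWI below, while
+// xaddr selects register+register addresses used by LW/SW.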
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+// As stack alignment is always done with addi, we need a 16-bit immediate
+let Defs = [R1], Uses = [R1] in {
+def ADJCALLSTACKDOWN : MBlazePseudo<(outs), (ins simm16:$amt),
+ "${:comment} ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : MBlazePseudo<(outs),
+ (ins uimm16:$amt1, simm16:$amt2),
+ "${:comment} ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+// Some assembly macros need to avoid pseudoinstructions and automatic
+// assembler reordering, so we should reorder ourselves.
+def MACRO : MBlazePseudo<(outs), (ins), ".set macro", []>;
+def REORDER : MBlazePseudo<(outs), (ins), ".set reorder", []>;
+def NOMACRO : MBlazePseudo<(outs), (ins), ".set nomacro", []>;
+def NOREORDER : MBlazePseudo<(outs), (ins), ".set noreorder", []>;
+
+// When handling PIC code the assembler needs the .cpload and .cprestore
+// directives. If the real instructions corresponding to these directives
+// are used, we get the same behavior, but also a bunch of warnings
+// from the assembler.
+def CPLOAD : MBlazePseudo<(outs), (ins CPURegs:$reg), ".cpload $reg", []>;
+def CPRESTORE : MBlazePseudo<(outs), (ins uimm16:$l), ".cprestore $l\n", []>;
+
+//===----------------------------------------------------------------------===//
+// Instructions specific format
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+class Arith<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>;
+
+class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>;
+
+class ArithR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>;
+
+class ArithRI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ TAI<op, (outs CPURegs:$dst), (ins Od:$b, CPURegs:$c),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set CPURegs:$dst, (OpNode imm_type:$b, CPURegs:$c))], IIAlu>;
+
+class ArithN<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], itin>;
+
+class ArithNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
+ TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIAlu>;
+
+class ArithRN<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], itin>;
+
+class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
+ TAI<op, (outs CPURegs:$dst), (ins Od:$c, CPURegs:$b),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIAlu>;
+
+//===----------------------------------------------------------------------===//
+// Misc Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+
+class Logic<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode> :
+ TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
+
+class LogicI<bits<6> op, string instr_asm, SDNode OpNode> :
+ TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, uimm16:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))],
+ IIAlu>;
+
+class EffectiveAddress<string instr_asm> :
+ TAI<0x08, (outs CPURegs:$dst), (ins memri:$addr),
+ instr_asm, [(set CPURegs:$dst, iaddr:$addr)], IIAlu>;
+
+//===----------------------------------------------------------------------===//
+// Memory Access Instructions
+//===----------------------------------------------------------------------===//
+class LoadM<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TA<op, 0x000, (outs CPURegs:$dst), (ins memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set CPURegs:$dst, (OpNode xaddr:$addr))], IILoad>;
+
+class LoadMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TAI<op, (outs CPURegs:$dst), (ins memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set CPURegs:$dst, (OpNode iaddr:$addr))], IILoad>;
+
+class StoreM<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TA<op, 0x000, (outs), (ins CPURegs:$dst, memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode CPURegs:$dst, xaddr:$addr)], IIStore>;
+
+class StoreMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TAI<op, (outs), (ins CPURegs:$dst, memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode CPURegs:$dst, iaddr:$addr)], IIStore>;
+
+//===----------------------------------------------------------------------===//
+// Branch Instructions
+//===----------------------------------------------------------------------===//
+class Branch<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
+ TBR<op, br, flags, (outs), (ins CPURegs:$target),
+ !strconcat(instr_asm, " $target"),
+ [(brind CPURegs:$target)], IIBranch>;
+
+class BranchI<bits<6> op, bits<5> brf, string instr_asm> :
+ TBRI<op, brf, (outs), (ins brtarget:$target),
+ !strconcat(instr_asm, " $target"),
+ [(br bb:$target)], IIBranch>;
+
+//===----------------------------------------------------------------------===//
+// Branch and Link Instructions
+//===----------------------------------------------------------------------===//
+class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
+ TBRL<op, br, flags, (outs), (ins CPURegs:$target),
+ !strconcat(instr_asm, " r15, $target"),
+ [], IIBranch>;
+
+class BranchLI<bits<6> op, bits<5> br, string instr_asm> :
+ TBRLI<op, br, (outs), (ins calltarget:$target),
+ !strconcat(instr_asm, " r15, $target"),
+ [], IIBranch>;
+
+//===----------------------------------------------------------------------===//
+// Conditional Branch Instructions
+//===----------------------------------------------------------------------===//
+class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm,
+ PatFrag cond_op> :
+ TBRC<op, br, flags, (outs),
+ (ins CPURegs:$a, CPURegs:$b, brtarget:$offset),
+ !strconcat(instr_asm, " $a, $b, $offset"),
+ [], IIBranch>;
+ //(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)],
+ //IIBranch>;
+
+class BranchCI<bits<6> op, bits<5> br, string instr_asm, PatFrag cond_op> :
+ TBRCI<op, br, (outs), (ins CPURegs:$a, brtarget:$offset),
+ !strconcat(instr_asm, " $a, $offset"),
+ [], IIBranch>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze arithmetic instructions
+//===----------------------------------------------------------------------===//
+
+let isCommutable = 1, isAsCheapAsAMove = 1 in {
+ def ADD : Arith<0x00, 0x000, "add ", add, IIAlu>;
+ def ADDC : Arith<0x02, 0x000, "addc ", adde, IIAlu>;
+ def ADDK : Arith<0x04, 0x000, "addk ", addc, IIAlu>;
+ def ADDKC : ArithN<0x06, 0x000, "addkc ", IIAlu>;
+ def AND : Logic<0x21, 0x000, "and ", and>;
+ def OR : Logic<0x20, 0x000, "or ", or>;
+ def XOR : Logic<0x22, 0x000, "xor ", xor>;
+}
+
+let isAsCheapAsAMove = 1 in {
+ def ANDN : ArithN<0x23, 0x000, "andn ", IIAlu>;
+ def CMP : ArithN<0x05, 0x001, "cmp ", IIAlu>;
+ def CMPU : ArithN<0x05, 0x003, "cmpu ", IIAlu>;
+ def RSUB : ArithR<0x01, 0x000, "rsub ", sub, IIAlu>;
+ def RSUBC : ArithR<0x03, 0x000, "rsubc ", sube, IIAlu>;
+ def RSUBK : ArithR<0x05, 0x000, "rsubk ", subc, IIAlu>;
+ def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIAlu>;
+}
+
+let isCommutable = 1, Predicates=[HasMul] in {
+ def MUL : Arith<0x10, 0x000, "mul ", mul, IIAlu>;
+}
+
+let isCommutable = 1, Predicates=[HasMul,HasMul64] in {
+ def MULH : Arith<0x10, 0x001, "mulh ", mulhs, IIAlu>;
+ def MULHU : Arith<0x10, 0x003, "mulhu ", mulhu, IIAlu>;
+}
+
+let Predicates=[HasMul,HasMul64] in {
+ def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIAlu>;
+}
+
+let Predicates=[HasBarrel] in {
+ def BSRL : Arith<0x11, 0x000, "bsrl ", srl, IIAlu>;
+ def BSRA : Arith<0x11, 0x200, "bsra ", sra, IIAlu>;
+ def BSLL : Arith<0x11, 0x400, "bsll ", shl, IIAlu>;
+ def BSRLI : ArithI<0x11, "bsrli ", srl, uimm5, immZExt5>;
+ def BSRAI : ArithI<0x11, "bsrai ", sra, uimm5, immZExt5>;
+ def BSLLI : ArithI<0x11, "bslli ", shl, uimm5, immZExt5>;
+}
+
+let Predicates=[HasDiv] in {
+ def IDIV : Arith<0x12, 0x000, "idiv ", sdiv, IIAlu>;
+ def IDIVU : Arith<0x12, 0x002, "idivu ", udiv, IIAlu>;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze immediate mode arithmetic instructions
+//===----------------------------------------------------------------------===//
+
+let isAsCheapAsAMove = 1 in {
+ def ADDI : ArithI<0x08, "addi ", add, simm16, immSExt16>;
+ def ADDIC : ArithNI<0x0A, "addic ", simm16, immSExt16>;
+ def ADDIK : ArithNI<0x0C, "addik ", simm16, immSExt16>;
+ def ADDIKC : ArithI<0x0E, "addikc ", addc, simm16, immSExt16>;
+ def RSUBI : ArithRI<0x09, "rsubi ", sub, simm16, immSExt16>;
+ def RSUBIC : ArithRNI<0x0B, "rsubic ", simm16, immSExt16>;
+ def RSUBIK : ArithRNI<0x0D, "rsubik ", simm16, immSExt16>;
+ def RSUBIKC : ArithRI<0x0F, "rsubikc", subc, simm16, immSExt16>;
+ def ANDNI : ArithNI<0x2B, "andni ", uimm16, immZExt16>;
+ def ANDI : LogicI<0x29, "andi ", and>;
+ def ORI : LogicI<0x28, "ori ", or>;
+ def XORI : LogicI<0x2A, "xori ", xor>;
+}
+
+let Predicates=[HasMul] in {
+ def MULI : ArithI<0x18, "muli ", mul, simm16, immSExt16>;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze memory access instructions
+//===----------------------------------------------------------------------===//
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def LBU : LoadM<0x30, "lbu ", zextloadi8>;
+ def LHU : LoadM<0x31, "lhu ", zextloadi16>;
+ def LW : LoadM<0x32, "lw ", load>;
+
+ def LBUI : LoadMI<0x30, "lbui ", zextloadi8>;
+ def LHUI : LoadMI<0x31, "lhui ", zextloadi16>;
+ def LWI : LoadMI<0x32, "lwi ", load>;
+}
+
+ def SB : StoreM<0x34, "sb ", truncstorei8>;
+ def SH : StoreM<0x35, "sh ", truncstorei16>;
+ def SW : StoreM<0x36, "sw ", store>;
+
+ def SBI : StoreMI<0x34, "sbi ", truncstorei8>;
+ def SHI : StoreMI<0x35, "shi ", truncstorei16>;
+ def SWI : StoreMI<0x36, "swi ", store>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze branch instructions
+//===----------------------------------------------------------------------===//
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ def BRI : BranchI<0x2E, 0x00, "bri ">;
+ def BRAI : BranchI<0x2E, 0x08, "brai ">;
+ def BEQI : BranchCI<0x2F, 0x00, "beqi ", seteq>;
+ def BNEI : BranchCI<0x2F, 0x01, "bnei ", setne>;
+ def BLTI : BranchCI<0x2F, 0x02, "blti ", setlt>;
+ def BLEI : BranchCI<0x2F, 0x03, "blei ", setle>;
+ def BGTI : BranchCI<0x2F, 0x04, "bgti ", setgt>;
+ def BGEI : BranchCI<0x2F, 0x05, "bgei ", setge>;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ def BR : Branch<0x26, 0x00, 0x000, "br ">;
+ def BRA : Branch<0x26, 0x08, 0x000, "bra ">;
+ def BEQ : BranchC<0x27, 0x00, 0x000, "beq ", seteq>;
+ def BNE : BranchC<0x27, 0x01, 0x000, "bne ", setne>;
+ def BLT : BranchC<0x27, 0x02, 0x000, "blt ", setlt>;
+ def BLE : BranchC<0x27, 0x03, 0x000, "ble ", setle>;
+ def BGT : BranchC<0x27, 0x04, 0x000, "bgt ", setgt>;
+ def BGE : BranchC<0x27, 0x05, 0x000, "bge ", setge>;
+}
+
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1 in {
+ def BRID : BranchI<0x2E, 0x10, "brid ">;
+ def BRAID : BranchI<0x2E, 0x18, "braid ">;
+ def BEQID : BranchCI<0x2F, 0x10, "beqid ", seteq>;
+ def BNEID : BranchCI<0x2F, 0x11, "bneid ", setne>;
+ def BLTID : BranchCI<0x2F, 0x12, "bltid ", setlt>;
+ def BLEID : BranchCI<0x2F, 0x13, "bleid ", setle>;
+ def BGTID : BranchCI<0x2F, 0x14, "bgtid ", setgt>;
+ def BGEID : BranchCI<0x2F, 0x15, "bgeid ", setge>;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1,
+ hasDelaySlot = 1, hasCtrlDep = 1 in {
+ def BRD : Branch<0x26, 0x10, 0x000, "brd ">;
+ def BRAD : Branch<0x26, 0x18, 0x000, "brad ">;
+ def BEQD : BranchC<0x27, 0x10, 0x000, "beqd ", seteq>;
+ def BNED : BranchC<0x27, 0x11, 0x000, "bned ", setne>;
+ def BLTD : BranchC<0x27, 0x12, 0x000, "bltd ", setlt>;
+ def BLED : BranchC<0x27, 0x13, 0x000, "bled ", setle>;
+ def BGTD : BranchC<0x27, 0x14, 0x000, "bgtd ", setgt>;
+ def BGED : BranchC<0x27, 0x15, 0x000, "bged ", setge>;
+}
+
+let isCall = 1, hasCtrlDep = 1, isIndirectBranch = 1,
+ Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12],
+ Uses = [R1,R5,R6,R7,R8,R9,R10] in {
+ def BRL : BranchL<0x26, 0x04, 0x000, "brl ">;
+ def BRAL : BranchL<0x26, 0x0C, 0x000, "bral ">;
+}
+
+let isCall = 1, hasDelaySlot = 1, hasCtrlDep = 1,
+ Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12],
+ Uses = [R1,R5,R6,R7,R8,R9,R10] in {
+ def BRLID : BranchLI<0x2E, 0x14, "brlid ">;
+ def BRALID : BranchLI<0x2E, 0x1C, "bralid ">;
+}
+
+let isCall = 1, hasDelaySlot = 1, hasCtrlDep = 1, isIndirectBranch = 1,
+ Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12],
+ Uses = [R1,R5,R6,R7,R8,R9,R10] in {
+ def BRLD : BranchL<0x26, 0x14, 0x000, "brld ">;
+ def BRALD : BranchL<0x26, 0x1C, 0x000, "brald ">;
+}
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1,
+ isBarrier=1, hasCtrlDep=1, imm16=0x8 in {
+ def RTSD : TRET<0x2D, (outs), (ins CPURegs:$target),
+ "rtsd $target, 8",
+ [(MBlazeRet CPURegs:$target)],
+ IIBranch>;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze misc instructions
+//===----------------------------------------------------------------------===//
+
+let addr = 0 in {
+ def NOP : TADDR<0x00, (outs), (ins), "nop ", [], IIAlu>;
+}
+
+let usesCustomInserter = 1 in {
+ //class PseudoSelCC<RegisterClass RC, string asmstr>:
+ // MBlazePseudo<(outs RC:$D), (ins RC:$T, RC:$F, CPURegs:$CMP), asmstr,
+ // [(set RC:$D, (MBlazeSelectCC RC:$T, RC:$F, CPURegs:$CMP))]>;
+ //def Select_CC : PseudoSelCC<CPURegs, "# MBlazeSelect_CC">;
+
+ def Select_CC : MBlazePseudo<(outs CPURegs:$dst),
+ (ins CPURegs:$T, CPURegs:$F, CPURegs:$CMP, i32imm:$CC),
+ "; SELECT_CC PSEUDO!",
+ []>;
+
+ def ShiftL : MBlazePseudo<(outs CPURegs:$dst),
+ (ins CPURegs:$L, CPURegs:$R),
+ "; ShiftL PSEUDO!",
+ []>;
+
+ def ShiftRA : MBlazePseudo<(outs CPURegs:$dst),
+ (ins CPURegs:$L, CPURegs:$R),
+ "; ShiftRA PSEUDO!",
+ []>;
+
+ def ShiftRL : MBlazePseudo<(outs CPURegs:$dst),
+ (ins CPURegs:$L, CPURegs:$R),
+ "; ShiftRL PSEUDO!",
+ []>;
+}
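+
+// The ShiftL/ShiftRA/ShiftRL pseudos are selected by the generic shl/sra/srl
+// patterns near the end of this file and are expanded by the custom inserter
+// (usesCustomInserter = 1).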
+
+
+let rb = 0 in {
+ def SEXT16 : TA<0x24, 0x061, (outs CPURegs:$dst), (ins CPURegs:$src),
+ "sext16 $dst, $src", [], IIAlu>;
+ def SEXT8 : TA<0x24, 0x060, (outs CPURegs:$dst), (ins CPURegs:$src),
+ "sext8 $dst, $src", [], IIAlu>;
+ def SRL : TA<0x24, 0x041, (outs CPURegs:$dst), (ins CPURegs:$src),
+ "srl $dst, $src", [], IIAlu>;
+ def SRA : TA<0x24, 0x001, (outs CPURegs:$dst), (ins CPURegs:$src),
+ "sra $dst, $src", [], IIAlu>;
+ def SRC : TA<0x24, 0x021, (outs CPURegs:$dst), (ins CPURegs:$src),
+ "src $dst, $src", [], IIAlu>;
+}
+
+def LEA_ADDI : EffectiveAddress<"addi $dst, ${addr:stackloc}">;
+
+//===----------------------------------------------------------------------===//
+// Arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+// Small immediates
+def : Pat<(i32 0), (ADD R0, R0)>;
+def : Pat<(i32 immSExt16:$imm), (ADDI R0, imm:$imm)>;
+def : Pat<(i32 immZExt16:$imm), (ORI R0, imm:$imm)>;
+
+// Arbitrary immediates
+def : Pat<(i32 imm:$imm), (ADDI R0, imm:$imm)>;
+
+// In register sign extension
+def : Pat<(sext_inreg CPURegs:$src, i16), (SEXT16 CPURegs:$src)>;
+def : Pat<(sext_inreg CPURegs:$src, i8), (SEXT8 CPURegs:$src)>;
+
+// Call
+def : Pat<(MBlazeJmpLink (i32 tglobaladdr:$dst)), (BRLID tglobaladdr:$dst)>;
+def : Pat<(MBlazeJmpLink (i32 texternalsym:$dst)),(BRLID texternalsym:$dst)>;
+def : Pat<(MBlazeJmpLink CPURegs:$dst), (BRLD CPURegs:$dst)>;
+
+// Shift Instructions
+def : Pat<(shl CPURegs:$L, CPURegs:$R), (ShiftL CPURegs:$L, CPURegs:$R)>;
+def : Pat<(sra CPURegs:$L, CPURegs:$R), (ShiftRA CPURegs:$L, CPURegs:$R)>;
+def : Pat<(srl CPURegs:$L, CPURegs:$R), (ShiftRL CPURegs:$L, CPURegs:$R)>;
+
+// SET_CC operations
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETEQ),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMP CPURegs:$L, CPURegs:$R), 1)>;
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETNE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMP CPURegs:$L, CPURegs:$R), 2)>;
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETGT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMP CPURegs:$L, CPURegs:$R), 3)>;
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETLT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMP CPURegs:$L, CPURegs:$R), 4)>;
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETGE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMP CPURegs:$L, CPURegs:$R), 5)>;
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETLE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMP CPURegs:$L, CPURegs:$R), 6)>;
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETUGT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMPU CPURegs:$L, CPURegs:$R), 3)>;
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETULT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMPU CPURegs:$L, CPURegs:$R), 4)>;
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETUGE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMPU CPURegs:$L, CPURegs:$R), 5)>;
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETULE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMPU CPURegs:$L, CPURegs:$R), 6)>;
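+
+// In the patterns above, the trailing immediate operand of Select_CC encodes
+// the comparison: 1=EQ, 2=NE, 3=GT, 4=LT, 5=GE, 6=LE; the unsigned variants
+// reuse the same codes but compare with CMPU instead of CMP.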
+
+// SELECT operations
+def : Pat<(select CPURegs:$C, CPURegs:$T, CPURegs:$F),
+ (Select_CC CPURegs:$T, CPURegs:$F, CPURegs:$C, 2)>;
+
+// SELECT_CC
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETEQ),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 1)>;
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETNE),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 2)>;
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETGT),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 3)>;
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETLT),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 4)>;
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETGE),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 5)>;
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETLE),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 6)>;
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETUGT),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 3)>;
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETULT),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 4)>;
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETUGE),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 5)>;
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETULE),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 6)>;
+
+// BRCOND instructions
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETEQ), bb:$T),
+ (BEQID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETNE), bb:$T),
+ (BNEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETGT), bb:$T),
+ (BGTID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETLT), bb:$T),
+ (BLTID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETGE), bb:$T),
+ (BGEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETLE), bb:$T),
+ (BLEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETUGT), bb:$T),
+ (BGTID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETULT), bb:$T),
+ (BLTID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETUGE), bb:$T),
+ (BGEID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETULE), bb:$T),
+ (BLEID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond CPURegs:$C, bb:$T),
+ (BNEID CPURegs:$C, bb:$T)>;
+
+// Jump tables, global addresses, and constant pools
+def : Pat<(MBWrapper tglobaladdr:$in), (ORI R0, tglobaladdr:$in)>;
+def : Pat<(MBWrapper tjumptable:$in), (ORI R0, tjumptable:$in)>;
+def : Pat<(MBWrapper tconstpool:$in), (ORI R0, tconstpool:$in)>;
+
+// Misc instructions
+def : Pat<(and CPURegs:$lh, (not CPURegs:$rh)),(ANDN CPURegs:$lh, CPURegs:$rh)>;
+
+// Arithmetic with immediates
+def : Pat<(add CPURegs:$in, imm:$imm),(ADDI CPURegs:$in, imm:$imm)>;
+def : Pat<(or CPURegs:$in, imm:$imm),(ORI CPURegs:$in, imm:$imm)>;
+def : Pat<(xor CPURegs:$in, imm:$imm),(XORI CPURegs:$in, imm:$imm)>;
+
+// extended load and stores
+def : Pat<(extloadi1 iaddr:$src), (LBUI iaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src), (LBUI iaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src), (LHUI iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src), (LBU xaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src), (LBU xaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src), (LHU xaddr:$src)>;
+
+def : Pat<(sextloadi1 iaddr:$src), (SEXT8 (LBUI iaddr:$src))>;
+def : Pat<(sextloadi8 iaddr:$src), (SEXT8 (LBUI iaddr:$src))>;
+def : Pat<(sextloadi16 iaddr:$src), (SEXT16 (LHUI iaddr:$src))>;
+def : Pat<(sextloadi1 xaddr:$src), (SEXT8 (LBU xaddr:$src))>;
+def : Pat<(sextloadi8 xaddr:$src), (SEXT8 (LBU xaddr:$src))>;
+def : Pat<(sextloadi16 xaddr:$src), (SEXT16 (LHU xaddr:$src))>;
+
+// peepholes
+def : Pat<(store (i32 0), iaddr:$dst), (SWI R0, iaddr:$dst)>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//===----------------------------------------------------------------------===//
+include "MBlazeInstrFSL.td"
+include "MBlazeInstrFPU.td"
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
new file mode 100644
index 0000000..c8faffc
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -0,0 +1,109 @@
+//===- MBlazeIntrinsicInfo.cpp - Intrinsic Information ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of TargetIntrinsicInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeIntrinsicInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+
+using namespace llvm;
+
+namespace mblazeIntrinsic {
+
+ enum ID {
+ last_non_mblaze_intrinsic = Intrinsic::num_intrinsics-1,
+#define GET_INTRINSIC_ENUM_VALUES
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_ENUM_VALUES
+ , num_mblaze_intrinsics
+ };
+
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+}
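+
+// The GET_* sections included from MBlazeGenIntrinsics.inc are generated by
+// TableGen from MBlazeIntrinsics.td and provide the enum values, name table,
+// recognizers and the attribute/type generators used below.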
+
+std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, const Type **Tys,
+ unsigned numTys) const {
+ static const char *const names[] = {
+#define GET_INTRINSIC_NAME_TABLE
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+
+ assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
+ if (IntrID < Intrinsic::num_intrinsics)
+ return std::string();
+ assert(IntrID < mblazeIntrinsic::num_mblaze_intrinsics &&
+ "Invalid intrinsic ID");
+
+ std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
+ return Result;
+}
+
+unsigned MBlazeIntrinsicInfo::
+lookupName(const char *Name, unsigned Len) const {
+#define GET_FUNCTION_RECOGNIZER
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_FUNCTION_RECOGNIZER
+ return 0;
+}
+
+unsigned MBlazeIntrinsicInfo::
+lookupGCCName(const char *Name) const {
+ return mblazeIntrinsic::getIntrinsicForGCCBuiltin("mblaze",Name);
+}
+
+bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const {
+ // Overload Table
+ const bool OTable[] = {
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+ };
+ if (IntrID == 0)
+ return false;
+ else
+ return OTable[IntrID - Intrinsic::num_intrinsics];
+}
+
+/// This defines the "getAttributes(ID id)" method.
+#define GET_INTRINSIC_ATTRIBUTES
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_ATTRIBUTES
+
+static const FunctionType *getType(LLVMContext &Context, unsigned id) {
+ const Type *ResultTy = NULL;
+ std::vector<const Type*> ArgTys;
+ bool IsVarArg = false;
+
+#define GET_INTRINSIC_GENERATOR
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_GENERATOR
+
+ return FunctionType::get(ResultTy, ArgTys, IsVarArg);
+}
+
+Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ const Type **Tys,
+ unsigned numTy) const {
+ assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
+ AttrListPtr AList = getAttributes((mblazeIntrinsic::ID) IntrID);
+ return cast<Function>(M->getOrInsertFunction(getName(IntrID),
+ getType(M->getContext(), IntrID),
+ AList));
+}
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.h b/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
new file mode 100644
index 0000000..9804c77
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
@@ -0,0 +1,33 @@
+//===- MBlazeIntrinsicInfo.h - MBlaze Intrinsic Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of TargetIntrinsicInfo.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MBLAZEINTRINSICS_H
+#define MBLAZEINTRINSICS_H
+
+#include "llvm/Target/TargetIntrinsicInfo.h"
+
+namespace llvm {
+
+ class MBlazeIntrinsicInfo : public TargetIntrinsicInfo {
+ public:
+ std::string getName(unsigned IntrID, const Type **Tys = 0,
+ unsigned numTys = 0) const;
+ unsigned lookupName(const char *Name, unsigned Len) const;
+ unsigned lookupGCCName(const char *Name) const;
+ bool isOverloaded(unsigned IID) const;
+ Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0,
+ unsigned numTys = 0) const;
+ };
+
+}
+
+#endif
diff --git a/lib/Target/MBlaze/MBlazeIntrinsics.td b/lib/Target/MBlaze/MBlazeIntrinsics.td
new file mode 100644
index 0000000..76eb563
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeIntrinsics.td
@@ -0,0 +1,137 @@
+//===- MBlazeIntrinsics.td - Defines MBlaze intrinsics -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the MicroBlaze-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Definitions for all MBlaze intrinsics.
+//
+
+// MBlaze intrinsic classes.
+let TargetPrefix = "mblaze", isTarget = 1 in {
+ class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty],
+ [llvm_i32_ty],
+ [IntrWriteMem]>;
+
+ class MBFSL_Put_Intrinsic : Intrinsic<[llvm_void_ty],
+ [llvm_i32_ty, llvm_i32_ty],
+ [IntrWriteMem]>;
+
+ class MBFSL_PutT_Intrinsic : Intrinsic<[llvm_void_ty],
+ [llvm_i32_ty],
+ [IntrWriteMem]>;
+}
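+
+// Each class above is instantiated below and exposed as a GCC-style builtin;
+// for example int_mblaze_fsl_get becomes __builtin_mblaze_fsl_get, taking a
+// single i32 operand and returning an i32 value.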
+
+//===----------------------------------------------------------------------===//
+// MicroBlaze FSL Get Intrinsic Definitions.
+//
+
+def int_mblaze_fsl_get : GCCBuiltin<"__builtin_mblaze_fsl_get">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_aget : GCCBuiltin<"__builtin_mblaze_fsl_aget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_cget : GCCBuiltin<"__builtin_mblaze_fsl_cget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_caget : GCCBuiltin<"__builtin_mblaze_fsl_caget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_eget : GCCBuiltin<"__builtin_mblaze_fsl_eget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_eaget : GCCBuiltin<"__builtin_mblaze_fsl_eaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ecget : GCCBuiltin<"__builtin_mblaze_fsl_ecget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ecaget : GCCBuiltin<"__builtin_mblaze_fsl_ecaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_nget : GCCBuiltin<"__builtin_mblaze_fsl_nget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_naget : GCCBuiltin<"__builtin_mblaze_fsl_naget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ncget : GCCBuiltin<"__builtin_mblaze_fsl_ncget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ncaget : GCCBuiltin<"__builtin_mblaze_fsl_ncaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_neget : GCCBuiltin<"__builtin_mblaze_fsl_neget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_neaget : GCCBuiltin<"__builtin_mblaze_fsl_neaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_necget : GCCBuiltin<"__builtin_mblaze_fsl_necget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_necaget : GCCBuiltin<"__builtin_mblaze_fsl_necaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tget : GCCBuiltin<"__builtin_mblaze_fsl_tget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_taget : GCCBuiltin<"__builtin_mblaze_fsl_taget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tcget : GCCBuiltin<"__builtin_mblaze_fsl_tcget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tcaget : GCCBuiltin<"__builtin_mblaze_fsl_tcaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_teget : GCCBuiltin<"__builtin_mblaze_fsl_teget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_teaget : GCCBuiltin<"__builtin_mblaze_fsl_teaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tecget : GCCBuiltin<"__builtin_mblaze_fsl_tecget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tecaget : GCCBuiltin<"__builtin_mblaze_fsl_tecaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnget : GCCBuiltin<"__builtin_mblaze_fsl_tnget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnaget : GCCBuiltin<"__builtin_mblaze_fsl_tnaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tncget : GCCBuiltin<"__builtin_mblaze_fsl_tncget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tncaget : GCCBuiltin<"__builtin_mblaze_fsl_tncaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tneget : GCCBuiltin<"__builtin_mblaze_fsl_tneget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tneaget : GCCBuiltin<"__builtin_mblaze_fsl_tneaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnecget : GCCBuiltin<"__builtin_mblaze_fsl_tnecget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnecaget : GCCBuiltin<"__builtin_mblaze_fsl_tnecaget">,
+ MBFSL_Get_Intrinsic;
+
+//===----------------------------------------------------------------------===//
+// MicroBlaze FSL Put Intrinsic Definitions.
+//
+
+def int_mblaze_fsl_put : GCCBuiltin<"__builtin_mblaze_fsl_put">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_aput : GCCBuiltin<"__builtin_mblaze_fsl_aput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_cput : GCCBuiltin<"__builtin_mblaze_fsl_cput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_caput : GCCBuiltin<"__builtin_mblaze_fsl_caput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_nput : GCCBuiltin<"__builtin_mblaze_fsl_nput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_naput : GCCBuiltin<"__builtin_mblaze_fsl_naput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_ncput : GCCBuiltin<"__builtin_mblaze_fsl_ncput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_ncaput : GCCBuiltin<"__builtin_mblaze_fsl_ncaput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_tput : GCCBuiltin<"__builtin_mblaze_fsl_tput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_taput : GCCBuiltin<"__builtin_mblaze_fsl_taput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tcput : GCCBuiltin<"__builtin_mblaze_fsl_tcput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tcaput : GCCBuiltin<"__builtin_mblaze_fsl_tcaput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tnput : GCCBuiltin<"__builtin_mblaze_fsl_tnput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tnaput : GCCBuiltin<"__builtin_mblaze_fsl_tnaput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tncput : GCCBuiltin<"__builtin_mblaze_fsl_tncput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tncaput : GCCBuiltin<"__builtin_mblaze_fsl_tncaput">,
+ MBFSL_PutT_Intrinsic;
diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
new file mode 100644
index 0000000..7ae465d
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
@@ -0,0 +1,27 @@
+//===-- MBlazeMCAsmInfo.cpp - MBlaze asm properties -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MBlazeMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMCAsmInfo.h"
+using namespace llvm;
+
+MBlazeMCAsmInfo::MBlazeMCAsmInfo(const Target &T, const StringRef &TT) {
+ AlignmentIsInBytes = false;
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = 0;
+ PrivateGlobalPrefix = "$";
+ CommentString = "#";
+ ZeroDirective = "\t.space\t";
+ GPRel32Directive = "\t.gpword\t";
+ HasSetDirective = false;
+}
diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.h b/lib/Target/MBlaze/MBlazeMCAsmInfo.h
new file mode 100644
index 0000000..bccb418
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.h
@@ -0,0 +1,30 @@
+//=====-- MBlazeMCAsmInfo.h - MBlaze asm properties -----------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MBlazeMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZETARGETASMINFO_H
+#define MBLAZETARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+ class StringRef;
+
+ class MBlazeMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit MBlazeMCAsmInfo(const Target &T, const StringRef &TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.h b/lib/Target/MBlaze/MBlazeMachineFunction.h
new file mode 100644
index 0000000..08d4dca
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeMachineFunction.h
@@ -0,0 +1,136 @@
+//===-- MBlazeMachineFunctionInfo.h - Private data ----------------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_MACHINE_FUNCTION_INFO_H
+#define MBLAZE_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+namespace llvm {
+
+/// MBlazeFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private MBlaze target-specific information for each MachineFunction.
+class MBlazeFunctionInfo : public MachineFunctionInfo {
+
+private:
+ /// Holds, for each function, the stack offset where the Frame Pointer must
+ /// be saved. This is used in the prologue and epilogue to emit the FP
+ /// save/restore.
+ int FPStackOffset;
+
+ /// Holds, for each function, the stack offset where the Return Address must
+ /// be saved. This is used in the prologue and epilogue to emit the RA
+ /// save/restore.
+ int RAStackOffset;
+
+ /// At each function entry a special bitmask directive must be emitted
+ /// to help debug the CPU callee-saved registers. It needs a negative
+ /// offset from the final stack size and the location of the highest
+ /// saved register on the stack.
+ int CPUTopSavedRegOff;
+
+ /// MBlazeFIHolder - Holds a FrameIndex and its Stack Pointer Offset
+ struct MBlazeFIHolder {
+
+ int FI;
+ int SPOffset;
+
+ MBlazeFIHolder(int FrameIndex, int StackPointerOffset)
+ : FI(FrameIndex), SPOffset(StackPointerOffset) {}
+ };
+
+ /// When PIC is used the GP must be saved on the stack in the function
+ /// prologue and must be reloaded from this stack location after every
+ /// call. A reference to its stack location and frame index must be kept
+ /// to be used in emitPrologue and processFunctionBeforeFrameFinalized.
+ MBlazeFIHolder GPHolder;
+
+ /// In LowerFormalArguments the stack size is not yet known, so the Stack
+ /// Pointer Offset calculation for arguments that are not passed in
+ /// registers must be postponed to emitPrologue.
+ SmallVector<MBlazeFIHolder, 16> FnLoadArgs;
+ bool HasLoadArgs;
+
+ // For VarArgs, we must write the argument registers back to the caller's
+ // stack, preserving the in-register arguments. Since the stack size is
+ // unknown in LowerFormalArguments, the Stack Pointer Offset calculation
+ // must be postponed to emitPrologue.
+ SmallVector<MBlazeFIHolder, 4> FnStoreVarArgs;
+ bool HasStoreVarArgs;
+
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+ /// use as the global base register. This is used in some PIC relocation
+ /// models.
+ unsigned GlobalBaseReg;
+
+public:
+ MBlazeFunctionInfo(MachineFunction& MF)
+ : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0),
+ GPHolder(-1,-1), HasLoadArgs(false), HasStoreVarArgs(false),
+ SRetReturnReg(0), GlobalBaseReg(0)
+ {}
+
+ int getFPStackOffset() const { return FPStackOffset; }
+ void setFPStackOffset(int Off) { FPStackOffset = Off; }
+
+ int getRAStackOffset() const { return RAStackOffset; }
+ void setRAStackOffset(int Off) { RAStackOffset = Off; }
+
+ int getCPUTopSavedRegOff() const { return CPUTopSavedRegOff; }
+ void setCPUTopSavedRegOff(int Off) { CPUTopSavedRegOff = Off; }
+
+ int getGPStackOffset() const { return GPHolder.SPOffset; }
+ int getGPFI() const { return GPHolder.FI; }
+ void setGPStackOffset(int Off) { GPHolder.SPOffset = Off; }
+ void setGPFI(int FI) { GPHolder.FI = FI; }
+ bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; }
+
+ bool hasLoadArgs() const { return HasLoadArgs; }
+ bool hasStoreVarArgs() const { return HasStoreVarArgs; }
+
+ void recordLoadArgsFI(int FI, int SPOffset) {
+ if (!HasLoadArgs) HasLoadArgs=true;
+ FnLoadArgs.push_back(MBlazeFIHolder(FI, SPOffset));
+ }
+ void recordStoreVarArgsFI(int FI, int SPOffset) {
+ if (!HasStoreVarArgs) HasStoreVarArgs=true;
+ FnStoreVarArgs.push_back(MBlazeFIHolder(FI, SPOffset));
+ }
+
+ void adjustLoadArgsFI(MachineFrameInfo *MFI) const {
+ if (!hasLoadArgs()) return;
+ for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i)
+ MFI->setObjectOffset( FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset );
+ }
+ void adjustStoreVarArgsFI(MachineFrameInfo *MFI) const {
+ if (!hasStoreVarArgs()) return;
+ for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i)
+ MFI->setObjectOffset( FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset );
+ }
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+};
+
+} // end of namespace llvm
+
+#endif // MBLAZE_MACHINE_FUNCTION_INFO_H
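The FI-holder machinery above is intended to be used in two phases; here is a minimal sketch of that flow, assuming only the members declared in this header (the helper function itself is hypothetical):

    #include "MBlazeMachineFunction.h"
    using namespace llvm;

    // During argument lowering the final stack size is unknown, so only
    // (FrameIndex, SPOffset) pairs are recorded; adjustMBlazeStackFrame
    // later flushes them into MachineFrameInfo via adjustLoadArgsFI.
    static void recordAndFixupArg(MachineFunction &MF, int FI, int SPOffset) {
      MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
      MBlazeFI->recordLoadArgsFI(FI, SPOffset);      // in LowerFormalArguments
      MBlazeFI->adjustLoadArgsFI(MF.getFrameInfo()); // in adjustMBlazeStackFrame
    }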
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
new file mode 100644
index 0000000..8dfca81
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -0,0 +1,421 @@
+//===- MBlazeRegisterInfo.cpp - MBlaze Register Information -== -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-reg-info"
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeRegisterInfo.h"
+#include "MBlazeMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+MBlazeRegisterInfo::
+MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii)
+ : MBlazeGenRegisterInfo(MBlaze::ADJCALLSTACKDOWN, MBlaze::ADJCALLSTACKUP),
+ Subtarget(ST), TII(tii) {}
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// MBlaze::R0, return the number that it corresponds to (e.g. 0).
+unsigned MBlazeRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+ switch (RegEnum) {
+ case MBlaze::R0 : case MBlaze::F0 : return 0;
+ case MBlaze::R1 : case MBlaze::F1 : return 1;
+ case MBlaze::R2 : case MBlaze::F2 : return 2;
+ case MBlaze::R3 : case MBlaze::F3 : return 3;
+ case MBlaze::R4 : case MBlaze::F4 : return 4;
+ case MBlaze::R5 : case MBlaze::F5 : return 5;
+ case MBlaze::R6 : case MBlaze::F6 : return 6;
+ case MBlaze::R7 : case MBlaze::F7 : return 7;
+ case MBlaze::R8 : case MBlaze::F8 : return 8;
+ case MBlaze::R9 : case MBlaze::F9 : return 9;
+ case MBlaze::R10 : case MBlaze::F10 : return 10;
+ case MBlaze::R11 : case MBlaze::F11 : return 11;
+ case MBlaze::R12 : case MBlaze::F12 : return 12;
+ case MBlaze::R13 : case MBlaze::F13 : return 13;
+ case MBlaze::R14 : case MBlaze::F14 : return 14;
+ case MBlaze::R15 : case MBlaze::F15 : return 15;
+ case MBlaze::R16 : case MBlaze::F16 : return 16;
+ case MBlaze::R17 : case MBlaze::F17 : return 17;
+ case MBlaze::R18 : case MBlaze::F18 : return 18;
+ case MBlaze::R19 : case MBlaze::F19 : return 19;
+ case MBlaze::R20 : case MBlaze::F20 : return 20;
+ case MBlaze::R21 : case MBlaze::F21 : return 21;
+ case MBlaze::R22 : case MBlaze::F22 : return 22;
+ case MBlaze::R23 : case MBlaze::F23 : return 23;
+ case MBlaze::R24 : case MBlaze::F24 : return 24;
+ case MBlaze::R25 : case MBlaze::F25 : return 25;
+ case MBlaze::R26 : case MBlaze::F26 : return 26;
+ case MBlaze::R27 : case MBlaze::F27 : return 27;
+ case MBlaze::R28 : case MBlaze::F28 : return 28;
+ case MBlaze::R29 : case MBlaze::F29 : return 29;
+ case MBlaze::R30 : case MBlaze::F30 : return 30;
+ case MBlaze::R31 : case MBlaze::F31 : return 31;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+/// getRegisterFromNumbering - Given a register number (e.g. 0), return the
+/// register enum value that it corresponds to (e.g. MBlaze::R0).
+unsigned MBlazeRegisterInfo::getRegisterFromNumbering(unsigned Reg) {
+ switch (Reg) {
+ case 0 : return MBlaze::R0;
+ case 1 : return MBlaze::R1;
+ case 2 : return MBlaze::R2;
+ case 3 : return MBlaze::R3;
+ case 4 : return MBlaze::R4;
+ case 5 : return MBlaze::R5;
+ case 6 : return MBlaze::R6;
+ case 7 : return MBlaze::R7;
+ case 8 : return MBlaze::R8;
+ case 9 : return MBlaze::R9;
+ case 10 : return MBlaze::R10;
+ case 11 : return MBlaze::R11;
+ case 12 : return MBlaze::R12;
+ case 13 : return MBlaze::R13;
+ case 14 : return MBlaze::R14;
+ case 15 : return MBlaze::R15;
+ case 16 : return MBlaze::R16;
+ case 17 : return MBlaze::R17;
+ case 18 : return MBlaze::R18;
+ case 19 : return MBlaze::R19;
+ case 20 : return MBlaze::R20;
+ case 21 : return MBlaze::R21;
+ case 22 : return MBlaze::R22;
+ case 23 : return MBlaze::R23;
+ case 24 : return MBlaze::R24;
+ case 25 : return MBlaze::R25;
+ case 26 : return MBlaze::R26;
+ case 27 : return MBlaze::R27;
+ case 28 : return MBlaze::R28;
+ case 29 : return MBlaze::R29;
+ case 30 : return MBlaze::R30;
+ case 31 : return MBlaze::R31;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+unsigned MBlazeRegisterInfo::getPICCallReg() {
+ return MBlaze::R20;
+}
+
+//===----------------------------------------------------------------------===//
+// Callee Saved Registers methods
+//===----------------------------------------------------------------------===//
+
+/// MBlaze Callee Saved Registers
+const unsigned* MBlazeRegisterInfo::
+getCalleeSavedRegs(const MachineFunction *MF) const {
+ // MBlaze callee-save register range is R20 - R31
+ static const unsigned CalleeSavedRegs[] = {
+ MBlaze::R20, MBlaze::R21, MBlaze::R22, MBlaze::R23,
+ MBlaze::R24, MBlaze::R25, MBlaze::R26, MBlaze::R27,
+ MBlaze::R28, MBlaze::R29, MBlaze::R30, MBlaze::R31,
+ 0
+ };
+
+ return CalleeSavedRegs;
+}
+
+/// MBlaze Callee Saved Register Classes
+const TargetRegisterClass* const* MBlazeRegisterInfo::
+getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRC[] = {
+ &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
+ &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
+ &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
+ &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
+ &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
+ &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
+ 0
+ };
+
+ return CalleeSavedRC;
+}
+
+BitVector MBlazeRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(MBlaze::R0);
+ Reserved.set(MBlaze::R1);
+ Reserved.set(MBlaze::R2);
+ Reserved.set(MBlaze::R13);
+ Reserved.set(MBlaze::R14);
+ Reserved.set(MBlaze::R15);
+ Reserved.set(MBlaze::R16);
+ Reserved.set(MBlaze::R17);
+ Reserved.set(MBlaze::R18);
+ Reserved.set(MBlaze::R19);
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// The stack is allocated decrementing the stack pointer on
+// the first instruction of a function prologue. Once decremented,
+// all stack references are done through a positive offset
+// from the stack/frame pointer, so the stack is considered
+// to grow up.
+//
+//===----------------------------------------------------------------------===//
+
+void MBlazeRegisterInfo::adjustMBlazeStackFrame(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+
+ // See the description in MBlazeMachineFunction.h
+ int TopCPUSavedRegOff = -1;
+
+ // Adjust CPU Callee Saved Registers Area. Registers RA and FP must
+ // be saved in this CPU Area if there is the need. This whole Area must
+ // be aligned to the default Stack Alignment requirements.
+ unsigned StackOffset = MFI->getStackSize();
+ unsigned RegSize = 4;
+
+ // Replace the dummy '0' SPOffset with the negative offsets, as explained in
+ // LowerFormalArguments. Leaving '0' until this point is necessary to avoid
+ // the layout that calculateFrameObjectOffsets would apply to the stack frame.
+ MBlazeFI->adjustLoadArgsFI(MFI);
+ MBlazeFI->adjustStoreVarArgsFI(MFI);
+
+ if (hasFP(MF)) {
+ MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
+ StackOffset);
+ MBlazeFI->setFPStackOffset(StackOffset);
+ TopCPUSavedRegOff = StackOffset;
+ StackOffset += RegSize;
+ }
+
+ if (MFI->hasCalls()) {
+ MBlazeFI->setRAStackOffset(0);
+ MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
+ StackOffset);
+ TopCPUSavedRegOff = StackOffset;
+ StackOffset += RegSize;
+ }
+
+ // Update frame info
+ MFI->setStackSize(StackOffset);
+
+ // Recalculate the final top offset. The final value must be '0'
+ // if there isn't a callee saved register for CPU or FPU, otherwise
+ // a negative offset is needed.
+ if (TopCPUSavedRegOff >= 0)
+ MBlazeFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset);
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MBlazeRegisterInfo::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return NoFramePointerElim || MFI->hasVarSizedObjects();
+}
+
+// This function eliminates ADJCALLSTACKDOWN and
+// ADJCALLSTACKUP pseudo instructions.
+void MBlazeRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+// FrameIndexes represent objects inside an abstract stack.
+// We must replace a FrameIndex with a direct stack/frame
+// pointer reference.
+unsigned MBlazeRegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ int *Value, RegScavenger *RS) const {
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+
+ unsigned i = 0;
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned oi = i == 2 ? 1 : 2;
+
+ DEBUG(errs() << "\nFunction : " << MF.getFunction()->getName() << "\n";
+ errs() << "<--------->\n" << MI);
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int stackSize = MF.getFrameInfo()->getStackSize();
+ int spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
+ << "spOffset : " << spOffset << "\n"
+ << "stackSize : " << stackSize << "\n");
+
+ // As explained in LowerFormalArguments, detect negative offsets
+ // and adjust SPOffsets considering the final stack size.
+ int Offset = (spOffset < 0) ? (stackSize - spOffset) : (spOffset + 4);
+ Offset += MI.getOperand(oi).getImm();
+
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ MI.getOperand(oi).ChangeToImmediate(Offset);
+ MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
+ return 0;
+}
+
+void MBlazeRegisterInfo::
+emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = (MBBI != MBB.end() ?
+ MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+ // Get the right frame order for MBlaze.
+ adjustMBlazeStackFrame(MF);
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ unsigned StackSize = MFI->getStackSize();
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->hasCalls()) return;
+ if (StackSize < 28 && MFI->hasCalls()) StackSize = 28;
+
+ int FPOffset = MBlazeFI->getFPStackOffset();
+ int RAOffset = MBlazeFI->getRAStackOffset();
+
+ // Adjust stack : addi R1, R1, -imm
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDI), MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(-StackSize);
+
+ // Save the return address only if the function isn't a leaf one.
+ // swi R15, R1, stack_loc
+ if (MFI->hasCalls()) {
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::SWI))
+ .addReg(MBlaze::R15).addImm(RAOffset).addReg(MBlaze::R1);
+ }
+
+ // if framepointer enabled, save it and set it
+ // to point to the stack pointer
+ if (hasFP(MF)) {
+ // swi R19, R1, stack_loc
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::SWI))
+ .addReg(MBlaze::R19).addImm(FPOffset).addReg(MBlaze::R1);
+
+ // add R19, R1, R0
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R19)
+ .addReg(MBlaze::R1).addReg(MBlaze::R0);
+ }
+}
+
+void MBlazeRegisterInfo::
+emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ // Get the FI's where RA and FP are saved.
+ int FPOffset = MBlazeFI->getFPStackOffset();
+ int RAOffset = MBlazeFI->getRAStackOffset();
+
+ // if framepointer enabled, restore it and restore the
+ // stack pointer
+ if (hasFP(MF)) {
+ // add R1, R19, R0
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R1)
+ .addReg(MBlaze::R19).addReg(MBlaze::R0);
+
+ // lwi R19, R1, stack_loc
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R19)
+ .addImm(FPOffset).addReg(MBlaze::R1);
+ }
+
+ // Restore the return address only if the function isn't a leaf one.
+ // lwi R15, R1, stack_loc
+ if (MFI->hasCalls()) {
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15)
+ .addImm(RAOffset).addReg(MBlaze::R1);
+ }
+
+ // Get the number of bytes from FrameInfo
+ int StackSize = (int) MFI->getStackSize();
+ if (StackSize < 28 && MFI->hasCalls()) StackSize = 28;
+
+ // adjust stack.
+ // addi R1, R1, imm
+ if (StackSize) {
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDI), MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(StackSize);
+ }
+}
+
+
+void MBlazeRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+ // Set the stack offset where GP must be saved/loaded from.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ if (MBlazeFI->needGPSaveRestore())
+ MFI->setObjectOffset(MBlazeFI->getGPFI(), MBlazeFI->getGPStackOffset());
+}
+
+unsigned MBlazeRegisterInfo::getRARegister() const {
+ return MBlaze::R15;
+}
+
+unsigned MBlazeRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ return hasFP(MF) ? MBlaze::R19 : MBlaze::R1;
+}
+
+unsigned MBlazeRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned MBlazeRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+int MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ llvm_unreachable("What is the dwarf register number");
+ return -1;
+}
+
+#include "MBlazeGenRegisterInfo.inc"
+
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
new file mode 100644
index 0000000..cde7d39
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -0,0 +1,96 @@
+//===- MBlazeRegisterInfo.h - MBlaze Register Information Impl --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEREGISTERINFO_H
+#define MBLAZEREGISTERINFO_H
+
+#include "MBlaze.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "MBlazeGenRegisterInfo.h.inc"
+
+namespace llvm {
+class MBlazeSubtarget;
+class TargetInstrInfo;
+class Type;
+
+namespace MBlaze {
+ /// SubregIndex - The index of various sized subregister classes. Note that
+ /// these indices must be kept in sync with the class indices in the
+ /// MBlazeRegisterInfo.td file.
+ enum SubregIndex {
+ SUBREG_FPEVEN = 1, SUBREG_FPODD = 2
+ };
+}
+
+struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
+ const MBlazeSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ MBlazeRegisterInfo(const MBlazeSubtarget &Subtarget,
+ const TargetInstrInfo &tii);
+
+ /// getRegisterNumbering - Given the enum value for some register, e.g.
+ /// MBlaze::R0, return the number that it corresponds to (e.g. 0).
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+ static unsigned getRegisterFromNumbering(unsigned RegEnum);
+
+ /// Get PIC indirect call register
+ static unsigned getPICCallReg();
+
+ /// Adjust the MBlaze stack frame.
+ void adjustMBlazeStackFrame(MachineFunction &MF) const;
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ const TargetRegisterClass* const*
+ getCalleeSavedRegClasses(const MachineFunction* MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ /// Stack Frame Processing Methods
+ unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value = NULL,
+ RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ /// Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ /// Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ /// targetHandlesStackFrameRounding - Returns true if the target is
+ /// responsible for rounding up the stack frame (probably at emitPrologue
+ /// time).
+ bool targetHandlesStackFrameRounding() const { return true; }
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
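The two static numbering helpers declared here (and defined in MBlazeRegisterInfo.cpp earlier in this patch) should be inverses over the 32 general-purpose registers; a small sanity-check sketch:

    #include <cassert>
    #include "MBlazeRegisterInfo.h"
    using namespace llvm;

    // getRegisterFromNumbering maps 0..31 to MBlaze::R0..R31, and
    // getRegisterNumbering maps the enum value back to the number.
    static void checkNumberingRoundTrip() {
      for (unsigned N = 0; N != 32; ++N) {
        unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(N);
        assert(MBlazeRegisterInfo::getRegisterNumbering(Reg) == N);
      }
    }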
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td
new file mode 100644
index 0000000..96a5c98
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -0,0 +1,186 @@
+//===- MBlazeRegisterInfo.td - MBlaze Register defs -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MicroBlaze register file
+//===----------------------------------------------------------------------===//
+
+// We have banks of 32 registers each.
+class MBlazeReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "MBlaze";
+}
+
+class MBlazeRegWithSubRegs<string n, list<Register> subregs>
+ : RegisterWithSubRegs<n, subregs> {
+ field bits<5> Num;
+ let Namespace = "MBlaze";
+}
+
+// MBlaze CPU Registers
+class MBlazeGPRReg<bits<5> num, string n> : MBlazeReg<n> {
+ let Num = num;
+}
+
+// MBlaze 32-bit (aliased) FPU Registers
+class FPR<bits<5> num, string n, list<Register> subregs>
+ : MBlazeRegWithSubRegs<n, subregs> {
+ let Num = num;
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+let Namespace = "MBlaze" in {
+
+ // General Purpose Registers
+ def R0 : MBlazeGPRReg< 0, "r0">, DwarfRegNum<[0]>;
+ def R1 : MBlazeGPRReg< 1, "r1">, DwarfRegNum<[1]>;
+ def R2 : MBlazeGPRReg< 2, "r2">, DwarfRegNum<[2]>;
+ def R3 : MBlazeGPRReg< 3, "r3">, DwarfRegNum<[3]>;
+ def R4 : MBlazeGPRReg< 4, "r4">, DwarfRegNum<[4]>;
+ def R5 : MBlazeGPRReg< 5, "r5">, DwarfRegNum<[5]>;
+ def R6 : MBlazeGPRReg< 6, "r6">, DwarfRegNum<[6]>;
+ def R7 : MBlazeGPRReg< 7, "r7">, DwarfRegNum<[7]>;
+ def R8 : MBlazeGPRReg< 8, "r8">, DwarfRegNum<[8]>;
+ def R9 : MBlazeGPRReg< 9, "r9">, DwarfRegNum<[9]>;
+ def R10 : MBlazeGPRReg< 10, "r10">, DwarfRegNum<[10]>;
+ def R11 : MBlazeGPRReg< 11, "r11">, DwarfRegNum<[11]>;
+ def R12 : MBlazeGPRReg< 12, "r12">, DwarfRegNum<[12]>;
+ def R13 : MBlazeGPRReg< 13, "r13">, DwarfRegNum<[13]>;
+ def R14 : MBlazeGPRReg< 14, "r14">, DwarfRegNum<[14]>;
+ def R15 : MBlazeGPRReg< 15, "r15">, DwarfRegNum<[15]>;
+ def R16 : MBlazeGPRReg< 16, "r16">, DwarfRegNum<[16]>;
+ def R17 : MBlazeGPRReg< 17, "r17">, DwarfRegNum<[17]>;
+ def R18 : MBlazeGPRReg< 18, "r18">, DwarfRegNum<[18]>;
+ def R19 : MBlazeGPRReg< 19, "r19">, DwarfRegNum<[19]>;
+ def R20 : MBlazeGPRReg< 20, "r20">, DwarfRegNum<[20]>;
+ def R21 : MBlazeGPRReg< 21, "r21">, DwarfRegNum<[21]>;
+ def R22 : MBlazeGPRReg< 22, "r22">, DwarfRegNum<[22]>;
+ def R23 : MBlazeGPRReg< 23, "r23">, DwarfRegNum<[23]>;
+ def R24 : MBlazeGPRReg< 24, "r24">, DwarfRegNum<[24]>;
+ def R25 : MBlazeGPRReg< 25, "r25">, DwarfRegNum<[25]>;
+ def R26 : MBlazeGPRReg< 26, "r26">, DwarfRegNum<[26]>;
+ def R27 : MBlazeGPRReg< 27, "r27">, DwarfRegNum<[27]>;
+ def R28 : MBlazeGPRReg< 28, "r28">, DwarfRegNum<[28]>;
+ def R29 : MBlazeGPRReg< 29, "r29">, DwarfRegNum<[29]>;
+ def R30 : MBlazeGPRReg< 30, "r30">, DwarfRegNum<[30]>;
+ def R31 : MBlazeGPRReg< 31, "r31">, DwarfRegNum<[31]>;
+
+ /// MBlaze single precision FPU Registers
+ def F0 : FPR< 0, "r0", [R0]>, DwarfRegNum<[32]>;
+ def F1 : FPR< 1, "r1", [R1]>, DwarfRegNum<[33]>;
+ def F2 : FPR< 2, "r2", [R2]>, DwarfRegNum<[34]>;
+ def F3 : FPR< 3, "r3", [R3]>, DwarfRegNum<[35]>;
+ def F4 : FPR< 4, "r4", [R4]>, DwarfRegNum<[36]>;
+ def F5 : FPR< 5, "r5", [R5]>, DwarfRegNum<[37]>;
+ def F6 : FPR< 6, "r6", [R6]>, DwarfRegNum<[38]>;
+ def F7 : FPR< 7, "r7", [R7]>, DwarfRegNum<[39]>;
+ def F8 : FPR< 8, "r8", [R8]>, DwarfRegNum<[40]>;
+ def F9 : FPR< 9, "r9", [R9]>, DwarfRegNum<[41]>;
+ def F10 : FPR<10, "r10", [R10]>, DwarfRegNum<[42]>;
+ def F11 : FPR<11, "r11", [R11]>, DwarfRegNum<[43]>;
+ def F12 : FPR<12, "r12", [R12]>, DwarfRegNum<[44]>;
+ def F13 : FPR<13, "r13", [R13]>, DwarfRegNum<[45]>;
+ def F14 : FPR<14, "r14", [R14]>, DwarfRegNum<[46]>;
+ def F15 : FPR<15, "r15", [R15]>, DwarfRegNum<[47]>;
+ def F16 : FPR<16, "r16", [R16]>, DwarfRegNum<[48]>;
+ def F17 : FPR<17, "r17", [R17]>, DwarfRegNum<[49]>;
+ def F18 : FPR<18, "r18", [R18]>, DwarfRegNum<[50]>;
+ def F19 : FPR<19, "r19", [R19]>, DwarfRegNum<[51]>;
+ def F20 : FPR<20, "r20", [R20]>, DwarfRegNum<[52]>;
+ def F21 : FPR<21, "r21", [R21]>, DwarfRegNum<[53]>;
+ def F22 : FPR<22, "r22", [R22]>, DwarfRegNum<[54]>;
+ def F23 : FPR<23, "r23", [R23]>, DwarfRegNum<[55]>;
+ def F24 : FPR<24, "r24", [R24]>, DwarfRegNum<[56]>;
+ def F25 : FPR<25, "r25", [R25]>, DwarfRegNum<[57]>;
+ def F26 : FPR<26, "r26", [R26]>, DwarfRegNum<[58]>;
+ def F27 : FPR<27, "r27", [R27]>, DwarfRegNum<[59]>;
+ def F28 : FPR<28, "r28", [R28]>, DwarfRegNum<[60]>;
+ def F29 : FPR<29, "r29", [R29]>, DwarfRegNum<[61]>;
+ def F30 : FPR<30, "r30", [R30]>, DwarfRegNum<[62]>;
+ def F31 : FPR<31, "r31", [R31]>, DwarfRegNum<[63]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Register Classes
+//===----------------------------------------------------------------------===//
+
+def CPURegs : RegisterClass<"MBlaze", [i32], 32,
+ [
+ // Return Values and Arguments
+ R3, R4, R5, R6, R7, R8, R9, R10,
+
+ // Not preserved across procedure calls
+ R11, R12,
+
+ // Callee save
+ R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
+
+ // Reserved
+ R0, // Always zero
+ R1, // The stack pointer
+ R2, // Read-only small data area anchor
+ R13, // Read-write small data area anchor
+ R14, // Return address for interrupts
+ R15, // Return address for sub-routines
+ R16, // Return address for trap
+ R17, // Return address for exceptions
+ R18, // Reserved for assembler
+ R19 // The frame-pointer
+ ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ CPURegsClass::iterator
+ CPURegsClass::allocation_order_end(const MachineFunction &MF) const {
+ // The last 10 registers on the list above are reserved
+ return end()-10;
+ }
+ }];
+}
+
+def FGR32 : RegisterClass<"MBlaze", [f32], 32,
+ [
+ // Return Values and Arguments
+ F3, F4, F5, F6, F7, F8, F9, F10,
+
+ // Not preserved across procedure calls
+ F11, F12,
+
+ // Callee save
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, F31,
+
+ // Reserved
+ F0, // Always zero
+ F1, // The stack pointer
+ F2, // Read-only small data area anchor
+ F13, // Read-write small data area anchor
+ F14, // Return address for interrupts
+ F15, // Return address for sub-routines
+ F16, // Return address for trap
+ F17, // Return address for exceptions
+ F18, // Reserved for assembler
+ F19 // The frame pointer
+ ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ FGR32Class::iterator
+ FGR32Class::allocation_order_end(const MachineFunction &MF) const {
+ // The last 10 registers on the list above are reserved
+ return end()-10;
+ }
+ }];
+}
diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td
new file mode 100644
index 0000000..6a94491
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeSchedule.td
@@ -0,0 +1,63 @@
+//===- MBlazeSchedule.td - MBlaze Scheduling Definitions --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units across MBlaze chip sets. Based on GCC/MBlaze backend files.
+//===----------------------------------------------------------------------===//
+def ALU : FuncUnit;
+def IMULDIV : FuncUnit;
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for MBlaze
+//===----------------------------------------------------------------------===//
+def IIAlu : InstrItinClass;
+def IILoad : InstrItinClass;
+def IIStore : InstrItinClass;
+def IIXfer : InstrItinClass;
+def IIBranch : InstrItinClass;
+def IIHiLo : InstrItinClass;
+def IIImul : InstrItinClass;
+def IIIdiv : InstrItinClass;
+def IIFcvt : InstrItinClass;
+def IIFmove : InstrItinClass;
+def IIFcmp : InstrItinClass;
+def IIFadd : InstrItinClass;
+def IIFmulSingle : InstrItinClass;
+def IIFmulDouble : InstrItinClass;
+def IIFdivSingle : InstrItinClass;
+def IIFdivDouble : InstrItinClass;
+def IIFsqrtSingle : InstrItinClass;
+def IIFsqrtDouble : InstrItinClass;
+def IIFrecipFsqrtStep : InstrItinClass;
+def IIPseudo : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// MBlaze Generic instruction itineraries.
+//===----------------------------------------------------------------------===//
+def MBlazeGenericItineraries : ProcessorItineraries<[
+ InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IILoad , [InstrStage<3, [ALU]>]>,
+ InstrItinData<IIStore , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIXfer , [InstrStage<2, [ALU]>]>,
+ InstrItinData<IIBranch , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIHiLo , [InstrStage<1, [IMULDIV]>]>,
+ InstrItinData<IIImul , [InstrStage<17, [IMULDIV]>]>,
+ InstrItinData<IIIdiv , [InstrStage<38, [IMULDIV]>]>,
+ InstrItinData<IIFcvt , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIFmove , [InstrStage<2, [ALU]>]>,
+ InstrItinData<IIFcmp , [InstrStage<3, [ALU]>]>,
+ InstrItinData<IIFadd , [InstrStage<4, [ALU]>]>,
+ InstrItinData<IIFmulSingle , [InstrStage<7, [ALU]>]>,
+ InstrItinData<IIFmulDouble , [InstrStage<8, [ALU]>]>,
+ InstrItinData<IIFdivSingle , [InstrStage<23, [ALU]>]>,
+ InstrItinData<IIFdivDouble , [InstrStage<36, [ALU]>]>,
+ InstrItinData<IIFsqrtSingle , [InstrStage<54, [ALU]>]>,
+ InstrItinData<IIFsqrtDouble , [InstrStage<12, [ALU]>]>,
+ InstrItinData<IIFrecipFsqrtStep , [InstrStage<5, [ALU]>]>
+]>;
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.cpp b/lib/Target/MBlaze/MBlazeSubtarget.cpp
new file mode 100644
index 0000000..3440521
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeSubtarget.cpp
@@ -0,0 +1,31 @@
+//===- MBlazeSubtarget.cpp - MBlaze Subtarget Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MBlaze specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeSubtarget.h"
+#include "MBlaze.h"
+#include "MBlazeGenSubtarget.inc"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+MBlazeSubtarget::MBlazeSubtarget(const std::string &TT, const std::string &FS):
+ HasPipe3(false), HasBarrel(false), HasDiv(false), HasMul(false),
+ HasFSL(false), HasEFSL(false), HasMSRSet(false), HasException(false),
+ HasPatCmp(false), HasFPU(false), HasESR(false), HasPVR(false),
+ HasMul64(false), HasSqrt(false), HasMMU(false)
+{
+ std::string CPU = "v400";
+ MBlazeArchVersion = V400;
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+}
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.h b/lib/Target/MBlaze/MBlazeSubtarget.h
new file mode 100644
index 0000000..bebb3f7
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeSubtarget.h
@@ -0,0 +1,79 @@
+//=====-- MBlazeSubtarget.h - Define Subtarget for the MBlaze -*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZESUBTARGET_H
+#define MBLAZESUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <string>
+
+namespace llvm {
+
+class MBlazeSubtarget : public TargetSubtarget {
+
+protected:
+
+ enum MBlazeArchEnum {
+ V400, V500, V600, V700, V710
+ };
+
+ // MBlaze architecture version
+ MBlazeArchEnum MBlazeArchVersion;
+
+ bool HasPipe3;
+ bool HasBarrel;
+ bool HasDiv;
+ bool HasMul;
+ bool HasFSL;
+ bool HasEFSL;
+ bool HasMSRSet;
+ bool HasException;
+ bool HasPatCmp;
+ bool HasFPU;
+ bool HasESR;
+ bool HasPVR;
+ bool HasMul64;
+ bool HasSqrt;
+ bool HasMMU;
+
+ InstrItineraryData InstrItins;
+
+public:
+
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ MBlazeSubtarget(const std::string &TT, const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+
+ bool hasFPU() const { return HasFPU; }
+ bool hasSqrt() const { return HasSqrt; }
+ bool hasMul() const { return HasMul; }
+ bool hasMul64() const { return HasMul64; }
+ bool hasDiv() const { return HasDiv; }
+ bool hasBarrel() const { return HasBarrel; }
+
+ bool isV400() const { return MBlazeArchVersion == V400; }
+ bool isV500() const { return MBlazeArchVersion == V500; }
+ bool isV600() const { return MBlazeArchVersion == V600; }
+ bool isV700() const { return MBlazeArchVersion == V700; }
+ bool isV710() const { return MBlazeArchVersion == V710; }
+};
+} // End llvm namespace
+
+#endif
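A sketch of how these accessors are typically consumed once a subtarget is built from a triple and feature string; the "+mul" feature spelling and the helper itself are assumptions, only the constructor and accessors come from this header:

    #include "MBlazeSubtarget.h"
    #include <string>
    using namespace llvm;

    // Hypothetical helper: decide whether to emit hardware multiplies.
    static bool wantsHardwareMultiply(const std::string &Triple,
                                      const std::string &Features) {
      MBlazeSubtarget ST(Triple, Features);  // e.g. Features = "+mul"
      return ST.hasMul() || ST.hasMul64();
    }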
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
new file mode 100644
index 0000000..9eba2b3
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -0,0 +1,66 @@
+//===-- MBlazeTargetMachine.cpp - Define TargetMachine for MBlaze ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about the MBlaze target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "MBlazeMCAsmInfo.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeMBlazeTarget() {
+ // Register the target.
+ RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget);
+ RegisterAsmInfo<MBlazeMCAsmInfo> A(TheMBlazeTarget);
+}
+
+// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
+// The stack is always 8 byte aligned
+// In the function prologue, the stack is created by decrementing
+// its pointer. Once decremented, all references are done with a positive
+// offset from the stack/frame pointer, so using StackGrowsUp makes the
+// handling easier.
+MBlazeTargetMachine::
+MBlazeTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS):
+ LLVMTargetMachine(T, TT),
+ Subtarget(TT, FS),
+ DataLayout("E-p:32:32-i8:8:8-i16:16:16-i64:32:32-"
+ "f64:32:32-v64:32:32-v128:32:32-n32"),
+ InstrInfo(*this),
+ FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0),
+ TLInfo(*this) {
+ if (getRelocationModel() == Reloc::Default) {
+ setRelocationModel(Reloc::Static);
+ }
+
+ if (getCodeModel() == CodeModel::Default)
+ setCodeModel(CodeModel::Small);
+}
+
+// Install an instruction selector pass using
+// the ISelDag to gen MBlaze code.
+bool MBlazeTargetMachine::
+addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+ PM.add(createMBlazeISelDag(*this));
+ return false;
+}
+
+// Implemented by targets that want to run passes immediately before
+// machine code is emitted. Return true if -print-machineinstrs should
+// print out the code after the passes.
+bool MBlazeTargetMachine::
+addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+ PM.add(createMBlazeDelaySlotFillerPass(*this));
+ return true;
+}
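For orientation, once LLVMInitializeMBlazeTargetInfo() and LLVMInitializeMBlazeTarget() have both run, a driver reaches this TargetMachine through the target registry. A rough sketch against the registry API of this LLVM revision; the triple string and the helper are assumptions, not part of the patch:

    #include "llvm/Target/TargetRegistry.h"
    #include "llvm/Target/TargetMachine.h"
    #include <string>
    using namespace llvm;

    // Look up the registered "mblaze" target and build its TargetMachine.
    static TargetMachine *makeMBlazeTargetMachine() {
      std::string Error;
      const Target *T = TargetRegistry::lookupTarget("mblaze-elf", Error);
      if (!T) return 0;
      return T->createTargetMachine("mblaze-elf", /*Features=*/"");
    }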
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
new file mode 100644
index 0000000..85c975c
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -0,0 +1,69 @@
+//===-- MBlazeTargetMachine.h - Define TargetMachine for MBlaze --- C++ ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_TARGETMACHINE_H
+#define MBLAZE_TARGETMACHINE_H
+
+#include "MBlazeSubtarget.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeISelLowering.h"
+#include "MBlazeIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+
+namespace llvm {
+ class formatted_raw_ostream;
+
+ class MBlazeTargetMachine : public LLVMTargetMachine {
+ MBlazeSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ MBlazeInstrInfo InstrInfo;
+ TargetFrameInfo FrameInfo;
+ MBlazeTargetLowering TLInfo;
+ MBlazeIntrinsicInfo IntrinsicInfo;
+ public:
+ MBlazeTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
+
+ virtual const MBlazeInstrInfo *getInstrInfo() const
+ { return &InstrInfo; }
+
+ virtual const TargetFrameInfo *getFrameInfo() const
+ { return &FrameInfo; }
+
+ virtual const MBlazeSubtarget *getSubtargetImpl() const
+ { return &Subtarget; }
+
+ virtual const TargetData *getTargetData() const
+ { return &DataLayout;}
+
+ virtual const MBlazeRegisterInfo *getRegisterInfo() const
+ { return &InstrInfo.getRegisterInfo(); }
+
+ virtual MBlazeTargetLowering *getTargetLowering() const
+ { return const_cast<MBlazeTargetLowering*>(&TLInfo); }
+
+ const TargetIntrinsicInfo *getIntrinsicInfo() const
+ { return &IntrinsicInfo; }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+
+ virtual bool addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ };
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
new file mode 100644
index 0000000..79c9494
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
@@ -0,0 +1,88 @@
+//===-- MBlazeTargetObjectFile.cpp - MBlaze object files ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeTargetObjectFile.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+void MBlazeTargetObjectFile::
+Initialize(MCContext &Ctx, const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ SmallDataSection =
+ getELFSection(".sdata", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+
+ SmallBSSSection =
+ getELFSection(".sbss", MCSectionELF::SHT_NOBITS,
+ MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
+ SectionKind::getBSS());
+
+}
+
+// An address must be loaded from a small section if its size is less than the
+// small section size threshold. Data in this section must be addressed using
+// the gp_rel operator.
+static bool IsInSmallSection(uint64_t Size) {
+ return Size > 0 && Size <= 8;
+}
+
+bool MBlazeTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM) const {
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ return false;
+
+ return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+}
+
+/// IsGlobalInSmallSection - Return true if this global address should be
+/// placed into small data/bss section.
+bool MBlazeTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+ SectionKind Kind) const {
+ // Only global variables, not functions.
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+ if (!GVA)
+ return false;
+
+ // We can only do this for datarel or BSS objects for now.
+ if (!Kind.isBSS() && !Kind.isDataRel())
+ return false;
+
+ // If this is an internal constant string, there is a special
+ // section for it, but not in small data/bss.
+ if (Kind.isMergeable1ByteCString())
+ return false;
+
+ const Type *Ty = GV->getType()->getElementType();
+ return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty));
+}
+
+const MCSection *MBlazeTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // TODO: Could also support "weak" symbols with ".gnu.linkonce.s.*"
+ // sections?
+
+ // Handle Small Section classification here.
+ if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallBSSSection;
+ if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
+}
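Concretely, IsInSmallSection's 1..8-byte window means globals are classified roughly as follows; a sketch assuming the usual 32-bit MBlaze type sizes:

    // Hypothetical globals in a module compiled for MBlaze:
    int  counter;            // 4 bytes, zero-initialized -> .sbss  (small BSS)
    int  table[2] = {1, 2};  // 8 bytes, initialized data -> .sdata (small data)
    char buffer[64];         // 64 bytes -> falls back to the regular sections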
diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.h b/lib/Target/MBlaze/MBlazeTargetObjectFile.h
new file mode 100644
index 0000000..20e7702
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.h
@@ -0,0 +1,41 @@
+//===-- llvm/Target/MBlazeTargetObjectFile.h - MBlaze Obj. Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MBLAZE_TARGETOBJECTFILE_H
+#define LLVM_TARGET_MBLAZE_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+ class MBlazeTargetObjectFile : public TargetLoweringObjectFileELF {
+ const MCSection *SmallDataSection;
+ const MCSection *SmallBSSSection;
+ public:
+
+ void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+
+ /// IsGlobalInSmallSection - Return true if this global address should be
+ /// placed into small data/bss section.
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM,
+ SectionKind Kind) const;
+
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const;
+ };
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/MBlaze/Makefile b/lib/Target/MBlaze/Makefile
new file mode 100644
index 0000000..19e508c
--- /dev/null
+++ b/lib/Target/MBlaze/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/MBlaze/Makefile --------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMMBlazeCodeGen
+TARGET = MBlaze
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = MBlazeGenRegisterInfo.h.inc MBlazeGenRegisterNames.inc \
+ MBlazeGenRegisterInfo.inc MBlazeGenInstrNames.inc \
+ MBlazeGenInstrInfo.inc MBlazeGenAsmWriter.inc \
+ MBlazeGenDAGISel.inc MBlazeGenCallingConv.inc \
+ MBlazeGenSubtarget.inc MBlazeGenIntrinsics.inc
+
+DIRS = AsmPrinter TargetInfo
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
new file mode 100644
index 0000000..5afb14d
--- /dev/null
+++ b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMBlazeInfo
+ MBlazeTargetInfo.cpp
+ )
+
+add_dependencies(LLVMMBlazeInfo MBlazeCodeGenTable_gen)
diff --git a/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
new file mode 100644
index 0000000..16e01db
--- /dev/null
+++ b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- MBlazeTargetInfo.cpp - MBlaze Target Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheMBlazeTarget;
+
+extern "C" void LLVMInitializeMBlazeTargetInfo() {
+ RegisterTarget<Triple::mblaze> X(TheMBlazeTarget, "mblaze", "MBlaze");
+}
diff --git a/lib/Target/MBlaze/TargetInfo/Makefile b/lib/Target/MBlaze/TargetInfo/Makefile
new file mode 100644
index 0000000..fb7ea11
--- /dev/null
+++ b/lib/Target/MBlaze/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/MBlaze/TargetInfo/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index a96ee49..ac41cc8 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -38,7 +38,8 @@ namespace llvm {
virtual bool addPassesToEmitWholeFile(PassManager &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel);
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify);
virtual const TargetData *getTargetData() const { return 0; }
};
@@ -57,7 +58,7 @@ bool MSILModule::runOnModule(Module &M) {
TypeSymbolTable& Table = M.getTypeSymbolTable();
std::set<const Type *> Types = getAnalysis<FindUsedTypes>().getTypes();
for (TypeSymbolTable::iterator I = Table.begin(), E = Table.end(); I!=E; ) {
- if (!isa<StructType>(I->second) && !isa<OpaqueType>(I->second))
+ if (!I->second->isStructTy() && !I->second->isOpaqueTy())
Table.remove(I++);
else {
std::set<const Type *>::iterator T = Types.find(I->second);
@@ -187,7 +188,7 @@ void MSILWriter::printModuleStartup() {
break;
case 1:
Arg1 = F->arg_begin();
- if (Arg1->getType()->isInteger()) {
+ if (Arg1->getType()->isIntegerTy()) {
Out << "\tldloc\targc\n";
Args = getTypeName(Arg1->getType());
BadSig = false;
@@ -195,7 +196,7 @@ void MSILWriter::printModuleStartup() {
break;
case 2:
Arg1 = Arg2 = F->arg_begin(); ++Arg2;
- if (Arg1->getType()->isInteger() &&
+ if (Arg1->getType()->isIntegerTy() &&
Arg2->getType()->getTypeID() == Type::PointerTyID) {
Out << "\tldloc\targc\n\tldloc\targv\n";
Args = getTypeName(Arg1->getType())+","+getTypeName(Arg2->getType());
@@ -207,7 +208,7 @@ void MSILWriter::printModuleStartup() {
}
bool RetVoid = (F->getReturnType()->getTypeID() == Type::VoidTyID);
- if (BadSig || (!F->getReturnType()->isInteger() && !RetVoid)) {
+ if (BadSig || (!F->getReturnType()->isIntegerTy() && !RetVoid)) {
Out << "\tldc.i4.0\n";
} else {
Out << "\tcall\t" << getTypeName(F->getReturnType()) <<
@@ -334,7 +335,7 @@ std::string MSILWriter::getPrimitiveTypeName(const Type* Ty, bool isSigned) {
std::string MSILWriter::getTypeName(const Type* Ty, bool isSigned,
bool isNested) {
- if (Ty->isPrimitiveType() || Ty->isInteger())
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy())
return getPrimitiveTypeName(Ty,isSigned);
// FIXME: "OpaqueType" support
switch (Ty->getTypeID()) {
@@ -1459,7 +1460,7 @@ void MSILWriter::printDeclarations(const TypeSymbolTable& ST) {
for (std::set<const Type*>::const_iterator
UI = UsedTypes->begin(), UE = UsedTypes->end(); UI!=UE; ++UI) {
const Type* Ty = *UI;
- if (isa<ArrayType>(Ty) || isa<VectorType>(Ty) || isa<StructType>(Ty))
+ if (Ty->isArrayTy() || Ty->isVectorTy() || Ty->isStructTy())
Name = getTypeName(Ty, false, true);
// Type with no need to declare.
else continue;
@@ -1688,7 +1689,8 @@ void MSILWriter::printExternals() {
bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM,
formatted_raw_ostream &o,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel)
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify)
{
if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
MSILWriter* Writer = new MSILWriter(o);
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
index def5fc6..7a35eb0 100644
--- a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
@@ -98,12 +98,19 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
bool isMemOp = Modifier && !strcmp(Modifier, "mem");
uint64_t Offset = MO.getOffset();
- O << (isMemOp ? '&' : '#');
+ // If the global address expression is part of a displacement field with a
+ // register base, we should not emit any prefix symbol here, e.g.
+ // mov.w &foo, r1
+ // vs
+ // mov.w glb(r1), r2
+ // Otherwise (!) msp430-as will silently miscompile the output :(
+ if (!Modifier || strcmp(Modifier, "nohash"))
+ O << (isMemOp ? '&' : '#');
if (Offset)
O << '(' << Offset << '+';
O << *GetGlobalValueSymbol(MO.getGlobal());
-
+
if (Offset)
O << ')';
@@ -124,15 +131,11 @@ void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum,
const MachineOperand &Disp = MI->getOperand(OpNum+1);
// Print displacement first
- if (!Disp.isImm()) {
- printOperand(MI, OpNum+1, "mem");
- } else {
- if (!Base.getReg())
- O << '&';
-
- printOperand(MI, OpNum+1, "nohash");
- }
+ // Imm here is in fact a global address - print the extra modifier.
+ if (Disp.isImm() && !Base.getReg())
+ O << '&';
+ printOperand(MI, OpNum+1, "nohash");
// Print register base field
if (Base.getReg()) {
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp
index f6565bd..d7636e6 100644
--- a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp
+++ b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp
@@ -62,21 +62,26 @@ void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
const MCOperand &Disp = MI->getOperand(OpNo+1);
// Print displacement first
- if (Disp.isExpr()) {
- O << '&' << *Disp.getExpr();
- } else {
- assert(Disp.isImm() && "Expected immediate in displacement field");
- if (!Base.getReg())
- O << '&';
+ // If the global address expression is part of a displacement field with a
+ // register base, we should not emit any prefix symbol here, e.g.
+ // mov.w &foo, r1
+ // vs
+ // mov.w glb(r1), r2
+ // Otherwise (!) msp430-as will silently miscompile the output :(
+ if (!Base.getReg())
+ O << '&';
+
+ if (Disp.isExpr())
+ O << *Disp.getExpr();
+ else {
+ assert(Disp.isImm() && "Expected immediate in displacement field");
O << Disp.getImm();
}
-
// Print register base field
- if (Base.getReg()) {
+ if (Base.getReg())
O << '(' << getRegisterName(Base.getReg()) << ')';
- }
}
void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo) {
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 4eec757..911cfcb 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -26,26 +26,12 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-
using namespace llvm;
-#ifndef NDEBUG
-static cl::opt<bool>
-ViewRMWDAGs("view-msp430-rmw-dags", cl::Hidden,
- cl::desc("Pop up a window to show isel dags after RMW preprocess"));
-#else
-static const bool ViewRMWDAGs = false;
-#endif
-
-STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
-
-
namespace {
struct MSP430ISelAddressMode {
enum {
@@ -123,8 +109,6 @@ namespace {
Lowering(*TM.getTargetLowering()),
Subtarget(*TM.getSubtargetImpl()) { }
- virtual void InstructionSelect();
-
virtual const char *getPassName() const {
return "MSP430 DAG->DAG Pattern Instruction Selection";
}
@@ -133,9 +117,6 @@ namespace {
bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM);
bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM);
- bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
- SDNode *Root) const;
-
virtual bool
SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
std::vector<SDValue> &OutOps);
@@ -144,18 +125,12 @@ namespace {
#include "MSP430GenDAGISel.inc"
private:
- DenseMap<SDNode*, SDNode*> RMWStores;
- void PreprocessForRMW();
SDNode *Select(SDNode *N);
SDNode *SelectIndexedLoad(SDNode *Op);
SDNode *SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2,
unsigned Opc8, unsigned Opc16);
bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp);
-
- #ifndef NDEBUG
- unsigned Indent;
- #endif
};
} // end anonymous namespace
@@ -217,10 +192,7 @@ bool MSP430DAGToDAGISel::MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM)
}
bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) {
- DEBUG({
- errs() << "MatchAddress: ";
- AM.dump();
- });
+ DEBUG(errs() << "MatchAddress: "; AM.dump());
switch (N.getOpcode()) {
default: break;
@@ -336,270 +308,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
return false;
}
-bool MSP430DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
- SDNode *Root) const {
- if (OptLevel == CodeGenOpt::None) return false;
-
- /// RMW preprocessing creates the following code:
- /// [Load1]
- /// ^ ^
- /// / |
- /// / |
- /// [Load2] |
- /// ^ ^ |
- /// | | |
- /// | \-|
- /// | |
- /// | [Op]
- /// | ^
- /// | |
- /// \ /
- /// \ /
- /// [Store]
- ///
- /// The path Store => Load2 => Load1 is via chain. Note that in general it is
- /// not allowed to fold Load1 into Op (and Store) since it will creates a
- /// cycle. However, this is perfectly legal for the loads moved below the
- /// TokenFactor by PreprocessForRMW. Query the map Store => Load1 (created
- /// during preprocessing) to determine whether it's legal to introduce such
- /// "cycle" for a moment.
- DenseMap<SDNode*, SDNode*>::const_iterator I = RMWStores.find(Root);
- if (I != RMWStores.end() && I->second == N)
- return true;
-
- // Proceed to 'generic' cycle finder code
- return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root);
-}
-
-
-/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
-/// and move load below the TokenFactor. Replace store's chain operand with
-/// load's chain result.
-static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
- SDValue Store, SDValue TF) {
- SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i)
- if (Load.getNode() == TF.getOperand(i).getNode())
- Ops.push_back(Load.getOperand(0));
- else
- Ops.push_back(TF.getOperand(i));
- SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
- SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF,
- Load.getOperand(1),
- Load.getOperand(2));
- CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1),
- Store.getOperand(2), Store.getOperand(3));
-}
-
-/// MoveBelowTokenFactor2 - Replace TokenFactor operand with load's chain operand
-/// and move load below the TokenFactor. Replace store's chain operand with
-/// load's chain result. This a version which sinks two loads below token factor.
-/// Look into PreprocessForRMW comments for explanation of transform.
-static void MoveBelowTokenFactor2(SelectionDAG *CurDAG,
- SDValue Load1, SDValue Load2,
- SDValue Store, SDValue TF) {
- SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) {
- SDNode* N = TF.getOperand(i).getNode();
- if (Load2.getNode() == N)
- Ops.push_back(Load2.getOperand(0));
- else if (Load1.getNode() != N)
- Ops.push_back(TF.getOperand(i));
- }
-
- SDValue NewTF = SDValue(CurDAG->MorphNodeTo(TF.getNode(),
- TF.getOpcode(),
- TF.getNode()->getVTList(),
- &Ops[0], Ops.size()), TF.getResNo());
- SDValue NewLoad2 = CurDAG->UpdateNodeOperands(Load2, NewTF,
- Load2.getOperand(1),
- Load2.getOperand(2));
-
- SDValue NewLoad1 = CurDAG->UpdateNodeOperands(Load1, NewLoad2.getValue(1),
- Load1.getOperand(1),
- Load1.getOperand(2));
-
- CurDAG->UpdateNodeOperands(Store,
- NewLoad1.getValue(1),
- Store.getOperand(1),
- Store.getOperand(2), Store.getOperand(3));
-}
-
-/// isAllowedToSink - return true if N a load which can be moved below token
-/// factor. Basically, the load should be non-volatile and has single use.
-static bool isLoadAllowedToSink(SDValue N, SDValue Chain) {
- if (N.getOpcode() == ISD::BIT_CONVERT)
- N = N.getOperand(0);
-
- LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
- if (!LD || LD->isVolatile())
- return false;
- if (LD->getAddressingMode() != ISD::UNINDEXED)
- return false;
-
- ISD::LoadExtType ExtType = LD->getExtensionType();
- if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD)
- return false;
-
- return (N.hasOneUse() &&
- LD->hasNUsesOfValue(1, 1) &&
- LD->isOperandOf(Chain.getNode()));
-}
-
-
-/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG.
-/// The chain produced by the load must only be used by the store's chain
-/// operand, otherwise this may produce a cycle in the DAG.
-static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
- SDValue &Load) {
- if (isLoadAllowedToSink(N, Chain) &&
- N.getOperand(1) == Address) {
- Load = N;
- return true;
- }
- return false;
-}
-
-/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
-/// This is only run if not in -O0 mode.
-/// This allows the instruction selector to pick more read-modify-write
-/// instructions. This is a common case:
-///
-/// [Load chain]
-/// ^
-/// |
-/// [Load]
-/// ^ ^
-/// | |
-/// / \-
-/// / |
-/// [TokenFactor] [Op]
-/// ^ ^
-/// | |
-/// \ /
-/// \ /
-/// [Store]
-///
-/// The fact the store's chain operand != load's chain will prevent the
-/// (store (op (load))) instruction from being selected. We can transform it to:
-///
-/// [Load chain]
-/// ^
-/// |
-/// [TokenFactor]
-/// ^
-/// |
-/// [Load]
-/// ^ ^
-/// | |
-/// | \-
-/// | |
-/// | [Op]
-/// | ^
-/// | |
-/// \ /
-/// \ /
-/// [Store]
-///
-/// We also recognize the case where second operand of Op is load as well and
-/// move it below token factor as well creating DAG as follows:
-///
-/// [Load chain]
-/// ^
-/// |
-/// [TokenFactor]
-/// ^
-/// |
-/// [Load1]
-/// ^ ^
-/// / |
-/// / |
-/// [Load2] |
-/// ^ ^ |
-/// | | |
-/// | \-|
-/// | |
-/// | [Op]
-/// | ^
-/// | |
-/// \ /
-/// \ /
-/// [Store]
-///
-/// This allows selection of mem-mem instructions. Yay!
-
-void MSP430DAGToDAGISel::PreprocessForRMW() {
- for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end(); I != E; ++I) {
- if (!ISD::isNON_TRUNCStore(I))
- continue;
- SDValue Chain = I->getOperand(0);
-
- if (Chain.getNode()->getOpcode() != ISD::TokenFactor)
- continue;
-
- SDValue N1 = I->getOperand(1);
- SDValue N2 = I->getOperand(2);
- if ((N1.getValueType().isFloatingPoint() &&
- !N1.getValueType().isVector()) ||
- !N1.hasOneUse())
- continue;
-
- unsigned RModW = 0;
- SDValue Load1, Load2;
- unsigned Opcode = N1.getNode()->getOpcode();
- switch (Opcode) {
- case ISD::ADD:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR:
- case ISD::ADDC:
- case ISD::ADDE: {
- SDValue N10 = N1.getOperand(0);
- SDValue N11 = N1.getOperand(1);
- if (isRMWLoad(N10, Chain, N2, Load1)) {
- if (isLoadAllowedToSink(N11, Chain)) {
- Load2 = N11;
- RModW = 2;
- } else
- RModW = 1;
- } else if (isRMWLoad(N11, Chain, N2, Load1)) {
- if (isLoadAllowedToSink(N10, Chain)) {
- Load2 = N10;
- RModW = 2;
- } else
- RModW = 1;
- }
- break;
- }
- case ISD::SUB:
- case ISD::SUBC:
- case ISD::SUBE: {
- SDValue N10 = N1.getOperand(0);
- SDValue N11 = N1.getOperand(1);
- if (isRMWLoad(N10, Chain, N2, Load1)) {
- if (isLoadAllowedToSink(N11, Chain)) {
- Load2 = N11;
- RModW = 2;
- } else
- RModW = 1;
- }
- break;
- }
- }
-
- NumLoadMoved += RModW;
- if (RModW == 1)
- MoveBelowTokenFactor(CurDAG, Load1, SDValue(I, 0), Chain);
- else if (RModW == 2) {
- MoveBelowTokenFactor2(CurDAG, Load1, Load2, SDValue(I, 0), Chain);
- SDNode* Store = I;
- RMWStores[Store] = Load2.getNode();
- }
- }
-}
-
-
static bool isValidIndexedLoad(const LoadSDNode *LD) {
ISD::MemIndexedMode AM = LD->getAddressingMode();
if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD)
@@ -656,7 +364,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op,
unsigned Opc8, unsigned Opc16) {
if (N1.getOpcode() == ISD::LOAD &&
N1.hasOneUse() &&
- IsLegalAndProfitableToFold(N1.getNode(), Op, Op)) {
+ IsLegalToFold(N1, Op, Op)) {
LoadSDNode *LD = cast<LoadSDNode>(N1);
if (!isValidIndexedLoad(LD))
return NULL;
@@ -682,46 +390,19 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op,
}
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void MSP430DAGToDAGISel::InstructionSelect() {
- std::string BlockName;
- if (ViewRMWDAGs)
- BlockName = MF->getFunction()->getNameStr() + ":" +
- BB->getBasicBlock()->getNameStr();
-
- PreprocessForRMW();
-
- if (ViewRMWDAGs) CurDAG->viewGraph("RMW preprocessed:" + BlockName);
-
- DEBUG(errs() << "Selection DAG after RMW preprocessing:\n");
- DEBUG(CurDAG->dump());
-
- // Codegen the basic block.
- DEBUG(errs() << "===== Instruction selection begins:\n");
- DEBUG(Indent = 0);
- SelectRoot(*CurDAG);
- DEBUG(errs() << "===== Instruction selection ends:\n");
-
- CurDAG->RemoveDeadNodes();
- RMWStores.clear();
-}
-
SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) {
DebugLoc dl = Node->getDebugLoc();
// Dump information about the Node being selected
- DEBUG(errs().indent(Indent) << "Selecting: ");
+ DEBUG(errs() << "Selecting: ");
DEBUG(Node->dump(CurDAG));
DEBUG(errs() << "\n");
- DEBUG(Indent += 2);
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
- DEBUG(errs().indent(Indent-2) << "== ";
+ DEBUG(errs() << "== ";
Node->dump(CurDAG);
errs() << "\n");
- DEBUG(Indent -= 2);
return NULL;
}
@@ -809,13 +490,12 @@ SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) {
// Select the default instruction
SDNode *ResNode = SelectCode(Node);
- DEBUG(errs() << std::string(Indent-2, ' ') << "=> ");
+ DEBUG(errs() << "=> ");
if (ResNode == NULL || ResNode == Node)
DEBUG(Node->dump(CurDAG));
else
DEBUG(ResNode->dump(CurDAG));
DEBUG(errs() << "\n");
- DEBUG(Indent -= 2);
return ResNode;
}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index ef81f51..e6c7e1e 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -31,8 +31,8 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -371,7 +371,8 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
//from this parameter
SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
- PseudoSourceValue::getFixedStack(FI), 0));
+ PseudoSourceValue::getFixedStack(FI), 0,
+ false, false, 0));
}
}
@@ -500,7 +501,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
PseudoSourceValue::getStack(),
- VA.getLocMemOffset()));
+ VA.getLocMemOffset(), false, false, 0));
}
}
@@ -794,18 +795,15 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
if (andCC) {
// C = ~Z, thus Res = SRW & 1, no processing is required
} else {
- // Res = (SRW >> 1) & 1
+ // Res = ~((SRW >> 1) & 1)
Shift = true;
+ Invert = true;
}
break;
case MSP430CC::COND_E:
- if (andCC) {
- // C = ~Z, thus Res = ~(SRW & 1)
- } else {
- // Res = ~((SRW >> 1) & 1)
- Shift = true;
- }
- Invert = true;
+ Shift = true;
+ // C = ~Z for AND instruction, thus we can put Res = ~(SRW & 1), however,
+ // Res = (SRW >> 1) & 1 is 1 word shorter.
break;
}
EVT VT = Op.getValueType();
@@ -893,13 +891,13 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
- NULL, 0);
+ NULL, 0, false, false, 0);
}
// Just load the return address.
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- RetAddrFI, NULL, 0);
+ RetAddrFI, NULL, 0, false, false, 0);
}
SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
@@ -911,7 +909,8 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
MSP430::FPW, VT);
while (Depth--)
- FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0);
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
+ false, false, 0);
return FrameAddr;
}
@@ -971,7 +970,7 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
bool MSP430TargetLowering::isTruncateFree(const Type *Ty1,
const Type *Ty2) const {
- if (!Ty1->isInteger() || !Ty2->isInteger())
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
return (Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits());
@@ -986,7 +985,7 @@ bool MSP430TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
bool MSP430TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {
// MSP430 implicitly zero-extends 8-bit results in 16-bit registers.
- return 0 && Ty1->isInteger(8) && Ty2->isInteger(16);
+ return 0 && Ty1->isIntegerTy(8) && Ty2->isIntegerTy(16);
}
bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
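The DAG.getLoad/DAG.getStore calls touched in this hunk (and in the Mips and PIC16 hunks further down) all gain three trailing arguments. Judging from the call sites, these are an isVolatile flag, an isNonTemporal flag, and an explicit alignment where 0 means "derive the natural alignment"; treat the parameter names below as an assumption, since this is only an annotated copy of one call from the hunk above:

    InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
                                 PseudoSourceValue::getFixedStack(FI), 0,
                                 /*isVolatile=*/false,
                                 /*isNonTemporal=*/false,
                                 /*Alignment=*/0));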
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index bb06f7b..144ba26 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -250,7 +250,7 @@ def MOV16ri : I16ri<0x0,
[(set GR16:$dst, imm:$src)]>;
}
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
def MOV8rm : I8rm<0x0,
(outs GR8:$dst), (ins memsrc:$src),
"mov.b\t{$src, $dst}",
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index f1d4a67..c4746db 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -59,8 +59,6 @@ public:
SelectionDAGISel(tm),
TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {}
- virtual void InstructionSelect();
-
// Pass Name
virtual const char *getPassName() const {
return "MIPS DAG->DAG Pattern Instruction Selection";
@@ -98,29 +96,10 @@ private:
inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
-
-
- #ifndef NDEBUG
- unsigned Indent;
- #endif
};
}
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void MipsDAGToDAGISel::InstructionSelect() {
- // Codegen the basic block.
- DEBUG(errs() << "===== Instruction selection begins:\n");
- DEBUG(Indent = 0);
-
- // Select target instructions for the DAG.
- SelectRoot(*CurDAG);
-
- DEBUG(errs() << "===== Instruction selection ends:\n");
-
- CurDAG->RemoveDeadNodes();
-}
/// getGlobalBaseReg - Output the instructions required to put the
/// GOT address into a register.
@@ -329,17 +308,11 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
DebugLoc dl = Node->getDebugLoc();
// Dump information about the Node being selected
- DEBUG(errs().indent(Indent) << "Selecting: ";
- Node->dump(CurDAG);
- errs() << "\n");
- DEBUG(Indent += 2);
+ DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
- DEBUG(errs().indent(Indent-2) << "== ";
- Node->dump(CurDAG);
- errs() << "\n");
- DEBUG(Indent -= 2);
+ DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
return NULL;
}
@@ -547,14 +520,12 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
// Select the default instruction
SDNode *ResNode = SelectCode(Node);
- DEBUG(errs().indent(Indent-2) << "=> ");
+ DEBUG(errs() << "=> ");
if (ResNode == NULL || ResNode == Node)
DEBUG(Node->dump(CurDAG));
else
DEBUG(ResNode->dump(CurDAG));
DEBUG(errs() << "\n");
- DEBUG(Indent -= 2);
-
return ResNode;
}
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index d94944f..584b887 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -510,7 +510,8 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0,
MipsII::MO_GOT);
SDValue ResNode = DAG.getLoad(MVT::i32, dl,
- DAG.getEntryNode(), GA, NULL, 0);
+ DAG.getEntryNode(), GA, NULL, 0,
+ false, false, 0);
// On functions and global targets not internal linked only
// a load from got/GP is necessary for PIC to work.
if (!GV->hasLocalLinkage() || isa<Function>(GV))
@@ -549,7 +550,8 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG)
SDValue Ops[] = { JTI };
HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1);
} else // Emit Load from Global Pointer
- HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0);
+ HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0,
+ false, false, 0);
SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTI);
ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
@@ -586,7 +588,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG)
SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
N->getOffset(), MipsII::MO_GOT);
SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(),
- CP, NULL, 0);
+ CP, NULL, 0, false, false, 0);
SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP);
ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
}
@@ -601,7 +603,8 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0);
+ return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0,
+ false, false, 0);
}
//===----------------------------------------------------------------------===//
@@ -859,7 +862,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// emit ISD::STORE whichs stores the
// parameter value to a stack Location
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+ false, false, 0));
}
// Transform all store nodes into one single node because all store
@@ -933,7 +937,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Reload GP value.
FI = MipsFI->getGPFI();
SDValue FIN = DAG.getFrameIndex(FI,getPointerTy());
- SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, NULL, 0);
+ SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, NULL, 0,
+ false, false, 0);
Chain = GPLoad.getValue(1);
Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Mips::GP, MVT::i32),
GPLoad, SDValue(0,0));
@@ -1097,7 +1102,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Create load nodes to retrieve arguments from the stack
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0));
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
+ false, false, 0));
}
}
@@ -1132,7 +1138,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
int FI = MFI->CreateFixedObject(4, 0, true, false);
MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4)));
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
- OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0));
+ OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0,
+ false, false, 0));
// Record the frame index of the first variable argument
// which is a value necessary to VASTART.
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index e67bcbf..cef3697 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -120,7 +120,7 @@ def immZExt5 : PatLeaf<(imm), [{
// Mips Address Mode! SDNode frameindex could possibily be a match
// since load and store instructions from stack used it.
-def addr : ComplexPattern<i32, 2, "SelectAddr", [frameindex], []>;
+def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>;
//===----------------------------------------------------------------------===//
// Instructions specific format
@@ -300,9 +300,8 @@ class JumpFR<bits<6> op, bits<6> func, string instr_asm>:
// Jump and Link (Call)
let isCall=1, hasDelaySlot=1,
// All calls clobber the non-callee saved registers...
- Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
- K0, K1, F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13,
- F14, F15, F16, F17, F18, F19], Uses = [GP] in {
+ Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
+ K0, K1, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9], Uses = [GP] in {
class JumpLink<bits<6> op, string instr_asm>:
FJ< op,
(outs),
@@ -593,8 +592,8 @@ def : Pat<(MipsJmpLink (i32 tglobaladdr:$dst)),
(JAL tglobaladdr:$dst)>;
def : Pat<(MipsJmpLink (i32 texternalsym:$dst)),
(JAL texternalsym:$dst)>;
-def : Pat<(MipsJmpLink CPURegs:$dst),
- (JALR CPURegs:$dst)>;
+//def : Pat<(MipsJmpLink CPURegs:$dst),
+// (JALR CPURegs:$dst)>;
// hi/lo relocs
def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h
index 32e0436..237b160 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/lib/Target/Mips/MipsTargetObjectFile.h
@@ -10,7 +10,7 @@
#ifndef LLVM_TARGET_MIPS_TARGETOBJECTFILE_H
#define LLVM_TARGET_MIPS_TARGETOBJECTFILE_H
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
namespace llvm {
diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
index 72f7c16..44a6cc0 100644
--- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
+++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
@@ -106,8 +106,9 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
DbgInfo.BeginFunction(MF);
// Now emit the instructions of function in its code section.
- const MCSection *fCodeSection
- = getObjFileLowering().SectionForCode(CurrentFnSym->getName());
+ const MCSection *fCodeSection =
+ getObjFileLowering().SectionForCode(CurrentFnSym->getName(),
+ PAN::isISR(F->getSection()));
// Start the Code Section.
O << "\n";
@@ -157,6 +158,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// printOperand - print operand of insn.
void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
const MachineOperand &MO = MI->getOperand(opNum);
+ const Function *F = MI->getParent()->getParent()->getFunction();
switch (MO.getType()) {
case MachineOperand::MO_Register:
@@ -189,19 +191,18 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
}
case MachineOperand::MO_ExternalSymbol: {
const char *Sname = MO.getSymbolName();
+ std::string Printname = Sname;
- // If its a libcall name, record it to decls section.
- if (PAN::getSymbolTag(Sname) == PAN::LIBCALL)
- LibcallDecls.push_back(Sname);
-
- // Record a call to intrinsic to print the extern declaration for it.
- std::string Sym = Sname;
- if (PAN::isMemIntrinsic(Sym)) {
- Sym = PAN::addPrefix(Sym);
- LibcallDecls.push_back(createESName(Sym));
+ // Intrinsic stuff needs to be renamed if we are printing IL fn.
+ if (PAN::isIntrinsicStuff(Printname)) {
+ if (PAN::isISR(F->getSection())) {
+ Printname = PAN::Rename(Sname);
+ }
+ // Record these decls, we need to print them in asm as extern.
+ LibcallDecls.push_back(createESName(Printname));
}
- O << Sym;
+ O << Printname;
break;
}
case MachineOperand::MO_MachineBasicBlock:
@@ -247,8 +248,6 @@ void PIC16AsmPrinter::printLibcallDecls() {
for (std::list<const char*>::const_iterator I = LibcallDecls.begin();
I != LibcallDecls.end(); I++) {
O << MAI->getExternDirective() << *I << "\n";
- O << MAI->getExternDirective() << PAN::getArgsLabel(*I) << "\n";
- O << MAI->getExternDirective() << PAN::getRetvalLabel(*I) << "\n";
}
O << MAI->getCommentString() << "External decls for libcalls - END." <<"\n";
}
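The rewritten MO_ExternalSymbol case leans on two helpers added to PIC16ABINames.h later in this patch: isIntrinsicStuff() spots libcall/mem-intrinsic names, and Rename() produces the ".IL" flavour used when the symbol is printed from an interrupt-line function. A simplified sketch of that decision, with plain strings instead of the MachineOperand plumbing (assumes PIC16ABINames.h is included):

    #include <list>
    #include <string>

    std::string pickPrintName(const std::string &Sname, bool inISR,
                              std::list<std::string> &Decls) {
      std::string Printname = Sname;
      if (PAN::isIntrinsicStuff(Printname)) {
        if (inISR)
          Printname = PAN::Rename(Sname);  // use the interrupt-line clone's name
        Decls.push_back(Printname);        // emitted later as an extern decl
      }
      return Printname;
    }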
diff --git a/lib/Target/PIC16/PIC16ABINames.h b/lib/Target/PIC16/PIC16ABINames.h
index e18ddf1..4c1a8da 100644
--- a/lib/Target/PIC16/PIC16ABINames.h
+++ b/lib/Target/PIC16/PIC16ABINames.h
@@ -178,18 +178,21 @@ namespace llvm {
return Func1 + tag;
}
+ // Get the retval label for the given function.
static std::string getRetvalLabel(const std::string &Func) {
std::string Func1 = addPrefix(Func);
std::string tag = getTagName(RET_LABEL);
return Func1 + tag;
}
+ // Get the argument label for the given function.
static std::string getArgsLabel(const std::string &Func) {
std::string Func1 = addPrefix(Func);
std::string tag = getTagName(ARGS_LABEL);
return Func1 + tag;
}
+ // Get the tempdata label for the given function.
static std::string getTempdataLabel(const std::string &Func) {
std::string Func1 = addPrefix(Func);
std::string tag = getTagName(TEMPS_LABEL);
@@ -263,6 +266,7 @@ namespace llvm {
return false;
}
+
inline static bool isMemIntrinsic (const std::string &Name) {
if (Name.compare("@memcpy") == 0 || Name.compare("@memset") == 0 ||
Name.compare("@memmove") == 0) {
@@ -272,6 +276,41 @@ namespace llvm {
return false;
}
+ // Currently names of libcalls are assigned during TargetLowering
+ // object construction. There is no provision to change them when the
+ // code for an IL function is being generated.
+ // So we have to change these names while printing assembly.
+ // We need to do that mainly for names related to intrinsics. This
+ // function returns true if a name needs to be cloned.
+ inline static bool isIntrinsicStuff(const std::string &Name) {
+ // Return true if the name contains the LIBCALL marker or is a MemIntrinsic.
+ // These are mainly ARGS_LABEL, RET_LABEL, and the LIBCALL name itself.
+ if ((Name.find(getTagName(LIBCALL)) != std::string::npos)
+ || isMemIntrinsic(Name))
+ return true;
+
+ return false;
+ }
+
+ // Rename the name for IL.
+ inline static std::string Rename(const std::string &Name) {
+ std::string Newname;
+ // If its a label (LIBCALL+Func+LABEL), change it to
+ // (LIBCALL+Func+IL+LABEL).
+ TAGS id = getSymbolTag(Name);
+ if (id == ARGS_LABEL || id == RET_LABEL) {
+ std::size_t pos = Name.find(getTagName(id));
+ Newname = Name.substr(0, pos) + ".IL" + getTagName(id);
+ return Newname;
+ }
+
+ // Else, just append IL to name.
+ return Name + ".IL";
+ }
+
+
+
+
inline static bool isLocalToFunc (std::string &Func, std::string &Var) {
if (! isLocalName(Var)) return false;
@@ -325,6 +364,35 @@ namespace llvm {
return o.str();
}
+
+ // Return true if the given section name marks an ISR.
+ inline static bool isISR(const std::string SectName) {
+ if (SectName.find("interrupt") != std::string::npos)
+ return true;
+
+ return false;
+ }
+
+ // Return the address where the ISR starts in ROM.
+ inline static std::string getISRAddr(void) {
+ return "0x4";
+ }
+
+ // Returns the name of the clone of a function.
+ static std::string getCloneFnName(const std::string &Func) {
+ return (Func + ".IL");
+ }
+
+ // Returns the name of the clone of a variable.
+ static std::string getCloneVarName(const std::string &Fn,
+ const std::string &Var) {
+ std::string cloneVarName = Var;
+ // These vars are named like fun.auto.var.
+ // Just replace the function name, with clone function name.
+ std::string cloneFnName = getCloneFnName(Fn);
+ cloneVarName.replace(cloneVarName.find(Fn), Fn.length(), cloneFnName);
+ return cloneVarName;
+ }
}; // class PAN.
} // end namespace llvm;
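A short worked example of what Rename() produces. The real tag spellings come from getTagName(), which is not shown in this hunk, so the ".args." text below is purely an assumed illustration:

    // Assume, for illustration only, getTagName(ARGS_LABEL) == ".args."
    //
    //   Rename("@memcpy.args.")  ->  "@memcpy.IL.args."  // tagged label: ".IL" inserted before the tag
    //   Rename("@memcpy")        ->  "@memcpy.IL"        // no recognised tag: ".IL" appended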
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
index c517b1b..877e4ff 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@@ -419,7 +419,7 @@ void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int Num,
if (TagName != "")
O << ", " << TagName;
for (int i = 0; i<Num; i++)
- O << "," << Aux[i];
+ O << "," << (Aux[i] && 0xff);
}
/// EmitSymbol - Emit .def for a symbol. Value is offset for the member.
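The EmitAuxEntry change masks each auxiliary value down to its low byte before printing, apparently because each field of the emitted aux entry is one byte wide. A standalone illustration of the masking, outside the AsmPrinter stream:

    #include <cstdio>

    void emitAuxFields(const int Aux[], int Num) {
      for (int i = 0; i < Num; i++)
        std::printf(",%d", Aux[i] & 0xff);   // e.g. 300 -> 44, -1 -> 255
    }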
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
index 82197ae..6cbd002 100644
--- a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
+++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
@@ -14,10 +14,7 @@
#define DEBUG_TYPE "pic16-isel"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "PIC16ISelDAGToDAG.h"
-#include "llvm/Support/Debug.h"
-
using namespace llvm;
/// createPIC16ISelDag - This pass converts a legalized DAG into a
@@ -27,13 +24,6 @@ FunctionPass *llvm::createPIC16ISelDag(PIC16TargetMachine &TM) {
}
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void PIC16DAGToDAGISel::InstructionSelect() {
- SelectRoot(*CurDAG);
- CurDAG->RemoveDeadNodes();
-}
-
/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
SDNode* PIC16DAGToDAGISel::Select(SDNode *N) {
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
index 813a540..8ed5bf7 100644
--- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h
+++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
@@ -19,6 +19,8 @@
#include "PIC16TargetMachine.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Intrinsics.h"
using namespace llvm;
@@ -46,8 +48,6 @@ public:
return "PIC16 DAG->DAG Pattern Instruction Selection";
}
- virtual void InstructionSelect();
-
private:
// Include the pieces autogenerated from the target description.
#include "PIC16GenDAGISel.inc"
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index 7754a4f..d17abb9 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -419,8 +419,7 @@ PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call,
LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
false, 0, CallingConv::C, false,
/*isReturnValueUsed=*/true,
- Callee, Args, DAG, dl,
- DAG.GetOrdering(DAG.getEntryNode().getNode()));
+ Callee, Args, DAG, dl);
return CallInfo.first;
}
@@ -622,12 +621,12 @@ SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) {
ChainHi = Chain.getOperand(1);
}
SDValue Store1 = DAG.getStore(ChainLo, dl, SrcLo, Ptr, NULL,
- 0 + StoreOffset);
+ 0 + StoreOffset, false, false, 0);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(4, Ptr.getValueType()));
SDValue Store2 = DAG.getStore(ChainHi, dl, SrcHi, Ptr, NULL,
- 1 + StoreOffset);
+ 1 + StoreOffset, false, false, 0);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1,
Store2);
@@ -1513,8 +1512,7 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp,
// Direct load operands are folded in binary operations. But before folding
// verify if this folding is legal. Fold only if it is legal otherwise
// convert this direct load to a separate memory operation.
- if(ISel->IsLegalAndProfitableToFold(Op.getOperand(0).getNode(),
- Op.getNode(), Op.getNode()))
+ if(ISel->IsLegalToFold(Op.getOperand(0), Op.getNode(), Op.getNode()))
return false;
else
MemOp = 0;
@@ -1528,10 +1526,24 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp,
return true;
if (isDirectLoad(Op.getOperand(1))) {
- if (Op.getOperand(1).hasOneUse())
- return false;
- else
- MemOp = 1;
+ if (Op.getOperand(1).hasOneUse()) {
+ // Legal and profitable folding check uses the NodeId of DAG nodes.
+ // This NodeId is assigned by topological order. Therefore first
+ // assign topological order then perform legal and profitable check.
+ // Note:- Though this ordering is done before beginning legalization,
+ // newly added nodes during the legalization process have NodeId=-1 (NewNode);
+ // therefore, before performing any check, proper ordering of the nodes is
+ // required.
+ DAG.AssignTopologicalOrder();
+
+ // Direct load operands are folded in binary operations. But before folding
+ // verify if this folding is legal. Fold only if it is legal otherwise
+ // convert this direct load to a separate memory operation.
+ if(ISel->IsLegalToFold(Op.getOperand(1), Op.getNode(), Op.getNode()))
+ return false;
+ else
+ MemOp = 1;
+ }
}
return true;
}
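The new branch mirrors, for operand 1, the treatment operand 0 already gets: assign a topological order first, because IsLegalToFold relies on NodeIds and nodes created during legalization still carry NodeId == -1, then fold the direct load only if that check passes. A condensed sketch of the whole decision as a free function, reusing the PIC16 isDirectLoad helper from the surrounding code (the real routine is a PIC16TargetLowering member and may differ in its fall-through cases):

    // true  -> operand MemOp must become an explicit memory operation;
    // false -> the direct load may stay folded into the binary operation.
    bool needsSeparateMemOp(SelectionDAG &DAG, SelectionDAGISel *ISel,
                            SDValue Op, unsigned &MemOp) {
      for (unsigned i = 0; i != 2; ++i) {
        SDValue Opnd = Op.getOperand(i);
        if (!isDirectLoad(Opnd) || !Opnd.hasOneUse())
          continue;
        DAG.AssignTopologicalOrder();     // give every node a valid NodeId first
        if (ISel->IsLegalToFold(Opnd, Op.getNode(), Op.getNode()))
          return false;                   // legal: keep the load folded
        MemOp = i;
        return true;                      // not legal: materialise operand i
      }
      return true;                        // conservative default, as in the hunk
    }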
diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp
index cc71b04..ab81ed1 100644
--- a/lib/Target/PIC16/PIC16MemSelOpt.cpp
+++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp
@@ -59,6 +59,7 @@ namespace {
const TargetInstrInfo *TII; // Machine instruction info.
MachineBasicBlock *MBB; // Current basic block
std::string CurBank;
+ int PageChanged;
};
char MemSelOpt::ID = 0;
@@ -93,10 +94,56 @@ bool MemSelOpt::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
// Let us assume that when entering a basic block now bank is selected.
// Ideally we should look at the predecessors for this information.
CurBank="";
+ PageChanged=0;
- for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
+ MachineBasicBlock::iterator I;
+ for (I = BB.begin(); I != BB.end(); ++I) {
Changed |= processInstruction(I);
+
+ // If the page has changed, insert a pagesel before
+ // any instruction that needs one.
+ if (PageChanged == 1)
+ {
+ // Restore the page if it was changed, before leaving the basic block,
+ // because it may be required by the goto terminator or the fall thru
+ // basic block.
+ // If the terminator is return, we don't need to restore since there
+ // is no goto or fall thru basic block.
+ if ((I->getOpcode() == PIC16::sublw_3) || //macro has goto
+ (I->getOpcode() == PIC16::sublw_6) || //macro has goto
+ (I->getOpcode() == PIC16::addlwc) || //macro has goto
+ (TII->get(I->getOpcode()).isBranch()))
+ {
+ DebugLoc dl = I->getDebugLoc();
+ BuildMI(*MBB, I, dl, TII->get(PIC16::pagesel)).addExternalSymbol("$");
+ Changed = true;
+ PageChanged = 0;
+ }
+ }
}
+
+ // The basic block is over, but if we did not find any goto yet,
+ // we haven't restored the page.
+ // Restore the page if it was changed, before leaving the basic block,
+ // because it may be required by the fall thru basic block.
+ // If the terminator is return, we don't need to restore since there
+ // is no fall thru basic block.
+ if (PageChanged == 1) {
+ // save the end pointer before we move back to last insn.
+ MachineBasicBlock::iterator J = I;
+ I--;
+ const TargetInstrDesc &TID = TII->get(I->getOpcode());
+ if (! TID.isReturn())
+ {
+ DebugLoc dl = I->getDebugLoc();
+ BuildMI(*MBB, J, dl,
+ TII->get(PIC16::pagesel)).addExternalSymbol("$");
+ Changed = true;
+ PageChanged = 0;
+ }
+ }
+
+
return Changed;
}
@@ -112,42 +159,74 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) {
if (!(TID.isBranch() || TID.isCall() || TID.mayLoad() || TID.mayStore()))
return false;
+ // The first thing we should do is record whether banksel/pagesel are
+ // changed in an unknown way. This can happen via any type of call.
+ // We do it here first, before scanning for MemOp / BBOp, as the indirect
+ // call insns do not have any operands, but they still may change bank/page.
+ if (TID.isCall()) {
+ // Record that we have changed the page, so that we can restore it
+ // before the basic block ends.
+ // We need to signal that a page and bank change happened even for
+ // indirect calls.
+ PageChanged = 1;
+
+ // When a call is made, there may be banksel for variables in callee.
+ // Hence the banksel in caller needs to be reset.
+ CurBank = "";
+ }
+
// Scan for the memory address operand.
// FIXME: Should we use standard interfaces like memoperands_iterator,
// hasMemOperand() etc ?
int MemOpPos = -1;
+ int BBOpPos = -1;
for (unsigned i = 0; i < NumOperands; i++) {
MachineOperand Op = MI->getOperand(i);
if (Op.getType() == MachineOperand::MO_GlobalAddress ||
- Op.getType() == MachineOperand::MO_ExternalSymbol ||
- Op.getType() == MachineOperand::MO_MachineBasicBlock) {
+ Op.getType() == MachineOperand::MO_ExternalSymbol) {
// We found one mem operand. Next one may be BS.
MemOpPos = i;
- break;
+ }
+ if (Op.getType() == MachineOperand::MO_MachineBasicBlock) {
+ // We found one BB operand. Next one may be pagesel.
+ BBOpPos = i;
}
}
// If we did not find an insn accessing memory. Continue.
- if (MemOpPos == -1) return Changed;
+ if ((MemOpPos == -1) &&
+ (BBOpPos == -1))
+ return false;
+ assert ((BBOpPos != MemOpPos) && "operand can only be of one type");
- // Get the MemOp.
- MachineOperand &Op = MI->getOperand(MemOpPos);
// If this is a pagesel material, handle it first.
- if (MI->getOpcode() == PIC16::CALL ||
- MI->getOpcode() == PIC16::br_uncond) {
+ // CALL and br_uncond insns use MemOp (GA or ES) and not BBOp.
+ // Pagesel is required only for a direct call.
+ if ((MI->getOpcode() == PIC16::CALL)) {
+ // Get the BBOp.
+ MachineOperand &MemOp = MI->getOperand(MemOpPos);
DebugLoc dl = MI->getDebugLoc();
- BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)).
- addOperand(Op);
- return true;
+ BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)).addOperand(MemOp);
+
+ // CALL and br_uncond need only pagesel, so we are done.
+ return true;
}
+ // Pagesel is handled. Now, add a Banksel if needed.
+ if (MemOpPos == -1) return Changed;
+ // Get the MemOp.
+ MachineOperand &Op = MI->getOperand(MemOpPos);
+
// Get the section name(NewBank) for MemOp.
// This assumes that the section names for globals are already set by
// AsmPrinter->doInitialization.
std::string NewBank = CurBank;
+ bool hasExternalLinkage = false;
if (Op.getType() == MachineOperand::MO_GlobalAddress &&
Op.getGlobal()->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE) {
+ if (Op.getGlobal()->hasExternalLinkage())
+ hasExternalLinkage= true;
NewBank = Op.getGlobal()->getSection();
} else if (Op.getType() == MachineOperand::MO_ExternalSymbol) {
// External Symbol is generated for temp data and arguments. They are
@@ -162,7 +241,7 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) {
// If the previous and new section names are same, we don't need to
// emit banksel.
- if (NewBank.compare(CurBank) != 0 ) {
+ if (NewBank.compare(CurBank) != 0 || hasExternalLinkage) {
DebugLoc dl = MI->getDebugLoc();
BuildMI(*MBB, MI, dl, TII->get(PIC16::banksel)).
addOperand(Op);
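The reworked pass now carries two pieces of per-block state: CurBank, the last bank selected, and PageChanged, set whenever a call may have switched the page so that a "pagesel $" has to be re-emitted before control leaves the block. A reduced sketch of that state machine, detached from the MachineInstr plumbing:

    #include <string>

    struct SelState {
      std::string CurBank;   // last bank selected in this block ("" = unknown)
      int PageChanged;       // 1 = a call may have changed the current page
    };

    // Called once per instruction while walking a basic block.
    void track(SelState &S, bool isCall, bool isBranchLike) {
      if (isCall) {
        S.PageChanged = 1;   // callee may have issued pagesel/banksel of its own
        S.CurBank.clear();   // so the cached bank can no longer be trusted
      }
      if (S.PageChanged == 1 && isBranchLike) {
        // emit "pagesel $" here to restore the current page, then:
        S.PageChanged = 0;
      }
    }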
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
new file mode 100644
index 0000000..865da35
--- /dev/null
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
@@ -0,0 +1,299 @@
+//===-- PIC16Cloner.cpp - PIC16 LLVM Cloner for shared functions -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to clone all functions that are shared between
+// the main line code (ML) and interrupt line code (IL). It clones all such
+// shared functions and their automatic global vars by adding the .IL suffix.
+//
+// This pass is supposed to be run on the linked .bc module.
+// It traverses the module call graph twice: once starting from the main function
+// and marking each reached function as "ML", and again starting from the ISR
+// and cloning any reachable function that was marked as "ML". After cloning
+// a function, it remaps all the call sites in IL functions to call the
+// cloned functions.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Pass.h"
+#include "llvm/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "PIC16Cloner.h"
+#include "../PIC16ABINames.h"
+#include <vector>
+
+using namespace llvm;
+using std::vector;
+using std::string;
+using std::map;
+
+namespace llvm {
+ char PIC16Cloner::ID = 0;
+
+ ModulePass *createPIC16ClonerPass() { return new PIC16Cloner(); }
+}
+
+// We currently intend to run these passes in opt, which does not have any
+// diagnostic support. So use these functions for now. In future
+// we will probably write our own driver tool.
+//
+void PIC16Cloner::reportError(string ErrorString) {
+ errs() << "ERROR : " << ErrorString << "\n";
+ exit(1);
+}
+
+void PIC16Cloner::
+reportError (string ErrorString, vector<string> &Values) {
+ unsigned ValCount = Values.size();
+ string TargetString;
+ for (unsigned i=0; i<ValCount; ++i) {
+ TargetString = "%";
+ TargetString += ((char)i + '0');
+ ErrorString.replace(ErrorString.find(TargetString), TargetString.length(),
+ Values[i]);
+ }
+ errs() << "ERROR : " << ErrorString << "\n";
+ exit(1);
+}
+
+
+// Entry point
+//
+bool PIC16Cloner::runOnModule(Module &M) {
+ CallGraph &CG = getAnalysis<CallGraph>();
+
+ // Search for the "main" and "ISR" functions.
+ CallGraphNode *mainCGN = NULL, *isrCGN = NULL;
+ for (CallGraph::iterator it = CG.begin() ; it != CG.end(); it++)
+ {
+ // External calling node doesn't have any function associated with it.
+ if (! it->first)
+ continue;
+
+ if (it->first->getName().str() == "main") {
+ mainCGN = it->second;
+ }
+
+ if (PAN::isISR(it->first->getSection())) {
+ isrCGN = it->second;
+ }
+
+ // Don't search further if we've found both.
+ if (mainCGN && isrCGN)
+ break;
+ }
+
+ // We have nothing to do if either main or the ISR is missing.
+ if (! mainCGN || ! isrCGN) return false;
+
+ // Time for some diagnostics.
+ // If main itself is an interrupt function, report an error.
+ if (PAN::isISR(mainCGN->getFunction()->getSection())) {
+ reportError("Function 'main' can't be interrupt function");
+ }
+
+
+ // Mark all reachable functions from main as ML.
+ markCallGraph(mainCGN, "ML");
+
+ // And then all the functions reachable from ISR will be cloned.
+ cloneSharedFunctions(isrCGN);
+
+ return true;
+}
+
+// Mark all reachable functions from the given node, with the given mark.
+//
+void PIC16Cloner::markCallGraph(CallGraphNode *CGN, string StringMark) {
+ // Mark the top node first.
+ Function *thisF = CGN->getFunction();
+
+ thisF->setSection(StringMark);
+
+ // Mark all the called functions
+ for(CallGraphNode::iterator cgn_it = CGN->begin();
+ cgn_it != CGN->end(); ++cgn_it) {
+ Function *CalledF = cgn_it->second->getFunction();
+
+ // If calling an external function then CallGraphNode
+ // will not be associated with any function.
+ if (! CalledF)
+ continue;
+
+ // Issue diagnostic if interrupt function is being called.
+ if (PAN::isISR(CalledF->getSection())) {
+ vector<string> Values;
+ Values.push_back(CalledF->getName().str());
+ reportError("Interrupt function (%0) can't be called", Values);
+ }
+
+ // Has already been marked.
+ if (CalledF->getSection().find(StringMark) != string::npos) {
+ // Should we do anything here?
+ } else {
+ // Mark now
+ CalledF->setSection(StringMark);
+ }
+
+ // Before going any further mark all the called function by current
+ // function.
+ markCallGraph(cgn_it->second ,StringMark);
+ } // end of loop of all called functions.
+}
+
+
+// For PIC16, automatic variables of a function are emitted as globals.
+// Clone the auto variables of a function and put them in ValueMap;
+// this ValueMap will be used while cloning the code of the function itself.
+//
+void PIC16Cloner::CloneAutos(Function *F) {
+ // We'll need to update module's globals list as well. So keep a reference
+ // handy.
+ Module *M = F->getParent();
+ Module::GlobalListType &Globals = M->getGlobalList();
+
+ // Clear the leftovers in ValueMap by any previous cloning.
+ ValueMap.clear();
+
+ // Find the auto globals for this function, clone them, and put them
+ // in ValueMap.
+ std::string FnName = F->getName().str();
+ std::string VarName, ClonedVarName;
+ for (Module::global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I) {
+ VarName = I->getName().str();
+ if (PAN::isLocalToFunc(FnName, VarName)) {
+ // Auto variable for current function found. Clone it.
+ GlobalVariable *GV = I;
+
+ const Type *InitTy = GV->getInitializer()->getType();
+ GlobalVariable *ClonedGV =
+ new GlobalVariable(InitTy, false, GV->getLinkage(),
+ GV->getInitializer());
+ ClonedGV->setName(PAN::getCloneVarName(FnName, VarName));
+ // Add these new globals to module's globals list.
+ Globals.push_back(ClonedGV);
+
+ // Update ValueMap.
+ ValueMap[GV] = ClonedGV;
+ }
+ }
+}
+
+
+// Clone all functions that are reachable from ISR and are already
+// marked as ML.
+//
+void PIC16Cloner::cloneSharedFunctions(CallGraphNode *CGN) {
+
+ // Check all the called functions from ISR.
+ for(CallGraphNode::iterator cgn_it = CGN->begin();
+ cgn_it != CGN->end(); ++cgn_it) {
+ Function *CalledF = cgn_it->second->getFunction();
+
+ // If calling an external function then CallGraphNode
+ // will not be associated with any function.
+ if (!CalledF)
+ continue;
+
+ // Issue diagnostic if interrupt function is being called.
+ if (PAN::isISR(CalledF->getSection())) {
+ vector<string> Values;
+ Values.push_back(CalledF->getName().str());
+ reportError("Interrupt function (%0) can't be called", Values);
+ }
+
+ if (CalledF->getSection().find("ML") != string::npos) {
+ // Function is alternatively marked. It should be a shared one.
+ // Create an IL copy, passing the called function as the first argument
+ // and the caller as the second argument.
+
+ // Before making the IL copy, first ensure that this function has a
+ // body. If the function does not have a body, it can't be cloned.
+ // Such a case may occur when the function has been declared
+ // in the C source code but its body exists in an assembly file.
+ if (!CalledF->isDeclaration()) {
+ Function *cf = cloneFunction(CalledF);
+ remapAllSites(CGN->getFunction(), CalledF, cf);
+ } else {
+ // It is called only from the ISR. Still mark it, as we need this info
+ // in code gen while calling intrinsics. The function itself is not cloned.
+ CalledF->setSection("IL");
+ }
+ }
+ // Before going any further, clone all the shared functions reachable
+ // from the current function.
+ cloneSharedFunctions(cgn_it->second);
+ } // end of loop of all called functions.
+}
+
+// Clone the given function and return it.
+// Note: it uses the ValueMap member of the class, which is already populated
+// by CloneAutos by the time we reach here.
+// FIXME: Should we just pass ValueMap's ref as a parameter here? rather
+// than keeping the ValueMap as a member.
+Function *
+PIC16Cloner::cloneFunction(Function *OrgF) {
+ Function *ClonedF;
+
+ // See if we already cloned it. Return that.
+ cloned_map_iterator cm_it = ClonedFunctionMap.find(OrgF);
+ if(cm_it != ClonedFunctionMap.end()) {
+ ClonedF = cm_it->second;
+ return ClonedF;
+ }
+
+ // Clone does not exist.
+ // First clone the autos, and populate ValueMap.
+ CloneAutos(OrgF);
+
+ // Now create the clone.
+ ClonedF = CloneFunction(OrgF, ValueMap);
+
+ // The new function should be for interrupt line. Therefore should have
+ // the name suffixed with IL and section attribute marked with IL.
+ ClonedF->setName(PAN::getCloneFnName(OrgF->getName()));
+ ClonedF->setSection("IL");
+
+ // Add the newly created function to the module.
+ OrgF->getParent()->getFunctionList().push_back(ClonedF);
+
+ // Update the ClonedFunctionMap to record this cloning activity.
+ ClonedFunctionMap[OrgF] = ClonedF;
+
+ return ClonedF;
+}
+
+
+// Remap the call sites of shared functions, that are in IL.
+// Change the IL call site of a shared function to its clone.
+//
+void PIC16Cloner::
+remapAllSites(Function *Caller, Function *OrgF, Function *Clone) {
+ // First find the caller to update. If the caller itself is cloned
+ // then use the cloned caller. Otherwise use it.
+ cloned_map_iterator cm_it = ClonedFunctionMap.find(Caller);
+ if (cm_it != ClonedFunctionMap.end())
+ Caller = cm_it->second;
+
+ // For the lack of a better call site finding mechanism, iterate over
+ // all insns to find the uses of original fn.
+ for (Function::iterator BI = Caller->begin(); BI != Caller->end(); ++BI) {
+ BasicBlock &BB = *BI;
+ for (BasicBlock::iterator II = BB.begin(); II != BB.end(); ++II) {
+ if (II->getNumOperands() > 0 && II->getOperand(0) == OrgF)
+ II->setOperand(0, Clone);
+ }
+ }
+}
+
+
+
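The new pass amounts to a two-phase walk of the call graph: mark everything reachable from main as "ML", then clone, with a ".IL" suffix, every function reachable from the ISR that carries that mark. The same idea reduced to plain strings, independent of the CallGraph API and purely illustrative; the real pass additionally clones the function's auto globals (CloneAutos) and retargets IL call sites to the clone (remapAllSites):

    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    typedef std::map<std::string, std::vector<std::string> > CallMap;

    static void markML(const CallMap &CG, const std::string &F,
                       std::set<std::string> &ML) {
      if (!ML.insert(F).second) return;            // already marked
      CallMap::const_iterator It = CG.find(F);
      if (It == CG.end()) return;
      for (size_t i = 0; i < It->second.size(); ++i)
        markML(CG, It->second[i], ML);
    }

    static void cloneShared(const CallMap &CG, const std::string &F,
                            const std::set<std::string> &ML,
                            std::set<std::string> &Visited,
                            std::set<std::string> &Clones) {
      if (!Visited.insert(F).second) return;       // avoid call-graph cycles
      CallMap::const_iterator It = CG.find(F);
      if (It == CG.end()) return;
      for (size_t i = 0; i < It->second.size(); ++i) {
        const std::string &Callee = It->second[i];
        if (ML.count(Callee))
          Clones.insert(Callee + ".IL");           // shared with the main line
        cloneShared(CG, Callee, ML, Visited, Clones);
      }
    }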
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
new file mode 100644
index 0000000..24c1152
--- /dev/null
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
@@ -0,0 +1,83 @@
+//===-- PIC16Cloner.h - PIC16 LLVM Cloner for shared functions --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of a cloner class to clone all functions
+// that are shared between the main line code (ML) and interrupt line code (IL).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16CLONER_H
+#define PIC16CLONER_H
+
+#include "llvm/ADT/DenseMap.h"
+
+using namespace llvm;
+using std::vector;
+using std::string;
+using std::map;
+
+namespace llvm {
+ // forward classes.
+ class Value;
+ class Function;
+ class Module;
+ class ModulePass;
+ class CallGraph;
+ class CallGraphNode;
+ class AnalysisUsage;
+
+ class PIC16Cloner : public ModulePass {
+ public:
+ static char ID; // Class identification
+ PIC16Cloner() : ModulePass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<CallGraph>();
+ }
+ virtual bool runOnModule(Module &M);
+
+ private: // Functions
+ // Mark reachable functions for the MainLine or InterruptLine.
+ void markCallGraph(CallGraphNode *CGN, string StringMark);
+
+ // Clone auto variables of function specified.
+ void CloneAutos(Function *F);
+
+ // Clone the body of a function.
+ Function *cloneFunction(Function *F);
+
+ // Clone all shared functions.
+ void cloneSharedFunctions(CallGraphNode *isrCGN);
+
+ // Remap all call sites to the shared function.
+ void remapAllSites(Function *Caller, Function *OrgF, Function *Clone);
+
+ // Error reporting for PIC16Pass
+ void reportError(string ErrorString, vector<string> &Values);
+ void reportError(string ErrorString);
+
+ private: //data
+ // Records if the interrupt function has already been found.
+ // If more than one interrupt function is found then an error
+ // should be thrown.
+ bool foundISR;
+
+ // This ValueMap maps the auto variables of the original functions to
+ // the corresponding cloned auto variables of the cloned function.
+ // This value map is passed during the function cloning so that all the
+ // uses of auto variables are updated properly.
+ DenseMap<const Value*, Value*> ValueMap;
+
+ // Map of already cloned functions.
+ map<Function *, Function *> ClonedFunctionMap;
+ typedef map<Function *, Function *>::iterator cloned_map_iterator;
+ };
+} // End of llvm namespace
+
+#endif
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
index 197c398..5ecb6aa 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
@@ -24,27 +24,27 @@
using namespace llvm;
namespace llvm {
- char PIC16FrameOverlay::ID = 0;
- ModulePass *createPIC16OverlayPass() { return new PIC16FrameOverlay(); }
+ char PIC16Overlay::ID = 0;
+ ModulePass *createPIC16OverlayPass() { return new PIC16Overlay(); }
}
-void PIC16FrameOverlay::getAnalysisUsage(AnalysisUsage &AU) const {
+void PIC16Overlay::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<CallGraph>();
}
-void PIC16FrameOverlay::DFSTraverse(CallGraphNode *CGN, unsigned Depth) {
+void PIC16Overlay::DFSTraverse(CallGraphNode *CGN, unsigned Depth) {
// Do not set any color for external calling node.
if (Depth != 0 && CGN->getFunction()) {
unsigned Color = getColor(CGN->getFunction());
// Handle indirectly called functions
- if (Color >= PIC16Overlay::StartIndirectCallColor ||
- Depth >= PIC16Overlay::StartIndirectCallColor) {
+ if (Color >= PIC16OVERLAY::StartIndirectCallColor ||
+ Depth >= PIC16OVERLAY::StartIndirectCallColor) {
// All functions called from an indirectly called function are given
// an unique color.
- if (Color < PIC16Overlay::StartIndirectCallColor &&
- Depth >= PIC16Overlay::StartIndirectCallColor)
+ if (Color < PIC16OVERLAY::StartIndirectCallColor &&
+ Depth >= PIC16OVERLAY::StartIndirectCallColor)
setColor(CGN->getFunction(), Depth);
for (unsigned int i = 0; i < CGN->size(); i++)
@@ -65,7 +65,7 @@ void PIC16FrameOverlay::DFSTraverse(CallGraphNode *CGN, unsigned Depth) {
DFSTraverse((*CGN)[i], Depth+1);
}
-unsigned PIC16FrameOverlay::ModifyDepthForInterrupt(CallGraphNode *CGN,
+unsigned PIC16Overlay::ModifyDepthForInterrupt(CallGraphNode *CGN,
unsigned Depth) {
Function *Fn = CGN->getFunction();
@@ -81,7 +81,7 @@ unsigned PIC16FrameOverlay::ModifyDepthForInterrupt(CallGraphNode *CGN,
return Depth;
}
-void PIC16FrameOverlay::setColor(Function *Fn, unsigned Color) {
+void PIC16Overlay::setColor(Function *Fn, unsigned Color) {
std::string Section = "";
if (Fn->hasSection())
Section = Fn->getSection();
@@ -119,7 +119,7 @@ void PIC16FrameOverlay::setColor(Function *Fn, unsigned Color) {
Fn->setSection(Section);
}
-unsigned PIC16FrameOverlay::getColor(Function *Fn) {
+unsigned PIC16Overlay::getColor(Function *Fn) {
int Color = 0;
if (!Fn->hasSection())
return 0;
@@ -150,7 +150,7 @@ unsigned PIC16FrameOverlay::getColor(Function *Fn) {
return Color;
}
-bool PIC16FrameOverlay::runOnModule(Module &M) {
+bool PIC16Overlay::runOnModule(Module &M) {
CallGraph &CG = getAnalysis<CallGraph>();
CallGraphNode *ECN = CG.getExternalCallingNode();
@@ -164,7 +164,7 @@ bool PIC16FrameOverlay::runOnModule(Module &M) {
return false;
}
-void PIC16FrameOverlay::MarkIndirectlyCalledFunctions(Module &M) {
+void PIC16Overlay::MarkIndirectlyCalledFunctions(Module &M) {
// If the use of a function is not a call instruction then this
// function might be called indirectly. In that case give it
// an unique color.
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
index d70c4e7..5a2551f 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
@@ -1,4 +1,4 @@
-//===-- PIC16FrameOverlay.h - Interface for PIC16 Frame Overlay -*- C++ -*-===//
+//===-- PIC16Overlay.h - Interface for PIC16 Frame Overlay -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,30 +14,35 @@
#ifndef PIC16FRAMEOVERLAY_H
#define PIC16FRAMEOVERLAY_H
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Pass.h"
-#include "llvm/CallGraphSCCPass.h"
using std::string;
using namespace llvm;
namespace llvm {
- namespace PIC16Overlay {
+ // Forward declarations.
+ class Function;
+ class Module;
+ class ModulePass;
+ class AnalysisUsage;
+ class CallGraphNode;
+ class CallGraph;
+
+ namespace PIC16OVERLAY {
enum OverlayConsts {
StartInterruptColor = 200,
StartIndirectCallColor = 300
};
}
- class PIC16FrameOverlay : public ModulePass {
+ class PIC16Overlay : public ModulePass {
std::string OverlayStr;
unsigned InterruptDepth;
unsigned IndirectCallColor;
public:
static char ID; // Class identification
- PIC16FrameOverlay() : ModulePass(&ID) {
+ PIC16Overlay() : ModulePass(&ID) {
OverlayStr = "Overlay=";
- InterruptDepth = PIC16Overlay::StartInterruptColor;
- IndirectCallColor = PIC16Overlay::StartIndirectCallColor;
+ InterruptDepth = PIC16OVERLAY::StartInterruptColor;
+ IndirectCallColor = PIC16OVERLAY::StartIndirectCallColor;
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
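The header hunk above swaps whole-header includes for forward declarations. A minimal, generic sketch of that dependency-trimming pattern, with made-up names (OverlayLikePass is illustrative, not part of this patch):

// Header: only a pointer to CallGraph is stored, so a forward declaration
// is sufficient and the heavy #include can be dropped from the header.
namespace llvm { class CallGraph; }

class OverlayLikePass {
  llvm::CallGraph *CG;   // pointer member: the full type is not needed here
public:
  explicit OverlayLikePass(llvm::CallGraph *cg) : CG(cg) {}
  void run();            // defined in the .cpp, which does
};                       //   #include "llvm/Analysis/CallGraph.h"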
diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.cpp b/lib/Target/PIC16/PIC16TargetObjectFile.cpp
index d7cfe02..b891c18 100644
--- a/lib/Target/PIC16/PIC16TargetObjectFile.cpp
+++ b/lib/Target/PIC16/PIC16TargetObjectFile.cpp
@@ -315,8 +315,12 @@ PIC16TargetObjectFile::allocateSHARED(const GlobalVariable *GV,
// Interface used by AsmPrinter to get a code section for a function.
const PIC16Section *
-PIC16TargetObjectFile::SectionForCode(const std::string &FnName) const {
+PIC16TargetObjectFile::SectionForCode(const std::string &FnName,
+ bool isISR) const {
const std::string &sec_name = PAN::getCodeSectionName(FnName);
+  // If it is an ISR, its code section starts at a specific address.
+ if (isISR)
+ return getPIC16Section(sec_name, CODE, PAN::getISRAddr());
return getPIC16Section(sec_name, CODE);
}
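A hypothetical call-site sketch of the new two-argument SectionForCode (TObj, CurrentFnName and the way IsISR is computed are placeholders, not taken from this patch):

// Caller sketch: the AsmPrinter would now say whether the function is an
// interrupt service routine, so ISR code lands in the section anchored at
// the ISR address rather than in the default code section.
bool IsISR = false;   // e.g. derived from the function's section/attributes
const PIC16Section *CodeSec = TObj.SectionForCode(CurrentFnName, IsISR);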
diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.h b/lib/Target/PIC16/PIC16TargetObjectFile.h
index 0b0ad43..cf8bf84 100644
--- a/lib/Target/PIC16/PIC16TargetObjectFile.h
+++ b/lib/Target/PIC16/PIC16TargetObjectFile.h
@@ -137,7 +137,8 @@ namespace llvm {
/// Return a code section for a function.
- const PIC16Section *SectionForCode (const std::string &FnName) const;
+ const PIC16Section *SectionForCode (const std::string &FnName,
+ bool isISR) const;
/// Return a frame section for a function.
const PIC16Section *SectionForFrame (const std::string &FnName) const;
diff --git a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp
index 46cc819..f1bdb12 100644
--- a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp
+++ b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp
@@ -15,7 +15,8 @@ using namespace llvm;
Target llvm::ThePIC16Target, llvm::TheCooperTarget;
extern "C" void LLVMInitializePIC16TargetInfo() {
- RegisterTarget<> X(ThePIC16Target, "pic16", "PIC16 14-bit [experimental]");
+ RegisterTarget<Triple::pic16> X(ThePIC16Target, "pic16",
+ "PIC16 14-bit [experimental]");
RegisterTarget<> Y(TheCooperTarget, "cooper", "PIC16 Cooper [experimental]");
}
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index afc90b1..ac901d0 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -31,13 +31,13 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index c7ce171..155fba2 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -66,28 +66,13 @@ def CC_PPC : CallingConv<[
// PowerPC System V Release 4 ABI
//===----------------------------------------------------------------------===//
-// _Complex arguments are never split, thus their two scalars are either
-// passed both in argument registers or both on the stack. Also _Complex
-// arguments are always passed in general purpose registers, never in
-// Floating-point registers or vector registers. Arguments which should go
-// on the stack are marked with the inreg parameter attribute.
-// Giving inreg this target-dependent (and counter-intuitive) meaning
-// simplifies things, because functions calls are not always coming from the
-// frontend but are also created implicitly e.g. for libcalls. If inreg would
-// actually mean that the argument is passed in a register, then all places
-// which create function calls/function definitions implicitly would need to
-// be aware of this fact and would need to mark arguments accordingly. With
-// inreg meaning that the argument is passed on the stack, this is not an
-// issue, except for calls which involve _Complex types.
-
def CC_PPC_SVR4_Common : CallingConv<[
// The ABI requires i64 to be passed in two adjacent registers with the first
// register having an odd register number.
CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>,
// The first 8 integer arguments are passed in integer registers.
- CCIfType<[i32], CCIf<"!ArgFlags.isInReg()",
- CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>,
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
// Make sure the i64 words from a long double are either both passed in
// registers or both passed on the stack.
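As a worked illustration of the adjacent-register rule noted above (the custom handler aligns the pair; the concrete assignments below are my own example, not from this patch):

// 32-bit SVR4: an i64 argument must occupy an adjacent GPR pair whose first
// register is odd-numbered (r3/r4, r5/r6, r7/r8 or r9/r10).
void f(int a, long long b);
//   a          -> r3
//   (r4 is skipped so that b's pair starts at an odd-numbered register)
//   b, high 32 -> r5   // big-endian: most-significant word first
//   b, low 32  -> r6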
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 3a15f7e..66dfd4b 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -257,7 +257,7 @@ void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
case PPC::STWX: case PPC::STWX8:
case PPC::STWUX:
case PPC::STW: case PPC::STW8:
- case PPC::STWU: case PPC::STWU8:
+ case PPC::STWU:
case PPC::STVEWX:
case PPC::STFIWX:
case PPC::STWBRX:
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 004997f..9d79c0d 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -156,10 +156,6 @@ namespace {
SDValue BuildSDIVSequence(SDNode *N);
SDValue BuildUDIVSequence(SDNode *N);
- /// InstructionSelect - This callback is invoked by
- /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
- virtual void InstructionSelect();
-
void InsertVRSaveCode(MachineFunction &MF);
virtual const char *getPassName() const {
@@ -184,14 +180,6 @@ private:
};
}
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void PPCDAGToDAGISel::InstructionSelect() {
- // Select target instructions for the DAG.
- SelectRoot(*CurDAG);
- CurDAG->RemoveDeadNodes();
-}
-
/// InsertVRSaveCode - Once the entire function has been instruction selected,
/// all virtual registers are created and all machine instructions are built,
/// check to see if we need to save/restore VRSAVE. If so, do it.
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index a11d624..3d81afa 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -25,13 +25,13 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -1243,7 +1243,8 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
// If the global is weak or external, we have to go through the lazy
// resolution stub.
- return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0,
+ false, false, 0);
}
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
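Most hunks below repeat one mechanical change: three extra trailing arguments on the SelectionDAG load/store builders. A hedged reading of the updated call shape (the parameter names are my interpretation of the call sites, not taken from this patch):

// The trailing triple added at every call site appears to be
// isVolatile, isNonTemporal and Alignment (0 = natural alignment).
SDValue Load  = DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                            /*SrcValue*/ NULL, /*SVOffset*/ 0,
                            /*isVolatile*/ false, /*isNonTemporal*/ false,
                            /*Alignment*/ 0);
SDValue Store = DAG.getStore(Chain, dl, Load, FIN,
                             /*SrcValue*/ NULL, /*SVOffset*/ 0,
                             /*isVolatile*/ false, /*isNonTemporal*/ false,
                             /*Alignment*/ 0);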
@@ -1333,7 +1334,7 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
false, false, false, false, 0, CallingConv::C, false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__trampoline_setup", PtrVT),
- Args, DAG, dl, DAG.GetOrdering(Chain.getNode()));
+ Args, DAG, dl);
SDValue Ops[] =
{ CallResult.first, CallResult.second };
@@ -1355,7 +1356,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
+ false, false, 0);
}
// For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
@@ -1405,25 +1407,29 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
// Store first byte : number of int regs
SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
- Op.getOperand(1), SV, 0, MVT::i8);
+ Op.getOperand(1), SV, 0, MVT::i8,
+ false, false, 0);
uint64_t nextOffset = FPROffset;
SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
ConstFPROffset);
// Store second byte : number of float regs
SDValue secondStore =
- DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8);
+ DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8,
+ false, false, 0);
nextOffset += StackOffset;
nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
// Store second word : arguments given on stack
SDValue thirdStore =
- DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset);
+ DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset,
+ false, false, 0);
nextOffset += FrameOffset;
nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
// Store third word : arguments given in registers
- return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset);
+ return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset,
+ false, false, 0);
}
@@ -1628,7 +1634,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0));
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
+ false, false, 0));
}
}
@@ -1700,7 +1707,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
unsigned GPRIndex = 0;
for (; GPRIndex != VarArgsNumGPR; ++GPRIndex) {
SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT);
- SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0);
+ SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0,
+ false, false, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
@@ -1714,7 +1722,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
unsigned VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
+ false, false, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
@@ -1729,7 +1738,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
unsigned FPRIndex = 0;
for (FPRIndex = 0; FPRIndex != VarArgsNumFPR; ++FPRIndex) {
SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64);
- SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0);
+ SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0,
+ false, false, 0);
MemOps.push_back(Store);
// Increment the address by eight for the next argument to store
SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
@@ -1741,7 +1751,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
unsigned VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
+ false, false, 0);
MemOps.push_back(Store);
// Increment the address by eight for the next argument to store
SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
@@ -1903,7 +1914,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 );
+ NULL, 0,
+ ObjSize==1 ? MVT::i8 : MVT::i16,
+ false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
}
@@ -1921,7 +1934,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
+ false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
ArgOffset += PtrByteSize;
@@ -2045,7 +2059,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
CurArgOffset + (ArgSize - ObjSize),
isImmutable, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0,
+ false, false, 0);
}
InVals.push_back(ArgVal);
@@ -2091,7 +2106,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
+ false, false, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
@@ -2271,7 +2287,7 @@ StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
// Store relative to framepointer.
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
PseudoSourceValue::getFixedStack(FI),
- 0));
+ 0, false, false, 0));
}
}
@@ -2297,7 +2313,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
- PseudoSourceValue::getFixedStack(NewRetAddr), 0);
+ PseudoSourceValue::getFixedStack(NewRetAddr), 0,
+ false, false, 0);
// When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
// slot as the FP is never overwritten.
@@ -2308,7 +2325,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
true, false);
SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
- PseudoSourceValue::getFixedStack(NewFPIdx), 0);
+ PseudoSourceValue::getFixedStack(NewFPIdx), 0,
+ false, false, 0);
}
}
return Chain;
@@ -2346,14 +2364,16 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
// Load the LR and FP stack slot for later adjusting.
EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
LROpOut = getReturnAddrFrameIndex(DAG);
- LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0);
+ LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0,
+ false, false, 0);
Chain = SDValue(LROpOut.getNode(), 1);
// When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
// slot as the FP is never overwritten.
if (isDarwinABI) {
FPOpOut = getFramePointerFrameIndex(DAG);
- FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0);
+ FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0,
+ false, false, 0);
Chain = SDValue(FPOpOut.getNode(), 1);
}
}
@@ -2395,7 +2415,8 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
DAG.getConstant(ArgOffset, PtrVT));
}
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+ false, false, 0));
// Calculate and remember argument location.
} else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
TailCallArguments);
@@ -2862,7 +2883,8 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- PseudoSourceValue::getStack(), LocMemOffset));
+ PseudoSourceValue::getStack(), LocMemOffset,
+ false, false, 0));
} else {
// Calculate and remember argument location.
CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
@@ -3024,7 +3046,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
- NULL, 0, VT);
+ NULL, 0, VT, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
@@ -3061,7 +3083,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
if (GPR_idx != NumGPRs) {
- SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0,
+ false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
ArgOffset += PtrByteSize;
@@ -3092,19 +3115,22 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
if (isVarArg) {
- SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0);
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+ false, false, 0);
MemOpChains.push_back(Store);
// Float varargs are always shadowed in available integer registers
if (GPR_idx != NumGPRs) {
- SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0,
+ false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
- SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0,
+ false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
@@ -3147,10 +3173,12 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// entirely in R registers. Maybe later.
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
DAG.getConstant(ArgOffset, PtrVT));
- SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0);
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+ false, false, 0);
MemOpChains.push_back(Store);
if (VR_idx != NumVRs) {
- SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0);
+ SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0,
+ false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
}
@@ -3160,7 +3188,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
break;
SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
DAG.getConstant(i, PtrVT));
- SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0,
+ false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
@@ -3225,7 +3254,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// TOC save area offset.
SDValue PtrOff = DAG.getIntPtrConstant(40);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
- Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, NULL, 0);
+ Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, NULL, 0,
+ false, false, 0);
}
// Build a sequence of copy-to-reg nodes chained together with token chain
@@ -3300,13 +3330,15 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
SDValue SaveSP = Op.getOperand(1);
// Load the old link SP.
- SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0);
+ SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0,
+ false, false, 0);
// Restore the stack pointer.
Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
// Store the old link SP.
- return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0);
+ return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0,
+ false, false, 0);
}
@@ -3483,14 +3515,16 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
// Emit a store to the stack slot.
- SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0);
+ SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0,
+ false, false, 0);
// Result is a load from the stack slot. If loading 4 bytes, make sure to
// add in a bias.
if (Op.getValueType() == MVT::i32)
FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
DAG.getConstant(4, FIPtr.getValueType()));
- return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0);
+ return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0,
+ false, false, 0);
}
SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
@@ -3533,7 +3567,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other),
Ops, 4, MVT::i64, MMO);
// Load the value as a double.
- SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0);
+ SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0, false, false, 0);
// FCFID it and return it.
SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
@@ -3578,12 +3612,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
- StackSlot, NULL, 0);
+ StackSlot, NULL, 0, false, false, 0);
// Load FP Control Word from low 32 bits of stack slot.
SDValue Four = DAG.getConstant(4, PtrVT);
SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
- SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0);
+ SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0,
+ false, false, 0);
// Transform as necessary
SDValue CWD1 =
@@ -4249,9 +4284,11 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
// Store the input value into Value#0 of the stack slot.
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
- Op.getOperand(0), FIdx, NULL, 0);
+ Op.getOperand(0), FIdx, NULL, 0,
+ false, false, 0);
// Load it out.
- return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0);
+ return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0,
+ false, false, 0);
}
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) {
@@ -5460,7 +5497,8 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
// to the stack.
FuncInfo->setLRStoreRequired();
return DAG.getLoad(getPointerTy(), dl,
- DAG.getEntryNode(), RetAddrFI, NULL, 0);
+ DAG.getEntryNode(), RetAddrFI, NULL, 0,
+ false, false, 0);
}
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 219efb9..a0781b9 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -366,7 +366,7 @@ def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
[(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>;
def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"addme $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (adde G8RC:$rA, immAllOnes))]>;
+ [(set G8RC:$rT, (adde G8RC:$rA, -1))]>;
def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"addze $rT, $rA", IntGeneral,
[(set G8RC:$rT, (adde G8RC:$rA, 0))]>;
@@ -375,7 +375,7 @@ def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
[(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>;
def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"subfme $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (sube immAllOnes, G8RC:$rA))]>;
+ [(set G8RC:$rT, (sube -1, G8RC:$rA))]>;
def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"subfze $rT, $rA", IntGeneral,
[(set G8RC:$rT, (sube 0, G8RC:$rA))]>;
@@ -635,13 +635,6 @@ def STHU8 : DForm_1<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
(pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STWU8 : DForm_1<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStGeneral,
- [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-
def STDU : DSForm_1<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
s16immX4:$ptroff, ptr_rc:$ptrreg),
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index af7d812..9895bea 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -19,6 +19,7 @@
#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -73,8 +74,7 @@ bool PPCInstrInfo::isMoveInstr(const MachineInstr& MI,
destReg = MI.getOperand(0).getReg();
return true;
}
- } else if (oc == PPC::FMRS || oc == PPC::FMRD ||
- oc == PPC::FMRSD) { // fmr r1, r2
+ } else if (oc == PPC::FMR || oc == PPC::FMRSD) { // fmr r1, r2
assert(MI.getNumOperands() >= 2 &&
MI.getOperand(0).isReg() &&
MI.getOperand(1).isReg() &&
@@ -344,10 +344,9 @@ bool PPCInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, get(PPC::OR), DestReg).addReg(SrcReg).addReg(SrcReg);
} else if (DestRC == PPC::G8RCRegisterClass) {
BuildMI(MBB, MI, DL, get(PPC::OR8), DestReg).addReg(SrcReg).addReg(SrcReg);
- } else if (DestRC == PPC::F4RCRegisterClass) {
- BuildMI(MBB, MI, DL, get(PPC::FMRS), DestReg).addReg(SrcReg);
- } else if (DestRC == PPC::F8RCRegisterClass) {
- BuildMI(MBB, MI, DL, get(PPC::FMRD), DestReg).addReg(SrcReg);
+ } else if (DestRC == PPC::F4RCRegisterClass ||
+ DestRC == PPC::F8RCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(PPC::FMR), DestReg).addReg(SrcReg);
} else if (DestRC == PPC::CRRCRegisterClass) {
BuildMI(MBB, MI, DL, get(PPC::MCRF), DestReg).addReg(SrcReg);
} else if (DestRC == PPC::VRRCRegisterClass) {
@@ -421,22 +420,30 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
FrameIdx));
return true;
} else {
- // FIXME: We use R0 here, because it isn't available for RA. We need to
- // store the CR in the low 4-bits of the saved value. First, issue a MFCR
- // to save all of the CRBits.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), PPC::R0));
+    // FIXME: We need a scratch reg here. The trouble with using R0 is that
+ // it's possible for the stack frame to be so big the save location is
+ // out of range of immediate offsets, necessitating another register.
+ // We hack this on Darwin by reserving R2. It's probably broken on Linux
+ // at the moment.
+
+ // We need to store the CR in the low 4-bits of the saved value. First,
+ // issue a MFCR to save all of the CRBits.
+ unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
+ PPC::R2 : PPC::R0;
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), ScratchReg));
// If the saved register wasn't CR0, shift the bits left so that they are
// in CR0's slot.
if (SrcReg != PPC::CR0) {
unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(SrcReg)*4;
- // rlwinm r0, r0, ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0)
- .addReg(PPC::R0).addImm(ShiftBits).addImm(0).addImm(31));
+ // rlwinm scratch, scratch, ShiftBits, 0, 31.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ .addReg(ScratchReg).addImm(ShiftBits)
+ .addImm(0).addImm(31));
}
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
- .addReg(PPC::R0,
+ .addReg(ScratchReg,
getKillRegState(isKill)),
FrameIdx));
}
@@ -540,20 +547,28 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
} else if (RC == PPC::CRRCRegisterClass) {
- // FIXME: We use R0 here, because it isn't available for RA.
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), PPC::R0),
- FrameIdx));
+    // FIXME: We need a scratch reg here. The trouble with using R0 is that
+ // it's possible for the stack frame to be so big the save location is
+ // out of range of immediate offsets, necessitating another register.
+ // We hack this on Darwin by reserving R2. It's probably broken on Linux
+ // at the moment.
+ unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
+ PPC::R2 : PPC::R0;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ ScratchReg), FrameIdx));
// If the reloaded register isn't CR0, shift the bits right so that they are
// in the right CR's slot.
if (DestReg != PPC::CR0) {
unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(DestReg)*4;
// rlwinm r11, r11, 32-ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0)
- .addReg(PPC::R0).addImm(32-ShiftBits).addImm(0).addImm(31));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
+ .addImm(31));
}
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg).addReg(PPC::R0));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg)
+ .addReg(ScratchReg));
} else if (RC == PPC::CRBITRCRegisterClass) {
unsigned Reg = 0;
@@ -672,33 +687,21 @@ MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
getUndefRegState(isUndef)),
FrameIndex);
}
- } else if (Opc == PPC::FMRD) {
- if (OpNum == 0) { // move -> store
- unsigned InReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFD))
- .addReg(InReg,
- getKillRegState(isKill) |
- getUndefRegState(isUndef)),
- FrameIndex);
- } else { // move -> load
- unsigned OutReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFD))
- .addReg(OutReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef)),
- FrameIndex);
- }
- } else if (Opc == PPC::FMRS) {
+ } else if (Opc == PPC::FMR || Opc == PPC::FMRSD) {
+ // The register may be F4RC or F8RC, and that determines the memory op.
+ unsigned OrigReg = MI->getOperand(OpNum).getReg();
+ // We cannot tell the register class from a physreg alone.
+ if (TargetRegisterInfo::isPhysicalRegister(OrigReg))
+ return NULL;
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(OrigReg);
+ const bool is64 = RC == PPC::F8RCRegisterClass;
+
if (OpNum == 0) { // move -> store
unsigned InReg = MI->getOperand(1).getReg();
bool isKill = MI->getOperand(1).isKill();
bool isUndef = MI->getOperand(1).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFS))
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(),
+ get(is64 ? PPC::STFD : PPC::STFS))
.addReg(InReg,
getKillRegState(isKill) |
getUndefRegState(isUndef)),
@@ -707,7 +710,8 @@ MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
unsigned OutReg = MI->getOperand(0).getReg();
bool isDead = MI->getOperand(0).isDead();
bool isUndef = MI->getOperand(0).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFS))
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(),
+ get(is64 ? PPC::LFD : PPC::LFS))
.addReg(OutReg,
RegState::Define |
getDeadRegState(isDead) |
@@ -733,7 +737,7 @@ bool PPCInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
else if ((Opc == PPC::OR8 &&
MI->getOperand(1).getReg() == MI->getOperand(2).getReg()))
return true;
- else if (Opc == PPC::FMRD || Opc == PPC::FMRS)
+ else if (Opc == PPC::FMR || Opc == PPC::FMRSD)
return true;
return false;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 842f8ee..845cd8f 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1019,20 +1019,16 @@ let Uses = [RM] in {
}
}
-/// FMR is split into 3 versions, one for 4/8 byte FP, and one for extending.
+/// FMR is split into 2 versions, one for 4/8 byte FP, and one for extending.
///
/// Note that these are defined as pseudo-ops on the PPC970 because they are
/// often coalesced away and we don't want the dispatch group builder to think
/// that they will fill slots (which could cause the load of a LSU reject to
/// sneak into a d-group with a store).
-def FMRS : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
- "fmr $frD, $frB", FPGeneral,
- []>, // (set F4RC:$frD, F4RC:$frB)
- PPC970_Unit_Pseudo;
-def FMRD : XForm_26<63, 72, (outs F8RC:$frD), (ins F8RC:$frB),
- "fmr $frD, $frB", FPGeneral,
- []>, // (set F8RC:$frD, F8RC:$frB)
- PPC970_Unit_Pseudo;
+def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fmr $frD, $frB", FPGeneral,
+ []>, // (set F4RC:$frD, F4RC:$frB)
+ PPC970_Unit_Pseudo;
def FMRSD : XForm_26<63, 72, (outs F8RC:$frD), (ins F4RC:$frB),
"fmr $frD, $frB", FPGeneral,
[(set F8RC:$frD, (fextend F4RC:$frB))]>,
@@ -1215,7 +1211,7 @@ def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
[(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>;
def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"addme $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (adde GPRC:$rA, immAllOnes))]>;
+ [(set GPRC:$rT, (adde GPRC:$rA, -1))]>;
def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"addze $rT, $rA", IntGeneral,
[(set GPRC:$rT, (adde GPRC:$rA, 0))]>;
@@ -1224,7 +1220,7 @@ def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
[(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>;
def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"subfme $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (sube immAllOnes, GPRC:$rA))]>;
+ [(set GPRC:$rT, (sube -1, GPRC:$rA))]>;
def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"subfze $rT, $rA", IntGeneral,
[(set GPRC:$rT, (sube 0, GPRC:$rA))]>;
@@ -1480,11 +1476,11 @@ def : Pat<(extloadf32 xaddr:$src),
(FMRSD (LFSX xaddr:$src))>;
// Memory barriers
-def : Pat<(membarrier (i32 imm:$ll),
- (i32 imm:$ls),
- (i32 imm:$sl),
- (i32 imm:$ss),
- (i32 imm:$device)),
+def : Pat<(membarrier (i32 imm /*ll*/),
+ (i32 imm /*ls*/),
+ (i32 imm /*sl*/),
+ (i32 imm /*ss*/),
+ (i32 imm /*device*/)),
(SYNC)>;
include "PPCInstrAltivec.td"
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 20e77e7..0b509ac 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -427,6 +427,12 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R2); // System-reserved register
Reserved.set(PPC::R13); // Small Data Area pointer register
}
+ // Reserve R2 on Darwin to hack around the problem of save/restore of CR
+ // when the stack frame is too big to address directly; we need two regs.
+ // This is a hack.
+ if (Subtarget.isDarwinABI()) {
+ Reserved.set(PPC::R2);
+ }
// On PPC64, r13 is the thread pointer. Never allocate this register.
// Note that this is over conservative, as it also prevents allocation of R31
@@ -447,6 +453,12 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (Subtarget.isSVR4ABI()) {
Reserved.set(PPC::X2);
}
+ // Reserve R2 on Darwin to hack around the problem of save/restore of CR
+ // when the stack frame is too big to address directly; we need two regs.
+ // This is a hack.
+ if (Subtarget.isDarwinABI()) {
+ Reserved.set(PPC::X2);
+ }
}
if (needsFP(MF))
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 049e893..1cb7340 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -287,10 +287,8 @@ def GPRC : RegisterClass<"PPC", [i32], 32,
GPRCClass::allocation_order_begin(const MachineFunction &MF) const {
// 32-bit SVR4 ABI: r2 is reserved for the OS.
// 64-bit SVR4 ABI: r2 is reserved for the TOC pointer.
- if (!MF.getTarget().getSubtarget<PPCSubtarget>().isDarwin())
- return begin()+1;
-
- return begin();
+ // Darwin: R2 is reserved for CR save/restore sequence.
+ return begin()+1;
}
GPRCClass::iterator
GPRCClass::allocation_order_end(const MachineFunction &MF) const {
@@ -325,10 +323,8 @@ def G8RC : RegisterClass<"PPC", [i64], 64,
G8RCClass::iterator
G8RCClass::allocation_order_begin(const MachineFunction &MF) const {
// 64-bit SVR4 ABI: r2 is reserved for the TOC pointer.
- if (!MF.getTarget().getSubtarget<PPCSubtarget>().isDarwin())
- return begin()+1;
-
- return begin();
+ // Darwin: r2 is reserved for CR save/restore sequence.
+ return begin()+1;
}
G8RCClass::iterator
G8RCClass::allocation_order_end(const MachineFunction &MF) const {
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 22eecd4..cac6962 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -115,32 +115,3 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
return false;
}
-
-/// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are 4-byte,
-/// 8-byte, and target default. The CIE is hard-coded to indicate that the LSDA
-/// pointer in the FDE section is an "sdata4", and should be encoded as a 4-byte
-/// pointer by default. However, some systems may require a different size due
-/// to bugs or other conditions. We will default to a 4-byte encoding unless the
-/// system tells us otherwise.
-///
-/// The issue is when the CIE says their is an LSDA. That mandates that every
-/// FDE have an LSDA slot. But if the function does not need an LSDA. There
-/// needs to be some way to signify there is none. The LSDA is encoded as
-/// pc-rel. But you don't look for some magic value after adding the pc. You
-/// have to look for a zero before adding the pc. The problem is that the size
-/// of the zero to look for depends on the encoding. The unwinder bug in SL is
-/// that it always checks for a pointer-size zero. So on x86_64 it looks for 8
-/// bytes of zero. If you have an LSDA, it works fine since the 8-bytes are
-/// non-zero so it goes ahead and then reads the value based on the encoding.
-/// But if you use sdata4 and there is no LSDA, then the test for zero gives a
-/// false negative and the unwinder thinks there is an LSDA.
-///
-/// FIXME: This call-back isn't good! We should be using the correct encoding
-/// regardless of the system. However, there are some systems which have bugs
-/// that prevent this from occuring.
-DwarfLSDAEncoding::Encoding PPCTargetMachine::getLSDAEncoding() const {
- if (Subtarget.isDarwin() && Subtarget.getDarwinVers() != 10)
- return DwarfLSDAEncoding::Default;
-
- return DwarfLSDAEncoding::EightByte;
-}
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index a654435..ac9ae2b 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -57,18 +57,6 @@ public:
return InstrItins;
}
- /// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are
- /// 4-byte, 8-byte, and target default. The CIE is hard-coded to indicate that
- /// the LSDA pointer in the FDE section is an "sdata4", and should be encoded
- /// as a 4-byte pointer by default. However, some systems may require a
- /// different size due to bugs or other conditions. We will default to a
- /// 4-byte encoding unless the system tells us otherwise.
- ///
- /// FIXME: This call-back isn't good! We should be using the correct encoding
- /// regardless of the system. However, there are some systems which have bugs
- /// that prevent this from occuring.
- virtual DwarfLSDAEncoding::Encoding getLSDAEncoding() const;
-
// Pass Pipeline Configuration
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index 8f265cf..3465779 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -889,9 +889,26 @@ entry:
; recognize a more elaborate tree than a simple SETxx.
define double @test_FNEG_sel(double %A, double %B, double %C) {
- %D = sub double -0.000000e+00, %A ; <double> [#uses=1]
+ %D = fsub double -0.000000e+00, %A ; <double> [#uses=1]
%Cond = fcmp ugt double %D, -0.000000e+00 ; <i1> [#uses=1]
%E = select i1 %Cond, double %B, double %C ; <double> [#uses=1]
ret double %E
}
+//===----------------------------------------------------------------------===//
+The save/restore sequence for CR in prolog/epilog is terrible:
+- Each CR subreg is saved individually, rather than doing one save as a unit.
+- On Darwin, the save is done after the decrement of SP, which means the offset
+from SP of the save slot can be too big for a store instruction, so we need an
+additional register (currently hacked in 96015+96020; the solution there is
+correct, but poor).
+- On SVR4 the same thing can happen, and I don't think saving before the SP
+decrement is safe on that target, as there is no red zone. This is currently
+broken AFAIK, although it's not a target I can exercise.
+The following demonstrates the problem:
+extern void bar(char *p);
+void foo() {
+ char x[100000];
+ bar(x);
+ __asm__("" ::: "cr2");
+}
diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
index 9a2ce6b..f6753a6 100644
--- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
@@ -56,6 +56,9 @@ namespace {
unsigned AsmVariant, const char *ExtraCode);
bool printGetPCX(const MachineInstr *MI, unsigned OpNo);
+
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+ const;
};
} // end of anonymous namespace
@@ -140,18 +143,19 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum) {
break;
}
+ unsigned mfNum = MI->getParent()->getParent()->getFunctionNumber();
unsigned bbNum = MI->getParent()->getNumber();
- O << '\n' << ".LLGETPCH" << bbNum << ":\n";
- O << "\tcall\t.LLGETPC" << bbNum << '\n' ;
+ O << '\n' << ".LLGETPCH" << mfNum << '_' << bbNum << ":\n";
+ O << "\tcall\t.LLGETPC" << mfNum << '_' << bbNum << '\n' ;
O << "\t sethi\t"
- << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << bbNum << ")), "
+ << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum << ")), "
<< operand << '\n' ;
- O << ".LLGETPC" << bbNum << ":\n" ;
+ O << ".LLGETPC" << mfNum << '_' << bbNum << ":\n" ;
O << "\tor\t" << operand
- << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << bbNum << ")), "
+ << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum << ")), "
<< operand << '\n';
O << "\tadd\t" << operand << ", %o7, " << operand << '\n';
@@ -197,6 +201,39 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+///
+/// This overrides AsmPrinter's implementation to handle delay slots.
+bool SparcAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // Check if the last terminator is an unconditional branch.
+ MachineBasicBlock::const_iterator I = Pred->end();
+ while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+ ; // Noop
+ return I == Pred->end() || !I->getDesc().isBarrier();
+}
+
+
+
// Force static initialization.
extern "C" void LLVMInitializeSparcAsmPrinter() {
RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget);
diff --git a/lib/Target/Sparc/README.txt b/lib/Target/Sparc/README.txt
index cc24abf..b4991fe 100644
--- a/lib/Target/Sparc/README.txt
+++ b/lib/Target/Sparc/README.txt
@@ -56,3 +56,4 @@ int %t1(int %a, int %b) {
leaf fns.
* Fill delay slots
+* Implement JIT support
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index e1b3299..a7d1805 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -35,7 +35,6 @@ class SparcDAGToDAGISel : public SelectionDAGISel {
/// make the right decision when generating code for different targets.
const SparcSubtarget &Subtarget;
SparcTargetMachine& TM;
- MachineBasicBlock *CurBB;
public:
explicit SparcDAGToDAGISel(SparcTargetMachine &tm)
: SelectionDAGISel(tm),
@@ -56,10 +55,6 @@ public:
char ConstraintCode,
std::vector<SDValue> &OutOps);
- /// InstructionSelect - This callback is invoked by
- /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
- virtual void InstructionSelect();
-
virtual const char *getPassName() const {
return "SPARC DAG->DAG Pattern Instruction Selection";
}
@@ -72,17 +67,8 @@ private:
};
} // end anonymous namespace
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void SparcDAGToDAGISel::InstructionSelect() {
- CurBB = BB;
- // Select target instructions for the DAG.
- SelectRoot(*CurDAG);
- CurDAG->RemoveDeadNodes();
-}
-
SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
- MachineFunction *MF = CurBB->getParent();
+ MachineFunction *MF = BB->getParent();
unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF);
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index e67002a..4e93ef0 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -21,7 +21,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -134,7 +134,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
SDValue Load;
if (ObjectVT == MVT::i32) {
- Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0);
+ Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
+ false, false, 0);
} else {
ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
@@ -143,7 +144,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
DAG.getConstant(Offset, MVT::i32));
Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr,
- NULL, 0, ObjectVT);
+ NULL, 0, ObjectVT, false, false, 0);
Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load);
}
InVals.push_back(Load);
@@ -167,7 +168,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
true, false);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
- SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0);
+ SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0,
+ false, false, 0);
InVals.push_back(Load);
}
ArgOffset += 4;
@@ -189,7 +191,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
true, false);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
- HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0);
+ HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
+ false, false, 0);
}
SDValue LoVal;
@@ -201,7 +204,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4,
true, false);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
- LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0);
+ LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
+ false, false, 0);
}
// Compose the two halves together into an i64 unit.
@@ -235,7 +239,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
true, false);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
- OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0));
+ OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0,
+ false, false, 0));
ArgOffset += 4;
}
@@ -339,7 +344,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// FIXME: VERIFY THAT 68 IS RIGHT.
SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+68);
PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
- MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0,
+ false, false, 0));
}
#else
@@ -385,14 +391,17 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// out the parts as integers. Top part goes in a reg.
SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32);
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
- Val, StackPtr, NULL, 0);
+ Val, StackPtr, NULL, 0,
+ false, false, 0);
// Sparc is big-endian, so the high part comes first.
- SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, 0);
+ SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0,
+ false, false, 0);
// Increment the pointer to the other half.
StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
DAG.getIntPtrConstant(4));
// Load the low part.
- SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, 0);
+ SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0,
+ false, false, 0);
RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi));
@@ -435,7 +444,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SDValue PtrOff = DAG.getConstant(ArgOffset, MVT::i32);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, dl, ValToStore,
- PtrOff, NULL, 0));
+ PtrOff, NULL, 0,
+ false, false, 0));
}
ArgOffset += ObjSize;
}
@@ -759,7 +769,7 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
GlobalBase, RelAddr);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- AbsAddr, NULL, 0);
+ AbsAddr, NULL, 0, false, false, 0);
}
SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
@@ -780,7 +790,7 @@ SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
GlobalBase, RelAddr);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- AbsAddr, NULL, 0);
+ AbsAddr, NULL, 0, false, false, 0);
}
static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
@@ -872,7 +882,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(TLI.getVarArgsFrameOffset(),
MVT::i32));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0);
+ return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0,
+ false, false, 0);
}
static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
@@ -882,21 +893,23 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
SDValue VAListPtr = Node->getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
DebugLoc dl = Node->getDebugLoc();
- SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0);
+ SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0,
+ false, false, 0);
// Increment the pointer, VAList, to the next vaarg
SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList,
DAG.getConstant(VT.getSizeInBits()/8,
MVT::i32));
// Store the incremented VAList to the legalized pointer
InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr,
- VAListPtr, SV, 0);
+ VAListPtr, SV, 0, false, false, 0);
// Load the actual argument out of the pointer VAList, unless this is an
// f64 load.
if (VT != MVT::f64)
- return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0);
+ return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0, false, false, 0);
// Otherwise, load it as i64, then do a bitconvert.
- SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0);
+ SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0,
+ false, false, 0);
// Bit-Convert the value to f64.
SDValue Ops[2] = {
diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h
index e457235..56d8708 100644
--- a/lib/Target/Sparc/SparcMachineFunctionInfo.h
+++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -22,7 +22,7 @@ namespace llvm {
unsigned GlobalBaseReg;
public:
SparcMachineFunctionInfo() : GlobalBaseReg(0) {}
- SparcMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0) {}
+ explicit SparcMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0) {}
unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index f6f632d..8152e1d 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -100,8 +100,6 @@ namespace {
Lowering(*TM.getTargetLowering()),
Subtarget(*TM.getSubtargetImpl()) { }
- virtual void InstructionSelect();
-
virtual const char *getPassName() const {
return "SystemZ DAG->DAG Pattern Instruction Selection";
}
@@ -152,10 +150,6 @@ namespace {
bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM);
bool MatchAddressRI(SDValue N, SystemZRRIAddressMode &AM,
bool is12Bit);
-
- #ifndef NDEBUG
- unsigned Indent;
- #endif
};
} // end anonymous namespace
@@ -594,41 +588,22 @@ bool SystemZDAGToDAGISel::SelectLAAddr(SDNode *Op, SDValue Addr,
bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Disp, SDValue &Index) {
if (ISD::isNON_EXTLoad(N.getNode()) &&
- N.hasOneUse() &&
- IsLegalAndProfitableToFold(N.getNode(), P, P))
+ IsLegalToFold(N, P, P))
return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index);
return false;
}
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void SystemZDAGToDAGISel::InstructionSelect() {
- // Codegen the basic block.
- DEBUG(errs() << "===== Instruction selection begins:\n");
- DEBUG(Indent = 0);
- SelectRoot(*CurDAG);
- DEBUG(errs() << "===== Instruction selection ends:\n");
-
- CurDAG->RemoveDeadNodes();
-}
-
SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
EVT NVT = Node->getValueType(0);
DebugLoc dl = Node->getDebugLoc();
unsigned Opcode = Node->getOpcode();
// Dump information about the Node being selected
- DEBUG(errs().indent(Indent) << "Selecting: ";
- Node->dump(CurDAG);
- errs() << "\n");
- DEBUG(Indent += 2);
+ DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
- DEBUG(errs().indent(Indent-2) << "== ";
- Node->dump(CurDAG);
- errs() << "\n");
- DEBUG(Indent -= 2);
+ DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
return NULL; // Already selected.
}
@@ -694,9 +669,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
MVT::i32));
ReplaceUses(SDValue(Node, 0), SDValue(Div, 0));
- DEBUG(errs().indent(Indent-2) << "=> ";
- Result->dump(CurDAG);
- errs() << "\n");
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
}
// Copy the remainder (even subreg) result, if it is needed.
@@ -709,15 +682,9 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
MVT::i32));
ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0));
- DEBUG(errs().indent(Indent-2) << "=> ";
- Result->dump(CurDAG);
- errs() << "\n");
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
}
-#ifndef NDEBUG
- Indent -= 2;
-#endif
-
return NULL;
}
case ISD::UDIVREM: {
@@ -783,9 +750,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
CurDAG->getTargetConstant(SubRegIdx,
MVT::i32));
ReplaceUses(SDValue(Node, 0), SDValue(Div, 0));
- DEBUG(errs().indent(Indent-2) << "=> ";
- Result->dump(CurDAG);
- errs() << "\n");
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
}
// Copy the remainder (even subreg) result, if it is needed.
@@ -797,15 +762,9 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
CurDAG->getTargetConstant(SubRegIdx,
MVT::i32));
ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0));
- DEBUG(errs().indent(Indent-2) << "=> ";
- Result->dump(CurDAG);
- errs() << "\n");
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
}
-#ifndef NDEBUG
- Indent -= 2;
-#endif
-
return NULL;
}
}
@@ -813,14 +772,12 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// Select the default instruction
SDNode *ResNode = SelectCode(Node);
- DEBUG(errs().indent(Indent-2) << "=> ";
+ DEBUG(errs() << "=> ";
if (ResNode == NULL || ResNode == Node)
Node->dump(CurDAG);
else
ResNode->dump(CurDAG);
errs() << "\n";
);
- DEBUG(Indent -= 2);
-
return ResNode;
}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index f7405a5..6f4b30f 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -30,9 +30,9 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -337,7 +337,8 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain,
// from this parameter
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN,
- PseudoSourceValue::getFixedStack(FI), 0);
+ PseudoSourceValue::getFixedStack(FI), 0,
+ false, false, 0);
}
// If this is an 8/16/32-bit value, it is really passed promoted to 64
@@ -435,7 +436,8 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
DAG.getIntPtrConstant(Offset));
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- PseudoSourceValue::getStack(), Offset));
+ PseudoSourceValue::getStack(), Offset,
+ false, false, 0));
}
}
@@ -738,7 +740,7 @@ SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op,
if (ExtraLoadRequired)
Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
- PseudoSourceValue::getGOT(), 0);
+ PseudoSourceValue::getGOT(), 0, false, false, 0);
// If there was a non-zero offset that we didn't fold, create an explicit
// addition for it.
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index 336e20e..f46840c 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -58,7 +58,7 @@ def FMOV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
[]>;
}
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
def FMOV32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src),
"le\t{$dst, $src}",
[(set FP32:$dst, (load rriaddr12:$src))]>;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 1891bba..a44f6d9 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -257,7 +257,7 @@ def MOV64rihi32 : RILI<0xEC0, (outs GR64:$dst), (ins i64imm:$src),
[(set GR64:$dst, i64hi32:$src)]>;
}
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
def MOV32rm : RXI<0x58,
(outs GR32:$dst), (ins rriaddr12:$src),
"l\t{$dst, $src}",
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index e47d419..fd6e330 100644
--- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -33,7 +33,8 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo {
public:
SystemZMachineFunctionInfo() : CalleeSavedFrameSize(0) {}
- SystemZMachineFunctionInfo(MachineFunction &MF) : CalleeSavedFrameSize(0) {}
+ explicit SystemZMachineFunctionInfo(MachineFunction &MF)
+ : CalleeSavedFrameSize(0) {}
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index 295b30f..9a16808 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -580,7 +580,7 @@ const IntegerType *TargetData::getIntPtrType(LLVMContext &C) const {
uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices,
unsigned NumIndices) const {
const Type *Ty = ptrTy;
- assert(isa<PointerType>(Ty) && "Illegal argument for getIndexedOffset()");
+ assert(Ty->isPointerTy() && "Illegal argument for getIndexedOffset()");
uint64_t Result = 0;
generic_gep_type_iterator<Value* const*>
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
index 094a57e..18b0fa4 100644
--- a/lib/Target/TargetInstrInfo.cpp
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -15,6 +15,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
+#include <ctype.h>
using namespace llvm;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index a231ebc..82619c7 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -19,17 +19,15 @@
#include "llvm/GlobalVariable.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -289,832 +287,54 @@ TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind) const {
}
/// getSymbolForDwarfGlobalReference - Return an MCExpr to use for a
-/// pc-relative reference to the specified global variable from exception
-/// handling information. In addition to the symbol, this returns
-/// by-reference:
-///
-/// IsIndirect - True if the returned symbol is actually a stub that contains
-/// the address of the symbol, false if the symbol is the global itself.
-///
-/// IsPCRel - True if the symbol reference is already pc-relative, false if
-/// the caller needs to subtract off the address of the reference from the
-/// symbol.
-///
+/// reference to the specified global variable from exception
+/// handling information.
const MCExpr *TargetLoweringObjectFile::
getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI,
- bool &IsIndirect, bool &IsPCRel) const {
- // The generic implementation of this just returns a direct reference to the
- // symbol.
- IsIndirect = false;
- IsPCRel = false;
-
+ MachineModuleInfo *MMI, unsigned Encoding) const {
// FIXME: Use GetGlobalValueSymbol.
SmallString<128> Name;
Mang->getNameWithPrefix(Name, GV, false);
- return MCSymbolRefExpr::Create(Name.str(), getContext());
-}
-
-
-//===----------------------------------------------------------------------===//
-// ELF
-//===----------------------------------------------------------------------===//
-typedef StringMap<const MCSectionELF*> ELFUniqueMapTy;
-
-TargetLoweringObjectFileELF::~TargetLoweringObjectFileELF() {
- // If we have the section uniquing map, free it.
- delete (ELFUniqueMapTy*)UniquingMap;
-}
-
-const MCSection *TargetLoweringObjectFileELF::
-getELFSection(StringRef Section, unsigned Type, unsigned Flags,
- SectionKind Kind, bool IsExplicit) const {
- if (UniquingMap == 0)
- UniquingMap = new ELFUniqueMapTy();
- ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)UniquingMap;
-
- // Do the lookup, if we have a hit, return it.
- const MCSectionELF *&Entry = Map[Section];
- if (Entry) return Entry;
-
- return Entry = MCSectionELF::Create(Section, Type, Flags, Kind, IsExplicit,
- getContext());
-}
-
-void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
- const TargetMachine &TM) {
- if (UniquingMap != 0)
- ((ELFUniqueMapTy*)UniquingMap)->clear();
- TargetLoweringObjectFile::Initialize(Ctx, TM);
-
- BSSSection =
- getELFSection(".bss", MCSectionELF::SHT_NOBITS,
- MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
- SectionKind::getBSS());
-
- TextSection =
- getELFSection(".text", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_EXECINSTR | MCSectionELF::SHF_ALLOC,
- SectionKind::getText());
-
- DataSection =
- getELFSection(".data", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
- SectionKind::getDataRel());
-
- ReadOnlySection =
- getELFSection(".rodata", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC,
- SectionKind::getReadOnly());
-
- TLSDataSection =
- getELFSection(".tdata", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
- MCSectionELF::SHF_WRITE, SectionKind::getThreadData());
-
- TLSBSSSection =
- getELFSection(".tbss", MCSectionELF::SHT_NOBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
- MCSectionELF::SHF_WRITE, SectionKind::getThreadBSS());
-
- DataRelSection =
- getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
- SectionKind::getDataRel());
-
- DataRelLocalSection =
- getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
- SectionKind::getDataRelLocal());
-
- DataRelROSection =
- getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
- SectionKind::getReadOnlyWithRel());
-
- DataRelROLocalSection =
- getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
- SectionKind::getReadOnlyWithRelLocal());
-
- MergeableConst4Section =
- getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE,
- SectionKind::getMergeableConst4());
-
- MergeableConst8Section =
- getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE,
- SectionKind::getMergeableConst8());
-
- MergeableConst16Section =
- getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE,
- SectionKind::getMergeableConst16());
-
- StaticCtorSection =
- getELFSection(".ctors", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
- SectionKind::getDataRel());
-
- StaticDtorSection =
- getELFSection(".dtors", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
- SectionKind::getDataRel());
-
- // Exception Handling Sections.
-
- // FIXME: We're emitting LSDA info into a readonly section on ELF, even though
- // it contains relocatable pointers. In PIC mode, this is probably a big
- // runtime hit for C++ apps. Either the contents of the LSDA need to be
- // adjusted or this should be a data section.
- LSDASection =
- getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC, SectionKind::getReadOnly());
- EHFrameSection =
- getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE,
- SectionKind::getDataRel());
-
- // Debug Info Sections.
- DwarfAbbrevSection =
- getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfInfoSection =
- getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfLineSection =
- getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfFrameSection =
- getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfPubNamesSection =
- getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfPubTypesSection =
- getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfStrSection =
- getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfLocSection =
- getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfARangesSection =
- getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfRangesSection =
- getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
- DwarfMacroInfoSection =
- getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
-}
-
-
-static SectionKind
-getELFKindForNamedSection(StringRef Name, SectionKind K) {
- if (Name.empty() || Name[0] != '.') return K;
-
- // Some lame default implementation based on some magic section names.
- if (Name == ".bss" ||
- Name.startswith(".bss.") ||
- Name.startswith(".gnu.linkonce.b.") ||
- Name.startswith(".llvm.linkonce.b.") ||
- Name == ".sbss" ||
- Name.startswith(".sbss.") ||
- Name.startswith(".gnu.linkonce.sb.") ||
- Name.startswith(".llvm.linkonce.sb."))
- return SectionKind::getBSS();
-
- if (Name == ".tdata" ||
- Name.startswith(".tdata.") ||
- Name.startswith(".gnu.linkonce.td.") ||
- Name.startswith(".llvm.linkonce.td."))
- return SectionKind::getThreadData();
-
- if (Name == ".tbss" ||
- Name.startswith(".tbss.") ||
- Name.startswith(".gnu.linkonce.tb.") ||
- Name.startswith(".llvm.linkonce.tb."))
- return SectionKind::getThreadBSS();
-
- return K;
-}
-
-
-static unsigned getELFSectionType(StringRef Name, SectionKind K) {
-
- if (Name == ".init_array")
- return MCSectionELF::SHT_INIT_ARRAY;
-
- if (Name == ".fini_array")
- return MCSectionELF::SHT_FINI_ARRAY;
+ const MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str());
- if (Name == ".preinit_array")
- return MCSectionELF::SHT_PREINIT_ARRAY;
-
- if (K.isBSS() || K.isThreadBSS())
- return MCSectionELF::SHT_NOBITS;
-
- return MCSectionELF::SHT_PROGBITS;
-}
-
-
-static unsigned
-getELFSectionFlags(SectionKind K) {
- unsigned Flags = 0;
-
- if (!K.isMetadata())
- Flags |= MCSectionELF::SHF_ALLOC;
-
- if (K.isText())
- Flags |= MCSectionELF::SHF_EXECINSTR;
-
- if (K.isWriteable())
- Flags |= MCSectionELF::SHF_WRITE;
-
- if (K.isThreadLocal())
- Flags |= MCSectionELF::SHF_TLS;
-
- // K.isMergeableConst() is left out to honour PR4650
- if (K.isMergeableCString() || K.isMergeableConst4() ||
- K.isMergeableConst8() || K.isMergeableConst16())
- Flags |= MCSectionELF::SHF_MERGE;
-
- if (K.isMergeableCString())
- Flags |= MCSectionELF::SHF_STRINGS;
-
- return Flags;
-}
-
-
-const MCSection *TargetLoweringObjectFileELF::
-getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler *Mang, const TargetMachine &TM) const {
- StringRef SectionName = GV->getSection();
-
- // Infer section flags from the section name if we can.
- Kind = getELFKindForNamedSection(SectionName, Kind);
-
- return getELFSection(SectionName,
- getELFSectionType(SectionName, Kind),
- getELFSectionFlags(Kind), Kind, true);
-}
-
-static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) {
- if (Kind.isText()) return ".gnu.linkonce.t.";
- if (Kind.isReadOnly()) return ".gnu.linkonce.r.";
-
- if (Kind.isThreadData()) return ".gnu.linkonce.td.";
- if (Kind.isThreadBSS()) return ".gnu.linkonce.tb.";
-
- if (Kind.isDataNoRel()) return ".gnu.linkonce.d.";
- if (Kind.isDataRelLocal()) return ".gnu.linkonce.d.rel.local.";
- if (Kind.isDataRel()) return ".gnu.linkonce.d.rel.";
- if (Kind.isReadOnlyWithRelLocal()) return ".gnu.linkonce.d.rel.ro.local.";
-
- assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
- return ".gnu.linkonce.d.rel.ro.";
+ return getSymbolForDwarfReference(Sym, MMI, Encoding);
}
-const MCSection *TargetLoweringObjectFileELF::
-SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler *Mang, const TargetMachine &TM) const {
-
- // If this global is linkonce/weak and the target handles this by emitting it
- // into a 'uniqued' section name, create and return the section now.
- if (GV->isWeakForLinker() && !Kind.isCommon() && !Kind.isBSS()) {
- const char *Prefix = getSectionPrefixForUniqueGlobal(Kind);
- SmallString<128> Name;
- Name.append(Prefix, Prefix+strlen(Prefix));
- Mang->getNameWithPrefix(Name, GV, false);
- return getELFSection(Name.str(), getELFSectionType(Name.str(), Kind),
- getELFSectionFlags(Kind), Kind);
- }
-
- if (Kind.isText()) return TextSection;
-
- if (Kind.isMergeable1ByteCString() ||
- Kind.isMergeable2ByteCString() ||
- Kind.isMergeable4ByteCString()) {
-
- // We also need alignment here.
- // FIXME: this is getting the alignment of the character, not the
- // alignment of the global!
- unsigned Align =
- TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV));
-
- const char *SizeSpec = ".rodata.str1.";
- if (Kind.isMergeable2ByteCString())
- SizeSpec = ".rodata.str2.";
- else if (Kind.isMergeable4ByteCString())
- SizeSpec = ".rodata.str4.";
- else
- assert(Kind.isMergeable1ByteCString() && "unknown string width");
-
-
- std::string Name = SizeSpec + utostr(Align);
- return getELFSection(Name, MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |
- MCSectionELF::SHF_MERGE |
- MCSectionELF::SHF_STRINGS,
- Kind);
- }
-
- if (Kind.isMergeableConst()) {
- if (Kind.isMergeableConst4() && MergeableConst4Section)
- return MergeableConst4Section;
- if (Kind.isMergeableConst8() && MergeableConst8Section)
- return MergeableConst8Section;
- if (Kind.isMergeableConst16() && MergeableConst16Section)
- return MergeableConst16Section;
- return ReadOnlySection; // .const
- }
-
- if (Kind.isReadOnly()) return ReadOnlySection;
-
- if (Kind.isThreadData()) return TLSDataSection;
- if (Kind.isThreadBSS()) return TLSBSSSection;
-
- // Note: we claim that common symbols are put in BSSSection, but they are
- // really emitted with the magic .comm directive, which creates a symbol table
- // entry but not a section.
- if (Kind.isBSS() || Kind.isCommon()) return BSSSection;
-
- if (Kind.isDataNoRel()) return DataSection;
- if (Kind.isDataRelLocal()) return DataRelLocalSection;
- if (Kind.isDataRel()) return DataRelSection;
- if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
-
- assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
- return DataRelROSection;
-}
-
-/// getSectionForConstant - Given a mergeable constant with the
-/// specified size and relocation information, return a section that it
-/// should be placed in.
-const MCSection *TargetLoweringObjectFileELF::
-getSectionForConstant(SectionKind Kind) const {
- if (Kind.isMergeableConst4() && MergeableConst4Section)
- return MergeableConst4Section;
- if (Kind.isMergeableConst8() && MergeableConst8Section)
- return MergeableConst8Section;
- if (Kind.isMergeableConst16() && MergeableConst16Section)
- return MergeableConst16Section;
- if (Kind.isReadOnly())
- return ReadOnlySection;
-
- if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
- assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
- return DataRelROSection;
-}
-
-//===----------------------------------------------------------------------===//
-// MachO
-//===----------------------------------------------------------------------===//
-
-typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
-
-TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() {
- // If we have the MachO uniquing map, free it.
- delete (MachOUniqueMapTy*)UniquingMap;
-}
-
-
-const MCSectionMachO *TargetLoweringObjectFileMachO::
-getMachOSection(StringRef Segment, StringRef Section,
- unsigned TypeAndAttributes,
- unsigned Reserved2, SectionKind Kind) const {
- // We unique sections by their segment/section pair. The returned section
- // may not have the same flags as the requested section, if so this should be
- // diagnosed by the client as an error.
-
- // Create the map if it doesn't already exist.
- if (UniquingMap == 0)
- UniquingMap = new MachOUniqueMapTy();
- MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)UniquingMap;
-
- // Form the name to look up.
- SmallString<64> Name;
- Name += Segment;
- Name.push_back(',');
- Name += Section;
-
- // Do the lookup, if we have a hit, return it.
- const MCSectionMachO *&Entry = Map[Name.str()];
- if (Entry) return Entry;
-
- // Otherwise, return a new section.
- return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes,
- Reserved2, Kind, getContext());
-}
-
-
-void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
- const TargetMachine &TM) {
- if (UniquingMap != 0)
- ((MachOUniqueMapTy*)UniquingMap)->clear();
- TargetLoweringObjectFile::Initialize(Ctx, TM);
-
- TextSection // .text
- = getMachOSection("__TEXT", "__text",
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- SectionKind::getText());
- DataSection // .data
- = getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel());
-
- CStringSection // .cstring
- = getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS,
- SectionKind::getMergeable1ByteCString());
- UStringSection
- = getMachOSection("__TEXT","__ustring", 0,
- SectionKind::getMergeable2ByteCString());
- FourByteConstantSection // .literal4
- = getMachOSection("__TEXT", "__literal4", MCSectionMachO::S_4BYTE_LITERALS,
- SectionKind::getMergeableConst4());
- EightByteConstantSection // .literal8
- = getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS,
- SectionKind::getMergeableConst8());
-
- // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back
- // to using it in -static mode.
- SixteenByteConstantSection = 0;
- if (TM.getRelocationModel() != Reloc::Static &&
- TM.getTargetData()->getPointerSize() == 32)
- SixteenByteConstantSection = // .literal16
- getMachOSection("__TEXT", "__literal16",MCSectionMachO::S_16BYTE_LITERALS,
- SectionKind::getMergeableConst16());
-
- ReadOnlySection // .const
- = getMachOSection("__TEXT", "__const", 0, SectionKind::getReadOnly());
-
- TextCoalSection
- = getMachOSection("__TEXT", "__textcoal_nt",
- MCSectionMachO::S_COALESCED |
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- SectionKind::getText());
- ConstTextCoalSection
- = getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED,
- SectionKind::getText());
- ConstDataCoalSection
- = getMachOSection("__DATA","__const_coal", MCSectionMachO::S_COALESCED,
- SectionKind::getText());
- ConstDataSection // .const_data
- = getMachOSection("__DATA", "__const", 0,
- SectionKind::getReadOnlyWithRel());
- DataCoalSection
- = getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED,
- SectionKind::getDataRel());
- DataCommonSection
- = getMachOSection("__DATA","__common", MCSectionMachO::S_ZEROFILL,
- SectionKind::getBSS());
- DataBSSSection
- = getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL,
- SectionKind::getBSS());
-
-
- LazySymbolPointerSection
- = getMachOSection("__DATA", "__la_symbol_ptr",
- MCSectionMachO::S_LAZY_SYMBOL_POINTERS,
- SectionKind::getMetadata());
- NonLazySymbolPointerSection
- = getMachOSection("__DATA", "__nl_symbol_ptr",
- MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
- SectionKind::getMetadata());
-
- if (TM.getRelocationModel() == Reloc::Static) {
- StaticCtorSection
- = getMachOSection("__TEXT", "__constructor", 0,SectionKind::getDataRel());
- StaticDtorSection
- = getMachOSection("__TEXT", "__destructor", 0, SectionKind::getDataRel());
- } else {
- StaticCtorSection
- = getMachOSection("__DATA", "__mod_init_func",
- MCSectionMachO::S_MOD_INIT_FUNC_POINTERS,
- SectionKind::getDataRel());
- StaticDtorSection
- = getMachOSection("__DATA", "__mod_term_func",
- MCSectionMachO::S_MOD_TERM_FUNC_POINTERS,
- SectionKind::getDataRel());
- }
-
- // Exception Handling.
- LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0,
- SectionKind::getDataRel());
- EHFrameSection =
- getMachOSection("__TEXT", "__eh_frame",
- MCSectionMachO::S_COALESCED |
- MCSectionMachO::S_ATTR_NO_TOC |
- MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS |
- MCSectionMachO::S_ATTR_LIVE_SUPPORT,
- SectionKind::getReadOnly());
-
- // Debug Information.
- DwarfAbbrevSection =
- getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfInfoSection =
- getMachOSection("__DWARF", "__debug_info", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfLineSection =
- getMachOSection("__DWARF", "__debug_line", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfFrameSection =
- getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfPubNamesSection =
- getMachOSection("__DWARF", "__debug_pubnames", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfPubTypesSection =
- getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfStrSection =
- getMachOSection("__DWARF", "__debug_str", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfLocSection =
- getMachOSection("__DWARF", "__debug_loc", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfARangesSection =
- getMachOSection("__DWARF", "__debug_aranges", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfRangesSection =
- getMachOSection("__DWARF", "__debug_ranges", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfMacroInfoSection =
- getMachOSection("__DWARF", "__debug_macinfo", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfDebugInlineSection =
- getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
-}
-
-const MCSection *TargetLoweringObjectFileMachO::
-getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler *Mang, const TargetMachine &TM) const {
- // Parse the section specifier and create it if valid.
- StringRef Segment, Section;
- unsigned TAA, StubSize;
- std::string ErrorCode =
- MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section,
- TAA, StubSize);
- if (!ErrorCode.empty()) {
- // If invalid, report the error with llvm_report_error.
- llvm_report_error("Global variable '" + GV->getNameStr() +
- "' has an invalid section specifier '" + GV->getSection()+
- "': " + ErrorCode + ".");
- // Fall back to dropping it into the data section.
- return DataSection;
- }
-
- // Get the section.
- const MCSectionMachO *S =
- getMachOSection(Segment, Section, TAA, StubSize, Kind);
-
- // Okay, now that we got the section, verify that the TAA & StubSize agree.
- // If the user declared multiple globals with different section flags, we need
- // to reject it here.
- if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) {
- // If invalid, report the error with llvm_report_error.
- llvm_report_error("Global variable '" + GV->getNameStr() +
- "' section type or attributes does not match previous"
- " section specifier");
- }
-
- return S;
-}
-
-const MCSection *TargetLoweringObjectFileMachO::
-SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler *Mang, const TargetMachine &TM) const {
- assert(!Kind.isThreadLocal() && "Darwin doesn't support TLS");
-
- if (Kind.isText())
- return GV->isWeakForLinker() ? TextCoalSection : TextSection;
-
- // If this is weak/linkonce, put this in a coalescable section, either in text
- // or data depending on if it is writable.
- if (GV->isWeakForLinker()) {
- if (Kind.isReadOnly())
- return ConstTextCoalSection;
- return DataCoalSection;
- }
-
- // FIXME: Alignment check should be handled by section classifier.
- if (Kind.isMergeable1ByteCString() ||
- Kind.isMergeable2ByteCString()) {
- if (TM.getTargetData()->getPreferredAlignment(
- cast<GlobalVariable>(GV)) < 32) {
- if (Kind.isMergeable1ByteCString())
- return CStringSection;
- assert(Kind.isMergeable2ByteCString());
- return UStringSection;
- }
- }
-
- if (Kind.isMergeableConst()) {
- if (Kind.isMergeableConst4())
- return FourByteConstantSection;
- if (Kind.isMergeableConst8())
- return EightByteConstantSection;
- if (Kind.isMergeableConst16() && SixteenByteConstantSection)
- return SixteenByteConstantSection;
- }
-
- // Otherwise, if it is readonly, but not something we can specially optimize,
- // just drop it in .const.
- if (Kind.isReadOnly())
- return ReadOnlySection;
-
- // If this is marked const, put it into a const section. But if the dynamic
- // linker needs to write to it, put it in the data segment.
- if (Kind.isReadOnlyWithRel())
- return ConstDataSection;
-
- // Put zero initialized globals with strong external linkage in the
- // DATA, __common section with the .zerofill directive.
- if (Kind.isBSSExtern())
- return DataCommonSection;
-
- // Put zero initialized globals with local linkage in __DATA,__bss directive
- // with the .zerofill directive (aka .lcomm).
- if (Kind.isBSSLocal())
- return DataBSSSection;
-
- // Otherwise, just drop the variable in the normal data section.
- return DataSection;
-}
-
-const MCSection *
-TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const {
- // If this constant requires a relocation, we have to put it in the data
- // segment, not in the text segment.
- if (Kind.isDataRel() || Kind.isReadOnlyWithRel())
- return ConstDataSection;
-
- if (Kind.isMergeableConst4())
- return FourByteConstantSection;
- if (Kind.isMergeableConst8())
- return EightByteConstantSection;
- if (Kind.isMergeableConst16() && SixteenByteConstantSection)
- return SixteenByteConstantSection;
- return ReadOnlySection; // .const
-}
-
-/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide
-/// not to emit the UsedDirective for some symbols in llvm.used.
-// FIXME: REMOVE this (rdar://7071300)
-bool TargetLoweringObjectFileMachO::
-shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const {
- /// On Darwin, internally linked data beginning with "L" or "l" does not have
- /// the directive emitted (this occurs in ObjC metadata).
- if (!GV) return false;
-
- // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix.
- if (GV->hasLocalLinkage() && !isa<Function>(GV)) {
- // FIXME: ObjC metadata is currently emitted as internal symbols that have
- // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and
- // this horrible hack can go away.
- SmallString<64> Name;
- Mang->getNameWithPrefix(Name, GV, false);
- if (Name[0] == 'L' || Name[0] == 'l')
- return false;
+const MCExpr *TargetLoweringObjectFile::
+getSymbolForDwarfReference(const MCSymbol *Sym, MachineModuleInfo *MMI,
+ unsigned Encoding) const {
+ const MCExpr *Res = MCSymbolRefExpr::Create(Sym, getContext());
+
+ switch (Encoding & 0xF0) {
+ default:
+ llvm_report_error("We do not support this DWARF encoding yet!");
+ break;
+ case dwarf::DW_EH_PE_absptr:
+ // Do nothing special
+ break;
+ case dwarf::DW_EH_PE_pcrel:
+ // FIXME: PCSymbol
+ const MCExpr *PC = MCSymbolRefExpr::Create(".", getContext());
+ Res = MCBinaryExpr::CreateSub(Res, PC, getContext());
+ break;
}
- return true;
+ return Res;
}
-const MCExpr *TargetLoweringObjectFileMachO::
-getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI,
- bool &IsIndirect, bool &IsPCRel) const {
- // The mach-o version of this method defaults to returning a stub reference.
- IsIndirect = true;
- IsPCRel = false;
-
- SmallString<128> Name;
- Mang->getNameWithPrefix(Name, GV, true);
- Name += "$non_lazy_ptr";
- return MCSymbolRefExpr::Create(Name.str(), getContext());
+unsigned TargetLoweringObjectFile::getPersonalityEncoding() const {
+ return dwarf::DW_EH_PE_absptr;
}
-
-//===----------------------------------------------------------------------===//
-// COFF
-//===----------------------------------------------------------------------===//
-
-typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
-
-TargetLoweringObjectFileCOFF::~TargetLoweringObjectFileCOFF() {
- delete (COFFUniqueMapTy*)UniquingMap;
+unsigned TargetLoweringObjectFile::getLSDAEncoding() const {
+ return dwarf::DW_EH_PE_absptr;
}
-
-const MCSection *TargetLoweringObjectFileCOFF::
-getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const {
- // Create the map if it doesn't already exist.
- if (UniquingMap == 0)
- UniquingMap = new MachOUniqueMapTy();
- COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap;
-
- // Do the lookup, if we have a hit, return it.
- const MCSectionCOFF *&Entry = Map[Name];
- if (Entry) return Entry;
-
- return Entry = MCSectionCOFF::Create(Name, isDirective, Kind, getContext());
+unsigned TargetLoweringObjectFile::getFDEEncoding() const {
+ return dwarf::DW_EH_PE_absptr;
}
-void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
- const TargetMachine &TM) {
- if (UniquingMap != 0)
- ((COFFUniqueMapTy*)UniquingMap)->clear();
- TargetLoweringObjectFile::Initialize(Ctx, TM);
- TextSection = getCOFFSection("\t.text", true, SectionKind::getText());
- DataSection = getCOFFSection("\t.data", true, SectionKind::getDataRel());
- StaticCtorSection =
- getCOFFSection(".ctors", false, SectionKind::getDataRel());
- StaticDtorSection =
- getCOFFSection(".dtors", false, SectionKind::getDataRel());
-
- // FIXME: We're emitting LSDA info into a readonly section on COFF, even
- // though it contains relocatable pointers. In PIC mode, this is probably a
- // big runtime hit for C++ apps. Either the contents of the LSDA need to be
- // adjusted or this should be a data section.
- LSDASection =
- getCOFFSection(".gcc_except_table", false, SectionKind::getReadOnly());
- EHFrameSection =
- getCOFFSection(".eh_frame", false, SectionKind::getDataRel());
-
- // Debug info.
- // FIXME: Don't use 'directive' mode here.
- DwarfAbbrevSection =
- getCOFFSection("\t.section\t.debug_abbrev,\"dr\"",
- true, SectionKind::getMetadata());
- DwarfInfoSection =
- getCOFFSection("\t.section\t.debug_info,\"dr\"",
- true, SectionKind::getMetadata());
- DwarfLineSection =
- getCOFFSection("\t.section\t.debug_line,\"dr\"",
- true, SectionKind::getMetadata());
- DwarfFrameSection =
- getCOFFSection("\t.section\t.debug_frame,\"dr\"",
- true, SectionKind::getMetadata());
- DwarfPubNamesSection =
- getCOFFSection("\t.section\t.debug_pubnames,\"dr\"",
- true, SectionKind::getMetadata());
- DwarfPubTypesSection =
- getCOFFSection("\t.section\t.debug_pubtypes,\"dr\"",
- true, SectionKind::getMetadata());
- DwarfStrSection =
- getCOFFSection("\t.section\t.debug_str,\"dr\"",
- true, SectionKind::getMetadata());
- DwarfLocSection =
- getCOFFSection("\t.section\t.debug_loc,\"dr\"",
- true, SectionKind::getMetadata());
- DwarfARangesSection =
- getCOFFSection("\t.section\t.debug_aranges,\"dr\"",
- true, SectionKind::getMetadata());
- DwarfRangesSection =
- getCOFFSection("\t.section\t.debug_ranges,\"dr\"",
- true, SectionKind::getMetadata());
- DwarfMacroInfoSection =
- getCOFFSection("\t.section\t.debug_macinfo,\"dr\"",
- true, SectionKind::getMetadata());
-}
-
-const MCSection *TargetLoweringObjectFileCOFF::
-getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler *Mang, const TargetMachine &TM) const {
- return getCOFFSection(GV->getSection(), false, Kind);
-}
-
-static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
- if (Kind.isText())
- return ".text$linkonce";
- if (Kind.isWriteable())
- return ".data$linkonce";
- return ".rdata$linkonce";
-}
-
-
-const MCSection *TargetLoweringObjectFileCOFF::
-SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler *Mang, const TargetMachine &TM) const {
- assert(!Kind.isThreadLocal() && "Doesn't support TLS");
-
- // If this global is linkonce/weak and the target handles this by emitting it
- // into a 'uniqued' section name, create and return the section now.
- if (GV->isWeakForLinker()) {
- const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind);
- SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
- Mang->getNameWithPrefix(Name, GV, false);
- return getCOFFSection(Name.str(), false, Kind);
- }
-
- if (Kind.isText())
- return getTextSection();
-
- return getDataSection();
+unsigned TargetLoweringObjectFile::getTTypeEncoding() const {
+ return dwarf::DW_EH_PE_absptr;
}
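[Editor's note] The rewritten getSymbolForDwarfGlobalReference and the new
get*Encoding hooks above work in terms of DWARF exception-handling pointer
encodings. An encoding byte combines a value format in the low nibble with an
application mode in the high nibble, which is why the code masks with 0xF0 before
switching. A hedged sketch of how a target override might compose one (the
DW_EH_PE_* constants are the standard llvm::dwarf values; the function itself is
illustrative and not part of the patch):

    #include "llvm/Support/Dwarf.h"
    // Illustration only: choose an encoding for TType entries.
    static unsigned pickTTypeEncoding(bool IsPIC) {
      using namespace llvm::dwarf;
      if (!IsPIC)
        return DW_EH_PE_absptr;                 // 0x00: plain absolute pointer
      // pc-relative application (high nibble) + signed 4-byte format (low nibble)
      return DW_EH_PE_pcrel | DW_EH_PE_sdata4;  // 0x10 | 0x0B
    }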
diff --git a/lib/Target/X86/Android.mk b/lib/Target/X86/Android.mk
new file mode 100644
index 0000000..f5b8180
--- /dev/null
+++ b/lib/Target/X86/Android.mk
@@ -0,0 +1,47 @@
+LOCAL_PATH := $(call my-dir)
+
+# For the host only
+# =====================================================
+include $(CLEAR_VARS)
+include $(CLEAR_TBLGEN_VARS)
+
+TBLGEN_TABLES := \
+ X86GenRegisterInfo.h.inc \
+ X86GenRegisterNames.inc \
+ X86GenRegisterInfo.inc \
+ X86GenInstrNames.inc \
+ X86GenInstrInfo.inc \
+ X86GenAsmMatcher.inc \
+ X86GenDAGISel.inc \
+ X86GenDisassemblerTables.inc \
+ X86GenFastISel.inc \
+ X86GenCallingConv.inc \
+ X86GenSubtarget.inc \
+ X86GenEDInfo.inc
+
+LOCAL_SRC_FILES := \
+ X86AsmBackend.cpp \
+ X86COFFMachineModuleInfo.cpp \
+ X86CodeEmitter.cpp \
+ X86ELFWriterInfo.cpp \
+ X86FastISel.cpp \
+ X86FloatingPoint.cpp \
+ X86FloatingPointRegKill.cpp \
+ X86ISelDAGToDAG.cpp \
+ X86ISelLowering.cpp \
+ X86InstrInfo.cpp \
+ X86JITInfo.cpp \
+ X86MCAsmInfo.cpp \
+ X86MCCodeEmitter.cpp \
+ X86MCTargetExpr.cpp \
+ X86RegisterInfo.cpp \
+ X86Subtarget.cpp \
+ X86TargetMachine.cpp \
+ X86TargetObjectFile.cpp
+
+LOCAL_MODULE:= libLLVMX86CodeGen
+
+include $(LLVM_HOST_BUILD_MK)
+include $(LLVM_TBLGEN_RULES_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index acf497a..84d7bb7 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -10,6 +10,7 @@
#include "llvm/Target/TargetAsmParser.h"
#include "X86.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
@@ -183,6 +184,14 @@ struct X86Operand : public MCParsedAsmOperand {
bool isReg() const { return Kind == Register; }
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
void addRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getReg()));
@@ -190,13 +199,13 @@ struct X86Operand : public MCParsedAsmOperand {
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ addExpr(Inst, getImm());
}
void addImmSExt8Operands(MCInst &Inst, unsigned N) const {
// FIXME: Support user customization of the render method.
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ addExpr(Inst, getImm());
}
void addMemOperands(MCInst &Inst, unsigned N) const {
@@ -204,7 +213,7 @@ struct X86Operand : public MCParsedAsmOperand {
Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
Inst.addOperand(MCOperand::CreateImm(getMemScale()));
Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
- Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
+ addExpr(Inst, getMemDisp());
Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
}
@@ -218,7 +227,7 @@ struct X86Operand : public MCParsedAsmOperand {
Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
Inst.addOperand(MCOperand::CreateImm(getMemScale()));
Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
- Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
+ addExpr(Inst, getMemDisp());
}
static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
@@ -492,24 +501,20 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() {
bool X86ATTAsmParser::
ParseInstruction(const StringRef &Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // FIXME: Hack to recognize "sal..." for now. We need a way to represent
- // alternative syntaxes in the .td file, without requiring instruction
- // duplication.
- if (Name.startswith("sal")) {
- std::string Tmp = "shl" + Name.substr(3).str();
- Operands.push_back(X86Operand::CreateToken(Tmp, NameLoc));
- } else {
- // FIXME: This is a hack. We eventually want to add a general pattern
- // mechanism to be used in the table gen file for these assembly names that
- // use the same opcodes. Also we should only allow the "alternate names"
- // for rep and repne with the instructions they can only appear with.
- StringRef PatchedName = Name;
- if (Name == "repe" || Name == "repz")
- PatchedName = "rep";
- else if (Name == "repnz")
- PatchedName = "repne";
- Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
- }
+ // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
+ // represent alternative syntaxes in the .td file, without requiring
+ // instruction duplication.
+ StringRef PatchedName = StringSwitch<StringRef>(Name)
+ .Case("sal", "shl")
+ .Case("salb", "shlb")
+ .Case("sall", "shll")
+ .Case("salq", "shlq")
+ .Case("salw", "shlw")
+ .Case("repe", "rep")
+ .Case("repz", "rep")
+ .Case("repnz", "repne")
+ .Default(Name);
+ Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
if (getLexer().isNot(AsmToken::EndOfStatement)) {
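[Editor's note] The hunk above replaces the hand-rolled if/else mnemonic patching
with llvm::StringSwitch, a small ADT helper that walks a chain of .Case matches and
falls back to .Default. A self-contained sketch of the idiom (the standalone
function is the editor's illustration, not code from the patch):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"
    // Illustration only: map alternate mnemonics onto their canonical spelling.
    static llvm::StringRef canonicalizeMnemonic(llvm::StringRef Name) {
      return llvm::StringSwitch<llvm::StringRef>(Name)
          .Case("salq", "shlq")   // first matching Case wins
          .Case("repz", "rep")
          .Default(Name);         // unrecognised names pass through unchanged
    }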
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
index 38ccbf9..734a545 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
@@ -25,10 +25,15 @@ using namespace llvm;
// Include the auto-generated portion of the assembly writer.
#define MachineInstr MCInst
+#define GET_INSTRUCTION_NAME
#include "X86GenAsmWriter.inc"
#undef MachineInstr
void X86ATTInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); }
+StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op) {
switch (MI->getOperand(Op).getImm()) {
@@ -68,7 +73,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo) {
O << '$' << Op.getImm();
if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
- *CommentStream << format("imm = 0x%X\n", Op.getImm());
+ *CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm());
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
index 3180618..d109a07 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
@@ -26,11 +26,12 @@ public:
virtual void printInst(const MCInst *MI);
-
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI);
static const char *getRegisterName(unsigned RegNo);
-
+ static const char *getInstructionName(unsigned Opcode);
void printOperand(const MCInst *MI, unsigned OpNo);
void printMemReference(const MCInst *MI, unsigned Op);
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index 304306d..8cab24c 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -33,10 +33,11 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
@@ -52,40 +53,42 @@ void X86AsmPrinter::PrintPICBaseSymbol() const {
OutContext);
}
+MCSymbol *X86AsmPrinter::GetGlobalValueSymbol(const GlobalValue *GV) const {
+ SmallString<60> NameStr;
+ Mang->getNameWithPrefix(NameStr, GV, false);
+ MCSymbol *Symb = OutContext.GetOrCreateSymbol(NameStr.str());
+
+ if (Subtarget->isTargetCygMing()) {
+ X86COFFMachineModuleInfo &COFFMMI =
+ MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
+ COFFMMI.DecorateCygMingName(Symb, OutContext, GV, *TM.getTargetData());
+
+ // Save function name for later type emission.
+ if (const Function *F = dyn_cast<Function>(GV))
+ if (F->isDeclaration())
+ COFFMMI.addExternalFunction(Symb->getName());
+
+ }
+
+ return Symb;
+}
+
/// runOnMachineFunction - Emit the function body.
///
bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SetupMachineFunction(MF);
-
- // COFF and Cygwin specific mangling stuff. This should be moved out to the
- // mangler or handled some other way?
- if (Subtarget->isTargetCOFF()) {
- X86COFFMachineModuleInfo &COFFMMI =
- MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
- // Populate function information map. Don't want to populate
- // non-stdcall or non-fastcall functions' information right now.
- const Function *F = MF.getFunction();
- CallingConv::ID CC = F->getCallingConv();
- if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall)
- COFFMMI.AddFunctionInfo(F, *MF.getInfo<X86MachineFunctionInfo>());
- }
- if (Subtarget->isTargetCygMing()) {
+ if (Subtarget->isTargetCOFF()) {
const Function *F = MF.getFunction();
- X86COFFMachineModuleInfo &COFFMMI =
- MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
- COFFMMI.DecorateCygMingName(CurrentFnSym, OutContext,F,*TM.getTargetData());
-
- O << "\t.def\t " << *CurrentFnSym;
- O << ";\t.scl\t" <<
+ O << "\t.def\t " << *CurrentFnSym << ";\t.scl\t" <<
(F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT)
<< ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT)
<< ";\t.endef\n";
}
-
+
// Have common code print out the function header with linkage info etc.
EmitFunctionHeader();
-
+
// Emit the rest of the function body.
EmitFunctionBody();
@@ -119,12 +122,6 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) {
else
GVSym = GetGlobalValueSymbol(GV);
- if (Subtarget->isTargetCygMing()) {
- X86COFFMachineModuleInfo &COFFMMI =
- MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
- COFFMMI.DecorateCygMingName(GVSym, OutContext, GV, *TM.getTargetData());
- }
-
// Handle dllimport linkage.
if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName());
@@ -585,7 +582,6 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
if (I->hasDLLExportLinkage()) {
MCSymbol *Sym = GetGlobalValueSymbol(I);
- COFFMMI.DecorateCygMingName(Sym, OutContext, I, *TM.getTargetData());
DLLExportedFns.push_back(Sym);
}
@@ -607,6 +603,28 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
}
}
+
+ if (Subtarget->isTargetELF()) {
+ TargetLoweringObjectFileELF &TLOFELF =
+ static_cast<TargetLoweringObjectFileELF &>(getObjFileLowering());
+
+ MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
+ const TargetData *TD = TM.getTargetData();
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i)
+ O << *Stubs[i].first << ":\n"
+ << (TD->getPointerSize() == 8 ?
+ MAI->getData64bitsDirective() : MAI->getData32bitsDirective())
+ << *Stubs[i].second << '\n';
+
+ Stubs.clear();
+ }
+ }
}
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
index 1d32a5f..039214a 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
@@ -61,8 +61,7 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter {
virtual void EmitInstruction(const MachineInstr *MI);
void printSymbolOperand(const MachineOperand &MO);
-
-
+ virtual MCSymbol *GetGlobalValueSymbol(const GlobalValue *GV) const;
// These methods are used by the tablegen'erated instruction printer.
void printOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
index 4274d0a..610beb5 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
@@ -24,10 +24,14 @@ using namespace llvm;
// Include the auto-generated portion of the assembly writer.
#define MachineInstr MCInst
+#define GET_INSTRUCTION_NAME
#include "X86GenAsmWriter1.inc"
#undef MachineInstr
void X86IntelInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); }
+StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op) {
switch (MI->getOperand(Op).getImm()) {
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
index 1976177..545bf84 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
@@ -26,10 +26,12 @@ public:
: MCInstPrinter(O, MAI) {}
virtual void printInst(const MCInst *MI);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI);
static const char *getRegisterName(unsigned RegNo);
+ static const char *getInstructionName(unsigned Opcode);
void printOperand(const MCInst *MI, unsigned OpNo,
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 61f26a7..eed3b45 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -15,6 +15,7 @@ tablegen(X86GenCallingConv.inc -gen-callingconv)
tablegen(X86GenSubtarget.inc -gen-subtarget)
set(sources
+ X86AsmBackend.cpp
X86CodeEmitter.cpp
X86COFFMachineModuleInfo.cpp
X86ELFWriterInfo.cpp
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 19eb05e..e5f84e8 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -67,8 +67,8 @@ no_exit.i7: ; preds = %no_exit.i7, %build_tree.exit
[ %tmp.34.i18, %no_exit.i7 ]
%tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ],
[ %tmp.28.i16, %no_exit.i7 ]
- %tmp.28.i16 = add double %tmp.0.0.0.i10, 0.000000e+00
- %tmp.34.i18 = add double %tmp.0.1.0.i9, 0.000000e+00
+ %tmp.28.i16 = fadd double %tmp.0.0.0.i10, 0.000000e+00
+ %tmp.34.i18 = fadd double %tmp.0.1.0.i9, 0.000000e+00
br i1 false, label %Compute_Tree.exit23, label %no_exit.i7
Compute_Tree.exit23: ; preds = %no_exit.i7
@@ -97,7 +97,7 @@ pcmp/pand/pandn/por to do a selection instead of a conditional branch:
double %X(double %Y, double %Z, double %A, double %B) {
%C = setlt double %A, %B
- %z = add double %Z, 0.0 ;; select operand is not a load
+ %z = fadd double %Z, 0.0 ;; select operand is not a load
%D = select bool %C, double %Y, double %z
ret double %D
}
@@ -545,7 +545,7 @@ eliminates a constant pool load. For example, consider:
define i64 @ccosf(float %z.0, float %z.1) nounwind readonly {
entry:
- %tmp6 = sub float -0.000000e+00, %z.1 ; <float> [#uses=1]
+ %tmp6 = fsub float -0.000000e+00, %z.1 ; <float> [#uses=1]
%tmp20 = tail call i64 @ccoshf( float %tmp6, float %z.0 ) nounwind readonly
ret i64 %tmp20
}
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index aa7bb3d..d4545a6 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -227,11 +227,6 @@ lambda, siod, optimizer-eval, ackermann, hash2, nestedloop, strcat, and Treesor.
//===---------------------------------------------------------------------===//
-Teach the coalescer to coalesce vregs of different register classes. e.g. FR32 /
-FR64 to VR128.
-
-//===---------------------------------------------------------------------===//
-
Adding to the list of cmp / test poor codegen issues:
int test(__m128 *A, __m128 *B) {
@@ -1868,3 +1863,69 @@ carried over to machine instructions. Asm printer (or JIT) can use this
information to add the "lock" prefix.
//===---------------------------------------------------------------------===//
+
+_Bool bar(int *x) { return *x & 1; }
+
+define zeroext i1 @bar(i32* nocapture %x) nounwind readonly {
+entry:
+ %tmp1 = load i32* %x ; <i32> [#uses=1]
+ %and = and i32 %tmp1, 1 ; <i32> [#uses=1]
+ %tobool = icmp ne i32 %and, 0 ; <i1> [#uses=1]
+ ret i1 %tobool
+}
+
+bar: # @bar
+# BB#0: # %entry
+ movl 4(%esp), %eax
+ movb (%eax), %al
+ andb $1, %al
+ movzbl %al, %eax
+ ret
+
+Missed optimization: should be movl+andl.
+
+//===---------------------------------------------------------------------===//
+
+Consider the following two functions compiled with clang:
+_Bool foo(int *x) { return !(*x & 4); }
+unsigned bar(int *x) { return !(*x & 4); }
+
+foo:
+ movl 4(%esp), %eax
+ testb $4, (%eax)
+ sete %al
+ movzbl %al, %eax
+ ret
+
+bar:
+ movl 4(%esp), %eax
+ movl (%eax), %eax
+ shrl $2, %eax
+ andl $1, %eax
+ xorl $1, %eax
+ ret
+
+The second function generates more code even though the two functions are
+functionally identical.
+
+//===---------------------------------------------------------------------===//
+
+Take the following C code:
+int x(int y) { return (y & 63) << 14; }
+
+Code produced by gcc:
+ andl $63, %edi
+ sall $14, %edi
+ movl %edi, %eax
+ ret
+
+Code produced by clang:
+ shll $14, %edi
+ movl %edi, %eax
+ andl $1032192, %eax
+ ret
+
+The code produced by gcc is 3 bytes shorter. This sort of construct often
+shows up with bitfields.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/TargetInfo/Android.mk b/lib/Target/X86/TargetInfo/Android.mk
new file mode 100644
index 0000000..ee53f0d
--- /dev/null
+++ b/lib/Target/X86/TargetInfo/Android.mk
@@ -0,0 +1,24 @@
+LOCAL_PATH := $(call my-dir)
+
+# For the device only
+# =====================================================
+include $(CLEAR_VARS)
+include $(CLEAR_TBLGEN_VARS)
+
+TBLGEN_TABLES := \
+ X86GenRegisterNames.inc \
+ X86GenInstrNames.inc
+
+TBLGEN_TD_DIR := $(LOCAL_PATH)/..
+
+LOCAL_SRC_FILES := \
+ X86TargetInfo.cpp
+
+LOCAL_C_INCLUDES += \
+ $(LOCAL_PATH)/..
+
+LOCAL_MODULE:= libLLVMX86Info
+
+include $(LLVM_HOST_BUILD_MK)
+include $(LLVM_TBLGEN_RULES_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 1d17a05..ba0ee6c 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -19,12 +19,15 @@
namespace llvm {
-class X86TargetMachine;
class FunctionPass;
-class MachineCodeEmitter;
-class MCCodeEmitter;
class JITCodeEmitter;
+class MCAssembler;
+class MCCodeEmitter;
+class MCContext;
+class MachineCodeEmitter;
class Target;
+class TargetAsmBackend;
+class X86TargetMachine;
class formatted_raw_ostream;
/// createX86ISelDag - This pass converts a legalized DAG into a
@@ -49,9 +52,13 @@ FunctionPass *createX87FPRegKillInserterPass();
FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
JITCodeEmitter &JCE);
-MCCodeEmitter *createHeinousX86MCCodeEmitter(const Target &, TargetMachine &TM);
-MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM);
-MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM);
+MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM,
+ MCContext &Ctx);
+MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM,
+ MCContext &Ctx);
+
+TargetAsmBackend *createX86_32AsmBackend(const Target &, MCAssembler &);
+TargetAsmBackend *createX86_64AsmBackend(const Target &, MCAssembler &);
/// createX86EmitCodeToMemory - Returns a pass that converts a register
/// allocated function into raw machine code in a dynamically
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
new file mode 100644
index 0000000..e6654ef
--- /dev/null
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -0,0 +1,34 @@
+//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmBackend.h"
+#include "X86.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+
+namespace {
+
+class X86AsmBackend : public TargetAsmBackend {
+public:
+ X86AsmBackend(const Target &T, MCAssembler &A)
+ : TargetAsmBackend(T) {}
+};
+
+}
+
+TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
+ MCAssembler &A) {
+ return new X86AsmBackend(T, A);
+}
+
+TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
+ MCAssembler &A) {
+ return new X86AsmBackend(T, A);
+}
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
index ea52795..ab67acb 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
@@ -27,90 +27,55 @@ X86COFFMachineModuleInfo::X86COFFMachineModuleInfo(const MachineModuleInfo &) {
X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() {
}
-void X86COFFMachineModuleInfo::AddFunctionInfo(const Function *F,
- const X86MachineFunctionInfo &Val) {
- FunctionInfoMap[F] = Val;
+void X86COFFMachineModuleInfo::addExternalFunction(const StringRef& Name) {
+ CygMingStubs.insert(Name);
}
-
-
-static X86MachineFunctionInfo calculateFunctionInfo(const Function *F,
- const TargetData &TD) {
- X86MachineFunctionInfo Info;
- uint64_t Size = 0;
-
- switch (F->getCallingConv()) {
- case CallingConv::X86_StdCall:
- Info.setDecorationStyle(StdCall);
- break;
- case CallingConv::X86_FastCall:
- Info.setDecorationStyle(FastCall);
- break;
- default:
- return Info;
- }
-
- unsigned argNum = 1;
- for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- AI != AE; ++AI, ++argNum) {
- const Type* Ty = AI->getType();
-
- // 'Dereference' type in case of byval parameter attribute
- if (F->paramHasAttr(argNum, Attribute::ByVal))
- Ty = cast<PointerType>(Ty)->getElementType();
-
- // Size should be aligned to DWORD boundary
- Size += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
- }
-
- // We're not supporting tooooo huge arguments :)
- Info.setBytesToPopOnReturn((unsigned int)Size);
- return Info;
-}
-
-
-/// DecorateCygMingName - Query FunctionInfoMap and use this information for
-/// various name decorations for Cygwin and MingW.
+/// DecorateCygMingName - Apply various name decorations if the function uses
+/// stdcall or fastcall calling convention.
void X86COFFMachineModuleInfo::DecorateCygMingName(SmallVectorImpl<char> &Name,
const GlobalValue *GV,
const TargetData &TD) {
const Function *F = dyn_cast<Function>(GV);
if (!F) return;
-
- // Save function name for later type emission.
- if (F->isDeclaration())
- CygMingStubs.insert(StringRef(Name.data(), Name.size()));
-
+
// We don't want to decorate non-stdcall or non-fastcall functions right now
CallingConv::ID CC = F->getCallingConv();
if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall)
return;
-
- const X86MachineFunctionInfo *Info;
-
- FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F);
- if (info_item == FunctionInfoMap.end()) {
- // Calculate apropriate function info and populate map
- FunctionInfoMap[F] = calculateFunctionInfo(F, TD);
- Info = &FunctionInfoMap[F];
- } else {
- Info = &info_item->second;
- }
-
- if (Info->getDecorationStyle() == None) return;
+
+ unsigned ArgWords = 0;
+ DenseMap<const Function*, unsigned>::const_iterator item = FnArgWords.find(F);
+ if (item == FnArgWords.end()) {
+    // Calculate argument sizes
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ const Type* Ty = AI->getType();
+
+ // 'Dereference' type in case of byval parameter attribute
+ if (AI->hasByValAttr())
+ Ty = cast<PointerType>(Ty)->getElementType();
+
+ // Size should be aligned to DWORD boundary
+ ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
+ }
+
+ FnArgWords[F] = ArgWords;
+ } else
+ ArgWords = item->second;
+
const FunctionType *FT = F->getFunctionType();
-
// "Pure" variadic functions do not receive @0 suffix.
if (!FT->isVarArg() || FT->getNumParams() == 0 ||
(FT->getNumParams() == 1 && F->hasStructRetAttr()))
- raw_svector_ostream(Name) << '@' << Info->getBytesToPopOnReturn();
-
- if (Info->getDecorationStyle() == FastCall) {
+ raw_svector_ostream(Name) << '@' << ArgWords;
+
+ if (CC == CallingConv::X86_FastCall) {
if (Name[0] == '_')
Name[0] = '@';
else
Name.insert(Name.begin(), '@');
- }
+ }
}
/// DecorateCygMingName - Query FunctionInfoMap and use this information for
@@ -121,6 +86,6 @@ void X86COFFMachineModuleInfo::DecorateCygMingName(MCSymbol *&Name,
const TargetData &TD) {
SmallString<128> NameStr(Name->getName().begin(), Name->getName().end());
DecorateCygMingName(NameStr, GV, TD);
-
+
Name = Ctx.GetOrCreateSymbol(NameStr.str());
}
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index 0e2009e..9de3dcd 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -21,44 +21,25 @@
namespace llvm {
class X86MachineFunctionInfo;
class TargetData;
-
+
/// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation
/// for X86 COFF targets.
class X86COFFMachineModuleInfo : public MachineModuleInfoImpl {
StringSet<> CygMingStubs;
-
- // We have to propagate some information about MachineFunction to
- // AsmPrinter. It's ok, when we're printing the function, since we have
- // access to MachineFunction and can get the appropriate MachineFunctionInfo.
- // Unfortunately, this is not possible when we're printing reference to
- // Function (e.g. calling it and so on). Even more, there is no way to get the
- // corresponding MachineFunctions: it can even be not created at all. That's
- // why we should use additional structure, when we're collecting all necessary
- // information.
- //
- // This structure is using e.g. for name decoration for stdcall & fastcall'ed
- // function, since we have to use arguments' size for decoration.
- typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap;
- FMFInfoMap FunctionInfoMap;
-
+ DenseMap<const Function*, unsigned> FnArgWords;
public:
X86COFFMachineModuleInfo(const MachineModuleInfo &);
~X86COFFMachineModuleInfo();
-
-
+
void DecorateCygMingName(MCSymbol* &Name, MCContext &Ctx,
const GlobalValue *GV, const TargetData &TD);
void DecorateCygMingName(SmallVectorImpl<char> &Name, const GlobalValue *GV,
const TargetData &TD);
-
- void AddFunctionInfo(const Function *F, const X86MachineFunctionInfo &Val);
-
+ void addExternalFunction(const StringRef& Name);
typedef StringSet<>::const_iterator stub_iterator;
stub_iterator stub_begin() const { return CygMingStubs.begin(); }
stub_iterator stub_end() const { return CygMingStubs.end(); }
-
-
};
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index f0bceb1..8deadf6 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -387,10 +387,16 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
// If no BaseReg, issue a RIP relative instruction only if the MCE can
// resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
// 2-7) and absolute references.
+ unsigned BaseRegNo = -1U;
+ if (BaseReg != 0 && BaseReg != X86::RIP)
+ BaseRegNo = getX86RegNum(BaseReg);
+
if (// The SIB byte must be used if there is an index register.
IndexReg.getReg() == 0 &&
- // The SIB byte must be used if the base is ESP/RSP.
- BaseReg != X86::ESP && BaseReg != X86::RSP &&
+ // The SIB byte must be used if the base is ESP/RSP/R12, all of which
+ // encode to an R/M value of 4, which indicates that a SIB byte is
+ // present.
+ BaseRegNo != N86::ESP &&
// If there is no base register and we're in 64-bit mode, we need a SIB
// byte to emit an addr that is just 'disp32' (the non-RIP relative form).
(!Is64BitMode || BaseReg != 0)) {
@@ -401,7 +407,6 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
return;
}
- unsigned BaseRegNo = getX86RegNum(BaseReg);
// If the base is not EBP/ESP and there is no displacement, use simple
// indirect register encoding, this handles addresses like [EAX]. The
// encoding for [EBP] with no displacement means [disp32] so we handle it
@@ -757,27 +762,8 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
case X86II::MRM4r: case X86II::MRM5r:
case X86II::MRM6r: case X86II::MRM7r: {
MCE.emitByte(BaseOpcode);
-
- // Special handling of lfence, mfence, monitor, and mwait.
- if (Desc->getOpcode() == X86::LFENCE ||
- Desc->getOpcode() == X86::MFENCE ||
- Desc->getOpcode() == X86::MONITOR ||
- Desc->getOpcode() == X86::MWAIT) {
- emitRegModRMByte((Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);
-
- switch (Desc->getOpcode()) {
- default: break;
- case X86::MONITOR:
- MCE.emitByte(0xC8);
- break;
- case X86::MWAIT:
- MCE.emitByte(0xC9);
- break;
- }
- } else {
- emitRegModRMByte(MI.getOperand(CurOp++).getReg(),
- (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);
- }
+ emitRegModRMByte(MI.getOperand(CurOp++).getReg(),
+ (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);
if (CurOp == NumOps)
break;
@@ -853,6 +839,27 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
getX86RegNum(MI.getOperand(CurOp).getReg()));
++CurOp;
break;
+
+ case X86II::MRM_C1:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xC1);
+ break;
+ case X86II::MRM_C8:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xC8);
+ break;
+ case X86II::MRM_C9:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xC9);
+ break;
+ case X86II::MRM_E8:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xE8);
+ break;
+ case X86II::MRM_F0:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xF0);
+ break;
}
if (!Desc->isVariadic() && CurOp != NumOps) {
@@ -864,335 +871,3 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
MCE.processDebugLoc(MI.getDebugLoc(), false);
}
-
-// Adapt the Emitter / CodeEmitter interfaces to MCCodeEmitter.
-//
-// FIXME: This is a total hack designed to allow work on llvm-mc to proceed
-// without being blocked on various cleanups needed to support a clean interface
-// to instruction encoding.
-//
-// Look away!
-
-#include "llvm/DerivedTypes.h"
-
-namespace {
-class MCSingleInstructionCodeEmitter : public MachineCodeEmitter {
- uint8_t Data[256];
- const MCInst *CurrentInst;
- SmallVectorImpl<MCFixup> *FixupList;
-
-public:
- MCSingleInstructionCodeEmitter() { reset(0, 0); }
-
- void reset(const MCInst *Inst, SmallVectorImpl<MCFixup> *Fixups) {
- CurrentInst = Inst;
- FixupList = Fixups;
- BufferBegin = Data;
- BufferEnd = array_endof(Data);
- CurBufferPtr = Data;
- }
-
- StringRef str() {
- return StringRef(reinterpret_cast<char*>(BufferBegin),
- CurBufferPtr - BufferBegin);
- }
-
- virtual void startFunction(MachineFunction &F) {}
- virtual bool finishFunction(MachineFunction &F) { return false; }
- virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {}
- virtual bool earlyResolveAddresses() const { return false; }
- virtual void addRelocation(const MachineRelocation &MR) {
- unsigned Offset = 0, OpIndex = 0, Kind = MR.getRelocationType();
-
- // This form is only used in one case, for branches.
- if (MR.isBasicBlock()) {
- Offset = unsigned(MR.getMachineCodeOffset());
- OpIndex = 0;
- } else {
- assert(MR.isJumpTableIndex() && "Unexpected relocation!");
-
- Offset = unsigned(MR.getMachineCodeOffset());
-
- // The operand index is encoded as the first byte of the fake operand.
- OpIndex = MR.getJumpTableIndex();
- }
-
- MCOperand Op = CurrentInst->getOperand(OpIndex);
- assert(Op.isExpr() && "FIXME: Not yet implemented!");
- FixupList->push_back(MCFixup::Create(Offset, Op.getExpr(),
- MCFixupKind(FirstTargetFixupKind + Kind)));
- }
- virtual void setModuleInfo(MachineModuleInfo* Info) {}
-
- // Interface functions which should never get called in our usage.
-
- virtual void emitLabel(uint64_t LabelID) {
- assert(0 && "Unexpected code emitter call!");
- }
- virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const {
- assert(0 && "Unexpected code emitter call!");
- return 0;
- }
- virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const {
- assert(0 && "Unexpected code emitter call!");
- return 0;
- }
- virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
- assert(0 && "Unexpected code emitter call!");
- return 0;
- }
- virtual uintptr_t getLabelAddress(uint64_t LabelID) const {
- assert(0 && "Unexpected code emitter call!");
- return 0;
- }
-};
-
-class X86MCCodeEmitter : public MCCodeEmitter {
- X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
- void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
-
-private:
- X86TargetMachine &TM;
- llvm::Function *DummyF;
- TargetData *DummyTD;
- mutable llvm::MachineFunction *DummyMF;
- llvm::MachineBasicBlock *DummyMBB;
-
- MCSingleInstructionCodeEmitter *InstrEmitter;
- Emitter<MachineCodeEmitter> *Emit;
-
-public:
- X86MCCodeEmitter(X86TargetMachine &_TM) : TM(_TM) {
- // Verily, thou shouldst avert thine eyes.
- const llvm::FunctionType *FTy =
- FunctionType::get(llvm::Type::getVoidTy(getGlobalContext()), false);
- DummyF = Function::Create(FTy, GlobalValue::InternalLinkage);
- DummyTD = new TargetData("");
- DummyMF = new MachineFunction(DummyF, TM, 0);
- DummyMBB = DummyMF->CreateMachineBasicBlock();
-
- InstrEmitter = new MCSingleInstructionCodeEmitter();
- Emit = new Emitter<MachineCodeEmitter>(TM, *InstrEmitter,
- *TM.getInstrInfo(),
- *DummyTD, false);
- }
- ~X86MCCodeEmitter() {
- delete Emit;
- delete InstrEmitter;
- delete DummyMF;
- delete DummyF;
- }
-
- unsigned getNumFixupKinds() const {
- return 5;
- }
-
- MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
- static MCFixupKindInfo Infos[] = {
- { "reloc_pcrel_word", 0, 4 * 8 },
- { "reloc_picrel_word", 0, 4 * 8 },
- { "reloc_absolute_word", 0, 4 * 8 },
- { "reloc_absolute_word_sext", 0, 4 * 8 },
- { "reloc_absolute_dword", 0, 8 * 8 }
- };
-
- assert(Kind >= FirstTargetFixupKind && Kind < MaxTargetFixupKind &&
- "Invalid kind!");
- return Infos[Kind - FirstTargetFixupKind];
- }
-
- bool AddRegToInstr(const MCInst &MI, MachineInstr *Instr,
- unsigned Start) const {
- if (Start + 1 > MI.getNumOperands())
- return false;
-
- const MCOperand &Op = MI.getOperand(Start);
- if (!Op.isReg()) return false;
-
- Instr->addOperand(MachineOperand::CreateReg(Op.getReg(), false));
- return true;
- }
-
- bool AddImmToInstr(const MCInst &MI, MachineInstr *Instr,
- unsigned Start) const {
- if (Start + 1 > MI.getNumOperands())
- return false;
-
- const MCOperand &Op = MI.getOperand(Start);
- if (Op.isImm()) {
- Instr->addOperand(MachineOperand::CreateImm(Op.getImm()));
- return true;
- }
- if (!Op.isExpr())
- return false;
-
- const MCExpr *Expr = Op.getExpr();
- if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) {
- Instr->addOperand(MachineOperand::CreateImm(CE->getValue()));
- return true;
- }
-
- // Fake this as an external symbol to the code emitter to add a relcoation
- // entry we will recognize.
- Instr->addOperand(MachineOperand::CreateJTI(Start, 0));
- return true;
- }
-
- bool AddLMemToInstr(const MCInst &MI, MachineInstr *Instr,
- unsigned Start) const {
- return (AddRegToInstr(MI, Instr, Start + 0) &&
- AddImmToInstr(MI, Instr, Start + 1) &&
- AddRegToInstr(MI, Instr, Start + 2) &&
- AddImmToInstr(MI, Instr, Start + 3));
- }
-
- bool AddMemToInstr(const MCInst &MI, MachineInstr *Instr,
- unsigned Start) const {
- return (AddRegToInstr(MI, Instr, Start + 0) &&
- AddImmToInstr(MI, Instr, Start + 1) &&
- AddRegToInstr(MI, Instr, Start + 2) &&
- AddImmToInstr(MI, Instr, Start + 3) &&
- AddRegToInstr(MI, Instr, Start + 4));
- }
-
- void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups) const {
- // Don't look yet!
-
- // Convert the MCInst to a MachineInstr so we can (ab)use the regular
- // emitter.
- const X86InstrInfo &II = *TM.getInstrInfo();
- const TargetInstrDesc &Desc = II.get(MI.getOpcode());
- MachineInstr *Instr = DummyMF->CreateMachineInstr(Desc, DebugLoc());
- DummyMBB->push_back(Instr);
-
- unsigned Opcode = MI.getOpcode();
- unsigned NumOps = MI.getNumOperands();
- unsigned CurOp = 0;
- bool AddTied = false;
- if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1)
- AddTied = true;
- else if (NumOps > 2 &&
- Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0)
- // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
- --NumOps;
-
- bool OK = true;
- switch (Desc.TSFlags & X86II::FormMask) {
- case X86II::MRMDestReg:
- case X86II::MRMSrcReg:
- // Matching doesn't fill this in completely, we have to choose operand 0
- // for a tied register.
- OK &= AddRegToInstr(MI, Instr, CurOp++);
- if (AddTied)
- OK &= AddRegToInstr(MI, Instr, CurOp++ - 1);
- OK &= AddRegToInstr(MI, Instr, CurOp++);
- if (CurOp < NumOps)
- OK &= AddImmToInstr(MI, Instr, CurOp);
- break;
-
- case X86II::RawFrm:
- if (CurOp < NumOps) {
- // Hack to make branches work.
- if (!(Desc.TSFlags & X86II::ImmMask) &&
- MI.getOperand(0).isExpr() &&
- isa<MCSymbolRefExpr>(MI.getOperand(0).getExpr()))
- Instr->addOperand(MachineOperand::CreateMBB(DummyMBB));
- else
- OK &= AddImmToInstr(MI, Instr, CurOp);
- }
- break;
-
- case X86II::AddRegFrm:
- // Matching doesn't fill this in completely, we have to choose operand 0
- // for a tied register.
- OK &= AddRegToInstr(MI, Instr, CurOp++);
- if (AddTied)
- OK &= AddRegToInstr(MI, Instr, CurOp++ - 1);
- if (CurOp < NumOps)
- OK &= AddImmToInstr(MI, Instr, CurOp);
- break;
-
- case X86II::MRM0r: case X86II::MRM1r:
- case X86II::MRM2r: case X86II::MRM3r:
- case X86II::MRM4r: case X86II::MRM5r:
- case X86II::MRM6r: case X86II::MRM7r:
- // Matching doesn't fill this in completely, we have to choose operand 0
- // for a tied register.
- OK &= AddRegToInstr(MI, Instr, CurOp++);
- if (AddTied)
- OK &= AddRegToInstr(MI, Instr, CurOp++ - 1);
- if (CurOp < NumOps)
- OK &= AddImmToInstr(MI, Instr, CurOp);
- break;
-
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m:
- OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5;
- if (CurOp < NumOps)
- OK &= AddImmToInstr(MI, Instr, CurOp);
- break;
-
- case X86II::MRMSrcMem:
- // Matching doesn't fill this in completely, we have to choose operand 0
- // for a tied register.
- OK &= AddRegToInstr(MI, Instr, CurOp++);
- if (AddTied)
- OK &= AddRegToInstr(MI, Instr, CurOp++ - 1);
- if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
- Opcode == X86::LEA16r || Opcode == X86::LEA32r)
- OK &= AddLMemToInstr(MI, Instr, CurOp);
- else
- OK &= AddMemToInstr(MI, Instr, CurOp);
- break;
-
- case X86II::MRMDestMem:
- OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5;
- OK &= AddRegToInstr(MI, Instr, CurOp);
- break;
-
- default:
- case X86II::MRMInitReg:
- case X86II::Pseudo:
- OK = false;
- break;
- }
-
- if (!OK) {
- dbgs() << "couldn't convert inst '";
- MI.dump();
- dbgs() << "' to machine instr:\n";
- Instr->dump();
- }
-
- InstrEmitter->reset(&MI, &Fixups);
- if (OK)
- Emit->emitInstruction(*Instr, &Desc);
- OS << InstrEmitter->str();
-
- Instr->eraseFromParent();
- }
-};
-}
-
-#include "llvm/Support/CommandLine.h"
-
-static cl::opt<bool> EnableNewEncoder("enable-new-x86-encoder",
- cl::ReallyHidden);
-
-
-// Ok, now you can look.
-MCCodeEmitter *llvm::createHeinousX86MCCodeEmitter(const Target &T,
- TargetMachine &TM) {
-
- // FIXME: Remove the heinous one when the new one works.
- if (EnableNewEncoder) {
- if (TM.getTargetData()->getPointerSize() == 4)
- return createX86_32MCCodeEmitter(T, TM);
- return createX86_64MCCodeEmitter(T, TM);
- }
-
- return new X86MCCodeEmitter(static_cast<X86TargetMachine&>(TM));
-}
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index ea398e9..98e3f4e 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -388,6 +388,8 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
}
case Instruction::GetElementPtr: {
+ X86AddressMode SavedAM = AM;
+
// Pattern-match simple GEPs.
uint64_t Disp = (int32_t)AM.Disp;
unsigned IndexReg = AM.IndexReg;
@@ -428,7 +430,13 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
AM.IndexReg = IndexReg;
AM.Scale = Scale;
AM.Disp = (uint32_t)Disp;
- return X86SelectAddress(U->getOperand(0), AM);
+ if (X86SelectAddress(U->getOperand(0), AM))
+ return true;
+
+ // If we couldn't merge the sub value into this addr mode, revert back to
+ // our address and just match the value instead of completely failing.
+ AM = SavedAM;
+ break;
unsupported_gep:
// Ok, the GEP indices weren't all covered.
break;
@@ -786,8 +794,8 @@ bool X86FastISel::X86SelectCmp(Instruction *I) {
bool X86FastISel::X86SelectZExt(Instruction *I) {
// Handle zero-extension from i1 to i8, which is common.
- if (I->getType()->isInteger(8) &&
- I->getOperand(0)->getType()->isInteger(1)) {
+ if (I->getType()->isIntegerTy(8) &&
+ I->getOperand(0)->getType()->isIntegerTy(1)) {
unsigned ResultReg = getRegForValue(I->getOperand(0));
if (ResultReg == 0) return false;
// Set the high bits to zero.
@@ -828,30 +836,30 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {
std::swap(TrueMBB, FalseMBB);
Predicate = CmpInst::FCMP_UNE;
// FALL THROUGH
- case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE; break;
- case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA; break;
- case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE; break;
- case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA; break;
- case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE; break;
- case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE; break;
- case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP; break;
- case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP; break;
- case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE; break;
- case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB; break;
- case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE; break;
- case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break;
- case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break;
+ case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
+ case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4; break;
+ case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
+ case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA_4; break;
+ case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE_4; break;
+ case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
+ case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break;
+ case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4; break;
+ case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4; break;
+ case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB_4; break;
+ case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break;
+ case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break;
+ case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
- case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE; break;
- case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE; break;
- case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA; break;
- case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE; break;
- case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break;
- case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break;
- case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG; break;
- case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE; break;
- case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL; break;
- case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE; break;
+ case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break;
+ case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
+ case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break;
+ case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
+ case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break;
+ case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
+ case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4; break;
+ case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break;
+ case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4; break;
+ case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break;
default:
return false;
}
@@ -869,7 +877,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {
if (Predicate == CmpInst::FCMP_UNE) {
// X86 requires a second branch to handle UNE (and OEQ,
// which is mapped to UNE above).
- BuildMI(MBB, DL, TII.get(X86::JP)).addMBB(TrueMBB);
+ BuildMI(MBB, DL, TII.get(X86::JP_4)).addMBB(TrueMBB);
}
FastEmitBranch(FalseMBB);
@@ -923,7 +931,8 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {
unsigned OpCode = SetMI->getOpcode();
if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
- BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? X86::JO : X86::JB))
+ BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ?
+ X86::JO_4 : X86::JB_4))
.addMBB(TrueMBB);
FastEmitBranch(FalseMBB);
MBB->addSuccessor(TrueMBB);
@@ -939,7 +948,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {
if (OpReg == 0) return false;
BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
- BuildMI(MBB, DL, TII.get(X86::JNE)).addMBB(TrueMBB);
+ BuildMI(MBB, DL, TII.get(X86::JNE_4)).addMBB(TrueMBB);
FastEmitBranch(FalseMBB);
MBB->addSuccessor(TrueMBB);
return true;
@@ -948,7 +957,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {
bool X86FastISel::X86SelectShift(Instruction *I) {
unsigned CReg = 0, OpReg = 0, OpImm = 0;
const TargetRegisterClass *RC = NULL;
- if (I->getType()->isInteger(8)) {
+ if (I->getType()->isIntegerTy(8)) {
CReg = X86::CL;
RC = &X86::GR8RegClass;
switch (I->getOpcode()) {
@@ -957,7 +966,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
default: return false;
}
- } else if (I->getType()->isInteger(16)) {
+ } else if (I->getType()->isIntegerTy(16)) {
CReg = X86::CX;
RC = &X86::GR16RegClass;
switch (I->getOpcode()) {
@@ -966,7 +975,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
default: return false;
}
- } else if (I->getType()->isInteger(32)) {
+ } else if (I->getType()->isIntegerTy(32)) {
CReg = X86::ECX;
RC = &X86::GR32RegClass;
switch (I->getOpcode()) {
@@ -975,7 +984,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
default: return false;
}
- } else if (I->getType()->isInteger(64)) {
+ } else if (I->getType()->isIntegerTy(64)) {
CReg = X86::RCX;
RC = &X86::GR64RegClass;
switch (I->getOpcode()) {
@@ -1160,6 +1169,8 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) {
if (!X86SelectAddress(DI->getAddress(), AM))
return false;
const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+ // FIXME may need to add RegState::Debug to any registers produced,
+ // although ESP/EBP should be the only ones at the moment.
addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0).
addMetadata(DI->getVariable());
return true;
diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h
new file mode 100644
index 0000000..c8dac3c
--- /dev/null
+++ b/lib/Target/X86/X86FixupKinds.h
@@ -0,0 +1,25 @@
+//===-- X86/X86FixupKinds.h - X86 Specific Fixup Entries --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_X86_X86FIXUPKINDS_H
+#define LLVM_X86_X86FIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace X86 {
+enum Fixups {
+ reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch.
+ reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1
+ reloc_riprel_4byte // 32-bit rip-relative
+};
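+
+// Illustrative use only: a target fixup of one of these kinds is attached to
+// an encoded instruction in the usual MC fashion, roughly
+//   Fixups.push_back(MCFixup::Create(Offset, Expr,
+//                                    MCFixupKind(X86::reloc_pcrel_4byte)));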
+}
+}
+
+#endif
diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp
index 34a0045..6a117dd 100644
--- a/lib/Target/X86/X86FloatingPointRegKill.cpp
+++ b/lib/Target/X86/X86FloatingPointRegKill.cpp
@@ -118,7 +118,7 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
for (BasicBlock::const_iterator II = SI->begin();
(PN = dyn_cast<PHINode>(II)); ++II) {
if (PN->getType()==Type::getX86_FP80Ty(LLVMBB->getContext()) ||
- (!Subtarget.hasSSE1() && PN->getType()->isFloatingPoint()) ||
+ (!Subtarget.hasSSE1() && PN->getType()->isFloatingPointTy()) ||
(!Subtarget.hasSSE2() &&
PN->getType()==Type::getDoubleTy(LLVMBB->getContext()))) {
ContainsFPCode = true;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index e44ce421..3fad8ad 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -12,15 +12,6 @@
//
//===----------------------------------------------------------------------===//
-// Force NDEBUG on in any optimized build on Darwin.
-//
-// FIXME: This is a huge hack, to work around ridiculously awful compile times
-// on this file with gcc-4.2 on Darwin, in Release mode.
-#if (!defined(__llvm__) && defined(__APPLE__) && \
- defined(__OPTIMIZE__) && !defined(NDEBUG))
-#define NDEBUG
-#endif
-
#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
@@ -177,14 +168,11 @@ namespace {
return "X86 DAG->DAG Instruction Selection";
}
- /// InstructionSelect - This callback is invoked by
- /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
- virtual void InstructionSelect();
-
virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
- virtual
- bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const;
+ virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const;
+
+ virtual void PreprocessISelDAG();
// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"
@@ -208,18 +196,17 @@ namespace {
SDValue &Scale, SDValue &Index, SDValue &Disp);
bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp);
- bool SelectScalarSSELoad(SDNode *Op, SDValue Pred,
- SDValue N, SDValue &Base, SDValue &Scale,
+ bool SelectScalarSSELoad(SDNode *Root, SDValue N,
+ SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment,
- SDValue &InChain, SDValue &OutChain);
+ SDValue &NodeWithChain);
+
bool TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment);
- void PreprocessForRMW();
- void PreprocessForFPConvert();
-
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
@@ -295,19 +282,22 @@ namespace {
const X86InstrInfo *getInstrInfo() {
return getTargetMachine().getInstrInfo();
}
-
-#ifndef NDEBUG
- unsigned Indent;
-#endif
};
}
-bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
- SDNode *Root) const {
+bool
+X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
if (OptLevel == CodeGenOpt::None) return false;
- if (U == Root)
+ if (!N.hasOneUse())
+ return false;
+
+ if (N.getOpcode() != ISD::LOAD)
+ return true;
+
+ // If N is a load, do additional profitability checks.
+ if (U == Root) {
switch (U->getOpcode()) {
default: break;
case X86ISD::ADD:
@@ -354,60 +344,9 @@ bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
}
}
}
-
- // Proceed to 'generic' cycle finder code
- return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root);
-}
-
-/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
-/// and move load below the TokenFactor. Replace store's chain operand with
-/// load's chain result.
-static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
- SDValue Store, SDValue TF) {
- SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i)
- if (Load.getNode() == TF.getOperand(i).getNode())
- Ops.push_back(Load.getOperand(0));
- else
- Ops.push_back(TF.getOperand(i));
- SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
- SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF,
- Load.getOperand(1),
- Load.getOperand(2));
- CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1),
- Store.getOperand(2), Store.getOperand(3));
-}
-
-/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. The
-/// chain produced by the load must only be used by the store's chain operand,
-/// otherwise this may produce a cycle in the DAG.
-///
-static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
- SDValue &Load) {
- if (N.getOpcode() == ISD::BIT_CONVERT) {
- if (!N.hasOneUse())
- return false;
- N = N.getOperand(0);
}
- LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
- if (!LD || LD->isVolatile())
- return false;
- if (LD->getAddressingMode() != ISD::UNINDEXED)
- return false;
-
- ISD::LoadExtType ExtType = LD->getExtensionType();
- if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD)
- return false;
-
- if (N.hasOneUse() &&
- LD->hasNUsesOfValue(1, 1) &&
- N.getOperand(1) == Address &&
- LD->isOperandOf(Chain.getNode())) {
- Load = N;
- return true;
- }
- return false;
+ return true;
}
/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
@@ -473,51 +412,15 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
return false;
}
-
-/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
-/// This is only run if not in -O0 mode.
-/// This allows the instruction selector to pick more read-modify-write
-/// instructions. This is a common case:
-///
-/// [Load chain]
-/// ^
-/// |
-/// [Load]
-/// ^ ^
-/// | |
-/// / \-
-/// / |
-/// [TokenFactor] [Op]
-/// ^ ^
-/// | |
-/// \ /
-/// \ /
-/// [Store]
-///
-/// The fact the store's chain operand != load's chain will prevent the
-/// (store (op (load))) instruction from being selected. We can transform it to:
-///
-/// [Load chain]
-/// ^
-/// |
-/// [TokenFactor]
-/// ^
-/// |
-/// [Load]
-/// ^ ^
-/// | |
-/// | \-
-/// | |
-/// | [Op]
-/// | ^
-/// | |
-/// \ /
-/// \ /
-/// [Store]
-void X86DAGToDAGISel::PreprocessForRMW() {
+void X86DAGToDAGISel::PreprocessISelDAG() {
+ // OptForSize is used in pattern predicates that isel is matching.
+ OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end(); I != E; ++I) {
- if (I->getOpcode() == X86ISD::CALL) {
+ E = CurDAG->allnodes_end(); I != E; ) {
+ SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+
+ if (OptLevel != CodeGenOpt::None && N->getOpcode() == X86ISD::CALL) {
/// Also try moving call address load from outside callseq_start to just
/// before the call to allow it to be folded.
///
@@ -537,85 +440,23 @@ void X86DAGToDAGISel::PreprocessForRMW() {
/// \ /
/// \ /
/// [CALL]
- SDValue Chain = I->getOperand(0);
- SDValue Load = I->getOperand(1);
+ SDValue Chain = N->getOperand(0);
+ SDValue Load = N->getOperand(1);
if (!isCalleeLoad(Load, Chain))
continue;
- MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain);
+ MoveBelowCallSeqStart(CurDAG, Load, SDValue(N, 0), Chain);
++NumLoadMoved;
continue;
}
-
- if (!ISD::isNON_TRUNCStore(I))
- continue;
- SDValue Chain = I->getOperand(0);
-
- if (Chain.getNode()->getOpcode() != ISD::TokenFactor)
- continue;
-
- SDValue N1 = I->getOperand(1);
- SDValue N2 = I->getOperand(2);
- if ((N1.getValueType().isFloatingPoint() &&
- !N1.getValueType().isVector()) ||
- !N1.hasOneUse())
- continue;
-
- bool RModW = false;
- SDValue Load;
- unsigned Opcode = N1.getNode()->getOpcode();
- switch (Opcode) {
- case ISD::ADD:
- case ISD::MUL:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR:
- case ISD::ADDC:
- case ISD::ADDE:
- case ISD::VECTOR_SHUFFLE: {
- SDValue N10 = N1.getOperand(0);
- SDValue N11 = N1.getOperand(1);
- RModW = isRMWLoad(N10, Chain, N2, Load);
- if (!RModW)
- RModW = isRMWLoad(N11, Chain, N2, Load);
- break;
- }
- case ISD::SUB:
- case ISD::SHL:
- case ISD::SRA:
- case ISD::SRL:
- case ISD::ROTL:
- case ISD::ROTR:
- case ISD::SUBC:
- case ISD::SUBE:
- case X86ISD::SHLD:
- case X86ISD::SHRD: {
- SDValue N10 = N1.getOperand(0);
- RModW = isRMWLoad(N10, Chain, N2, Load);
- break;
- }
- }
-
- if (RModW) {
- MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain);
- ++NumLoadMoved;
- checkForCycles(I);
- }
- }
-}
-
-
-/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend
-/// nodes that target the FP stack to be store and load to the stack. This is a
-/// gross hack. We would like to simply mark these as being illegal, but when
-/// we do that, legalize produces these when it expands calls, then expands
-/// these in the same legalize pass. We would like dag combine to be able to
-/// hack on these between the call expansion and the node legalization. As such
-/// this pass basically does "really late" legalization of these inline with the
-/// X86 isel pass.
-void X86DAGToDAGISel::PreprocessForFPConvert() {
- for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end(); I != E; ) {
- SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+
+    // Lower fpround and fpextend nodes that target the FP stack to a store and
+    // load through the stack.  This is a gross hack. We would like to simply mark
+ // these as being illegal, but when we do that, legalize produces these when
+ // it expands calls, then expands these in the same legalize pass. We would
+ // like dag combine to be able to hack on these between the call expansion
+ // and the node legalization. As such this pass basically does "really
+ // late" legalization of these inline with the X86 isel pass.
+ // FIXME: This should only happen when not compiled with -O0.
if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
continue;
@@ -652,9 +493,10 @@ void X86DAGToDAGISel::PreprocessForFPConvert() {
// FIXME: optimize the case where the src/dest is a load or store?
SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
N->getOperand(0),
- MemTmp, NULL, 0, MemVT);
+ MemTmp, NULL, 0, MemVT,
+ false, false, 0);
SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
- NULL, 0, MemVT);
+ NULL, 0, MemVT, false, false, 0);
// We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
// extload we created. This will cause general havok on the dag because
@@ -670,30 +512,6 @@ void X86DAGToDAGISel::PreprocessForFPConvert() {
}
}
-/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
-/// when it has created a SelectionDAG for us to codegen.
-void X86DAGToDAGISel::InstructionSelect() {
- const Function *F = MF->getFunction();
- OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);
-
- if (OptLevel != CodeGenOpt::None)
- PreprocessForRMW();
-
- // FIXME: This should only happen when not compiled with -O0.
- PreprocessForFPConvert();
-
- // Codegen the basic block.
-#ifndef NDEBUG
- DEBUG(dbgs() << "===== Instruction selection begins:\n");
- Indent = 0;
-#endif
- SelectRoot(*CurDAG);
-#ifndef NDEBUG
- DEBUG(dbgs() << "===== Instruction selection ends:\n");
-#endif
-
- CurDAG->RemoveDeadNodes();
-}
/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.
@@ -1300,22 +1118,24 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
/// match a load whose top elements are either undef or zeros. The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
-bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred,
+///
+/// We also return:
+/// PatternNodeWithChain: this is the matched node that has a chain input and
+/// output.
+bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment,
- SDValue &InChain,
- SDValue &OutChain) {
+ SDValue &PatternNodeWithChain) {
if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- InChain = N.getOperand(0).getValue(1);
- if (ISD::isNON_EXTLoad(InChain.getNode()) &&
- InChain.getValue(0).hasOneUse() &&
- N.hasOneUse() &&
- IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op)) {
- LoadSDNode *LD = cast<LoadSDNode>(InChain);
- if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ PatternNodeWithChain = N.getOperand(0);
+ if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
+ PatternNodeWithChain.hasOneUse() &&
+ IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
+ IsLegalToFold(N.getOperand(0), N.getNode(), Root)) {
+ LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
+ if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment))
return false;
- OutChain = LD->getChain();
return true;
}
}
@@ -1327,13 +1147,14 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred,
N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
N.getOperand(0).getNode()->hasOneUse() &&
ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
- N.getOperand(0).getOperand(0).hasOneUse()) {
+ N.getOperand(0).getOperand(0).hasOneUse() &&
+ IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
+ IsLegalToFold(N.getOperand(0), N.getNode(), Root)) {
// Okay, this is a zero extending load. Fold it.
LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
- if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
return false;
- OutChain = LD->getChain();
- InChain = SDValue(LD, 1);
+ PatternNodeWithChain = SDValue(LD, 0);
return true;
}
return false;
@@ -1407,7 +1228,6 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp) {
- assert(Op->getOpcode() == X86ISD::TLSADDR);
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
@@ -1434,11 +1254,12 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment) {
- if (ISD::isNON_EXTLoad(N.getNode()) &&
- N.hasOneUse() &&
- IsLegalAndProfitableToFold(N.getNode(), P, P))
- return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
- return false;
+ if (!ISD::isNON_EXTLoad(N.getNode()) ||
+ !IsProfitableToFold(N, P, P) ||
+ !IsLegalToFold(N, P, P))
+ return false;
+
+ return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
}
/// getGlobalBaseReg - Return an SDNode that returns the value of
@@ -1541,7 +1362,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_DEC16m;
else if (isSub) {
if (isCN) {
- if (Predicate_i16immSExt8(Val.getNode()))
+ if (Predicate_immSext8(Val.getNode()))
Opc = X86::LOCK_SUB16mi8;
else
Opc = X86::LOCK_SUB16mi;
@@ -1549,7 +1370,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_SUB16mr;
} else {
if (isCN) {
- if (Predicate_i16immSExt8(Val.getNode()))
+ if (Predicate_immSext8(Val.getNode()))
Opc = X86::LOCK_ADD16mi8;
else
Opc = X86::LOCK_ADD16mi;
@@ -1564,7 +1385,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_DEC32m;
else if (isSub) {
if (isCN) {
- if (Predicate_i32immSExt8(Val.getNode()))
+ if (Predicate_immSext8(Val.getNode()))
Opc = X86::LOCK_SUB32mi8;
else
Opc = X86::LOCK_SUB32mi;
@@ -1572,7 +1393,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_SUB32mr;
} else {
if (isCN) {
- if (Predicate_i32immSExt8(Val.getNode()))
+ if (Predicate_immSext8(Val.getNode()))
Opc = X86::LOCK_ADD32mi8;
else
Opc = X86::LOCK_ADD32mi;
@@ -1588,7 +1409,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
else if (isSub) {
Opc = X86::LOCK_SUB64mr;
if (isCN) {
- if (Predicate_i64immSExt8(Val.getNode()))
+ if (Predicate_immSext8(Val.getNode()))
Opc = X86::LOCK_SUB64mi8;
else if (Predicate_i64immSExt32(Val.getNode()))
Opc = X86::LOCK_SUB64mi32;
@@ -1596,7 +1417,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
} else {
Opc = X86::LOCK_ADD64mr;
if (isCN) {
- if (Predicate_i64immSExt8(Val.getNode()))
+ if (Predicate_immSext8(Val.getNode()))
Opc = X86::LOCK_ADD64mi8;
else if (Predicate_i64immSExt32(Val.getNode()))
Opc = X86::LOCK_ADD64mi32;
@@ -1652,8 +1473,8 @@ static bool HasNoSignedComparisonUses(SDNode *N) {
case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
- case X86::JA: case X86::JAE: case X86::JB: case X86::JBE:
- case X86::JE: case X86::JNE: case X86::JP: case X86::JNP:
+ case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4:
+ case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4:
case X86::CMOVA16rr: case X86::CMOVA16rm:
case X86::CMOVA32rr: case X86::CMOVA32rm:
case X86::CMOVA64rr: case X86::CMOVA64rm:
@@ -1693,24 +1514,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
unsigned Opcode = Node->getOpcode();
DebugLoc dl = Node->getDebugLoc();
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent, ' ') << "Selecting: ";
- Node->dump(CurDAG);
- dbgs() << '\n';
- });
- Indent += 2;
-#endif
+ DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
if (Node->isMachineOpcode()) {
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent-2, ' ') << "== ";
- Node->dump(CurDAG);
- dbgs() << '\n';
- });
- Indent -= 2;
-#endif
+ DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
return NULL; // Already selected.
}
@@ -1806,13 +1613,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
LoReg, NVT, InFlag);
InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 0), Result);
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent-2, ' ') << "=> ";
- Result.getNode()->dump(CurDAG);
- dbgs() << '\n';
- });
-#endif
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
@@ -1835,19 +1636,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
InFlag = Result.getValue(2);
}
ReplaceUses(SDValue(Node, 1), Result);
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent-2, ' ') << "=> ";
- Result.getNode()->dump(CurDAG);
- dbgs() << '\n';
- });
-#endif
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
-#ifndef NDEBUG
- Indent -= 2;
-#endif
-
return NULL;
}
@@ -1962,13 +1753,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
LoReg, NVT, InFlag);
InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 0), Result);
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent-2, ' ') << "=> ";
- Result.getNode()->dump(CurDAG);
- dbgs() << '\n';
- });
-#endif
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
// Copy the remainder (high) result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
@@ -1992,19 +1777,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
InFlag = Result.getValue(2);
}
ReplaceUses(SDValue(Node, 1), Result);
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent-2, ' ') << "=> ";
- Result.getNode()->dump(CurDAG);
- dbgs() << '\n';
- });
-#endif
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
-
-#ifndef NDEBUG
- Indent -= 2;
-#endif
-
return NULL;
}
@@ -2117,17 +1891,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDNode *ResNode = SelectCode(Node);
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent-2, ' ') << "=> ";
- if (ResNode == NULL || ResNode == Node)
- Node->dump(CurDAG);
- else
- ResNode->dump(CurDAG);
- dbgs() << '\n';
- });
- Indent -= 2;
-#endif
+ DEBUG(dbgs() << "=> ";
+ if (ResNode == NULL || ResNode == Node)
+ Node->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ dbgs() << '\n');
return ResNode;
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 515bc84..802bedc 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -73,15 +73,16 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
case X86Subtarget::isDarwin:
if (TM.getSubtarget<X86Subtarget>().is64Bit())
return new X8664_MachoTargetObjectFile();
- return new X8632_MachoTargetObjectFile();
+ return new TargetLoweringObjectFileMachO();
case X86Subtarget::isELF:
- return new TargetLoweringObjectFileELF();
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return new X8664_ELFTargetObjectFile(TM);
+ return new X8632_ELFTargetObjectFile(TM);
case X86Subtarget::isMingw:
case X86Subtarget::isCygwin:
case X86Subtarget::isWindows:
return new TargetLoweringObjectFileCOFF();
}
-
}
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
@@ -1001,19 +1002,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
computeRegisterProperties();
- // Divide and reminder operations have no vector equivalent and can
- // trap. Do a custom widening for these operations in which we never
- // generate more divides/remainder than the original vector width.
- for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
- if (!isTypeLegal((MVT::SimpleValueType)VT)) {
- setOperationAction(ISD::SDIV, (MVT::SimpleValueType) VT, Custom);
- setOperationAction(ISD::UDIV, (MVT::SimpleValueType) VT, Custom);
- setOperationAction(ISD::SREM, (MVT::SimpleValueType) VT, Custom);
- setOperationAction(ISD::UREM, (MVT::SimpleValueType) VT, Custom);
- }
- }
-
// FIXME: These should be based on subtarget info. Plus, the values should
// be smaller when we are in optimizing for size mode.
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
@@ -1411,18 +1399,6 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
return CC_X86_32_C;
}
-/// NameDecorationForCallConv - Selects the appropriate decoration to
-/// apply to a MachineFunction containing a given calling convention.
-NameDecorationStyle
-X86TargetLowering::NameDecorationForCallConv(CallingConv::ID CallConv) {
- if (CallConv == CallingConv::X86_FastCall)
- return FastCall;
- else if (CallConv == CallingConv::X86_StdCall)
- return StdCall;
- return None;
-}
-
-
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
@@ -1476,7 +1452,8 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
VA.getLocMemOffset(), isImmutable, false);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
return DAG.getLoad(ValVT, dl, Chain, FIN,
- PseudoSourceValue::getFixedStack(FI), 0);
+ PseudoSourceValue::getFixedStack(FI), 0,
+ false, false, 0);
}
}
@@ -1498,9 +1475,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
Fn->getName() == "main")
FuncInfo->setForceFramePointer(true);
- // Decorate the function name.
- FuncInfo->setDecorationStyle(NameDecorationForCallConv(CallConv));
-
MachineFrameInfo *MFI = MF.getFrameInfo();
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isTargetWin64();
@@ -1573,7 +1547,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// If value is passed via pointer - do a load.
if (VA.getLocInfo() == CCValAssign::Indirect)
- ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0);
+ ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0,
+ false, false, 0);
InVals.push_back(ArgValue);
}
@@ -1668,7 +1643,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
- Offset);
+ Offset, false, false, 0);
MemOps.push_back(Store);
Offset += 8;
}
@@ -1737,7 +1712,8 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
}
return DAG.getStore(Chain, dl, Arg, PtrOff,
- PseudoSourceValue::getStack(), LocMemOffset);
+ PseudoSourceValue::getStack(), LocMemOffset,
+ false, false, 0);
}
/// EmitTailCallLoadRetAddr - Emit a load of return address if tail call
@@ -1752,7 +1728,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
OutRetAddr = getReturnAddressFrameIndex(DAG);
// Load the "old" Return address.
- OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0);
+ OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0, false, false, 0);
return SDValue(OutRetAddr.getNode(), 1);
}
@@ -1767,11 +1743,12 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
// Calculate the new stack slot for the return address.
int SlotSize = Is64Bit ? 8 : 4;
int NewReturnAddrFI =
- MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, true,false);
+ MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false, false);
EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
- PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0);
+ PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0,
+ false, false, 0);
return Chain;
}
@@ -1882,7 +1859,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
- PseudoSourceValue::getFixedStack(FI), 0);
+ PseudoSourceValue::getFixedStack(FI), 0,
+ false, false, 0);
Arg = SpillSlot;
break;
}
@@ -2013,7 +1991,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Store relative to framepointer.
MemOpChains2.push_back(
DAG.getStore(ArgChain, dl, Arg, FIN,
- PseudoSourceValue::getFixedStack(FI), 0));
+ PseudoSourceValue::getFixedStack(FI), 0,
+ false, false, 0));
}
}
}
@@ -2256,7 +2235,8 @@ static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
const X86InstrInfo *TII) {
- int FI;
+ unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
+ int FI = INT_MAX;
if (Arg.getOpcode() == ISD::CopyFromReg) {
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
@@ -2272,25 +2252,30 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) &&
Def->getOperand(1).isFI()) {
FI = Def->getOperand(1).getIndex();
- if (MFI->getObjectSize(FI) != Flags.getByValSize())
- return false;
+ Bytes = Flags.getByValSize();
} else
return false;
}
- } else {
- LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg);
- if (!Ld)
+ } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
+ if (Flags.isByVal())
+ // ByVal argument is passed in as a pointer but it's now being
+ // dereferenced. e.g.
+ // define @foo(%struct.X* %A) {
+ // tail call @bar(%struct.X* byval %A)
+ // }
return false;
SDValue Ptr = Ld->getBasePtr();
FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
if (!FINode)
return false;
FI = FINode->getIndex();
- }
+ } else
+ return false;
+ assert(FI != INT_MAX);
if (!MFI->isFixedObjectIndex(FI))
return false;
- return Offset == MFI->getObjectOffset(FI);
+ return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
}
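
The rewritten MatchingStackOffset now records the argument's size in Bytes (falling back to the byval size when the value comes from a frame-index LEA) and only accepts the slot if both the offset and the size of the fixed frame object match. A minimal standalone sketch of that final check, with hypothetical types standing in for MachineFrameInfo:

    // Hypothetical stand-ins for MachineFrameInfo state; sketch only.
    struct FixedObject {
      bool      IsFixed;   // part of the incoming argument area
      long long Offset;    // offset from the frame base
      unsigned  Size;      // size in bytes
    };
    static bool matchesFixedSlot(const FixedObject &FO,
                                 long long Offset, unsigned Bytes) {
      return FO.IsFixed && FO.Offset == Offset && FO.Size == Bytes;
    }
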
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
@@ -2397,7 +2382,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
// Set up a frame object for the return address.
uint64_t SlotSize = TD->getPointerSize();
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
- true, false);
+ false, false);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
@@ -3592,7 +3577,8 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
int EltNo = (Offset - StartOffset) >> 2;
int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
- SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0);
+ SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0,
+ false, false, 0);
// Canonicalize it to a v4i32 shuffle.
V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
@@ -4836,8 +4822,16 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
isa<ConstantSDNode>(N2)) {
- unsigned Opc = (EltVT.getSizeInBits() == 8) ? X86ISD::PINSRB
- : X86ISD::PINSRW;
+ unsigned Opc;
+ if (VT == MVT::v8i16)
+ Opc = X86ISD::PINSRW;
+ else if (VT == MVT::v4i16)
+ Opc = X86ISD::MMX_PINSRW;
+ else if (VT == MVT::v16i8)
+ Opc = X86ISD::PINSRB;
+ else
+ Opc = X86ISD::PINSRB;
+
    // Transform it so it matches pinsr{b,w}, which expects a GR32 as its second
// argument.
if (N1.getValueType() != MVT::i32)
@@ -4888,7 +4882,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
- return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
+ return DAG.getNode(VT == MVT::v8i16 ? X86ISD::PINSRW : X86ISD::MMX_PINSRW,
+ dl, VT, N0, N1, N2);
}
return SDValue();
}
@@ -5091,7 +5086,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
// load.
if (isGlobalStubReference(OpFlags))
Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
- PseudoSourceValue::getGOT(), 0);
+ PseudoSourceValue::getGOT(), 0, false, false, 0);
// If there was a non-zero offset that we didn't fold, create an explicit
// addition for it.
@@ -5171,7 +5166,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
MVT::i32));
SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base,
- NULL, 0);
+ NULL, 0, false, false, 0);
unsigned char OperandFlags = 0;
// Most TLS accesses are not RIP relative, even on x86-64. One exception is
@@ -5196,7 +5191,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
if (model == TLSModel::InitialExec)
Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
- PseudoSourceValue::getGOT(), 0);
+ PseudoSourceValue::getGOT(), 0, false, false, 0);
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
@@ -5264,7 +5259,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
DAG.getConstant(VTBits, MVT::i8));
- SDValue Cond = DAG.getNode(X86ISD::CMP, dl, VT,
+ SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
AndNode, DAG.getConstant(0, MVT::i8));
SDValue Hi, Lo;
@@ -5313,7 +5308,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot,
- PseudoSourceValue::getFixedStack(SSFI), 0);
+ PseudoSourceValue::getFixedStack(SSFI), 0,
+ false, false, 0);
return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
}
@@ -5348,7 +5344,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
};
Chain = DAG.getNode(X86ISD::FST, dl, Tys, Ops, array_lengthof(Ops));
Result = DAG.getLoad(Op.getValueType(), dl, Chain, StackSlot,
- PseudoSourceValue::getFixedStack(SSFI), 0);
+ PseudoSourceValue::getFixedStack(SSFI), 0,
+ false, false, 0);
}
return Result;
@@ -5421,12 +5418,12 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) {
SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2);
SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
PseudoSourceValue::getConstantPool(), 0,
- false, 16);
+ false, false, 16);
SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0);
SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2);
SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
PseudoSourceValue::getConstantPool(), 0,
- false, 16);
+ false, false, 16);
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
// Add the halves; easiest way is to swap them into another reg first.
@@ -5513,9 +5510,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
getPointerTy(), StackSlot, WordOff);
SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
- StackSlot, NULL, 0);
+ StackSlot, NULL, 0, false, false, 0);
SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
- OffsetSlot, NULL, 0);
+ OffsetSlot, NULL, 0, false, false, 0);
return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
}
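
The two stores above materialize a 64-bit integer in the stack slot: the unsigned operand in the low word and an explicit zero in the high word. BuildFILD can then read it back as a signed i64 of the same value, which is what the x87 fild instruction consumes. A standalone sketch of the same trick in source form (illustrative, not code from this patch):

    // Unsigned 32-bit to double via a signed 64-bit widening; every u32 value
    // is exactly representable as an i64, so the conversion is exact.
    double u32_to_double(unsigned int X) {
      long long Wide = (long long)X;   // high 32 bits are zero
      return (double)Wide;             // typically lowered through fild on x87
    }
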
@@ -5563,7 +5560,8 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) {
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, dl, Value, StackSlot,
- PseudoSourceValue::getFixedStack(SSFI), 0);
+ PseudoSourceValue::getFixedStack(SSFI), 0,
+ false, false, 0);
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
SDValue Ops[] = {
Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
@@ -5597,7 +5595,7 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
// Load the result.
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
- FIST, StackSlot, NULL, 0);
+ FIST, StackSlot, NULL, 0, false, false, 0);
}
SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) {
@@ -5607,7 +5605,7 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) {
// Load the result.
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
- FIST, StackSlot, NULL, 0);
+ FIST, StackSlot, NULL, 0, false, false, 0);
}
SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
@@ -5632,8 +5630,8 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
- false, 16);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 16);
return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
}
@@ -5659,8 +5657,8 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
- false, 16);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 16);
if (VT.isVector()) {
return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
DAG.getNode(ISD::XOR, dl, MVT::v2i64,
@@ -5708,8 +5706,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
- false, 16);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 16);
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
// Shift sign bit right or left if the two operands have different types.
@@ -5737,8 +5735,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
- false, 16);
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 16);
SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2);
// Or the value with the sign bit.
@@ -5890,26 +5888,31 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
/// if it's possible.
-static SDValue LowerToBT(SDValue Op0, ISD::CondCode CC,
+static SDValue LowerToBT(SDValue And, ISD::CondCode CC,
DebugLoc dl, SelectionDAG &DAG) {
+ SDValue Op0 = And.getOperand(0);
+ SDValue Op1 = And.getOperand(1);
+ if (Op0.getOpcode() == ISD::TRUNCATE)
+ Op0 = Op0.getOperand(0);
+ if (Op1.getOpcode() == ISD::TRUNCATE)
+ Op1 = Op1.getOperand(0);
+
SDValue LHS, RHS;
- if (Op0.getOperand(1).getOpcode() == ISD::SHL) {
- if (ConstantSDNode *Op010C =
- dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0)))
- if (Op010C->getZExtValue() == 1) {
- LHS = Op0.getOperand(0);
- RHS = Op0.getOperand(1).getOperand(1);
+ if (Op1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *And10C = dyn_cast<ConstantSDNode>(Op1.getOperand(0)))
+ if (And10C->getZExtValue() == 1) {
+ LHS = Op0;
+ RHS = Op1.getOperand(1);
}
- } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) {
- if (ConstantSDNode *Op000C =
- dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0)))
- if (Op000C->getZExtValue() == 1) {
- LHS = Op0.getOperand(1);
- RHS = Op0.getOperand(0).getOperand(1);
+ } else if (Op0.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0)))
+ if (And00C->getZExtValue() == 1) {
+ LHS = Op1;
+ RHS = Op0.getOperand(1);
}
- } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) {
- ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1));
- SDValue AndLHS = Op0.getOperand(0);
+ } else if (Op1.getOpcode() == ISD::Constant) {
+ ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
+ SDValue AndLHS = Op0;
if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
LHS = AndLHS.getOperand(0);
RHS = AndLHS.getOperand(1);
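
After this rewrite LowerToBT receives the whole AND node, looks through truncates on either operand, and recognizes both the "variable bit mask" and "shifted-down bit" shapes. In source terms these are the patterns involved (a standalone sketch, not code from this file); both can be selected as a BT instruction plus a flag read instead of a shift and an AND:

    // Both functions test a single, dynamically chosen bit of X.
    bool bitSetMaskForm(unsigned long X, unsigned N)  { return (X & (1UL << N)) != 0; }
    bool bitSetShiftForm(unsigned long X, unsigned N) { return ((X >> N) & 1) != 0; }
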
@@ -5959,6 +5962,21 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
return NewSetCC;
}
+  // Look for "(setcc) == / != 1" to avoid unnecessary setcc.
+ if (Op0.getOpcode() == X86ISD::SETCC &&
+ Op1.getOpcode() == ISD::Constant &&
+ (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+ cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
+ bool Invert = (CC == ISD::SETNE) ^
+ cast<ConstantSDNode>(Op1)->isNullValue();
+ if (Invert)
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
+ }
+
bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
if (X86CC == X86::COND_INVALID)
@@ -6400,24 +6418,13 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
EVT IntPtr = getPointerTy();
EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
-
Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag);
Flag = Chain.getValue(1);
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Ops[] = { Chain,
- DAG.getTargetExternalSymbol("_alloca", IntPtr),
- DAG.getRegister(X86::EAX, IntPtr),
- DAG.getRegister(X86StackPtr, SPTy),
- Flag };
- Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops, 5);
- Flag = Chain.getValue(1);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getIntPtrConstant(0, true),
- DAG.getIntPtrConstant(0, true),
- Flag);
+ Chain = DAG.getNode(X86ISD::MINGW_ALLOCA, dl, NodeTys, Chain, Flag);
+ Flag = Chain.getValue(1);
Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);
@@ -6461,8 +6468,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
false, false, false, false,
0, CallingConv::C, false, /*isReturnValueUsed=*/false,
- DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl,
- DAG.GetOrdering(Chain.getNode()));
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
return CallResult.second;
}
@@ -6646,7 +6652,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
- return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
+ false, false, 0);
}
// __va_list_tag:
@@ -6658,8 +6665,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) {
SDValue FIN = Op.getOperand(1);
// Store gp_offset
SDValue Store = DAG.getStore(Op.getOperand(0), dl,
- DAG.getConstant(VarArgsGPOffset, MVT::i32),
- FIN, SV, 0);
+ DAG.getConstant(VarArgsGPOffset, MVT::i32),
+ FIN, SV, 0, false, false, 0);
MemOps.push_back(Store);
// Store fp_offset
@@ -6667,21 +6674,23 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) {
FIN, DAG.getIntPtrConstant(4));
Store = DAG.getStore(Op.getOperand(0), dl,
DAG.getConstant(VarArgsFPOffset, MVT::i32),
- FIN, SV, 0);
+ FIN, SV, 0, false, false, 0);
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
FIN, DAG.getIntPtrConstant(4));
SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
- Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0);
+ Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0,
+ false, false, 0);
MemOps.push_back(Store);
// Store ptr to reg_save_area.
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
FIN, DAG.getIntPtrConstant(8));
SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
- Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0);
+ Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0,
+ false, false, 0);
MemOps.push_back(Store);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOps[0], MemOps.size());
@@ -6967,13 +6976,13 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
- NULL, 0);
+ NULL, 0, false, false, 0);
}
// Just load the return address.
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- RetAddrFI, NULL, 0);
+ RetAddrFI, NULL, 0, false, false, 0);
}
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
@@ -6985,7 +6994,8 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
- FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0);
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
+ false, false, 0);
return FrameAddr;
}
@@ -7009,7 +7019,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
SDValue StoreAddr = DAG.getNode(ISD::SUB, dl, getPointerTy(), Frame,
DAG.getIntPtrConstant(-TD->getPointerSize()));
StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
- Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0);
+ Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0);
Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
MF.getRegInfo().addLiveOut(StoreAddrReg);
@@ -7044,11 +7054,12 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
SDValue Addr = Trmp;
OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
- Addr, TrmpAddr, 0);
+ Addr, TrmpAddr, 0, false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(2, MVT::i64));
- OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, false, 2);
+ OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2,
+ false, false, 2);
// Load the 'nest' parameter value into R10.
// R10 is specified in X86CallingConv.td
@@ -7056,24 +7067,25 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(10, MVT::i64));
OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
- Addr, TrmpAddr, 10);
+ Addr, TrmpAddr, 10, false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(12, MVT::i64));
- OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, false, 2);
+ OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12,
+ false, false, 2);
// Jump to the nested function.
OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(20, MVT::i64));
OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
- Addr, TrmpAddr, 20);
+ Addr, TrmpAddr, 20, false, false, 0);
unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(22, MVT::i64));
OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr,
- TrmpAddr, 22);
+ TrmpAddr, 22, false, false, 0);
SDValue Ops[] =
{ Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) };
@@ -7133,21 +7145,23 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg);
OutChains[0] = DAG.getStore(Root, dl,
DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
- Trmp, TrmpAddr, 0);
+ Trmp, TrmpAddr, 0, false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(1, MVT::i32));
- OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, false, 1);
+ OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1,
+ false, false, 1);
const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode.
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(5, MVT::i32));
OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr,
- TrmpAddr, 5, false, 1);
+ TrmpAddr, 5, false, false, 1);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(6, MVT::i32));
- OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, false, 1);
+ OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6,
+ false, false, 1);
SDValue Ops[] =
{ Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) };
@@ -7190,7 +7204,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
DAG.getEntryNode(), StackSlot);
// Load FP Control Word from stack slot
- SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0);
+ SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0,
+ false, false, 0);
// Transform as necessary
SDValue CWD1 =
@@ -7554,7 +7569,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
if (FIST.getNode() != 0) {
EVT VT = N->getValueType(0);
// Return a load from the stack slot.
- Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0));
+ Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0,
+ false, false, 0));
}
return;
}
@@ -7572,14 +7588,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(edx.getValue(1));
return;
}
- case ISD::SDIV:
- case ISD::UDIV:
- case ISD::SREM:
- case ISD::UREM: {
- EVT WidenVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- Results.push_back(DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()));
- return;
- }
case ISD::ATOMIC_CMP_SWAP: {
EVT T = N->getValueType(0);
assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
@@ -7677,6 +7685,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
case X86ISD::PINSRB: return "X86ISD::PINSRB";
case X86ISD::PINSRW: return "X86ISD::PINSRW";
+ case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW";
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
@@ -7721,6 +7730,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
+ case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA";
}
}
@@ -7778,13 +7788,13 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
- if (!Ty1->isInteger() || !Ty2->isInteger())
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
if (NumBits1 <= NumBits2)
return false;
- return Subtarget->is64Bit() || NumBits1 < 64;
+ return true;
}
bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
@@ -7794,12 +7804,12 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
unsigned NumBits2 = VT2.getSizeInBits();
if (NumBits1 <= NumBits2)
return false;
- return Subtarget->is64Bit() || NumBits1 < 64;
+ return true;
}
bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {
// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
- return Ty1->isInteger(32) && Ty2->isInteger(64) && Subtarget->is64Bit();
+ return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit();
}
bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
@@ -7955,7 +7965,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
MIB.addReg(EAXreg);
// insert branch
- BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB);
+ BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now.
return nextMBB;
@@ -8112,7 +8122,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
MIB.addReg(X86::EDX);
// insert branch
- BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB);
+ BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now.
return nextMBB;
@@ -8215,7 +8225,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
MIB.addReg(X86::EAX);
// insert branch
- BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB);
+ BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
F->DeleteMachineInstr(mInstr); // The pseudo instruction is gone now.
return nextMBB;
@@ -8297,7 +8307,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
if (!Subtarget->isTargetWin64()) {
// If %al is 0, branch around the XMM save block.
BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
- BuildMI(MBB, DL, TII->get(X86::JE)).addMBB(EndMBB);
+ BuildMI(MBB, DL, TII->get(X86::JE_4)).addMBB(EndMBB);
MBB->addSuccessor(EndMBB);
}
@@ -8390,6 +8400,29 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
return BB;
}
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction *F = BB->getParent();
+
+ // The lowering is pretty easy: we're just emitting the call to _alloca. The
+ // non-trivial part is impdef of ESP.
+  // FIXME: The code should be tweaked as soon as we try to do codegen for
+  // mingw-w64.
+
+ BuildMI(BB, DL, TII->get(X86::CALLpcrel32))
+ .addExternalSymbol("_alloca")
+ .addReg(X86::EAX, RegState::Implicit)
+ .addReg(X86::ESP, RegState::Implicit)
+ .addReg(X86::EAX, RegState::Define | RegState::Implicit)
+ .addReg(X86::ESP, RegState::Define | RegState::Implicit);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+}
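
On MinGW the allocation size is passed in EAX (see the CopyToReg in LowerDYNAMIC_STACKALLOC above), and _alloca both probes the stack page by page and adjusts ESP, which is why the call is emitted with implicit uses and defs of both registers. A rough standalone sketch of the probing idea, assuming 4 KiB pages (the real routine is assembly and also moves the stack pointer):

    // Touch each new page going downward so the OS guard page can grow the
    // stack before the large allocation is actually used.  Sketch only.
    void probeStack(volatile char *StackPtr, unsigned long Size) {
      for (unsigned long Probed = 4096; Probed < Size; Probed += 4096)
        StackPtr[-(long)Probed] = 0;
    }
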
MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
@@ -8397,6 +8430,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
switch (MI->getOpcode()) {
default: assert(false && "Unexpected instr type to insert");
+ case X86::MINGW_ALLOCA:
+ return EmitLoweredMingwAlloca(MI, BB, EM);
case X86::CMOV_GR8:
case X86::CMOV_V1I64:
case X86::CMOV_FR32:
@@ -8783,10 +8818,11 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16)
return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
LD->getSrcValue(), LD->getSrcValueOffset(),
- LD->isVolatile());
+ LD->isVolatile(), LD->isNonTemporal(), 0);
return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
LD->getSrcValue(), LD->getSrcValueOffset(),
- LD->isVolatile(), LD->getAlignment());
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
} else if (NumElems == 4 && LastLoadedElt == 1) {
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
@@ -8806,10 +8842,9 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
SDValue RHS = N->getOperand(2);
// If we have SSE[12] support, try to form min/max nodes. SSE min/max
- // instructions have the peculiarity that if either operand is a NaN,
- // they chose what we call the RHS operand (and as such are not symmetric).
- // It happens that this matches the semantics of the common C idiom
- // x<y?x:y and related forms, so we can recognize these cases.
+ // instructions match the semantics of the common C idiom x<y?x:y but not
+ // x<=y?x:y, because of how they handle negative zero (which can be
+ // ignored in unsafe-math mode).
if (Subtarget->hasSSE2() &&
(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) &&
Cond.getOpcode() == ISD::SETCC) {
@@ -8817,36 +8852,34 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
unsigned Opcode = 0;
// Check for x CC y ? x : y.
- if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
+ if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(1))) {
switch (CC) {
default: break;
case ISD::SETULT:
- // This can be a min if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(RHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(LHS))
+ // Converting this to a min would handle NaNs incorrectly, and swapping
+ // the operands would cause it to handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!FiniteOnlyFPMath() &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
+ if (!UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
+ std::swap(LHS, RHS);
}
Opcode = X86ISD::FMIN;
break;
case ISD::SETOLE:
- // This can be a min if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(LHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(RHS))
- break;
- }
+ // Converting this to a min would handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
+ break;
Opcode = X86ISD::FMIN;
break;
case ISD::SETULE:
- // This can be a min, but if either operand is a NaN we need it to
- // preserve the original LHS.
+ // Converting this to a min would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOLT:
case ISD::SETLT:
@@ -8855,32 +8888,29 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
break;
case ISD::SETOGE:
- // This can be a max if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(LHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(RHS))
- break;
- }
+ // Converting this to a max would handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!UnsafeFPMath &&
+            !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
+ break;
Opcode = X86ISD::FMAX;
break;
case ISD::SETUGT:
- // This can be a max if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(RHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(LHS))
+ // Converting this to a max would handle NaNs incorrectly, and swapping
+ // the operands would cause it to handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!FiniteOnlyFPMath() &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
+ if (!UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
+ std::swap(LHS, RHS);
}
Opcode = X86ISD::FMAX;
break;
case ISD::SETUGE:
- // This can be a max, but if either operand is a NaN we need it to
- // preserve the original LHS.
+ // Converting this to a max would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOGT:
case ISD::SETGT:
@@ -8889,36 +8919,33 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
break;
}
// Check for x CC y ? y : x -- a min/max with reversed arms.
- } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
+ } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(0))) {
switch (CC) {
default: break;
case ISD::SETOGE:
- // This can be a min if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(RHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(LHS))
+ // Converting this to a min would handle comparisons between positive
+ // and negative zero incorrectly, and swapping the operands would
+ // cause it to handle NaNs incorrectly.
+ if (!UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
+ if (!FiniteOnlyFPMath() &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
break;
+ std::swap(LHS, RHS);
}
Opcode = X86ISD::FMIN;
break;
case ISD::SETUGT:
- // This can be a min if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(LHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(RHS))
- break;
- }
+ // Converting this to a min would handle NaNs incorrectly.
+ if (!UnsafeFPMath &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ break;
Opcode = X86ISD::FMIN;
break;
case ISD::SETUGE:
- // This can be a min, but if either operand is a NaN we need it to
- // preserve the original LHS.
+ // Converting this to a min would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOGT:
case ISD::SETGT:
@@ -8927,32 +8954,28 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
break;
case ISD::SETULT:
- // This can be a max if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(LHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(RHS))
- break;
- }
+ // Converting this to a max would handle NaNs incorrectly.
+ if (!FiniteOnlyFPMath() &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ break;
Opcode = X86ISD::FMAX;
break;
case ISD::SETOLE:
- // This can be a max if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(RHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(LHS))
+ // Converting this to a max would handle comparisons between positive
+ // and negative zero incorrectly, and swapping the operands would
+ // cause it to handle NaNs incorrectly.
+ if (!UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
+ if (!FiniteOnlyFPMath() &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
break;
+ std::swap(LHS, RHS);
}
Opcode = X86ISD::FMAX;
break;
case ISD::SETULE:
- // This can be a max, but if either operand is a NaN we need it to
- // preserve the original LHS.
+ // Converting this to a max would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOLT:
case ISD::SETLT:
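
The rewritten comments in this combine encode the SSE scalar min/max semantics: MINSS/MINSD and MAXSS/MAXSD are not commutative, because whenever the comparison is false or unordered (a NaN is present, or +0.0 compares equal to -0.0) the second source operand is returned. In source form the instruction behaves like this sketch, which is why the operand order has to be chosen carefully or swapped:

    // Scalar SSE min behaves like this expression, not like a symmetric fmin():
    // it returns B whenever A < B is false, including when either operand is a
    // NaN and when A and B are zeros of opposite sign.
    float sseMinss(float A, float B) { return A < B ? A : B; }
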
@@ -9177,10 +9200,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
/// LEA + SHL, LEA + LEA.
static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
- if (DAG.getMachineFunction().
- getFunction()->hasFnAttr(Attribute::OptimizeForSize))
- return SDValue();
-
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
@@ -9319,7 +9338,7 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
}
} else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
- unsigned SplatIdx = cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
+ unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
if (C->getZExtValue() == SplatIdx)
BaseShAmt = InVec.getOperand(1);
}
@@ -9505,7 +9524,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(),
Ld->getBasePtr(), Ld->getSrcValue(),
Ld->getSrcValueOffset(), Ld->isVolatile(),
- Ld->getAlignment());
+ Ld->isNonTemporal(), Ld->getAlignment());
SDValue NewChain = NewLd.getValue(1);
if (TokenFactorIndex != -1) {
Ops.push_back(NewChain);
@@ -9514,7 +9533,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
}
return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
St->getSrcValue(), St->getSrcValueOffset(),
- St->isVolatile(), St->getAlignment());
+ St->isVolatile(), St->isNonTemporal(),
+ St->getAlignment());
}
// Otherwise, lower to two pairs of 32-bit loads / stores.
@@ -9524,10 +9544,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
Ld->getSrcValue(), Ld->getSrcValueOffset(),
- Ld->isVolatile(), Ld->getAlignment());
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
- Ld->isVolatile(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
MinAlign(Ld->getAlignment(), 4));
SDValue NewChain = LoLd.getValue(1);
@@ -9544,11 +9565,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
St->getSrcValue(), St->getSrcValueOffset(),
- St->isVolatile(), St->getAlignment());
+ St->isVolatile(), St->isNonTemporal(),
+ St->getAlignment());
SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr,
St->getSrcValue(),
St->getSrcValueOffset() + 4,
St->isVolatile(),
+ St->isNonTemporal(),
MinAlign(St->getAlignment(), 4));
return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
}
@@ -9731,7 +9754,7 @@ static bool LowerToBSwap(CallInst *CI) {
// Verify this is a simple bswap.
if (CI->getNumOperands() != 2 ||
CI->getType() != CI->getOperand(1)->getType() ||
- !CI->getType()->isInteger())
+ !CI->getType()->isIntegerTy())
return false;
const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
@@ -9780,17 +9803,26 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
return LowerToBSwap(CI);
}
// rorw $$8, ${0:w} --> llvm.bswap.i16
- if (CI->getType()->isInteger(16) &&
+ if (CI->getType()->isIntegerTy(16) &&
AsmPieces.size() == 3 &&
- AsmPieces[0] == "rorw" &&
+ (AsmPieces[0] == "rorw" || AsmPieces[0] == "rolw") &&
AsmPieces[1] == "$$8," &&
AsmPieces[2] == "${0:w}" &&
- IA->getConstraintString() == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") {
- return LowerToBSwap(CI);
+ IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
+ AsmPieces.clear();
+ SplitString(IA->getConstraintString().substr(5), AsmPieces, ",");
+ std::sort(AsmPieces.begin(), AsmPieces.end());
+ if (AsmPieces.size() == 4 &&
+ AsmPieces[0] == "~{cc}" &&
+ AsmPieces[1] == "~{dirflag}" &&
+ AsmPieces[2] == "~{flags}" &&
+ AsmPieces[3] == "~{fpsr}") {
+ return LowerToBSwap(CI);
+ }
}
break;
case 3:
- if (CI->getType()->isInteger(64) &&
+ if (CI->getType()->isIntegerTy(64) &&
Constraints.size() >= 2 &&
Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 193ef05..4c12fcc 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -180,7 +180,7 @@ namespace llvm {
/// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector,
/// corresponds to X86::PINSRW.
- PINSRW,
+ PINSRW, MMX_PINSRW,
/// PSHUFB - Shuffle 16 8-bit values within a vector.
PSHUFB,
@@ -249,6 +249,9 @@ namespace llvm {
// with control flow.
VASTART_SAVE_XMM_REGS,
+      // MINGW_ALLOCA - MinGW's __alloca call to do stack probing.
+ MINGW_ALLOCA,
+
// ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
// ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
// Atomic 64-bit binary operations.
@@ -259,6 +262,10 @@ namespace llvm {
ATOMAND64_DAG,
ATOMNAND64_DAG,
ATOMSWAP64_DAG
+
+    // WARNING: Do not add anything at the end unless you want the node to
+    // have a memop! In fact, starting from ATOMADD64_DAG, all opcodes will
+    // be treated as target memory ops!
};
}
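
The warning reflects a SelectionDAG convention: target opcodes numbered at or beyond the first "target memory opcode" are assumed to carry a MachineMemOperand, and the ATOM*64_DAG nodes are kept last so that they, and only they, fall into that range. Treat the exact mechanism as an assumption here; the ordering constraint it imposes looks like this sketch (illustrative names only):

    enum Opcode { PLAIN_OP_A, PLAIN_OP_B, FIRST_MEM_OPCODE,
                  ATOM_ADD64 = FIRST_MEM_OPCODE, ATOM_SWAP64 };
    static bool isMemOpcode(Opcode Op) { return Op >= FIRST_MEM_OPCODE; }
    // A new non-memory opcode appended after ATOM_SWAP64 would wrongly satisfy
    // isMemOpcode(); it has to be inserted before FIRST_MEM_OPCODE instead.
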
@@ -639,7 +646,6 @@ namespace llvm {
int FPDiff, DebugLoc dl);
CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const;
- NameDecorationStyle NameDecorationForCallConv(CallingConv::ID CallConv);
unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG);
std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
@@ -790,7 +796,11 @@ namespace llvm {
MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
MachineBasicBlock *BB,
DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
-
+
+ MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent, for use with the given x86 condition code.
SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG);
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 468dd67..8462255 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -59,10 +59,11 @@ def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr",
// Pattern fragments.
//
-def i64immSExt8 : PatLeaf<(i64 imm), [{
- // i64immSExt8 predicate - True if the 64-bit immediate fits in a 8-bit
- // sign extended field.
- return (int64_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+def i64immSExt8 : PatLeaf<(i64 immSext8)>;
+
+def GetLo32XForm : SDNodeXForm<imm, [{
+ // Transformation function: get the low 32 bits.
+ return getI32Imm((unsigned)N->getZExtValue());
}]>;
def i64immSExt32 : PatLeaf<(i64 imm), [{
@@ -71,6 +72,7 @@ def i64immSExt32 : PatLeaf<(i64 imm), [{
return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue();
}]>;
+
def i64immZExt32 : PatLeaf<(i64 imm), [{
// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
  // unsigned field.
@@ -325,7 +327,7 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (load addr:$src))]>;
@@ -556,7 +558,7 @@ def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
addr:$dst)]>;
def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
"adc{q}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), i64immSExt8:$src2),
+ [(store (adde (load addr:$dst), i64immSExt32:$src2),
addr:$dst)]>;
} // Uses = [EFLAGS]
@@ -893,35 +895,38 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
let isTwoAddress = 1 in {
def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src),
"rcl{q}\t{1, $dst|$dst, 1}", []>;
-def RCL64m1 : RI<0xD1, MRM2m, (outs i64mem:$dst), (ins i64mem:$src),
- "rcl{q}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src),
- "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
-def RCL64mCL : RI<0xD3, MRM2m, (outs i64mem:$dst), (ins i64mem:$src),
- "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
-}
def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
"rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCL64mi : RIi8<0xC1, MRM2m, (outs i64mem:$dst),
- (ins i64mem:$src, i8imm:$cnt),
- "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src),
"rcr{q}\t{1, $dst|$dst, 1}", []>;
-def RCR64m1 : RI<0xD1, MRM3m, (outs i64mem:$dst), (ins i64mem:$src),
- "rcr{q}\t{1, $dst|$dst, 1}", []>;
+def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+
let Uses = [CL] in {
+def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src),
+ "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src),
"rcr{q}\t{%cl, $dst|$dst, CL}", []>;
-def RCR64mCL : RI<0xD3, MRM3m, (outs i64mem:$dst), (ins i64mem:$src),
- "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
}
-def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
- "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR64mi : RIi8<0xC1, MRM3m, (outs i64mem:$dst),
- (ins i64mem:$src, i8imm:$cnt),
+}
+
+let isTwoAddress = 0 in {
+def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
+ "rcl{q}\t{1, $dst|$dst, 1}", []>;
+def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst),
+ "rcr{q}\t{1, $dst|$dst, 1}", []>;
+def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt),
"rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+let Uses = [CL] in {
+def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst),
+ "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
+ "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+}
}
let isTwoAddress = 1 in {
@@ -1771,7 +1776,7 @@ def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def SWPGS : I<0x01, RawFrm, (outs), (ins), "swpgs", []>, TB;
+def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB;
def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins),
"push{q}\t%fs", []>, TB;
@@ -1978,7 +1983,7 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm),
(i64 0),
(AND32ri
(EXTRACT_SUBREG GR64:$src, x86_subreg_32bit),
- imm:$imm),
+ (i32 (GetLo32XForm imm:$imm))),
x86_subreg_32bit)>;
// r & (2^32-1) ==> movz
@@ -2102,34 +2107,34 @@ def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
// (shl x (and y, 63)) ==> (shl x, y)
-def : Pat<(shl GR64:$src1, (and CL:$amt, 63)),
+def : Pat<(shl GR64:$src1, (and CL, 63)),
(SHL64rCL GR64:$src1)>;
-def : Pat<(store (shl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
+def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
(SHL64mCL addr:$dst)>;
-def : Pat<(srl GR64:$src1, (and CL:$amt, 63)),
+def : Pat<(srl GR64:$src1, (and CL, 63)),
(SHR64rCL GR64:$src1)>;
-def : Pat<(store (srl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
+def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
(SHR64mCL addr:$dst)>;
-def : Pat<(sra GR64:$src1, (and CL:$amt, 63)),
+def : Pat<(sra GR64:$src1, (and CL, 63)),
(SAR64rCL GR64:$src1)>;
-def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
+def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
(SAR64mCL addr:$dst)>;
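
These patterns can drop the explicit AND because the hardware already masks the shift count: for 64-bit operands only the low six bits of CL are used, so shl/shr/sar by CL computes the same result as shifting by (CL & 63). A standalone illustration:

    // The masking the selected shlq/shrq/sarq instructions perform implicitly.
    unsigned long long shiftLeftMasked(unsigned long long X, unsigned char Count) {
      return X << (Count & 63);
    }
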
// Double shift patterns
-def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
+def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)),
(SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1),
- GR64:$src2, (i8 imm:$amt2)), addr:$dst),
+ GR64:$src2, (i8 imm)), addr:$dst),
(SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
-def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
+def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)),
(SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1),
- GR64:$src2, (i8 imm:$amt2)), addr:$dst),
+ GR64:$src2, (i8 imm)), addr:$dst),
(SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index e22a903..ae24bfb 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -397,7 +397,7 @@ def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
let canFoldAsLoad = 1 in {
def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP32:$dst, (loadf32 addr:$src))]>;
-let isReMaterializable = 1, mayHaveSideEffects = 1 in
+let isReMaterializable = 1 in
def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
[(set RFP64:$dst, (loadf64 addr:$src))]>;
def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP,
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index a799f16..bb81cbf 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -29,7 +29,16 @@ def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>;
def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>;
def MRM6m : Format<30>; def MRM7m : Format<31>;
def MRMInitReg : Format<32>;
-
+def MRM_C1 : Format<33>;
+def MRM_C2 : Format<34>;
+def MRM_C3 : Format<35>;
+def MRM_C4 : Format<36>;
+def MRM_C8 : Format<37>;
+def MRM_C9 : Format<38>;
+def MRM_E8 : Format<39>;
+def MRM_F0 : Format<40>;
+def MRM_F8 : Format<41>;
+def MRM_F9 : Format<42>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -37,11 +46,13 @@ def MRMInitReg : Format<32>;
class ImmType<bits<3> val> {
bits<3> Value = val;
}
-def NoImm : ImmType<0>;
-def Imm8 : ImmType<1>;
-def Imm16 : ImmType<2>;
-def Imm32 : ImmType<3>;
-def Imm64 : ImmType<4>;
+def NoImm : ImmType<0>;
+def Imm8 : ImmType<1>;
+def Imm8PCRel : ImmType<2>;
+def Imm16 : ImmType<3>;
+def Imm32 : ImmType<4>;
+def Imm32PCRel : ImmType<5>;
+def Imm64 : ImmType<6>;
// FPFormat - This specifies what form this FP instruction has. This is used by
// the Floating-Point stackifier pass.
@@ -121,6 +132,12 @@ class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
let Pattern = pattern;
let CodeSize = 3;
}
+class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm8PCRel, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm16, outs, ins, asm> {
@@ -134,6 +151,13 @@ class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
let CodeSize = 3;
}
+class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm32PCRel, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
// FPStack Instruction Templates:
// FPI - Floating Point Instruction template.
class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 8d13c0f..39bda04 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -276,11 +276,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOVDQArr, X86::MOVDQAmr, 0, 16 },
{ X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
{ X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 },
- { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0, 0 },
- { X86::MOVSDrr, X86::MOVSDmr, 0, 0 },
{ X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
{ X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 },
- { X86::MOVSSrr, X86::MOVSSmr, 0, 0 },
{ X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 },
{ X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 },
{ X86::MUL16r, X86::MUL16m, 1, 0 },
@@ -389,12 +386,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
{ X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
{ X86::MOVDQArr, X86::MOVDQArm, 16 },
- { X86::MOVSD2PDrr, X86::MOVSD2PDrm, 0 },
- { X86::MOVSDrr, X86::MOVSDrm, 0 },
{ X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 },
{ X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 },
- { X86::MOVSS2PSrr, X86::MOVSS2PSrm, 0 },
- { X86::MOVSSrr, X86::MOVSSrm, 0 },
{ X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
{ X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
{ X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
@@ -682,23 +675,20 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
case X86::MOV16rr:
case X86::MOV32rr:
case X86::MOV64rr:
- case X86::MOVSSrr:
- case X86::MOVSDrr:
// FP Stack register class copies
case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
case X86::MOV_Fp3264: case X86::MOV_Fp3280:
case X86::MOV_Fp6432: case X86::MOV_Fp8032:
-
+
+ // Note that MOVSSrr and MOVSDrr are not considered copies. FR32 and FR64
+ // copies are done with FsMOVAPSrr and FsMOVAPDrr.
+
case X86::FsMOVAPSrr:
case X86::FsMOVAPDrr:
case X86::MOVAPSrr:
case X86::MOVAPDrr:
case X86::MOVDQArr:
- case X86::MOVSS2PSrr:
- case X86::MOVSD2PDrr:
- case X86::MOVPS2SSrr:
- case X86::MOVPD2SDrr:
case X86::MMX_MOVQ64rr:
assert(MI.getNumOperands() >= 2 &&
MI.getOperand(0).isReg() &&
@@ -1083,7 +1073,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
case X86::MOV8r0: Opc = X86::MOV8ri; break;
case X86::MOV16r0: Opc = X86::MOV16ri; break;
case X86::MOV32r0: Opc = X86::MOV32ri; break;
- case X86::MOV64r0: Opc = X86::MOV64ri; break;
+ case X86::MOV64r0: Opc = X86::MOV64ri64i32; break;
}
Clone = false;
}
@@ -1587,44 +1577,44 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
switch (BrOpc) {
default: return X86::COND_INVALID;
- case X86::JE: return X86::COND_E;
- case X86::JNE: return X86::COND_NE;
- case X86::JL: return X86::COND_L;
- case X86::JLE: return X86::COND_LE;
- case X86::JG: return X86::COND_G;
- case X86::JGE: return X86::COND_GE;
- case X86::JB: return X86::COND_B;
- case X86::JBE: return X86::COND_BE;
- case X86::JA: return X86::COND_A;
- case X86::JAE: return X86::COND_AE;
- case X86::JS: return X86::COND_S;
- case X86::JNS: return X86::COND_NS;
- case X86::JP: return X86::COND_P;
- case X86::JNP: return X86::COND_NP;
- case X86::JO: return X86::COND_O;
- case X86::JNO: return X86::COND_NO;
+ case X86::JE_4: return X86::COND_E;
+ case X86::JNE_4: return X86::COND_NE;
+ case X86::JL_4: return X86::COND_L;
+ case X86::JLE_4: return X86::COND_LE;
+ case X86::JG_4: return X86::COND_G;
+ case X86::JGE_4: return X86::COND_GE;
+ case X86::JB_4: return X86::COND_B;
+ case X86::JBE_4: return X86::COND_BE;
+ case X86::JA_4: return X86::COND_A;
+ case X86::JAE_4: return X86::COND_AE;
+ case X86::JS_4: return X86::COND_S;
+ case X86::JNS_4: return X86::COND_NS;
+ case X86::JP_4: return X86::COND_P;
+ case X86::JNP_4: return X86::COND_NP;
+ case X86::JO_4: return X86::COND_O;
+ case X86::JNO_4: return X86::COND_NO;
}
}
unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
switch (CC) {
default: llvm_unreachable("Illegal condition code!");
- case X86::COND_E: return X86::JE;
- case X86::COND_NE: return X86::JNE;
- case X86::COND_L: return X86::JL;
- case X86::COND_LE: return X86::JLE;
- case X86::COND_G: return X86::JG;
- case X86::COND_GE: return X86::JGE;
- case X86::COND_B: return X86::JB;
- case X86::COND_BE: return X86::JBE;
- case X86::COND_A: return X86::JA;
- case X86::COND_AE: return X86::JAE;
- case X86::COND_S: return X86::JS;
- case X86::COND_NS: return X86::JNS;
- case X86::COND_P: return X86::JP;
- case X86::COND_NP: return X86::JNP;
- case X86::COND_O: return X86::JO;
- case X86::COND_NO: return X86::JNO;
+ case X86::COND_E: return X86::JE_4;
+ case X86::COND_NE: return X86::JNE_4;
+ case X86::COND_L: return X86::JL_4;
+ case X86::COND_LE: return X86::JLE_4;
+ case X86::COND_G: return X86::JG_4;
+ case X86::COND_GE: return X86::JGE_4;
+ case X86::COND_B: return X86::JB_4;
+ case X86::COND_BE: return X86::JBE_4;
+ case X86::COND_A: return X86::JA_4;
+ case X86::COND_AE: return X86::JAE_4;
+ case X86::COND_S: return X86::JS_4;
+ case X86::COND_NS: return X86::JNS_4;
+ case X86::COND_P: return X86::JP_4;
+ case X86::COND_NP: return X86::JNP_4;
+ case X86::COND_O: return X86::JO_4;
+ case X86::COND_NO: return X86::JNO_4;
}
}
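
The branch opcodes now carry an explicit displacement-size suffix: the _4 forms use a 32-bit (rel32) displacement, while the corresponding _1 forms (not shown here) use rel8. Branch analysis and insertion always create the rel32 variants, which can reach any target in the function; shrinking to rel8 where the distance allows is presumably left to a later relaxation step. The range that decision depends on, as a sketch:

    // A rel8 displacement spans [-128, 127] bytes from the end of the branch;
    // rel32 covers the rest, so the _4 form is always safe to emit first.
    static bool fitsInRel8(long long Displacement) {
      return Displacement >= -128 && Displacement <= 127;
    }
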
@@ -1694,7 +1684,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return true;
// Handle unconditional branches.
- if (I->getOpcode() == X86::JMP) {
+ if (I->getOpcode() == X86::JMP_4) {
if (!AllowModify) {
TBB = I->getOperand(0).getMBB();
continue;
@@ -1778,7 +1768,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
while (I != MBB.begin()) {
--I;
- if (I->getOpcode() != X86::JMP &&
+ if (I->getOpcode() != X86::JMP_4 &&
GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
break;
// Remove the branch.
@@ -1804,7 +1794,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
if (Cond.empty()) {
// Unconditional branch?
assert(!FBB && "Unconditional branch with multiple successors!");
- BuildMI(&MBB, dl, get(X86::JMP)).addMBB(TBB);
+ BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(TBB);
return 1;
}
@@ -1814,16 +1804,16 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
switch (CC) {
case X86::COND_NP_OR_E:
// Synthesize NP_OR_E with two branches.
- BuildMI(&MBB, dl, get(X86::JNP)).addMBB(TBB);
+ BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB);
++Count;
- BuildMI(&MBB, dl, get(X86::JE)).addMBB(TBB);
+ BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB);
++Count;
break;
case X86::COND_NE_OR_P:
// Synthesize NE_OR_P with two branches.
- BuildMI(&MBB, dl, get(X86::JNE)).addMBB(TBB);
+ BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB);
++Count;
- BuildMI(&MBB, dl, get(X86::JP)).addMBB(TBB);
+ BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB);
++Count;
break;
default: {
@@ -1834,7 +1824,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
}
if (FBB) {
// Two-way Conditional branch. Insert the second branch.
- BuildMI(&MBB, dl, get(X86::JMP)).addMBB(FBB);
+ BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB);
++Count;
}
return Count;
@@ -1860,7 +1850,7 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
CommonRC = SrcRC;
else if (!DestRC->hasSubClass(SrcRC)) {
// Neither of GR64_NOREX or GR64_NOSP is a superclass of the other,
- // but we want to copy then as GR64. Similarly, for GR32_NOREX and
+ // but we want to copy them as GR64. Similarly, for GR32_NOREX and
// GR32_NOSP, copy as GR32.
if (SrcRC->hasSuperClass(&X86::GR64RegClass) &&
DestRC->hasSuperClass(&X86::GR64RegClass))
@@ -3556,6 +3546,14 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
}
}
break;
+
+ case X86II::MRM_C1:
+ case X86II::MRM_C8:
+ case X86II::MRM_C9:
+ case X86II::MRM_E8:
+ case X86II::MRM_F0:
+ FinalSize += 2;
+ break;
}
case X86II::MRMInitReg:
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index a6b3863..5111719 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -268,6 +268,18 @@ namespace X86II {
// MRMInitReg - This form is used for instructions whose source and
// destinations are the same register.
MRMInitReg = 32,
+
+  // MRM_C1 - A mod/rm byte of exactly 0xC1.
+ MRM_C1 = 33,
+ MRM_C2 = 34,
+ MRM_C3 = 35,
+ MRM_C4 = 36,
+ MRM_C8 = 37,
+ MRM_C9 = 38,
+ MRM_E8 = 39,
+ MRM_F0 = 40,
+ MRM_F8 = 41,
+ MRM_F9 = 42,
FormMask = 63,
@@ -331,11 +343,13 @@ namespace X86II {
// This three-bit field describes the size of an immediate operand. Zero is
// unused so that we can tell if we forgot to set a value.
ImmShift = 13,
- ImmMask = 7 << ImmShift,
- Imm8 = 1 << ImmShift,
- Imm16 = 2 << ImmShift,
- Imm32 = 3 << ImmShift,
- Imm64 = 4 << ImmShift,
+ ImmMask = 7 << ImmShift,
+ Imm8 = 1 << ImmShift,
+ Imm8PCRel = 2 << ImmShift,
+ Imm16 = 3 << ImmShift,
+ Imm32 = 4 << ImmShift,
+ Imm32PCRel = 5 << ImmShift,
+ Imm64 = 6 << ImmShift,
//===------------------------------------------------------------------===//
// FP Instruction Classification... Zero is non-fp instruction.
@@ -396,15 +410,37 @@ namespace X86II {
return TSFlags >> X86II::OpcodeShift;
}
+ static inline bool hasImm(unsigned TSFlags) {
+ return (TSFlags & X86II::ImmMask) != 0;
+ }
+
/// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
/// of the specified instruction.
static inline unsigned getSizeOfImm(unsigned TSFlags) {
switch (TSFlags & X86II::ImmMask) {
default: assert(0 && "Unknown immediate size");
- case X86II::Imm8: return 1;
- case X86II::Imm16: return 2;
- case X86II::Imm32: return 4;
- case X86II::Imm64: return 8;
+ case X86II::Imm8:
+ case X86II::Imm8PCRel: return 1;
+ case X86II::Imm16: return 2;
+ case X86II::Imm32:
+ case X86II::Imm32PCRel: return 4;
+ case X86II::Imm64: return 8;
+ }
+ }
+
+ /// isImmPCRel - Return true if the immediate of the specified instruction's
+ /// TSFlags indicates that it is pc relative.
+ static inline unsigned isImmPCRel(unsigned TSFlags) {
+ switch (TSFlags & X86II::ImmMask) {
+ default: assert(0 && "Unknown immediate size");
+ case X86II::Imm8PCRel:
+ case X86II::Imm32PCRel:
+ return true;
+ case X86II::Imm8:
+ case X86II::Imm16:
+ case X86II::Imm32:
+ case X86II::Imm64:
+ return false;
}
}
}
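// A minimal illustrative sketch of how an encoder could combine the helpers
// above; emitImmediate() is a hypothetical callback that writes Size bytes and
// applies a PC-relative fixup when requested, and ImmOperand stands in for the
// MachineOperand holding the immediate:
//
//   if (X86II::hasImm(TSFlags)) {
//     unsigned Size = X86II::getSizeOfImm(TSFlags);
//     bool IsPCRel = X86II::isImmPCRel(TSFlags);
//     emitImmediate(ImmOperand, Size, IsPCRel);
//   }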
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index f0b4239..8a6ff54 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -65,7 +65,7 @@ def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
-def SDTX86RdTsc : SDTypeProfile<0, 0, []>;
+def SDTX86Void : SDTypeProfile<0, 0, []>;
def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
@@ -143,7 +143,7 @@ def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
[SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
SDNPMayLoad]>;
-def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG",SDTX86RdTsc,
+def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void,
[SDNPHasChain, SDNPOutFlag, SDNPSideEffect]>;
def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
@@ -178,6 +178,9 @@ def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
+def X86MingwAlloca : SDNode<"X86ISD::MINGW_ALLOCA", SDTX86Void,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+
//===----------------------------------------------------------------------===//
// X86 Operand Definitions.
//
@@ -343,18 +346,37 @@ def X86_COND_O : PatLeaf<(i8 13)>;
def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE
def X86_COND_S : PatLeaf<(i8 15)>;
-def i16immSExt8 : PatLeaf<(i16 imm), [{
- // i16immSExt8 predicate - True if the 16-bit immediate fits in a 8-bit
- // sign extended field.
- return (int16_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+def immSext8 : PatLeaf<(imm), [{
+ return N->getSExtValue() == (int8_t)N->getSExtValue();
}]>;
-def i32immSExt8 : PatLeaf<(i32 imm), [{
- // i32immSExt8 predicate - True if the 32-bit immediate fits in a 8-bit
- // sign extended field.
- return (int32_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+def i16immSExt8 : PatLeaf<(i16 immSext8)>;
+def i32immSExt8 : PatLeaf<(i32 immSext8)>;
+
+/// Load patterns: these constrain the match to the right address space.
+def dsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ return true;
}]>;
+def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ return PT->getAddressSpace() == 256;
+ return false;
+}]>;
+
+def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ return PT->getAddressSpace() == 257;
+ return false;
+}]>;
+
+
// Helper fragments for loads.
// It's always safe to treat a anyext i16 load as a i32 load if the i16 is
// known to be 32-bit aligned or better. Ditto for i8 to i16.
@@ -372,8 +394,7 @@ def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
return false;
}]>;
-def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),
-[{
+def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{
LoadSDNode *LD = cast<LoadSDNode>(N);
if (const Value *Src = LD->getSrcValue())
if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
@@ -399,72 +420,11 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
return false;
}]>;
-def nvloadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
- LoadSDNode *LD = cast<LoadSDNode>(N);
- if (const Value *Src = LD->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
- if (LD->isVolatile())
- return false;
- ISD::LoadExtType ExtType = LD->getExtensionType();
- if (ExtType == ISD::NON_EXTLOAD)
- return true;
- if (ExtType == ISD::EXTLOAD)
- return LD->getAlignment() >= 4;
- return false;
-}]>;
-
-def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- return PT->getAddressSpace() == 256;
- return false;
-}]>;
-
-def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- return PT->getAddressSpace() == 257;
- return false;
-}]>;
-
-def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr)), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
- return true;
-}]>;
-def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr)), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
- return true;
-}]>;
-
-def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr)), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
- return true;
-}]>;
-def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr)), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
- return true;
-}]>;
-def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr)), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
- return true;
-}]>;
+def loadi8 : PatFrag<(ops node:$ptr), (i8 (dsload node:$ptr))>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (dsload node:$ptr))>;
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (dsload node:$ptr))>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (dsload node:$ptr))>;
+def loadf80 : PatFrag<(ops node:$ptr), (f80 (dsload node:$ptr))>;
def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
@@ -562,7 +522,7 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
}
// x86-64 va_start lowering magic.
-let usesCustomInserter = 1 in
+let usesCustomInserter = 1 in {
def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
(outs),
(ins GR8:$al,
@@ -573,6 +533,19 @@ def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
imm:$regsavefi,
imm:$offset)]>;
+// Dynamic stack allocation yields an _alloca call for Cygwin/MinGW targets. A
+// call to _alloca is needed to probe the stack when allocating more than 4K
+// bytes in one go. Touching the stack at 4K increments is necessary to ensure
+// that the guard pages used by the OS virtual memory manager are allocated in
+// the correct sequence (a rough sketch of the probing loop follows this block).
+// The main point of having a separate instruction is the extra unmodelled
+// effects (compared to ordinary calls), such as the stack pointer change.
+
+def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins),
+ "# dynamic stack allocation",
+ [(X86MingwAlloca)]>;
+}
+
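// A rough sketch of the probing that the _alloca call performs, assuming the
// requested size is passed in EAX (hypothetical pseudo-assembly, not the exact
// library code):
//   probe:  sub  esp, 4096        ; step down one page
//           or   dword [esp], 0   ; touch it so the guard page gets committed
//           sub  eax, 4096
//           ja   probe            ; more full pages left to allocate
// MINGW_ALLOCA above is the pseudo that is expected to be lowered into that
// _alloca call.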
// Nop
let neverHasSideEffects = 1 in {
def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
@@ -596,7 +569,7 @@ let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
"", []>;
//===----------------------------------------------------------------------===//
-// Control Flow Instructions...
+// Control Flow Instructions.
//
// Return instructions.
@@ -614,16 +587,46 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
"lret\t$amt", []>;
}
-// All branches are RawFrm, Void, Branch, and Terminators
-let isBranch = 1, isTerminator = 1 in
- class IBr<bits<8> opcode, dag ins, string asm, list<dag> pattern> :
- I<opcode, RawFrm, (outs), ins, asm, pattern>;
+// Unconditional branches.
+let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
+ def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
+ "jmp\t$dst", [(br bb:$dst)]>;
+ def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
+ "jmp\t$dst", []>;
+}
-let isBranch = 1, isBarrier = 1 in {
- def JMP : IBr<0xE9, (ins brtarget:$dst), "jmp\t$dst", [(br bb:$dst)]>;
- def JMP8 : IBr<0xEB, (ins brtarget8:$dst), "jmp\t$dst", []>;
+// Conditional Branches.
+let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in {
+ multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
+ def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>;
+ def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm,
+ [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB;
+ }
}
+defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
+defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>;
+defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
+defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
+defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
+defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
+defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
+defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
+defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
+defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
+defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
+defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
+defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
+defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
+defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
+defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
+
+// FIXME: What about the CX/RCX versions of this instruction?
+let Uses = [ECX], isBranch = 1, isTerminator = 1 in
+ def JCXZ8 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jcxz\t$dst", []>;
+
+
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
@@ -644,63 +647,6 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
"ljmp{l}\t{*}$dst", []>;
}
-// Conditional branches
-let Uses = [EFLAGS] in {
-// Short conditional jumps
-def JO8 : IBr<0x70, (ins brtarget8:$dst), "jo\t$dst", []>;
-def JNO8 : IBr<0x71, (ins brtarget8:$dst), "jno\t$dst", []>;
-def JB8 : IBr<0x72, (ins brtarget8:$dst), "jb\t$dst", []>;
-def JAE8 : IBr<0x73, (ins brtarget8:$dst), "jae\t$dst", []>;
-def JE8 : IBr<0x74, (ins brtarget8:$dst), "je\t$dst", []>;
-def JNE8 : IBr<0x75, (ins brtarget8:$dst), "jne\t$dst", []>;
-def JBE8 : IBr<0x76, (ins brtarget8:$dst), "jbe\t$dst", []>;
-def JA8 : IBr<0x77, (ins brtarget8:$dst), "ja\t$dst", []>;
-def JS8 : IBr<0x78, (ins brtarget8:$dst), "js\t$dst", []>;
-def JNS8 : IBr<0x79, (ins brtarget8:$dst), "jns\t$dst", []>;
-def JP8 : IBr<0x7A, (ins brtarget8:$dst), "jp\t$dst", []>;
-def JNP8 : IBr<0x7B, (ins brtarget8:$dst), "jnp\t$dst", []>;
-def JL8 : IBr<0x7C, (ins brtarget8:$dst), "jl\t$dst", []>;
-def JGE8 : IBr<0x7D, (ins brtarget8:$dst), "jge\t$dst", []>;
-def JLE8 : IBr<0x7E, (ins brtarget8:$dst), "jle\t$dst", []>;
-def JG8 : IBr<0x7F, (ins brtarget8:$dst), "jg\t$dst", []>;
-
-def JCXZ8 : IBr<0xE3, (ins brtarget8:$dst), "jcxz\t$dst", []>;
-
-def JE : IBr<0x84, (ins brtarget:$dst), "je\t$dst",
- [(X86brcond bb:$dst, X86_COND_E, EFLAGS)]>, TB;
-def JNE : IBr<0x85, (ins brtarget:$dst), "jne\t$dst",
- [(X86brcond bb:$dst, X86_COND_NE, EFLAGS)]>, TB;
-def JL : IBr<0x8C, (ins brtarget:$dst), "jl\t$dst",
- [(X86brcond bb:$dst, X86_COND_L, EFLAGS)]>, TB;
-def JLE : IBr<0x8E, (ins brtarget:$dst), "jle\t$dst",
- [(X86brcond bb:$dst, X86_COND_LE, EFLAGS)]>, TB;
-def JG : IBr<0x8F, (ins brtarget:$dst), "jg\t$dst",
- [(X86brcond bb:$dst, X86_COND_G, EFLAGS)]>, TB;
-def JGE : IBr<0x8D, (ins brtarget:$dst), "jge\t$dst",
- [(X86brcond bb:$dst, X86_COND_GE, EFLAGS)]>, TB;
-
-def JB : IBr<0x82, (ins brtarget:$dst), "jb\t$dst",
- [(X86brcond bb:$dst, X86_COND_B, EFLAGS)]>, TB;
-def JBE : IBr<0x86, (ins brtarget:$dst), "jbe\t$dst",
- [(X86brcond bb:$dst, X86_COND_BE, EFLAGS)]>, TB;
-def JA : IBr<0x87, (ins brtarget:$dst), "ja\t$dst",
- [(X86brcond bb:$dst, X86_COND_A, EFLAGS)]>, TB;
-def JAE : IBr<0x83, (ins brtarget:$dst), "jae\t$dst",
- [(X86brcond bb:$dst, X86_COND_AE, EFLAGS)]>, TB;
-
-def JS : IBr<0x88, (ins brtarget:$dst), "js\t$dst",
- [(X86brcond bb:$dst, X86_COND_S, EFLAGS)]>, TB;
-def JNS : IBr<0x89, (ins brtarget:$dst), "jns\t$dst",
- [(X86brcond bb:$dst, X86_COND_NS, EFLAGS)]>, TB;
-def JP : IBr<0x8A, (ins brtarget:$dst), "jp\t$dst",
- [(X86brcond bb:$dst, X86_COND_P, EFLAGS)]>, TB;
-def JNP : IBr<0x8B, (ins brtarget:$dst), "jnp\t$dst",
- [(X86brcond bb:$dst, X86_COND_NP, EFLAGS)]>, TB;
-def JO : IBr<0x80, (ins brtarget:$dst), "jo\t$dst",
- [(X86brcond bb:$dst, X86_COND_O, EFLAGS)]>, TB;
-def JNO : IBr<0x81, (ins brtarget:$dst), "jno\t$dst",
- [(X86brcond bb:$dst, X86_COND_NO, EFLAGS)]>, TB;
-} // Uses = [EFLAGS]
// Loop instructions
@@ -721,7 +667,7 @@ let isCall = 1 in
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [ESP] in {
- def CALLpcrel32 : Ii32<0xE8, RawFrm,
+ def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i32imm_pcrel:$dst,variable_ops),
"call\t$dst", []>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
@@ -761,8 +707,10 @@ def TCRETURNri : I<0, Pseudo, (outs),
"#TC_RETURN $dst $offset",
[]>;
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
- def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst, variable_ops),
+// FIXME: These should be pseudo instructions that are lowered when going to
+// MCInst.
+let isCall = 1, isBranch = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+ def TAILJMPd : Ii32<0xE9, RawFrm, (outs),(ins i32imm_pcrel:$dst,variable_ops),
"jmp\t$dst # TAILCALL",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
@@ -929,6 +877,9 @@ let Defs = [RAX, RDX] in
def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>,
TB;
+let Defs = [RAX, RCX, RDX] in
+def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
+
let isBarrier = 1, hasCtrlDep = 1 in {
def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
}
@@ -1059,7 +1010,7 @@ def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(set GR8:$dst, (loadi8 addr:$src))]>;
@@ -1093,7 +1044,7 @@ def MOV8mr_NOREX : I<0x88, MRMDestMem,
(outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
let mayLoad = 1,
- canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+ canFoldAsLoad = 1, isReMaterializable = 1 in
def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
(outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
@@ -1115,7 +1066,10 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_32:$dst), (ins GR32:$src),
//
// Extra precision multiplication
-let Defs = [AL,AH,EFLAGS], Uses = [AL] in
+
+// AL is really implied by AX, but the registers in Defs must match the
+// SDNode results (i8, i32).
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
// FIXME: Used for 8-bit mul, ignore result upper 8 bits.
// This probably ought to be moved to a def : Pat<> if the
@@ -1133,7 +1087,7 @@ def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
"mul{l}\t$src",
[]>; // EAX,EDX = EAX*GR32
-let Defs = [AL,AH,EFLAGS], Uses = [AL] in
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
"mul{b}\t$src",
// FIXME: Used for 8-bit mul, ignore result upper 8 bits.
@@ -1155,7 +1109,7 @@ def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
}
let neverHasSideEffects = 1 in {
-let Defs = [AL,AH,EFLAGS], Uses = [AL] in
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>;
// AL,AH = AL*GR8
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
@@ -1165,7 +1119,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>;
// EAX,EDX = EAX*GR32
let mayLoad = 1 in {
-let Defs = [AL,AH,EFLAGS], Uses = [AL] in
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
"imul{b}\t$src", []>; // AL,AH = AL*[mem8]
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
@@ -1178,7 +1132,7 @@ def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
} // neverHasSideEffects
// unsigned division/remainder
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"div{b}\t$src", []>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
@@ -1188,7 +1142,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
"div{l}\t$src", []>;
let mayLoad = 1 in {
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
"div{b}\t$src", []>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
@@ -1201,7 +1155,7 @@ def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
}
// Signed division/remainder.
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"idiv{b}\t$src", []>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
@@ -1211,7 +1165,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
"idiv{l}\t$src", []>;
let mayLoad = 1, mayLoad = 1 in {
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
"idiv{b}\t$src", []>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
@@ -2328,98 +2282,100 @@ let isTwoAddress = 0 in {
def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src),
"rcl{b}\t{1, $dst|$dst, 1}", []>;
-def RCL8m1 : I<0xD0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src),
- "rcl{b}\t{1, $dst|$dst, 1}", []>;
let Uses = [CL] in {
def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src),
"rcl{b}\t{%cl, $dst|$dst, CL}", []>;
-def RCL8mCL : I<0xD2, MRM2m, (outs i8mem:$dst), (ins i8mem:$src),
- "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
}
def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt),
"rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCL8mi : Ii8<0xC0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt),
- "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src),
"rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-def RCL16m1 : I<0xD1, MRM2m, (outs i16mem:$dst), (ins i16mem:$src),
- "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
let Uses = [CL] in {
def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src),
"rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-def RCL16mCL : I<0xD3, MRM2m, (outs i16mem:$dst), (ins i16mem:$src),
- "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
}
def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt),
"rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCL16mi : Ii8<0xC1, MRM2m, (outs i16mem:$dst),
- (ins i16mem:$src, i8imm:$cnt),
- "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src),
"rcl{l}\t{1, $dst|$dst, 1}", []>;
-def RCL32m1 : I<0xD1, MRM2m, (outs i32mem:$dst), (ins i32mem:$src),
- "rcl{l}\t{1, $dst|$dst, 1}", []>;
let Uses = [CL] in {
def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src),
"rcl{l}\t{%cl, $dst|$dst, CL}", []>;
-def RCL32mCL : I<0xD3, MRM2m, (outs i32mem:$dst), (ins i32mem:$src),
- "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
}
def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt),
"rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCL32mi : Ii8<0xC1, MRM2m, (outs i32mem:$dst),
- (ins i32mem:$src, i8imm:$cnt),
- "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src),
"rcr{b}\t{1, $dst|$dst, 1}", []>;
-def RCR8m1 : I<0xD0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src),
- "rcr{b}\t{1, $dst|$dst, 1}", []>;
let Uses = [CL] in {
def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src),
"rcr{b}\t{%cl, $dst|$dst, CL}", []>;
-def RCR8mCL : I<0xD2, MRM3m, (outs i8mem:$dst), (ins i8mem:$src),
- "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
}
def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt),
"rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR8mi : Ii8<0xC0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt),
- "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src),
"rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-def RCR16m1 : I<0xD1, MRM3m, (outs i16mem:$dst), (ins i16mem:$src),
- "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
let Uses = [CL] in {
def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src),
"rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-def RCR16mCL : I<0xD3, MRM3m, (outs i16mem:$dst), (ins i16mem:$src),
- "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
}
def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt),
"rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCR16mi : Ii8<0xC1, MRM3m, (outs i16mem:$dst),
- (ins i16mem:$src, i8imm:$cnt),
- "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src),
"rcr{l}\t{1, $dst|$dst, 1}", []>;
-def RCR32m1 : I<0xD1, MRM3m, (outs i32mem:$dst), (ins i32mem:$src),
- "rcr{l}\t{1, $dst|$dst, 1}", []>;
let Uses = [CL] in {
def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src),
"rcr{l}\t{%cl, $dst|$dst, CL}", []>;
-def RCR32mCL : I<0xD3, MRM3m, (outs i32mem:$dst), (ins i32mem:$src),
- "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
}
def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt),
"rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR32mi : Ii8<0xC1, MRM3m, (outs i32mem:$dst),
- (ins i32mem:$src, i8imm:$cnt),
+
+let isTwoAddress = 0 in {
+def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
+ "rcl{b}\t{1, $dst|$dst, 1}", []>;
+def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst),
+ "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
+def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst),
+ "rcl{l}\t{1, $dst|$dst, 1}", []>;
+def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt),
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst),
+ "rcr{b}\t{1, $dst|$dst, 1}", []>;
+def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst),
+ "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
+def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst),
+ "rcr{l}\t{1, $dst|$dst, 1}", []>;
+def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt),
"rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in {
+def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
+ "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
+ "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst),
+ "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst),
+ "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
+ "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
+ "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+}
+}
+
// FIXME: provide shorter instructions when imm8 == 1
let Uses = [CL] in {
def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src),
@@ -4100,7 +4056,7 @@ def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def INVLPG : I<0x01, RawFrm, (outs), (ins), "invlpg", []>, TB;
+def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins),
"str{w}\t{$dst}", []>, TB;
@@ -4262,17 +4218,17 @@ def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB;
// VMX instructions
// 66 0F 38 80
-def INVEPT : I<0x38, RawFrm, (outs), (ins), "invept", []>, OpSize, TB;
+def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8;
// 66 0F 38 81
-def INVVPID : I<0x38, RawFrm, (outs), (ins), "invvpid", []>, OpSize, TB;
+def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8;
// 0F 01 C1
-def VMCALL : I<0x01, RawFrm, (outs), (ins), "vmcall", []>, TB;
+def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
"vmclear\t$vmcs", []>, OpSize, TB;
// 0F 01 C2
-def VMLAUNCH : I<0x01, RawFrm, (outs), (ins), "vmlaunch", []>, TB;
+def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
// 0F 01 C3
-def VMRESUME : I<0x01, RawFrm, (outs), (ins), "vmresume", []>, TB;
+def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB;
def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
"vmptrld\t$vmcs", []>, TB;
def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins),
@@ -4294,7 +4250,7 @@ def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
// 0F 01 C4
-def VMXOFF : I<0x01, RawFrm, (outs), (ins), "vmxoff", []>, OpSize;
+def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
"vmxon\t{$vmxon}", []>, XD;
@@ -4462,12 +4418,6 @@ def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>;
-// (and (i32 load), 255) -> (zextload i8)
-def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 255))),
- (MOVZX32rm8 addr:$src)>;
-def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 65535))),
- (MOVZX32rm16 addr:$src)>;
-
//===----------------------------------------------------------------------===//
// Some peepholes
//===----------------------------------------------------------------------===//
@@ -4563,43 +4513,43 @@ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
// (shl x (and y, 31)) ==> (shl x, y)
-def : Pat<(shl GR8:$src1, (and CL:$amt, 31)),
+def : Pat<(shl GR8:$src1, (and CL, 31)),
(SHL8rCL GR8:$src1)>;
-def : Pat<(shl GR16:$src1, (and CL:$amt, 31)),
+def : Pat<(shl GR16:$src1, (and CL, 31)),
(SHL16rCL GR16:$src1)>;
-def : Pat<(shl GR32:$src1, (and CL:$amt, 31)),
+def : Pat<(shl GR32:$src1, (and CL, 31)),
(SHL32rCL GR32:$src1)>;
-def : Pat<(store (shl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
(SHL8mCL addr:$dst)>;
-def : Pat<(store (shl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
(SHL16mCL addr:$dst)>;
-def : Pat<(store (shl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
(SHL32mCL addr:$dst)>;
-def : Pat<(srl GR8:$src1, (and CL:$amt, 31)),
+def : Pat<(srl GR8:$src1, (and CL, 31)),
(SHR8rCL GR8:$src1)>;
-def : Pat<(srl GR16:$src1, (and CL:$amt, 31)),
+def : Pat<(srl GR16:$src1, (and CL, 31)),
(SHR16rCL GR16:$src1)>;
-def : Pat<(srl GR32:$src1, (and CL:$amt, 31)),
+def : Pat<(srl GR32:$src1, (and CL, 31)),
(SHR32rCL GR32:$src1)>;
-def : Pat<(store (srl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
(SHR8mCL addr:$dst)>;
-def : Pat<(store (srl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
(SHR16mCL addr:$dst)>;
-def : Pat<(store (srl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
(SHR32mCL addr:$dst)>;
-def : Pat<(sra GR8:$src1, (and CL:$amt, 31)),
+def : Pat<(sra GR8:$src1, (and CL, 31)),
(SAR8rCL GR8:$src1)>;
-def : Pat<(sra GR16:$src1, (and CL:$amt, 31)),
+def : Pat<(sra GR16:$src1, (and CL, 31)),
(SAR16rCL GR16:$src1)>;
-def : Pat<(sra GR32:$src1, (and CL:$amt, 31)),
+def : Pat<(sra GR32:$src1, (and CL, 31)),
(SAR32rCL GR32:$src1)>;
-def : Pat<(store (sra (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
(SAR8mCL addr:$dst)>;
-def : Pat<(store (sra (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
(SAR16mCL addr:$dst)>;
-def : Pat<(store (sra (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
(SAR32mCL addr:$dst)>;
// (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c)
@@ -4620,11 +4570,11 @@ def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
addr:$dst),
(SHRD32mrCL addr:$dst, GR32:$src2)>;
-def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)),
+def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)),
(SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;
def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1),
- GR32:$src2, (i8 imm:$amt2)), addr:$dst),
+ GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst),
(SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
// (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c)
@@ -4645,11 +4595,11 @@ def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
addr:$dst),
(SHLD32mrCL addr:$dst, GR32:$src2)>;
-def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)),
+def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)),
(SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;
def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1),
- GR32:$src2, (i8 imm:$amt2)), addr:$dst),
+ GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst),
(SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
// (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c)
@@ -4670,11 +4620,11 @@ def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
addr:$dst),
(SHRD16mrCL addr:$dst, GR16:$src2)>;
-def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)),
+def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)),
(SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;
def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1),
- GR16:$src2, (i8 imm:$amt2)), addr:$dst),
+ GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst),
(SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
// (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c)
@@ -4695,11 +4645,11 @@ def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
addr:$dst),
(SHLD16mrCL addr:$dst, GR16:$src2)>;
-def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)),
+def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)),
(SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;
def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1),
- GR16:$src2, (i8 imm:$amt2)), addr:$dst),
+ GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst),
(SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
// (anyext (setcc_carry)) -> (setcc_carry)
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 89f020c..c8e0723 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -141,7 +141,7 @@ def MMX_MOVD64rrv164 : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
let neverHasSideEffects = 1 in
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (load_mmx addr:$src))]>;
@@ -426,13 +426,15 @@ def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
// Extract / Insert
-def MMX_X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>;
-def MMX_X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>;
+def MMX_X86pinsrw : SDNode<"X86ISD::MMX_PINSRW",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v4i16>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+
def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR64:$src1, i16i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (MMX_X86pextrw (v4i16 VR64:$src1),
+ [(set GR32:$dst, (X86pextrw (v4i16 VR64:$src1),
(iPTR imm:$src2)))]>;
let Constraints = "$src1 = $dst" in {
def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg,
@@ -597,13 +599,6 @@ let AddedComplexity = 10 in {
(MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
}
-// Patterns to perform vector shuffling with a zeroed out vector.
-let AddedComplexity = 20 in {
- def : Pat<(bc_v2i32 (mmx_unpckl immAllZerosV,
- (v2i32 (scalar_to_vector (load_mmx addr:$src))))),
- (MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>;
-}
-
// Some special case PANDN patterns.
// FIXME: Get rid of these.
def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index e26c979..2743dba 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -160,6 +160,32 @@ def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>;
def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;
+// MOVNT Support
+// Like 'store', but requires the non-temporal bit to be set
+def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal();
+ return false;
+}]>;
+
+def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal() && !ST->isTruncatingStore() &&
+ ST->getAddressingMode() == ISD::UNINDEXED &&
+ ST->getAlignment() >= 16;
+ return false;
+}]>;
+
+def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal() &&
+ ST->getAlignment() < 16;
+ return false;
+}]>;
+
def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
@@ -344,18 +370,56 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in {
// SSE1 Instructions
//===----------------------------------------------------------------------===//
-// Move Instructions
-let neverHasSideEffects = 1 in
-def MOVSSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- "movss\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+// Move Instructions. Register-to-register movss is not used for FR32
+// register copies because it's a partial register update; FsMOVAPSrr is
+// used instead. Register-to-register movss is not modeled as an INSERT_SUBREG
+// because INSERT_SUBREG requires that the insert be implementable in terms of
+// a copy, and, as just mentioned, we don't use movss for copies.
+let Constraints = "$src1 = $dst" in
+def MOVSSrr : SSI<0x10, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
+ "movss\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (movl VR128:$src1, (scalar_to_vector FR32:$src2)))]>;
+
+// Extract the low 32-bit value from one vector and insert it into another.
+let AddedComplexity = 15 in
+def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
+ (MOVSSrr VR128:$src1,
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), x86_subreg_ss))>;
+
+// Implicitly promote a 32-bit scalar to a vector.
+def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, x86_subreg_ss)>;
+
+// Loading from memory automatically zeroing upper bits.
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
"movss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (loadf32 addr:$src))]>;
+
+// MOVSSrm zeros the high parts of the register; represent this
+// with SUBREG_TO_REG.
+let AddedComplexity = 20 in {
+def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+}
+
+// Store scalar value to memory.
def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
"movss\t{$src, $dst|$dst, $src}",
[(store FR32:$src, addr:$dst)]>;
+// Extract and store.
+def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSSmr addr:$dst,
+ (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
+
// Conversion instructions
def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
"cvttss2si\t{$src, $dst|$dst, $src}",
@@ -518,7 +582,7 @@ def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
// Alias instruction to load FR32 from f128mem using movaps. Upper bits are
// disregarded.
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
@@ -715,7 +779,7 @@ defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin,
let neverHasSideEffects = 1 in
def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def MOVAPSrm : PSI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (alignedloadv4f32 addr:$src))]>;
@@ -727,7 +791,7 @@ def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
let neverHasSideEffects = 1 in
def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movups\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movups\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (loadv4f32 addr:$src))]>;
@@ -736,7 +800,7 @@ def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(store (v4f32 VR128:$src), addr:$dst)]>;
// Intrinsic forms of MOVUPS load and store
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movups\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
@@ -762,6 +826,9 @@ let Constraints = "$src1 = $dst" in {
} // Constraints = "$src1 = $dst"
+def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@@ -793,9 +860,9 @@ def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
let AddedComplexity = 20 in {
def : Pat<(v4f32 (movddup VR128:$src, (undef))),
- (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+ (MOVLHPSrr VR128:$src, VR128:$src)>;
def : Pat<(v2i64 (movddup VR128:$src, (undef))),
- (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+ (MOVLHPSrr VR128:$src, VR128:$src)>;
}
@@ -1010,10 +1077,33 @@ def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
"prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
// Non-temporal stores
-def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+
+def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (MOVNTDQ_64mr VR128:$src, addr:$dst)>;
+
+def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movnti\t{$src, $dst|$dst, $src}",
+ [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
+ TB, Requires<[HasSSE2]>;
+
+def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "movnti\t{$src, $dst|$dst, $src}",
+ [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
+ TB, Requires<[HasSSE2]>;
+}
+
// Load, store, and memory fence
def SFENCE : PSI<0xAE, MRM7r, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>;
@@ -1032,84 +1122,73 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllZerosV))]>;
-let Predicates = [HasSSE1] in {
- def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
- def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
- def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
- def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
- def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
-}
-
-// FR32 to 128-bit vector conversion.
-let isAsCheapAsAMove = 1 in
-def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR32:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4f32 (scalar_to_vector FR32:$src)))]>;
-def MOVSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
-
-// FIXME: may not be able to eliminate this movss with coalescing the src and
-// dest register classes are different. We really want to write this pattern
-// like this:
-// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
-// (f32 FR32:$src)>;
-let isAsCheapAsAMove = 1 in
-def MOVPS2SSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins VR128:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (vector_extract (v4f32 VR128:$src),
- (iPTR 0)))]>;
-def MOVPS2SSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(store (f32 (vector_extract (v4f32 VR128:$src),
- (iPTR 0))), addr:$dst)]>;
-
-
-// Move to lower bits of a VR128, leaving upper bits alone.
-// Three operand (but two address) aliases.
-let Constraints = "$src1 = $dst" in {
-let neverHasSideEffects = 1 in
- def MOVLSS2PSrr : SSI<0x10, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
- "movss\t{$src2, $dst|$dst, $src2}", []>;
-
- let AddedComplexity = 15 in
- def MOVLPSrr : SSI<0x10, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "movss\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (movl VR128:$src1, VR128:$src2)))]>;
-}
+def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
+def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
+def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
-// Move to lower bits of a VR128 and zeroing upper bits.
-// Loading from memory automatically zeroing upper bits.
-let AddedComplexity = 20 in
-def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector
- (loadf32 addr:$src))))))]>;
-
-def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (MOVZSS2PSrm addr:$src)>;
+def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
//===---------------------------------------------------------------------===//
// SSE2 Instructions
//===---------------------------------------------------------------------===//
-// Move Instructions
-let neverHasSideEffects = 1 in
-def MOVSDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
- "movsd\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+// Move Instructions. Register-to-register movsd is not used for FR64
+// register copies because it's a partial register update; FsMOVAPDrr is
+// used instead. Register-to-register movsd is not modeled as an INSERT_SUBREG
+// because INSERT_SUBREG requires that the insert be implementable in terms of
+// a copy, and, as just mentioned, we don't use movsd for copies.
+let Constraints = "$src1 = $dst" in
+def MOVSDrr : SDI<0x10, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
+ "movsd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (movl VR128:$src1, (scalar_to_vector FR64:$src2)))]>;
+
+// Extract the low 64-bit value from one vector and insert it into another.
+let AddedComplexity = 15 in
+def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1,
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), x86_subreg_sd))>;
+
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, x86_subreg_sd)>;
+
+// Loading from memory automatically zeroing upper bits.
+let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in
def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (loadf64 addr:$src))]>;
+
+// MOVSDrm zeros the high parts of the register; represent this
+// with SUBREG_TO_REG.
+let AddedComplexity = 20 in {
+def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+def : Pat<(v2f64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+}
+
+// Store scalar value to memory.
def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(store FR64:$src, addr:$dst)]>;
+// Extract and store.
+def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSDmr addr:$dst,
+ (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>;
+
// Conversion instructions
def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
"cvttsd2si\t{$src, $dst|$dst, $src}",
@@ -1163,7 +1242,8 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
Requires<[HasSSE2, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
- (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>;
+ (CVTSS2SDrr (MOVSSrm addr:$src))>,
+ Requires<[HasSSE2, OptForSpeed]>;
// Match intrinsics which expect XMM operand(s).
def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
@@ -1282,7 +1362,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
// Alias instruction to load FR64 from f128mem using movapd. Upper bits are
// disregarded.
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
"movapd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
@@ -1480,7 +1560,7 @@ defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
let neverHasSideEffects = 1 in
def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movapd\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movapd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (alignedloadv2f64 addr:$src))]>;
@@ -2295,34 +2375,47 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
// Non-temporal stores
-def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
-def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
-def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
+def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
+def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
TB, Requires<[HasSSE2]>;
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
+
+def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+
+def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
+ (MOVNTDQmr VR128:$src, addr:$dst)>;
+}
+
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
"clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
TB, Requires<[HasSSE2]>;
// Load, store, and memory fence
-def LFENCE : I<0xAE, MRM5r, (outs), (ins),
+def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
"lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
-def MFENCE : I<0xAE, MRM6r, (outs), (ins),
+def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
"mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
//TODO: custom lower this so as to never even generate the noop
-def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
(i8 0)), (NOOP)>;
def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
-def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
(i8 1)), (MFENCE)>;
// Alias instructions that map zero vector to pxor / xorp* for sse.
@@ -2334,17 +2427,6 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllOnesV))]>;
-// FR64 to 128-bit vector conversion.
-let isAsCheapAsAMove = 1 in
-def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2f64 (scalar_to_vector FR64:$src)))]>;
-def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
-
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2373,20 +2455,9 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
[(store (i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)]>;
-// FIXME: may not be able to eliminate this movss with coalescing the src and
-// dest register classes are different. We really want to write this pattern
-// like this:
-// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
-// (f32 FR32:$src)>;
-let isAsCheapAsAMove = 1 in
-def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins VR128:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (vector_extract (v2f64 VR128:$src),
- (iPTR 0)))]>;
-def MOVPD2SDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract (v2f64 VR128:$src),
- (iPTR 0))), addr:$dst)]>;
+def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>;
+
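This replaces the old MOVPD2SD copy with a plain EXTRACT_SUBREG: with FR64 registered as a sub-register class of VR128 elsewhere in this patch, reading lane 0 of a v2f64 is just a re-labelling of the same xmm register rather than a move. In intrinsic terms it is the zero-cost low-lane read (sketch, not part of the patch):

    #include <emmintrin.h>

    // vector_extract(v2f64, 0): no instruction should be needed once FR64 is
    // modelled as a sub-register of VR128.
    double low_lane(__m128d v) { return _mm_cvtsd_f64(v); }
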
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -2403,44 +2474,11 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
-
-// Move to lower bits of a VR128, leaving upper bits alone.
-// Three operand (but two address) aliases.
-let Constraints = "$src1 = $dst" in {
- let neverHasSideEffects = 1 in
- def MOVLSD2PDrr : SDI<0x10, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
- "movsd\t{$src2, $dst|$dst, $src2}", []>;
-
- let AddedComplexity = 15 in
- def MOVLPDrr : SDI<0x10, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "movsd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (movl VR128:$src1, VR128:$src2)))]>;
-}
-
// Store / copy lower 64-bits of a XMM register.
def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
-// Move to lower bits of a VR128 and zeroing upper bits.
-// Loading from memory automatically zeroing upper bits.
-let AddedComplexity = 20 in {
-def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2f64 (X86vzmovl (v2f64 (scalar_to_vector
- (loadf64 addr:$src))))))]>;
-
-def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (MOVZSD2PDrm addr:$src)>;
-def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (MOVZSD2PDrm addr:$src)>;
-def : Pat<(v2f64 (X86vzload addr:$src)), (MOVZSD2PDrm addr:$src)>;
-}
-
// movd / movq to XMM register zero-extends
let AddedComplexity = 15 in {
def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
@@ -2613,9 +2651,9 @@ let Constraints = "$src1 = $dst" in {
}
// Thread synchronization
-def MONITOR : I<0x01, MRM1r, (outs), (ins), "monitor",
+def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor",
[(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>;
-def MWAIT : I<0x01, MRM1r, (outs), (ins), "mwait",
+def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait",
[(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
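MONITOR and MWAIT previously claimed the MRM1r form and were special-cased in the emitters; the new MRM_C8/MRM_C9 forms state directly that they are fixed byte sequences. A sketch of the bytes the encoder now produces for them (illustrative only):

    #include <cstdint>

    // Byte sequences selected by the MRM_C8 / MRM_C9 forms used above.
    const uint8_t MonitorBytes[] = {0x0F, 0x01, 0xC8};  // monitor
    const uint8_t MwaitBytes[]   = {0x0F, 0x01, 0xC9};  // mwait
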
// vector_shuffle v1, <undef> <1, 1, 3, 3>
@@ -2986,13 +3024,15 @@ let Predicates = [HasSSE2] in {
let AddedComplexity = 15 in {
// Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
- (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
+ (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
- (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE1]>;
+ (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>;
+ (MOVSSrr (v4f32 (V_SET0)),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>;
+ (MOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>;
}
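These rewritten patterns build a "scalar in lane 0, zeros above" value by doing MOVSS/MOVSD over an explicitly zeroed register (V_SET0) instead of going through the removed MOVLSS2PS/MOVLSD2PD aliases. The intrinsic-level equivalent, for reference (not part of the patch):

    #include <emmintrin.h>

    // X86vzmovl(scalar_to_vector x): x in lane 0, remaining lanes zeroed.
    __m128d zext_sd(double x) { return _mm_set_sd(x); }  // movsd over a zeroed reg
    __m128  zext_ss(float  x) { return _mm_set_ss(x); }  // movss over a zeroed reg
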
// Splat v2f64 / v2i64
@@ -3010,8 +3050,7 @@ def : Pat<(unpckh (v2i64 VR128:$src), (undef)),
// Special unary SHUFPSrri case.
def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
(SHUFPSrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>,
- Requires<[HasSSE1]>;
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
let AddedComplexity = 5 in
def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
(PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
@@ -3057,13 +3096,13 @@ def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))),
}
let AddedComplexity = 10 in {
def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))),
- (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+ (UNPCKLPSrr VR128:$src, VR128:$src)>;
def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))),
- (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+ (PUNPCKLBWrr VR128:$src, VR128:$src)>;
def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))),
- (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+ (PUNPCKLWDrr VR128:$src, VR128:$src)>;
def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))),
- (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+ (PUNPCKLDQrr VR128:$src, VR128:$src)>;
}
// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
@@ -3077,13 +3116,13 @@ def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))),
}
let AddedComplexity = 10 in {
def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))),
- (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+ (UNPCKHPSrr VR128:$src, VR128:$src)>;
def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))),
- (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+ (PUNPCKHBWrr VR128:$src, VR128:$src)>;
def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))),
- (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+ (PUNPCKHWDrr VR128:$src, VR128:$src)>;
def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))),
- (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+ (PUNPCKHDQrr VR128:$src, VR128:$src)>;
}
let AddedComplexity = 20 in {
@@ -3105,45 +3144,49 @@ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
let AddedComplexity = 20 in {
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
}
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
addr:$src1),
- (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
let AddedComplexity = 15 in {
// Setting the lowest element in the vector.
def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
- (MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+ (MOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), x86_subreg_ss))>;
def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
- (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+ (MOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), x86_subreg_sd))>;
-// vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd)
+// vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
- (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>,
+ Requires<[HasSSE2]>;
def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
- (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>,
+ Requires<[HasSSE2]>;
}
// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
// fall back to this for SSE1)
def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
(SHUFPSrri VR128:$src2, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>;
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
// Set lowest element and zero upper elements.
let AddedComplexity = 15 in
@@ -3185,30 +3228,30 @@ def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))),
// Use movaps / movups for SSE integer load / store (one byte shorter).
def : Pat<(alignedloadv4i32 addr:$src),
- (MOVAPSrm addr:$src)>, Requires<[HasSSE1]>;
+ (MOVAPSrm addr:$src)>;
def : Pat<(loadv4i32 addr:$src),
- (MOVUPSrm addr:$src)>, Requires<[HasSSE1]>;
+ (MOVUPSrm addr:$src)>;
def : Pat<(alignedloadv2i64 addr:$src),
- (MOVAPSrm addr:$src)>, Requires<[HasSSE2]>;
+ (MOVAPSrm addr:$src)>;
def : Pat<(loadv2i64 addr:$src),
- (MOVUPSrm addr:$src)>, Requires<[HasSSE2]>;
+ (MOVUPSrm addr:$src)>;
def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+ (MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+ (MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+ (MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+ (MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v2i64 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+ (MOVUPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+ (MOVUPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+ (MOVUPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+ (MOVUPSmr addr:$dst, VR128:$src)>;
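The "one byte shorter" remark above is about encoding length: MOVAPS/MOVUPS lack the 0x66 operand-size prefix that MOVDQA/MOVDQU carry, so using the packed-single forms for integer vectors saves a byte per access while still moving the same 128 bits. For example, the aligned loads differ only by that prefix (hypothetical [eax] addressing, 32-bit mode):

    #include <cstdint>

    const uint8_t MovapsLoad[] = {0x0F, 0x28, 0x00};        // movaps (%eax), %xmm0 - 3 bytes
    const uint8_t MovdqaLoad[] = {0x66, 0x0F, 0x6F, 0x00};  // movdqa (%eax), %xmm0 - 4 bytes
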
//===----------------------------------------------------------------------===//
// SSE4.1 Instructions
@@ -3397,7 +3440,7 @@ let Constraints = "$src1 = $dst" in {
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
- (OpNode VR128:$src1, (memop addr:$src2)))]>, OpSize;
+ (OpVT (OpNode VR128:$src1, (memop addr:$src2))))]>, OpSize;
def rm_int : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp
index d3b0052..250634f 100644
--- a/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/X86MCAsmInfo.cpp
@@ -55,6 +55,11 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
if (!is64Bit)
Data64bitsDirective = 0; // we can't emit a 64-bit unit
+ // Use ## as a comment string so that .s files generated by llvm can go
+ // through the GCC preprocessor without causing an error. This is needed
+ // because "clang foo.s" runs the C preprocessor, which is usually reserved
+ // for .S files on other systems. Perhaps this is because the file system
+ // wasn't always case preserving or something.
CommentString = "##";
PCSymbol = ".";
@@ -70,6 +75,8 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &Triple) {
AsmTransCBE = x86_asm_table;
AssemblerDialect = AsmWriterFlavor;
+ TextAlignFillValue = 0x90;
+
PrivateGlobalPrefix = ".L";
WeakRefDirective = "\t.weak\t";
PCSymbol = ".";
@@ -94,27 +101,6 @@ MCSection *X86ELFMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const {
X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
AsmTransCBE = x86_asm_table;
AssemblerDialect = AsmWriterFlavor;
-}
-
-
-X86WinMCAsmInfo::X86WinMCAsmInfo(const Triple &Triple) {
- AsmTransCBE = x86_asm_table;
- AssemblerDialect = AsmWriterFlavor;
- GlobalPrefix = "_";
- CommentString = ";";
-
- PrivateGlobalPrefix = "$";
- AlignDirective = "\tALIGN\t";
- ZeroDirective = "\tdb\t";
- AsciiDirective = "\tdb\t";
- AscizDirective = 0;
- Data8bitsDirective = "\tdb\t";
- Data16bitsDirective = "\tdw\t";
- Data32bitsDirective = "\tdd\t";
- Data64bitsDirective = "\tdq\t";
- HasDotTypeDotSizeDirective = false;
- HasSingleParameterDotFile = false;
-
- AlignmentIsInBytes = true;
+ TextAlignFillValue = 0x90;
}
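Both hunks in this file are plain MCAsmInfo configuration: Darwin documents why its comment string is "##" (so generated .s files survive the C preprocessor), and the ELF and COFF variants pad text-section alignment with 0x90, the one-byte NOP, instead of zeros. A minimal sketch of the same idea, assuming only the field names already used in this patch:

    #include "llvm/MC/MCAsmInfo.h"

    // Sketch of a target asm-info flavour; not a drop-in for the classes above.
    struct ExampleAsmInfo : llvm::MCAsmInfo {
      ExampleAsmInfo() {
        CommentString      = "##";   // a bare '#' can look like a cpp directive
        TextAlignFillValue = 0x90;   // pad .text alignment with NOPs
      }
    };
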
diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h
index ca227b7..69716bf 100644
--- a/lib/Target/X86/X86MCAsmInfo.h
+++ b/lib/Target/X86/X86MCAsmInfo.h
@@ -33,11 +33,6 @@ namespace llvm {
struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF {
explicit X86MCAsmInfoCOFF(const Triple &Triple);
};
-
- struct X86WinMCAsmInfo : public MCAsmInfo {
- explicit X86WinMCAsmInfo(const Triple &Triple);
- };
-
} // namespace llvm
#endif
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 764c87a..3f18696 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -14,53 +14,44 @@
#define DEBUG_TYPE "x86-emitter"
#include "X86.h"
#include "X86InstrInfo.h"
+#include "X86FixupKinds.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-// FIXME: This should move to a header.
-namespace llvm {
-namespace X86 {
-enum Fixups {
- reloc_pcrel_word = FirstTargetFixupKind,
- reloc_picrel_word,
- reloc_absolute_word,
- reloc_absolute_word_sext,
- reloc_absolute_dword
-};
-}
-}
-
namespace {
class X86MCCodeEmitter : public MCCodeEmitter {
X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
const TargetMachine &TM;
const TargetInstrInfo &TII;
+ MCContext &Ctx;
bool Is64BitMode;
public:
- X86MCCodeEmitter(TargetMachine &tm, bool is64Bit)
- : TM(tm), TII(*TM.getInstrInfo()) {
+ X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit)
+ : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) {
Is64BitMode = is64Bit;
}
~X86MCCodeEmitter() {}
unsigned getNumFixupKinds() const {
- return 5;
+ return 3;
}
- MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
- static MCFixupKindInfo Infos[] = {
- { "reloc_pcrel_word", 0, 4 * 8 },
- { "reloc_picrel_word", 0, 4 * 8 },
- { "reloc_absolute_word", 0, 4 * 8 },
- { "reloc_absolute_word_sext", 0, 4 * 8 },
- { "reloc_absolute_dword", 0, 8 * 8 }
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[] = {
+ { "reloc_pcrel_4byte", 0, 4 * 8 },
+ { "reloc_pcrel_1byte", 0, 1 * 8 },
+ { "reloc_riprel_4byte", 0, 4 * 8 }
};
+
+ if (Kind < FirstTargetFixupKind)
+ return MCCodeEmitter::getFixupKindInfo(Kind);
- assert(Kind >= FirstTargetFixupKind && Kind < MaxTargetFixupKind &&
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
return Infos[Kind - FirstTargetFixupKind];
}
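Each table entry is {name, bit offset, bit width} for one of the three remaining target fixups; generic kinds such as FK_Data_4 are now forwarded to the base MCCodeEmitter implementation. When a fixup is later resolved, that triple tells the backend where and how wide to patch. A rough sketch of that final step, assuming the little-endian, byte-aligned layout these entries describe:

    #include <cstdint>
    #include <cstddef>

    // Patch a resolved fixup value into the instruction bytes. Offset/width
    // come from an entry such as { "reloc_pcrel_4byte", 0, 4*8 }.
    void applyFixup(uint8_t *insn, size_t byteOffset, unsigned bits, uint64_t value) {
      for (unsigned i = 0; i != bits / 8; ++i)
        insn[byteOffset + i] = uint8_t(value >> (8 * i));
    }
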
@@ -83,9 +74,11 @@ public:
}
}
- void EmitDisplacementField(const MCOperand &Disp, int64_t Adj, bool IsPCRel,
- unsigned &CurByte, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ void EmitImmediate(const MCOperand &Disp,
+ unsigned ImmSize, MCFixupKind FixupKind,
+ unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ int ImmOffset = 0) const;
inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
unsigned RM) {
@@ -106,8 +99,8 @@ public:
void EmitMemModRMByte(const MCInst &MI, unsigned Op,
- unsigned RegOpcodeField, intptr_t PCAdj,
- unsigned &CurByte, raw_ostream &OS,
+ unsigned RegOpcodeField,
+ unsigned TSFlags, unsigned &CurByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const;
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
@@ -119,13 +112,15 @@ public:
MCCodeEmitter *llvm::createX86_32MCCodeEmitter(const Target &,
- TargetMachine &TM) {
- return new X86MCCodeEmitter(TM, false);
+ TargetMachine &TM,
+ MCContext &Ctx) {
+ return new X86MCCodeEmitter(TM, Ctx, false);
}
MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &,
- TargetMachine &TM) {
- return new X86MCCodeEmitter(TM, true);
+ TargetMachine &TM,
+ MCContext &Ctx) {
+ return new X86MCCodeEmitter(TM, Ctx, true);
}
@@ -135,36 +130,59 @@ static bool isDisp8(int Value) {
return Value == (signed char)Value;
}
+/// getImmFixupKind - Return the appropriate fixup kind to use for an immediate
+/// in an instruction with the specified TSFlags.
+static MCFixupKind getImmFixupKind(unsigned TSFlags) {
+ unsigned Size = X86II::getSizeOfImm(TSFlags);
+ bool isPCRel = X86II::isImmPCRel(TSFlags);
+
+ switch (Size) {
+ default: assert(0 && "Unknown immediate size");
+ case 1: return isPCRel ? MCFixupKind(X86::reloc_pcrel_1byte) : FK_Data_1;
+ case 4: return isPCRel ? MCFixupKind(X86::reloc_pcrel_4byte) : FK_Data_4;
+ case 2: assert(!isPCRel); return FK_Data_2;
+ case 8: assert(!isPCRel); return FK_Data_8;
+ }
+}
+
+
void X86MCCodeEmitter::
-EmitDisplacementField(const MCOperand &DispOp, int64_t Adj, bool IsPCRel,
- unsigned &CurByte, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups) const {
+EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
+ unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const {
// If this is a simple integer displacement that doesn't require a relocation,
// emit it now.
if (DispOp.isImm()) {
- EmitConstant(DispOp.getImm(), 4, CurByte, OS);
+ // FIXME: is this right for pc-rel encoding?? Probably need to emit this as
+ // a fixup if so.
+ EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
return;
}
-#if 0
- // Otherwise, this is something that requires a relocation. Emit it as such
- // now.
- unsigned RelocType = Is64BitMode ?
- (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext)
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
-#endif
+ // If we have an immoffset, add it to the expression.
+ const MCExpr *Expr = DispOp.getExpr();
+
+ // If the fixup is pc-relative, we need to bias the value to be relative to
+ // the start of the field, not the end of the field.
+ if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) ||
+ FixupKind == MCFixupKind(X86::reloc_riprel_4byte))
+ ImmOffset -= 4;
+ if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte))
+ ImmOffset -= 1;
+
+ if (ImmOffset)
+ Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx),
+ Ctx);
// Emit a symbolic constant as a fixup and 4 zeros.
- Fixups.push_back(MCFixup::Create(CurByte, DispOp.getExpr(),
- MCFixupKind(X86::reloc_absolute_word)));
- EmitConstant(0, 4, CurByte, OS);
+ Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind));
+ EmitConstant(0, Size, CurByte, OS);
}
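The -4 / -1 adjustment above compensates for two different reference points: for these pc-relative immediates the field ends the instruction, and the hardware adds the displacement to the address just past that field, while the fixup is resolved against the start of the field, so the difference is folded into the expression as a negative addend. A worked example with hypothetical addresses (sketch only):

    #include <cstdint>
    #include <cassert>

    int main() {
      uint64_t fieldStart = 0x1004;  // where the 4-byte displacement is emitted
      uint64_t target     = 0x2000;  // symbol the displacement should reach
      // What the CPU needs: relative to the end of the 4-byte field.
      int32_t hwDisp   = int32_t(target - (fieldStart + 4));
      // What gets emitted: (target - 4) resolved against the field start.
      int32_t resolved = int32_t((target - 4) - fieldStart);
      assert(hwDisp == resolved);
      return 0;
    }
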
void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
unsigned RegOpcodeField,
- intptr_t PCAdj,
- unsigned &CurByte,
+ unsigned TSFlags, unsigned &CurByte,
raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const{
const MCOperand &Disp = MI.getOperand(Op+3);
@@ -172,31 +190,48 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
const MCOperand &Scale = MI.getOperand(Op+1);
const MCOperand &IndexReg = MI.getOperand(Op+2);
unsigned BaseReg = Base.getReg();
-
- // FIXME: Eliminate!
- bool IsPCRel = false;
+
+ // Handle %rip relative addressing.
+ if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode
+ assert(IndexReg.getReg() == 0 && Is64BitMode &&
+ "Invalid rip-relative address");
+ EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
+
+ // rip-relative addressing is actually relative to the *next* instruction.
+ // Since an immediate can follow the mod/rm byte for an instruction, this
+ // means that we need to bias the immediate field of the instruction with
+ // the size of the immediate field. If we have this case, add it into the
+ // expression to emit.
+ int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0;
+ EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_riprel_4byte),
+ CurByte, OS, Fixups, -ImmSize);
+ return;
+ }
+
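Concretely, a rip-relative displacement is measured from the end of the whole instruction, including any immediate that still follows, which is why the fixup gets an extra -ImmSize here on top of the -4 applied inside EmitImmediate. A hypothetical layout for cmpl $5, foo(%rip) (81 /7 disp32 imm32):

    offset 0:  81               opcode
    offset 1:  3D               modrm (mod=00, reg=/7, rm=101 -> [rip+disp32])
    offset 2:  <disp32 fixup>   reloc_riprel_4byte recorded here
    offset 6:  05 00 00 00      imm32 = 5
    offset 10: next instruction; this is the address rip holds

    disp32 = foo - (start + 10) = (foo - 4 - ImmSize) - fixup_address,  with ImmSize = 4
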
+ unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U;
+
// Determine whether a SIB byte is needed.
// If no BaseReg, issue a RIP relative instruction only if the MCE can
// resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
// 2-7) and absolute references.
+
if (// The SIB byte must be used if there is an index register.
IndexReg.getReg() == 0 &&
- // The SIB byte must be used if the base is ESP/RSP.
- BaseReg != X86::ESP && BaseReg != X86::RSP &&
+ // The SIB byte must be used if the base is ESP/RSP/R12, all of which
+ // encode to an R/M value of 4, which indicates that a SIB byte is
+ // present.
+ BaseRegNo != N86::ESP &&
// If there is no base register and we're in 64-bit mode, we need a SIB
// byte to emit an addr that is just 'disp32' (the non-RIP relative form).
(!Is64BitMode || BaseReg != 0)) {
- if (BaseReg == 0 || // [disp32] in X86-32 mode
- BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode
+ if (BaseReg == 0) { // [disp32] in X86-32 mode
EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
- EmitDisplacementField(Disp, PCAdj, true, CurByte, OS, Fixups);
+ EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
return;
}
- unsigned BaseRegNo = GetX86RegNum(Base);
-
// If the base is not EBP/ESP and there is no displacement, use simple
// indirect register encoding, this handles addresses like [EAX]. The
// encoding for [EBP] with no displacement means [disp32] so we handle it
@@ -209,13 +244,13 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
// Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
if (Disp.isImm() && isDisp8(Disp.getImm())) {
EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS);
- EmitConstant(Disp.getImm(), 1, CurByte, OS);
+ EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
return;
}
// Otherwise, emit the most general non-SIB encoding: [REG+disp32]
EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
- EmitDisplacementField(Disp, PCAdj, IsPCRel, CurByte, OS, Fixups);
+ EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
return;
}
@@ -270,9 +305,9 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
// Do we need to output a displacement?
if (ForceDisp8)
- EmitConstant(Disp.getImm(), 1, CurByte, OS);
+ EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
else if (ForceDisp32 || Disp.getImm() != 0)
- EmitDisplacementField(Disp, PCAdj, IsPCRel, CurByte, OS, Fixups);
+ EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
}
/// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64
@@ -280,11 +315,11 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
/// size, and 3) use of X86-64 extended registers.
static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags,
const TargetInstrDesc &Desc) {
- unsigned REX = 0;
-
- // Pseudo instructions do not need REX prefix byte.
+ // Pseudo instructions never have a rex byte.
if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
return 0;
+
+ unsigned REX = 0;
if (TSFlags & X86II::REX_W)
REX |= 1 << 3;
@@ -482,52 +517,29 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRMInitReg:
assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!");
default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n";
- assert(0 && "Unknown FormMask value in X86MCCodeEmitter!");
- case X86II::RawFrm: {
+ assert(0 && "Unknown FormMask value in X86MCCodeEmitter!");
+ case X86II::Pseudo: return; // Pseudo instructions encode to nothing.
+ case X86II::RawFrm:
EmitByte(BaseOpcode, CurByte, OS);
-
- if (CurOp == NumOps)
- break;
-
- assert(0 && "Unimpl RawFrm expr");
break;
- }
- case X86II::AddRegFrm: {
+ case X86II::AddRegFrm:
EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS);
- if (CurOp == NumOps)
- break;
-
- const MCOperand &MO1 = MI.getOperand(CurOp++);
- if (MO1.isImm()) {
- unsigned Size = X86II::getSizeOfImm(TSFlags);
- EmitConstant(MO1.getImm(), Size, CurByte, OS);
- break;
- }
-
- assert(0 && "Unimpl AddRegFrm expr");
break;
- }
case X86II::MRMDestReg:
EmitByte(BaseOpcode, CurByte, OS);
EmitRegModRMByte(MI.getOperand(CurOp),
GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS);
CurOp += 2;
- if (CurOp != NumOps)
- EmitConstant(MI.getOperand(CurOp++).getImm(),
- X86II::getSizeOfImm(TSFlags), CurByte, OS);
break;
case X86II::MRMDestMem:
EmitByte(BaseOpcode, CurByte, OS);
EmitMemModRMByte(MI, CurOp,
GetX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands)),
- 0, CurByte, OS, Fixups);
+ TSFlags, CurByte, OS, Fixups);
CurOp += X86AddrNumOperands + 1;
- if (CurOp != NumOps)
- EmitConstant(MI.getOperand(CurOp++).getImm(),
- X86II::getSizeOfImm(TSFlags), CurByte, OS);
break;
case X86II::MRMSrcReg:
@@ -535,9 +547,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
EmitRegModRMByte(MI.getOperand(CurOp+1), GetX86RegNum(MI.getOperand(CurOp)),
CurByte, OS);
CurOp += 2;
- if (CurOp != NumOps)
- EmitConstant(MI.getOperand(CurOp++).getImm(),
- X86II::getSizeOfImm(TSFlags), CurByte, OS);
break;
case X86II::MRMSrcMem: {
@@ -551,117 +560,78 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
else
AddrOperands = X86AddrNumOperands;
- // FIXME: What is this actually doing?
- intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ?
- X86II::getSizeOfImm(TSFlags) : 0;
-
EmitMemModRMByte(MI, CurOp+1, GetX86RegNum(MI.getOperand(CurOp)),
- PCAdj, CurByte, OS, Fixups);
+ TSFlags, CurByte, OS, Fixups);
CurOp += AddrOperands + 1;
- if (CurOp != NumOps)
- EmitConstant(MI.getOperand(CurOp++).getImm(),
- X86II::getSizeOfImm(TSFlags), CurByte, OS);
break;
}
case X86II::MRM0r: case X86II::MRM1r:
case X86II::MRM2r: case X86II::MRM3r:
case X86II::MRM4r: case X86II::MRM5r:
- case X86II::MRM6r: case X86II::MRM7r: {
+ case X86II::MRM6r: case X86II::MRM7r:
EmitByte(BaseOpcode, CurByte, OS);
-
- // Special handling of lfence, mfence, monitor, and mwait.
- // FIXME: This is terrible, they should get proper encoding bits in TSFlags.
- if (Opcode == X86::LFENCE || Opcode == X86::MFENCE ||
- Opcode == X86::MONITOR || Opcode == X86::MWAIT) {
- EmitByte(ModRMByte(3, (TSFlags & X86II::FormMask)-X86II::MRM0r, 0),
- CurByte, OS);
-
- switch (Opcode) {
- default: break;
- case X86::MONITOR: EmitByte(0xC8, CurByte, OS); break;
- case X86::MWAIT: EmitByte(0xC9, CurByte, OS); break;
- }
- } else {
- EmitRegModRMByte(MI.getOperand(CurOp++),
- (TSFlags & X86II::FormMask)-X86II::MRM0r,
- CurByte, OS);
- }
-
- if (CurOp == NumOps)
- break;
-
- const MCOperand &MO1 = MI.getOperand(CurOp++);
- if (MO1.isImm()) {
- EmitConstant(MO1.getImm(), X86II::getSizeOfImm(TSFlags), CurByte, OS);
- break;
- }
-
- assert(0 && "relo unimpl");
-#if 0
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
- if (Opcode == X86::MOV64ri32)
- rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
- if (MO1.isGlobal()) {
- bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
- emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- Indirect);
- } else if (MO1.isSymbol())
- emitExternalSymbolAddress(MO1.getSymbolName(), rt);
- else if (MO1.isCPI())
- emitConstPoolAddress(MO1.getIndex(), rt);
- else if (MO1.isJTI())
- emitJumpTableAddress(MO1.getIndex(), rt);
+ EmitRegModRMByte(MI.getOperand(CurOp++),
+ (TSFlags & X86II::FormMask)-X86II::MRM0r,
+ CurByte, OS);
break;
-#endif
- }
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m: {
- intptr_t PCAdj = 0;
- if (CurOp + X86AddrNumOperands != NumOps) {
- if (MI.getOperand(CurOp+X86AddrNumOperands).isImm())
- PCAdj = X86II::getSizeOfImm(TSFlags);
- else
- PCAdj = 4;
- }
-
+ case X86II::MRM6m: case X86II::MRM7m:
EmitByte(BaseOpcode, CurByte, OS);
EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m,
- PCAdj, CurByte, OS, Fixups);
+ TSFlags, CurByte, OS, Fixups);
CurOp += X86AddrNumOperands;
-
- if (CurOp == NumOps)
- break;
-
- const MCOperand &MO = MI.getOperand(CurOp++);
- if (MO.isImm()) {
- EmitConstant(MO.getImm(), X86II::getSizeOfImm(TSFlags), CurByte, OS);
- break;
- }
-
- assert(0 && "relo not handled");
-#if 0
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
- if (Opcode == X86::MOV64mi32)
- rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
- if (MO.isGlobal()) {
- bool Indirect = gvNeedsNonLazyPtr(MO, TM);
- emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
- Indirect);
- } else if (MO.isSymbol())
- emitExternalSymbolAddress(MO.getSymbolName(), rt);
- else if (MO.isCPI())
- emitConstPoolAddress(MO.getIndex(), rt);
- else if (MO.isJTI())
- emitJumpTableAddress(MO.getIndex(), rt);
-#endif
+ break;
+ case X86II::MRM_C1:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC1, CurByte, OS);
+ break;
+ case X86II::MRM_C2:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC2, CurByte, OS);
+ break;
+ case X86II::MRM_C3:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC3, CurByte, OS);
+ break;
+ case X86II::MRM_C4:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC4, CurByte, OS);
+ break;
+ case X86II::MRM_C8:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC8, CurByte, OS);
+ break;
+ case X86II::MRM_C9:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xC9, CurByte, OS);
+ break;
+ case X86II::MRM_E8:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xE8, CurByte, OS);
+ break;
+ case X86II::MRM_F0:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xF0, CurByte, OS);
+ break;
+ case X86II::MRM_F8:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xF8, CurByte, OS);
+ break;
+ case X86II::MRM_F9:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xF9, CurByte, OS);
break;
}
- }
+
+ // If there is a remaining operand, it must be a trailing immediate. Emit it
+ // according to the right size for the instruction.
+ if (CurOp != NumOps)
+ EmitImmediate(MI.getOperand(CurOp++),
+ X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ CurByte, OS, Fixups);
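With every form handled above, any operand left over is the trailing immediate and is emitted once here, at the width X86II::getSizeOfImm reports and with the fixup kind chosen by getImmFixupKind. For instance, addl $1000, %ecx is an MRM0r instruction followed by an imm32 (hypothetical walk-through, not from the patch):

    #include <cstdint>

    // addl $1000, %ecx  ==  81 /0 id
    //   81            opcode
    //   C1            modrm: mod=11, reg=/0 (ADD), rm=001 (ECX)
    //   E8 03 00 00   trailing imm32 = 1000, emitted by the common tail above
    const uint8_t AddEcx1000[] = {0x81, 0xC1, 0xE8, 0x03, 0x00, 0x00};
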
#ifndef NDEBUG
// FIXME: Verify.
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index bb53bf1..4b2529b 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -18,12 +18,6 @@
namespace llvm {
-enum NameDecorationStyle {
- None,
- StdCall,
- FastCall
-};
-
/// X86MachineFunctionInfo - This class is derived from MachineFunction and
/// contains private X86 target-specific information for each MachineFunction.
class X86MachineFunctionInfo : public MachineFunctionInfo {
@@ -41,10 +35,6 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// Used on windows platform for stdcall & fastcall name decoration
unsigned BytesToPopOnReturn;
- /// DecorationStyle - If the function requires additional name decoration,
- /// DecorationStyle holds the right way to do so.
- NameDecorationStyle DecorationStyle;
-
/// ReturnAddrIndex - FrameIndex for return slot.
int ReturnAddrIndex;
@@ -66,7 +56,6 @@ public:
X86MachineFunctionInfo() : ForceFramePointer(false),
CalleeSavedFrameSize(0),
BytesToPopOnReturn(0),
- DecorationStyle(None),
ReturnAddrIndex(0),
TailCallReturnAddrDelta(0),
SRetReturnReg(0),
@@ -76,7 +65,6 @@ public:
: ForceFramePointer(false),
CalleeSavedFrameSize(0),
BytesToPopOnReturn(0),
- DecorationStyle(None),
ReturnAddrIndex(0),
TailCallReturnAddrDelta(0),
SRetReturnReg(0),
@@ -91,9 +79,6 @@ public:
unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;}
- NameDecorationStyle getDecorationStyle() const { return DecorationStyle; }
- void setDecorationStyle(NameDecorationStyle style) { DecorationStyle = style;}
-
int getRAIndex() const { return ReturnAddrIndex; }
void setRAIndex(int Index) { ReturnAddrIndex = Index; }
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 081c6d9..0f4ce37 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -191,6 +191,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return &X86::GR16_NOREXRegClass;
else if (A == &X86::GR16_ABCDRegClass)
return &X86::GR16_ABCDRegClass;
+ } else if (B == &X86::FR32RegClass) {
+ return A;
}
break;
case 2:
@@ -207,6 +209,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
A == &X86::GR16_NOREXRegClass)
return &X86::GR16_ABCDRegClass;
+ } else if (B == &X86::FR64RegClass) {
+ return A;
}
break;
case 3:
@@ -234,6 +238,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return &X86::GR32_NOREXRegClass;
else if (A == &X86::GR32_ABCDRegClass)
return &X86::GR64_ABCDRegClass;
+ } else if (B == &X86::VR128RegClass) {
+ return A;
}
break;
case 4:
@@ -446,8 +452,10 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
bool requiresRealignment =
- RealignStack && (MFI->getMaxAlignment() > StackAlign);
+ RealignStack && ((MFI->getMaxAlignment() > StackAlign) ||
+ F->hasFnAttr(Attribute::StackAlignment));
// FIXME: Currently we don't support stack realignment for functions with
// variable-sized allocas.
@@ -485,7 +493,7 @@ X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
Offset += SlotSize;
} else {
unsigned Align = MFI->getObjectAlignment(FI);
- assert( (-(Offset + StackSize)) % Align == 0);
+ assert((-(Offset + StackSize)) % Align == 0);
Align = 0;
return Offset + StackSize;
}
@@ -627,10 +635,6 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
- // Calculate and set max stack object alignment early, so we can decide
- // whether we will need stack realignment (and thus FP).
- MFI->calculateMaxStackAlignment();
-
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
@@ -1053,7 +1057,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(NumBytes);
BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol("_alloca");
+ .addExternalSymbol("_alloca")
+ .addReg(StackPtr, RegState::Define | RegState::Implicit);
} else {
// Save EAX
BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
@@ -1064,7 +1069,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(NumBytes - 4);
BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol("_alloca");
+ .addExternalSymbol("_alloca")
+ .addReg(StackPtr, RegState::Define | RegState::Implicit);
// Restore EAX
MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 8fb5e92..e4bdb4e 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -35,7 +35,8 @@ namespace X86 {
/// these indices must be kept in sync with the class indices in the
/// X86RegisterInfo.td file.
enum SubregIndex {
- SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4
+ SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4,
+ SUBREG_SS = 1, SUBREG_SD = 2, SUBREG_XMM = 3
};
}
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 1559bf7..ed2ce6c 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -158,22 +158,22 @@ let Namespace = "X86" in {
def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
// YMM Registers, used by AVX instructions
- def YMM0: Register<"ymm0">, DwarfRegNum<[17, 21, 21]>;
- def YMM1: Register<"ymm1">, DwarfRegNum<[18, 22, 22]>;
- def YMM2: Register<"ymm2">, DwarfRegNum<[19, 23, 23]>;
- def YMM3: Register<"ymm3">, DwarfRegNum<[20, 24, 24]>;
- def YMM4: Register<"ymm4">, DwarfRegNum<[21, 25, 25]>;
- def YMM5: Register<"ymm5">, DwarfRegNum<[22, 26, 26]>;
- def YMM6: Register<"ymm6">, DwarfRegNum<[23, 27, 27]>;
- def YMM7: Register<"ymm7">, DwarfRegNum<[24, 28, 28]>;
- def YMM8: Register<"ymm8">, DwarfRegNum<[25, -2, -2]>;
- def YMM9: Register<"ymm9">, DwarfRegNum<[26, -2, -2]>;
- def YMM10: Register<"ymm10">, DwarfRegNum<[27, -2, -2]>;
- def YMM11: Register<"ymm11">, DwarfRegNum<[28, -2, -2]>;
- def YMM12: Register<"ymm12">, DwarfRegNum<[29, -2, -2]>;
- def YMM13: Register<"ymm13">, DwarfRegNum<[30, -2, -2]>;
- def YMM14: Register<"ymm14">, DwarfRegNum<[31, -2, -2]>;
- def YMM15: Register<"ymm15">, DwarfRegNum<[32, -2, -2]>;
+ def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>;
+ def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>;
+ def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>;
+ def YMM3: RegisterWithSubRegs<"ymm3", [XMM3]>, DwarfRegNum<[20, 24, 24]>;
+ def YMM4: RegisterWithSubRegs<"ymm4", [XMM4]>, DwarfRegNum<[21, 25, 25]>;
+ def YMM5: RegisterWithSubRegs<"ymm5", [XMM5]>, DwarfRegNum<[22, 26, 26]>;
+ def YMM6: RegisterWithSubRegs<"ymm6", [XMM6]>, DwarfRegNum<[23, 27, 27]>;
+ def YMM7: RegisterWithSubRegs<"ymm7", [XMM7]>, DwarfRegNum<[24, 28, 28]>;
+ def YMM8: RegisterWithSubRegs<"ymm8", [XMM8]>, DwarfRegNum<[25, -2, -2]>;
+ def YMM9: RegisterWithSubRegs<"ymm9", [XMM9]>, DwarfRegNum<[26, -2, -2]>;
+ def YMM10: RegisterWithSubRegs<"ymm10", [XMM10]>, DwarfRegNum<[27, -2, -2]>;
+ def YMM11: RegisterWithSubRegs<"ymm11", [XMM11]>, DwarfRegNum<[28, -2, -2]>;
+ def YMM12: RegisterWithSubRegs<"ymm12", [XMM12]>, DwarfRegNum<[29, -2, -2]>;
+ def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegNum<[30, -2, -2]>;
+ def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegNum<[31, -2, -2]>;
+ def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegNum<[32, -2, -2]>;
// Floating point stack registers
def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>;
@@ -238,6 +238,10 @@ def x86_subreg_8bit_hi : PatLeaf<(i32 2)>;
def x86_subreg_16bit : PatLeaf<(i32 3)>;
def x86_subreg_32bit : PatLeaf<(i32 4)>;
+def x86_subreg_ss : PatLeaf<(i32 1)>;
+def x86_subreg_sd : PatLeaf<(i32 2)>;
+def x86_subreg_xmm : PatLeaf<(i32 3)>;
+
def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI,
R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
[AL, CL, DL, BL, SPL, BPL, SIL, DIL,
@@ -277,11 +281,31 @@ def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
[EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>;
-def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+ YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+
+def : SubRegSet<2, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+ YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+
+def : SubRegSet<3, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+def : SubRegSet<1, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15],
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+
+def : SubRegSet<2, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15],
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+
//===----------------------------------------------------------------------===//
// Register Class Definitions... now that we have all of the pieces, define the
// top-level register classes. The order specified in the register list is
@@ -793,6 +817,7 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11,
XMM12, XMM13, XMM14, XMM15]> {
+ let SubRegClassList = [FR32, FR64];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -811,7 +836,9 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256,
[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11,
- YMM12, YMM13, YMM14, YMM15]>;
+ YMM12, YMM13, YMM14, YMM15]> {
+ let SubRegClassList = [FR32, FR64, VR128];
+}
// Status flags registers.
def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> {
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 618dd10..594a470 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -20,9 +20,9 @@
namespace llvm {
class GlobalValue;
class TargetMachine;
-
+
/// PICStyles - The X86 backend supports a number of different styles of PIC.
-///
+///
namespace PICStyles {
enum Style {
StubPIC, // Used on i386-darwin in -fPIC mode.
@@ -46,7 +46,7 @@ protected:
/// PICStyle - Which PIC style to use
///
PICStyles::Style PICStyle;
-
+
/// X86SSELevel - MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or
/// none supported.
X86SSEEnum X86SSELevel;
@@ -58,7 +58,7 @@ protected:
/// HasCMov - True if this processor has conditional move instructions
/// (generally pentium pro+).
bool HasCMov;
-
+
/// HasX86_64 - True if the processor supports X86-64 instructions.
///
bool HasX86_64;
@@ -78,8 +78,9 @@ protected:
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
- /// HasVectorUAMem - True if SIMD operations can have unaligned memory operands.
- /// This may require setting a feature bit in the processor.
+ /// HasVectorUAMem - True if SIMD operations can have unaligned memory
+ /// operands. This may require setting a feature bit in the
+ /// processor.
bool HasVectorUAMem;
/// DarwinVers - Nonzero if this is a darwin platform: the numeric
@@ -150,20 +151,20 @@ public:
bool isTargetDarwin() const { return TargetType == isDarwin; }
bool isTargetELF() const { return TargetType == isELF; }
-
+
bool isTargetWindows() const { return TargetType == isWindows; }
bool isTargetMingw() const { return TargetType == isMingw; }
bool isTargetCygwin() const { return TargetType == isCygwin; }
bool isTargetCygMing() const {
return TargetType == isMingw || TargetType == isCygwin;
}
-
+
/// isTargetCOFF - Return true if this is any COFF/Windows target variant.
bool isTargetCOFF() const {
return TargetType == isMingw || TargetType == isCygwin ||
TargetType == isWindows;
}
-
+
bool isTargetWin64() const {
return Is64Bit && (TargetType == isMingw || TargetType == isWindows);
}
@@ -175,7 +176,7 @@ public:
else if (isTargetDarwin())
p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32";
else if (isTargetMingw() || isTargetWindows())
- p = "e-p:32:32-f64:64:64-i64:64:64-f80:128:128-n8:16:32";
+ p = "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32";
else
p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32";
@@ -196,11 +197,11 @@ public:
bool isPICStyleStubAny() const {
return PICStyle == PICStyles::StubDynamicNoPIC ||
PICStyle == PICStyles::StubPIC; }
-
+
/// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard,
/// 10 = Snow Leopard, etc.
unsigned getDarwinVers() const { return DarwinVers; }
-
+
/// ClassifyGlobalReference - Classify a global variable reference for the
/// current subtarget according to how we should reference it in a non-pcrel
/// context.
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index f835e29..56ddaf8 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -30,9 +30,8 @@ static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
case Triple::MinGW32:
case Triple::MinGW64:
case Triple::Cygwin:
- return new X86MCAsmInfoCOFF(TheTriple);
case Triple::Win32:
- return new X86WinMCAsmInfo(TheTriple);
+ return new X86MCAsmInfoCOFF(TheTriple);
default:
return new X86ELFMCAsmInfo(TheTriple);
}
@@ -48,11 +47,16 @@ extern "C" void LLVMInitializeX86Target() {
RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo);
// Register the code emitter.
- // FIXME: Remove the heinous one when the new one works.
TargetRegistry::RegisterCodeEmitter(TheX86_32Target,
- createHeinousX86MCCodeEmitter);
+ createX86_32MCCodeEmitter);
TargetRegistry::RegisterCodeEmitter(TheX86_64Target,
- createHeinousX86MCCodeEmitter);
+ createX86_64MCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterAsmBackend(TheX86_32Target,
+ createX86_32AsmBackend);
+ TargetRegistry::RegisterAsmBackend(TheX86_64Target,
+ createX86_64AsmBackend);
}
@@ -201,32 +205,3 @@ void X86TargetMachine::setCodeModelForJIT() {
else
setCodeModel(CodeModel::Small);
}
-
-/// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are 4-byte,
-/// 8-byte, and target default. The CIE is hard-coded to indicate that the LSDA
-/// pointer in the FDE section is an "sdata4", and should be encoded as a 4-byte
-/// pointer by default. However, some systems may require a different size due
-/// to bugs or other conditions. We will default to a 4-byte encoding unless the
-/// system tells us otherwise.
-///
-/// The issue is when the CIE says there is an LSDA. That mandates that every
-/// FDE have an LSDA slot. But if the function does not need an LSDA, there
-/// needs to be some way to signify there is none. The LSDA is encoded as
-/// pc-rel. But you don't look for some magic value after adding the pc. You
-/// have to look for a zero before adding the pc. The problem is that the size
-/// of the zero to look for depends on the encoding. The unwinder bug in SL is
-/// that it always checks for a pointer-size zero. So on x86_64 it looks for 8
-/// bytes of zero. If you have an LSDA, it works fine since the 8-bytes are
-/// non-zero so it goes ahead and then reads the value based on the encoding.
-/// But if you use sdata4 and there is no LSDA, then the test for zero gives a
-/// false negative and the unwinder thinks there is an LSDA.
-///
-/// FIXME: This call-back isn't good! We should be using the correct encoding
-/// regardless of the system. However, there are some systems which have bugs
-/// that prevent this from occurring.
-DwarfLSDAEncoding::Encoding X86TargetMachine::getLSDAEncoding() const {
- if (Subtarget.isTargetDarwin() && Subtarget.getDarwinVers() != 10)
- return DwarfLSDAEncoding::Default;
-
- return DwarfLSDAEncoding::EightByte;
-}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index eee29be..2bb5454 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -62,18 +62,6 @@ public:
return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
}
- /// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are
- /// 4-byte, 8-byte, and target default. The CIE is hard-coded to indicate that
- /// the LSDA pointer in the FDE section is an "sdata4", and should be encoded
- /// as a 4-byte pointer by default. However, some systems may require a
- /// different size due to bugs or other conditions. We will default to a
- /// 4-byte encoding unless the system tells us otherwise.
- ///
- /// FIXME: This call-back isn't good! We should be using the correct encoding
- /// regardless of the system. However, there are some systems which have bugs
- /// that prevent this from occurring.
- virtual DwarfLSDAEncoding::Encoding getLSDAEncoding() const;
-
// Set up the pass pipeline.
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index b8cef7d..29a0be5 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -7,61 +7,112 @@
//
//===----------------------------------------------------------------------===//
-#include "X86TargetObjectFile.h"
#include "X86MCTargetExpr.h"
+#include "X86TargetObjectFile.h"
+#include "X86TargetMachine.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Target/Mangler.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Dwarf.h"
using namespace llvm;
+using namespace dwarf;
-const MCExpr *X8632_MachoTargetObjectFile::
+const MCExpr *X8664_MachoTargetObjectFile::
getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI,
- bool &IsIndirect, bool &IsPCRel) const {
- // The mach-o version of this method defaults to returning a stub reference.
- IsIndirect = true;
- IsPCRel = false;
-
-
- MachineModuleInfoMachO &MachOMMI =
- MMI->getObjFileInfo<MachineModuleInfoMachO>();
-
- // FIXME: Use GetSymbolWithGlobalValueBase.
- SmallString<128> Name;
- Mang->getNameWithPrefix(Name, GV, true);
- Name += "$non_lazy_ptr";
-
- // Add information about the stub reference to MachOMMI so that the stub gets
- // emitted by the asmprinter.
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str());
- MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym);
- if (StubSym == 0) {
- Name.clear();
+ MachineModuleInfo *MMI, unsigned Encoding) const {
+
+ // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which
+ // is an indirect pc-relative reference.
+ if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) {
+ SmallString<128> Name;
Mang->getNameWithPrefix(Name, GV, false);
- StubSym = getContext().GetOrCreateSymbol(Name.str());
+ const MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ const MCExpr *Res =
+ X86MCTargetExpr::Create(Sym, X86MCTargetExpr::GOTPCREL, getContext());
+ const MCExpr *Four = MCConstantExpr::Create(4, getContext());
+ return MCBinaryExpr::CreateAdd(Res, Four, getContext());
}
-
- return MCSymbolRefExpr::Create(Sym, getContext());
+
+ return TargetLoweringObjectFileMachO::
+ getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding);
}
-const MCExpr *X8664_MachoTargetObjectFile::
-getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI,
- bool &IsIndirect, bool &IsPCRel) const {
-
- // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which
- // is an indirect pc-relative reference.
- IsIndirect = true;
- IsPCRel = true;
-
- // FIXME: Use GetSymbolWithGlobalValueBase.
- SmallString<128> Name;
- Mang->getNameWithPrefix(Name, GV, false);
- const MCSymbol *Sym = getContext().CreateSymbol(Name);
- const MCExpr *Res =
- X86MCTargetExpr::Create(Sym, X86MCTargetExpr::GOTPCREL, getContext());
- const MCExpr *Four = MCConstantExpr::Create(4, getContext());
- return MCBinaryExpr::CreateAdd(Res, Four, getContext());
+unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+ else
+ return DW_EH_PE_absptr;
+}
+
+unsigned X8632_ELFTargetObjectFile::getLSDAEncoding() const {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+ else
+ return DW_EH_PE_absptr;
+}
+
+unsigned X8632_ELFTargetObjectFile::getFDEEncoding() const {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+ else
+ return DW_EH_PE_absptr;
+}
+
+unsigned X8632_ELFTargetObjectFile::getTTypeEncoding() const {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+ else
+ return DW_EH_PE_absptr;
+}
+
+unsigned X8664_ELFTargetObjectFile::getPersonalityEncoding() const {
+ CodeModel::Model Model = TM.getCodeModel();
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small ||
+ Model == CodeModel::Medium ?
+ DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
+
+ if (Model == CodeModel::Small || Model == CodeModel::Medium)
+ return DW_EH_PE_udata4;
+
+ return DW_EH_PE_absptr;
+}
+
+unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const {
+ CodeModel::Model Model = TM.getCodeModel();
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_pcrel | (Model == CodeModel::Small ?
+ DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
+
+ if (Model == CodeModel::Small)
+ return DW_EH_PE_udata4;
+
+ return DW_EH_PE_absptr;
+}
+
+unsigned X8664_ELFTargetObjectFile::getFDEEncoding() const {
+ CodeModel::Model Model = TM.getCodeModel();
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_pcrel | (Model == CodeModel::Small ||
+ Model == CodeModel::Medium ?
+ DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
+
+ if (Model == CodeModel::Small || Model == CodeModel::Medium)
+ return DW_EH_PE_udata4;
+
+ return DW_EH_PE_absptr;
}
+unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const {
+ CodeModel::Model Model = TM.getCodeModel();
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small ||
+ Model == CodeModel::Medium ?
+ DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
+
+ if (Model == CodeModel::Small)
+ return DW_EH_PE_udata4;
+
+ return DW_EH_PE_absptr;
+}
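Each of these hooks hands back a DWARF exception-handling pointer encoding: the low nibble picks the value format (absptr, udata4, sdata4, sdata8, ...) and the high bits say how the value is applied (pc-relative, indirect through a pointer slot, or both). A small decoding sketch using the standard DW_EH_PE_* values (for illustration only):

    #include <cstdint>
    #include <cstdio>

    enum : uint8_t {   // standard DWARF EH pointer-encoding constants
      DW_EH_PE_absptr = 0x00, DW_EH_PE_udata4 = 0x03,
      DW_EH_PE_sdata4 = 0x0B, DW_EH_PE_sdata8 = 0x0C,
      DW_EH_PE_pcrel  = 0x10, DW_EH_PE_indirect = 0x80
    };

    void describe(uint8_t Enc) {
      std::printf("format=0x%X pcrel=%d indirect=%d\n", Enc & 0x0F,
                  (Enc & DW_EH_PE_pcrel) != 0, (Enc & DW_EH_PE_indirect) != 0);
    }
    // The PIC personality encoding above, indirect|pcrel|sdata4, is byte 0x9B.
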
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index 377a93b..0444417 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -10,21 +10,13 @@
#ifndef LLVM_TARGET_X86_TARGETOBJECTFILE_H
#define LLVM_TARGET_X86_TARGETOBJECTFILE_H
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
namespace llvm {
-
- /// X8632_MachoTargetObjectFile - This TLOF implementation is used for
- /// Darwin/x86-32.
- class X8632_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
- public:
-
- virtual const MCExpr *
- getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI,
- bool &IsIndirect, bool &IsPCRel) const;
- };
-
+ class X86TargetMachine;
+
/// X8664_MachoTargetObjectFile - This TLOF implementation is used for
/// Darwin/x86-64.
class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
@@ -32,9 +24,31 @@ namespace llvm {
virtual const MCExpr *
getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI,
- bool &IsIndirect, bool &IsPCRel) const;
+ MachineModuleInfo *MMI, unsigned Encoding) const;
+ };
+
+ class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
+ const X86TargetMachine &TM;
+ public:
+ X8632_ELFTargetObjectFile(const X86TargetMachine &tm)
+ :TM(tm) { }
+ virtual unsigned getPersonalityEncoding() const;
+ virtual unsigned getLSDAEncoding() const;
+ virtual unsigned getFDEEncoding() const;
+ virtual unsigned getTTypeEncoding() const;
+ };
+
+ class X8664_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
+ const X86TargetMachine &TM;
+ public:
+ X8664_ELFTargetObjectFile(const X86TargetMachine &tm)
+ :TM(tm) { }
+ virtual unsigned getPersonalityEncoding() const;
+ virtual unsigned getLSDAEncoding() const;
+ virtual unsigned getFDEEncoding() const;
+ virtual unsigned getTTypeEncoding() const;
};
+
} // end namespace llvm
#endif
diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
index d18f55d..82e23a1 100644
--- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
@@ -62,6 +63,11 @@ namespace {
}
void printMemOperand(const MachineInstr *MI, int opNum);
+ void printInlineJT(const MachineInstr *MI, int opNum,
+ const std::string &directive = ".jmptable");
+ void printInlineJT32(const MachineInstr *MI, int opNum) {
+ printInlineJT(MI, opNum, ".jmptable32");
+ }
void printOperand(const MachineInstr *MI, int opNum);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode);
@@ -257,6 +263,23 @@ void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum)
printOperand(MI, opNum+1);
}
+void XCoreAsmPrinter::
+printInlineJT(const MachineInstr *MI, int opNum, const std::string &directive)
+{
+ unsigned JTI = MI->getOperand(opNum).getIndex();
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ O << "\t" << directive << " ";
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ if (i > 0)
+ O << ",";
+ O << *MBB->getSymbol(OutContext);
+ }
+}
+
void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
const MachineOperand &MO = MI->getOperand(opNum);
switch (MO.getType()) {
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 383fd91..b1ab132 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -65,8 +65,6 @@ namespace {
bool SelectADDRcpii(SDNode *Op, SDValue Addr, SDValue &Base,
SDValue &Offset);
- virtual void InstructionSelect();
-
virtual const char *getPassName() const {
return "XCore DAG->DAG Pattern Instruction Selection";
}
@@ -147,15 +145,6 @@ bool XCoreDAGToDAGISel::SelectADDRcpii(SDNode *Op, SDValue Addr,
return false;
}
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void XCoreDAGToDAGISel::InstructionSelect() {
- // Select target instructions for the DAG.
- SelectRoot(*CurDAG);
-
- CurDAG->RemoveDeadNodes();
-}
-
SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
EVT NVT = N->getValueType(0);
@@ -164,7 +153,11 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
default: break;
case ISD::Constant: {
if (Predicate_immMskBitp(N)) {
- SDValue MskSize = Transform_msksize_xform(N);
+ // Transformation function: get the size of a mask
+ int64_t MaskVal = cast<ConstantSDNode>(N)->getZExtValue();
+ assert(isMask_32(MaskVal));
+ // Look for the first non-zero bit
+ SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(MaskVal));
return CurDAG->getMachineNode(XCore::MKMSK_rus, dl,
MVT::i32, MskSize);
}
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index bf8c38f..e6515d8 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -53,6 +54,8 @@ getTargetNodeName(unsigned Opcode) const
case XCoreISD::RETSP : return "XCoreISD::RETSP";
case XCoreISD::LADD : return "XCoreISD::LADD";
case XCoreISD::LSUB : return "XCoreISD::LSUB";
+ case XCoreISD::BR_JT : return "XCoreISD::BR_JT";
+ case XCoreISD::BR_JT32 : return "XCoreISD::BR_JT32";
default : return NULL;
}
}
@@ -106,9 +109,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::TRAP, MVT::Other, Legal);
- // Expand jump tables for now
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ // Jump tables.
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32 , Custom);
@@ -157,7 +159,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
- case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
@@ -315,14 +317,27 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG)
}
SDValue XCoreTargetLowering::
-LowerJumpTable(SDValue Op, SelectionDAG &DAG)
+LowerBR_JT(SDValue Op, SelectionDAG &DAG)
{
- // FIXME there isn't really debug info here
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
- EVT PtrVT = Op.getValueType();
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
- return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, JTI);
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ unsigned JTI = JT->getIndex();
+ MachineFunction &MF = DAG.getMachineFunction();
+ const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
+
+ unsigned NumEntries = MJTI->getJumpTables()[JTI].MBBs.size();
+ if (NumEntries <= 32) {
+ return DAG.getNode(XCoreISD::BR_JT, dl, MVT::Other, Chain, TargetJT, Index);
+ }
+ assert((NumEntries >> 31) == 0);
+ SDValue ScaledIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(XCoreISD::BR_JT32, dl, MVT::Other, Chain, TargetJT,
+ ScaledIndex);
}
static bool
@@ -390,7 +405,12 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG)
if (Offset % 4 == 0) {
// We've managed to infer better alignment information than the load
// already has. Use an aligned load.
- return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4);
+ //
+ // FIXME: No new alignment information is actually passed here.
+ // Should the offset really be 4?
+ //
+ return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4,
+ false, false, 0);
}
// Lower to
// ldw low, base[offset >> 2]
@@ -407,9 +427,9 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG)
SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, HighOffset);
SDValue Low = DAG.getLoad(getPointerTy(), dl, Chain,
- LowAddr, NULL, 4);
+ LowAddr, NULL, 4, false, false, 0);
SDValue High = DAG.getLoad(getPointerTy(), dl, Chain,
- HighAddr, NULL, 4);
+ HighAddr, NULL, 4, false, false, 0);
SDValue LowShifted = DAG.getNode(ISD::SRL, dl, MVT::i32, Low, LowShift);
SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, HighShift);
SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, LowShifted, HighShifted);
@@ -423,12 +443,13 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG)
int SVOffset = LD->getSrcValueOffset();
SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
BasePtr, LD->getSrcValue(), SVOffset, MVT::i16,
- LD->isVolatile(), 2);
+ LD->isVolatile(), LD->isNonTemporal(), 2);
SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
DAG.getConstant(2, MVT::i32));
SDValue High = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, Chain,
HighAddr, LD->getSrcValue(), SVOffset + 2,
- MVT::i16, LD->isVolatile(), 2);
+ MVT::i16, LD->isVolatile(),
+ LD->isNonTemporal(), 2);
SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High,
DAG.getConstant(16, MVT::i32));
SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, Low, HighShifted);
@@ -452,7 +473,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG)
false, false, 0, CallingConv::C, false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
- Args, DAG, dl, DAG.GetOrdering(Chain.getNode()));
+ Args, DAG, dl);
SDValue Ops[] =
{ CallResult.first, CallResult.second };
@@ -487,12 +508,14 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG)
DAG.getConstant(16, MVT::i32));
SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr,
ST->getSrcValue(), SVOffset, MVT::i16,
- ST->isVolatile(), 2);
+ ST->isVolatile(), ST->isNonTemporal(),
+ 2);
SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
DAG.getConstant(2, MVT::i32));
SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr,
ST->getSrcValue(), SVOffset + 2,
- MVT::i16, ST->isVolatile(), 2);
+ MVT::i16, ST->isVolatile(),
+ ST->isNonTemporal(), 2);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh);
}
@@ -513,7 +536,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG)
false, false, 0, CallingConv::C, false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__misaligned_store", getPointerTy()),
- Args, DAG, dl, DAG.GetOrdering(Chain.getNode()));
+ Args, DAG, dl);
return CallResult.second;
}
@@ -561,15 +584,16 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG)
const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
EVT VT = Node->getValueType(0);
SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0),
- Node->getOperand(1), V, 0);
+ Node->getOperand(1), V, 0, false, false, 0);
// Increment the pointer, VAList, to the next vararg
SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList,
DAG.getConstant(VT.getSizeInBits(),
getPointerTy()));
// Store the incremented VAList to the legalized pointer
- Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0);
+ Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0,
+ false, false, 0);
// Load the actual argument out of the pointer VAList
- return DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0);
+ return DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, false, false, 0);
}
SDValue XCoreTargetLowering::
@@ -582,7 +606,8 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG)
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0);
+ return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0,
+ false, false, 0);
}
SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
@@ -877,7 +902,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
// Create the SelectionDAG nodes corresponding to a load
// from this parameter
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
- InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, NULL, 0));
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, NULL, 0,
+ false, false, 0));
}
}
@@ -908,7 +934,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
RegInfo.addLiveIn(ArgRegs[i], VReg);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
// Move argument from virt reg -> stack
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
+ false, false, 0);
MemOps.push_back(Store);
}
if (!MemOps.empty())
@@ -1134,10 +1161,8 @@ static inline bool isImmUs4(int64_t val)
bool
XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
const Type *Ty) const {
- // Be conservative with void
- // FIXME: Can we be more aggressive?
if (Ty->getTypeID() == Type::VoidTyID)
- return false;
+ return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
const TargetData *TD = TM.getTargetData();
unsigned Size = TD->getTypeAllocSize(Ty);
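
The BR_JT lowering above chooses between two pseudo branches purely by table size: tables of at most 32 entries use XCoreISD::BR_JT, larger ones scale the index and use XCoreISD::BR_JT32. A minimal restatement of that decision outside SelectionDAG, with illustrative names only (this is not the real lowering API):

#include <cassert>
#include <cstdint>

enum class JTBranch { Short, Long }; // stand-ins for BR_JT and BR_JT32

// Mirrors the size check in LowerBR_JT: small tables take the short form,
// larger tables double the index and take the 32-bit-entry form.
static JTBranch chooseJumpTableBranch(uint32_t NumEntries, uint32_t &Index) {
  if (NumEntries <= 32)
    return JTBranch::Short;
  assert((NumEntries >> 31) == 0 && "entry count must fit in 31 bits");
  Index <<= 1; // scaled index, as in the BR_JT32 path above
  return JTBranch::Long;
}

int main() {
  uint32_t Idx = 5;
  JTBranch B = chooseJumpTableBranch(40, Idx); // 40 entries -> long form
  assert(B == JTBranch::Long && Idx == 10);
  (void)B; (void)Idx;
}
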
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index f7b620e..0c638af 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -28,7 +28,7 @@ namespace llvm {
namespace XCoreISD {
enum NodeType {
// Start the numbering where the builtin ops and target ops leave off.
- FIRST_NUMBER = ISD::BUILTIN_OP_END+XCore::INSTRUCTION_LIST_END,
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
// Branch and link (call)
BL,
@@ -52,7 +52,13 @@ namespace llvm {
LADD,
// Corresponds to LSUB instruction
- LSUB
+ LSUB,
+
+ // Jumptable branch.
+ BR_JT,
+
+ // Jumptable branch using long branches for each entry.
+ BR_JT32
};
}
@@ -122,7 +128,7 @@ namespace llvm {
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG);
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
- SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG);
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG);
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG);
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index 5a54844..722e747 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -145,6 +145,11 @@ static inline bool IsCondBranch(unsigned BrOpc) {
return IsBRF(BrOpc) || IsBRT(BrOpc);
}
+static inline bool IsBR_JT(unsigned BrOpc) {
+ return BrOpc == XCore::BR_JT
+ || BrOpc == XCore::BR_JT32;
+}
+
/// GetCondFromBranchOpc - Return the XCore CC that matches
/// the correspondent Branch instruction opcode.
static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc)
@@ -271,6 +276,14 @@ XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
return false;
}
+ // Likewise if it ends with a branch table followed by an unconditional branch.
+ if (IsBR_JT(SecondLastInst->getOpcode()) && IsBRU(LastInst->getOpcode())) {
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return true;
+ }
+
// Otherwise, can't handle this.
return true;
}
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 10dc18c..46805d5 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -34,6 +34,15 @@ def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink,
def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTNone,
[SDNPHasChain, SDNPOptInFlag]>;
+def SDT_XCoreBR_JT : SDTypeProfile<0, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+def XCoreBR_JT : SDNode<"XCoreISD::BR_JT", SDT_XCoreBR_JT,
+ [SDNPHasChain]>;
+
+def XCoreBR_JT32 : SDNode<"XCoreISD::BR_JT32", SDT_XCoreBR_JT,
+ [SDNPHasChain]>;
+
def SDT_XCoreAddress : SDTypeProfile<1, 1,
[SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
@@ -185,6 +194,15 @@ def MEMii : Operand<i32> {
let MIOperandInfo = (ops i32imm, i32imm);
}
+// Jump tables.
+def InlineJT : Operand<i32> {
+ let PrintMethod = "printInlineJT";
+}
+
+def InlineJT32 : Operand<i32> {
+ let PrintMethod = "printInlineJT32";
+}
+
//===----------------------------------------------------------------------===//
// Instruction Class Templates
//===----------------------------------------------------------------------===//
@@ -624,7 +642,7 @@ defm RETSP : FU6_LU6<"retsp", XCoreRetsp>;
// TODO extdp, kentsp, krestsp, blat, setsr
// clrsr, getsr, kalli
-let isBranch = 1, isTerminator = 1 in {
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
def BRBU_u6 : _FU6<
(outs),
(ins brtarget:$target),
@@ -756,24 +774,34 @@ def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
// One operand short
// TODO edu, eeu, waitet, waitef, freer, tstart, msync, mjoin, syncr, clrtp
-// bru, setdp, setcp, setv, setev, kcall
+// setdp, setcp, setv, setev, kcall
// dgetreg
-let isBranch=1, isIndirectBranch=1, isTerminator=1 in
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
def BAU_1r : _F1R<(outs), (ins GRRegs:$addr),
"bau $addr",
[(brind GRRegs:$addr)]>;
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BR_JT : PseudoInstXCore<(outs), (ins InlineJT:$t, GRRegs:$i),
+ "bru $i\n$t",
+ [(XCoreBR_JT tjumptable:$t, GRRegs:$i)]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BR_JT32 : PseudoInstXCore<(outs), (ins InlineJT32:$t, GRRegs:$i),
+ "bru $i\n$t",
+ [(XCoreBR_JT32 tjumptable:$t, GRRegs:$i)]>;
+
let Defs=[SP], neverHasSideEffects=1 in
def SETSP_1r : _F1R<(outs), (ins GRRegs:$src),
"set sp, $src",
[]>;
-let isBarrier = 1, hasCtrlDep = 1 in
+let hasCtrlDep = 1 in
def ECALLT_1r : _F1R<(outs), (ins GRRegs:$src),
"ecallt $src",
[]>;
-let isBarrier = 1, hasCtrlDep = 1 in
+let hasCtrlDep = 1 in
def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src),
"ecallf $src",
[]>;
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.h b/lib/Target/XCore/XCoreTargetObjectFile.h
index 7efb990..7424c78 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.h
+++ b/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -10,13 +10,12 @@
#ifndef LLVM_TARGET_XCORE_TARGETOBJECTFILE_H
#define LLVM_TARGET_XCORE_TARGETOBJECTFILE_H
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
namespace llvm {
class XCoreTargetObjectFile : public TargetLoweringObjectFileELF {
public:
-
void Initialize(MCContext &Ctx, const TargetMachine &TM);
// TODO: Classify globals as xcore wishes.
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
index eac4e17..37d7a00 100644
--- a/lib/Transforms/Hello/Hello.cpp
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -15,7 +15,6 @@
#define DEBUG_TYPE "hello"
#include "llvm/Pass.h"
#include "llvm/Function.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 325d353..7cb1367 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -124,7 +124,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
unsigned ArgNo = 0;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I, ++ArgNo)
- if (isa<PointerType>(I->getType()))
+ if (I->getType()->isPointerTy())
PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo));
if (PointerArgs.empty()) return 0;
@@ -317,7 +317,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid.
- if(isByVal || AllCalleesPassInValidPointerForArgument(Arg))
+ if (isByVal || AllCalleesPassInValidPointerForArgument(Arg))
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
// First, iterate the entry block and mark loads of (geps of) arguments as
@@ -673,7 +673,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
IE = SI->end(); II != IE; ++II) {
// Use i32 to index structs, and i64 for others (pointers/arrays).
// This satisfies GEP constraints.
- const Type *IdxTy = (isa<StructType>(ElTy) ?
+ const Type *IdxTy = (ElTy->isStructTy() ?
Type::getInt32Ty(F->getContext()) :
Type::getInt64Ty(F->getContext()));
Ops.push_back(ConstantInt::get(IdxTy, *II));
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 4972687..3c05f88 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -19,10 +19,11 @@
#define DEBUG_TYPE "constmerge"
#include "llvm/Transforms/IPO.h"
+#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
-#include <map>
using namespace llvm;
STATISTIC(NumMerged, "Number of global constants merged");
@@ -48,10 +49,10 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
bool ConstantMerge::runOnModule(Module &M) {
// Map unique constant/section pairs to globals. We don't want to merge
// globals in different sections.
- std::map<std::pair<Constant*, std::string>, GlobalVariable*> CMap;
+ DenseMap<Constant*, GlobalVariable*> CMap;
// Replacements - This vector contains a list of replacements to perform.
- std::vector<std::pair<GlobalVariable*, GlobalVariable*> > Replacements;
+ SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;
bool MadeChange = false;
@@ -76,19 +77,21 @@ bool ConstantMerge::runOnModule(Module &M) {
continue;
}
- // Only process constants with initializers.
- if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
- Constant *Init = GV->getInitializer();
-
- // Check to see if the initializer is already known.
- GlobalVariable *&Slot = CMap[std::make_pair(Init, GV->getSection())];
-
- if (Slot == 0) { // Nope, add it to the map.
- Slot = GV;
- } else if (GV->hasLocalLinkage()) { // Yup, this is a duplicate!
- // Make all uses of the duplicate constant use the canonical version.
- Replacements.push_back(std::make_pair(GV, Slot));
- }
+ // Only process constants with initializers in the default address space.
+ if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty())
+ continue;
+
+ Constant *Init = GV->getInitializer();
+
+ // Check to see if the initializer is already known.
+ GlobalVariable *&Slot = CMap[Init];
+
+ if (Slot == 0) { // Nope, add it to the map.
+ Slot = GV;
+ } else if (GV->hasLocalLinkage()) { // Yup, this is a duplicate!
+ // Make all uses of the duplicate constant use the canonical version.
+ Replacements.push_back(std::make_pair(GV, Slot));
}
}
@@ -100,11 +103,11 @@ bool ConstantMerge::runOnModule(Module &M) {
// now. This avoids invalidating the pointers in CMap, which are unneeded
// now.
for (unsigned i = 0, e = Replacements.size(); i != e; ++i) {
- // Eliminate any uses of the dead global...
+ // Eliminate any uses of the dead global.
Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
- // Delete the global value from the module...
- M.getGlobalList().erase(Replacements[i].first);
+ // Delete the global value from the module.
+ Replacements[i].first->eraseFromParent();
}
NumMerged += Replacements.size();
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 1749b1e..f386ed7 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -796,7 +796,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Replace by null for now.
Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
} else {
- assert(isa<StructType>(RetTy) &&
+ assert(RetTy->isStructTy() &&
"Return type changed, but not into a void. The old return type"
" must have been a struct!");
Instruction *InsertPt = Call;
@@ -870,7 +870,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (NFTy->getReturnType() == Type::getVoidTy(F->getContext())) {
RetVal = 0;
} else {
- assert (isa<StructType>(RetTy));
+ assert (RetTy->isStructTy());
// The original return value was a struct, insert
// extractvalue/insertvalue chains to extract only the values we need
// to return and insert them into our new result.
diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp
index 025d77e..662fbb5 100644
--- a/lib/Transforms/IPO/DeadTypeElimination.cpp
+++ b/lib/Transforms/IPO/DeadTypeElimination.cpp
@@ -57,13 +57,13 @@ ModulePass *llvm::createDeadTypeEliminationPass() {
//
static inline bool ShouldNukeSymtabEntry(const Type *Ty){
// Nuke all names for primitive types!
- if (Ty->isPrimitiveType() || Ty->isInteger())
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy())
return true;
// Nuke all pointers to primitive types as well...
if (const PointerType *PT = dyn_cast<PointerType>(Ty))
if (PT->getElementType()->isPrimitiveType() ||
- PT->getElementType()->isInteger())
+ PT->getElementType()->isIntegerTy())
return true;
return false;
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 64a6d78..298d5cf 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -175,7 +175,7 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {
for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
CI != CE; ++CI) {
Value *Arg = *CI;
- if (isa<PointerType>(Arg->getType()) && !PointsToLocalMemory(Arg))
+ if (Arg->getType()->isPointerTy() && !PointsToLocalMemory(Arg))
// Writes memory. Just give up.
return false;
}
@@ -257,7 +257,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC) {
continue;
for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A!=E; ++A)
- if (isa<PointerType>(A->getType()) && !A->hasNoCaptureAttr() &&
+ if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr() &&
!PointerMayBeCaptured(A, true, /*StoreCaptures=*/false)) {
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
@@ -362,7 +362,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) {
// We annotate noalias return values, which are only applicable to
// pointer types.
- if (!isa<PointerType>(F->getReturnType()))
+ if (!F->getReturnType()->isPointerTy())
continue;
if (!IsFunctionMallocLike(F, SCCNodes))
@@ -372,7 +372,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) {
bool MadeChange = false;
for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
Function *F = SCC[i]->getFunction();
- if (F->doesNotAlias(0) || !isa<PointerType>(F->getReturnType()))
+ if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy())
continue;
F->setDoesNotAlias(0);
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index ac91631..7b1e9c0 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -303,7 +303,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
Changed |= CleanupConstantGlobalUsers(CE, SubInit);
} else if (CE->getOpcode() == Instruction::BitCast &&
- isa<PointerType>(CE->getType())) {
+ CE->getType()->isPointerTy()) {
// Pointer cast, delete any stores and memsets to the global.
Changed |= CleanupConstantGlobalUsers(CE, 0);
}
@@ -431,7 +431,7 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
NumElements = SubVectorTy->getNumElements();
else {
- assert(isa<StructType>(*GEPI) &&
+ assert((*GEPI)->isStructTy() &&
"Indexed GEP type is not array, vector, or struct!");
continue;
}
@@ -543,7 +543,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
if (NewGlobals.empty())
return 0;
-
+
DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
@@ -642,7 +642,7 @@ static bool AllUsesOfValueWillTrapIfNull(Value *V,
return false;
} else if (isa<ICmpInst>(*UI) &&
isa<ConstantPointerNull>(UI->getOperand(1))) {
- // Ignore setcc X, null
+ // Ignore icmp X, null
} else {
//cerr << "NONTRAPPING USE: " << **UI;
return false;
@@ -813,57 +813,47 @@ static void ConstantPropUsersOf(Value *V) {
static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
CallInst *CI,
const Type *AllocTy,
- Value* NElems,
+ ConstantInt *NElements,
TargetData* TD) {
- DEBUG(dbgs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
-
- const Type *IntPtrTy = TD->getIntPtrType(GV->getContext());
+ DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
- // CI has either 0 or 1 bitcast uses (getMallocType() would otherwise have
- // returned NULL and we would not be here).
- BitCastInst *BCI = NULL;
- for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); UI != E; )
- if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++))))
- break;
-
- ConstantInt *NElements = cast<ConstantInt>(NElems);
- if (NElements->getZExtValue() != 1) {
- // If we have an array allocation, transform it to a single element
- // allocation to make the code below simpler.
- Type *NewTy = ArrayType::get(AllocTy, NElements->getZExtValue());
- unsigned TypeSize = TD->getTypeAllocSize(NewTy);
- if (const StructType *ST = dyn_cast<StructType>(NewTy))
- TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
- Instruction *NewCI = CallInst::CreateMalloc(CI, IntPtrTy, NewTy,
- ConstantInt::get(IntPtrTy, TypeSize));
- Value* Indices[2];
- Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy);
- Value *NewGEP = GetElementPtrInst::Create(NewCI, Indices, Indices + 2,
- NewCI->getName()+".el0", CI);
- Value *Cast = new BitCastInst(NewGEP, CI->getType(), "el0", CI);
- if (BCI) BCI->replaceAllUsesWith(NewGEP);
- CI->replaceAllUsesWith(Cast);
- if (BCI) BCI->eraseFromParent();
- CI->eraseFromParent();
- BCI = dyn_cast<BitCastInst>(NewCI);
- CI = BCI ? extractMallocCallFromBitCast(BCI) : cast<CallInst>(NewCI);
- }
+ const Type *GlobalType;
+ if (NElements->getZExtValue() == 1)
+ GlobalType = AllocTy;
+ else
+ // If we have an array allocation, the global variable is an array of that type.
+ GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());
// Create the new global variable. The contents of the malloc'd memory is
// undefined, so initialize with an undef value.
- const Type *MAT = getMallocAllocatedType(CI);
- Constant *Init = UndefValue::get(MAT);
GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
- MAT, false,
- GlobalValue::InternalLinkage, Init,
+ GlobalType, false,
+ GlobalValue::InternalLinkage,
+ UndefValue::get(GlobalType),
GV->getName()+".body",
GV,
GV->isThreadLocal());
- // Anything that used the malloc or its bitcast now uses the global directly.
- if (BCI) BCI->replaceAllUsesWith(NewGV);
- CI->replaceAllUsesWith(new BitCastInst(NewGV, CI->getType(), "newgv", CI));
-
+ // If there are bitcast users of the malloc (which is typical, usually we have
+ // a malloc + bitcast) then replace them with uses of the new global. Update
+ // other users to use the global as well.
+ BitCastInst *TheBC = 0;
+ while (!CI->use_empty()) {
+ Instruction *User = cast<Instruction>(CI->use_back());
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
+ if (BCI->getType() == NewGV->getType()) {
+ BCI->replaceAllUsesWith(NewGV);
+ BCI->eraseFromParent();
+ } else {
+ BCI->setOperand(0, NewGV);
+ }
+ } else {
+ if (TheBC == 0)
+ TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
+ User->replaceUsesOfWith(CI, TheBC);
+ }
+ }
+
Constant *RepValue = NewGV;
if (NewGV->getType() != GV->getType()->getElementType())
RepValue = ConstantExpr::getBitCast(RepValue,
@@ -879,60 +869,60 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
bool InitBoolUsed = false;
// Loop over all uses of GV, processing them in turn.
- std::vector<StoreInst*> Stores;
- while (!GV->use_empty())
- if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) {
- while (!LI->use_empty()) {
- Use &LoadUse = LI->use_begin().getUse();
- if (!isa<ICmpInst>(LoadUse.getUser()))
- LoadUse = RepValue;
- else {
- ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
- // Replace the cmp X, 0 with a use of the bool value.
- Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI);
- InitBoolUsed = true;
- switch (ICI->getPredicate()) {
- default: llvm_unreachable("Unknown ICmp Predicate!");
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_SLT: // X < null -> always false
- LV = ConstantInt::getFalse(GV->getContext());
- break;
- case ICmpInst::ICMP_ULE:
- case ICmpInst::ICMP_SLE:
- case ICmpInst::ICMP_EQ:
- LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
- break;
- case ICmpInst::ICMP_NE:
- case ICmpInst::ICMP_UGE:
- case ICmpInst::ICMP_SGE:
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_SGT:
- break; // no change.
- }
- ICI->replaceAllUsesWith(LV);
- ICI->eraseFromParent();
- }
- }
- LI->eraseFromParent();
- } else {
- StoreInst *SI = cast<StoreInst>(GV->use_back());
+ while (!GV->use_empty()) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) {
// The global is initialized when the store to it occurs.
new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, SI);
SI->eraseFromParent();
+ continue;
}
+
+ LoadInst *LI = cast<LoadInst>(GV->use_back());
+ while (!LI->use_empty()) {
+ Use &LoadUse = LI->use_begin().getUse();
+ if (!isa<ICmpInst>(LoadUse.getUser())) {
+ LoadUse = RepValue;
+ continue;
+ }
+
+ ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
+ // Replace the cmp X, 0 with a use of the bool value.
+ Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI);
+ InitBoolUsed = true;
+ switch (ICI->getPredicate()) {
+ default: llvm_unreachable("Unknown ICmp Predicate!");
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT: // X < null -> always false
+ LV = ConstantInt::getFalse(GV->getContext());
+ break;
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_EQ:
+ LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ break; // no change.
+ }
+ ICI->replaceAllUsesWith(LV);
+ ICI->eraseFromParent();
+ }
+ LI->eraseFromParent();
+ }
// If the initialization boolean was used, insert it, otherwise delete it.
if (!InitBoolUsed) {
while (!InitBool->use_empty()) // Delete initializations
- cast<Instruction>(InitBool->use_back())->eraseFromParent();
+ cast<StoreInst>(InitBool->use_back())->eraseFromParent();
delete InitBool;
} else
GV->getParent()->getGlobalList().insert(GV, InitBool);
-
- // Now the GV is dead, nuke it and the malloc (both CI and BCI).
+ // Now the GV is dead, nuke it and the malloc.
GV->eraseFromParent();
- if (BCI) BCI->eraseFromParent();
CI->eraseFromParent();
// To further other optimizations, loop over all users of NewGV and try to
@@ -1303,9 +1293,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
ConstantInt::get(IntPtrTy, TypeSize),
NElems,
CI->getName() + ".f" + Twine(FieldNo));
- CallInst *NCI = dyn_cast<BitCastInst>(NMI) ?
- extractMallocCallFromBitCast(NMI) : cast<CallInst>(NMI);
- FieldMallocs.push_back(NCI);
+ FieldMallocs.push_back(NMI);
new StoreInst(NMI, NGV, CI);
}
@@ -1497,7 +1485,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// something.
if (TD &&
NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
- GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems, TD);
+ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD);
return true;
}
@@ -1556,7 +1544,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
// only has one (non-null) value stored into it, then we can optimize any
// users of the loaded value (often calls and loads) that would trap if the
// value was null.
- if (isa<PointerType>(GV->getInitializer()->getType()) &&
+ if (GV->getInitializer()->getType()->isPointerTy() &&
GV->getInitializer()->isNullValue()) {
if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
if (GV->getInitializer()->getType() != SOVC->getType())
@@ -1590,8 +1578,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
// simplification. In these cases, we typically end up with "cond ? v1 : v2"
// where v1 and v2 both require constant pool loads, a big loss.
if (GVElType == Type::getInt1Ty(GV->getContext()) ||
- GVElType->isFloatingPoint() ||
- isa<PointerType>(GVElType) || isa<VectorType>(GVElType))
+ GVElType->isFloatingPointTy() ||
+ GVElType->isPointerTy() || GVElType->isVectorTy())
return false;
// Walk the use list of the global seeing if all the uses are load or store.
@@ -1925,7 +1913,7 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
if (!ATy) return 0;
const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
if (!STy || STy->getNumElements() != 2 ||
- !STy->getElementType(0)->isInteger(32)) return 0;
+ !STy->getElementType(0)->isIntegerTy(32)) return 0;
const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1));
if (!PFTy) return 0;
const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType());
@@ -2148,7 +2136,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
Elts[CI->getZExtValue()] =
EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
- if (isa<ArrayType>(Init->getType()))
+ if (Init->getType()->isArrayTy())
return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
else
return ConstantVector::get(&Elts[0], Elts.size());
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 97e2f06..752a97c 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -41,12 +41,9 @@ static cl::opt<int>
InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
cl::desc("Control the amount of inlining to perform (default = 225)"));
-static cl::opt<bool>
-RespectHint("respect-inlinehint", cl::Hidden,
- cl::desc("Respect the inlinehint attribute"));
-
-// Threshold to use when inlinehint is given.
-const int HintThreshold = 300;
+static cl::opt<int>
+HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
+ cl::desc("Threshold for inlining functions with inline hint"));
// Threshold to use when optsize is specified (and there is no -inline-limit).
const int OptSizeThreshold = 75;
@@ -183,20 +180,22 @@ static bool InlineCallIfPossible(CallSite CS, CallGraph &CG,
}
unsigned Inliner::getInlineThreshold(CallSite CS) const {
- // Listen to inlinehint when -respect-inlinehint is given.
- Function *Callee = CS.getCalledFunction();
- if (RespectHint && Callee && !Callee->isDeclaration() &&
- Callee->hasFnAttr(Attribute::InlineHint))
- return HintThreshold;
+ int thres = InlineThreshold;
// Listen to optsize when -inline-limit is not given.
Function *Caller = CS.getCaller();
if (Caller && !Caller->isDeclaration() &&
Caller->hasFnAttr(Attribute::OptimizeForSize) &&
InlineLimit.getNumOccurrences() == 0)
- return OptSizeThreshold;
+ thres = OptSizeThreshold;
+
+ // Listen to inlinehint when it would increase the threshold.
+ Function *Callee = CS.getCalledFunction();
+ if (HintThreshold > thres && Callee && !Callee->isDeclaration() &&
+ Callee->hasFnAttr(Attribute::InlineHint))
+ thres = HintThreshold;
- return InlineThreshold;
+ return thres;
}
/// shouldInline - Return true if the inliner should attempt to inline
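
The new getInlineThreshold above composes three knobs: the base -inline-threshold (225), the optsize threshold (75, used only when -inline-limit was not given explicitly), and -inlinehint-threshold (325, which only ever raises the result). A small self-contained restatement with illustrative names, assuming those defaults:

#include <cassert>

// Defaults as they appear in this patch.
static const int InlineThreshold  = 225;
static const int OptSizeThreshold = 75;
static const int HintThreshold    = 325;

// CallerOptSize: caller is marked optsize; InlineLimitGiven: -inline-limit was
// passed on the command line; CalleeHasHint: callee carries inlinehint.
static int computeInlineThreshold(bool CallerOptSize, bool InlineLimitGiven,
                                  bool CalleeHasHint) {
  int thres = InlineThreshold;
  if (CallerOptSize && !InlineLimitGiven)
    thres = OptSizeThreshold;            // shrink when optimizing for size
  if (CalleeHasHint && HintThreshold > thres)
    thres = HintThreshold;               // the hint can only raise the threshold
  return thres;
}

int main() {
  assert(computeInlineThreshold(false, false, false) == 225);
  assert(computeInlineThreshold(true,  false, false) == 75);
  assert(computeInlineThreshold(true,  false, true)  == 325); // hint beats optsize
}
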
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 0e0d83a..310e4a2 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -214,6 +214,15 @@ static bool StripDebugInfo(Module &M) {
Changed = true;
}
+ if (Function *DbgVal = M.getFunction("llvm.dbg.value")) {
+ while (!DbgVal->use_empty()) {
+ CallInst *CI = cast<CallInst>(DbgVal->use_back());
+ CI->eraseFromParent();
+ }
+ DbgVal->eraseFromParent();
+ Changed = true;
+ }
+
NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv");
if (NMD) {
Changed = true;
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 5367900..bd06499 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -117,11 +117,11 @@ public:
Instruction *visitUDiv(BinaryOperator &I);
Instruction *visitSDiv(BinaryOperator &I);
Instruction *visitFDiv(BinaryOperator &I);
- Instruction *FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS);
- Instruction *FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS);
+ Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+ Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
Instruction *visitAnd(BinaryOperator &I);
- Instruction *FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS);
- Instruction *FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS);
+ Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+ Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
Value *A, Value *B, Value *C);
Instruction *visitOr (BinaryOperator &I);
@@ -199,13 +199,15 @@ private:
SmallVectorImpl<Value*> &NewIndices);
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
- /// ValueRequiresCast - Return true if the cast from "V to Ty" actually
- /// results in any code being generated. It does not require codegen if V is
- /// simple enough or if the cast can be folded into other casts.
- bool ValueRequiresCast(Instruction::CastOps opcode,const Value *V,
- const Type *Ty);
+ /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
+ /// results in any code being generated and is interesting to optimize out. If
+ /// the cast can be eliminated by some other simple transformation, we prefer
+ /// to do the simplification first.
+ bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V,
+ const Type *Ty);
Instruction *visitCallSite(CallSite CS);
+ Instruction *tryOptimizeCall(CallInst *CI, const TargetData *TD);
bool transformConstExprCastCall(CallSite CS);
Instruction *transformCallThroughTrampoline(CallSite CS);
Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
@@ -326,8 +328,8 @@ private:
Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
bool isSub, Instruction &I);
- Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
- bool isSigned, bool Inside, Instruction &IB);
+ Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+ bool isSigned, bool Inside);
Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
Instruction *MatchBSwap(BinaryOperator &I);
bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index c2924ab..4d2c89e 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -35,7 +35,7 @@ static Constant *SubOne(ConstantInt *C) {
// Otherwise, return null.
//
static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
- if (!V->hasOneUse() || !V->getType()->isInteger())
+ if (!V->hasOneUse() || !V->getType()->isIntegerTy())
return 0;
Instruction *I = dyn_cast<Instruction>(V);
@@ -145,10 +145,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
}
- if (I.getType()->isInteger(1))
+ if (I.getType()->isIntegerTy(1))
return BinaryOperator::CreateXor(LHS, RHS);
- if (I.getType()->isInteger()) {
+ if (I.getType()->isIntegerTy()) {
// X + X --> X << 1
if (LHS == RHS)
return BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1));
@@ -168,7 +168,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// -A + B --> B - A
// -A + -B --> -(A + B)
if (Value *LHSV = dyn_castNegVal(LHS)) {
- if (LHS->getType()->isIntOrIntVector()) {
+ if (LHS->getType()->isIntOrIntVectorTy()) {
if (Value *RHSV = dyn_castNegVal(RHS)) {
Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
return BinaryOperator::CreateNeg(NewAdd);
@@ -222,7 +222,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
// W*X + Y*Z --> W * (X+Z) iff W == Y
- if (I.getType()->isIntOrIntVector()) {
+ if (I.getType()->isIntOrIntVectorTy()) {
Value *W, *X, *Y, *Z;
if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
@@ -373,10 +373,10 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
return ReplaceInstUsesWith(I, LHS);
- // Check for (add double (sitofp x), y), see if we can merge this into an
+ // Check for (fadd double (sitofp x), y), see if we can merge this into an
// integer add followed by a promotion.
if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
- // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
+ // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
// ... if the constant fits in the integer value. This is useful for things
// like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
// requires a constant pool load, and generally allows the add to be better
@@ -394,7 +394,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
}
}
- // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
+ // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
// Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of int->fp conversions),
@@ -560,7 +560,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Op0); // undef - X -> undef
if (isa<UndefValue>(Op1))
return ReplaceInstUsesWith(I, Op1); // X - undef -> undef
- if (I.getType()->isInteger(1))
+ if (I.getType()->isIntegerTy(1))
return BinaryOperator::CreateXor(Op0, Op1);
if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 28fd70e..3fb3de7 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -137,80 +137,44 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
/// opcode and two operands into either a constant true or false, or a brand
/// new ICmp instruction. The sign is passed in to determine which kind
/// of predicate to use in the new icmp instruction.
-static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS) {
+static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
+ InstCombiner::BuilderTy *Builder) {
+ CmpInst::Predicate Pred;
switch (Code) {
default: assert(0 && "Illegal ICmp code!");
- case 0:
- return ConstantInt::getFalse(LHS->getContext());
- case 1:
- if (Sign)
- return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS);
- return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS);
- case 2:
- return new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS);
- case 3:
- if (Sign)
- return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS);
- return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS);
- case 4:
- if (Sign)
- return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS);
- return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS);
- case 5:
- return new ICmpInst(ICmpInst::ICMP_NE, LHS, RHS);
- case 6:
- if (Sign)
- return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS);
- return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS);
- case 7:
- return ConstantInt::getTrue(LHS->getContext());
+ case 0: // False.
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ case 1: Pred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
+ case 2: Pred = ICmpInst::ICMP_EQ; break;
+ case 3: Pred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
+ case 4: Pred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
+ case 5: Pred = ICmpInst::ICMP_NE; break;
+ case 6: Pred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
+ case 7: // True.
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
}
+ return Builder->CreateICmp(Pred, LHS, RHS);
}
/// getFCmpValue - This is the complement of getFCmpCode, which turns an
/// opcode and two operands into either a FCmp instruction. isordered is passed
/// in to determine which kind of predicate to use in the new fcmp instruction.
static Value *getFCmpValue(bool isordered, unsigned code,
- Value *LHS, Value *RHS) {
+ Value *LHS, Value *RHS,
+ InstCombiner::BuilderTy *Builder) {
+ CmpInst::Predicate Pred;
switch (code) {
- default: llvm_unreachable("Illegal FCmp code!");
- case 0:
- if (isordered)
- return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS);
- else
- return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS);
- case 1:
- if (isordered)
- return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS);
- else
- return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS);
- case 2:
- if (isordered)
- return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS);
- else
- return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS);
- case 3:
- if (isordered)
- return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS);
- else
- return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS);
- case 4:
- if (isordered)
- return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS);
- else
- return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS);
- case 5:
- if (isordered)
- return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS);
- else
- return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS);
- case 6:
- if (isordered)
- return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS);
- else
- return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS);
- case 7: return ConstantInt::getTrue(LHS->getContext());
+ default: assert(0 && "Illegal FCmp code!");
+ case 0: Pred = isordered ? FCmpInst::FCMP_ORD : FCmpInst::FCMP_UNO; break;
+ case 1: Pred = isordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; break;
+ case 2: Pred = isordered ? FCmpInst::FCMP_OEQ : FCmpInst::FCMP_UEQ; break;
+ case 3: Pred = isordered ? FCmpInst::FCMP_OGE : FCmpInst::FCMP_UGE; break;
+ case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break;
+ case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break;
+ case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break;
+ case 7: return ConstantInt::getTrue(LHS->getContext());
}
+ return Builder->CreateFCmp(Pred, LHS, RHS);
}
/// PredicatesFoldable - Return true if both predicates match sign or if at
@@ -355,40 +319,39 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
/// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
/// whether to treat the V, Lo and HI as signed or not. IB is the location to
/// insert new instructions.
-Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
- bool isSigned, bool Inside,
- Instruction &IB) {
+Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+ bool isSigned, bool Inside) {
assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
"Lo is not <= Hi in range emission code!");
if (Inside) {
if (Lo == Hi) // Trivially false.
- return new ICmpInst(ICmpInst::ICMP_NE, V, V);
+ return ConstantInt::getFalse(V->getContext());
// V >= Min && V < Hi --> V < Hi
if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
ICmpInst::Predicate pred = (isSigned ?
ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
- return new ICmpInst(pred, V, Hi);
+ return Builder->CreateICmp(pred, V, Hi);
}
// Emit V-Lo <u Hi-Lo
Constant *NegLo = ConstantExpr::getNeg(Lo);
Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
- return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
+ return Builder->CreateICmpULT(Add, UpperBound);
}
if (Lo == Hi) // Trivially true.
- return new ICmpInst(ICmpInst::ICMP_EQ, V, V);
+ return ConstantInt::getTrue(V->getContext());
// V < Min || V >= Hi -> V > Hi-1
Hi = SubOne(cast<ConstantInt>(Hi));
if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
ICmpInst::Predicate pred = (isSigned ?
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
- return new ICmpInst(pred, V, Hi);
+ return Builder->CreateICmp(pred, V, Hi);
}
// Emit V-Lo >u Hi-1-Lo
@@ -396,7 +359,7 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
- return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);
+ return Builder->CreateICmpUGT(Add, LowerBound);
}
// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
@@ -472,8 +435,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
}
/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
-Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
- ICmpInst *LHS, ICmpInst *RHS) {
+Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
// (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
@@ -486,11 +448,7 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
unsigned Code = getICmpCode(LHS) & getICmpCode(RHS);
bool isSigned = LHS->isSigned() || RHS->isSigned();
- Value *RV = getICmpValue(isSigned, Code, Op0, Op1);
- if (Instruction *I = dyn_cast<Instruction>(RV))
- return I;
- // Otherwise, it's a constant boolean value.
- return ReplaceInstUsesWith(I, RV);
+ return getICmpValue(isSigned, Code, Op0, Op1, Builder);
}
}
@@ -506,13 +464,13 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
if (LHSCC == ICmpInst::ICMP_ULT &&
LHSCst->getValue().isPowerOf2()) {
Value *NewOr = Builder->CreateOr(Val, Val2);
- return new ICmpInst(LHSCC, NewOr, LHSCst);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
}
// (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
Value *NewOr = Builder->CreateOr(Val, Val2);
- return new ICmpInst(LHSCC, NewOr, LHSCst);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
}
}
@@ -562,33 +520,32 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false
case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false
case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13
case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13
case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13
- return ReplaceInstUsesWith(I, LHS);
+ return LHS;
}
case ICmpInst::ICMP_NE:
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_ULT:
if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
- return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst);
+ return Builder->CreateICmpULT(Val, LHSCst);
break; // (X != 13 & X u< 15) -> no change
case ICmpInst::ICMP_SLT:
if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
- return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst);
+ return Builder->CreateICmpSLT(Val, LHSCst);
break; // (X != 13 & X s< 15) -> no change
case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15
case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15
case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15
- return ReplaceInstUsesWith(I, RHS);
+ return RHS;
case ICmpInst::ICMP_NE:
if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
Constant *AddCST = ConstantExpr::getNeg(LHSCst);
Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
- return new ICmpInst(ICmpInst::ICMP_UGT, Add,
- ConstantInt::get(Add->getType(), 1));
+ return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1));
}
break; // (X != 13 & X != 15) -> no change
}
@@ -598,12 +555,12 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false
case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change
break;
case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13
case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13
- return ReplaceInstUsesWith(I, LHS);
+ return LHS;
case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change
break;
}
@@ -613,12 +570,12 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false
case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change
break;
case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13
case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13
- return ReplaceInstUsesWith(I, LHS);
+ return LHS;
case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change
break;
}
@@ -628,16 +585,15 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15
case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15
- return ReplaceInstUsesWith(I, RHS);
+ return RHS;
case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change
break;
case ICmpInst::ICMP_NE:
if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
- return new ICmpInst(LHSCC, Val, RHSCst);
+ return Builder->CreateICmp(LHSCC, Val, RHSCst);
break; // (X u> 13 & X != 15) -> no change
case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1
- return InsertRangeTest(Val, AddOne(LHSCst),
- RHSCst, false, true, I);
+ return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true);
case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change
break;
}
@@ -647,16 +603,15 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15
case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15
- return ReplaceInstUsesWith(I, RHS);
+ return RHS;
case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change
break;
case ICmpInst::ICMP_NE:
if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
- return new ICmpInst(LHSCC, Val, RHSCst);
+ return Builder->CreateICmp(LHSCC, Val, RHSCst);
break; // (X s> 13 & X != 15) -> no change
case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1
- return InsertRangeTest(Val, AddOne(LHSCst),
- RHSCst, true, true, I);
+ return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, true, true);
case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change
break;
}
@@ -666,9 +621,10 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
return 0;
}
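
The integer folds above all reduce two tests to one compare by re-biasing the value and relying on unsigned wraparound. A minimal standalone sketch (plain C++, not part of the patch; the constants 13 and 14 simply mirror the comments) that exhaustively checks the (X != 13 & X != 14) -> (X - 13) u> 1 identity for i8 values:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned v = 0; v < 256; ++v) {
    uint8_t x = v;
    bool original = (x != 13) && (x != 14);
    bool folded   = (uint8_t)(x - 13) > 1;  // unsigned wrap plays the role of the u> compare
    assert(original == folded);
  }
  return 0;
}
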
-Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS,
- FCmpInst *RHS) {
-
+/// FoldAndOfFCmps - Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of
+/// instcombine, this returns a Value which should already be inserted into the
+/// function.
+Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
RHS->getPredicate() == FCmpInst::FCMP_ORD) {
// (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y)
@@ -677,17 +633,15 @@ Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS,
// If either of the constants are nans, then the whole thing returns
// false.
if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
- return new FCmpInst(FCmpInst::FCMP_ORD,
- LHS->getOperand(0), RHS->getOperand(0));
+ return ConstantInt::getFalse(LHS->getContext());
+ return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
}
// Handle vector zeros. This occurs because the canonical form of
// "fcmp ord x,x" is "fcmp ord x, 0".
if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
isa<ConstantAggregateZero>(RHS->getOperand(1)))
- return new FCmpInst(FCmpInst::FCMP_ORD,
- LHS->getOperand(0), RHS->getOperand(0));
+ return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
return 0;
}
@@ -705,14 +659,13 @@ Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS,
if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
// Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
if (Op0CC == Op1CC)
- return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
-
+ return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
if (Op0CC == FCmpInst::FCMP_TRUE)
- return ReplaceInstUsesWith(I, RHS);
+ return RHS;
if (Op1CC == FCmpInst::FCMP_TRUE)
- return ReplaceInstUsesWith(I, LHS);
+ return LHS;
bool Op0Ordered;
bool Op1Ordered;
@@ -727,14 +680,14 @@ Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS,
// uno && ueq -> uno && (uno || eq) -> ueq
// ord && olt -> ord && (ord && lt) -> olt
if (Op0Ordered == Op1Ordered)
- return ReplaceInstUsesWith(I, RHS);
+ return RHS;
// uno && oeq -> uno && (ord && eq) -> false
// uno && ord -> false
if (!Op0Ordered)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
// ord && ueq -> ord && (uno || eq) -> oeq
- return cast<Instruction>(getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS));
+ return getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS, Builder);
}
}
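
The ordered/unordered reasoning in FoldAndOfFCmps boils down to "ord means neither operand is NaN". A small illustrative check (assuming ordinary C++ doubles stand in for the IR operands) of the (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) rewrite:

#include <cassert>
#include <cmath>

static bool ord(double a, double b) { return !std::isnan(a) && !std::isnan(b); }

int main() {
  const double vals[] = {0.0, -1.5, NAN};
  for (double x : vals)
    for (double y : vals)
      assert((ord(x, 1.0) && ord(y, 1.0)) == ord(x, y));  // 1.0 is a non-NaN constant
  return 0;
}
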
@@ -930,26 +883,47 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1))
if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
- if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS))
- return Res;
-
+ if (Value *Res = FoldAndOfICmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+
+ // If and'ing two fcmp, try combine them into one.
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+ if (Value *Res = FoldAndOfFCmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+
+
// fold (and (cast A), (cast B)) -> (cast (and A, B))
if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
- if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
- if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ?
- const Type *SrcTy = Op0C->getOperand(0)->getType();
- if (SrcTy == Op1C->getOperand(0)->getType() &&
- SrcTy->isIntOrIntVector() &&
- // Only do this if the casts both really cause code to be generated.
- ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
- I.getType()) &&
- ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
- I.getType())) {
- Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0),
- Op1C->getOperand(0), I.getName());
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) {
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ?
+ SrcTy == Op1C->getOperand(0)->getType() &&
+ SrcTy->isIntOrIntVectorTy()) {
+ Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+ // Only do this if the casts both really cause code to be generated.
+ if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+ Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName());
return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
+
+ // If this is and(cast(icmp), cast(icmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+ if (Value *Res = FoldAndOfICmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+
+ // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+ if (Value *Res = FoldAndOfFCmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
}
+ }
// (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts.
if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
@@ -965,13 +939,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
}
- // If and'ing two fcmp, try combine them into one.
- if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
- if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
- if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
- return Res;
- }
-
return Changed ? &I : 0;
}
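
The (and (cast A), (cast B)) -> (cast (and A, B)) rewrite in visitAnd is valid because bitwise AND commutes with widening casts. A quick standalone verification for the zext case (illustrative only, i8 operands widened to i32):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b) {
      uint8_t A = a, B = b;
      // and(zext A, zext B) == zext(and A, B)
      assert(((uint32_t)A & (uint32_t)B) == (uint32_t)(uint8_t)(A & B));
    }
  return 0;
}
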
@@ -1143,7 +1110,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
// If A is not a select of -1/0, this cannot match.
Value *Cond = 0;
if (!match(A, m_SExt(m_Value(Cond))) ||
- !Cond->getType()->isInteger(1))
+ !Cond->getType()->isIntegerTy(1))
return 0;
// ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B.
@@ -1161,8 +1128,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
}
/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
-Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
- ICmpInst *LHS, ICmpInst *RHS) {
+Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
// (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
@@ -1175,11 +1141,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
unsigned Code = getICmpCode(LHS) | getICmpCode(RHS);
bool isSigned = LHS->isSigned() || RHS->isSigned();
- Value *RV = getICmpValue(isSigned, Code, Op0, Op1);
- if (Instruction *I = dyn_cast<Instruction>(RV))
- return I;
- // Otherwise, it's a constant boolean value.
- return ReplaceInstUsesWith(I, RV);
+ return getICmpValue(isSigned, Code, Op0, Op1, Builder);
}
}
@@ -1193,7 +1155,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
if (LHSCst == RHSCst && LHSCC == RHSCC &&
LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
Value *NewOr = Builder->CreateOr(Val, Val2);
- return new ICmpInst(LHSCC, NewOr, LHSCst);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
}
// From here on, we only handle:
@@ -1245,7 +1207,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
Constant *AddCST = ConstantExpr::getNeg(LHSCst);
Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
- return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
+ return Builder->CreateICmpULT(Add, AddCST);
}
break; // (X == 13 | X == 15) -> no change
case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change
@@ -1254,7 +1216,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15
case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15
case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15
- return ReplaceInstUsesWith(I, RHS);
+ return RHS;
}
break;
case ICmpInst::ICMP_NE:
@@ -1263,11 +1225,11 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13
case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13
case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13
- return ReplaceInstUsesWith(I, LHS);
+ return LHS;
case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true
case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true
case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ConstantInt::getTrue(LHS->getContext());
}
break;
case ICmpInst::ICMP_ULT:
@@ -1279,14 +1241,13 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
// If RHSCst is [us]MAXINT, it is always false. Not handling
// this can cause overflow.
if (RHSCst->isMaxValue(false))
- return ReplaceInstUsesWith(I, LHS);
- return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
- false, false, I);
+ return LHS;
+ return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), false, false);
case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change
break;
case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15
case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15
- return ReplaceInstUsesWith(I, RHS);
+ return RHS;
case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change
break;
}
@@ -1300,14 +1261,13 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
// If RHSCst is [us]MAXINT, it is always false. Not handling
// this can cause overflow.
if (RHSCst->isMaxValue(true))
- return ReplaceInstUsesWith(I, LHS);
- return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
- true, false, I);
+ return LHS;
+ return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), true, false);
case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change
break;
case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15
case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15
- return ReplaceInstUsesWith(I, RHS);
+ return RHS;
case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change
break;
}
@@ -1317,12 +1277,12 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13
case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13
- return ReplaceInstUsesWith(I, LHS);
+ return LHS;
case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change
break;
case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true
case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ConstantInt::getTrue(LHS->getContext());
case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change
break;
}
@@ -1332,12 +1292,12 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13
case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13
- return ReplaceInstUsesWith(I, LHS);
+ return LHS;
case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change
break;
case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true
case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ConstantInt::getTrue(LHS->getContext());
case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change
break;
}
@@ -1346,8 +1306,10 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
return 0;
}
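
FoldOrOfICmps uses the same re-biasing trick in the other direction: two equality tests against adjacent constants become one unsigned range compare. A standalone sketch (not from the patch) of the (X == 13 | X == 14) -> (X - 13) u< 2 case for i8 values:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned v = 0; v < 256; ++v) {
    uint8_t x = v;
    assert(((x == 13) || (x == 14)) == ((uint8_t)(x - 13) < 2));  // 2 == (14 + 1) - 13
  }
  return 0;
}
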
-Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS,
- FCmpInst *RHS) {
+/// FoldOrOfFCmps - Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of
+/// instcombine, this returns a Value which should already be inserted into the
+/// function.
+Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
RHS->getPredicate() == FCmpInst::FCMP_UNO &&
LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
@@ -1356,20 +1318,18 @@ Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS,
// If either of the constants are nans, then the whole thing returns
// true.
if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ConstantInt::getTrue(LHS->getContext());
// Otherwise, no need to compare the two constants, compare the
// rest.
- return new FCmpInst(FCmpInst::FCMP_UNO,
- LHS->getOperand(0), RHS->getOperand(0));
+ return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
}
// Handle vector zeros. This occurs because the canonical form of
// "fcmp uno x,x" is "fcmp uno x, 0".
if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
isa<ConstantAggregateZero>(RHS->getOperand(1)))
- return new FCmpInst(FCmpInst::FCMP_UNO,
- LHS->getOperand(0), RHS->getOperand(0));
+ return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
return 0;
}
@@ -1386,14 +1346,13 @@ Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS,
if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
// Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
if (Op0CC == Op1CC)
- return new FCmpInst((FCmpInst::Predicate)Op0CC,
- Op0LHS, Op0RHS);
+ return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
if (Op0CC == FCmpInst::FCMP_FALSE)
- return ReplaceInstUsesWith(I, RHS);
+ return RHS;
if (Op1CC == FCmpInst::FCMP_FALSE)
- return ReplaceInstUsesWith(I, LHS);
+ return LHS;
bool Op0Ordered;
bool Op1Ordered;
unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
@@ -1401,11 +1360,7 @@ Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS,
if (Op0Ordered == Op1Ordered) {
// If both are ordered or unordered, return a new fcmp with
// or'ed predicates.
- Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS);
- if (Instruction *I = dyn_cast<Instruction>(RV))
- return I;
- // Otherwise, it's a constant boolean value...
- return ReplaceInstUsesWith(I, RV);
+ return getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS, Builder);
}
}
return 0;
@@ -1446,8 +1401,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Value *V = SimplifyOrInst(Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
-
-
+
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
@@ -1456,7 +1410,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
ConstantInt *C1 = 0; Value *X = 0;
// (X & C1) | C2 --> (X | C2) & (C1|C2)
+ // iff (C1 & C2) == 0.
if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) &&
+ (RHS->getValue() & C1->getValue()) != 0 &&
Op0->hasOneUse()) {
Value *Or = Builder->CreateOr(X, RHS);
Or->takeName(Op0);
@@ -1479,6 +1435,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
if (Instruction *R = FoldOpIntoSelect(I, SI))
return R;
+
if (isa<PHINode>(Op0))
if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
@@ -1600,7 +1557,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants.
// Don't do this for vector select idioms, the code generator doesn't handle
// them well yet.
- if (!isa<VectorType>(I.getType())) {
+ if (!I.getType()->isVectorTy()) {
if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D))
return Match;
if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C))
@@ -1666,40 +1623,50 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
- if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS))
- return Res;
+ if (Value *Res = FoldOrOfICmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+ // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+ if (Value *Res = FoldOrOfFCmps(LHS, RHS))
+ return ReplaceInstUsesWith(I, Res);
+
// fold (or (cast A), (cast B)) -> (cast (or A, B))
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
- if (!isa<ICmpInst>(Op0C->getOperand(0)) ||
- !isa<ICmpInst>(Op1C->getOperand(0))) {
- const Type *SrcTy = Op0C->getOperand(0)->getType();
- if (SrcTy == Op1C->getOperand(0)->getType() &&
- SrcTy->isIntOrIntVector() &&
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() &&
+ SrcTy->isIntOrIntVectorTy()) {
+ Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+ if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) &&
// Only do this if the casts both really cause code to be
// generated.
- ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
- I.getType()) &&
- ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
- I.getType())) {
- Value *NewOp = Builder->CreateOr(Op0C->getOperand(0),
- Op1C->getOperand(0), I.getName());
+ ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+ Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
+
+ // If this is or(cast(icmp), cast(icmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+ if (Value *Res = FoldOrOfICmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+
+ // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+ if (Value *Res = FoldOrOfFCmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
}
}
}
-
- // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
- if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
- if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
- if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
- return Res;
- }
-
return Changed ? &I : 0;
}
@@ -1723,7 +1690,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
- if (isa<VectorType>(I.getType()))
+ if (I.getType()->isVectorTy())
if (isa<ConstantAggregateZero>(Op1))
return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X
@@ -1971,11 +1938,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
bool isSigned = LHS->isSigned() || RHS->isSigned();
- Value *RV = getICmpValue(isSigned, Code, Op0, Op1);
- if (Instruction *I = dyn_cast<Instruction>(RV))
- return I;
- // Otherwise, it's a constant boolean value.
- return ReplaceInstUsesWith(I, RV);
+ return ReplaceInstUsesWith(I,
+ getICmpValue(isSigned, Code, Op0, Op1, Builder));
}
}
@@ -1984,12 +1948,12 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind?
const Type *SrcTy = Op0C->getOperand(0)->getType();
- if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
+ if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntegerTy() &&
// Only do this if the casts both really cause code to be generated.
- ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
- I.getType()) &&
- ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
- I.getType())) {
+ ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0),
+ I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0),
+ I.getType())) {
Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
Op1C->getOperand(0), I.getName());
return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 4929f40..e2b7d3d 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -16,6 +16,7 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
using namespace llvm;
/// getPromotedType - Return the specified type promoted as it would be to pass
@@ -199,7 +200,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
// Extract the length and alignment and fill if they are constant.
ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
- if (!LenC || !FillC || !FillC->getType()->isInteger(8))
+ if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
return 0;
uint64_t Len = LenC->getZExtValue();
Alignment = MI->getAlignment();
@@ -304,23 +305,77 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::objectsize: {
+ // We need target data for just about everything so depend on it.
+ if (!TD) break;
+
const Type *ReturnTy = CI.getType();
- Value *Op1 = II->getOperand(1);
bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1);
+
+ // Get to the real allocated thing and offset as fast as possible.
+ Value *Op1 = II->getOperand(1)->stripPointerCasts();
- if (!TD) break;
- Op1 = Op1->stripPointerCasts();
-
+ // If we've stripped down to a single global variable that we
+ // can know the size of then just return that.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) {
if (GV->hasDefinitiveInitializer()) {
Constant *C = GV->getInitializer();
- size_t globalSize = TD->getTypeAllocSize(C->getType());
- return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, globalSize));
+ uint64_t GlobalSize = TD->getTypeAllocSize(C->getType());
+ return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, GlobalSize));
} else {
+ // Can't determine size of the GV.
Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
return ReplaceInstUsesWith(CI, RetVal);
}
- }
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
+ // Get alloca size.
+ if (AI->getAllocatedType()->isSized()) {
+ uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
+ if (AI->isArrayAllocation()) {
+ const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize());
+ if (!C) break;
+ AllocaSize *= C->getZExtValue();
+ }
+ return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, AllocaSize));
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op1)) {
+ // Only handle constant GEPs here.
+ if (CE->getOpcode() != Instruction::GetElementPtr) break;
+ GEPOperator *GEP = cast<GEPOperator>(CE);
+
+ // Make sure we're not a constant offset from an external
+ // global.
+ Value *Operand = GEP->getPointerOperand();
+ Operand = Operand->stripPointerCasts();
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand))
+ if (!GV->hasDefinitiveInitializer()) break;
+
+ // Get what we're pointing to and its size.
+ const PointerType *BaseType =
+ cast<PointerType>(Operand->getType());
+ uint64_t Size = TD->getTypeAllocSize(BaseType->getElementType());
+
+ // Get the current byte offset into the thing. Use the original
+ // operand in case we're looking through a bitcast.
+ SmallVector<Value*, 8> Ops(CE->op_begin()+1, CE->op_end());
+ const PointerType *OffsetType =
+ cast<PointerType>(GEP->getPointerOperand()->getType());
+ uint64_t Offset = TD->getIndexedOffset(OffsetType, &Ops[0], Ops.size());
+
+ if (Size < Offset) {
+ // Out of bound reference? Negative index normalized to large
+ // index? Just return "I don't know".
+ Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
+ return ReplaceInstUsesWith(CI, RetVal);
+ }
+
+ Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset);
+ return ReplaceInstUsesWith(CI, RetVal);
+
+ }
+
+ // Do not return "I don't know" here. Later optimization passes could
+ // make it possible to evaluate objectsize to a constant.
+ break;
}
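
The objectsize handling above mirrors what the frontend-level builtin reports: a known global or alloca yields its allocation size minus any constant offset, and anything unknowable yields -1 (max mode) or 0 (min mode). A hedged illustration, assuming a Clang/GCC-style compiler that provides __builtin_object_size:

#include <cstdio>

char buf[100];  // object whose size is statically known

int main() {
  std::printf("%zu\n", __builtin_object_size(buf, 0));       // expected 100: whole object
  std::printf("%zu\n", __builtin_object_size(buf + 40, 0));  // expected 60: size minus offset
  return 0;
}
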
case Intrinsic::bswap:
// bswap(bswap(x)) -> x
@@ -686,6 +741,122 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
return true;
}
+// Try to fold some different type of calls here.
+// Currently we're only working with the checking functions, memcpy_chk,
+// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
+// strcat_chk and strncat_chk.
+Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
+ if (CI->getCalledFunction() == 0) return 0;
+
+ StringRef Name = CI->getCalledFunction()->getName();
+ BasicBlock *BB = CI->getParent();
+ IRBuilder<> B(CI->getParent()->getContext());
+
+ // Set the builder to the instruction after the call.
+ B.SetInsertPoint(BB, CI);
+
+ if (Name == "__memcpy_chk") {
+ ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
+ if (!SizeCI)
+ return 0;
+ ConstantInt *SizeArg = dyn_cast<ConstantInt>(CI->getOperand(3));
+ if (!SizeArg)
+ return 0;
+ if (SizeCI->isAllOnesValue() ||
+ SizeCI->getZExtValue() <= SizeArg->getZExtValue()) {
+ EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
+ 1, B, TD);
+ return ReplaceInstUsesWith(*CI, CI->getOperand(1));
+ }
+ return 0;
+ }
+
+ // Should be similar to memcpy.
+ if (Name == "__mempcpy_chk") {
+ return 0;
+ }
+
+ if (Name == "__memmove_chk") {
+ ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
+ if (!SizeCI)
+ return 0;
+ ConstantInt *SizeArg = dyn_cast<ConstantInt>(CI->getOperand(3));
+ if (!SizeArg)
+ return 0;
+ if (SizeCI->isAllOnesValue() ||
+ SizeCI->getZExtValue() <= SizeArg->getZExtValue()) {
+ EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
+ 1, B, TD);
+ return ReplaceInstUsesWith(*CI, CI->getOperand(1));
+ }
+ return 0;
+ }
+
+ if (Name == "__memset_chk") {
+ ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
+ if (!SizeCI)
+ return 0;
+ ConstantInt *SizeArg = dyn_cast<ConstantInt>(CI->getOperand(3));
+ if (!SizeArg)
+ return 0;
+ if (SizeCI->isAllOnesValue() ||
+ SizeCI->getZExtValue() <= SizeArg->getZExtValue()) {
+ Value *Val = B.CreateIntCast(CI->getOperand(2), B.getInt8Ty(),
+ false);
+ EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B, TD);
+ return ReplaceInstUsesWith(*CI, CI->getOperand(1));
+ }
+ return 0;
+ }
+
+ if (Name == "__strcpy_chk") {
+ ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(3));
+ if (!SizeCI)
+ return 0;
+ // If a) we don't have any length information, or b) we know this will
+ // fit then just lower to a plain strcpy. Otherwise we'll keep our
+ // strcpy_chk call which may fail at runtime if the size is too long.
+ // TODO: It might be nice to get a maximum length out of the possible
+ // string lengths for varying.
+ if (SizeCI->isAllOnesValue() ||
+ SizeCI->getZExtValue() >= GetStringLength(CI->getOperand(2))) {
+ Value *Ret = EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B, TD);
+ return ReplaceInstUsesWith(*CI, Ret);
+ }
+ return 0;
+ }
+
+ // Should be similar to strcpy.
+ if (Name == "__stpcpy_chk") {
+ return 0;
+ }
+
+ if (Name == "__strncpy_chk") {
+ ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
+ if (!SizeCI)
+ return 0;
+ ConstantInt *SizeArg = dyn_cast<ConstantInt>(CI->getOperand(3));
+ if (!SizeArg)
+ return 0;
+ if (SizeCI->isAllOnesValue() ||
+ SizeCI->getZExtValue() <= SizeArg->getZExtValue()) {
+ Value *Ret = EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B, TD);
+ return ReplaceInstUsesWith(*CI, Ret);
+ }
+ return 0;
+ }
+
+ if (Name == "__strcat_chk") {
+ return 0;
+ }
+
+ if (Name == "__strncat_chk") {
+ return 0;
+ }
+
+ return 0;
+}
+
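
tryOptimizeCall leans on the contract of the fortified _chk routines: __memcpy_chk(dst, src, len, objsize) behaves exactly like memcpy when len is known to fit in the destination object, and an objsize of -1 means the size is unknown, so the runtime check buys nothing. A small sketch of that decision rule (illustrative helper and constants, not the operand numbering used in the patch):

#include <cassert>
#include <cstddef>

// True when a checked copy may be lowered to a plain memcpy.
static bool canDropCheck(size_t objSize, size_t copyLen) {
  return objSize == (size_t)-1 || copyLen <= objSize;
}

int main() {
  assert(canDropCheck((size_t)-1, 1000));  // size unknown: the check is a no-op
  assert(canDropCheck(64, 16));            // fits: fold to memcpy
  assert(!canDropCheck(16, 64));           // would overflow: keep __memcpy_chk
  return 0;
}
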
// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
@@ -772,6 +943,16 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
Changed = true;
}
+ // Try to optimize the call if possible, we require TargetData for most of
+ // this. None of these calls are seen as possibly dead so go ahead and
+ // delete the instruction now.
+ if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
+ Instruction *I = tryOptimizeCall(CI, TD);
+ // If we changed something return the result, etc. Otherwise let
+ // the fallthrough check.
+ if (I) return EraseInstFromFunction(*I);
+ }
+
return Changed ? CS.getInstruction() : 0;
}
@@ -796,7 +977,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
const Type *OldRetTy = Caller->getType();
const Type *NewRetTy = FT->getReturnType();
- if (isa<StructType>(NewRetTy))
+ if (NewRetTy->isStructTy())
return false; // TODO: Handle multiple return values.
// Check to see if we are changing the return type...
@@ -804,9 +985,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (Callee->isDeclaration() &&
// Conversion is ok if changing from one pointer type to another or from
// a pointer to an integer of the same size.
- !((isa<PointerType>(OldRetTy) || !TD ||
+ !((OldRetTy->isPointerTy() || !TD ||
OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
- (isa<PointerType>(NewRetTy) || !TD ||
+ (NewRetTy->isPointerTy() || !TD ||
NewRetTy == TD->getIntPtrType(Caller->getContext()))))
return false; // Cannot transform this return value.
@@ -853,9 +1034,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// Converting from one pointer type to another or between a pointer and an
// integer of the same size is safe even if we do not have a body.
bool isConvertible = ActTy == ParamTy ||
- (TD && ((isa<PointerType>(ParamTy) ||
+ (TD && ((ParamTy->isPointerTy() ||
ParamTy == TD->getIntPtrType(Caller->getContext())) &&
- (isa<PointerType>(ActTy) ||
+ (ActTy->isPointerTy() ||
ActTy == TD->getIntPtrType(Caller->getContext()))));
if (Callee->isDeclaration() && !isConvertible) return false;
}
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 09cd21f..a68fc6d 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -23,7 +23,7 @@ using namespace PatternMatch;
///
static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
int &Offset) {
- assert(Val->getType()->isInteger(32) && "Unexpected allocation size type!");
+ assert(Val->getType()->isIntegerTy(32) && "Unexpected allocation size type!");
if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
Offset = CI->getZExtValue();
Scale = 0;
@@ -255,17 +255,26 @@ isEliminableCastPair(
return Instruction::CastOps(Res);
}
-/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results
-/// in any code being generated. It does not require codegen if V is simple
-/// enough or if the cast can be folded into other casts.
-bool InstCombiner::ValueRequiresCast(Instruction::CastOps opcode,const Value *V,
- const Type *Ty) {
+/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
+/// results in any code being generated and is interesting to optimize out. If
+/// the cast can be eliminated by some other simple transformation, we prefer
+/// to do the simplification first.
+bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
+ const Type *Ty) {
+ // Noop casts and casts of constants should be eliminated trivially.
if (V->getType() == Ty || isa<Constant>(V)) return false;
- // If this is another cast that can be eliminated, it isn't codegen either.
+ // If this is another cast that can be eliminated, we prefer to have it
+ // eliminated.
if (const CastInst *CI = dyn_cast<CastInst>(V))
- if (isEliminableCastPair(CI, opcode, Ty, TD))
+ if (isEliminableCastPair(CI, opc, Ty, TD))
return false;
+
+ // If this is a vector sext from a compare, then we don't want to break the
+ // idiom where each element of the extended vector is either zero or all ones.
+ if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy())
+ return false;
+
return true;
}
@@ -294,8 +303,8 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
if (isa<PHINode>(Src)) {
// We don't do this if this would create a PHI node with an illegal type if
// it is currently legal.
- if (!isa<IntegerType>(Src->getType()) ||
- !isa<IntegerType>(CI.getType()) ||
+ if (!Src->getType()->isIntegerTy() ||
+ !CI.getType()->isIntegerTy() ||
ShouldChangeType(CI.getType(), Src->getType()))
if (Instruction *NV = FoldOpIntoPhi(CI))
return NV;
@@ -427,7 +436,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// type. Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
- if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
CanEvaluateTruncated(Src, DestTy)) {
// If this cast is a truncate, evaluting in a different type always
@@ -719,7 +728,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// expression tree to something weird like i93 unless the source is also
// strange.
unsigned BitsToClear;
- if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
"Unreasonable BitsToClear");
@@ -828,7 +837,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1
Value *X;
- if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isInteger(1) &&
+ if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isIntegerTy(1) &&
match(SrcI, m_Not(m_Value(X))) &&
(!X->hasOneUse() || !isa<CmpInst>(X))) {
Value *New = Builder->CreateZExt(X, CI.getType());
@@ -927,7 +936,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// type. Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
- if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
CanEvaluateSExtd(Src, DestTy)) {
// Okay, we can transform this! Insert the new expression now.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
@@ -1280,7 +1289,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
unsigned NumZeros = 0;
while (SrcElTy != DstElTy &&
- isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
+ isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() &&
SrcElTy->getNumContainedTypes() /* not "{}" */) {
SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
++NumZeros;
@@ -1295,7 +1304,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
- if (DestVTy->getNumElements() == 1 && !isa<VectorType>(SrcTy)) {
+ if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
@@ -1304,7 +1313,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
- if (SrcVTy->getNumElements() == 1 && !isa<VectorType>(DestTy)) {
+ if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) {
Value *Elem =
Builder->CreateExtractElement(Src,
Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
@@ -1315,7 +1324,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
// Okay, we have (bitcast (shuffle ..)). Check to see if this is
// a bitconvert to a vector with the same # elts.
- if (SVI->hasOneUse() && isa<VectorType>(DestTy) &&
+ if (SVI->hasOneUse() && DestTy->isVectorTy() &&
cast<VectorType>(DestTy)->getNumElements() ==
SVI->getType()->getNumElements() &&
SVI->getType()->getNumElements() ==
@@ -1337,7 +1346,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
}
- if (isa<PointerType>(SrcTy))
+ if (SrcTy->isPointerTy())
return commonPointerCastTransforms(CI);
return commonCastTransforms(CI);
}
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 7c00c2c..72fd558 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -877,25 +877,26 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
case ICmpInst::ICMP_EQ:
if (LoOverflow && HiOverflow)
return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
- else if (HiOverflow)
+ if (HiOverflow)
return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
ICmpInst::ICMP_UGE, X, LoBound);
- else if (LoOverflow)
+ if (LoOverflow)
return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
ICmpInst::ICMP_ULT, X, HiBound);
- else
- return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI);
+ return ReplaceInstUsesWith(ICI,
+ InsertRangeTest(X, LoBound, HiBound, DivIsSigned,
+ true));
case ICmpInst::ICMP_NE:
if (LoOverflow && HiOverflow)
return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
- else if (HiOverflow)
+ if (HiOverflow)
return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
ICmpInst::ICMP_ULT, X, LoBound);
- else if (LoOverflow)
+ if (LoOverflow)
return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
ICmpInst::ICMP_UGE, X, HiBound);
- else
- return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI);
+ return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+ DivIsSigned, false));
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_SLT:
if (LoOverflow == +1) // Low bound is greater than input range.
@@ -1606,7 +1607,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
const Type *Ty = Op0->getType();
// icmp's with boolean values can always be turned into bitwise operations
- if (Ty->isInteger(1)) {
+ if (Ty->isIntegerTy(1)) {
switch (I.getPredicate()) {
default: llvm_unreachable("Invalid icmp instruction!");
case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B)
@@ -1650,7 +1651,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
unsigned BitWidth = 0;
if (TD)
BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
- else if (Ty->isIntOrIntVector())
+ else if (Ty->isIntOrIntVectorTy())
BitWidth = Ty->getScalarSizeInBits();
bool isSignBit = false;
@@ -1988,7 +1989,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// values. If the ptr->ptr cast can be stripped off both arguments, we do so
// now.
if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
- if (isa<PointerType>(Op0->getType()) &&
+ if (Op0->getType()->isPointerTy() &&
(isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
// We keep moving the cast from the left operand over to the right
// operand, where it can often be eliminated completely.
@@ -2458,17 +2459,17 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
break;
}
- case Instruction::Load:
- if (GetElementPtrInst *GEP =
- dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
- if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
- !cast<LoadInst>(LHSI)->isVolatile())
- if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
- return Res;
+ case Instruction::Load:
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !cast<LoadInst>(LHSI)->isVolatile())
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
+ return Res;
+ }
+ break;
}
- break;
- }
}
return Changed ? &I : 0;
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 2d13298..0f2a24f 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -87,8 +87,8 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
const Type *SrcPTy = SrcTy->getElementType();
- if (DestPTy->isInteger() || isa<PointerType>(DestPTy) ||
- isa<VectorType>(DestPTy)) {
+ if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
+ DestPTy->isVectorTy()) {
// If the source is an array, the code below will not succeed. Check to
// see if a trivial 'gep P, 0, 0' will help matters. Only do this for
// constants.
@@ -104,11 +104,11 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
}
if (IC.getTargetData() &&
- (SrcPTy->isInteger() || isa<PointerType>(SrcPTy) ||
- isa<VectorType>(SrcPTy)) &&
+ (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
+ SrcPTy->isVectorTy()) &&
// Do not allow turning this into a load of an integer, which is then
// casted to a pointer, this pessimizes pointer analysis a lot.
- (isa<PointerType>(SrcPTy) == isa<PointerType>(LI.getType())) &&
+ (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
IC.getTargetData()->getTypeSizeInBits(SrcPTy) ==
IC.getTargetData()->getTypeSizeInBits(DestPTy)) {
@@ -243,7 +243,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
const Type *SrcPTy = SrcTy->getElementType();
- if (!DestPTy->isInteger() && !isa<PointerType>(DestPTy))
+ if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
return 0;
/// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
@@ -255,7 +255,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
// If the source is an array, the code below will not succeed. Check to
// see if a trivial 'gep P, 0, 0' will help matters. Only do this for
// constants.
- if (isa<ArrayType>(SrcPTy) || isa<StructType>(SrcPTy)) {
+ if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
// Index through pointer.
Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
NewGEPIndices.push_back(Zero);
@@ -277,7 +277,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
}
- if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy))
+ if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
return 0;
// If the pointers point into different address spaces or if they point to
@@ -297,11 +297,11 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
Instruction::CastOps opcode = Instruction::BitCast;
const Type* CastSrcTy = SIOp0->getType();
const Type* CastDstTy = SrcPTy;
- if (isa<PointerType>(CastDstTy)) {
- if (CastSrcTy->isInteger())
+ if (CastDstTy->isPointerTy()) {
+ if (CastSrcTy->isIntegerTy())
opcode = Instruction::IntToPtr;
- } else if (isa<IntegerType>(CastDstTy)) {
- if (isa<PointerType>(SIOp0->getType()))
+ } else if (CastDstTy->isIntegerTy()) {
+ if (SIOp0->getType()->isPointerTy())
opcode = Instruction::PtrToInt;
}
@@ -413,7 +413,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
// Don't count debug info directives, lest they affect codegen,
// and we skip pointer-to-pointer bitcasts, which are NOPs.
if (isa<DbgInfoIntrinsic>(BBI) ||
- (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) {
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
ScanInsts++;
continue;
}
@@ -483,7 +483,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
do {
++BBI;
} while (isa<DbgInfoIntrinsic>(BBI) ||
- (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType())));
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
if (BI->isUnconditional())
if (SimplifyStoreAtEndOfBlock(SI))
@@ -544,7 +544,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
--BBI;
// Skip over debugging info.
while (isa<DbgInfoIntrinsic>(BBI) ||
- (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) {
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
if (BBI==OtherBB->begin())
return false;
--BBI;
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 2e26a75..b3974e8 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -76,7 +76,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
return BinaryOperator::CreateShl(Op0,
ConstantInt::get(Op0->getType(), Val.logBase2()));
}
- } else if (isa<VectorType>(Op1C->getType())) {
+ } else if (Op1C->getType()->isVectorTy()) {
if (Op1C->isNullValue())
return ReplaceInstUsesWith(I, Op1C);
@@ -157,7 +157,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
/// i1 mul -> i1 and.
- if (I.getType()->isInteger(1))
+ if (I.getType()->isIntegerTy(1))
return BinaryOperator::CreateAnd(Op0, Op1);
// X*(1 << Y) --> X << Y
@@ -173,7 +173,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
// If one of the operands of the multiply is a cast from a boolean value, then
// we know the bool is either zero or one, so this is a 'masking' multiply.
// X * Y (where Y is 0 or 1) -> X & (0-Y)
- if (!isa<VectorType>(I.getType())) {
+ if (!I.getType()->isVectorTy()) {
// -2 is "-1 << 1" so it is all bits set except the low one.
APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
@@ -203,8 +203,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
// "In IEEE floating point, x*1 is not equivalent to x for nans. However,
// ANSI says we can drop signals, so we can do this anyway." (from GCC)
if (Op1F->isExactlyValue(1.0))
- return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0'
- } else if (isa<VectorType>(Op1C->getType())) {
+ return ReplaceInstUsesWith(I, Op0); // Eliminate 'fmul double %X, 1.0'
+ } else if (Op1C->getType()->isVectorTy()) {
if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
// As above, vector X*splat(1.0) -> X in all defined cases.
if (Constant *Splat = Op1V->getSplatValue()) {
@@ -314,7 +314,7 @@ Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
// undef / X -> 0 for integer.
// undef / X -> undef for FP (the undef could be a snan).
if (isa<UndefValue>(Op0)) {
- if (Op0->getType()->isFPOrFPVector())
+ if (Op0->getType()->isFPOrFPVectorTy())
return ReplaceInstUsesWith(I, Op0);
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
}
@@ -386,7 +386,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
// It can't be division by zero, hence it must be division by one.
- if (I.getType()->isInteger(1))
+ if (I.getType()->isIntegerTy(1))
return ReplaceInstUsesWith(I, Op0);
if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
@@ -493,7 +493,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
// If the sign bits of both operands are zero (i.e. we can prove they are
// unsigned inputs), turn this into a udiv.
- if (I.getType()->isInteger()) {
+ if (I.getType()->isIntegerTy()) {
APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
if (MaskedValueIsZero(Op0, Mask)) {
if (MaskedValueIsZero(Op1, Mask)) {
@@ -527,7 +527,7 @@ Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (isa<UndefValue>(Op0)) { // undef % X -> 0
- if (I.getType()->isFPOrFPVector())
+ if (I.getType()->isFPOrFPVectorTy())
return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN)
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
}
@@ -648,7 +648,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
// If the sign bits of both operands are zero (i.e. we can prove they are
// unsigned inputs), turn this into a urem.
- if (I.getType()->isInteger()) {
+ if (I.getType()->isIntegerTy()) {
APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
// X srem Y -> X urem Y, iff X and Y don't have sign bit set
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index bb7632f..65f0393 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -266,6 +266,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
// and if TD isn't around, we can't handle the mixed case.
bool isVolatile = FirstLI->isVolatile();
unsigned LoadAlignment = FirstLI->getAlignment();
+ unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
// We can't sink the load if the loaded value could be modified between the
// load and the PHI.
@@ -290,6 +291,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
// the load and the PHI.
if (LI->isVolatile() != isVolatile ||
LI->getParent() != PN.getIncomingBlock(i) ||
+ LI->getPointerAddressSpace() != LoadAddrSpace ||
!isSafeAndProfitableToSinkLoad(LI))
return 0;
@@ -371,7 +373,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
// Be careful about transforming integer PHIs. We don't want to pessimize
// the code by turning an i32 into an i1293.
- if (isa<IntegerType>(PN.getType()) && isa<IntegerType>(CastSrcTy)) {
+ if (PN.getType()->isIntegerTy() && CastSrcTy->isIntegerTy()) {
if (!ShouldChangeType(PN.getType(), CastSrcTy))
return 0;
}
@@ -832,7 +834,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
// it is only used by trunc or trunc(lshr) operations. If so, we split the
// PHI into the various pieces being extracted. This sort of thing is
// introduced when SROA promotes an aggregate to a single large integer type.
- if (isa<IntegerType>(PN.getType()) && TD &&
+ if (PN.getType()->isIntegerTy() && TD &&
!TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
return Res;
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 9a02b33..2fc9325 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -441,7 +441,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return ReplaceInstUsesWith(SI, FalseVal);
}
- if (SI.getType()->isInteger(1)) {
+ if (SI.getType()->isIntegerTy(1)) {
if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) {
if (C->getZExtValue()) {
// Change: A = select B, true, C --> A = or B, C
@@ -539,9 +539,18 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
!CFPf->getValueAPF().isZero()))
return ReplaceInstUsesWith(SI, FalseVal);
}
- // Transform (X != Y) ? X : Y -> X
- if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
+ // Transform (X une Y) ? X : Y -> X
+ if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
return ReplaceInstUsesWith(SI, TrueVal);
+ }
// NOTE: if we wanted to, this is where to detect MIN/MAX
} else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
@@ -557,9 +566,18 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
!CFPf->getValueAPF().isZero()))
return ReplaceInstUsesWith(SI, FalseVal);
}
- // Transform (X != Y) ? Y : X -> Y
- if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
- return ReplaceInstUsesWith(SI, TrueVal);
+ // Transform (X une Y) ? Y : X -> Y
+ if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, TrueVal);
+ }
// NOTE: if we wanted to, this is where to detect MIN/MAX
}
// NOTE: if we wanted to, this is where to detect ABS
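
The guard added around the FCMP_UNE select folds exists because of signed zero. A tiny standalone demonstration (plain C++ doubles; '!=' on floating point is the unordered-or-unequal compare) of why (X une Y) ? X : Y is not simply X:

#include <cassert>
#include <cmath>

int main() {
  double X = -0.0, Y = +0.0;
  double sel = (X != Y) ? X : Y;  // -0.0 == +0.0, so the select yields Y
  assert(!std::signbit(sel));     // result is +0.0 ...
  assert(std::signbit(X));        // ... while the "folded" answer X would be -0.0
  return 0;
}
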
@@ -629,7 +647,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
// See if we can fold the select into one of our operands.
- if (SI.getType()->isInteger()) {
+ if (SI.getType()->isIntegerTy()) {
if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
return FoldI;
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 53a5684..cd41844 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -104,10 +104,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(Depth <= 6 && "Limit Search Depth");
uint32_t BitWidth = DemandedMask.getBitWidth();
const Type *VTy = V->getType();
- assert((TD || !isa<PointerType>(VTy)) &&
+ assert((TD || !VTy->isPointerTy()) &&
"SimplifyDemandedBits needs to know bit widths!");
assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
- (!VTy->isIntOrIntVector() ||
+ (!VTy->isIntOrIntVectorTy() ||
VTy->getScalarSizeInBits() == BitWidth) &&
KnownZero.getBitWidth() == BitWidth &&
KnownOne.getBitWidth() == BitWidth &&
@@ -401,7 +401,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
}
case Instruction::BitCast:
- if (!I->getOperand(0)->getType()->isIntOrIntVector())
+ if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
return 0; // vector->int or fp->int?
if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
@@ -413,7 +413,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
} else
// Don't touch a scalar-to-vector bitcast.
return 0;
- } else if (isa<VectorType>(I->getOperand(0)->getType()))
+ } else if (I->getOperand(0)->getType()->isVectorTy())
// Don't touch a vector-to-scalar bitcast.
return 0;
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 20fda1a..a58124d 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -78,7 +78,7 @@ static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) {
/// value is already around as a register, for example if it were inserted then
/// extracted from the vector.
static Value *FindScalarElement(Value *V, unsigned EltNo) {
- assert(isa<VectorType>(V->getType()) && "Not looking at a vector?");
+ assert(V->getType()->isVectorTy() && "Not looking at a vector?");
const VectorType *PTy = cast<VectorType>(V->getType());
unsigned Width = PTy->getNumElements();
if (EltNo >= Width) // Out of range access.
@@ -322,7 +322,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
/// that computes V and the LHS value of the shuffle.
static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
Value *&RHS) {
- assert(isa<VectorType>(V->getType()) &&
+ assert(V->getType()->isVectorTy() &&
(RHS == 0 || V->getType() == RHS->getType()) &&
"Invalid shuffle!");
unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 93b1961..af9ec5c 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -73,7 +73,7 @@ void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
/// from 'From' to 'To'. We don't want to convert from a legal to an illegal
/// type for example, or from a smaller to a larger illegal type.
bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const {
- assert(isa<IntegerType>(From) && isa<IntegerType>(To));
+ assert(From->isIntegerTy() && To->isIntegerTy());
// If we don't have TD, we don't know if the source/dest are legal.
if (!TD) return false;
@@ -158,7 +158,7 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const {
return ConstantExpr::getNeg(C);
if (ConstantVector *C = dyn_cast<ConstantVector>(V))
- if (C->getType()->getElementType()->isInteger())
+ if (C->getType()->getElementType()->isIntegerTy())
return ConstantExpr::getNeg(C);
return 0;
@@ -177,7 +177,7 @@ Value *InstCombiner::dyn_castFNegVal(Value *V) const {
return ConstantExpr::getFNeg(C);
if (ConstantVector *C = dyn_cast<ConstantVector>(V))
- if (C->getType()->getElementType()->isFloatingPoint())
+ if (C->getType()->getElementType()->isFloatingPointTy())
return ConstantExpr::getFNeg(C);
return 0;
@@ -226,7 +226,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
if (isa<Constant>(TV) || isa<Constant>(FV)) {
// Bool selects with constant operands can be folded to logical ops.
- if (SI->getType()->isInteger(1)) return 0;
+ if (SI->getType()->isIntegerTy(1)) return 0;
Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);
@@ -478,7 +478,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
bool EndsWithSequential = false;
for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
I != E; ++I)
- EndsWithSequential = !isa<StructType>(*I);
+ EndsWithSequential = !(*I)->isStructTy();
// Can we combine the two pointer arithmetics offsets?
if (EndsWithSequential) {
@@ -578,7 +578,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
const Type *SrcElTy = StrippedPtrTy->getElementType();
const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
- if (TD && isa<ArrayType>(SrcElTy) &&
+ if (TD && SrcElTy->isArrayTy() &&
TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
TD->getTypeAllocSize(ResElTy)) {
Value *Idx[2];
@@ -596,7 +596,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// (where tmp = 8*tmp2) into:
// getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
- if (TD && isa<ArrayType>(SrcElTy) && ResElTy->isInteger(8)) {
+ if (TD && SrcElTy->isArrayTy() && ResElTy->isIntegerTy(8)) {
uint64_t ArrayEltSize =
TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index 3214c8c..8662a82 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -84,7 +84,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
AI = MainFn->arg_begin();
// If the program looked at argc, have it look at the return value of the
// init call instead.
- if (!AI->getType()->isInteger(32)) {
+ if (!AI->getType()->isIntegerTy(32)) {
Instruction::CastOps opcode;
if (!AI->use_empty()) {
opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true);
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp
index cf5e8c0..ea8e5c3 100644
--- a/lib/Transforms/Scalar/ABCD.cpp
+++ b/lib/Transforms/Scalar/ABCD.cpp
@@ -505,7 +505,7 @@ void ABCD::executeABCD(Function &F) {
continue;
ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0));
- if (!ICI || !isa<IntegerType>(ICI->getOperand(0)->getType()))
+ if (!ICI || !ICI->getOperand(0)->getType()->isIntegerTy())
continue;
createConstraintCmpInst(ICI, TI);
@@ -713,7 +713,7 @@ void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) {
Value *V_op1 = ICI->getOperand(0);
Value *V_op2 = ICI->getOperand(1);
- if (!isa<IntegerType>(V_op1->getType()))
+ if (!V_op1->getType()->isIntegerTy())
return;
Instruction *I_op1 = dyn_cast<Instruction>(V_op1);
diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk
new file mode 100644
index 0000000..dea9b8f
--- /dev/null
+++ b/lib/Transforms/Scalar/Android.mk
@@ -0,0 +1,55 @@
+LOCAL_PATH:= $(call my-dir)
+
+transforms_scalar_SRC_FILES := \
+ ABCD.cpp \
+ ADCE.cpp \
+ BasicBlockPlacement.cpp \
+ CodeGenPrepare.cpp \
+ ConstantProp.cpp \
+ DCE.cpp \
+ DeadStoreElimination.cpp \
+ GEPSplitter.cpp \
+ GVN.cpp \
+ IndVarSimplify.cpp \
+ JumpThreading.cpp \
+ LICM.cpp \
+ LoopDeletion.cpp \
+ LoopIndexSplit.cpp \
+ LoopRotation.cpp \
+ LoopStrengthReduce.cpp \
+ LoopUnrollPass.cpp \
+ LoopUnswitch.cpp \
+ MemCpyOptimizer.cpp \
+ Reassociate.cpp \
+ Reg2Mem.cpp \
+ SCCP.cpp \
+ SCCVN.cpp \
+ Scalar.cpp \
+ ScalarReplAggregates.cpp \
+ SimplifyCFGPass.cpp \
+ SimplifyHalfPowrLibCalls.cpp \
+ SimplifyLibCalls.cpp \
+ TailDuplication.cpp \
+ TailRecursionElimination.cpp
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(transforms_scalar_SRC_FILES)
+LOCAL_MODULE:= libLLVMScalarOpts
+
+include $(LLVM_HOST_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(transforms_scalar_SRC_FILES)
+LOCAL_MODULE:= libLLVMScalarOpts
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index fa60d3f..7ceda1f 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -32,7 +32,6 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
@@ -40,9 +39,6 @@
using namespace llvm;
using namespace llvm::PatternMatch;
-static cl::opt<bool> FactorCommonPreds("split-critical-paths-tweak",
- cl::init(false), cl::Hidden);
-
namespace {
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
@@ -301,6 +297,70 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
+/// FindReusablePredBB - Check all of the predecessors of the block DestPHI
+/// lives in to see if there is a block that we can reuse as a critical edge
+/// from TIBB.
+static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) {
+ BasicBlock *Dest = DestPHI->getParent();
+
+ /// TIPHIValues - This array is lazily computed to determine the values of
+ /// PHIs in Dest that TI would provide.
+ SmallVector<Value*, 32> TIPHIValues;
+
+ /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB.
+ unsigned TIBBEntryNo = 0;
+
+ // Check to see if Dest has any blocks that can be used as a split edge for
+ // this terminator.
+ for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) {
+ BasicBlock *Pred = DestPHI->getIncomingBlock(pi);
+ // To be usable, the pred has to end with an uncond branch to the dest.
+ BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
+ if (!PredBr || !PredBr->isUnconditional())
+ continue;
+ // Must be empty other than the branch and debug info.
+ BasicBlock::iterator I = Pred->begin();
+ while (isa<DbgInfoIntrinsic>(I))
+ I++;
+ if (&*I != PredBr)
+ continue;
+ // Cannot be the entry block; its label does not get emitted.
+ if (Pred == &Dest->getParent()->getEntryBlock())
+ continue;
+
+ // Finally, since we know that Dest has phi nodes in it, we have to make
+ // sure that jumping to Pred will have the same effect as going to Dest in
+ // terms of PHI values.
+ PHINode *PN;
+ unsigned PHINo = 0;
+ unsigned PredEntryNo = pi;
+
+ bool FoundMatch = true;
+ for (BasicBlock::iterator I = Dest->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
+ if (PHINo == TIPHIValues.size()) {
+ if (PN->getIncomingBlock(TIBBEntryNo) != TIBB)
+ TIBBEntryNo = PN->getBasicBlockIndex(TIBB);
+ TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo));
+ }
+
+ // If the PHI entry doesn't work, we can't use this pred.
+ if (PN->getIncomingBlock(PredEntryNo) != Pred)
+ PredEntryNo = PN->getBasicBlockIndex(Pred);
+
+ if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) {
+ FoundMatch = false;
+ break;
+ }
+ }
+
+    // If we found a workable predecessor, return it (the caller redirects TI to it).
+ if (FoundMatch)
+ return Pred;
+ }
+ return 0;
+}
+
/// SplitEdgeNicely - Split the critical edge from TI to its specified
/// successor if it will improve codegen. We only do this if the successor has
@@ -315,13 +375,12 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
BasicBlock *Dest = TI->getSuccessor(SuccNum);
assert(isa<PHINode>(Dest->begin()) &&
"This should only be called if Dest has a PHI!");
+ PHINode *DestPHI = cast<PHINode>(Dest->begin());
// Do not split edges to EH landing pads.
- if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI)) {
+ if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI))
if (Invoke->getSuccessor(1) == Dest)
return;
- }
-
// As a hack, never split backedges of loops. Even though the copy for any
// PHIs inserted on the backedge would be dead for exits from the loop, we
@@ -329,92 +388,16 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
if (BackEdges.count(std::make_pair(TIBB, Dest)))
return;
- if (!FactorCommonPreds) {
- /// TIPHIValues - This array is lazily computed to determine the values of
- /// PHIs in Dest that TI would provide.
- SmallVector<Value*, 32> TIPHIValues;
-
- // Check to see if Dest has any blocks that can be used as a split edge for
- // this terminator.
- for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
- BasicBlock *Pred = *PI;
- // To be usable, the pred has to end with an uncond branch to the dest.
- BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
- if (!PredBr || !PredBr->isUnconditional())
- continue;
- // Must be empty other than the branch and debug info.
- BasicBlock::iterator I = Pred->begin();
- while (isa<DbgInfoIntrinsic>(I))
- I++;
- if (dyn_cast<Instruction>(I) != PredBr)
- continue;
- // Cannot be the entry block; its label does not get emitted.
- if (Pred == &(Dest->getParent()->getEntryBlock()))
- continue;
-
- // Finally, since we know that Dest has phi nodes in it, we have to make
- // sure that jumping to Pred will have the same effect as going to Dest in
- // terms of PHI values.
- PHINode *PN;
- unsigned PHINo = 0;
- bool FoundMatch = true;
- for (BasicBlock::iterator I = Dest->begin();
- (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
- if (PHINo == TIPHIValues.size())
- TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));
-
- // If the PHI entry doesn't work, we can't use this pred.
- if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
- FoundMatch = false;
- break;
- }
- }
-
- // If we found a workable predecessor, change TI to branch to Succ.
- if (FoundMatch) {
- ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>();
- if (PFI)
- PFI->splitEdge(TIBB, Dest, Pred);
- Dest->removePredecessor(TIBB);
- TI->setSuccessor(SuccNum, Pred);
- return;
- }
- }
-
- SplitCriticalEdge(TI, SuccNum, P, true);
+ if (BasicBlock *ReuseBB = FindReusablePredBB(DestPHI, TIBB)) {
+ ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>();
+ if (PFI)
+ PFI->splitEdge(TIBB, Dest, ReuseBB);
+ Dest->removePredecessor(TIBB);
+ TI->setSuccessor(SuccNum, ReuseBB);
return;
}
- PHINode *PN;
- SmallVector<Value*, 8> TIPHIValues;
- for (BasicBlock::iterator I = Dest->begin();
- (PN = dyn_cast<PHINode>(I)); ++I)
- TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));
-
- SmallVector<BasicBlock*, 8> IdenticalPreds;
- for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
- BasicBlock *Pred = *PI;
- if (BackEdges.count(std::make_pair(Pred, Dest)))
- continue;
- if (PI == TIBB)
- IdenticalPreds.push_back(Pred);
- else {
- bool Identical = true;
- unsigned PHINo = 0;
- for (BasicBlock::iterator I = Dest->begin();
- (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo)
- if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
- Identical = false;
- break;
- }
- if (Identical)
- IdenticalPreds.push_back(Pred);
- }
- }
-
- assert(!IdenticalPreds.empty());
- SplitBlockPredecessors(Dest, &IdenticalPreds[0], IdenticalPreds.size(),
- ".critedge", P);
+ SplitCriticalEdge(TI, SuccNum, P, true);
}
@@ -629,7 +612,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// we'd end up sinking both muls.
if (AddrMode.BaseReg) {
Value *V = AddrMode.BaseReg;
- if (isa<PointerType>(V->getType()))
+ if (V->getType()->isPointerTy())
V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
if (V->getType() != IntPtrTy)
V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true,
@@ -642,7 +625,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Value *V = AddrMode.ScaledReg;
if (V->getType() == IntPtrTy) {
// done.
- } else if (isa<PointerType>(V->getType())) {
+ } else if (V->getType()->isPointerTy()) {
V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
} else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
cast<IntegerType>(V->getType())->getBitWidth()) {
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 320afa1..09c01d3 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -44,8 +44,14 @@ namespace {
virtual bool runOnFunction(Function &F) {
bool Changed = false;
+
+ DominatorTree &DT = getAnalysis<DominatorTree>();
+
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
- Changed |= runOnBasicBlock(*I);
+ // Only check non-dead blocks. Dead blocks may have strange pointer
+ // cycles that will confuse alias analysis.
+ if (DT.isReachableFromEntry(I))
+ Changed |= runOnBasicBlock(*I);
return Changed;
}
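
Calling getAnalysis<DominatorTree>() only works if the pass also declares that requirement; the hunk does not show getAnalysisUsage, so the skeleton below is an assumption about what such a declaration looks like, not a quote from the patch (the pass name and body are invented):

    // Hypothetical FunctionPass: declaring the DominatorTree requirement is
    // what allows runOnFunction to call getAnalysis<DominatorTree>() and gate
    // per-block work on reachability, as the hunk above does.
    #include "llvm/Pass.h"
    #include "llvm/Function.h"
    #include "llvm/Analysis/Dominators.h"
    using namespace llvm;

    namespace {
      struct ReachableOnly : public FunctionPass {
        static char ID;
        ReachableOnly() : FunctionPass(&ID) {}
        virtual void getAnalysisUsage(AnalysisUsage &AU) const {
          AU.addRequired<DominatorTree>();
        }
        virtual bool runOnFunction(Function &F) {
          DominatorTree &DT = getAnalysis<DominatorTree>();
          unsigned Reachable = 0;
          for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
            if (DT.isReachableFromEntry(I))  // same gate as the hunk above
              ++Reachable;                   // real per-block work goes here
          (void)Reachable;
          return false;                      // nothing modified in this sketch
        }
      };
    }
    char ReachableOnly::ID = 0;
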
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 80e0027..fcb802a 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -662,11 +662,10 @@ namespace {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- explicit GVN(bool nopre = false, bool noloads = false)
- : FunctionPass(&ID), NoPRE(nopre), NoLoads(noloads), MD(0) { }
+ explicit GVN(bool noloads = false)
+ : FunctionPass(&ID), NoLoads(noloads), MD(0) { }
private:
- bool NoPRE;
bool NoLoads;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
@@ -674,6 +673,9 @@ namespace {
ValueTable VN;
DenseMap<BasicBlock*, ValueNumberScope*> localAvail;
+ // List of critical edges to be split between iterations.
+ SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
+
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
@@ -701,14 +703,15 @@ namespace {
Value *lookupNumber(BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
+ bool splitCriticalEdges();
};
char GVN::ID = 0;
}
// createGVNPass - The public interface to this file...
-FunctionPass *llvm::createGVNPass(bool NoPRE, bool NoLoads) {
- return new GVN(NoPRE, NoLoads);
+FunctionPass *llvm::createGVNPass(bool NoLoads) {
+ return new GVN(NoLoads);
}
static RegisterPass<GVN> X("gvn",
@@ -836,9 +839,9 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
const TargetData &TD) {
  // If the loaded or stored value is a first-class array or struct, don't try
// to transform them. We need to be able to bitcast to integer.
- if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy) ||
- isa<StructType>(StoredVal->getType()) ||
- isa<ArrayType>(StoredVal->getType()))
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
+ StoredVal->getType()->isStructTy() ||
+ StoredVal->getType()->isArrayTy())
return false;
// The store has to be at least as big as the load.
@@ -870,26 +873,26 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
// If the store and reload are the same size, we can always reuse it.
if (StoreSize == LoadSize) {
- if (isa<PointerType>(StoredValTy) && isa<PointerType>(LoadedTy)) {
+ if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) {
// Pointer to Pointer -> use bitcast.
return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
}
// Convert source pointers to integers, which can be bitcast.
- if (isa<PointerType>(StoredValTy)) {
+ if (StoredValTy->isPointerTy()) {
StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
const Type *TypeToCastTo = LoadedTy;
- if (isa<PointerType>(TypeToCastTo))
+ if (TypeToCastTo->isPointerTy())
TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
if (StoredValTy != TypeToCastTo)
StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
// Cast to pointer if the load needs a pointer type.
- if (isa<PointerType>(LoadedTy))
+ if (LoadedTy->isPointerTy())
StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
return StoredVal;
@@ -901,13 +904,13 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
// Convert source pointers to integers, which can be manipulated.
- if (isa<PointerType>(StoredValTy)) {
+ if (StoredValTy->isPointerTy()) {
StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
// Convert vectors and fp to integer, which can be manipulated.
- if (!isa<IntegerType>(StoredValTy)) {
+ if (!StoredValTy->isIntegerTy()) {
StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
}
@@ -927,7 +930,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
return StoredVal;
// If the result is a pointer, inttoptr.
- if (isa<PointerType>(LoadedTy))
+ if (LoadedTy->isPointerTy())
return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
// Otherwise, bitcast.
@@ -989,7 +992,7 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
const TargetData &TD) {
  // If the loaded or stored value is a first-class array or struct, don't try
// to transform them. We need to be able to bitcast to integer.
- if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy))
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy())
return -1;
int64_t StoreOffset = 0, LoadOffset = 0;
@@ -1064,8 +1067,8 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
StoreInst *DepSI,
const TargetData &TD) {
// Cannot handle reading from store of first-class aggregate yet.
- if (isa<StructType>(DepSI->getOperand(0)->getType()) ||
- isa<ArrayType>(DepSI->getOperand(0)->getType()))
+ if (DepSI->getOperand(0)->getType()->isStructTy() ||
+ DepSI->getOperand(0)->getType()->isArrayTy())
return -1;
Value *StorePtr = DepSI->getPointerOperand();
@@ -1136,9 +1139,9 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
- if (isa<PointerType>(SrcVal->getType()))
+ if (SrcVal->getType()->isPointerTy())
SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
- if (!isa<IntegerType>(SrcVal->getType()))
+ if (!SrcVal->getType()->isIntegerTy())
SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
"tmp");
@@ -1323,7 +1326,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
// If new PHI nodes were created, notify alias analysis.
- if (isa<PointerType>(V->getType()))
+ if (V->getType()->isPointerTy())
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
AA->copyValue(LI, NewPHIs[i]);
@@ -1491,8 +1494,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
if (isa<PHINode>(V))
V->takeName(LI);
- if (isa<PointerType>(V->getType()))
+ if (V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
+ VN.erase(LI);
toErase.push_back(LI);
NumGVNLoad++;
return true;
@@ -1538,11 +1542,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// at least one of the values is LI. Since this means that we won't be able
// to eliminate LI even if we insert uses in the other predecessors, we will
// end up increasing code size. Reject this by scanning for LI.
- if (!EnableFullLoadPRE) {
- for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
- if (ValuesPerBlock[i].isSimpleValue() &&
- ValuesPerBlock[i].getSimpleValue() == LI)
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
+ if (ValuesPerBlock[i].isSimpleValue() &&
+ ValuesPerBlock[i].getSimpleValue() == LI) {
+ // Skip cases where LI is the only definition, even for EnableFullLoadPRE.
+ if (!EnableFullLoadPRE || e == 1)
return false;
+ }
}
// FIXME: It is extremely unclear what this loop is doing, other than
@@ -1576,6 +1582,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
FullyAvailableBlocks[UnavailableBlocks[i]] = false;
+ bool NeedToSplitEdges = false;
for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
PI != E; ++PI) {
BasicBlock *Pred = *PI;
@@ -1583,13 +1590,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
continue;
}
PredLoads[Pred] = 0;
- // We don't currently handle critical edges :(
+
if (Pred->getTerminator()->getNumSuccessors() != 1) {
- DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
- << Pred->getName() << "': " << *LI << '\n');
- return false;
+ if (isa<IndirectBrInst>(Pred->getTerminator())) {
+ DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
+ << Pred->getName() << "': " << *LI << '\n');
+ return false;
+ }
+ unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
+ toSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
+ NeedToSplitEdges = true;
}
}
+ if (NeedToSplitEdges)
+ return false;
// Decide whether PRE is profitable for this load.
unsigned NumUnavailablePreds = PredLoads.size();
@@ -1623,13 +1637,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
*DT, NewInsts);
} else {
- Address.PHITranslateValue(LoadBB, UnavailablePred);
+ Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
LoadPtr = Address.getAddr();
-
- // Make sure the value is live in the predecessor.
- if (Instruction *Inst = dyn_cast_or_null<Instruction>(LoadPtr))
- if (!DT->dominates(Inst->getParent(), UnavailablePred))
- LoadPtr = 0;
}
// If we couldn't find or insert a computation of this phi translated value,
@@ -1697,6 +1706,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// Add the newly created load.
ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
NewLoad));
+ MD->invalidateCachedPointerInfo(LoadPtr);
+ DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
}
// Perform PHI construction.
@@ -1705,8 +1716,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
LI->replaceAllUsesWith(V);
if (isa<PHINode>(V))
V->takeName(LI);
- if (isa<PointerType>(V->getType()))
+ if (V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
+ VN.erase(LI);
toErase.push_back(LI);
NumPRELoad++;
return true;
@@ -1765,8 +1777,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// Replace the load!
L->replaceAllUsesWith(AvailVal);
- if (isa<PointerType>(AvailVal->getType()))
+ if (AvailVal->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(AvailVal);
+ VN.erase(L);
toErase.push_back(L);
NumGVNLoad++;
return true;
@@ -1810,8 +1823,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// Remove it!
L->replaceAllUsesWith(StoredVal);
- if (isa<PointerType>(StoredVal->getType()))
+ if (StoredVal->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(StoredVal);
+ VN.erase(L);
toErase.push_back(L);
NumGVNLoad++;
return true;
@@ -1839,8 +1853,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// Remove it!
L->replaceAllUsesWith(AvailableVal);
- if (isa<PointerType>(DepLI->getType()))
+ if (DepLI->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(DepLI);
+ VN.erase(L);
toErase.push_back(L);
NumGVNLoad++;
return true;
@@ -1851,6 +1866,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// intervening stores, for example.
if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
+ VN.erase(L);
toErase.push_back(L);
NumGVNLoad++;
return true;
@@ -1861,6 +1877,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
+ VN.erase(L);
toErase.push_back(L);
NumGVNLoad++;
return true;
@@ -1891,6 +1908,10 @@ Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
/// by inserting it into the appropriate sets
bool GVN::processInstruction(Instruction *I,
SmallVectorImpl<Instruction*> &toErase) {
+ // Ignore dbg info intrinsics.
+ if (isa<DbgInfoIntrinsic>(I))
+ return false;
+
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
bool Changed = processLoad(LI, toErase);
@@ -1939,7 +1960,7 @@ bool GVN::processInstruction(Instruction *I,
if (constVal) {
p->replaceAllUsesWith(constVal);
- if (MD && isa<PointerType>(constVal->getType()))
+ if (MD && constVal->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(constVal);
VN.erase(p);
@@ -1960,7 +1981,7 @@ bool GVN::processInstruction(Instruction *I,
// Remove it!
VN.erase(I);
I->replaceAllUsesWith(repl);
- if (MD && isa<PointerType>(repl->getType()))
+ if (MD && repl->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(repl);
toErase.push_back(I);
return true;
@@ -2000,6 +2021,8 @@ bool GVN::runOnFunction(Function& F) {
while (ShouldContinue) {
DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
ShouldContinue = iterateOnFunction(F);
+ if (splitCriticalEdges())
+ ShouldContinue = true;
Changed |= ShouldContinue;
++Iteration;
}
@@ -2066,7 +2089,6 @@ bool GVN::processBlock(BasicBlock *BB) {
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVN::performPRE(Function &F) {
bool Changed = false;
- SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
DenseMap<BasicBlock*, Value*> predMap;
for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
@@ -2137,14 +2159,7 @@ bool GVN::performPRE(Function &F) {
// We can't do PRE safely on a critical edge, so instead we schedule
// the edge to be split and perform the PRE the next time we iterate
// on the function.
- unsigned SuccNum = 0;
- for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors();
- i != e; ++i)
- if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) {
- SuccNum = i;
- break;
- }
-
+ unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
continue;
@@ -2200,7 +2215,7 @@ bool GVN::performPRE(Function &F) {
localAvail[CurrentBlock]->table[ValNo] = Phi;
CurInst->replaceAllUsesWith(Phi);
- if (MD && isa<PointerType>(Phi->getType()))
+ if (MD && Phi->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(Phi);
VN.erase(CurInst);
@@ -2212,11 +2227,23 @@ bool GVN::performPRE(Function &F) {
}
}
- for (SmallVector<std::pair<TerminatorInst*, unsigned>, 4>::iterator
- I = toSplit.begin(), E = toSplit.end(); I != E; ++I)
- SplitCriticalEdge(I->first, I->second, this);
+ if (splitCriticalEdges())
+ Changed = true;
+
+ return Changed;
+}
- return Changed || toSplit.size();
+/// splitCriticalEdges - Split critical edges found during the previous
+/// iteration that may enable further optimization.
+bool GVN::splitCriticalEdges() {
+ if (toSplit.empty())
+ return false;
+ do {
+ std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
+ SplitCriticalEdge(Edge.first, Edge.second, this);
+ } while (!toSplit.empty());
+ if (MD) MD->invalidateCachedPredecessors();
+ return true;
}
/// iterateOnFunction - Executes one iteration of GVN
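
With the NoPRE flag gone, createGVNPass takes a single parameter, so any caller still passing two arguments would no longer compile. A sketch of the adjustment at a hypothetical call site (the surrounding pass-manager setup is assumed):

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Scalar.h"
    using namespace llvm;

    static void scheduleGVN(PassManager &PM) {
      // Before this change: PM.add(createGVNPass(/*NoPRE=*/false, /*NoLoads=*/false));
      PM.add(createGVNPass(/*NoLoads=*/false));
    }
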
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index c54f596..cb563c3 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -103,11 +103,9 @@ namespace {
BasicBlock *ExitingBlock,
BranchInst *BI,
SCEVExpander &Rewriter);
- void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount,
- SCEVExpander &Rewriter);
+ void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
- void RewriteIVExpressions(Loop *L, const Type *LargestType,
- SCEVExpander &Rewriter);
+ void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
void SinkUnusedInvariants(Loop *L);
@@ -190,7 +188,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
- Instruction *OrigCond = cast<Instruction>(BI->getCondition());
+ Value *OrigCond = BI->getCondition();
// It's tempting to use replaceAllUsesWith here to fully replace the old
// comparison, but that's not immediately safe, since users of the old
// comparison may not be dominated by the new comparison. Instead, just
@@ -215,7 +213,6 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
void IndVarSimplify::RewriteLoopExitValues(Loop *L,
- const SCEV *BackedgeTakenCount,
SCEVExpander &Rewriter) {
  // Verify the input to the pass is already in LCSSA form.
assert(L->isLCSSAForm());
@@ -241,15 +238,24 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
while ((PN = dyn_cast<PHINode>(BBI++))) {
if (PN->use_empty())
continue; // dead use, don't replace it
+
+ // SCEV only supports integer expressions for now.
+ if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy())
+ continue;
+
+ // It's necessary to tell ScalarEvolution about this explicitly so that
+ // it can walk the def-use list and forget all SCEVs, as it may not be
+ // watching the PHI itself. Once the new exit value is in place, there
+ // may not be a def-use connection between the loop and every instruction
+ // which got a SCEVAddRecExpr for that loop.
+ SE->forgetValue(PN);
+
// Iterate over all of the values in all the PHI nodes.
for (unsigned i = 0; i != NumPreds; ++i) {
// If the value being merged in is not integer or is not defined
// in the loop, skip it.
Value *InVal = PN->getIncomingValue(i);
- if (!isa<Instruction>(InVal) ||
- // SCEV only supports integer expressions for now.
- (!isa<IntegerType>(InVal->getType()) &&
- !isa<PointerType>(InVal->getType())))
+ if (!isa<Instruction>(InVal))
continue;
// If this pred is for a subloop, not L itself, skip it.
@@ -349,7 +355,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// the current expressions.
//
if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
- RewriteLoopExitValues(L, BackedgeTakenCount, Rewriter);
+ RewriteLoopExitValues(L, Rewriter);
// Compute the type of the largest recurrence expression, and decide whether
// a canonical induction variable should be inserted.
@@ -364,37 +370,32 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
if (ExitingBlock)
NeedCannIV = true;
}
- for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
- const SCEV *Stride = IU->StrideOrder[i];
- const Type *Ty = SE->getEffectiveSCEVType(Stride->getType());
+ for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
+ const Type *Ty =
+ SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType());
if (!LargestType ||
SE->getTypeSizeInBits(Ty) >
SE->getTypeSizeInBits(LargestType))
LargestType = Ty;
-
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
- IU->IVUsesByStride.find(IU->StrideOrder[i]);
- assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
-
- if (!SI->second->Users.empty())
- NeedCannIV = true;
+ NeedCannIV = true;
}
// Now that we know the largest of the induction variable expressions
// in this loop, insert a canonical induction variable of the largest size.
Value *IndVar = 0;
if (NeedCannIV) {
- // Check to see if the loop already has a canonical-looking induction
- // variable. If one is present and it's wider than the planned canonical
- // induction variable, temporarily remove it, so that the Rewriter
- // doesn't attempt to reuse it.
- PHINode *OldCannIV = L->getCanonicalInductionVariable();
- if (OldCannIV) {
+ // Check to see if the loop already has any canonical-looking induction
+ // variables. If any are present and wider than the planned canonical
+ // induction variable, temporarily remove them, so that the Rewriter
+ // doesn't attempt to reuse them.
+ SmallVector<PHINode *, 2> OldCannIVs;
+ while (PHINode *OldCannIV = L->getCanonicalInductionVariable()) {
if (SE->getTypeSizeInBits(OldCannIV->getType()) >
SE->getTypeSizeInBits(LargestType))
OldCannIV->removeFromParent();
else
- OldCannIV = 0;
+ break;
+ OldCannIVs.push_back(OldCannIV);
}
IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType);
@@ -404,17 +405,21 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n');
// Now that the official induction variable is established, reinsert
- // the old canonical-looking variable after it so that the IR remains
- // consistent. It will be deleted as part of the dead-PHI deletion at
+ // any old canonical-looking variables after it so that the IR remains
+ // consistent. They will be deleted as part of the dead-PHI deletion at
// the end of the pass.
- if (OldCannIV)
- OldCannIV->insertAfter(cast<Instruction>(IndVar));
+ while (!OldCannIVs.empty()) {
+ PHINode *OldCannIV = OldCannIVs.pop_back_val();
+ OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI());
+ }
}
// If we have a trip count expression, rewrite the loop's exit condition
// using it. We can currently only handle loops with a single exit.
ICmpInst *NewICmp = 0;
- if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && ExitingBlock) {
+ if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
+ !BackedgeTakenCount->isZero() &&
+ ExitingBlock) {
assert(NeedCannIV &&
"LinearFunctionTestReplace requires a canonical induction variable");
// Can't rewrite non-branch yet.
@@ -424,7 +429,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Rewrite IV-derived expressions. Clears the rewriter cache.
- RewriteIVExpressions(L, LargestType, Rewriter);
+ RewriteIVExpressions(L, Rewriter);
// The Rewriter may not be used from this point on.
@@ -444,8 +449,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
return Changed;
}
-void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
- SCEVExpander &Rewriter) {
+void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
SmallVector<WeakVH, 16> DeadInsts;
// Rewrite all induction variable expressions in terms of the canonical
@@ -455,72 +459,64 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
// add the offsets to the primary induction variable and cast, avoiding
// the need for the code evaluation methods to insert induction variables
// of different sizes.
- for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
- const SCEV *Stride = IU->StrideOrder[i];
-
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
- IU->IVUsesByStride.find(IU->StrideOrder[i]);
- assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
- ilist<IVStrideUse> &List = SI->second->Users;
- for (ilist<IVStrideUse>::iterator UI = List.begin(),
- E = List.end(); UI != E; ++UI) {
- Value *Op = UI->getOperandValToReplace();
- const Type *UseTy = Op->getType();
- Instruction *User = UI->getUser();
-
- // Compute the final addrec to expand into code.
- const SCEV *AR = IU->getReplacementExpr(*UI);
-
- // Evaluate the expression out of the loop, if possible.
- if (!L->contains(UI->getUser())) {
- const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
- if (ExitVal->isLoopInvariant(L))
- AR = ExitVal;
- }
+ for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
+ const SCEV *Stride = UI->getStride();
+ Value *Op = UI->getOperandValToReplace();
+ const Type *UseTy = Op->getType();
+ Instruction *User = UI->getUser();
+
+ // Compute the final addrec to expand into code.
+ const SCEV *AR = IU->getReplacementExpr(*UI);
+
+ // Evaluate the expression out of the loop, if possible.
+ if (!L->contains(UI->getUser())) {
+ const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
+ if (ExitVal->isLoopInvariant(L))
+ AR = ExitVal;
+ }
- // FIXME: It is an extremely bad idea to indvar substitute anything more
- // complex than affine induction variables. Doing so will put expensive
- // polynomial evaluations inside of the loop, and the str reduction pass
- // currently can only reduce affine polynomials. For now just disable
- // indvar subst on anything more complex than an affine addrec, unless
- // it can be expanded to a trivial value.
- if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L))
- continue;
+ // FIXME: It is an extremely bad idea to indvar substitute anything more
+ // complex than affine induction variables. Doing so will put expensive
+ // polynomial evaluations inside of the loop, and the str reduction pass
+ // currently can only reduce affine polynomials. For now just disable
+ // indvar subst on anything more complex than an affine addrec, unless
+ // it can be expanded to a trivial value.
+ if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L))
+ continue;
- // Determine the insertion point for this user. By default, insert
- // immediately before the user. The SCEVExpander class will automatically
- // hoist loop invariants out of the loop. For PHI nodes, there may be
- // multiple uses, so compute the nearest common dominator for the
- // incoming blocks.
- Instruction *InsertPt = User;
- if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
- for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
- if (PHI->getIncomingValue(i) == Op) {
- if (InsertPt == User)
- InsertPt = PHI->getIncomingBlock(i)->getTerminator();
- else
- InsertPt =
- DT->findNearestCommonDominator(InsertPt->getParent(),
- PHI->getIncomingBlock(i))
- ->getTerminator();
- }
-
- // Now expand it into actual Instructions and patch it into place.
- Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
-
- // Patch the new value into place.
- if (Op->hasName())
- NewVal->takeName(Op);
- User->replaceUsesOfWith(Op, NewVal);
- UI->setOperandValToReplace(NewVal);
- DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
- << " into = " << *NewVal << "\n");
- ++NumRemoved;
- Changed = true;
-
- // The old value may be dead now.
- DeadInsts.push_back(Op);
- }
+ // Determine the insertion point for this user. By default, insert
+ // immediately before the user. The SCEVExpander class will automatically
+ // hoist loop invariants out of the loop. For PHI nodes, there may be
+ // multiple uses, so compute the nearest common dominator for the
+ // incoming blocks.
+ Instruction *InsertPt = User;
+ if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+ if (PHI->getIncomingValue(i) == Op) {
+ if (InsertPt == User)
+ InsertPt = PHI->getIncomingBlock(i)->getTerminator();
+ else
+ InsertPt =
+ DT->findNearestCommonDominator(InsertPt->getParent(),
+ PHI->getIncomingBlock(i))
+ ->getTerminator();
+ }
+
+ // Now expand it into actual Instructions and patch it into place.
+ Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
+
+ // Patch the new value into place.
+ if (Op->hasName())
+ NewVal->takeName(Op);
+ User->replaceUsesOfWith(Op, NewVal);
+ UI->setOperandValToReplace(NewVal);
+ DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
+ << " into = " << *NewVal << "\n");
+ ++NumRemoved;
+ Changed = true;
+
+ // The old value may be dead now.
+ DeadInsts.push_back(Op);
}
// Clear the rewriter cache, because values that are in the rewriter's cache
@@ -598,8 +594,8 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
}
}
-/// Return true if it is OK to use SIToFPInst for an inducation variable
-/// with given inital and exit values.
+/// Return true if it is OK to use SIToFPInst for an induction variable
+/// with given initial and exit values.
static bool useSIToFPInst(ConstantFP &InitV, ConstantFP &ExitV,
uint64_t intIV, uint64_t intEV) {
@@ -652,7 +648,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
if (!convertToInt(InitValue->getValueAPF(), &newInitValue))
return;
- // Check IV increment. Reject this PH if increement operation is not
+  // Check IV increment. Reject this PH if the increment operation is not
  // an add, or if the increment value cannot be represented by an integer.
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(BackEdge));
@@ -688,7 +684,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
if (BI->getCondition() != EC) return;
}
- // Find exit value. If exit value can not be represented as an interger then
+  // Find exit value. If the exit value cannot be represented as an integer then
// do not handle this floating point PH.
ConstantFP *EV = NULL;
unsigned EVIndex = 1;
@@ -750,11 +746,11 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
ICmpInst *NewEC = new ICmpInst(EC->getParent()->getTerminator(),
NewPred, LHS, RHS, EC->getName());
- // In the following deltions, PH may become dead and may be deleted.
+ // In the following deletions, PH may become dead and may be deleted.
// Use a WeakVH to observe whether this happens.
WeakVH WeakPH = PH;
- // Delete old, floating point, exit comparision instruction.
+ // Delete old, floating point, exit comparison instruction.
NewEC->takeName(EC);
EC->replaceAllUsesWith(NewEC);
RecursivelyDeleteTriviallyDeadInstructions(EC);
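
For context, HandleFloatingPointIV is aimed at loops whose induction variable is floating point but whose initial value, increment, and exit value are all exactly representable as integers. A made-up C++ loop of that shape (illustrative only):

    // Float-counted loop with integer-representable init (0), step (1) and
    // exit value (100); the pass rewrites the float PHI and its exit compare
    // to use an equivalent integer induction variable.
    void scaleRow(float *A) {
      for (float I = 0.0f; I != 100.0f; I += 1.0f)
        A[(int)I] *= 2.0f;
    }
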
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 3eff3d8..a6489ec 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -201,7 +201,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
if (isa<DbgInfoIntrinsic>(I)) continue;
// If this is a pointer->pointer bitcast, it is free.
- if (isa<BitCastInst>(I) && isa<PointerType>(I->getType()))
+ if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
continue;
// All other instructions count for at least one unit.
@@ -214,7 +214,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
if (const CallInst *CI = dyn_cast<CallInst>(I)) {
if (!isa<IntrinsicInst>(CI))
Size += 3;
- else if (!isa<VectorType>(CI->getType()))
+ else if (!CI->getType()->isVectorTy())
Size += 1;
}
}
@@ -336,13 +336,18 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
else
InterestingVal = ConstantInt::getFalse(I->getContext());
- // Scan for the sentinel.
+ // Scan for the sentinel. If we find an undef, force it to the
+ // interesting value: x|undef -> true and x&undef -> false.
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
- if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0)
+ if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) {
Result.push_back(LHSVals[i]);
+ Result.back().first = InterestingVal;
+ }
for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
- if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0)
+ if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) {
Result.push_back(RHSVals[i]);
+ Result.back().first = InterestingVal;
+ }
return !Result.empty();
}
@@ -400,7 +405,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
// If comparing a live-in value against a constant, see if we know the
// live-in value on any predecessors.
if (LVI && isa<Constant>(Cmp->getOperand(1)) &&
- Cmp->getType()->isInteger() && // Not vector compare.
+ Cmp->getType()->isIntegerTy() && // Not vector compare.
(!isa<Instruction>(Cmp->getOperand(0)) ||
cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) {
Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
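
The "force undef to the interesting value" choice above rests on a simple boolean identity: an unconstrained operand of an or may be taken as true, and of an and as false, which pins the whole expression. A tiny self-contained check of that identity in plain C++ (not IR; the names are invented):

    #include <cassert>
    // Choosing the unconstrained ("undef") operand b as the interesting value
    // fixes the result regardless of a: x|undef -> true, x&undef -> false.
    static bool orPickTrue(bool a)   { const bool b = true;  return a | b; }
    static bool andPickFalse(bool a) { const bool b = false; return a & b; }
    int main() {
      assert(orPickTrue(false) && orPickTrue(true));
      assert(!andPickFalse(false) && !andPickFalse(true));
      return 0;
    }
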
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 81f9ae6..d7ace34 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -678,7 +678,7 @@ void LICM::PromoteValuesInLoop() {
// If we are promoting a pointer value, update alias information for the
// inserted load.
Value *LoadValue = 0;
- if (isa<PointerType>(cast<PointerType>(Ptr->getType())->getElementType())) {
+ if (cast<PointerType>(Ptr->getType())->getElementType()->isPointerTy()) {
// Locate a load or store through the pointer, and assign the same value
// to LI as we are loading or storing. Since we know that the value is
// stored in this loop, this will always succeed.
@@ -751,7 +751,7 @@ void LICM::PromoteValuesInLoop() {
LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos);
// If this is a pointer type, update alias info appropriately.
- if (isa<PointerType>(LI->getType()))
+ if (LI->getType()->isPointerTy())
CurAST->copyValue(PointerValueNumbers[PVN++], LI);
// Store into the memory we promoted.
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index a5611ff..f920dca 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -17,6 +17,40 @@
// available on the target, and it performs a variety of other optimizations
// related to loop induction variables.
//
+// Terminology note: this code has a lot of handling for "post-increment" or
+// "post-inc" users. This is not talking about post-increment addressing modes;
+// it is instead talking about code like this:
+//
+// %i = phi [ 0, %entry ], [ %i.next, %latch ]
+// ...
+// %i.next = add %i, 1
+// %c = icmp eq %i.next, %n
+//
+// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
+// it's useful to think about these as the same register, with some uses using
+// the value of the register before the add and some using it after. In this
+// example, the icmp is a post-increment user, since it uses %i.next, which is
+// the value of the induction variable after the increment. The other common
+// case of post-increment users is users outside the loop.
+//
+// TODO: More sophistication in the way Formulae are generated and filtered.
+//
+// TODO: Handle multiple loops at a time.
+//
+// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
+// instead of a GlobalValue?
+//
+// TODO: When truncation is free, truncate ICmp users' operands to make it a
+// smaller encoding (on x86 at least).
+//
+// TODO: When a negated register is used by an add (such as in a list of
+// multiple base registers, or as the increment expression in an addrec),
+// we may not actually need both reg and (-1 * reg) in registers; the
+// negation can be implemented by using a sub instead of an add. The
+// lack of support for taking this into consideration when making
+// register pressure decisions is partly worked around by the "Special"
+// use kind.
+//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "loop-reduce"
@@ -26,208 +60,434 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
-#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;
-STATISTIC(NumReduced , "Number of IV uses strength reduced");
-STATISTIC(NumInserted, "Number of PHIs inserted");
-STATISTIC(NumVariable, "Number of PHIs with variable strides");
-STATISTIC(NumEliminated, "Number of strides eliminated");
-STATISTIC(NumShadow, "Number of Shadow IVs optimized");
-STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses");
-STATISTIC(NumLoopCond, "Number of loop terminating conds optimized");
-STATISTIC(NumCountZero, "Number of count iv optimized to count toward zero");
+namespace {
+
+/// RegSortData - This class holds data which is used to order reuse candidates.
+class RegSortData {
+public:
+ /// UsedByIndices - This represents the set of LSRUse indices which reference
+ /// a particular register.
+ SmallBitVector UsedByIndices;
+
+ RegSortData() {}
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
-static cl::opt<bool> EnableFullLSRMode("enable-full-lsr",
- cl::init(false),
- cl::Hidden);
+}
+
+void RegSortData::print(raw_ostream &OS) const {
+ OS << "[NumUses=" << UsedByIndices.count() << ']';
+}
+
+void RegSortData::dump() const {
+ print(errs()); errs() << '\n';
+}
namespace {
- struct BasedUser;
+/// RegUseTracker - Map register candidates to information about how they are
+/// used.
+class RegUseTracker {
+ typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
- /// IVInfo - This structure keeps track of one IV expression inserted during
- /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as
- /// well as the PHI node and increment value created for rewrite.
- struct IVExpr {
- const SCEV *Stride;
- const SCEV *Base;
- PHINode *PHI;
+ RegUsesTy RegUses;
+ SmallVector<const SCEV *, 16> RegSequence;
- IVExpr(const SCEV *const stride, const SCEV *const base, PHINode *phi)
- : Stride(stride), Base(base), PHI(phi) {}
- };
+public:
+ void CountRegister(const SCEV *Reg, size_t LUIdx);
+
+ bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
+
+ const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
+
+ void clear();
+
+ typedef SmallVectorImpl<const SCEV *>::iterator iterator;
+ typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator;
+ iterator begin() { return RegSequence.begin(); }
+ iterator end() { return RegSequence.end(); }
+ const_iterator begin() const { return RegSequence.begin(); }
+ const_iterator end() const { return RegSequence.end(); }
+};
+
+}
- /// IVsOfOneStride - This structure keeps track of all IV expression inserted
- /// during StrengthReduceStridedIVUsers for a particular stride of the IV.
- struct IVsOfOneStride {
- std::vector<IVExpr> IVs;
+void
+RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
+ std::pair<RegUsesTy::iterator, bool> Pair =
+ RegUses.insert(std::make_pair(Reg, RegSortData()));
+ RegSortData &RSD = Pair.first->second;
+ if (Pair.second)
+ RegSequence.push_back(Reg);
+ RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
+ RSD.UsedByIndices.set(LUIdx);
+}
+
+bool
+RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
+ if (!RegUses.count(Reg)) return false;
+ const SmallBitVector &UsedByIndices =
+ RegUses.find(Reg)->second.UsedByIndices;
+ int i = UsedByIndices.find_first();
+ if (i == -1) return false;
+ if ((size_t)i != LUIdx) return true;
+ return UsedByIndices.find_next(i) != -1;
+}
+
+const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
+ RegUsesTy::const_iterator I = RegUses.find(Reg);
+ assert(I != RegUses.end() && "Unknown register!");
+ return I->second.UsedByIndices;
+}
+
+void RegUseTracker::clear() {
+ RegUses.clear();
+ RegSequence.clear();
+}
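
Taken together, CountRegister and isRegUsedByUsesOtherThan answer whether a candidate register is shared by more than one LSR use. A hedged usage sketch as it might appear inside this file; RegA and RegB stand for two distinct const SCEV* register expressions and are assumptions, not values from the patch:

    #include <cassert>
    // Counts RegA for uses 0 and 2 and RegB for use 0 only, then asks about
    // sharing from use 0's point of view.
    static void trackerSketch(const SCEV *RegA, const SCEV *RegB) {
      RegUseTracker Tracker;
      Tracker.CountRegister(RegA, /*LUIdx=*/0);
      Tracker.CountRegister(RegA, /*LUIdx=*/2);
      Tracker.CountRegister(RegB, /*LUIdx=*/0);
      assert( Tracker.isRegUsedByUsesOtherThan(RegA, 0));  // use 2 also has RegA
      assert(!Tracker.isRegUsedByUsesOtherThan(RegB, 0));  // only use 0 has RegB
    }
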
+
+namespace {
+
+/// Formula - This class holds information that describes a formula for
+/// computing a value that satisfies a use. It may include broken-out
+/// immediates and scaled registers.
+struct Formula {
+ /// AM - This is used to represent complex addressing, as well as other kinds
+ /// of interesting uses.
+ TargetLowering::AddrMode AM;
+
+ /// BaseRegs - The list of "base" registers for this use. When this is
+ /// non-empty, AM.HasBaseReg should be set to true.
+ SmallVector<const SCEV *, 2> BaseRegs;
- void addIV(const SCEV *const Stride, const SCEV *const Base, PHINode *PHI) {
- IVs.push_back(IVExpr(Stride, Base, PHI));
+ /// ScaledReg - The 'scaled' register for this use. This should be non-null
+ /// when AM.Scale is not zero.
+ const SCEV *ScaledReg;
+
+ Formula() : ScaledReg(0) {}
+
+ void InitialMatch(const SCEV *S, Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT);
+
+ unsigned getNumRegs() const;
+ const Type *getType() const;
+
+ bool referencesReg(const SCEV *S) const;
+ bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
+ const RegUseTracker &RegUses) const;
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
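
As a rough picture of what a Formula carries, an address of the shape base + 4*i + 16 would keep the constant in AM.BaseOffs, the stride in AM.Scale, the induction expression in ScaledReg, and the base pointer in BaseRegs. A hedged sketch as it might appear inside this file; BasePtrS and IndVarS are assumed SCEVs, not values from the patch:

    // Illustrative decomposition only; field meanings follow the comments above.
    static Formula exampleFormula(const SCEV *BasePtrS, const SCEV *IndVarS) {
      Formula F;
      F.BaseRegs.push_back(BasePtrS);  // one plain base register...
      F.AM.HasBaseReg = true;          // ...so HasBaseReg is set
      F.AM.BaseOffs = 16;              // folded-in constant offset
      F.AM.Scale = 4;                  // a scaled register is present...
      F.ScaledReg = IndVarS;           // ...and this is it
      return F;
    }
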
+
+/// DoInitialMatch - Recursion helper for InitialMatch.
+static void DoInitialMatch(const SCEV *S, Loop *L,
+ SmallVectorImpl<const SCEV *> &Good,
+ SmallVectorImpl<const SCEV *> &Bad,
+ ScalarEvolution &SE, DominatorTree &DT) {
+ // Collect expressions which properly dominate the loop header.
+ if (S->properlyDominates(L->getHeader(), &DT)) {
+ Good.push_back(S);
+ return;
+ }
+
+ // Look at add operands.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I)
+ DoInitialMatch(*I, L, Good, Bad, SE, DT);
+ return;
+ }
+
+ // Look at addrec operands.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ if (!AR->getStart()->isZero()) {
+ DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT);
+ DoInitialMatch(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()),
+ AR->getStepRecurrence(SE),
+ AR->getLoop()),
+ L, Good, Bad, SE, DT);
+ return;
}
- };
- class LoopStrengthReduce : public LoopPass {
- IVUsers *IU;
- ScalarEvolution *SE;
- bool Changed;
-
- /// IVsByStride - Keep track of all IVs that have been inserted for a
- /// particular stride.
- std::map<const SCEV *, IVsOfOneStride> IVsByStride;
-
- /// DeadInsts - Keep track of instructions we may have made dead, so that
- /// we can remove them after we are done working.
- SmallVector<WeakVH, 16> DeadInsts;
-
- /// TLI - Keep a pointer of a TargetLowering to consult for determining
- /// transformation profitability.
- const TargetLowering *TLI;
-
- public:
- static char ID; // Pass ID, replacement for typeid
- explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
- LoopPass(&ID), TLI(tli) {}
-
- bool runOnLoop(Loop *L, LPPassManager &LPM);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- // We split critical edges, so we change the CFG. However, we do update
- // many analyses if they are around.
- AU.addPreservedID(LoopSimplifyID);
- AU.addPreserved("loops");
- AU.addPreserved("domfrontier");
- AU.addPreserved("domtree");
-
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequired<ScalarEvolution>();
- AU.addPreserved<ScalarEvolution>();
- AU.addRequired<IVUsers>();
- AU.addPreserved<IVUsers>();
+ // Handle a multiplication by -1 (negation) if it didn't fold.
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
+ if (Mul->getOperand(0)->isAllOnesValue()) {
+ SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
+ const SCEV *NewMul = SE.getMulExpr(Ops);
+
+ SmallVector<const SCEV *, 4> MyGood;
+ SmallVector<const SCEV *, 4> MyBad;
+ DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT);
+ const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
+ SE.getEffectiveSCEVType(NewMul->getType())));
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
+ E = MyGood.end(); I != E; ++I)
+ Good.push_back(SE.getMulExpr(NegOne, *I));
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(),
+ E = MyBad.end(); I != E; ++I)
+ Bad.push_back(SE.getMulExpr(NegOne, *I));
+ return;
}
- private:
- void OptimizeIndvars(Loop *L);
-
- /// OptimizeLoopTermCond - Change loop terminating condition to use the
- /// postinc iv when possible.
- void OptimizeLoopTermCond(Loop *L);
-
- /// OptimizeShadowIV - If IV is used in a int-to-float cast
- /// inside the loop then try to eliminate the cast opeation.
- void OptimizeShadowIV(Loop *L);
-
- /// OptimizeMax - Rewrite the loop's terminating condition
- /// if it uses a max computation.
- ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse);
-
- /// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for
- /// deciding when to exit the loop is used only for that purpose, try to
- /// rearrange things so it counts down to a test against zero.
- bool OptimizeLoopCountIV(Loop *L);
- bool OptimizeLoopCountIVOfStride(const SCEV* &Stride,
- IVStrideUse* &CondUse, Loop *L);
-
- /// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a
- /// single stride of IV. All of the users may have different starting
- /// values, and this may not be the only stride.
- void StrengthReduceIVUsersOfStride(const SCEV *Stride,
- IVUsersOfOneStride &Uses,
- Loop *L);
- void StrengthReduceIVUsers(Loop *L);
-
- ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse,
- const SCEV* &CondStride,
- bool PostPass = false);
-
- bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
- const SCEV* &CondStride);
- bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
- const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *,
- IVExpr&, const Type*,
- const std::vector<BasedUser>& UsersToProcess);
- bool ValidScale(bool, int64_t,
- const std::vector<BasedUser>& UsersToProcess);
- bool ValidOffset(bool, int64_t, int64_t,
- const std::vector<BasedUser>& UsersToProcess);
- const SCEV *CollectIVUsers(const SCEV *Stride,
- IVUsersOfOneStride &Uses,
- Loop *L,
- bool &AllUsesAreAddresses,
- bool &AllUsesAreOutsideLoop,
- std::vector<BasedUser> &UsersToProcess);
- bool StrideMightBeShared(const SCEV *Stride, Loop *L, bool CheckPreInc);
- bool ShouldUseFullStrengthReductionMode(
- const std::vector<BasedUser> &UsersToProcess,
- const Loop *L,
- bool AllUsesAreAddresses,
- const SCEV *Stride);
- void PrepareToStrengthReduceFully(
- std::vector<BasedUser> &UsersToProcess,
- const SCEV *Stride,
- const SCEV *CommonExprs,
- const Loop *L,
- SCEVExpander &PreheaderRewriter);
- void PrepareToStrengthReduceFromSmallerStride(
- std::vector<BasedUser> &UsersToProcess,
- Value *CommonBaseV,
- const IVExpr &ReuseIV,
- Instruction *PreInsertPt);
- void PrepareToStrengthReduceWithNewPhi(
- std::vector<BasedUser> &UsersToProcess,
- const SCEV *Stride,
- const SCEV *CommonExprs,
- Value *CommonBaseV,
- Instruction *IVIncInsertPt,
- const Loop *L,
- SCEVExpander &PreheaderRewriter);
-
- void DeleteTriviallyDeadInstructions();
- };
+ // Ok, we can't do anything interesting. Just stuff the whole thing into a
+ // register and hope for the best.
+ Bad.push_back(S);
}
-char LoopStrengthReduce::ID = 0;
-static RegisterPass<LoopStrengthReduce>
-X("loop-reduce", "Loop Strength Reduction");
+/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
+/// attempting to keep all loop-invariant and loop-computable values in a
+/// single base register.
+void Formula::InitialMatch(const SCEV *S, Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT) {
+ SmallVector<const SCEV *, 4> Good;
+ SmallVector<const SCEV *, 4> Bad;
+ DoInitialMatch(S, L, Good, Bad, SE, DT);
+ if (!Good.empty()) {
+ BaseRegs.push_back(SE.getAddExpr(Good));
+ AM.HasBaseReg = true;
+ }
+ if (!Bad.empty()) {
+ BaseRegs.push_back(SE.getAddExpr(Bad));
+ AM.HasBaseReg = true;
+ }
+}
-Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
- return new LoopStrengthReduce(TLI);
+/// getNumRegs - Return the total number of register operands used by this
+/// formula. This does not include register uses implied by non-constant
+/// addrec strides.
+unsigned Formula::getNumRegs() const {
+ return !!ScaledReg + BaseRegs.size();
}
-/// DeleteTriviallyDeadInstructions - If any of the instructions is the
-/// specified set are trivially dead, delete them and see if this makes any of
-/// their operands subsequently dead.
-void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
- while (!DeadInsts.empty()) {
- Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
+/// getType - Return the type of this formula, if it has one, or null
+/// otherwise. This type is meaningless except for the bit size.
+const Type *Formula::getType() const {
+ return !BaseRegs.empty() ? BaseRegs.front()->getType() :
+ ScaledReg ? ScaledReg->getType() :
+ AM.BaseGV ? AM.BaseGV->getType() :
+ 0;
+}
- if (I == 0 || !isInstructionTriviallyDead(I))
- continue;
+/// referencesReg - Test if this formula references the given register.
+bool Formula::referencesReg(const SCEV *S) const {
+ return S == ScaledReg ||
+ std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
+}
- for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
- if (Instruction *U = dyn_cast<Instruction>(*OI)) {
- *OI = 0;
- if (U->use_empty())
- DeadInsts.push_back(U);
+/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers
+/// which are used by uses other than the use with the given index.
+bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
+ const RegUseTracker &RegUses) const {
+ if (ScaledReg)
+ if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
+ return true;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
+ E = BaseRegs.end(); I != E; ++I)
+ if (RegUses.isRegUsedByUsesOtherThan(*I, LUIdx))
+ return true;
+ return false;
+}
+
+void Formula::print(raw_ostream &OS) const {
+ bool First = true;
+ if (AM.BaseGV) {
+ if (!First) OS << " + "; else First = false;
+ WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false);
+ }
+ if (AM.BaseOffs != 0) {
+ if (!First) OS << " + "; else First = false;
+ OS << AM.BaseOffs;
+ }
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
+ E = BaseRegs.end(); I != E; ++I) {
+ if (!First) OS << " + "; else First = false;
+ OS << "reg(" << **I << ')';
+ }
+ if (AM.Scale != 0) {
+ if (!First) OS << " + "; else First = false;
+ OS << AM.Scale << "*reg(";
+ if (ScaledReg)
+ OS << *ScaledReg;
+ else
+ OS << "<unknown>";
+ OS << ')';
+ }
+}
+
+void Formula::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+/// isAddRecSExtable - Return true if the given addrec can be sign-extended
+/// without changing its value.
+static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
+ const Type *WideTy =
+ IntegerType::get(SE.getContext(),
+ SE.getTypeSizeInBits(AR->getType()) + 1);
+ return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
+}
+
+/// isAddSExtable - Return true if the given add can be sign-extended
+/// without changing its value.
+static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
+ const Type *WideTy =
+ IntegerType::get(SE.getContext(),
+ SE.getTypeSizeInBits(A->getType()) + 1);
+ return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
+}
+
+/// isMulSExtable - Return true if the given mul can be sign-extended
+/// without changing its value.
+static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) {
+ const Type *WideTy =
+ IntegerType::get(SE.getContext(),
+ SE.getTypeSizeInBits(A->getType()) + 1);
+ return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy));
+}
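
The three helpers above all rely on the same arithmetic fact: a narrow operation can be sign-extended without changing its value exactly when redoing it in a wider type, on sign-extended operands, yields the sign-extension of the narrow result. A minimal standalone sketch of that fact for an 8-bit add (plain C++, not LLVM code; two's-complement wrap assumed):

#include <cstdint>

// Returns true iff sign-extending the 8-bit sum of A and B gives the same
// value as adding them in a wider type, i.e. the narrow add did not overflow.
static bool addSExtendsExactly(int8_t A, int8_t B) {
  int8_t Narrow = (int8_t)(A + B);          // 8-bit result, wraps on overflow
  int16_t Wide = (int16_t)A + (int16_t)B;   // exact result in a wider type
  return (int16_t)Narrow == Wide;
}
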
+
+/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
+/// and if the remainder is known to be zero, or null otherwise. If
+/// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified
+/// to X, ignoring that the multiplication may overflow, which is useful when
+/// the result will be used in a context where the most significant bits are
+/// ignored.
+static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
+ ScalarEvolution &SE,
+ bool IgnoreSignificantBits = false) {
+ // Handle the trivial case, which works for any SCEV type.
+ if (LHS == RHS)
+ return SE.getIntegerSCEV(1, LHS->getType());
+
+ // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some
+ // folding.
+ if (RHS->isAllOnesValue())
+ return SE.getMulExpr(LHS, RHS);
+
+ // Check for a division of a constant by a constant.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
+ const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
+ if (!RC)
+ return 0;
+ if (C->getValue()->getValue().srem(RC->getValue()->getValue()) != 0)
+ return 0;
+ return SE.getConstant(C->getValue()->getValue()
+ .sdiv(RC->getValue()->getValue()));
+ }
+
+ // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
+ if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
+ const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
+ IgnoreSignificantBits);
+ if (!Start) return 0;
+ const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
+ IgnoreSignificantBits);
+ if (!Step) return 0;
+ return SE.getAddRecExpr(Start, Step, AR->getLoop());
+ }
+ }
+
+ // Distribute the sdiv over add operands, if the add doesn't overflow.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
+ if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
+ SmallVector<const SCEV *, 8> Ops;
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I) {
+ const SCEV *Op = getExactSDiv(*I, RHS, SE,
+ IgnoreSignificantBits);
+ if (!Op) return 0;
+ Ops.push_back(Op);
}
+ return SE.getAddExpr(Ops);
+ }
+ }
- I->eraseFromParent();
- Changed = true;
+ // Check for a multiply operand that we can pull RHS out of.
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS))
+ if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
+ SmallVector<const SCEV *, 4> Ops;
+ bool Found = false;
+ for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
+ I != E; ++I) {
+ if (!Found)
+ if (const SCEV *Q = getExactSDiv(*I, RHS, SE,
+ IgnoreSignificantBits)) {
+ Ops.push_back(Q);
+ Found = true;
+ continue;
+ }
+ Ops.push_back(*I);
+ }
+ return Found ? SE.getMulExpr(Ops) : 0;
+ }
+
+ // Otherwise we don't know.
+ return 0;
+}
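
As a standalone illustration of the constant-by-constant case above (plain C++, not LLVM code): the quotient is produced only when the remainder is known to be zero; otherwise the division is rejected, mirroring the null result of getExactSDiv.

#include <cstdint>

// Computes LHS /s RHS exactly. Returns true and sets Quotient only when the
// signed remainder is zero; returns false for inexact or undefined divisions.
static bool exactSDiv(int64_t LHS, int64_t RHS, int64_t &Quotient) {
  if (RHS == 0 || (LHS == INT64_MIN && RHS == -1))
    return false;                  // undefined division, no exact result
  if (LHS % RHS != 0)
    return false;                  // remainder must be zero
  Quotient = LHS / RHS;
  return true;
}
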
+
+/// ExtractImmediate - If S involves the addition of a constant integer value,
+/// return that integer value, and mutate S to point to a new SCEV with that
+/// value excluded.
+static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ if (C->getValue()->getValue().getMinSignedBits() <= 64) {
+ S = SE.getIntegerSCEV(0, C->getType());
+ return C->getValue()->getSExtValue();
+ }
+ } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
+ int64_t Result = ExtractImmediate(NewOps.front(), SE);
+ S = SE.getAddExpr(NewOps);
+ return Result;
+ } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
+ int64_t Result = ExtractImmediate(NewOps.front(), SE);
+ S = SE.getAddRecExpr(NewOps, AR->getLoop());
+ return Result;
+ }
+ return 0;
+}
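
A rough standalone sketch of what ExtractImmediate does, using a plain list of addends instead of a SCEV add expression (the Addend representation here is purely illustrative): the constant term is removed from the sum and handed back to the caller; ExtractSymbol below does the analogous thing for a GlobalValue addend.

#include <cstdint>
#include <vector>

struct Addend {
  bool IsConstant;   // literal integer term?
  int64_t Value;     // used only when IsConstant
};

// Removes the first constant addend from Sum and returns it, or returns 0
// and leaves Sum untouched when there is no constant term.
static int64_t extractImmediate(std::vector<Addend> &Sum) {
  for (std::vector<Addend>::iterator I = Sum.begin(), E = Sum.end(); I != E; ++I)
    if (I->IsConstant) {
      int64_t Imm = I->Value;
      Sum.erase(I);
      return Imm;
    }
  return 0;
}
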
+
+/// ExtractSymbol - If S involves the addition of a GlobalValue address,
+/// return that symbol, and mutate S to point to a new SCEV with that
+/// value excluded.
+static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
+ S = SE.getIntegerSCEV(0, GV->getType());
+ return GV;
+ }
+ } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
+ GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
+ S = SE.getAddExpr(NewOps);
+ return Result;
+ } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
+ GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
+ S = SE.getAddRecExpr(NewOps, AR->getLoop());
+ return Result;
}
+ return 0;
}
/// isAddressUse - Returns true if the specified instruction is using the
@@ -276,1776 +536,833 @@ static const Type *getAccessType(const Instruction *Inst) {
break;
}
}
- return AccessTy;
-}
-namespace {
- /// BasedUser - For a particular base value, keep information about how we've
- /// partitioned the expression so far.
- struct BasedUser {
- /// Base - The Base value for the PHI node that needs to be inserted for
- /// this use. As the use is processed, information gets moved from this
- /// field to the Imm field (below). BasedUser values are sorted by this
- /// field.
- const SCEV *Base;
-
- /// Inst - The instruction using the induction variable.
- Instruction *Inst;
-
- /// OperandValToReplace - The operand value of Inst to replace with the
- /// EmittedBase.
- Value *OperandValToReplace;
-
- /// Imm - The immediate value that should be added to the base immediately
- /// before Inst, because it will be folded into the imm field of the
- /// instruction. This is also sometimes used for loop-variant values that
- /// must be added inside the loop.
- const SCEV *Imm;
-
- /// Phi - The induction variable that performs the striding that
- /// should be used for this user.
- PHINode *Phi;
-
- // isUseOfPostIncrementedValue - True if this should use the
- // post-incremented version of this IV, not the preincremented version.
- // This can only be set in special cases, such as the terminating setcc
- // instruction for a loop and uses outside the loop that are dominated by
- // the loop.
- bool isUseOfPostIncrementedValue;
-
- BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
- : Base(IVSU.getOffset()), Inst(IVSU.getUser()),
- OperandValToReplace(IVSU.getOperandValToReplace()),
- Imm(se->getIntegerSCEV(0, Base->getType())),
- isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
-
- // Once we rewrite the code to insert the new IVs we want, update the
- // operands of Inst to use the new expression 'NewBase', with 'Imm' added
- // to it.
- void RewriteInstructionToUseNewBase(const SCEV *NewBase,
- Instruction *InsertPt,
- SCEVExpander &Rewriter, Loop *L, Pass *P,
- SmallVectorImpl<WeakVH> &DeadInsts,
- ScalarEvolution *SE);
-
- Value *InsertCodeForBaseAtPosition(const SCEV *NewBase,
- const Type *Ty,
- SCEVExpander &Rewriter,
- Instruction *IP,
- ScalarEvolution *SE);
- void dump() const;
- };
-}
+ // All pointers have the same requirements, so canonicalize them to an
+ // arbitrary pointer type to minimize variation.
+ if (const PointerType *PTy = dyn_cast<PointerType>(AccessTy))
+ AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
+ PTy->getAddressSpace());
-void BasedUser::dump() const {
- dbgs() << " Base=" << *Base;
- dbgs() << " Imm=" << *Imm;
- dbgs() << " Inst: " << *Inst;
+ return AccessTy;
}
-Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *NewBase,
- const Type *Ty,
- SCEVExpander &Rewriter,
- Instruction *IP,
- ScalarEvolution *SE) {
- Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP);
-
- // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to
- // re-analyze it.
- const SCEV *NewValSCEV = SE->getUnknown(Base);
-
- // Always emit the immediate into the same block as the user.
- NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
-
- return Rewriter.expandCodeFor(NewValSCEV, Ty, IP);
-}
+/// DeleteTriviallyDeadInstructions - If any of the instructions in the
+/// specified set are trivially dead, delete them and see if this makes any of
+/// their operands subsequently dead.
+static bool
+DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
+ bool Changed = false;
+ while (!DeadInsts.empty()) {
+ Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
-// Once we rewrite the code to insert the new IVs we want, update the
-// operands of Inst to use the new expression 'NewBase', with 'Imm' added
-// to it. NewBasePt is the last instruction which contributes to the
-// value of NewBase in the case that it's a diffferent instruction from
-// the PHI that NewBase is computed from, or null otherwise.
-//
-void BasedUser::RewriteInstructionToUseNewBase(const SCEV *NewBase,
- Instruction *NewBasePt,
- SCEVExpander &Rewriter, Loop *L, Pass *P,
- SmallVectorImpl<WeakVH> &DeadInsts,
- ScalarEvolution *SE) {
- if (!isa<PHINode>(Inst)) {
- // By default, insert code at the user instruction.
- BasicBlock::iterator InsertPt = Inst;
-
- // However, if the Operand is itself an instruction, the (potentially
- // complex) inserted code may be shared by many users. Because of this, we
- // want to emit code for the computation of the operand right before its old
- // computation. This is usually safe, because we obviously used to use the
- // computation when it was computed in its current block. However, in some
- // cases (e.g. use of a post-incremented induction variable) the NewBase
- // value will be pinned to live somewhere after the original computation.
- // In this case, we have to back off.
- //
- // If this is a use outside the loop (which means after, since it is based
- // on a loop indvar) we use the post-incremented value, so that we don't
- // artificially make the preinc value live out the bottom of the loop.
- if (!isUseOfPostIncrementedValue && L->contains(Inst)) {
- if (NewBasePt && isa<PHINode>(OperandValToReplace)) {
- InsertPt = NewBasePt;
- ++InsertPt;
- } else if (Instruction *OpInst
- = dyn_cast<Instruction>(OperandValToReplace)) {
- InsertPt = OpInst;
- while (isa<PHINode>(InsertPt)) ++InsertPt;
- }
- }
- Value *NewVal = InsertCodeForBaseAtPosition(NewBase,
- OperandValToReplace->getType(),
- Rewriter, InsertPt, SE);
- // Replace the use of the operand Value with the new Phi we just created.
- Inst->replaceUsesOfWith(OperandValToReplace, NewVal);
-
- DEBUG(dbgs() << " Replacing with ");
- DEBUG(WriteAsOperand(dbgs(), NewVal, /*PrintType=*/false));
- DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM "
- << *Imm << "\n");
- return;
- }
+ if (I == 0 || !isInstructionTriviallyDead(I))
+ continue;
- // PHI nodes are more complex. We have to insert one copy of the NewBase+Imm
- // expression into each operand block that uses it. Note that PHI nodes can
- // have multiple entries for the same predecessor. We use a map to make sure
- // that a PHI node only has a single Value* for each predecessor (which also
- // prevents us from inserting duplicate code in some blocks).
- DenseMap<BasicBlock*, Value*> InsertedCode;
- PHINode *PN = cast<PHINode>(Inst);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- if (PN->getIncomingValue(i) == OperandValToReplace) {
- // If the original expression is outside the loop, put the replacement
- // code in the same place as the original expression,
- // which need not be an immediate predecessor of this PHI. This way we
- // need only one copy of it even if it is referenced multiple times in
- // the PHI. We don't do this when the original expression is inside the
- // loop because multiple copies sometimes do useful sinking of code in
- // that case(?).
- Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace);
- BasicBlock *PHIPred = PN->getIncomingBlock(i);
- if (L->contains(OldLoc)) {
- // If this is a critical edge, split the edge so that we do not insert
- // the code on all predecessor/successor paths. We do this unless this
- // is the canonical backedge for this loop, as this can make some
- // inserted code be in an illegal position.
- if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&
- !isa<IndirectBrInst>(PHIPred->getTerminator()) &&
- (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
-
- // First step, split the critical edge.
- BasicBlock *NewBB = SplitCriticalEdge(PHIPred, PN->getParent(),
- P, false);
-
- // Next step: move the basic block. In particular, if the PHI node
- // is outside of the loop, and PredTI is in the loop, we want to
- // move the block to be immediately before the PHI block, not
- // immediately after PredTI.
- if (L->contains(PHIPred) && !L->contains(PN))
- NewBB->moveBefore(PN->getParent());
-
- // Splitting the edge can reduce the number of PHI entries we have.
- e = PN->getNumIncomingValues();
- PHIPred = NewBB;
- i = PN->getBasicBlockIndex(PHIPred);
- }
- }
- Value *&Code = InsertedCode[PHIPred];
- if (!Code) {
- // Insert the code into the end of the predecessor block.
- Instruction *InsertPt = (L->contains(OldLoc)) ?
- PHIPred->getTerminator() :
- OldLoc->getParent()->getTerminator();
- Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(),
- Rewriter, InsertPt, SE);
-
- DEBUG(dbgs() << " Changing PHI use to ");
- DEBUG(WriteAsOperand(dbgs(), Code, /*PrintType=*/false));
- DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM "
- << *Imm << "\n");
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
+ if (Instruction *U = dyn_cast<Instruction>(*OI)) {
+ *OI = 0;
+ if (U->use_empty())
+ DeadInsts.push_back(U);
}
- // Replace the use of the operand Value with the new Phi we just created.
- PN->setIncomingValue(i, Code);
- Rewriter.clear();
- }
+ I->eraseFromParent();
+ Changed = true;
}
- // PHI node might have become a constant value after SplitCriticalEdge.
- DeadInsts.push_back(Inst);
+ return Changed;
}
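
A minimal standalone sketch of the worklist pattern used above, on a toy instruction type rather than llvm::Instruction (the Inst struct is illustrative only): deleting a dead instruction drops its uses of its operands, which may make those operands dead in turn, so they go back on the worklist.

#include <vector>

struct Inst {
  std::vector<Inst *> Operands; // instructions this one uses
  unsigned NumUses;             // how many instructions use this one
  bool Erased;
  Inst() : NumUses(0), Erased(false) {}
};

static bool deleteTriviallyDead(std::vector<Inst *> &Worklist) {
  bool Changed = false;
  while (!Worklist.empty()) {
    Inst *I = Worklist.back();
    Worklist.pop_back();
    if (!I || I->Erased || I->NumUses != 0)
      continue;                          // not trivially dead
    for (unsigned i = 0, e = I->Operands.size(); i != e; ++i) {
      Inst *Op = I->Operands[i];
      if (Op && --Op->NumUses == 0)
        Worklist.push_back(Op);          // operand just became dead
    }
    I->Operands.clear();
    I->Erased = true;                    // stands in for eraseFromParent()
    Changed = true;
  }
  return Changed;
}
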
+namespace {
-/// fitsInAddressMode - Return true if V can be subsumed within an addressing
-/// mode, and does not need to be put in a register first.
-static bool fitsInAddressMode(const SCEV *V, const Type *AccessTy,
- const TargetLowering *TLI, bool HasBaseReg) {
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) {
- int64_t VC = SC->getValue()->getSExtValue();
- if (TLI) {
- TargetLowering::AddrMode AM;
- AM.BaseOffs = VC;
- AM.HasBaseReg = HasBaseReg;
- return TLI->isLegalAddressingMode(AM, AccessTy);
- } else {
- // Defaults to PPC. PPC allows a sign-extended 16-bit immediate field.
- return (VC > -(1 << 16) && VC < (1 << 16)-1);
- }
- }
-
- if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V))
- if (GlobalValue *GV = dyn_cast<GlobalValue>(SU->getValue())) {
- if (TLI) {
- TargetLowering::AddrMode AM;
- AM.BaseGV = GV;
- AM.HasBaseReg = HasBaseReg;
- return TLI->isLegalAddressingMode(AM, AccessTy);
- } else {
- // Default: assume global addresses are not legal.
- }
- }
+/// Cost - This class is used to measure and compare candidate formulae.
+class Cost {
+ /// TODO: Some of these could be merged. Also, a lexical ordering
+ /// isn't always optimal.
+ unsigned NumRegs;
+ unsigned AddRecCost;
+ unsigned NumIVMuls;
+ unsigned NumBaseAdds;
+ unsigned ImmCost;
+ unsigned SetupCost;
+
+public:
+ Cost()
+ : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
+ SetupCost(0) {}
+
+ unsigned getNumRegs() const { return NumRegs; }
+
+ bool operator<(const Cost &Other) const;
+
+ void Loose();
+
+ void RateFormula(const Formula &F,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const DenseSet<const SCEV *> &VisitedRegs,
+ const Loop *L,
+ const SmallVectorImpl<int64_t> &Offsets,
+ ScalarEvolution &SE, DominatorTree &DT);
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+
+private:
+ void RateRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT);
+ void RatePrimaryRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT);
+};
- return false;
}
-/// MoveLoopVariantsToImmediateField - Move any subexpressions from Val that are
-/// loop varying to the Imm operand.
-static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm,
- Loop *L, ScalarEvolution *SE) {
- if (Val->isLoopInvariant(L)) return; // Nothing to do.
-
- if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
- SmallVector<const SCEV *, 4> NewOps;
- NewOps.reserve(SAE->getNumOperands());
+/// RateRegister - Tally up interesting quantities from the given register.
+void Cost::RateRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
+ if (AR->getLoop() == L)
+ AddRecCost += 1; /// TODO: This should be a function of the stride.
+
+ // If this is an addrec for a loop that's already been visited by LSR,
+ // don't second-guess its addrec phi nodes. LSR isn't currently smart
+ // enough to reason about more than one loop at a time. Consider these
+ // registers free and leave them alone.
+ else if (L->contains(AR->getLoop()) ||
+ (!AR->getLoop()->contains(L) &&
+ DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
+ for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ if (SE.isSCEVable(PN->getType()) &&
+ (SE.getEffectiveSCEVType(PN->getType()) ==
+ SE.getEffectiveSCEVType(AR->getType())) &&
+ SE.getSCEV(PN) == AR)
+ return;
- for (unsigned i = 0; i != SAE->getNumOperands(); ++i)
- if (!SAE->getOperand(i)->isLoopInvariant(L)) {
- // If this is a loop-variant expression, it must stay in the immediate
- // field of the expression.
- Imm = SE->getAddExpr(Imm, SAE->getOperand(i));
- } else {
- NewOps.push_back(SAE->getOperand(i));
- }
+ // If this isn't one of the addrecs that the loop already has, it
+ // would require a costly new phi and add. TODO: This isn't
+ // precisely modeled right now.
+ ++NumBaseAdds;
+ if (!Regs.count(AR->getStart()))
+ RateRegister(AR->getStart(), Regs, L, SE, DT);
+ }
- if (NewOps.empty())
- Val = SE->getIntegerSCEV(0, Val->getType());
- else
- Val = SE->getAddExpr(NewOps);
- } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
- // Try to pull immediates out of the start value of nested addrec's.
- const SCEV *Start = SARE->getStart();
- MoveLoopVariantsToImmediateField(Start, Imm, L, SE);
-
- SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
- Ops[0] = Start;
- Val = SE->getAddRecExpr(Ops, SARE->getLoop());
- } else {
- // Otherwise, all of Val is variant, move the whole thing over.
- Imm = SE->getAddExpr(Imm, Val);
- Val = SE->getIntegerSCEV(0, Val->getType());
+ // Add the step value register, if it needs one.
+ // TODO: The non-affine case isn't precisely modeled here.
+ if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1)))
+ if (!Regs.count(AR->getStart()))
+ RateRegister(AR->getOperand(1), Regs, L, SE, DT);
}
+ ++NumRegs;
+
+ // Rough heuristic; favor registers which don't require extra setup
+ // instructions in the preheader.
+ if (!isa<SCEVUnknown>(Reg) &&
+ !isa<SCEVConstant>(Reg) &&
+ !(isa<SCEVAddRecExpr>(Reg) &&
+ (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
+ isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
+ ++SetupCost;
}
+/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
+/// before, rate it.
+void Cost::RatePrimaryRegister(const SCEV *Reg,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT) {
+ if (Regs.insert(Reg))
+ RateRegister(Reg, Regs, L, SE, DT);
+}
-/// MoveImmediateValues - Look at Val, and pull out any additions of constants
-/// that can fit into the immediate field of instructions in the target.
-/// Accumulate these immediate values into the Imm value.
-static void MoveImmediateValues(const TargetLowering *TLI,
- const Type *AccessTy,
- const SCEV *&Val, const SCEV *&Imm,
- bool isAddress, Loop *L,
- ScalarEvolution *SE) {
- if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
- SmallVector<const SCEV *, 4> NewOps;
- NewOps.reserve(SAE->getNumOperands());
-
- for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
- const SCEV *NewOp = SAE->getOperand(i);
- MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE);
-
- if (!NewOp->isLoopInvariant(L)) {
- // If this is a loop-variant expression, it must stay in the immediate
- // field of the expression.
- Imm = SE->getAddExpr(Imm, NewOp);
- } else {
- NewOps.push_back(NewOp);
- }
- }
-
- if (NewOps.empty())
- Val = SE->getIntegerSCEV(0, Val->getType());
- else
- Val = SE->getAddExpr(NewOps);
- return;
- } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
- // Try to pull immediates out of the start value of nested addrec's.
- const SCEV *Start = SARE->getStart();
- MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE);
-
- if (Start != SARE->getStart()) {
- SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
- Ops[0] = Start;
- Val = SE->getAddRecExpr(Ops, SARE->getLoop());
+void Cost::RateFormula(const Formula &F,
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const DenseSet<const SCEV *> &VisitedRegs,
+ const Loop *L,
+ const SmallVectorImpl<int64_t> &Offsets,
+ ScalarEvolution &SE, DominatorTree &DT) {
+ // Tally up the registers.
+ if (const SCEV *ScaledReg = F.ScaledReg) {
+ if (VisitedRegs.count(ScaledReg)) {
+ Loose();
+ return;
}
- return;
- } else if (const SCEVMulExpr *SME = dyn_cast<SCEVMulExpr>(Val)) {
- // Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field.
- if (isAddress &&
- fitsInAddressMode(SME->getOperand(0), AccessTy, TLI, false) &&
- SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) {
-
- const SCEV *SubImm = SE->getIntegerSCEV(0, Val->getType());
- const SCEV *NewOp = SME->getOperand(1);
- MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE);
-
- // If we extracted something out of the subexpressions, see if we can
- // simplify this!
- if (NewOp != SME->getOperand(1)) {
- // Scale SubImm up by "8". If the result is a target constant, we are
- // good.
- SubImm = SE->getMulExpr(SubImm, SME->getOperand(0));
- if (fitsInAddressMode(SubImm, AccessTy, TLI, false)) {
- // Accumulate the immediate.
- Imm = SE->getAddExpr(Imm, SubImm);
-
- // Update what is left of 'Val'.
- Val = SE->getMulExpr(SME->getOperand(0), NewOp);
- return;
- }
- }
+ RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
+ }
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
+ E = F.BaseRegs.end(); I != E; ++I) {
+ const SCEV *BaseReg = *I;
+ if (VisitedRegs.count(BaseReg)) {
+ Loose();
+ return;
}
+ RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
+
+ NumIVMuls += isa<SCEVMulExpr>(BaseReg) &&
+ BaseReg->hasComputableLoopEvolution(L);
}
- // Loop-variant expressions must stay in the immediate field of the
- // expression.
- if ((isAddress && fitsInAddressMode(Val, AccessTy, TLI, false)) ||
- !Val->isLoopInvariant(L)) {
- Imm = SE->getAddExpr(Imm, Val);
- Val = SE->getIntegerSCEV(0, Val->getType());
- return;
+ if (F.BaseRegs.size() > 1)
+ NumBaseAdds += F.BaseRegs.size() - 1;
+
+ // Tally up the non-zero immediates.
+ for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
+ E = Offsets.end(); I != E; ++I) {
+ int64_t Offset = (uint64_t)*I + F.AM.BaseOffs;
+ if (F.AM.BaseGV)
+ ImmCost += 64; // Handle symbolic values conservatively.
+ // TODO: This should probably be the pointer size.
+ else if (Offset != 0)
+ ImmCost += APInt(64, Offset, true).getMinSignedBits();
}
+}
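
The immediate cost charged above is the number of bits needed to represent the folded offset as a signed value, via APInt::getMinSignedBits(). A standalone sketch of that measure for a 64-bit offset (plain C++, not LLVM code; arithmetic right shift on negative values assumed):

#include <cstdint>

// Minimum number of bits needed to hold V as a signed two's-complement value:
// 1 for 0 and -1, 2 for 1 and -2, 8 for 127 and -128, and so on.
static unsigned minSignedBits(int64_t V) {
  unsigned Bits = 1;                 // the sign bit alone encodes 0 and -1
  while (V != 0 && V != -1) {
    V >>= 1;                         // strip one magnitude bit
    ++Bits;
  }
  return Bits;
}
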
- // Otherwise, no immediates to move.
+/// Loose - Set this cost to a losing value.
+void Cost::Loose() {
+ NumRegs = ~0u;
+ AddRecCost = ~0u;
+ NumIVMuls = ~0u;
+ NumBaseAdds = ~0u;
+ ImmCost = ~0u;
+ SetupCost = ~0u;
}
-static void MoveImmediateValues(const TargetLowering *TLI,
- Instruction *User,
- const SCEV *&Val, const SCEV *&Imm,
- bool isAddress, Loop *L,
- ScalarEvolution *SE) {
- const Type *AccessTy = getAccessType(User);
- MoveImmediateValues(TLI, AccessTy, Val, Imm, isAddress, L, SE);
+/// operator< - Choose the lower cost.
+bool Cost::operator<(const Cost &Other) const {
+ if (NumRegs != Other.NumRegs)
+ return NumRegs < Other.NumRegs;
+ if (AddRecCost != Other.AddRecCost)
+ return AddRecCost < Other.AddRecCost;
+ if (NumIVMuls != Other.NumIVMuls)
+ return NumIVMuls < Other.NumIVMuls;
+ if (NumBaseAdds != Other.NumBaseAdds)
+ return NumBaseAdds < Other.NumBaseAdds;
+ if (ImmCost != Other.ImmCost)
+ return ImmCost < Other.ImmCost;
+ if (SetupCost != Other.SetupCost)
+ return SetupCost < Other.SetupCost;
+ return false;
}
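
The ordering above is a straight lexicographic comparison of the six cost components, earlier components dominating later ones. An equivalent formulation with std::tie, shown only to make the tie-breaking order explicit (CostSketch is a stand-in for the Cost class above, not part of the patch):

#include <tuple>

struct CostSketch {
  unsigned NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ImmCost, SetupCost;

  bool operator<(const CostSketch &Other) const {
    // Compare field by field; the first differing field decides the result.
    return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds,
                    ImmCost, SetupCost) <
           std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
                    Other.NumBaseAdds, Other.ImmCost, Other.SetupCost);
  }
};
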
-/// SeparateSubExprs - Decompose Expr into all of the subexpressions that are
-/// added together. This is used to reassociate common addition subexprs
-/// together for maximal sharing when rewriting bases.
-static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs,
- const SCEV *Expr,
- ScalarEvolution *SE) {
- if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) {
- for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j)
- SeparateSubExprs(SubExprs, AE->getOperand(j), SE);
- } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Expr)) {
- const SCEV *Zero = SE->getIntegerSCEV(0, Expr->getType());
- if (SARE->getOperand(0) == Zero) {
- SubExprs.push_back(Expr);
- } else {
- // Compute the addrec with zero as its base.
- SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
- Ops[0] = Zero; // Start with zero base.
- SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
+void Cost::print(raw_ostream &OS) const {
+ OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
+ if (AddRecCost != 0)
+ OS << ", with addrec cost " << AddRecCost;
+ if (NumIVMuls != 0)
+ OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
+ if (NumBaseAdds != 0)
+ OS << ", plus " << NumBaseAdds << " base add"
+ << (NumBaseAdds == 1 ? "" : "s");
+ if (ImmCost != 0)
+ OS << ", plus " << ImmCost << " imm cost";
+ if (SetupCost != 0)
+ OS << ", plus " << SetupCost << " setup cost";
+}
+void Cost::dump() const {
+ print(errs()); errs() << '\n';
+}
- SeparateSubExprs(SubExprs, SARE->getOperand(0), SE);
- }
- } else if (!Expr->isZero()) {
- // Do not add zero.
- SubExprs.push_back(Expr);
- }
-}
-
-// This is logically local to the following function, but C++ says we have
-// to make it file scope.
-struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
-
-/// RemoveCommonExpressionsFromUseBases - Look through all of the Bases of all
-/// the Uses, removing any common subexpressions, except that if all such
-/// subexpressions can be folded into an addressing mode for all uses inside
-/// the loop (this case is referred to as "free" in comments herein) we do
-/// not remove anything. This looks for things like (a+b+c) and
-/// (a+c+d) and computes the common (a+c) subexpression. The common expression
-/// is *removed* from the Bases and returned.
-static const SCEV *
-RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
- ScalarEvolution *SE, Loop *L,
- const TargetLowering *TLI) {
- unsigned NumUses = Uses.size();
-
- // Only one use? This is a very common case, so we handle it specially and
- // cheaply.
- const SCEV *Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType());
- const SCEV *Result = Zero;
- const SCEV *FreeResult = Zero;
- if (NumUses == 1) {
- // If the use is inside the loop, use its base, regardless of what it is:
- // it is clearly shared across all the IV's. If the use is outside the loop
- // (which means after it) we don't want to factor anything *into* the loop,
- // so just use 0 as the base.
- if (L->contains(Uses[0].Inst))
- std::swap(Result, Uses[0].Base);
- return Result;
- }
+namespace {
- // To find common subexpressions, count how many of Uses use each expression.
- // If any subexpressions are used Uses.size() times, they are common.
- // Also track whether all uses of each expression can be moved into an
- // an addressing mode "for free"; such expressions are left within the loop.
- // struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
- std::map<const SCEV *, SubExprUseData> SubExpressionUseData;
-
- // UniqueSubExprs - Keep track of all of the subexpressions we see in the
- // order we see them.
- SmallVector<const SCEV *, 16> UniqueSubExprs;
-
- SmallVector<const SCEV *, 16> SubExprs;
- unsigned NumUsesInsideLoop = 0;
- for (unsigned i = 0; i != NumUses; ++i) {
- // If the user is outside the loop, just ignore it for base computation.
- // Since the user is outside the loop, it must be *after* the loop (if it
- // were before, it could not be based on the loop IV). We don't want users
- // after the loop to affect base computation of values *inside* the loop,
- // because we can always add their offsets to the result IV after the loop
- // is done, ensuring we get good code inside the loop.
- if (!L->contains(Uses[i].Inst))
- continue;
- NumUsesInsideLoop++;
+/// LSRFixup - An operand value in an instruction which is to be replaced
+/// with some equivalent, possibly strength-reduced, value.
+struct LSRFixup {
+ /// UserInst - The instruction which will be updated.
+ Instruction *UserInst;
- // If the base is zero (which is common), return zero now, there are no
- // CSEs we can find.
- if (Uses[i].Base == Zero) return Zero;
+ /// OperandValToReplace - The operand of the instruction which will
+ /// be replaced. The operand may be used more than once; every instance
+ /// will be replaced.
+ Value *OperandValToReplace;
- // If this use is as an address we may be able to put CSEs in the addressing
- // mode rather than hoisting them.
- bool isAddrUse = isAddressUse(Uses[i].Inst, Uses[i].OperandValToReplace);
- // We may need the AccessTy below, but only when isAddrUse, so compute it
- // only in that case.
- const Type *AccessTy = 0;
- if (isAddrUse)
- AccessTy = getAccessType(Uses[i].Inst);
-
- // Split the expression into subexprs.
- SeparateSubExprs(SubExprs, Uses[i].Base, SE);
- // Add one to SubExpressionUseData.Count for each subexpr present, and
- // if the subexpr is not a valid immediate within an addressing mode use,
- // set SubExpressionUseData.notAllUsesAreFree. We definitely want to
- // hoist these out of the loop (if they are common to all uses).
- for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
- if (++SubExpressionUseData[SubExprs[j]].Count == 1)
- UniqueSubExprs.push_back(SubExprs[j]);
- if (!isAddrUse || !fitsInAddressMode(SubExprs[j], AccessTy, TLI, false))
- SubExpressionUseData[SubExprs[j]].notAllUsesAreFree = true;
- }
- SubExprs.clear();
- }
-
- // Now that we know how many times each is used, build Result. Iterate over
- // UniqueSubexprs so that we have a stable ordering.
- for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) {
- std::map<const SCEV *, SubExprUseData>::iterator I =
- SubExpressionUseData.find(UniqueSubExprs[i]);
- assert(I != SubExpressionUseData.end() && "Entry not found?");
- if (I->second.Count == NumUsesInsideLoop) { // Found CSE!
- if (I->second.notAllUsesAreFree)
- Result = SE->getAddExpr(Result, I->first);
- else
- FreeResult = SE->getAddExpr(FreeResult, I->first);
- } else
- // Remove non-cse's from SubExpressionUseData.
- SubExpressionUseData.erase(I);
- }
-
- if (FreeResult != Zero) {
- // We have some subexpressions that can be subsumed into addressing
- // modes in every use inside the loop. However, it's possible that
- // there are so many of them that the combined FreeResult cannot
- // be subsumed, or that the target cannot handle both a FreeResult
- // and a Result in the same instruction (for example because it would
- // require too many registers). Check this.
- for (unsigned i=0; i<NumUses; ++i) {
- if (!L->contains(Uses[i].Inst))
- continue;
- // We know this is an addressing mode use; if there are any uses that
- // are not, FreeResult would be Zero.
- const Type *AccessTy = getAccessType(Uses[i].Inst);
- if (!fitsInAddressMode(FreeResult, AccessTy, TLI, Result!=Zero)) {
- // FIXME: could split up FreeResult into pieces here, some hoisted
- // and some not. There is no obvious advantage to this.
- Result = SE->getAddExpr(Result, FreeResult);
- FreeResult = Zero;
- break;
- }
- }
- }
+ /// PostIncLoop - If this user is to use the post-incremented value of an
+ /// induction variable, this variable is non-null and holds the loop
+ /// associated with the induction variable.
+ const Loop *PostIncLoop;
- // If we found no CSE's, return now.
- if (Result == Zero) return Result;
+ /// LUIdx - The index of the LSRUse describing the expression which
+ /// this fixup needs, minus an offset (below).
+ size_t LUIdx;
- // If we still have a FreeResult, remove its subexpressions from
- // SubExpressionUseData. This means they will remain in the use Bases.
- if (FreeResult != Zero) {
- SeparateSubExprs(SubExprs, FreeResult, SE);
- for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
- std::map<const SCEV *, SubExprUseData>::iterator I =
- SubExpressionUseData.find(SubExprs[j]);
- SubExpressionUseData.erase(I);
- }
- SubExprs.clear();
- }
+ /// Offset - A constant offset to be added to the LSRUse expression.
+ /// This allows multiple fixups to share the same LSRUse with different
+ /// offsets, for example in an unrolled loop.
+ int64_t Offset;
- // Otherwise, remove all of the CSE's we found from each of the base values.
- for (unsigned i = 0; i != NumUses; ++i) {
- // Uses outside the loop don't necessarily include the common base, but
- // the final IV value coming into those uses does. Instead of trying to
- // remove the pieces of the common base, which might not be there,
- // subtract off the base to compensate for this.
- if (!L->contains(Uses[i].Inst)) {
- Uses[i].Base = SE->getMinusSCEV(Uses[i].Base, Result);
- continue;
- }
+ LSRFixup();
- // Split the expression into subexprs.
- SeparateSubExprs(SubExprs, Uses[i].Base, SE);
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
- // Remove any common subexpressions.
- for (unsigned j = 0, e = SubExprs.size(); j != e; ++j)
- if (SubExpressionUseData.count(SubExprs[j])) {
- SubExprs.erase(SubExprs.begin()+j);
- --j; --e;
- }
+}
- // Finally, add the non-shared expressions together.
- if (SubExprs.empty())
- Uses[i].Base = Zero;
- else
- Uses[i].Base = SE->getAddExpr(SubExprs);
- SubExprs.clear();
+LSRFixup::LSRFixup()
+ : UserInst(0), OperandValToReplace(0), PostIncLoop(0),
+ LUIdx(~size_t(0)), Offset(0) {}
+
+void LSRFixup::print(raw_ostream &OS) const {
+ OS << "UserInst=";
+ // Store is common and interesting enough to be worth special-casing.
+ if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
+ OS << "store ";
+ WriteAsOperand(OS, Store->getOperand(0), /*PrintType=*/false);
+ } else if (UserInst->getType()->isVoidTy())
+ OS << UserInst->getOpcodeName();
+ else
+ WriteAsOperand(OS, UserInst, /*PrintType=*/false);
+
+ OS << ", OperandValToReplace=";
+ WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);
+
+ if (PostIncLoop) {
+ OS << ", PostIncLoop=";
+ WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false);
}
- return Result;
-}
+ if (LUIdx != ~size_t(0))
+ OS << ", LUIdx=" << LUIdx;
-/// ValidScale - Check whether the given Scale is valid for all loads and
-/// stores in UsersToProcess.
-///
-bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,
- const std::vector<BasedUser>& UsersToProcess) {
- if (!TLI)
- return true;
+ if (Offset != 0)
+ OS << ", Offset=" << Offset;
+}
- for (unsigned i = 0, e = UsersToProcess.size(); i!=e; ++i) {
- // If this is a load or other access, pass the type of the access in.
- const Type *AccessTy =
- Type::getVoidTy(UsersToProcess[i].Inst->getContext());
- if (isAddressUse(UsersToProcess[i].Inst,
- UsersToProcess[i].OperandValToReplace))
- AccessTy = getAccessType(UsersToProcess[i].Inst);
- else if (isa<PHINode>(UsersToProcess[i].Inst))
- continue;
+void LSRFixup::dump() const {
+ print(errs()); errs() << '\n';
+}
- TargetLowering::AddrMode AM;
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
- AM.BaseOffs = SC->getValue()->getSExtValue();
- AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero();
- AM.Scale = Scale;
+namespace {
- // If load[imm+r*scale] is illegal, bail out.
- if (!TLI->isLegalAddressingMode(AM, AccessTy))
- return false;
+/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding
+/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*.
+struct UniquifierDenseMapInfo {
+ static SmallVector<const SCEV *, 2> getEmptyKey() {
+ SmallVector<const SCEV *, 2> V;
+ V.push_back(reinterpret_cast<const SCEV *>(-1));
+ return V;
}
- return true;
-}
-
-/// ValidOffset - Check whether the given Offset is valid for all loads and
-/// stores in UsersToProcess.
-///
-bool LoopStrengthReduce::ValidOffset(bool HasBaseReg,
- int64_t Offset,
- int64_t Scale,
- const std::vector<BasedUser>& UsersToProcess) {
- if (!TLI)
- return true;
- for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) {
- // If this is a load or other access, pass the type of the access in.
- const Type *AccessTy =
- Type::getVoidTy(UsersToProcess[i].Inst->getContext());
- if (isAddressUse(UsersToProcess[i].Inst,
- UsersToProcess[i].OperandValToReplace))
- AccessTy = getAccessType(UsersToProcess[i].Inst);
- else if (isa<PHINode>(UsersToProcess[i].Inst))
- continue;
+ static SmallVector<const SCEV *, 2> getTombstoneKey() {
+ SmallVector<const SCEV *, 2> V;
+ V.push_back(reinterpret_cast<const SCEV *>(-2));
+ return V;
+ }
- TargetLowering::AddrMode AM;
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
- AM.BaseOffs = SC->getValue()->getSExtValue();
- AM.BaseOffs = (uint64_t)AM.BaseOffs + (uint64_t)Offset;
- AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero();
- AM.Scale = Scale;
+ static unsigned getHashValue(const SmallVector<const SCEV *, 2> &V) {
+ unsigned Result = 0;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(),
+ E = V.end(); I != E; ++I)
+ Result ^= DenseMapInfo<const SCEV *>::getHashValue(*I);
+ return Result;
+ }
- // If load[imm+r*scale] is illegal, bail out.
- if (!TLI->isLegalAddressingMode(AM, AccessTy))
- return false;
+ static bool isEqual(const SmallVector<const SCEV *, 2> &LHS,
+ const SmallVector<const SCEV *, 2> &RHS) {
+ return LHS == RHS;
}
- return true;
-}
+};
+
+/// LSRUse - This class holds the state that LSR keeps for each use in
+/// IVUsers, as well as uses invented by LSR itself. It includes information
+/// about what kinds of things can be folded into the user, information about
+/// the user itself, and information about how the use may be satisfied.
+/// TODO: Represent multiple users of the same expression in common?
+class LSRUse {
+ DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier;
+
+public:
+ /// KindType - An enum for a kind of use, indicating what types of
+ /// scaled and immediate operands it might support.
+ enum KindType {
+ Basic, ///< A normal use, with no folding.
+ Special, ///< A special case of basic, allowing -1 scales.
+ Address, ///< An address use; folding according to TargetLowering
+ ICmpZero ///< An equality icmp with both operands folded into one.
+ // TODO: Add a generic icmp too?
+ };
-/// RequiresTypeConversion - Returns true if converting Ty1 to Ty2 is not
-/// a nop.
-bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
- const Type *Ty2) {
- if (Ty1 == Ty2)
- return false;
- Ty1 = SE->getEffectiveSCEVType(Ty1);
- Ty2 = SE->getEffectiveSCEVType(Ty2);
- if (Ty1 == Ty2)
- return false;
- if (Ty1->canLosslesslyBitCastTo(Ty2))
- return false;
- if (TLI && TLI->isTruncateFree(Ty1, Ty2))
- return false;
- return true;
-}
+ KindType Kind;
+ const Type *AccessTy;
-/// CheckForIVReuse - Returns the multiple if the stride is the multiple
-/// of a previous stride and it is a legal value for the target addressing
-/// mode scale component and optional base reg. This allows the users of
-/// this stride to be rewritten as prev iv * factor. It returns 0 if no
-/// reuse is possible. Factors can be negative on same targets, e.g. ARM.
-///
-/// If all uses are outside the loop, we don't require that all multiplies
-/// be folded into the addressing mode, nor even that the factor be constant;
-/// a multiply (executed once) outside the loop is better than another IV
-/// within. Well, usually.
-const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
- bool AllUsesAreAddresses,
- bool AllUsesAreOutsideLoop,
- const SCEV *Stride,
- IVExpr &IV, const Type *Ty,
- const std::vector<BasedUser>& UsersToProcess) {
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
- int64_t SInt = SC->getValue()->getSExtValue();
- for (unsigned NewStride = 0, e = IU->StrideOrder.size();
- NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
- IVsByStride.find(IU->StrideOrder[NewStride]);
- if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
- continue;
- // The other stride has no uses, don't reuse it.
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator UI =
- IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);
- if (UI->second->Users.empty())
- continue;
- int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
- if (SI->first != Stride &&
- (unsigned(abs64(SInt)) < SSInt || (SInt % SSInt) != 0))
- continue;
- int64_t Scale = SInt / SSInt;
- // Check that this stride is valid for all the types used for loads and
- // stores; if it can be used for some and not others, we might as well use
- // the original stride everywhere, since we have to create the IV for it
- // anyway. If the scale is 1, then we don't need to worry about folding
- // multiplications.
- if (Scale == 1 ||
- (AllUsesAreAddresses &&
- ValidScale(HasBaseReg, Scale, UsersToProcess))) {
- // Prefer to reuse an IV with a base of zero.
- for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
- IE = SI->second.IVs.end(); II != IE; ++II)
- // Only reuse previous IV if it would not require a type conversion
- // and if the base difference can be folded.
- if (II->Base->isZero() &&
- !RequiresTypeConversion(II->Base->getType(), Ty)) {
- IV = *II;
- return SE->getIntegerSCEV(Scale, Stride->getType());
- }
- // Otherwise, settle for an IV with a foldable base.
- if (AllUsesAreAddresses)
- for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
- IE = SI->second.IVs.end(); II != IE; ++II)
- // Only reuse previous IV if it would not require a type conversion
- // and if the base difference can be folded.
- if (SE->getEffectiveSCEVType(II->Base->getType()) ==
- SE->getEffectiveSCEVType(Ty) &&
- isa<SCEVConstant>(II->Base)) {
- int64_t Base =
- cast<SCEVConstant>(II->Base)->getValue()->getSExtValue();
- if (Base > INT32_MIN && Base <= INT32_MAX &&
- ValidOffset(HasBaseReg, -Base * Scale,
- Scale, UsersToProcess)) {
- IV = *II;
- return SE->getIntegerSCEV(Scale, Stride->getType());
- }
- }
- }
- }
- } else if (AllUsesAreOutsideLoop) {
- // Accept nonconstant strides here; it is really really right to substitute
- // an existing IV if we can.
- for (unsigned NewStride = 0, e = IU->StrideOrder.size();
- NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
- IVsByStride.find(IU->StrideOrder[NewStride]);
- if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
- continue;
- int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
- if (SI->first != Stride && SSInt != 1)
- continue;
- for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
- IE = SI->second.IVs.end(); II != IE; ++II)
- // Accept nonzero base here.
- // Only reuse previous IV if it would not require a type conversion.
- if (!RequiresTypeConversion(II->Base->getType(), Ty)) {
- IV = *II;
- return Stride;
- }
- }
- // Special case, old IV is -1*x and this one is x. Can treat this one as
- // -1*old.
- for (unsigned NewStride = 0, e = IU->StrideOrder.size();
- NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
- IVsByStride.find(IU->StrideOrder[NewStride]);
- if (SI == IVsByStride.end())
- continue;
- if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first))
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0)))
- if (Stride == ME->getOperand(1) &&
- SC->getValue()->getSExtValue() == -1LL)
- for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
- IE = SI->second.IVs.end(); II != IE; ++II)
- // Accept nonzero base here.
- // Only reuse previous IV if it would not require type conversion.
- if (!RequiresTypeConversion(II->Base->getType(), Ty)) {
- IV = *II;
- return SE->getIntegerSCEV(-1LL, Stride->getType());
- }
- }
- }
- return SE->getIntegerSCEV(0, Stride->getType());
-}
-
-/// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that
-/// returns true if Val's isUseOfPostIncrementedValue is true.
-static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) {
- return Val.isUseOfPostIncrementedValue;
-}
-
-/// isNonConstantNegative - Return true if the specified scev is negated, but
-/// not a constant.
-static bool isNonConstantNegative(const SCEV *Expr) {
- const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);
- if (!Mul) return false;
-
- // If there is a constant factor, it will be first.
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
- if (!SC) return false;
-
- // Return true if the value is negative, this matches things like (-42 * V).
- return SC->getValue()->getValue().isNegative();
-}
-
-/// CollectIVUsers - Transform our list of users and offsets to a bit more
-/// complex table. In this new vector, each 'BasedUser' contains 'Base', the
-/// base of the strided accesses, as well as the old information from Uses. We
-/// progressively move information from the Base field to the Imm field, until
-/// we eventually have the full access expression to rewrite the use.
-const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *Stride,
- IVUsersOfOneStride &Uses,
- Loop *L,
- bool &AllUsesAreAddresses,
- bool &AllUsesAreOutsideLoop,
- std::vector<BasedUser> &UsersToProcess) {
- // FIXME: Generalize to non-affine IV's.
- if (!Stride->isLoopInvariant(L))
- return SE->getIntegerSCEV(0, Stride->getType());
-
- UsersToProcess.reserve(Uses.Users.size());
- for (ilist<IVStrideUse>::iterator I = Uses.Users.begin(),
- E = Uses.Users.end(); I != E; ++I) {
- UsersToProcess.push_back(BasedUser(*I, SE));
-
- // Move any loop variant operands from the offset field to the immediate
- // field of the use, so that we don't try to use something before it is
- // computed.
- MoveLoopVariantsToImmediateField(UsersToProcess.back().Base,
- UsersToProcess.back().Imm, L, SE);
- assert(UsersToProcess.back().Base->isLoopInvariant(L) &&
- "Base value is not loop invariant!");
- }
-
- // We now have a whole bunch of uses of like-strided induction variables, but
- // they might all have different bases. We want to emit one PHI node for this
- // stride which we fold as many common expressions (between the IVs) into as
- // possible. Start by identifying the common expressions in the base values
- // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
- // "A+B"), emit it to the preheader, then remove the expression from the
- // UsersToProcess base values.
- const SCEV *CommonExprs =
- RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI);
-
- // Next, figure out what we can represent in the immediate fields of
- // instructions. If we can represent anything there, move it to the imm
- // fields of the BasedUsers. We do this so that it increases the commonality
- // of the remaining uses.
- unsigned NumPHI = 0;
- bool HasAddress = false;
- for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
- // If the user is not in the current loop, this means it is using the exit
- // value of the IV. Do not put anything in the base, make sure it's all in
- // the immediate field to allow as much factoring as possible.
- if (!L->contains(UsersToProcess[i].Inst)) {
- UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm,
- UsersToProcess[i].Base);
- UsersToProcess[i].Base =
- SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType());
- } else {
- // Not all uses are outside the loop.
- AllUsesAreOutsideLoop = false;
-
- // Addressing modes can be folded into loads and stores. Be careful that
- // the store is through the expression, not of the expression though.
- bool isPHI = false;
- bool isAddress = isAddressUse(UsersToProcess[i].Inst,
- UsersToProcess[i].OperandValToReplace);
- if (isa<PHINode>(UsersToProcess[i].Inst)) {
- isPHI = true;
- ++NumPHI;
- }
+ SmallVector<int64_t, 8> Offsets;
+ int64_t MinOffset;
+ int64_t MaxOffset;
- if (isAddress)
- HasAddress = true;
+ /// AllFixupsOutsideLoop - This records whether all of the fixups using this
+ /// LSRUse are outside of the loop, in which case some special-case heuristics
+ /// may be used.
+ bool AllFixupsOutsideLoop;
- // If this use isn't an address, then not all uses are addresses.
- if (!isAddress && !isPHI)
- AllUsesAreAddresses = false;
+ /// Formulae - A list of ways to build a value that can satisfy this user.
+ /// After the list is populated, one of these is selected heuristically and
+ /// used to formulate a replacement for OperandValToReplace in UserInst.
+ SmallVector<Formula, 12> Formulae;
- MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
- UsersToProcess[i].Imm, isAddress, L, SE);
- }
- }
+ /// Regs - The set of register candidates used by all formulae in this LSRUse.
+ SmallPtrSet<const SCEV *, 4> Regs;
- // If one of the uses is a PHI node and all other uses are addresses, still
- // allow iv reuse. Essentially we are trading one constant multiplication
- // for one fewer iv.
- if (NumPHI > 1)
- AllUsesAreAddresses = false;
+ LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T),
+ MinOffset(INT64_MAX),
+ MaxOffset(INT64_MIN),
+ AllFixupsOutsideLoop(true) {}
- // There are no in-loop address uses.
- if (AllUsesAreAddresses && (!HasAddress && !AllUsesAreOutsideLoop))
- AllUsesAreAddresses = false;
+ bool InsertFormula(const Formula &F);
- return CommonExprs;
-}
+ void check() const;
-/// ShouldUseFullStrengthReductionMode - Test whether full strength-reduction
-/// is valid and profitable for the given set of users of a stride. In
-/// full strength-reduction mode, all addresses at the current stride are
-/// strength-reduced all the way down to pointer arithmetic.
-///
-bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode(
- const std::vector<BasedUser> &UsersToProcess,
- const Loop *L,
- bool AllUsesAreAddresses,
- const SCEV *Stride) {
- if (!EnableFullLSRMode)
- return false;
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
- // The heuristics below aim to avoid increasing register pressure, but
- // fully strength-reducing all the addresses increases the number of
- // add instructions, so don't do this when optimizing for size.
- // TODO: If the loop is large, the savings due to simpler addresses
- // may outweigh the costs of the extra increment instructions.
- if (L->getHeader()->getParent()->hasFnAttr(Attribute::OptimizeForSize))
- return false;
+/// InsertFormula - If the given formula has not yet been inserted, add it to
+/// the list, and return true. Return false otherwise.
+bool LSRUse::InsertFormula(const Formula &F) {
+ SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+ if (F.ScaledReg) Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for uniquifying.
+ std::sort(Key.begin(), Key.end());
- // TODO: For now, don't do full strength reduction if there could
- // potentially be greater-stride multiples of the current stride
- // which could reuse the current stride IV.
- if (IU->StrideOrder.back() != Stride)
+ if (!Uniquifier.insert(Key).second)
return false;
- // Iterate through the uses to find conditions that automatically rule out
- // full-lsr mode.
- for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) {
- const SCEV *Base = UsersToProcess[i].Base;
- const SCEV *Imm = UsersToProcess[i].Imm;
- // If any users have a loop-variant component, they can't be fully
- // strength-reduced.
- if (Imm && !Imm->isLoopInvariant(L))
- return false;
- // If there are two users with the same base and the difference between
- // the two Imm values can't be folded into the address, full
- // strength reduction would increase register pressure.
- do {
- const SCEV *CurImm = UsersToProcess[i].Imm;
- if ((CurImm || Imm) && CurImm != Imm) {
- if (!CurImm) CurImm = SE->getIntegerSCEV(0, Stride->getType());
- if (!Imm) Imm = SE->getIntegerSCEV(0, Stride->getType());
- const Instruction *Inst = UsersToProcess[i].Inst;
- const Type *AccessTy = getAccessType(Inst);
- const SCEV *Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);
- if (!Diff->isZero() &&
- (!AllUsesAreAddresses ||
- !fitsInAddressMode(Diff, AccessTy, TLI, /*HasBaseReg=*/true)))
- return false;
- }
- } while (++i != e && Base == UsersToProcess[i].Base);
- }
+ // Using a register to hold the value of 0 is not profitable.
+ assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
+ "Zero allocated in a scaled register!");
+#ifndef NDEBUG
+ for (SmallVectorImpl<const SCEV *>::const_iterator I =
+ F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I)
+ assert(!(*I)->isZero() && "Zero allocated in a base register!");
+#endif
- // If there's exactly one user in this stride, fully strength-reducing it
- // won't increase register pressure. If it's starting from a non-zero base,
- // it'll be simpler this way.
- if (UsersToProcess.size() == 1 && !UsersToProcess[0].Base->isZero())
- return true;
+ // Add the formula to the list.
+ Formulae.push_back(F);
- // Otherwise, if there are any users in this stride that don't require
- // a register for their base, full strength-reduction will increase
- // register pressure.
- for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
- if (UsersToProcess[i].Base->isZero())
- return false;
+ // Record registers now being used by this use.
+ if (F.ScaledReg) Regs.insert(F.ScaledReg);
+ Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
- // Otherwise, go for it.
return true;
}
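
The uniquifying step in InsertFormula above canonicalizes a formula to the sorted list of its registers before consulting the Uniquifier set, so two formulae naming the same registers in a different order count as one. A minimal standalone sketch of that idea, using plain std::set and std::vector in place of the pass's SmallVector/DenseSet types (illustration only, not the pass's real code):

#include <algorithm>
#include <cassert>
#include <set>
#include <vector>

// Stand-in for a formula: an optional scaled register (0 means "none") plus a
// list of base registers; registers are just opaque integer ids here.
struct ToyFormula {
  int ScaledReg;
  std::vector<int> BaseRegs;
};

// Mirrors the dedup step of LSRUse::InsertFormula: gather every register into
// a key, sort it (any consistent order works, it is only used for uniquifying),
// and insert the key into a set. The formula is new only if the key was unseen.
static bool insertUnique(std::set<std::vector<int> > &Uniquifier,
                         const ToyFormula &F) {
  std::vector<int> Key = F.BaseRegs;
  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  std::sort(Key.begin(), Key.end());
  return Uniquifier.insert(Key).second;
}

int main() {
  std::set<std::vector<int> > Uniquifier;
  ToyFormula A = {0, {1, 2}};
  ToyFormula B = {0, {2, 1}};   // same registers, different order
  ToyFormula C = {3, {1, 2}};   // adds a scaled register
  assert(insertUnique(Uniquifier, A));   // inserted
  assert(!insertUnique(Uniquifier, B));  // rejected as a duplicate of A
  assert(insertUnique(Uniquifier, C));   // distinct register set, inserted
  return 0;
}
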
-/// InsertAffinePhi - Create and insert a PHI node for an induction variable
-/// with the specified start and step values in the specified loop.
-///
-/// If NegateStride is true, the stride should be negated by using a
-/// subtract instead of an add.
-///
-/// Return the created phi node.
-///
-static PHINode *InsertAffinePhi(const SCEV *Start, const SCEV *Step,
- Instruction *IVIncInsertPt,
- const Loop *L,
- SCEVExpander &Rewriter) {
- assert(Start->isLoopInvariant(L) && "New PHI start is not loop invariant!");
- assert(Step->isLoopInvariant(L) && "New PHI stride is not loop invariant!");
-
- BasicBlock *Header = L->getHeader();
- BasicBlock *Preheader = L->getLoopPreheader();
- BasicBlock *LatchBlock = L->getLoopLatch();
- const Type *Ty = Start->getType();
- Ty = Rewriter.SE.getEffectiveSCEVType(Ty);
-
- PHINode *PN = PHINode::Create(Ty, "lsr.iv", Header->begin());
- PN->addIncoming(Rewriter.expandCodeFor(Start, Ty, Preheader->getTerminator()),
- Preheader);
-
- // If the stride is negative, insert a sub instead of an add for the
- // increment.
- bool isNegative = isNonConstantNegative(Step);
- const SCEV *IncAmount = Step;
- if (isNegative)
- IncAmount = Rewriter.SE.getNegativeSCEV(Step);
-
- // Insert an add instruction right before the terminator corresponding
- // to the back-edge or just before the only use. The location is determined
- // by the caller and passed in as IVIncInsertPt.
- Value *StepV = Rewriter.expandCodeFor(IncAmount, Ty,
- Preheader->getTerminator());
- Instruction *IncV;
- if (isNegative) {
- IncV = BinaryOperator::CreateSub(PN, StepV, "lsr.iv.next",
- IVIncInsertPt);
- } else {
- IncV = BinaryOperator::CreateAdd(PN, StepV, "lsr.iv.next",
- IVIncInsertPt);
- }
- if (!isa<ConstantInt>(StepV)) ++NumVariable;
-
- PN->addIncoming(IncV, LatchBlock);
-
- ++NumInserted;
- return PN;
-}
-
-static void SortUsersToProcess(std::vector<BasedUser> &UsersToProcess) {
- // We want to emit code for users inside the loop first. To do this, we
- // rearrange BasedUser so that the entries at the end have
- // isUseOfPostIncrementedValue = false, because we pop off the end of the
- // vector (so we handle them first).
- std::partition(UsersToProcess.begin(), UsersToProcess.end(),
- PartitionByIsUseOfPostIncrementedValue);
-
- // Sort this by base, so that things with the same base are handled
- // together. By partitioning first and stable-sorting later, we are
- // guaranteed that within each base we will pop off users from within the
- // loop before users outside of the loop with a particular base.
- //
- // We would like to use stable_sort here, but we can't. The problem is that
- // const SCEV *'s don't have a deterministic ordering w.r.t. each other, so
- // we don't have anything to do a '<' comparison on. Because we think the
- // number of uses is small, do a horrible bubble sort which just relies on
- // ==.
- for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
- // Get a base value.
- const SCEV *Base = UsersToProcess[i].Base;
-
- // Compact everything with this base to be consecutive with this one.
- for (unsigned j = i+1; j != e; ++j) {
- if (UsersToProcess[j].Base == Base) {
- std::swap(UsersToProcess[i+1], UsersToProcess[j]);
- ++i;
- }
- }
+void LSRUse::print(raw_ostream &OS) const {
+ OS << "LSR Use: Kind=";
+ switch (Kind) {
+ case Basic: OS << "Basic"; break;
+ case Special: OS << "Special"; break;
+ case ICmpZero: OS << "ICmpZero"; break;
+ case Address:
+ OS << "Address of ";
+ if (AccessTy->isPointerTy())
+ OS << "pointer"; // the full pointer type could be really verbose
+ else
+ OS << *AccessTy;
}
-}
-/// PrepareToStrengthReduceFully - Prepare to fully strength-reduce
-/// UsersToProcess, meaning lowering addresses all the way down to direct
-/// pointer arithmetic.
-///
-void
-LoopStrengthReduce::PrepareToStrengthReduceFully(
- std::vector<BasedUser> &UsersToProcess,
- const SCEV *Stride,
- const SCEV *CommonExprs,
- const Loop *L,
- SCEVExpander &PreheaderRewriter) {
- DEBUG(dbgs() << " Fully reducing all users\n");
-
- // Rewrite the UsersToProcess records, creating a separate PHI for each
- // unique Base value.
- Instruction *IVIncInsertPt = L->getLoopLatch()->getTerminator();
- for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) {
- // TODO: The uses are grouped by base, but not sorted. We arbitrarily
- // pick the first Imm value here to start with, and adjust it for the
- // other uses.
- const SCEV *Imm = UsersToProcess[i].Imm;
- const SCEV *Base = UsersToProcess[i].Base;
- const SCEV *Start = SE->getAddExpr(CommonExprs, Base, Imm);
- PHINode *Phi = InsertAffinePhi(Start, Stride, IVIncInsertPt, L,
- PreheaderRewriter);
- // Loop over all the users with the same base.
- do {
- UsersToProcess[i].Base = SE->getIntegerSCEV(0, Stride->getType());
- UsersToProcess[i].Imm = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);
- UsersToProcess[i].Phi = Phi;
- assert(UsersToProcess[i].Imm->isLoopInvariant(L) &&
- "ShouldUseFullStrengthReductionMode should reject this!");
- } while (++i != e && Base == UsersToProcess[i].Base);
- }
-}
-
-/// FindIVIncInsertPt - Return the location to insert the increment instruction.
-/// If the only use is a use of the postinc value (it must be the loop
-/// termination condition), then insert it just before the use.
-static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess,
- const Loop *L) {
- if (UsersToProcess.size() == 1 &&
- UsersToProcess[0].isUseOfPostIncrementedValue &&
- L->contains(UsersToProcess[0].Inst))
- return UsersToProcess[0].Inst;
- return L->getLoopLatch()->getTerminator();
-}
-
-/// PrepareToStrengthReduceWithNewPhi - Insert a new induction variable for the
-/// given users to share.
-///
-void
-LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(
- std::vector<BasedUser> &UsersToProcess,
- const SCEV *Stride,
- const SCEV *CommonExprs,
- Value *CommonBaseV,
- Instruction *IVIncInsertPt,
- const Loop *L,
- SCEVExpander &PreheaderRewriter) {
- DEBUG(dbgs() << " Inserting new PHI:\n");
-
- PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV),
- Stride, IVIncInsertPt, L,
- PreheaderRewriter);
-
- // Remember this in case a later stride is multiple of this.
- IVsByStride[Stride].addIV(Stride, CommonExprs, Phi);
-
- // All the users will share this new IV.
- for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
- UsersToProcess[i].Phi = Phi;
-
- DEBUG(dbgs() << " IV=");
- DEBUG(WriteAsOperand(dbgs(), Phi, /*PrintType=*/false));
- DEBUG(dbgs() << "\n");
-}
-
-/// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to
-/// reuse an induction variable with a stride that is a factor of the current
-/// induction variable.
-///
-void
-LoopStrengthReduce::PrepareToStrengthReduceFromSmallerStride(
- std::vector<BasedUser> &UsersToProcess,
- Value *CommonBaseV,
- const IVExpr &ReuseIV,
- Instruction *PreInsertPt) {
- DEBUG(dbgs() << " Rewriting in terms of existing IV of STRIDE "
- << *ReuseIV.Stride << " and BASE " << *ReuseIV.Base << "\n");
-
- // All the users will share the reused IV.
- for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
- UsersToProcess[i].Phi = ReuseIV.PHI;
-
- Constant *C = dyn_cast<Constant>(CommonBaseV);
- if (C &&
- (!C->isNullValue() &&
- !fitsInAddressMode(SE->getUnknown(CommonBaseV), CommonBaseV->getType(),
- TLI, false)))
- // We want the common base emitted into the preheader! This is just
- // using cast as a copy so BitCast (no-op cast) is appropriate
- CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(),
- "commonbase", PreInsertPt);
-}
-
-static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset,
- const Type *AccessTy,
- std::vector<BasedUser> &UsersToProcess,
- const TargetLowering *TLI) {
- SmallVector<Instruction*, 16> AddrModeInsts;
- for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
- if (UsersToProcess[i].isUseOfPostIncrementedValue)
- continue;
- ExtAddrMode AddrMode =
- AddressingModeMatcher::Match(UsersToProcess[i].OperandValToReplace,
- AccessTy, UsersToProcess[i].Inst,
- AddrModeInsts, *TLI);
- if (GV && GV != AddrMode.BaseGV)
- return false;
- if (Offset && !AddrMode.BaseOffs)
- // FIXME: How to accurately check whether its immediate offset is folded.
- return false;
- AddrModeInsts.clear();
+ OS << ", Offsets={";
+ for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
+ E = Offsets.end(); I != E; ++I) {
+ OS << *I;
+ if (next(I) != E)
+ OS << ',';
}
- return true;
-}
+ OS << '}';
-/// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a single
-/// stride of IV. All of the users may have different starting values, and this
-/// may not be the only stride.
-void
-LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *Stride,
- IVUsersOfOneStride &Uses,
- Loop *L) {
- // If all the users are moved to another stride, then there is nothing to do.
- if (Uses.Users.empty())
- return;
+ if (AllFixupsOutsideLoop)
+ OS << ", all-fixups-outside-loop";
+}
- // Keep track if every use in UsersToProcess is an address. If they all are,
- // we may be able to rewrite the entire collection of them in terms of a
- // smaller-stride IV.
- bool AllUsesAreAddresses = true;
-
- // Keep track if every use of a single stride is outside the loop. If so,
- // we want to be more aggressive about reusing a smaller-stride IV; a
- // multiply outside the loop is better than another IV inside. Well, usually.
- bool AllUsesAreOutsideLoop = true;
-
- // Transform our list of users and offsets to a bit more complex table. In
- // this new vector, each 'BasedUser' contains 'Base' the base of the strided
- // access as well as the old information from Uses. We progressively move
- // information from the Base field to the Imm field until we eventually have
- // the full access expression to rewrite the use.
- std::vector<BasedUser> UsersToProcess;
- const SCEV *CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
- AllUsesAreOutsideLoop,
- UsersToProcess);
-
- // Sort the UsersToProcess array so that users with common bases are
- // next to each other.
- SortUsersToProcess(UsersToProcess);
-
- // If we managed to find some expressions in common, we'll need to carry
- // their value in a register and add it in for each use. This will take up
- // a register operand, which potentially restricts what stride values are
- // valid.
- bool HaveCommonExprs = !CommonExprs->isZero();
- const Type *ReplacedTy = CommonExprs->getType();
-
- // If all uses are addresses, consider sinking the immediate part of the
- // common expression back into uses if they can fit in the immediate fields.
- if (TLI && HaveCommonExprs && AllUsesAreAddresses) {
- const SCEV *NewCommon = CommonExprs;
- const SCEV *Imm = SE->getIntegerSCEV(0, ReplacedTy);
- MoveImmediateValues(TLI, Type::getVoidTy(
- L->getLoopPreheader()->getContext()),
- NewCommon, Imm, true, L, SE);
- if (!Imm->isZero()) {
- bool DoSink = true;
-
- // If the immediate part of the common expression is a GV, check if it's
- // possible to fold it into the target addressing mode.
- GlobalValue *GV = 0;
- if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(Imm))
- GV = dyn_cast<GlobalValue>(SU->getValue());
- int64_t Offset = 0;
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Imm))
- Offset = SC->getValue()->getSExtValue();
- if (GV || Offset)
- // Pass VoidTy as the AccessTy to be conservative, because
- // there could be multiple access types among all the uses.
- DoSink = IsImmFoldedIntoAddrMode(GV, Offset,
- Type::getVoidTy(L->getLoopPreheader()->getContext()),
- UsersToProcess, TLI);
-
- if (DoSink) {
- DEBUG(dbgs() << " Sinking " << *Imm << " back down into uses\n");
- for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
- UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, Imm);
- CommonExprs = NewCommon;
- HaveCommonExprs = !CommonExprs->isZero();
- ++NumImmSunk;
- }
- }
- }
+void LSRUse::dump() const {
+ print(errs()); errs() << '\n';
+}
- // Now that we know what we need to do, insert the PHI node itself.
- //
- DEBUG(dbgs() << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE "
- << *Stride << ":\n"
- << " Common base: " << *CommonExprs << '\n');
+/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
+/// be completely folded into the user instruction at isel time. This includes
+/// address-mode folding and special icmp tricks.
+static bool isLegalUse(const TargetLowering::AddrMode &AM,
+ LSRUse::KindType Kind, const Type *AccessTy,
+ const TargetLowering *TLI) {
+ switch (Kind) {
+ case LSRUse::Address:
+ // If we have low-level target information, ask the target if it can
+ // completely fold this address.
+ if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);
+
+ // Otherwise, just guess that reg+reg addressing is legal.
+ return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
+
+ case LSRUse::ICmpZero:
+ // There's not even a target hook for querying whether it would be legal to
+ // fold a GV into an ICmp.
+ if (AM.BaseGV)
+ return false;
- SCEVExpander Rewriter(*SE);
- SCEVExpander PreheaderRewriter(*SE);
+ // ICmp only has two operands; don't allow more than two non-trivial parts.
+ if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0)
+ return false;
- BasicBlock *Preheader = L->getLoopPreheader();
- Instruction *PreInsertPt = Preheader->getTerminator();
- BasicBlock *LatchBlock = L->getLoopLatch();
- Instruction *IVIncInsertPt = LatchBlock->getTerminator();
-
- Value *CommonBaseV = Constant::getNullValue(ReplacedTy);
-
- const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy);
- IVExpr ReuseIV(SE->getIntegerSCEV(0,
- Type::getInt32Ty(Preheader->getContext())),
- SE->getIntegerSCEV(0,
- Type::getInt32Ty(Preheader->getContext())),
- 0);
-
- // Choose a strength-reduction strategy and prepare for it by creating
- // the necessary PHIs and adjusting the bookkeeping.
- if (ShouldUseFullStrengthReductionMode(UsersToProcess, L,
- AllUsesAreAddresses, Stride)) {
- PrepareToStrengthReduceFully(UsersToProcess, Stride, CommonExprs, L,
- PreheaderRewriter);
- } else {
- // Emit the initial base value into the loop preheader.
- CommonBaseV = PreheaderRewriter.expandCodeFor(CommonExprs, ReplacedTy,
- PreInsertPt);
-
- // If all uses are addresses, check if it is possible to reuse an IV. The
- // new IV must have a stride that is a multiple of the old stride; the
- // multiple must be a number that can be encoded in the scale field of the
- // target addressing mode; and we must have a valid instruction after this
- // substitution, including the immediate field, if any.
- RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
- AllUsesAreOutsideLoop,
- Stride, ReuseIV, ReplacedTy,
- UsersToProcess);
- if (!RewriteFactor->isZero())
- PrepareToStrengthReduceFromSmallerStride(UsersToProcess, CommonBaseV,
- ReuseIV, PreInsertPt);
- else {
- IVIncInsertPt = FindIVIncInsertPt(UsersToProcess, L);
- PrepareToStrengthReduceWithNewPhi(UsersToProcess, Stride, CommonExprs,
- CommonBaseV, IVIncInsertPt,
- L, PreheaderRewriter);
- }
- }
+ // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
+ // putting the scaled register in the other operand of the icmp.
+ if (AM.Scale != 0 && AM.Scale != -1)
+ return false;
- // Process all the users now, replacing their strided uses with
- // strength-reduced forms. This outer loop handles all bases, the inner
- // loop handles all users of a particular base.
- while (!UsersToProcess.empty()) {
- const SCEV *Base = UsersToProcess.back().Base;
- Instruction *Inst = UsersToProcess.back().Inst;
-
- // Emit the code for Base into the preheader.
- Value *BaseV = 0;
- if (!Base->isZero()) {
- BaseV = PreheaderRewriter.expandCodeFor(Base, 0, PreInsertPt);
-
- DEBUG(dbgs() << " INSERTING code for BASE = " << *Base << ":");
- if (BaseV->hasName())
- DEBUG(dbgs() << " Result value name = %" << BaseV->getName());
- DEBUG(dbgs() << "\n");
-
- // If BaseV is a non-zero constant, make sure that it gets inserted into
- // the preheader, instead of being forward substituted into the uses. We
- // do this by forcing a BitCast (noop cast) to be inserted into the
- // preheader in this case.
- if (!fitsInAddressMode(Base, getAccessType(Inst), TLI, false) &&
- isa<Constant>(BaseV)) {
- // We want this constant emitted into the preheader! This is just
- // using cast as a copy so BitCast (no-op cast) is appropriate
- BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
- PreInsertPt);
- }
+ // If we have low-level target information, ask the target if it can fold an
+ // integer immediate on an icmp.
+ if (AM.BaseOffs != 0) {
+ if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs);
+ return false;
}
- // Emit the code to add the immediate offset to the Phi value, just before
- // the instructions that we identified as using this stride and base.
- do {
- // FIXME: Use emitted users to emit other users.
- BasedUser &User = UsersToProcess.back();
-
- DEBUG(dbgs() << " Examining ");
- if (User.isUseOfPostIncrementedValue)
- DEBUG(dbgs() << "postinc");
- else
- DEBUG(dbgs() << "preinc");
- DEBUG(dbgs() << " use ");
- DEBUG(WriteAsOperand(dbgs(), UsersToProcess.back().OperandValToReplace,
- /*PrintType=*/false));
- DEBUG(dbgs() << " in Inst: " << *User.Inst << '\n');
-
- // If this instruction wants to use the post-incremented value, move it
- // after the post-inc and use its value instead of the PHI.
- Value *RewriteOp = User.Phi;
- if (User.isUseOfPostIncrementedValue) {
- RewriteOp = User.Phi->getIncomingValueForBlock(LatchBlock);
- // If this user is in the loop, make sure it is the last thing in the
- // loop to ensure it is dominated by the increment. In case it's the
- // only use of the iv, the increment instruction is already before the
- // use.
- if (L->contains(User.Inst) && User.Inst != IVIncInsertPt)
- User.Inst->moveBefore(IVIncInsertPt);
- }
-
- const SCEV *RewriteExpr = SE->getUnknown(RewriteOp);
-
- if (SE->getEffectiveSCEVType(RewriteOp->getType()) !=
- SE->getEffectiveSCEVType(ReplacedTy)) {
- assert(SE->getTypeSizeInBits(RewriteOp->getType()) >
- SE->getTypeSizeInBits(ReplacedTy) &&
- "Unexpected widening cast!");
- RewriteExpr = SE->getTruncateExpr(RewriteExpr, ReplacedTy);
- }
-
- // If we had to insert new instructions for RewriteOp, we have to
- // consider that they may not have been able to end up immediately
- // next to RewriteOp, because non-PHI instructions may never precede
- // PHI instructions in a block. In this case, remember where the last
- // instruction was inserted so that if we're replacing a different
- // PHI node, we can use the later point to expand the final
- // RewriteExpr.
- Instruction *NewBasePt = dyn_cast<Instruction>(RewriteOp);
- if (RewriteOp == User.Phi) NewBasePt = 0;
-
- // Clear the SCEVExpander's expression map so that we are guaranteed
- // to have the code emitted where we expect it.
- Rewriter.clear();
-
- // If we are reusing the iv, then it must be multiplied by a constant
- // factor to take advantage of the addressing mode scale component.
- if (!RewriteFactor->isZero()) {
- // If we're reusing an IV with a nonzero base (currently this happens
- // only when all reuses are outside the loop) subtract that base here.
- // The base has been used to initialize the PHI node but we don't want
- // it here.
- if (!ReuseIV.Base->isZero()) {
- const SCEV *typedBase = ReuseIV.Base;
- if (SE->getEffectiveSCEVType(RewriteExpr->getType()) !=
- SE->getEffectiveSCEVType(ReuseIV.Base->getType())) {
- // It's possible the original IV is a larger type than the new IV,
- // in which case we have to truncate the Base. We checked in
- // RequiresTypeConversion that this is valid.
- assert(SE->getTypeSizeInBits(RewriteExpr->getType()) <
- SE->getTypeSizeInBits(ReuseIV.Base->getType()) &&
- "Unexpected lengthening conversion!");
- typedBase = SE->getTruncateExpr(ReuseIV.Base,
- RewriteExpr->getType());
- }
- RewriteExpr = SE->getMinusSCEV(RewriteExpr, typedBase);
- }
+ return true;
- // Multiply old variable, with base removed, by new scale factor.
- RewriteExpr = SE->getMulExpr(RewriteFactor,
- RewriteExpr);
-
- // The common base is emitted in the loop preheader. But since we
- // are reusing an IV, it has not been used to initialize the PHI node.
- // Add it to the expression used to rewrite the uses.
- // When this use is outside the loop, we earlier subtracted the
- // common base, and are adding it back here. Use the same expression
- // as before, rather than CommonBaseV, so DAGCombiner will zap it.
- if (!CommonExprs->isZero()) {
- if (L->contains(User.Inst))
- RewriteExpr = SE->getAddExpr(RewriteExpr,
- SE->getUnknown(CommonBaseV));
- else
- RewriteExpr = SE->getAddExpr(RewriteExpr, CommonExprs);
- }
- }
+ case LSRUse::Basic:
+ // Only handle single-register values.
+ return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
- // Now that we know what we need to do, insert code before User for the
- // immediate and any loop-variant expressions.
- if (BaseV)
- // Add BaseV to the PHI value if needed.
- RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV));
-
- User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt,
- Rewriter, L, this,
- DeadInsts, SE);
-
- // Mark old value we replaced as possibly dead, so that it is eliminated
- // if we just replaced the last use of that value.
- DeadInsts.push_back(User.OperandValToReplace);
-
- UsersToProcess.pop_back();
- ++NumReduced;
-
- // If there are any more users to process with the same base, process them
- // now. We sorted by base above, so we just have to check the last elt.
- } while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base);
- // TODO: Next, find out which base index is the most common, pull it out.
- }
-
- // IMPORTANT TODO: Figure out how to partition the IV's with this stride, but
- // different starting values, into different PHIs.
-}
-
-void LoopStrengthReduce::StrengthReduceIVUsers(Loop *L) {
- // Note: this processes each stride/type pair individually. All users
- // passed into StrengthReduceIVUsersOfStride have the same type AND stride.
- // Also, note that we iterate over IVUsesByStride indirectly by using
- // StrideOrder. This extra layer of indirection makes the ordering of
- // strides deterministic - not dependent on map order.
- for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) {
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
- IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
- assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
- // FIXME: Generalize to non-affine IV's.
- if (!SI->first->isLoopInvariant(L))
- continue;
- StrengthReduceIVUsersOfStride(SI->first, *SI->second, L);
+ case LSRUse::Special:
+ // Only handle -1 scales, or no scale.
+ return AM.Scale == 0 || AM.Scale == -1;
}
+
+ return false;
}
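
When no TargetLowering is available, isLegalUse above falls back to conservative per-kind guesses. The decision table for that TLI-less path can be shown in isolation; the sketch below uses toy stand-ins for TargetLowering::AddrMode and LSRUse::KindType and is not the pass's real code:

#include <cstdint>
#include <cstdio>

// Toy stand-ins for the fields isLegalUse consults.
struct AddrMode {
  bool HasBaseGV;     // symbolic (global value) base present?
  int64_t BaseOffs;   // constant offset
  bool HasBaseReg;    // a base register is required
  int64_t Scale;      // scale applied to the scaled register
};
enum Kind { Address, ICmpZero, Basic, Special };

// Conservative legality guesses used when no target hooks are available,
// mirroring the TLI == 0 paths of isLegalUse above.
static bool isLegalUseNoTLI(const AddrMode &AM, Kind K) {
  switch (K) {
  case Address:
    // Guess that reg+reg addressing is legal, but nothing fancier.
    return !AM.HasBaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
  case ICmpZero:
    // No symbolic operands, at most two non-trivial parts, scale 0 or -1,
    // and without target info assume no immediate can be folded.
    if (AM.HasBaseGV) return false;
    if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0) return false;
    if (AM.Scale != 0 && AM.Scale != -1) return false;
    return AM.BaseOffs == 0;
  case Basic:
    // Only a single plain register.
    return !AM.HasBaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
  case Special:
    return AM.Scale == 0 || AM.Scale == -1;
  }
  return false;
}

int main() {
  AddrMode RegPlusReg = {false, 0, true, 1};
  AddrMode RegPlusImm = {false, 8, true, 0};
  std::printf("reg+reg as Address: %d\n", isLegalUseNoTLI(RegPlusReg, Address)); // 1
  std::printf("reg+8   as Address: %d\n", isLegalUseNoTLI(RegPlusImm, Address)); // 0
  return 0;
}
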
-/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
-/// set the IV user and stride information and return true, otherwise return
-/// false.
-bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond,
- IVStrideUse *&CondUse,
- const SCEV* &CondStride) {
- for (unsigned Stride = 0, e = IU->StrideOrder.size();
- Stride != e && !CondUse; ++Stride) {
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
- IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
- assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
-
- for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
- E = SI->second->Users.end(); UI != E; ++UI)
- if (UI->getUser() == Cond) {
- // NOTE: we could handle setcc instructions with multiple uses here, but
- // InstCombine does it as well for simple uses, it's not clear that it
- // occurs enough in real life to handle.
- CondUse = UI;
- CondStride = SI->first;
- return true;
- }
+static bool isLegalUse(TargetLowering::AddrMode AM,
+ int64_t MinOffset, int64_t MaxOffset,
+ LSRUse::KindType Kind, const Type *AccessTy,
+ const TargetLowering *TLI) {
+ // Check for overflow.
+ if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
+ (MinOffset > 0))
+ return false;
+ AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset;
+ if (isLegalUse(AM, Kind, AccessTy, TLI)) {
+ AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset;
+ // Check for overflow.
+ if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) !=
+ (MaxOffset > 0))
+ return false;
+ AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset;
+ return isLegalUse(AM, Kind, AccessTy, TLI);
}
return false;
}
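
The overflow guards in this overload are terse: the signed sum is formed through uint64_t, and it overflowed exactly when "sum > BaseOffs" disagrees with "offset > 0". A small standalone check of that idiom (assuming the usual two's-complement behaviour of the cast back to int64_t):

#include <cassert>
#include <cstdint>

// Returns true if BaseOffs + Off would overflow int64_t, using the same idiom
// as isLegalUse above: do the addition in uint64_t (well-defined wraparound),
// then check whether the signed result moved in the direction Off says it should.
static bool addWouldOverflow(int64_t BaseOffs, int64_t Off) {
  int64_t Sum = (int64_t)((uint64_t)BaseOffs + (uint64_t)Off);
  return (Sum > BaseOffs) != (Off > 0);
}

int main() {
  assert(!addWouldOverflow(100, 25));       // ordinary case
  assert(!addWouldOverflow(100, -25));      // negative offset, no wrap
  assert(addWouldOverflow(INT64_MAX, 1));   // wraps past the top
  assert(addWouldOverflow(INT64_MIN, -1));  // wraps past the bottom
  return 0;
}
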
-namespace {
- // Constant strides come first, which in turn are sorted by their absolute
- // values. If absolute values are the same, then positive strides come first.
- // e.g.
- // 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
- struct StrideCompare {
- const ScalarEvolution *SE;
- explicit StrideCompare(const ScalarEvolution *se) : SE(se) {}
-
- bool operator()(const SCEV *LHS, const SCEV *RHS) {
- const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
- const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
- if (LHSC && RHSC) {
- int64_t LV = LHSC->getValue()->getSExtValue();
- int64_t RV = RHSC->getValue()->getSExtValue();
- uint64_t ALV = (LV < 0) ? -LV : LV;
- uint64_t ARV = (RV < 0) ? -RV : RV;
- if (ALV == ARV) {
- if (LV != RV)
- return LV > RV;
- } else {
- return ALV < ARV;
- }
+static bool isAlwaysFoldable(int64_t BaseOffs,
+ GlobalValue *BaseGV,
+ bool HasBaseReg,
+ LSRUse::KindType Kind, const Type *AccessTy,
+ const TargetLowering *TLI) {
+ // Fast-path: zero is always foldable.
+ if (BaseOffs == 0 && !BaseGV) return true;
+
+ // Conservatively, create an address with an immediate and a
+ // base and a scale.
+ TargetLowering::AddrMode AM;
+ AM.BaseOffs = BaseOffs;
+ AM.BaseGV = BaseGV;
+ AM.HasBaseReg = HasBaseReg;
+ AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+
+ return isLegalUse(AM, Kind, AccessTy, TLI);
+}
- // If it's the same value but different type, sort by bit width so
- // that we emit larger induction variables before smaller
- // ones, letting the smaller be re-written in terms of larger ones.
- return SE->getTypeSizeInBits(RHS->getType()) <
- SE->getTypeSizeInBits(LHS->getType());
- }
- return LHSC && !RHSC;
- }
- };
+static bool isAlwaysFoldable(const SCEV *S,
+ int64_t MinOffset, int64_t MaxOffset,
+ bool HasBaseReg,
+ LSRUse::KindType Kind, const Type *AccessTy,
+ const TargetLowering *TLI,
+ ScalarEvolution &SE) {
+ // Fast-path: zero is always foldable.
+ if (S->isZero()) return true;
+
+ // Conservatively, create an address with an immediate and a
+ // base and a scale.
+ int64_t BaseOffs = ExtractImmediate(S, SE);
+ GlobalValue *BaseGV = ExtractSymbol(S, SE);
+
+ // If there's anything else involved, it's not foldable.
+ if (!S->isZero()) return false;
+
+ // Fast-path: zero is always foldable.
+ if (BaseOffs == 0 && !BaseGV) return true;
+
+ // Conservatively, create an address with an immediate and a
+ // base and a scale.
+ TargetLowering::AddrMode AM;
+ AM.BaseOffs = BaseOffs;
+ AM.BaseGV = BaseGV;
+ AM.HasBaseReg = HasBaseReg;
+ AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+
+ return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
}
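
Both isAlwaysFoldable overloads probe with the most conservative addressing mode they might need, and for the ICmpZero kind they probe with Scale = -1 because, as the comment in isLegalUse notes, a -1 scale is free for a compare against zero: the scaled register just moves to the other side of the icmp. A trivially runnable illustration of that identity (plain C++, unrelated to the LLVM types):

#include <cassert>
#include <cstdint>

// "Base + (-1)*Reg == 0" is the same predicate as "Base == Reg", so an
// ICmpZero formula with Scale = -1 needs no multiply at all; this is why the
// code above treats a -1 scale as foldable for the ICmpZero kind.
int main() {
  int64_t Base = 42;
  for (int64_t Reg = -3; Reg <= 45; ++Reg)
    assert((Base + (-1) * Reg == 0) == (Base == Reg));
  return 0;
}
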
-/// ChangeCompareStride - If a loop termination compare instruction is the only
-/// use of its stride, and the comparison is against a constant value, try to
-/// eliminate the stride by moving the compare instruction to another stride and
-/// changing its constant operand accordingly. E.g.
-///
-/// loop:
-/// ...
-/// v1 = v1 + 3
-/// v2 = v2 + 1
-/// if (v2 < 10) goto loop
-/// =>
-/// loop:
-/// ...
-/// v1 = v1 + 3
-/// if (v1 < 30) goto loop
-ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse,
- const SCEV* &CondStride,
- bool PostPass) {
- // If there's only one stride in the loop, there's nothing to do here.
- if (IU->StrideOrder.size() < 2)
- return Cond;
+/// FormulaSorter - This class implements an ordering for formulae which sorts
+/// them by their standalone cost.
+class FormulaSorter {
+ /// These two sets are kept empty, so that we compute standalone costs.
+ DenseSet<const SCEV *> VisitedRegs;
+ SmallPtrSet<const SCEV *, 16> Regs;
+ Loop *L;
+ LSRUse *LU;
+ ScalarEvolution &SE;
+ DominatorTree &DT;
+
+public:
+ FormulaSorter(Loop *l, LSRUse &lu, ScalarEvolution &se, DominatorTree &dt)
+ : L(l), LU(&lu), SE(se), DT(dt) {}
+
+ bool operator()(const Formula &A, const Formula &B) {
+ Cost CostA;
+ CostA.RateFormula(A, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
+ Regs.clear();
+ Cost CostB;
+ CostB.RateFormula(B, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
+ Regs.clear();
+ return CostA < CostB;
+ }
+};
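
FormulaSorter is a stateful comparator, so its expected use is to be handed to a sort over a use's Formulae list; the exact call site is elsewhere in this file, and something like std::sort(LU.Formulae.begin(), LU.Formulae.end(), FormulaSorter(L, LU, SE, DT)) is an assumption here, not a quotation. The pattern itself, a comparator that carries the context it needs to rate each element, in miniature:

#include <algorithm>
#include <cstdio>
#include <vector>

// A comparator that, like FormulaSorter, carries external context by reference
// and rates both operands on every comparison.
struct ByCost {
  const std::vector<int> &Cost;              // plays the role of L/LU/SE/DT
  explicit ByCost(const std::vector<int> &C) : Cost(C) {}
  bool operator()(int A, int B) const { return Cost[A] < Cost[B]; }
};

int main() {
  std::vector<int> Cost = {5, 1, 3};         // "standalone cost" of items 0..2
  std::vector<int> Items = {0, 1, 2};
  std::sort(Items.begin(), Items.end(), ByCost(Cost));
  for (int I : Items)
    std::printf("%d ", I);                   // prints "1 2 0": cheapest first
  std::printf("\n");
  return 0;
}
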
+
+/// LSRInstance - This class holds state for the main loop strength reduction
+/// logic.
+class LSRInstance {
+ IVUsers &IU;
+ ScalarEvolution &SE;
+ DominatorTree &DT;
+ const TargetLowering *const TLI;
+ Loop *const L;
+ bool Changed;
+
+ /// IVIncInsertPos - This is the insert position that the current loop's
+ /// induction variable increment should be placed. In simple loops, this is
+ /// the latch block's terminator. But in more complicated cases, this is a
+ /// position which will dominate all the in-loop post-increment users.
+ Instruction *IVIncInsertPos;
+
+ /// Factors - Interesting factors between use strides.
+ SmallSetVector<int64_t, 8> Factors;
+
+ /// Types - Interesting use types, to facilitate truncation reuse.
+ SmallSetVector<const Type *, 4> Types;
+
+ /// Fixups - The list of operands which are to be replaced.
+ SmallVector<LSRFixup, 16> Fixups;
+
+ /// Uses - The list of interesting uses.
+ SmallVector<LSRUse, 16> Uses;
+
+ /// RegUses - Track which uses use which register candidates.
+ RegUseTracker RegUses;
+
+ void OptimizeShadowIV();
+ bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
+ ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
+ bool OptimizeLoopTermCond();
+
+ void CollectInterestingTypesAndFactors();
+ void CollectFixupsAndInitialFormulae();
+
+ LSRFixup &getNewFixup() {
+ Fixups.push_back(LSRFixup());
+ return Fixups.back();
+ }
- // If there are other users of the condition's stride, don't bother trying to
- // change the condition because the stride will still remain.
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator I =
- IU->IVUsesByStride.find(CondStride);
- if (I == IU->IVUsesByStride.end())
- return Cond;
+ // Support for sharing of LSRUses between LSRFixups.
+ typedef DenseMap<const SCEV *, size_t> UseMapTy;
+ UseMapTy UseMap;
+
+ bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
+ LSRUse::KindType Kind, const Type *AccessTy);
+
+ std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
+ LSRUse::KindType Kind,
+ const Type *AccessTy);
+
+public:
+ void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
+ void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
+ void CountRegisters(const Formula &F, size_t LUIdx);
+ bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
+
+ void CollectLoopInvariantFixupsAndFormulae();
+
+ void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
+ unsigned Depth = 0);
+ void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateCrossUseConstantOffsets();
+ void GenerateAllReuseFormulae();
+
+ void FilterOutUndesirableDedicatedRegisters();
+ void NarrowSearchSpaceUsingHeuristics();
+
+ void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
+ Cost &SolutionCost,
+ SmallVectorImpl<const Formula *> &Workspace,
+ const Cost &CurCost,
+ const SmallPtrSet<const SCEV *, 16> &CurRegs,
+ DenseSet<const SCEV *> &VisitedRegs) const;
+ void Solve(SmallVectorImpl<const Formula *> &Solution) const;
+
+ Value *Expand(const LSRFixup &LF,
+ const Formula &F,
+ BasicBlock::iterator IP,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts) const;
+ void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const;
+ void Rewrite(const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const;
+ void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
+ Pass *P);
+
+ LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
+
+ bool getChanged() const { return Changed; }
+
+ void print_factors_and_types(raw_ostream &OS) const;
+ void print_fixups(raw_ostream &OS) const;
+ void print_uses(raw_ostream &OS) const;
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
- if (I->second->Users.size() > 1) {
- for (ilist<IVStrideUse>::iterator II = I->second->Users.begin(),
- EE = I->second->Users.end(); II != EE; ++II) {
- if (II->getUser() == Cond)
- continue;
- if (!isInstructionTriviallyDead(II->getUser()))
- return Cond;
- }
- }
+}
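
The declaration order above gives a rough picture of the pass's phases. The outline below is inferred from those declarations; the actual driver is the LSRInstance constructor, which is defined later in this file and not shown in this hunk, so treat the ordering as approximate:

// Approximate LSR pipeline, inferred from the LSRInstance declaration:
//   1. OptimizeShadowIV / OptimizeLoopTermCond      - tidy the IV uses first
//      (drop int->fp shadow IVs, prefer the post-incremented value in the
//      loop-exit compare).
//   2. CollectInterestingTypesAndFactors,
//      CollectFixupsAndInitialFormulae,
//      CollectLoopInvariantFixupsAndFormulae        - populate Fixups, Uses,
//                                                     and each use's Formulae.
//   3. GenerateAllReuseFormulae                     - grow the candidate set
//                                                     (reassociation, scales,
//                                                     offsets, truncates, ...).
//   4. FilterOutUndesirableDedicatedRegisters,
//      NarrowSearchSpaceUsingHeuristics             - prune the search space.
//   5. Solve                                        - pick one Formula per use.
//   6. ImplementSolution                            - Expand/Rewrite the IR and
//                                                     record Changed.
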
- // Only handle constant strides for now.
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(CondStride);
- if (!SC) return Cond;
-
- ICmpInst::Predicate Predicate = Cond->getPredicate();
- int64_t CmpSSInt = SC->getValue()->getSExtValue();
- unsigned BitWidth = SE->getTypeSizeInBits(CondStride->getType());
- uint64_t SignBit = 1ULL << (BitWidth-1);
- const Type *CmpTy = Cond->getOperand(0)->getType();
- const Type *NewCmpTy = NULL;
- unsigned TyBits = SE->getTypeSizeInBits(CmpTy);
- unsigned NewTyBits = 0;
- const SCEV *NewStride = NULL;
- Value *NewCmpLHS = NULL;
- Value *NewCmpRHS = NULL;
- int64_t Scale = 1;
- const SCEV *NewOffset = SE->getIntegerSCEV(0, CmpTy);
-
- if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) {
- int64_t CmpVal = C->getValue().getSExtValue();
-
- // Check the relevant induction variable for conformance to the pattern.
- const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
- if (!AR || !AR->isAffine())
- return Cond;
-
- const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
- // Check stride constant and the comparison constant signs to detect
- // overflow.
- if (StartC) {
- if ((StartC->getValue()->getSExtValue() < CmpVal && CmpSSInt < 0) ||
- (StartC->getValue()->getSExtValue() > CmpVal && CmpSSInt > 0))
- return Cond;
- } else {
- // More restrictive check for the other cases.
- if ((CmpVal & SignBit) != (CmpSSInt & SignBit))
- return Cond;
- }
+/// OptimizeShadowIV - If IV is used in an int-to-float cast
+/// inside the loop then try to eliminate the cast operation.
+void LSRInstance::OptimizeShadowIV() {
+ const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ return;
- // Look for a suitable stride / iv as replacement.
- for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
- IU->IVUsesByStride.find(IU->StrideOrder[i]);
- if (!isa<SCEVConstant>(SI->first) || SI->second->Users.empty())
- continue;
- int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
- if (SSInt == CmpSSInt ||
- abs64(SSInt) < abs64(CmpSSInt) ||
- (SSInt % CmpSSInt) != 0)
- continue;
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
+ UI != E; /* empty */) {
+ IVUsers::const_iterator CandidateUI = UI;
+ ++UI;
+ Instruction *ShadowUse = CandidateUI->getUser();
+ const Type *DestTy = NULL;
- Scale = SSInt / CmpSSInt;
- int64_t NewCmpVal = CmpVal * Scale;
+ /* If shadow use is a int->float cast then insert a second IV
+ to eliminate this cast.
- // If old icmp value fits in icmp immediate field, but the new one doesn't
- // try something else.
- if (TLI &&
- TLI->isLegalICmpImmediate(CmpVal) &&
- !TLI->isLegalICmpImmediate(NewCmpVal))
- continue;
+ for (unsigned i = 0; i < n; ++i)
+ foo((double)i);
- APInt Mul = APInt(BitWidth*2, CmpVal, true);
- Mul = Mul * APInt(BitWidth*2, Scale, true);
- // Check for overflow.
- if (!Mul.isSignedIntN(BitWidth))
- continue;
- // Check for overflow in the stride's type too.
- if (!Mul.isSignedIntN(SE->getTypeSizeInBits(SI->first->getType())))
- continue;
+ is transformed into
- // Watch out for overflow.
- if (ICmpInst::isSigned(Predicate) &&
- (CmpVal & SignBit) != (NewCmpVal & SignBit))
- continue;
+ double d = 0.0;
+ for (unsigned i = 0; i < n; ++i, ++d)
+ foo(d);
+ */
+ if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
+ DestTy = UCast->getDestTy();
+ else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
+ DestTy = SCast->getDestTy();
+ if (!DestTy) continue;
- // Pick the best iv to use trying to avoid a cast.
- NewCmpLHS = NULL;
- for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
- E = SI->second->Users.end(); UI != E; ++UI) {
- Value *Op = UI->getOperandValToReplace();
-
- // If the IVStrideUse implies a cast, check for an actual cast which
- // can be used to find the original IV expression.
- if (SE->getEffectiveSCEVType(Op->getType()) !=
- SE->getEffectiveSCEVType(SI->first->getType())) {
- CastInst *CI = dyn_cast<CastInst>(Op);
- // If it's not a simple cast, it's complicated.
- if (!CI)
- continue;
- // If it's a cast from a type other than the stride type,
- // it's complicated.
- if (CI->getOperand(0)->getType() != SI->first->getType())
- continue;
- // Ok, we found the IV expression in the stride's type.
- Op = CI->getOperand(0);
- }
+ if (TLI) {
+ // If target does not support DestTy natively then do not apply
+ // this transformation.
+ EVT DVT = TLI->getValueType(DestTy);
+ if (!TLI->isTypeLegal(DVT)) continue;
+ }
- NewCmpLHS = Op;
- if (NewCmpLHS->getType() == CmpTy)
- break;
- }
- if (!NewCmpLHS)
- continue;
+ PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
+ if (!PH) continue;
+ if (PH->getNumIncomingValues() != 2) continue;
- NewCmpTy = NewCmpLHS->getType();
- NewTyBits = SE->getTypeSizeInBits(NewCmpTy);
- const Type *NewCmpIntTy = IntegerType::get(Cond->getContext(), NewTyBits);
- if (RequiresTypeConversion(NewCmpTy, CmpTy)) {
- // Check if it is possible to rewrite it using
- // an iv / stride of a smaller integer type.
- unsigned Bits = NewTyBits;
- if (ICmpInst::isSigned(Predicate))
- --Bits;
- uint64_t Mask = (1ULL << Bits) - 1;
- if (((uint64_t)NewCmpVal & Mask) != (uint64_t)NewCmpVal)
- continue;
- }
+ const Type *SrcTy = PH->getType();
+ int Mantissa = DestTy->getFPMantissaWidth();
+ if (Mantissa == -1) continue;
+ if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
+ continue;
- // Don't rewrite if use offset is non-constant and the new type is
- // of a different type.
- // FIXME: too conservative?
- if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->getOffset()))
- continue;
+ unsigned Entry, Latch;
+ if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
+ Entry = 0;
+ Latch = 1;
+ } else {
+ Entry = 1;
+ Latch = 0;
+ }
- if (!PostPass) {
- bool AllUsesAreAddresses = true;
- bool AllUsesAreOutsideLoop = true;
- std::vector<BasedUser> UsersToProcess;
- const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
- AllUsesAreAddresses,
- AllUsesAreOutsideLoop,
- UsersToProcess);
- // Avoid rewriting the compare instruction with an iv of new stride
- // if it's likely the new stride uses will be rewritten using the
- // stride of the compare instruction.
- if (AllUsesAreAddresses &&
- ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
- continue;
- }
+ ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
+ if (!Init) continue;
+ Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
- // Avoid rewriting the compare instruction with an iv which has
- // implicit extension or truncation built into it.
- // TODO: This is over-conservative.
- if (SE->getTypeSizeInBits(CondUse->getOffset()->getType()) != TyBits)
- continue;
+ BinaryOperator *Incr =
+ dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
+ if (!Incr) continue;
+ if (Incr->getOpcode() != Instruction::Add
+ && Incr->getOpcode() != Instruction::Sub)
+ continue;
- // If scale is negative, use swapped predicate unless it's testing
- // for equality.
- if (Scale < 0 && !Cond->isEquality())
- Predicate = ICmpInst::getSwappedPredicate(Predicate);
+ /* Initialize new IV, double d = 0.0 in above example. */
+ ConstantInt *C = NULL;
+ if (Incr->getOperand(0) == PH)
+ C = dyn_cast<ConstantInt>(Incr->getOperand(1));
+ else if (Incr->getOperand(1) == PH)
+ C = dyn_cast<ConstantInt>(Incr->getOperand(0));
+ else
+ continue;
- NewStride = IU->StrideOrder[i];
- if (!isa<PointerType>(NewCmpTy))
- NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);
- else {
- Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal);
- NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy);
- }
- NewOffset = TyBits == NewTyBits
- ? SE->getMulExpr(CondUse->getOffset(),
- SE->getConstant(CmpTy, Scale))
- : SE->getConstant(NewCmpIntTy,
- cast<SCEVConstant>(CondUse->getOffset())->getValue()
- ->getSExtValue()*Scale);
- break;
- }
- }
+ if (!C) continue;
- // Forgo this transformation if the increment happens to be
- // unfortunately positioned after the condition, and the condition
- // has multiple uses which prevent it from being moved immediately
- // before the branch. See
- // test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-*.ll
- // for an example of this situation.
- if (!Cond->hasOneUse()) {
- for (BasicBlock::iterator I = Cond, E = Cond->getParent()->end();
- I != E; ++I)
- if (I == NewCmpLHS)
- return Cond;
- }
+ // Ignore negative constants, as the code below doesn't handle them
+ // correctly. TODO: Remove this restriction.
+ if (!C->getValue().isStrictlyPositive()) continue;
- if (NewCmpRHS) {
- // Create a new compare instruction using new stride / iv.
- ICmpInst *OldCond = Cond;
- // Insert new compare instruction.
- Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS,
- L->getHeader()->getName() + ".termcond");
+ /* Add new PHINode. */
+ PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
- DEBUG(dbgs() << " Change compare stride in Inst " << *OldCond);
- DEBUG(dbgs() << " to " << *Cond << '\n');
+ /* create new increment. '++d' in above example. */
+ Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
+ BinaryOperator *NewIncr =
+ BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
+ Instruction::FAdd : Instruction::FSub,
+ NewPH, CFP, "IV.S.next.", Incr);
- // Remove the old compare instruction. The old indvar is probably dead too.
- DeadInsts.push_back(CondUse->getOperandValToReplace());
- OldCond->replaceAllUsesWith(Cond);
- OldCond->eraseFromParent();
+ NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
+ NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
- IU->IVUsesByStride[NewStride]->addUser(NewOffset, Cond, NewCmpLHS);
- CondUse = &IU->IVUsesByStride[NewStride]->Users.back();
- CondStride = NewStride;
- ++NumEliminated;
- Changed = true;
+ /* Remove cast operation */
+ ShadowUse->replaceAllUsesWith(NewPH);
+ ShadowUse->eraseFromParent();
+ break;
}
+}
- return Cond;
+/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
+/// set the IV user and stride information and return true, otherwise return
+/// false.
+bool LSRInstance::FindIVUserForCond(ICmpInst *Cond,
+ IVStrideUse *&CondUse) {
+ for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
+ if (UI->getUser() == Cond) {
+ // NOTE: we could handle setcc instructions with multiple uses here, but
+ // InstCombine does it as well for simple uses, it's not clear that it
+ // occurs enough in real life to handle.
+ CondUse = UI;
+ return true;
+ }
+ return false;
}
/// OptimizeMax - Rewrite the loop's terminating condition if it uses
@@ -2088,7 +1405,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
/// are designed around them. The most obvious example of this is the
/// LoopInfo analysis, which doesn't remember trip count values. It
/// expects to be able to rediscover the trip count each time it is
-/// needed, and it does this using a simple analyis that only succeeds if
+/// needed, and it does this using a simple analysis that only succeeds if
/// the loop has a canonical induction variable.
///
/// However, when it comes time to generate code, the maximum operation
@@ -2098,8 +1415,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
///
-ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse) {
+ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// Check that the loop matches the pattern we're looking for.
if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
Cond->getPredicate() != CmpInst::ICMP_NE)
@@ -2108,19 +1424,19 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
if (!Sel || !Sel->hasOneUse()) return Cond;
- const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+ const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return Cond;
- const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
+ const SCEV *One = SE.getIntegerSCEV(1, BackedgeTakenCount->getType());
// Add one to the backedge-taken count to get the trip count.
- const SCEV *IterationCount = SE->getAddExpr(BackedgeTakenCount, One);
+ const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One);
// Check for a max calculation that matches the pattern.
if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
return Cond;
const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount);
- if (Max != SE->getSCEV(Sel)) return Cond;
+ if (Max != SE.getSCEV(Sel)) return Cond;
// To handle a max with more than two operands, this optimization would
// require additional checking and setup.
@@ -2130,14 +1446,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
const SCEV *MaxLHS = Max->getOperand(0);
const SCEV *MaxRHS = Max->getOperand(1);
if (!MaxLHS || MaxLHS != One) return Cond;
-
// Check the relevant induction variable for conformance to
// the pattern.
- const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
+ const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
if (!AR || !AR->isAffine() ||
AR->getStart() != One ||
- AR->getStepRecurrence(*SE) != One)
+ AR->getStepRecurrence(SE) != One)
return Cond;
assert(AR->getLoop() == L &&
@@ -2146,9 +1461,9 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
Value *NewRHS = 0;
- if (SE->getSCEV(Sel->getOperand(1)) == MaxRHS)
+ if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
NewRHS = Sel->getOperand(1);
- else if (SE->getSCEV(Sel->getOperand(2)) == MaxRHS)
+ else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
NewRHS = Sel->getOperand(2);
if (!NewRHS) return Cond;
@@ -2175,552 +1490,1804 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
return NewCond;
}
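
The situation OptimizeMax undoes is easiest to see at the source level. A hypothetical C fragment (not taken from the LLVM test suite) of the kind of loop that produces a max in its trip count:

// The body runs at least once, so unless something proves n >= 1 the trip
// count is smax(1, n); indvars then emits the exit test as an ICMP_NE against
// that max, and OptimizeMax rewrites it back to a less-than against n so the
// max computation itself can be deleted.
void zero_prefix(double *p, int n) {
  int i = 0;
  do {
    p[i] = 0.0;
  } while (++i < n);
}
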
-/// OptimizeShadowIV - If IV is used in a int-to-float cast
-/// inside the loop then try to eliminate the cast opeation.
-void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
+/// OptimizeLoopTermCond - Change loop terminating condition to use the
+/// postinc iv when possible.
+bool
+LSRInstance::OptimizeLoopTermCond() {
+ SmallPtrSet<Instruction *, 4> PostIncs;
- const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
- return;
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
- for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e;
- ++Stride) {
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
- IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
- assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
- if (!isa<SCEVConstant>(SI->first))
+ // Get the terminating condition for the loop if possible. If we
+ // can, we want to change it to use a post-incremented version of its
+ // induction variable, to allow coalescing the live ranges for the IV into
+ // one register value.
+
+ BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!TermBr)
+ continue;
+ // FIXME: Overly conservative, termination condition could be an 'or' etc..
+ if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
continue;
- for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
- E = SI->second->Users.end(); UI != E; /* empty */) {
- ilist<IVStrideUse>::iterator CandidateUI = UI;
- ++UI;
- Instruction *ShadowUse = CandidateUI->getUser();
- const Type *DestTy = NULL;
-
- /* If shadow use is a int->float cast then insert a second IV
- to eliminate this cast.
-
- for (unsigned i = 0; i < n; ++i)
- foo((double)i);
-
- is transformed into
-
- double d = 0.0;
- for (unsigned i = 0; i < n; ++i, ++d)
- foo(d);
- */
- if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
- DestTy = UCast->getDestTy();
- else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
- DestTy = SCast->getDestTy();
- if (!DestTy) continue;
-
- if (TLI) {
- // If target does not support DestTy natively then do not apply
- // this transformation.
- EVT DVT = TLI->getValueType(DestTy);
- if (!TLI->isTypeLegal(DVT)) continue;
- }
+ // Search IVUsesByStride to find Cond's IVUse if there is one.
+ IVStrideUse *CondUse = 0;
+ ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
+ if (!FindIVUserForCond(Cond, CondUse))
+ continue;
+
+ // If the trip count is computed in terms of a max (due to ScalarEvolution
+ // being unable to find a sufficient guard, for example), change the loop
+ // comparison to use SLT or ULT instead of NE.
+ // One consequence of doing this now is that it disrupts the count-down
+ // optimization. That's not always a bad thing though, because in such
+ // cases it may still be worthwhile to avoid a max.
+ Cond = OptimizeMax(Cond, CondUse);
+
+ // If this exiting block dominates the latch block, it may also use
+ // the post-inc value if it won't be shared with other uses.
+ // Check for dominance.
+ if (!DT.dominates(ExitingBlock, LatchBlock))
+ continue;
- PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
- if (!PH) continue;
- if (PH->getNumIncomingValues() != 2) continue;
+ // Conservatively avoid trying to use the post-inc value in non-latch
+ // exits if there may be pre-inc users in intervening blocks.
+ if (LatchBlock != ExitingBlock)
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
+ // Test if the use is reachable from the exiting block. This dominator
+ // query is a conservative approximation of reachability.
+ if (&*UI != CondUse &&
+ !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
+ // Conservatively assume there may be reuse if the quotient of their
+ // strides could be a legal scale.
+ const SCEV *A = CondUse->getStride();
+ const SCEV *B = UI->getStride();
+ if (SE.getTypeSizeInBits(A->getType()) !=
+ SE.getTypeSizeInBits(B->getType())) {
+ if (SE.getTypeSizeInBits(A->getType()) >
+ SE.getTypeSizeInBits(B->getType()))
+ B = SE.getSignExtendExpr(B, A->getType());
+ else
+ A = SE.getSignExtendExpr(A, B->getType());
+ }
+ if (const SCEVConstant *D =
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
+ // Stride of one or negative one can have reuse with non-addresses.
+ if (D->getValue()->isOne() ||
+ D->getValue()->isAllOnesValue())
+ goto decline_post_inc;
+ // Avoid weird situations.
+ if (D->getValue()->getValue().getMinSignedBits() >= 64 ||
+ D->getValue()->getValue().isMinSignedValue())
+ goto decline_post_inc;
+ // Without TLI, assume that any stride might be valid, and so any
+ // use might be shared.
+ if (!TLI)
+ goto decline_post_inc;
+ // Check for possible scaled-address reuse.
+ const Type *AccessTy = getAccessType(UI->getUser());
+ TargetLowering::AddrMode AM;
+ AM.Scale = D->getValue()->getSExtValue();
+ if (TLI->isLegalAddressingMode(AM, AccessTy))
+ goto decline_post_inc;
+ AM.Scale = -AM.Scale;
+ if (TLI->isLegalAddressingMode(AM, AccessTy))
+ goto decline_post_inc;
+ }
+ }
- const Type *SrcTy = PH->getType();
- int Mantissa = DestTy->getFPMantissaWidth();
- if (Mantissa == -1) continue;
- if ((int)SE->getTypeSizeInBits(SrcTy) > Mantissa)
- continue;
+ DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
+ << *Cond << '\n');
- unsigned Entry, Latch;
- if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
- Entry = 0;
- Latch = 1;
+ // It's possible for the setcc instruction to be anywhere in the loop, and
+ // possible for it to have multiple users. If it is not immediately before
+ // the exiting block branch, move it.
+ if (&*++BasicBlock::iterator(Cond) != TermBr) {
+ if (Cond->hasOneUse()) {
+ Cond->moveBefore(TermBr);
} else {
- Entry = 1;
- Latch = 0;
+ // Clone the terminating condition and insert it into the loop end.
+ ICmpInst *OldCond = Cond;
+ Cond = cast<ICmpInst>(Cond->clone());
+ Cond->setName(L->getHeader()->getName() + ".termcond");
+ ExitingBlock->getInstList().insert(TermBr, Cond);
+
+ // Clone the IVUse, as the old use still exists!
+ CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(),
+ Cond, CondUse->getOperandValToReplace());
+ TermBr->replaceUsesOfWith(OldCond, Cond);
}
+ }
- ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
- if (!Init) continue;
- Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
+ // If we get to here, we know that we can transform the setcc instruction to
+ // use the post-incremented version of the IV, allowing us to coalesce the
+ // live ranges for the IV correctly.
+ CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(),
+ CondUse->getStride()));
+ CondUse->setIsUseOfPostIncrementedValue(true);
+ Changed = true;
- BinaryOperator *Incr =
- dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
- if (!Incr) continue;
- if (Incr->getOpcode() != Instruction::Add
- && Incr->getOpcode() != Instruction::Sub)
- continue;
+ PostIncs.insert(Cond);
+ decline_post_inc:;
+ }
- /* Initialize new IV, double d = 0.0 in above example. */
- ConstantInt *C = NULL;
- if (Incr->getOperand(0) == PH)
- C = dyn_cast<ConstantInt>(Incr->getOperand(1));
- else if (Incr->getOperand(1) == PH)
- C = dyn_cast<ConstantInt>(Incr->getOperand(0));
- else
- continue;
+ // Determine an insertion point for the loop induction variable increment. It
+ // must dominate all the post-inc comparisons we just set up, and it must
+ // dominate the loop latch edge.
+ IVIncInsertPos = L->getLoopLatch()->getTerminator();
+ for (SmallPtrSet<Instruction *, 4>::const_iterator I = PostIncs.begin(),
+ E = PostIncs.end(); I != E; ++I) {
+ BasicBlock *BB =
+ DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
+ (*I)->getParent());
+ if (BB == (*I)->getParent())
+ IVIncInsertPos = *I;
+ else if (BB != IVIncInsertPos->getParent())
+ IVIncInsertPos = BB->getTerminator();
+ }
+
+ return Changed;
+}
- if (!C) continue;
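+/// reconcileNewOffset - Determine whether the use with the given NewOffset,
+/// Kind, and AccessTy can fold into the existing use LU; if so, widen LU's
+/// offset range and access type to cover it and return true.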
+bool
+LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
+ LSRUse::KindType Kind, const Type *AccessTy) {
+ int64_t NewMinOffset = LU.MinOffset;
+ int64_t NewMaxOffset = LU.MaxOffset;
+ const Type *NewAccessTy = AccessTy;
+
+ // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
+ // something conservative; however, this can pessimize in the case where one
+ // of the uses has all its uses outside the loop, for example.
+ if (LU.Kind != Kind)
+ return false;
+ // Conservatively assume HasBaseReg is true for now.
+ if (NewOffset < LU.MinOffset) {
+ if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, /*HasBaseReg=*/true,
+ Kind, AccessTy, TLI))
+ return false;
+ NewMinOffset = NewOffset;
+ } else if (NewOffset > LU.MaxOffset) {
+ if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, /*HasBaseReg=*/true,
+ Kind, AccessTy, TLI))
+ return false;
+ NewMaxOffset = NewOffset;
+ }
+ // Check for a mismatched access type, and fall back conservatively as needed.
+ if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
+ NewAccessTy = Type::getVoidTy(AccessTy->getContext());
+
+ // Update the use.
+ LU.MinOffset = NewMinOffset;
+ LU.MaxOffset = NewMaxOffset;
+ LU.AccessTy = NewAccessTy;
+ if (NewOffset != LU.Offsets.back())
+ LU.Offsets.push_back(NewOffset);
+ return true;
+}
- // Ignore negative constants, as the code below doesn't handle them
- // correctly. TODO: Remove this restriction.
- if (!C->getValue().isStrictlyPositive()) continue;
+/// getUse - Return an LSRUse index and an offset value for a fixup which
+/// needs the given expression, with the given kind and optional access type.
+/// Either reuse an existing use or create a new one, as needed.
+std::pair<size_t, int64_t>
+LSRInstance::getUse(const SCEV *&Expr,
+ LSRUse::KindType Kind, const Type *AccessTy) {
+ const SCEV *Copy = Expr;
+ int64_t Offset = ExtractImmediate(Expr, SE);
+
+ // Basic uses can't accept any offset, for example.
+ if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
+ Expr = Copy;
+ Offset = 0;
+ }
- /* Add new PHINode. */
- PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
+ std::pair<UseMapTy::iterator, bool> P =
+ UseMap.insert(std::make_pair(Expr, 0));
+ if (!P.second) {
+ // A use already existed with this base.
+ size_t LUIdx = P.first->second;
+ LSRUse &LU = Uses[LUIdx];
+ if (reconcileNewOffset(LU, Offset, Kind, AccessTy))
+ // Reuse this use.
+ return std::make_pair(LUIdx, Offset);
+ }
- /* create new increment. '++d' in above example. */
- Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
- BinaryOperator *NewIncr =
- BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
- Instruction::FAdd : Instruction::FSub,
- NewPH, CFP, "IV.S.next.", Incr);
+ // Create a new use.
+ size_t LUIdx = Uses.size();
+ P.first->second = LUIdx;
+ Uses.push_back(LSRUse(Kind, AccessTy));
+ LSRUse &LU = Uses[LUIdx];
- NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
- NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
+ // We don't need to track redundant offsets, but we also don't need to go
+ // out of our way here to avoid them.
+ if (LU.Offsets.empty() || Offset != LU.Offsets.back())
+ LU.Offsets.push_back(Offset);
- /* Remove cast operation */
- ShadowUse->replaceAllUsesWith(NewPH);
- ShadowUse->eraseFromParent();
- NumShadow++;
- break;
- }
- }
+ LU.MinOffset = Offset;
+ LU.MaxOffset = Offset;
+ return std::make_pair(LUIdx, Offset);
}
-/// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
-/// uses in the loop, look to see if we can eliminate some, in favor of using
-/// common indvars for the different uses.
-void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
- // TODO: implement optzns here.
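+/// CollectInterestingTypesAndFactors - Scan the IV users to record the
+/// integer types in use and the constant factors relating the various
+/// strides, for later use by the formula generators.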
+void LSRInstance::CollectInterestingTypesAndFactors() {
+ SmallSetVector<const SCEV *, 4> Strides;
+
+ // Collect interesting types and strides.
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+ const SCEV *Stride = UI->getStride();
+
+ // Collect interesting types.
+ Types.insert(SE.getEffectiveSCEVType(Stride->getType()));
- OptimizeShadowIV(L);
+ // Add the stride for this loop.
+ Strides.insert(Stride);
+
+ // Add strides for other mentioned loops.
+ for (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(UI->getOffset());
+ AR; AR = dyn_cast<SCEVAddRecExpr>(AR->getStart()))
+ Strides.insert(AR->getStepRecurrence(SE));
+ }
+
+ // Compute interesting factors from the set of interesting strides.
+ for (SmallSetVector<const SCEV *, 4>::const_iterator
+ I = Strides.begin(), E = Strides.end(); I != E; ++I)
+ for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
+ next(I); NewStrideIter != E; ++NewStrideIter) {
+ const SCEV *OldStride = *I;
+ const SCEV *NewStride = *NewStrideIter;
+
+ if (SE.getTypeSizeInBits(OldStride->getType()) !=
+ SE.getTypeSizeInBits(NewStride->getType())) {
+ if (SE.getTypeSizeInBits(OldStride->getType()) >
+ SE.getTypeSizeInBits(NewStride->getType()))
+ NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
+ else
+ OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
+ }
+ if (const SCEVConstant *Factor =
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
+ SE, true))) {
+ if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
+ Factors.insert(Factor->getValue()->getValue().getSExtValue());
+ } else if (const SCEVConstant *Factor =
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
+ NewStride,
+ SE, true))) {
+ if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
+ Factors.insert(Factor->getValue()->getValue().getSExtValue());
+ }
+ }
+
+ // If all uses use the same type, don't bother looking for truncation-based
+ // reuse.
+ if (Types.size() == 1)
+ Types.clear();
+
+ DEBUG(print_factors_and_types(dbgs()));
}
-bool LoopStrengthReduce::StrideMightBeShared(const SCEV* Stride, Loop *L,
- bool CheckPreInc) {
- int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
- for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
- IU->IVUsesByStride.find(IU->StrideOrder[i]);
- const SCEV *Share = SI->first;
- if (!isa<SCEVConstant>(SI->first) || Share == Stride)
- continue;
- int64_t SSInt = cast<SCEVConstant>(Share)->getValue()->getSExtValue();
- if (SSInt == SInt)
- return true; // This can definitely be reused.
- if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0)
- continue;
- int64_t Scale = SSInt / SInt;
- bool AllUsesAreAddresses = true;
- bool AllUsesAreOutsideLoop = true;
- std::vector<BasedUser> UsersToProcess;
- const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
- AllUsesAreAddresses,
- AllUsesAreOutsideLoop,
- UsersToProcess);
- if (AllUsesAreAddresses &&
- ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) {
- if (!CheckPreInc)
- return true;
- // Any pre-inc iv use?
- IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[Share];
- for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
- E = StrideUses.Users.end(); I != E; ++I) {
- if (!I->isUseOfPostIncrementedValue())
- return true;
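+/// CollectFixupsAndInitialFormulae - For each IV user, record a fixup, map it
+/// to an LSRUse, and seed any newly created use with an initial formula.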
+void LSRInstance::CollectFixupsAndInitialFormulae() {
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+ // Record the uses.
+ LSRFixup &LF = getNewFixup();
+ LF.UserInst = UI->getUser();
+ LF.OperandValToReplace = UI->getOperandValToReplace();
+ if (UI->isUseOfPostIncrementedValue())
+ LF.PostIncLoop = L;
+
+ LSRUse::KindType Kind = LSRUse::Basic;
+ const Type *AccessTy = 0;
+ if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
+ Kind = LSRUse::Address;
+ AccessTy = getAccessType(LF.UserInst);
+ }
+
+ const SCEV *S = IU.getCanonicalExpr(*UI);
+
+ // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
+ // (N - i == 0), and this allows (N - i) to be the expression that we work
+ // with rather than just N or i, so we can consider the register
+ // requirements for both N and i at the same time. Limiting this code to
+ // equality icmps is not a problem because all interesting loops use
+ // equality icmps, thanks to IndVarSimplify.
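+ // For example, a loop-exit test "i == n" with loop-invariant n is modeled
+ // below as the expression (n - i) compared against zero, i.e. an ICmpZero
+ // use.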
+ if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst))
+ if (CI->isEquality()) {
+ // Swap the operands if needed to put the OperandValToReplace on the
+ // left, for consistency.
+ Value *NV = CI->getOperand(1);
+ if (NV == LF.OperandValToReplace) {
+ CI->setOperand(1, CI->getOperand(0));
+ CI->setOperand(0, NV);
+ }
+
+ // x == y --> x - y == 0
+ const SCEV *N = SE.getSCEV(NV);
+ if (N->isLoopInvariant(L)) {
+ Kind = LSRUse::ICmpZero;
+ S = SE.getMinusSCEV(N, S);
+ }
+
+ // -1 and the negations of all interesting strides (except the negation
+ // of -1) are now also interesting.
+ for (size_t i = 0, e = Factors.size(); i != e; ++i)
+ if (Factors[i] != -1)
+ Factors.insert(-(uint64_t)Factors[i]);
+ Factors.insert(-1);
}
+
+ // Set up the initial formula for this use.
+ std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
+ LF.LUIdx = P.first;
+ LF.Offset = P.second;
+ LSRUse &LU = Uses[LF.LUIdx];
+ LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst);
+
+ // If this is the first use of this LSRUse, give it a formula.
+ if (LU.Formulae.empty()) {
+ InsertInitialFormula(S, LU, LF.LUIdx);
+ CountRegisters(LU.Formulae.back(), LF.LUIdx);
}
}
- return false;
+
+ DEBUG(print_fixups(dbgs()));
}
-/// isUsedByExitBranch - Return true if the icmp is used by a loop-terminating
-/// conditional branch, or is and'ed / or'ed with other conditions before being
-/// used as the condition.
-static bool isUsedByExitBranch(ICmpInst *Cond, Loop *L) {
- BasicBlock *CondBB = Cond->getParent();
- if (!L->isLoopExiting(CondBB))
- return false;
- BranchInst *TermBr = dyn_cast<BranchInst>(CondBB->getTerminator());
- if (!TermBr || !TermBr->isConditional())
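+/// InsertInitialFormula - Give use LU a formula derived directly from the
+/// expression S.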
+void
+LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
+ Formula F;
+ F.InitialMatch(S, L, SE, DT);
+ bool Inserted = InsertFormula(LU, LUIdx, F);
+ assert(Inserted && "Initial formula already exists!"); (void)Inserted;
+}
+
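+/// InsertSupplementalFormula - Give use LU an additional formula that simply
+/// keeps S in a single base register.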
+void
+LSRInstance::InsertSupplementalFormula(const SCEV *S,
+ LSRUse &LU, size_t LUIdx) {
+ Formula F;
+ F.BaseRegs.push_back(S);
+ F.AM.HasBaseReg = true;
+ bool Inserted = InsertFormula(LU, LUIdx, F);
+ assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
+}
+
+/// CountRegisters - Note which registers are used by the given formula,
+/// updating RegUses.
+void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
+ if (F.ScaledReg)
+ RegUses.CountRegister(F.ScaledReg, LUIdx);
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
+ E = F.BaseRegs.end(); I != E; ++I)
+ RegUses.CountRegister(*I, LUIdx);
+}
+
+/// InsertFormula - If the given formula has not yet been inserted, add it to
+/// the list, and return true. Return false otherwise.
+bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
+ if (!LU.InsertFormula(F))
return false;
- Value *User = *Cond->use_begin();
- Instruction *UserInst = dyn_cast<Instruction>(User);
- while (UserInst &&
- (UserInst->getOpcode() == Instruction::And ||
- UserInst->getOpcode() == Instruction::Or)) {
- if (!UserInst->hasOneUse() || UserInst->getParent() != CondBB)
- return false;
- User = *User->use_begin();
- UserInst = dyn_cast<Instruction>(User);
+ CountRegisters(F, LUIdx);
+ return true;
+}
+
+/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of
+/// loop-invariant values which we're tracking. These other uses will pin these
+/// values in registers, making them less profitable for elimination.
+/// TODO: This currently misses non-constant addrec step registers.
+/// TODO: Should this give more weight to users inside the loop?
+void
+LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
+ SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
+ SmallPtrSet<const SCEV *, 8> Inserted;
+
+ while (!Worklist.empty()) {
+ const SCEV *S = Worklist.pop_back_val();
+
+ if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
+ Worklist.insert(Worklist.end(), N->op_begin(), N->op_end());
+ else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+ Worklist.push_back(C->getOperand());
+ else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ Worklist.push_back(D->getLHS());
+ Worklist.push_back(D->getRHS());
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ if (!Inserted.insert(U)) continue;
+ const Value *V = U->getValue();
+ if (const Instruction *Inst = dyn_cast<Instruction>(V))
+ if (L->contains(Inst)) continue;
+ for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ const Instruction *UserInst = dyn_cast<Instruction>(*UI);
+ // Ignore non-instructions.
+ if (!UserInst)
+ continue;
+ // Ignore instructions in other functions (as can happen with
+ // Constants).
+ if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
+ continue;
+ // Ignore instructions not dominated by the loop.
+ const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
+ UserInst->getParent() :
+ cast<PHINode>(UserInst)->getIncomingBlock(
+ PHINode::getIncomingValueNumForOperand(UI.getOperandNo()));
+ if (!DT.dominates(L->getHeader(), UseBB))
+ continue;
+ // Ignore uses which are part of other SCEV expressions, to avoid
+ // analyzing them multiple times.
+ if (SE.isSCEVable(UserInst->getType()) &&
+ !isa<SCEVUnknown>(SE.getSCEV(const_cast<Instruction *>(UserInst))))
+ continue;
+ // Ignore icmp instructions which are already being analyzed.
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
+ unsigned OtherIdx = !UI.getOperandNo();
+ Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
+ if (SE.getSCEV(OtherOp)->hasComputableLoopEvolution(L))
+ continue;
+ }
+
+ LSRFixup &LF = getNewFixup();
+ LF.UserInst = const_cast<Instruction *>(UserInst);
+ LF.OperandValToReplace = UI.getUse();
+ std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0);
+ LF.LUIdx = P.first;
+ LF.Offset = P.second;
+ LSRUse &LU = Uses[LF.LUIdx];
+ LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst);
+ InsertSupplementalFormula(U, LU, LF.LUIdx);
+ CountRegisters(LU.Formulae.back(), Uses.size() - 1);
+ break;
+ }
+ }
}
- return User == TermBr;
}
-static bool ShouldCountToZero(ICmpInst *Cond, IVStrideUse* &CondUse,
- ScalarEvolution *SE, Loop *L,
- const TargetLowering *TLI = 0) {
- if (!L->contains(Cond))
- return false;
+/// CollectSubexprs - Split S into subexpressions which can be pulled out into
+/// separate registers. If C is non-null, multiply each subexpression by C.
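+/// For example, an addrec {(A + B),+,S} splits into A, B, and {0,+,S} as
+/// candidate registers.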
+static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
+ SmallVectorImpl<const SCEV *> &Ops,
+ ScalarEvolution &SE) {
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ // Break out add operands.
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I)
+ CollectSubexprs(*I, C, Ops, SE);
+ return;
+ } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // Split a non-zero base out of an addrec.
+ if (!AR->getStart()->isZero()) {
+ CollectSubexprs(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()),
+ AR->getStepRecurrence(SE),
+ AR->getLoop()), C, Ops, SE);
+ CollectSubexprs(AR->getStart(), C, Ops, SE);
+ return;
+ }
+ } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ // Break (C * (a + b + c)) into C*a + C*b + C*c.
+ if (Mul->getNumOperands() == 2)
+ if (const SCEVConstant *Op0 =
+ dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+ CollectSubexprs(Mul->getOperand(1),
+ C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
+ Ops, SE);
+ return;
+ }
+ }
- if (!isa<SCEVConstant>(CondUse->getOffset()))
- return false;
+ // Otherwise use the value itself.
+ Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+}
- // Handle only tests for equality for the moment.
- if (!Cond->isEquality() || !Cond->hasOneUse())
- return false;
- if (!isUsedByExitBranch(Cond, L))
- return false;
+/// GenerateReassociations - Split out subexpressions from adds and the bases of
+/// addrecs.
+void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
+ Formula Base,
+ unsigned Depth) {
+ // Arbitrarily cap recursion to protect compile time.
+ if (Depth >= 3) return;
+
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+ const SCEV *BaseReg = Base.BaseRegs[i];
+
+ SmallVector<const SCEV *, 8> AddOps;
+ CollectSubexprs(BaseReg, 0, AddOps, SE);
+ if (AddOps.size() == 1) continue;
+
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
+ JE = AddOps.end(); J != JE; ++J) {
+ // Don't pull a constant into a register if the constant could be folded
+ // into an immediate field.
+ if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
+ Base.getNumRegs() > 1,
+ LU.Kind, LU.AccessTy, TLI, SE))
+ continue;
- Value *CondOp0 = Cond->getOperand(0);
- const SCEV *IV = SE->getSCEV(CondOp0);
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
- if (!AR || !AR->isAffine())
- return false;
+ // Collect all operands except *J.
+ SmallVector<const SCEV *, 8> InnerAddOps;
+ for (SmallVectorImpl<const SCEV *>::const_iterator K = AddOps.begin(),
+ KE = AddOps.end(); K != KE; ++K)
+ if (K != J)
+ InnerAddOps.push_back(*K);
+
+ // Don't leave just a constant behind in a register if the constant could
+ // be folded into an immediate field.
+ if (InnerAddOps.size() == 1 &&
+ isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
+ Base.getNumRegs() > 1,
+ LU.Kind, LU.AccessTy, TLI, SE))
+ continue;
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
- if (!SC || SC->getValue()->getSExtValue() < 0)
- // If it's already counting down, don't do anything.
- return false;
+ Formula F = Base;
+ F.BaseRegs[i] = SE.getAddExpr(InnerAddOps);
+ F.BaseRegs.push_back(*J);
+ if (InsertFormula(LU, LUIdx, F))
+ // If that formula hadn't been seen before, recurse to find more like
+ // it.
+ GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
+ }
+ }
+}
- // If the RHS of the comparison is not a loop invariant, the rewrite
- // cannot be done. Also bail out if it's already comparing against zero.
- // If we are checking this before cmp stride optimization, check if it's
- // comparing against an already legal immediate.
- Value *RHS = Cond->getOperand(1);
- ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
- if (!L->isLoopInvariant(RHS) ||
- (RHSC && RHSC->isZero()) ||
- (RHSC && TLI && TLI->isLegalICmpImmediate(RHSC->getSExtValue())))
- return false;
+/// GenerateCombinations - Generate a formula consisting of all of the
+/// loop-dominating registers added into a single register.
+void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ // This method is only interesting when there is more than one base register.
+ if (Base.BaseRegs.size() <= 1) return;
+
+ Formula F = Base;
+ F.BaseRegs.clear();
+ SmallVector<const SCEV *, 4> Ops;
+ for (SmallVectorImpl<const SCEV *>::const_iterator
+ I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
+ const SCEV *BaseReg = *I;
+ if (BaseReg->properlyDominates(L->getHeader(), &DT) &&
+ !BaseReg->hasComputableLoopEvolution(L))
+ Ops.push_back(BaseReg);
+ else
+ F.BaseRegs.push_back(BaseReg);
+ }
+ if (Ops.size() > 1) {
+ const SCEV *Sum = SE.getAddExpr(Ops);
+ // TODO: If Sum is zero, it probably means ScalarEvolution missed an
+ // opportunity to fold something. For now, just ignore such cases
+ // rather than proceed with zero in a register.
+ if (!Sum->isZero()) {
+ F.BaseRegs.push_back(Sum);
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+ }
+}
- // Make sure the IV is only used for counting. Value may be preinc or
- // postinc; 2 uses in either case.
- if (!CondOp0->hasNUses(2))
- return false;
+/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
+void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ // We can't add a symbolic offset if the address already contains one.
+ if (Base.AM.BaseGV) return;
- return true;
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+ const SCEV *G = Base.BaseRegs[i];
+ GlobalValue *GV = ExtractSymbol(G, SE);
+ if (G->isZero() || !GV)
+ continue;
+ Formula F = Base;
+ F.AM.BaseGV = GV;
+ if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI))
+ continue;
+ F.BaseRegs[i] = G;
+ (void)InsertFormula(LU, LUIdx, F);
+ }
}
-/// OptimizeLoopTermCond - Change loop terminating condition to use the
-/// postinc iv when possible.
-void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
- BasicBlock *LatchBlock = L->getLoopLatch();
- bool LatchExit = L->isLoopExiting(LatchBlock);
- SmallVector<BasicBlock*, 8> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
+/// GenerateConstantOffsets - Generate reuse formulae using constant offsets.
+void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ // TODO: For now, just add the min and max offset, because it usually isn't
+ // worthwhile looking at everything in between.
+ SmallVector<int64_t, 4> Worklist;
+ Worklist.push_back(LU.MinOffset);
+ if (LU.MaxOffset != LU.MinOffset)
+ Worklist.push_back(LU.MaxOffset);
+
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+ const SCEV *G = Base.BaseRegs[i];
+
+ for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
+ E = Worklist.end(); I != E; ++I) {
+ Formula F = Base;
+ F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
+ if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
+ LU.Kind, LU.AccessTy, TLI)) {
+ F.BaseRegs[i] = SE.getAddExpr(G, SE.getIntegerSCEV(*I, G->getType()));
+
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+ }
- for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
- BasicBlock *ExitingBlock = ExitingBlocks[i];
+ int64_t Imm = ExtractImmediate(G, SE);
+ if (G->isZero() || Imm == 0)
+ continue;
+ Formula F = Base;
+ F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
+ if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI))
+ continue;
+ F.BaseRegs[i] = G;
+ (void)InsertFormula(LU, LUIdx, F);
+ }
+}
- // Finally, get the terminating condition for the loop if possible. If we
- // can, we want to change it to use a post-incremented version of its
- // induction variable, to allow coalescing the live ranges for the IV into
- // one register value.
+/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
+/// the comparison. For example, x == y -> x*c == y*c.
+void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ if (LU.Kind != LSRUse::ICmpZero) return;
- BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (!TermBr)
+ // Determine the integer type for the base formula.
+ const Type *IntTy = Base.getType();
+ if (!IntTy) return;
+ if (SE.getTypeSizeInBits(IntTy) > 64) return;
+
+ // Don't do this if there is more than one offset.
+ if (LU.MinOffset != LU.MaxOffset) return;
+
+ assert(!Base.AM.BaseGV && "ICmpZero use is not legal!");
+
+ // Check each interesting stride.
+ for (SmallSetVector<int64_t, 8>::const_iterator
+ I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+ int64_t Factor = *I;
+ Formula F = Base;
+
+ // Check that the multiplication doesn't overflow.
+ if (F.AM.BaseOffs == INT64_MIN && Factor == -1)
continue;
- // FIXME: Overly conservative, termination condition could be an 'or' etc..
- if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
+ F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
+ if (F.AM.BaseOffs / Factor != Base.AM.BaseOffs)
continue;
- // Search IVUsesByStride to find Cond's IVUse if there is one.
- IVStrideUse *CondUse = 0;
- const SCEV *CondStride = 0;
- ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
- if (!FindIVUserForCond(Cond, CondUse, CondStride))
+ // Check that multiplying with the use offset doesn't overflow.
+ int64_t Offset = LU.MinOffset;
+ if (Offset == INT64_MIN && Factor == -1)
+ continue;
+ Offset = (uint64_t)Offset * Factor;
+ if (Offset / Factor != LU.MinOffset)
continue;
- // If the latch block is exiting and it's not a single block loop, it's
- // not safe to use postinc iv in other exiting blocks. FIXME: overly
- // conservative? How about icmp stride optimization?
- bool UsePostInc = !(e > 1 && LatchExit && ExitingBlock != LatchBlock);
- if (UsePostInc && ExitingBlock != LatchBlock) {
- if (!Cond->hasOneUse())
- // See below, we don't want the condition to be cloned.
- UsePostInc = false;
- else {
- // If exiting block is the latch block, we know it's safe and profitable
- // to transform the icmp to use post-inc iv. Otherwise do so only if it
- // would not reuse another iv and its iv would be reused by other uses.
- // We are optimizing for the case where the icmp is the only use of the
- // iv.
- IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[CondStride];
- for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
- E = StrideUses.Users.end(); I != E; ++I) {
- if (I->getUser() == Cond)
- continue;
- if (!I->isUseOfPostIncrementedValue()) {
- UsePostInc = false;
- break;
- }
+ // Check that this scale is legal.
+ if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
+ continue;
+
+ // Compensate for the use having MinOffset built into it.
+ F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset;
+
+ const SCEV *FactorS = SE.getIntegerSCEV(Factor, IntTy);
+
+ // Check that multiplying with each base register doesn't overflow.
+ for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
+ F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
+ if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
+ goto next;
+ }
+
+ // Check that multiplying with the scaled register doesn't overflow.
+ if (F.ScaledReg) {
+ F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
+ if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
+ continue;
+ }
+
+ // If we make it here and it's legal, add it.
+ (void)InsertFormula(LU, LUIdx, F);
+ next:;
+ }
+}
+
+/// GenerateScales - Generate stride factor reuse formulae by making use of
+/// scaled-offset address modes, for example.
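+/// For instance, on a target whose addressing supports base + 4*index, a
+/// stride-4 access may be able to reuse a stride-1 induction variable with a
+/// scale of 4.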
+void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ // Determine the integer type for the base formula.
+ const Type *IntTy = Base.getType();
+ if (!IntTy) return;
+
+ // If this Formula already has a scaled register, we can't add another one.
+ if (Base.AM.Scale != 0) return;
+
+ // Check each interesting stride.
+ for (SmallSetVector<int64_t, 8>::const_iterator
+ I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+ int64_t Factor = *I;
+
+ Base.AM.Scale = Factor;
+ Base.AM.HasBaseReg = Base.BaseRegs.size() > 1;
+ // Check whether this scale is going to be legal.
+ if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI)) {
+ // As a special case, handle out-of-loop Basic users specially.
+ // TODO: Reconsider this special case.
+ if (LU.Kind == LSRUse::Basic &&
+ isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
+ LSRUse::Special, LU.AccessTy, TLI) &&
+ LU.AllFixupsOutsideLoop)
+ LU.Kind = LSRUse::Special;
+ else
+ continue;
+ }
+ // For an ICmpZero, negating a solitary base register won't lead to
+ // new solutions.
+ if (LU.Kind == LSRUse::ICmpZero &&
+ !Base.AM.HasBaseReg && Base.AM.BaseOffs == 0 && !Base.AM.BaseGV)
+ continue;
+ // For each addrec base reg, apply the scale, if possible.
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ if (const SCEVAddRecExpr *AR =
+ dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
+ const SCEV *FactorS = SE.getIntegerSCEV(Factor, IntTy);
+ if (FactorS->isZero())
+ continue;
+ // Divide out the factor, ignoring high bits, since we'll be
+ // scaling the value back up in the end.
+ if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
+ // TODO: This could be optimized to avoid all the copying.
+ Formula F = Base;
+ F.ScaledReg = Quotient;
+ std::swap(F.BaseRegs[i], F.BaseRegs.back());
+ F.BaseRegs.pop_back();
+ (void)InsertFormula(LU, LUIdx, F);
}
}
+ }
+}
- // If iv for the stride might be shared and any of the users use pre-inc
- // iv might be used, then it's not safe to use post-inc iv.
- if (UsePostInc &&
- isa<SCEVConstant>(CondStride) &&
- StrideMightBeShared(CondStride, L, true))
- UsePostInc = false;
- }
+/// GenerateTruncates - Generate reuse formulae from different IV types.
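+/// For example, an i64 induction variable may serve an i32 use when the
+/// target reports the truncate as free.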
+void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx,
+ Formula Base) {
+ // This requires TargetLowering to tell us which truncates are free.
+ if (!TLI) return;
+
+ // Don't bother truncating symbolic values.
+ if (Base.AM.BaseGV) return;
+
+ // Determine the integer type for the base formula.
+ const Type *DstTy = Base.getType();
+ if (!DstTy) return;
+ DstTy = SE.getEffectiveSCEVType(DstTy);
+
+ for (SmallSetVector<const Type *, 4>::const_iterator
+ I = Types.begin(), E = Types.end(); I != E; ++I) {
+ const Type *SrcTy = *I;
+ if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
+ Formula F = Base;
+
+ if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
+ for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
+ JE = F.BaseRegs.end(); J != JE; ++J)
+ *J = SE.getAnyExtendExpr(*J, SrcTy);
+
+ // TODO: This assumes we've done basic processing on all uses and
+ // have an idea what the register usage is.
+ if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
+ continue;
- // If the trip count is computed in terms of a max (due to ScalarEvolution
- // being unable to find a sufficient guard, for example), change the loop
- // comparison to use SLT or ULT instead of NE.
- Cond = OptimizeMax(L, Cond, CondUse);
-
- // If possible, change stride and operands of the compare instruction to
- // eliminate one stride. However, avoid rewriting the compare instruction
- // with an iv of new stride if it's likely the new stride uses will be
- // rewritten using the stride of the compare instruction.
- if (ExitingBlock == LatchBlock && isa<SCEVConstant>(CondStride)) {
- // If the condition stride is a constant and it's the only use, we might
- // want to optimize it first by turning it to count toward zero.
- if (!StrideMightBeShared(CondStride, L, false) &&
- !ShouldCountToZero(Cond, CondUse, SE, L, TLI))
- Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
+ (void)InsertFormula(LU, LUIdx, F);
}
+ }
+}
+
+namespace {
+
+/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to
+/// defer modifications so that the search phase doesn't have to worry about
+/// the data structures moving underneath it.
+struct WorkItem {
+ size_t LUIdx;
+ int64_t Imm;
+ const SCEV *OrigReg;
+
+ WorkItem(size_t LI, int64_t I, const SCEV *R)
+ : LUIdx(LI), Imm(I), OrigReg(R) {}
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+}
+
+void WorkItem::print(raw_ostream &OS) const {
+ OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
+ << " , add offset " << Imm;
+}
+
+void WorkItem::dump() const {
+ print(errs()); errs() << '\n';
+}
- if (!UsePostInc)
+/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
+/// distance apart and try to form reuse opportunities between them.
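+/// For example, if one use needs reg+4 and another needs reg+8, both can
+/// share a single register with the difference folded into immediate offsets
+/// where the target allows it.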
+void LSRInstance::GenerateCrossUseConstantOffsets() {
+ // Group the registers by their value without any added constant offset.
+ typedef std::map<int64_t, const SCEV *> ImmMapTy;
+ typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
+ RegMapTy Map;
+ DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
+ SmallVector<const SCEV *, 8> Sequence;
+ for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
+ I != E; ++I) {
+ const SCEV *Reg = *I;
+ int64_t Imm = ExtractImmediate(Reg, SE);
+ std::pair<RegMapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(Reg, ImmMapTy()));
+ if (Pair.second)
+ Sequence.push_back(Reg);
+ Pair.first->second.insert(std::make_pair(Imm, *I));
+ UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(*I);
+ }
+
+ // Now examine each set of registers with the same base value. Build up
+ // a list of work to do and do the work in a separate step so that we're
+ // not adding formulae and register counts while we're searching.
+ SmallVector<WorkItem, 32> WorkItems;
+ SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
+ E = Sequence.end(); I != E; ++I) {
+ const SCEV *Reg = *I;
+ const ImmMapTy &Imms = Map.find(Reg)->second;
+
+ // It's not worthwhile looking for reuse if there's only one offset.
+ if (Imms.size() == 1)
continue;
- DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
- << *Cond << '\n');
+ DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
+ for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
+ J != JE; ++J)
+ dbgs() << ' ' << J->first;
+ dbgs() << '\n');
- // It's possible for the setcc instruction to be anywhere in the loop, and
- // possible for it to have multiple users. If it is not immediately before
- // the exiting block branch, move it.
- if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
- if (Cond->hasOneUse()) { // Condition has a single use, just move it.
- Cond->moveBefore(TermBr);
+ // Examine each offset.
+ for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
+ J != JE; ++J) {
+ const SCEV *OrigReg = J->second;
+
+ int64_t JImm = J->first;
+ const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
+
+ if (!isa<SCEVConstant>(OrigReg) &&
+ UsedByIndicesMap[Reg].count() == 1) {
+ DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
+ continue;
+ }
+
+ // Conservatively examine offsets between this orig reg and a few selected
+ // other orig regs: the smallest, the largest, and one near the middle of
+ // the range.
+ ImmMapTy::const_iterator OtherImms[] = {
+ Imms.begin(), prior(Imms.end()),
+ Imms.upper_bound((Imms.begin()->first + prior(Imms.end())->first) / 2)
+ };
+ for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
+ ImmMapTy::const_iterator M = OtherImms[i];
+ if (M == J || M == JE) continue;
+
+ // Compute the difference between the two.
+ int64_t Imm = (uint64_t)JImm - M->first;
+ for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
+ LUIdx = UsedByIndices.find_next(LUIdx))
+ // Make a memo of this use, offset, and register tuple.
+ if (UniqueItems.insert(std::make_pair(LUIdx, Imm)))
+ WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
+ }
+ }
+ }
+
+ Map.clear();
+ Sequence.clear();
+ UsedByIndicesMap.clear();
+ UniqueItems.clear();
+
+ // Now iterate through the worklist and add new formulae.
+ for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
+ E = WorkItems.end(); I != E; ++I) {
+ const WorkItem &WI = *I;
+ size_t LUIdx = WI.LUIdx;
+ LSRUse &LU = Uses[LUIdx];
+ int64_t Imm = WI.Imm;
+ const SCEV *OrigReg = WI.OrigReg;
+
+ const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
+ const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
+ unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
+
+ // TODO: Use a more targeted data structure.
+ for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
+ Formula F = LU.Formulae[L];
+ // Use the immediate in the scaled register.
+ if (F.ScaledReg == OrigReg) {
+ int64_t Offs = (uint64_t)F.AM.BaseOffs +
+ Imm * (uint64_t)F.AM.Scale;
+ // Don't create 50 + reg(-50).
+ if (F.referencesReg(SE.getSCEV(
+ ConstantInt::get(IntTy, -(uint64_t)Offs))))
+ continue;
+ Formula NewF = F;
+ NewF.AM.BaseOffs = Offs;
+ if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI))
+ continue;
+ NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
+
+ // If the new scale is a constant in a register, and adding the constant
+ // value to the immediate would produce a value closer to zero than the
+ // immediate itself, then the formula isn't worthwhile.
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
+ if (C->getValue()->getValue().isNegative() !=
+ (NewF.AM.BaseOffs < 0) &&
+ (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale))
+ .ule(APInt(BitWidth, NewF.AM.BaseOffs).abs()))
+ continue;
+
+ // OK, looks good.
+ (void)InsertFormula(LU, LUIdx, NewF);
} else {
- // Otherwise, clone the terminating condition and insert into the
- // loopend.
- Cond = cast<ICmpInst>(Cond->clone());
- Cond->setName(L->getHeader()->getName() + ".termcond");
- ExitingBlock->getInstList().insert(TermBr, Cond);
+ // Use the immediate in a base register.
+ for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
+ const SCEV *BaseReg = F.BaseRegs[N];
+ if (BaseReg != OrigReg)
+ continue;
+ Formula NewF = F;
+ NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
+ if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI))
+ continue;
+ NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
+
+ // If the new formula has a constant in a register, and adding the
+ // constant value to the immediate would produce a value closer to
+ // zero than the immediate itself, then the formula isn't worthwhile.
+ for (SmallVectorImpl<const SCEV *>::const_iterator
+ J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
+ J != JE; ++J)
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
+ if (C->getValue()->getValue().isNegative() !=
+ (NewF.AM.BaseOffs < 0) &&
+ C->getValue()->getValue().abs()
+ .ule(APInt(BitWidth, NewF.AM.BaseOffs).abs()))
+ goto skip_formula;
+
+ // Ok, looks good.
+ (void)InsertFormula(LU, LUIdx, NewF);
+ break;
+ skip_formula:;
+ }
+ }
+ }
+ }
+}
- // Clone the IVUse, as the old use still exists!
- IU->IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond,
- CondUse->getOperandValToReplace());
- CondUse = &IU->IVUsesByStride[CondStride]->Users.back();
+/// GenerateAllReuseFormulae - Generate formulae for each use.
+void
+LSRInstance::GenerateAllReuseFormulae() {
+ // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
+ // queries are more precise.
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
+ }
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateScales(LU, LUIdx, LU.Formulae[i]);
+ }
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+ GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
+ }
+
+ GenerateCrossUseConstantOffsets();
+}
+
+/// FilterOutUndesirableDedicatedRegisters - If there are multiple formulae
+/// with the same set of registers used by other uses, pick the best one and
+/// delete the others.
+void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
+#ifndef NDEBUG
+ bool Changed = false;
+#endif
+
+ // Collect the best formula for each unique set of shared registers. This
+ // is reset for each use.
+ typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo>
+ BestFormulaeTy;
+ BestFormulaeTy BestFormulae;
+
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ FormulaSorter Sorter(L, LU, SE, DT);
+
+ // Clear out the set of used regs; it will be recomputed.
+ LU.Regs.clear();
+
+ for (size_t FIdx = 0, NumForms = LU.Formulae.size();
+ FIdx != NumForms; ++FIdx) {
+ Formula &F = LU.Formulae[FIdx];
+
+ SmallVector<const SCEV *, 2> Key;
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
+ JE = F.BaseRegs.end(); J != JE; ++J) {
+ const SCEV *Reg = *J;
+ if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
+ Key.push_back(Reg);
}
+ if (F.ScaledReg &&
+ RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
+ Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for
+ // uniquifying.
+ std::sort(Key.begin(), Key.end());
+
+ std::pair<BestFormulaeTy::const_iterator, bool> P =
+ BestFormulae.insert(std::make_pair(Key, FIdx));
+ if (!P.second) {
+ Formula &Best = LU.Formulae[P.first->second];
+ if (Sorter.operator()(F, Best))
+ std::swap(F, Best);
+ DEBUG(dbgs() << "Filtering out "; F.print(dbgs());
+ dbgs() << "\n"
+ " in favor of "; Best.print(dbgs());
+ dbgs() << '\n');
+#ifndef NDEBUG
+ Changed = true;
+#endif
+ std::swap(F, LU.Formulae.back());
+ LU.Formulae.pop_back();
+ --FIdx;
+ --NumForms;
+ continue;
+ }
+ if (F.ScaledReg) LU.Regs.insert(F.ScaledReg);
+ LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
}
+ BestFormulae.clear();
+ }
- // If we get to here, we know that we can transform the setcc instruction to
- // use the post-incremented version of the IV, allowing us to coalesce the
- // live ranges for the IV correctly.
- CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), CondStride));
- CondUse->setIsUseOfPostIncrementedValue(true);
- Changed = true;
+ DEBUG(if (Changed) {
+ dbgs() << "\n"
+ "After filtering out undesirable candidates:\n";
+ print_uses(dbgs());
+ });
+}
- ++NumLoopCond;
+/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
+/// formulae to choose from, use some rough heuristics to prune down the number
+/// of formulae. This keeps the main solver from taking an extraordinary amount
+/// of time in some worst-case scenarios.
+void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
+ // This is a rough guess that seems to work fairly well.
+ const size_t Limit = UINT16_MAX;
+
+ SmallPtrSet<const SCEV *, 4> Taken;
+ for (;;) {
+ // Estimate the worst-case number of solutions we might consider. We almost
+ // never consider this many solutions because we prune the search space,
+ // but the pruning isn't always sufficient.
+ uint32_t Power = 1;
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ size_t FSize = I->Formulae.size();
+ if (FSize >= Limit) {
+ Power = Limit;
+ break;
+ }
+ Power *= FSize;
+ if (Power >= Limit)
+ break;
+ }
+ if (Power < Limit)
+ break;
+
+ // Ok, we have too many formulae on our hands to conveniently handle.
+ // Use a rough heuristic to thin out the list.
+
+ // Pick the register which is used by the most LSRUses, which is likely
+ // to be a good reuse register candidate.
+ const SCEV *Best = 0;
+ unsigned BestNum = 0;
+ for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
+ I != E; ++I) {
+ const SCEV *Reg = *I;
+ if (Taken.count(Reg))
+ continue;
+ if (!Best)
+ Best = Reg;
+ else {
+ unsigned Count = RegUses.getUsedByIndices(Reg).count();
+ if (Count > BestNum) {
+ Best = Reg;
+ BestNum = Count;
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
+ << " will yield profitable reuse.\n");
+ Taken.insert(Best);
+
+ // In any use with formulae which reference this register, delete formulae
+ // which don't reference it.
+ for (SmallVectorImpl<LSRUse>::iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ LSRUse &LU = *I;
+ if (!LU.Regs.count(Best)) continue;
+
+ // Clear out the set of used regs; it will be recomputed.
+ LU.Regs.clear();
+
+ for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
+ Formula &F = LU.Formulae[i];
+ if (!F.referencesReg(Best)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
+ std::swap(LU.Formulae.back(), F);
+ LU.Formulae.pop_back();
+ --e;
+ --i;
+ continue;
+ }
+
+ if (F.ScaledReg) LU.Regs.insert(F.ScaledReg);
+ LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+ }
+ }
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
}
}
-bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride,
- IVStrideUse* &CondUse,
- Loop *L) {
- // If the only use is an icmp of a loop exiting conditional branch, then
- // attempt the optimization.
- BasedUser User = BasedUser(*CondUse, SE);
- assert(isa<ICmpInst>(User.Inst) && "Expecting an ICMPInst!");
- ICmpInst *Cond = cast<ICmpInst>(User.Inst);
+/// SolveRecurse - This is the recursive solver.
+void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
+ Cost &SolutionCost,
+ SmallVectorImpl<const Formula *> &Workspace,
+ const Cost &CurCost,
+ const SmallPtrSet<const SCEV *, 16> &CurRegs,
+ DenseSet<const SCEV *> &VisitedRegs) const {
+ // Some ideas:
+ // - prune more:
+ // - use more aggressive filtering
+ // - sort the formulae so that the most profitable solutions are found first
+ // - sort the uses too
+ // - search faster:
+ // - don't compute a cost and then compare; compare while computing a cost
+ // and bail out early.
+ // - track register sets with SmallBitVector
+
+ const LSRUse &LU = Uses[Workspace.size()];
+
+ // If this use references any register that's already a part of the
+ // in-progress solution, consider it a requirement that a formula must
+ // reference that register in order to be considered. This prunes out
+ // unprofitable searching.
+ SmallSetVector<const SCEV *, 4> ReqRegs;
+ for (SmallPtrSet<const SCEV *, 16>::const_iterator I = CurRegs.begin(),
+ E = CurRegs.end(); I != E; ++I)
+ if (LU.Regs.count(*I))
+ ReqRegs.insert(*I);
+
+ bool AnySatisfiedReqRegs = false;
+ SmallPtrSet<const SCEV *, 16> NewRegs;
+ Cost NewCost;
+retry:
+ for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+ E = LU.Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+
+ // Ignore formulae which do not use any of the required registers.
+ for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
+ JE = ReqRegs.end(); J != JE; ++J) {
+ const SCEV *Reg = *J;
+ if ((!F.ScaledReg || F.ScaledReg != Reg) &&
+ std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
+ F.BaseRegs.end())
+ goto skip;
+ }
+ AnySatisfiedReqRegs = true;
+
+ // Evaluate the cost of the current formula. If it's already worse than
+ // the current best, prune the search at that point.
+ NewCost = CurCost;
+ NewRegs = CurRegs;
+ NewCost.RateFormula(F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT);
+ if (NewCost < SolutionCost) {
+ Workspace.push_back(&F);
+ if (Workspace.size() != Uses.size()) {
+ SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
+ NewRegs, VisitedRegs);
+ if (F.getNumRegs() == 1 && Workspace.size() == 1)
+ VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
+ } else {
+ DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
+ dbgs() << ". Regs:";
+ for (SmallPtrSet<const SCEV *, 16>::const_iterator
+ I = NewRegs.begin(), E = NewRegs.end(); I != E; ++I)
+ dbgs() << ' ' << **I;
+ dbgs() << '\n');
+
+ SolutionCost = NewCost;
+ Solution = Workspace;
+ }
+ Workspace.pop_back();
+ }
+ skip:;
+ }
- // Less strict check now that compare stride optimization is done.
- if (!ShouldCountToZero(Cond, CondUse, SE, L))
- return false;
+ // If none of the formulae had all of the required registers, relax the
+ // constraint so that we don't exclude all formulae.
+ if (!AnySatisfiedReqRegs) {
+ ReqRegs.clear();
+ goto retry;
+ }
+}
- Value *CondOp0 = Cond->getOperand(0);
- PHINode *PHIExpr = dyn_cast<PHINode>(CondOp0);
- Instruction *Incr;
- if (!PHIExpr) {
- // Value tested is postinc. Find the phi node.
- Incr = dyn_cast<BinaryOperator>(CondOp0);
- // FIXME: Just use User.OperandValToReplace here?
- if (!Incr || Incr->getOpcode() != Instruction::Add)
- return false;
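+/// Solve - Choose one formula per use, seeking the combination with the
+/// lowest estimated register cost, by invoking the recursive solver.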
+void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
+ SmallVector<const Formula *, 8> Workspace;
+ Cost SolutionCost;
+ SolutionCost.Loose();
+ Cost CurCost;
+ SmallPtrSet<const SCEV *, 16> CurRegs;
+ DenseSet<const SCEV *> VisitedRegs;
+ Workspace.reserve(Uses.size());
+
+ SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
+ CurRegs, VisitedRegs);
+
+ // Ok, we've now made all our decisions.
+ DEBUG(dbgs() << "\n"
+ "The chosen solution requires "; SolutionCost.print(dbgs());
+ dbgs() << ":\n";
+ for (size_t i = 0, e = Uses.size(); i != e; ++i) {
+ dbgs() << " ";
+ Uses[i].print(dbgs());
+ dbgs() << "\n"
+ " ";
+ Solution[i]->print(dbgs());
+ dbgs() << '\n';
+ });
+}
- PHIExpr = dyn_cast<PHINode>(Incr->getOperand(0));
- if (!PHIExpr)
- return false;
- // 1 use for preinc value, the increment.
- if (!PHIExpr->hasOneUse())
- return false;
- } else {
- assert(isa<PHINode>(CondOp0) &&
- "Unexpected loop exiting counting instruction sequence!");
- PHIExpr = cast<PHINode>(CondOp0);
- // Value tested is preinc. Find the increment.
- // A CmpInst is not a BinaryOperator; we depend on this.
- Instruction::use_iterator UI = PHIExpr->use_begin();
- Incr = dyn_cast<BinaryOperator>(UI);
- if (!Incr)
- Incr = dyn_cast<BinaryOperator>(++UI);
- // One use for postinc value, the phi. Unnecessarily conservative?
- if (!Incr || !Incr->hasOneUse() || Incr->getOpcode() != Instruction::Add)
- return false;
+/// getImmediateDominator - A handy utility for the specific DominatorTree
+/// query that we need here.
+///
+static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) {
+ DomTreeNode *Node = DT.getNode(BB);
+ if (!Node) return 0;
+ Node = Node->getIDom();
+ if (!Node) return 0;
+ return Node->getBlock();
+}
+
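+/// Expand - Emit instructions computing the chosen formula F for fixup LF,
+/// and return the value that should replace the original IV operand.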
+Value *LSRInstance::Expand(const LSRFixup &LF,
+ const Formula &F,
+ BasicBlock::iterator IP,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts) const {
+ const LSRUse &LU = Uses[LF.LUIdx];
+
+ // Then, collect some instructions which the expansion must remain dominated
+ // by; these stand in for operands that will be required in the expansion.
+ SmallVector<Instruction *, 4> Inputs;
+ if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
+ Inputs.push_back(I);
+ if (LU.Kind == LSRUse::ICmpZero)
+ if (Instruction *I =
+ dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
+ Inputs.push_back(I);
+ if (LF.PostIncLoop) {
+ if (!L->contains(LF.UserInst))
+ Inputs.push_back(L->getLoopLatch()->getTerminator());
+ else
+ Inputs.push_back(IVIncInsertPos);
}
- // Replace the increment with a decrement.
- DEBUG(dbgs() << "LSR: Examining use ");
- DEBUG(WriteAsOperand(dbgs(), CondOp0, /*PrintType=*/false));
- DEBUG(dbgs() << " in Inst: " << *Cond << '\n');
- BinaryOperator *Decr = BinaryOperator::Create(Instruction::Sub,
- Incr->getOperand(0), Incr->getOperand(1), "tmp", Incr);
- Incr->replaceAllUsesWith(Decr);
- Incr->eraseFromParent();
-
- // Substitute endval-startval for the original startval, and 0 for the
- // original endval. Since we're only testing for equality this is OK even
- // if the computation wraps around.
- BasicBlock *Preheader = L->getLoopPreheader();
- Instruction *PreInsertPt = Preheader->getTerminator();
- unsigned InBlock = L->contains(PHIExpr->getIncomingBlock(0)) ? 1 : 0;
- Value *StartVal = PHIExpr->getIncomingValue(InBlock);
- Value *EndVal = Cond->getOperand(1);
- DEBUG(dbgs() << " Optimize loop counting iv to count down ["
- << *EndVal << " .. " << *StartVal << "]\n");
-
- // FIXME: check for case where both are constant.
- Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
- BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub,
- EndVal, StartVal, "tmp", PreInsertPt);
- PHIExpr->setIncomingValue(InBlock, NewStartVal);
- Cond->setOperand(1, Zero);
- DEBUG(dbgs() << " New icmp: " << *Cond << "\n");
-
- int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
- const SCEV *NewStride = 0;
- bool Found = false;
- for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
- const SCEV *OldStride = IU->StrideOrder[i];
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OldStride))
- if (SC->getValue()->getSExtValue() == -SInt) {
- Found = true;
- NewStride = OldStride;
+ // Then, climb up the immediate dominator tree as far as we can go while
+ // still being dominated by the input positions.
+ for (;;) {
+ bool AllDominate = true;
+ Instruction *BetterPos = 0;
+ BasicBlock *IDom = getImmediateDominator(IP->getParent(), DT);
+ if (!IDom) break;
+ Instruction *Tentative = IDom->getTerminator();
+ for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
+ E = Inputs.end(); I != E; ++I) {
+ Instruction *Inst = *I;
+ if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
+ AllDominate = false;
break;
}
+ if (IDom == Inst->getParent() &&
+ (!BetterPos || DT.dominates(BetterPos, Inst)))
+ BetterPos = next(BasicBlock::iterator(Inst));
+ }
+ if (!AllDominate)
+ break;
+ if (BetterPos)
+ IP = BetterPos;
+ else
+ IP = Tentative;
}
+ while (isa<PHINode>(IP)) ++IP;
+
+ // Inform the Rewriter if we have a post-increment use, so that it can
+ // perform an advantageous expansion.
+ Rewriter.setPostInc(LF.PostIncLoop);
+
+ // This is the type that the user actually needs.
+ const Type *OpTy = LF.OperandValToReplace->getType();
+ // This will be the type that we'll initially expand to.
+ const Type *Ty = F.getType();
+ if (!Ty)
+ // No type known; just expand directly to the ultimate type.
+ Ty = OpTy;
+ else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
+ // Expand directly to the ultimate type if it's the right size.
+ Ty = OpTy;
+ // This is the type to do integer arithmetic in.
+ const Type *IntTy = SE.getEffectiveSCEVType(Ty);
+
+ // Build up a list of operands to add together to form the full base.
+ SmallVector<const SCEV *, 8> Ops;
+
+ // Expand the BaseRegs portion.
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
+ E = F.BaseRegs.end(); I != E; ++I) {
+ const SCEV *Reg = *I;
+ assert(!Reg->isZero() && "Zero allocated in a base register!");
+
+ // If we're expanding for a post-inc user for the add-rec's loop, make the
+ // post-inc adjustment.
+ const SCEV *Start = Reg;
+ while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Start)) {
+ if (AR->getLoop() == LF.PostIncLoop) {
+ Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE));
+ // If the user is inside the loop, insert the code after the increment
+ // so that it is dominated by its operand. If the original insert point
+ // was already dominated by the increment, keep it, because there may
+ // be loop-variant operands that need to be respected also.
+ if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP))
+ IP = IVIncInsertPos;
+ break;
+ }
+ Start = AR->getStart();
+ }
- if (!Found)
- NewStride = SE->getIntegerSCEV(-SInt, Stride->getType());
- IU->AddUser(NewStride, CondUse->getOffset(), Cond, Cond->getOperand(0));
- IU->IVUsesByStride[Stride]->removeUser(CondUse);
+ Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
+ }
- CondUse = &IU->IVUsesByStride[NewStride]->Users.back();
- Stride = NewStride;
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ if (!Ops.empty()) {
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
- ++NumCountZero;
+ // Expand the ScaledReg portion.
+ Value *ICmpScaledV = 0;
+ if (F.AM.Scale != 0) {
+ const SCEV *ScaledS = F.ScaledReg;
+
+ // If we're expanding for a post-inc user for the add-rec's loop, make the
+ // post-inc adjustment.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS))
+ if (AR->getLoop() == LF.PostIncLoop)
+ ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE));
+
+ if (LU.Kind == LSRUse::ICmpZero) {
+ // An interesting way of "folding" with an icmp is to use a negated
+ // scale, which we'll implement by inserting it into the other operand
+ // of the icmp.
+ assert(F.AM.Scale == -1 &&
+ "The only scale supported by ICmpZero uses is -1!");
+ ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
+ } else {
+ // Otherwise just expand the scaled register and an explicit scale,
+ // which is expected to be matched as part of the address.
+ ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
+ ScaledS = SE.getMulExpr(ScaledS,
+ SE.getIntegerSCEV(F.AM.Scale,
+ ScaledS->getType()));
+ Ops.push_back(ScaledS);
+
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
+ }
- return true;
+ // Expand the GV portion.
+ if (F.AM.BaseGV) {
+ Ops.push_back(SE.getUnknown(F.AM.BaseGV));
+
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
+
+ // Expand the immediate portion.
+ int64_t Offset = (uint64_t)F.AM.BaseOffs + LF.Offset;
+ if (Offset != 0) {
+ if (LU.Kind == LSRUse::ICmpZero) {
+ // The other interesting way of "folding" with an ICmpZero is to use a
+ // negated immediate.
+ if (!ICmpScaledV)
+ ICmpScaledV = ConstantInt::get(IntTy, -Offset);
+ else {
+ Ops.push_back(SE.getUnknown(ICmpScaledV));
+ ICmpScaledV = ConstantInt::get(IntTy, Offset);
+ }
+ } else {
+ // Just add the immediate values. These again are expected to be matched
+ // as part of the address.
+ Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
+ }
+ }
+
+ // Emit instructions summing all the operands.
+ const SCEV *FullS = Ops.empty() ?
+ SE.getIntegerSCEV(0, IntTy) :
+ SE.getAddExpr(Ops);
+ Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
+
+ // We're done expanding now, so reset the rewriter.
+ Rewriter.setPostInc(0);
+
+ // An ICmpZero Formula represents an ICmp which we're handling as a
+ // comparison against zero. Now that we've expanded an expression for that
+ // form, update the ICmp's other operand.
+ if (LU.Kind == LSRUse::ICmpZero) {
+ ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
+ DeadInsts.push_back(CI->getOperand(1));
+ assert(!F.AM.BaseGV && "ICmp does not support folding a global value and "
+ "a scale at the same time!");
+ if (F.AM.Scale == -1) {
+ if (ICmpScaledV->getType() != OpTy) {
+ Instruction *Cast =
+ CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
+ OpTy, false),
+ ICmpScaledV, OpTy, "tmp", CI);
+ ICmpScaledV = Cast;
+ }
+ CI->setOperand(1, ICmpScaledV);
+ } else {
+ assert(F.AM.Scale == 0 &&
+ "ICmp does not support folding a global value and "
+ "a scale at the same time!");
+ Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
+ -(uint64_t)Offset);
+ if (C->getType() != OpTy)
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ OpTy, false),
+ C, OpTy);
+
+ CI->setOperand(1, C);
+ }
+ }
+
+ return FullV;
}
-/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
-/// when to exit the loop is used only for that purpose, try to rearrange things
-/// so it counts down to a test against zero.
-bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
- bool ThisChanged = false;
- for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
- const SCEV *Stride = IU->StrideOrder[i];
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
- IU->IVUsesByStride.find(Stride);
- assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
- // FIXME: Generalize to non-affine IV's.
- if (!SI->first->isLoopInvariant(L))
- continue;
- // If stride is a constant and it has an icmpinst use, check if we can
- // optimize the loop to count down.
- if (isa<SCEVConstant>(Stride) && SI->second->Users.size() == 1) {
- Instruction *User = SI->second->Users.begin()->getUser();
- if (!isa<ICmpInst>(User))
- continue;
- const SCEV *CondStride = Stride;
- IVStrideUse *Use = &*SI->second->Users.begin();
- if (!OptimizeLoopCountIVOfStride(CondStride, Use, L))
- continue;
- ThisChanged = true;
+/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use
+/// of their operands effectively happens in their predecessor blocks, so the
+/// expression may need to be expanded in multiple places.
+void LSRInstance::RewriteForPHI(PHINode *PN,
+ const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const {
+ DenseMap<BasicBlock *, Value *> Inserted;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
+ BasicBlock *BB = PN->getIncomingBlock(i);
+
+ // If this is a critical edge, split the edge so that we do not insert
+ // the code on all predecessor/successor paths. We do this unless this
+ // is the canonical backedge for this loop, which complicates post-inc
+ // users.
+ if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
+ !isa<IndirectBrInst>(BB->getTerminator()) &&
+ (PN->getParent() != L->getHeader() || !L->contains(BB))) {
+ // Split the critical edge.
+ BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
+
+ // If PN is outside of the loop and BB is in the loop, we want to
+ // move the block to be immediately before the PHI block, not
+ // immediately after BB.
+ if (L->contains(BB) && !L->contains(PN))
+ NewBB->moveBefore(PN->getParent());
+
+ // Splitting the edge can reduce the number of PHI entries we have.
+ e = PN->getNumIncomingValues();
+ BB = NewBB;
+ i = PN->getBasicBlockIndex(BB);
+ }
- // Now check if it's possible to reuse this iv for other stride uses.
- for (unsigned j = 0, ee = IU->StrideOrder.size(); j != ee; ++j) {
- const SCEV *SStride = IU->StrideOrder[j];
- if (SStride == CondStride)
- continue;
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator SII =
- IU->IVUsesByStride.find(SStride);
- assert(SII != IU->IVUsesByStride.end() && "Stride doesn't exist!");
- // FIXME: Generalize to non-affine IV's.
- if (!SII->first->isLoopInvariant(L))
- continue;
- // FIXME: Rewrite other stride using CondStride.
+ std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
+ Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
+ if (!Pair.second)
+ PN->setIncomingValue(i, Pair.first->second);
+ else {
+ Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
+
+ // If this is reuse-by-noop-cast, insert the noop cast.
+ const Type *OpTy = LF.OperandValToReplace->getType();
+ if (FullV->getType() != OpTy)
+ FullV =
+ CastInst::Create(CastInst::getCastOpcode(FullV, false,
+ OpTy, false),
+ FullV, LF.OperandValToReplace->getType(),
+ "tmp", BB->getTerminator());
+
+ PN->setIncomingValue(i, FullV);
+ Pair.first->second = FullV;
}
}
+}
+
+/// Rewrite - Emit instructions for the leading candidate expression for this
+/// LSRUse (this is called "expanding"), and update the UserInst to reference
+/// the newly expanded value.
+void LSRInstance::Rewrite(const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const {
+ // First, find an insertion point that dominates UserInst. For PHI nodes,
+ // find the nearest block which dominates all the relevant uses.
+ if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
+ RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P);
+ } else {
+ Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
+
+ // If this is reuse-by-noop-cast, insert the noop cast.
+ const Type *OpTy = LF.OperandValToReplace->getType();
+ if (FullV->getType() != OpTy) {
+ Instruction *Cast =
+ CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
+ FullV, OpTy, "tmp", LF.UserInst);
+ FullV = Cast;
+ }
+
+ // Update the user. ICmpZero is handled specially here (for now) because
+ // Expand may have updated one of the operands of the icmp already, and
+ // its new value may happen to be equal to LF.OperandValToReplace, in
+ // which case doing replaceUsesOfWith leads to replacing both operands
+ // with the same value. TODO: Reorganize this.
+ if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero)
+ LF.UserInst->setOperand(0, FullV);
+ else
+ LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
}
- Changed |= ThisChanged;
- return ThisChanged;
+ DeadInsts.push_back(LF.OperandValToReplace);
}
-bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
- IU = &getAnalysis<IVUsers>();
- SE = &getAnalysis<ScalarEvolution>();
- Changed = false;
+void
+LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
+ Pass *P) {
+ // Keep track of instructions we may have made dead, so that
+ // we can remove them after we are done working.
+ SmallVector<WeakVH, 16> DeadInsts;
+
+ SCEVExpander Rewriter(SE);
+ Rewriter.disableCanonicalMode();
+ Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
- // If LoopSimplify form is not available, stay out of trouble.
- if (!L->getLoopPreheader() || !L->getLoopLatch())
- return false;
+ // Expand the new value definitions and update the users.
+ for (size_t i = 0, e = Fixups.size(); i != e; ++i) {
+ size_t LUIdx = Fixups[i].LUIdx;
+
+ Rewrite(Fixups[i], *Solution[LUIdx], Rewriter, DeadInsts, P);
+
+ Changed = true;
+ }
- if (!IU->IVUsesByStride.empty()) {
- DEBUG(dbgs() << "\nLSR on \"" << L->getHeader()->getParent()->getName()
- << "\" ";
- L->print(dbgs()));
+ // Clean up after ourselves. This must be done before deleting any
+ // instructions.
+ Rewriter.clear();
- // Sort the StrideOrder so we process larger strides first.
- std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(),
- StrideCompare(SE));
+ Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
+}
- // Optimize induction variables. Some indvar uses can be transformed to use
- // strides that will be needed for other purposes. A common example of this
- // is the exit test for the loop, which can often be rewritten to use the
- // computation of some other indvar to decide when to terminate the loop.
- OptimizeIndvars(L);
+LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
+ : IU(P->getAnalysis<IVUsers>()),
+ SE(P->getAnalysis<ScalarEvolution>()),
+ DT(P->getAnalysis<DominatorTree>()),
+ TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
- // Change loop terminating condition to use the postinc iv when possible
- // and optimize loop terminating compare. FIXME: Move this after
- // StrengthReduceIVUsersOfStride?
- OptimizeLoopTermCond(L);
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->isLoopSimplifyForm()) return;
+
+ // If there's no interesting work to be done, bail early.
+ if (IU.empty()) return;
+
+ DEBUG(dbgs() << "\nLSR on loop ";
+ WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
+ dbgs() << ":\n");
+
+  // OptimizeShadowIV - If the IV is used in an int-to-float cast
+  // inside the loop, then try to eliminate the cast operation.
+ OptimizeShadowIV();
+
+ // Change loop terminating condition to use the postinc iv when possible.
+ Changed |= OptimizeLoopTermCond();
+
+ CollectInterestingTypesAndFactors();
+ CollectFixupsAndInitialFormulae();
+ CollectLoopInvariantFixupsAndFormulae();
+
+ DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
+ print_uses(dbgs()));
+
+ // Now use the reuse data to generate a bunch of interesting ways
+ // to formulate the values needed for the uses.
+ GenerateAllReuseFormulae();
+
+ DEBUG(dbgs() << "\n"
+ "After generating reuse formulae:\n";
+ print_uses(dbgs()));
+
+ FilterOutUndesirableDedicatedRegisters();
+ NarrowSearchSpaceUsingHeuristics();
+
+ SmallVector<const Formula *, 8> Solution;
+ Solve(Solution);
+ assert(Solution.size() == Uses.size() && "Malformed solution!");
+
+ // Release memory that is no longer needed.
+ Factors.clear();
+ Types.clear();
+ RegUses.clear();
+
+#ifndef NDEBUG
+ // Formulae should be legal.
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ const LSRUse &LU = *I;
+ for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+ JE = LU.Formulae.end(); J != JE; ++J)
+ assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, TLI) &&
+ "Illegal formula generated!");
+  }
+#endif
- // FIXME: We can shrink overlarge IV's here. e.g. if the code has
- // computation in i64 values and the target doesn't support i64, demote
- // the computation to 32-bit if safe.
+ // Now that we've decided what we want, make it so.
+ ImplementSolution(Solution, P);
+}
- // FIXME: Attempt to reuse values across multiple IV's. In particular, we
- // could have something like "for(i) { foo(i*8); bar(i*16) }", which should
- // be codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC.
- // Need to be careful that IV's are all the same type. Only works for
- // intptr_t indvars.
+void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
+ if (Factors.empty() && Types.empty()) return;
- // IVsByStride keeps IVs for one particular loop.
- assert(IVsByStride.empty() && "Stale entries in IVsByStride?");
+ OS << "LSR has identified the following interesting factors and types: ";
+ bool First = true;
- StrengthReduceIVUsers(L);
+ for (SmallSetVector<int64_t, 8>::const_iterator
+ I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+ if (!First) OS << ", ";
+ First = false;
+ OS << '*' << *I;
+ }
- // After all sharing is done, see if we can adjust the loop to test against
- // zero instead of counting up to a maximum. This is usually faster.
- OptimizeLoopCountIV(L);
+ for (SmallSetVector<const Type *, 4>::const_iterator
+ I = Types.begin(), E = Types.end(); I != E; ++I) {
+ if (!First) OS << ", ";
+ First = false;
+ OS << '(' << **I << ')';
+ }
+ OS << '\n';
+}
- // We're done analyzing this loop; release all the state we built up for it.
- IVsByStride.clear();
+void LSRInstance::print_fixups(raw_ostream &OS) const {
+ OS << "LSR is examining the following fixup sites:\n";
+ for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ const LSRFixup &LF = *I;
+    OS << "  ";
+ LF.print(OS);
+ OS << '\n';
+ }
+}
- // Clean up after ourselves
- DeleteTriviallyDeadInstructions();
+void LSRInstance::print_uses(raw_ostream &OS) const {
+ OS << "LSR is examining the following uses:\n";
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ const LSRUse &LU = *I;
+    OS << "  ";
+ LU.print(OS);
+ OS << '\n';
+ for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+ JE = LU.Formulae.end(); J != JE; ++J) {
+ OS << " ";
+ J->print(OS);
+ OS << '\n';
+ }
}
+}
+
+void LSRInstance::print(raw_ostream &OS) const {
+ print_factors_and_types(OS);
+ print_fixups(OS);
+ print_uses(OS);
+}
+
+void LSRInstance::dump() const {
+ print(errs()); errs() << '\n';
+}
+
+namespace {
+
+class LoopStrengthReduce : public LoopPass {
+  /// TLI - Keep a pointer to a TargetLowering to consult for determining
+ /// transformation profitability.
+ const TargetLowering *const TLI;
+
+public:
+ static char ID; // Pass ID, replacement for typeid
+ explicit LoopStrengthReduce(const TargetLowering *tli = 0);
+
+private:
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+};
+
+}
+
+char LoopStrengthReduce::ID = 0;
+static RegisterPass<LoopStrengthReduce>
+X("loop-reduce", "Loop Strength Reduction");
+
+Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
+ return new LoopStrengthReduce(TLI);
+}
+
+LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
+ : LoopPass(&ID), TLI(tli) {}
+
+void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
+ // We split critical edges, so we change the CFG. However, we do update
+ // many analyses if they are around.
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<LoopInfo>();
+ AU.addPreserved("domfrontier");
+
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<IVUsers>();
+ AU.addPreserved<IVUsers>();
+}
+
+bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
+ bool Changed = false;
+
+ // Run the main LSR transformation.
+ Changed |= LSRInstance(TLI, L, this).getChanged();
// At this point, it is worth checking to see if any recurrence PHIs are also
// dead, so that we can remove them as well.
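The subtlest step in the new LSRInstance::Expand above is choosing where to emit the expansion. The sketch below is illustrative only and is not part of this patch; it condenses that hoisting loop using the same LLVM API the patch already relies on (DominatorTree, DomTreeNode, BasicBlock), and the helper name HoistInsertPosition is invented for illustration. The real code additionally tracks a BetterPos inside the dominating block and defers to IVIncInsertPos for post-increment users.

// Illustrative sketch only -- not part of this patch.
// Hoist an insertion point IP up the immediate-dominator chain for as long
// as it remains dominated by every instruction the expansion will need.
static BasicBlock::iterator
HoistInsertPosition(BasicBlock::iterator IP,
                    const SmallVectorImpl<Instruction *> &Inputs,
                    DominatorTree &DT) {
  for (;;) {
    DomTreeNode *Node = DT.getNode(IP->getParent());
    if (!Node || !Node->getIDom()) break;   // Reached the entry block.
    Instruction *Tentative = Node->getIDom()->getBlock()->getTerminator();
    bool AllDominate = true;
    for (unsigned i = 0, e = Inputs.size(); i != e; ++i)
      if (Inputs[i] == Tentative || !DT.dominates(Inputs[i], Tentative)) {
        AllDominate = false;
        break;
      }
    if (!AllDominate) break;                // Hoisting further would break dominance.
    IP = Tentative;                         // Hoist to the idom's terminator.
  }
  while (isa<PHINode>(IP)) ++IP;            // Never insert among PHI nodes.
  return IP;
}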
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index e5fba28..071e9b7 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -170,7 +170,7 @@ Pass *llvm::createLoopUnswitchPass(bool Os) {
/// Otherwise, return null.
static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
// We can never unswitch on vector conditions.
- if (isa<VectorType>(Cond->getType()))
+ if (Cond->getType()->isVectorTy())
return 0;
// Constants should be folded, not unswitched on!
@@ -871,7 +871,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
// in the loop with the appropriate one directly.
if (IsEqual || (isa<ConstantInt>(Val) &&
- Val->getType()->isInteger(1))) {
+ Val->getType()->isIntegerTy(1))) {
Value *Replacement;
if (IsEqual)
Replacement = Val;
@@ -997,10 +997,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
case Instruction::And:
if (isa<ConstantInt>(I->getOperand(0)) &&
// constant -> RHS
- I->getOperand(0)->getType()->isInteger(1))
+ I->getOperand(0)->getType()->isIntegerTy(1))
cast<BinaryOperator>(I)->swapOperands();
if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1)))
- if (CB->getType()->isInteger(1)) {
+ if (CB->getType()->isIntegerTy(1)) {
if (CB->isOne()) // X & 1 -> X
ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM);
else // X & 0 -> 0
@@ -1011,10 +1011,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
case Instruction::Or:
if (isa<ConstantInt>(I->getOperand(0)) &&
// constant -> RHS
- I->getOperand(0)->getType()->isInteger(1))
+ I->getOperand(0)->getType()->isIntegerTy(1))
cast<BinaryOperator>(I)->swapOperands();
if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1)))
- if (CB->getType()->isInteger(1)) {
+ if (CB->getType()->isIntegerTy(1)) {
if (CB->isOne()) // X | 1 -> 1
ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM);
else // X | 0 -> X
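The LoopUnswitch hunks above and the MemCpyOptimizer, Reassociate, SCCP, and ScalarReplAggregates hunks below are mechanical: each type test moves from isa<...> checks and the old isInteger()/isFloatingPoint() helpers to the Type::is*Ty() predicates. A minimal sketch of the before/after style, assuming only the llvm::Type API these hunks already use (the helper name isBoolOrVector is invented for illustration):

#include "llvm/Type.h"
using namespace llvm;

// Illustrative sketch only -- not part of this patch.
static bool isBoolOrVector(const Type *Ty) {
  // Old style:  Ty->isInteger(1) || isa<VectorType>(Ty)
  // New style:  the Type::is*Ty() predicates used throughout this commit.
  return Ty->isIntegerTy(1) || Ty->isVectorTy();
}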
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index e0aa491..62e2977 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -42,7 +42,7 @@ static Value *isBytewiseValue(Value *V) {
LLVMContext &Context = V->getContext();
// All byte-wide stores are splatable, even of arbitrary variables.
- if (V->getType()->isInteger(8)) return V;
+ if (V->getType()->isIntegerTy(8)) return V;
// Constant float and double values can be handled as integer values if the
// corresponding integer value is "byteable". An important case is 0.0.
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index bbd4b45..5aca9cd 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -182,7 +182,7 @@ unsigned Reassociate::getRank(Value *V) {
// If this is a not or neg instruction, do not count it for rank. This
// assures us that X and ~X will have the same rank.
- if (!I->getType()->isInteger() ||
+ if (!I->getType()->isIntegerTy() ||
(!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I)))
++Rank;
@@ -597,19 +597,35 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
/// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively
/// add its operands as factors, otherwise add V to the list of factors.
+///
+/// Ops is the top-level list of add operands we're trying to factor.
static void FindSingleUseMultiplyFactors(Value *V,
- SmallVectorImpl<Value*> &Factors) {
+ SmallVectorImpl<Value*> &Factors,
+ const SmallVectorImpl<ValueEntry> &Ops,
+ bool IsRoot) {
BinaryOperator *BO;
- if ((!V->hasOneUse() && !V->use_empty()) ||
+ if (!(V->hasOneUse() || V->use_empty()) || // More than one use.
!(BO = dyn_cast<BinaryOperator>(V)) ||
BO->getOpcode() != Instruction::Mul) {
Factors.push_back(V);
return;
}
+ // If this value has a single use because it is another input to the add
+ // tree we're reassociating and we dropped its use, it actually has two
+ // uses and we can't factor it.
+ if (!IsRoot) {
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (Ops[i].Op == V) {
+ Factors.push_back(V);
+ return;
+ }
+ }
+
+
// Otherwise, add the LHS and RHS to the list of factors.
- FindSingleUseMultiplyFactors(BO->getOperand(1), Factors);
- FindSingleUseMultiplyFactors(BO->getOperand(0), Factors);
+ FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops, false);
+ FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops, false);
}
/// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor'
@@ -753,7 +769,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// Compute all of the factors of this added value.
SmallVector<Value*, 8> Factors;
- FindSingleUseMultiplyFactors(BOp, Factors);
+ FindSingleUseMultiplyFactors(BOp, Factors, Ops, true);
assert(Factors.size() > 1 && "Bad linearize!");
// Add one to FactorOccurrences for each unique factor in this op.
@@ -929,8 +945,8 @@ void Reassociate::ReassociateBB(BasicBlock *BB) {
}
// Reject cases where it is pointless to do this.
- if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPoint() ||
- isa<VectorType>(BI->getType()))
+ if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() ||
+ BI->getType()->isVectorTy())
continue; // Floating point ops are not associative.
// Do not reassociate boolean (i1) expressions. We want to preserve the
@@ -939,7 +955,7 @@ void Reassociate::ReassociateBB(BasicBlock *BB) {
// is not further optimized, it is likely to be transformed back to a
// short-circuited form for code gen, and the source order may have been
// optimized for the most likely conditions.
- if (BI->getType()->isInteger(1))
+ if (BI->getType()->isIntegerTy(1))
continue;
// If this is a subtract instruction which is not already in negate form,
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 02b45a1..7e37938 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -295,7 +295,7 @@ public:
}
void markOverdefined(Value *V) {
- assert(!isa<StructType>(V->getType()) && "Should use other method");
+ assert(!V->getType()->isStructTy() && "Should use other method");
markOverdefined(ValueState[V], V);
}
@@ -321,12 +321,12 @@ private:
}
void markConstant(Value *V, Constant *C) {
- assert(!isa<StructType>(V->getType()) && "Should use other method");
+ assert(!V->getType()->isStructTy() && "Should use other method");
markConstant(ValueState[V], V, C);
}
void markForcedConstant(Value *V, Constant *C) {
- assert(!isa<StructType>(V->getType()) && "Should use other method");
+ assert(!V->getType()->isStructTy() && "Should use other method");
ValueState[V].markForcedConstant(C);
DEBUG(dbgs() << "markForcedConstant: " << *C << ": " << *V << '\n');
InstWorkList.push_back(V);
@@ -360,7 +360,7 @@ private:
}
void mergeInValue(Value *V, LatticeVal MergeWithV) {
- assert(!isa<StructType>(V->getType()) && "Should use other method");
+ assert(!V->getType()->isStructTy() && "Should use other method");
mergeInValue(ValueState[V], V, MergeWithV);
}
@@ -369,7 +369,7 @@ private:
/// value. This function handles the case when the value hasn't been seen yet
/// by properly seeding constants etc.
LatticeVal &getValueState(Value *V) {
- assert(!isa<StructType>(V->getType()) && "Should use getStructValueState");
+ assert(!V->getType()->isStructTy() && "Should use getStructValueState");
std::pair<DenseMap<Value*, LatticeVal>::iterator, bool> I =
ValueState.insert(std::make_pair(V, LatticeVal()));
@@ -392,7 +392,7 @@ private:
/// value/field pair. This function handles the case when the value hasn't
/// been seen yet by properly seeding constants etc.
LatticeVal &getStructValueState(Value *V, unsigned i) {
- assert(isa<StructType>(V->getType()) && "Should use getValueState");
+ assert(V->getType()->isStructTy() && "Should use getValueState");
assert(i < cast<StructType>(V->getType())->getNumElements() &&
"Invalid element #");
@@ -666,7 +666,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
void SCCPSolver::visitPHINode(PHINode &PN) {
// If this PN returns a struct, just mark the result overdefined.
// TODO: We could do a lot better than this if code actually uses this.
- if (isa<StructType>(PN.getType()))
+ if (PN.getType()->isStructTy())
return markAnythingOverdefined(&PN);
if (getValueState(&PN).isOverdefined()) {
@@ -742,7 +742,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {
Value *ResultOp = I.getOperand(0);
// If we are tracking the return value of this function, merge it in.
- if (!TrackedRetVals.empty() && !isa<StructType>(ResultOp->getType())) {
+ if (!TrackedRetVals.empty() && !ResultOp->getType()->isStructTy()) {
DenseMap<Function*, LatticeVal>::iterator TFRVI =
TrackedRetVals.find(F);
if (TFRVI != TrackedRetVals.end()) {
@@ -787,7 +787,7 @@ void SCCPSolver::visitCastInst(CastInst &I) {
void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
// If this returns a struct, mark all elements over defined, we don't track
// structs in structs.
- if (isa<StructType>(EVI.getType()))
+ if (EVI.getType()->isStructTy())
return markAnythingOverdefined(&EVI);
// If this is extracting from more than one level of struct, we don't know.
@@ -795,7 +795,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
return markOverdefined(&EVI);
Value *AggVal = EVI.getAggregateOperand();
- if (isa<StructType>(AggVal->getType())) {
+ if (AggVal->getType()->isStructTy()) {
unsigned i = *EVI.idx_begin();
LatticeVal EltVal = getStructValueState(AggVal, i);
mergeInValue(getValueState(&EVI), &EVI, EltVal);
@@ -828,7 +828,7 @@ void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
}
Value *Val = IVI.getInsertedValueOperand();
- if (isa<StructType>(Val->getType()))
+ if (Val->getType()->isStructTy())
// We don't track structs in structs.
markOverdefined(getStructValueState(&IVI, i), &IVI);
else {
@@ -841,7 +841,7 @@ void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
void SCCPSolver::visitSelectInst(SelectInst &I) {
// If this select returns a struct, just mark the result overdefined.
// TODO: We could do a lot better than this if code actually uses this.
- if (isa<StructType>(I.getType()))
+ if (I.getType()->isStructTy())
return markAnythingOverdefined(&I);
LatticeVal CondValue = getValueState(I.getCondition());
@@ -1166,7 +1166,7 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
void SCCPSolver::visitStoreInst(StoreInst &SI) {
// If this store is of a struct, ignore it.
- if (isa<StructType>(SI.getOperand(0)->getType()))
+ if (SI.getOperand(0)->getType()->isStructTy())
return;
if (TrackedGlobals.empty() || !isa<GlobalVariable>(SI.getOperand(1)))
@@ -1187,7 +1187,7 @@ void SCCPSolver::visitStoreInst(StoreInst &SI) {
// global, we can replace the load with the loaded constant value!
void SCCPSolver::visitLoadInst(LoadInst &I) {
// If this load is of a struct, just mark the result overdefined.
- if (isa<StructType>(I.getType()))
+ if (I.getType()->isStructTy())
return markAnythingOverdefined(&I);
LatticeVal PtrVal = getValueState(I.getOperand(0));
@@ -1241,7 +1241,7 @@ CallOverdefined:
// Otherwise, if we have a single return value case, and if the function is
// a declaration, maybe we can constant fold it.
- if (F && F->isDeclaration() && !isa<StructType>(I->getType()) &&
+ if (F && F->isDeclaration() && !I->getType()->isStructTy() &&
canConstantFoldCallTo(F)) {
SmallVector<Constant*, 8> Operands;
@@ -1352,7 +1352,7 @@ void SCCPSolver::Solve() {
// since all of its users will have already been marked as overdefined.
// Update all of the users of this instruction's value.
//
- if (isa<StructType>(I->getType()) || !getValueState(I).isOverdefined())
+ if (I->getType()->isStructTy() || !getValueState(I).isOverdefined())
for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
UI != E; ++UI)
if (Instruction *I = dyn_cast<Instruction>(*UI))
@@ -1418,7 +1418,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
if (!LV.isUndefined()) continue;
// No instructions using structs need disambiguation.
- if (isa<StructType>(I->getOperand(0)->getType()))
+ if (I->getOperand(0)->getType()->isStructTy())
continue;
// Get the lattice values of the first two operands for use below.
@@ -1426,7 +1426,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
LatticeVal Op1LV;
if (I->getNumOperands() == 2) {
// No instructions using structs need disambiguation.
- if (isa<StructType>(I->getOperand(1)->getType()))
+ if (I->getOperand(1)->getType()->isStructTy())
continue;
// If this is a two-operand instruction, and if both operands are
@@ -1656,7 +1656,7 @@ bool SCCP::runOnFunction(Function &F) {
continue;
// TODO: Reconstruct structs from their elements.
- if (isa<StructType>(Inst->getType()))
+ if (Inst->getType()->isStructTy())
continue;
LatticeVal IV = Solver.getLatticeValueFor(Inst);
@@ -1792,7 +1792,7 @@ bool IPSCCP::runOnModule(Module &M) {
if (Solver.isBlockExecutable(F->begin())) {
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
AI != E; ++AI) {
- if (AI->use_empty() || isa<StructType>(AI->getType())) continue;
+ if (AI->use_empty() || AI->getType()->isStructTy()) continue;
// TODO: Could use getStructLatticeValueFor to find out if the entire
// result is a constant and replace it entirely if so.
@@ -1835,7 +1835,7 @@ bool IPSCCP::runOnModule(Module &M) {
for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
Instruction *Inst = BI++;
- if (Inst->getType()->isVoidTy() || isa<StructType>(Inst->getType()))
+ if (Inst->getType()->isVoidTy() || Inst->getType()->isStructTy())
continue;
// TODO: Could use getStructLatticeValueFor to find out if the entire
@@ -1918,6 +1918,14 @@ bool IPSCCP::runOnModule(Module &M) {
// all call uses with the inferred value. This means we don't need to bother
// actually returning anything from the function. Replace all return
// instructions with return undef.
+ //
+ // Do this in two stages: first identify the functions we should process, then
+ // actually zap their returns. This is important because we can only do this
+ // if the address of the function isn't taken. In cases where a return is the
+ // last use of a function, the order of processing functions would affect
+ // whether other functions are optimizable.
+ SmallVector<ReturnInst*, 8> ReturnsToZap;
+
// TODO: Process multiple value ret instructions also.
const DenseMap<Function*, LatticeVal> &RV = Solver.getTrackedRetVals();
for (DenseMap<Function*, LatticeVal>::const_iterator I = RV.begin(),
@@ -1933,7 +1941,13 @@ bool IPSCCP::runOnModule(Module &M) {
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
if (!isa<UndefValue>(RI->getOperand(0)))
- RI->setOperand(0, UndefValue::get(F->getReturnType()));
+ ReturnsToZap.push_back(RI);
+ }
+
+  // Zap all the returns which we identified above as safe to change.
+ for (unsigned i = 0, e = ReturnsToZap.size(); i != e; ++i) {
+ Function *F = ReturnsToZap[i]->getParent()->getParent();
+ ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType()));
}
  // If we inferred constant or undef values for global variables, we can delete
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 900d119..bbe6270 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -302,7 +302,7 @@ bool SROA::performScalarRepl(Function &F) {
// random stuff that doesn't use vectors (e.g. <9 x double>) because then
// we just get a lot of insert/extracts. If at least one vector is
// involved, then we probably really do have a union of vector/array.
- if (VectorTy && isa<VectorType>(VectorTy) && HadAVector) {
+ if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
<< *VectorTy << '\n');
@@ -449,7 +449,7 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
// into.
for (; GEPIt != E; ++GEPIt) {
// Ignore struct elements, no extra checking needed for these.
- if (isa<StructType>(*GEPIt))
+ if ((*GEPIt)->isStructTy())
continue;
ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
@@ -480,7 +480,7 @@ void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
// (which are essentially the same as the MemIntrinsics, especially with
// regard to copying padding between elements), or references using the
// aggregate type of the alloca.
- if (!MemOpType || isa<IntegerType>(MemOpType) || UsesAggregateType) {
+ if (!MemOpType || MemOpType->isIntegerTy() || UsesAggregateType) {
if (!UsesAggregateType) {
if (isStore)
Info.isMemCpyDst = true;
@@ -565,7 +565,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
}
LI->replaceAllUsesWith(Insert);
DeadInsts.push_back(LI);
- } else if (isa<IntegerType>(LIType) &&
+ } else if (LIType->isIntegerTy() &&
TD->getTypeAllocSize(LIType) ==
TD->getTypeAllocSize(AI->getAllocatedType())) {
// If this is a load of the entire alloca to an integer, rewrite it.
@@ -588,7 +588,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
new StoreInst(Extract, NewElts[i], SI);
}
DeadInsts.push_back(SI);
- } else if (isa<IntegerType>(SIType) &&
+ } else if (SIType->isIntegerTy() &&
TD->getTypeAllocSize(SIType) ==
TD->getTypeAllocSize(AI->getAllocatedType())) {
// If this is a store of the entire alloca from an integer, rewrite it.
@@ -833,9 +833,9 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// Convert the integer value to the appropriate type.
StoreVal = ConstantInt::get(Context, TotalVal);
- if (isa<PointerType>(ValTy))
+ if (ValTy->isPointerTy())
StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
- else if (ValTy->isFloatingPoint())
+ else if (ValTy->isFloatingPointTy())
StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
assert(StoreVal->getType() == ValTy && "Type mismatch!");
@@ -939,7 +939,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
Value *DestField = NewElts[i];
if (EltVal->getType() == FieldTy) {
// Storing to an integer field of this size, just do it.
- } else if (FieldTy->isFloatingPoint() || isa<VectorType>(FieldTy)) {
+ } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
// Bitcast to the right element type (for fp/vector values).
EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
} else {
@@ -983,7 +983,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
Value *DestField = NewElts[i];
if (EltVal->getType() == ArrayEltTy) {
// Storing to an integer field of this size, just do it.
- } else if (ArrayEltTy->isFloatingPoint() || isa<VectorType>(ArrayEltTy)) {
+ } else if (ArrayEltTy->isFloatingPointTy() ||
+ ArrayEltTy->isVectorTy()) {
// Bitcast to the right element type (for fp/vector values).
EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
} else {
@@ -1043,8 +1044,8 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
FieldSizeBits);
- if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() &&
- !isa<VectorType>(FieldTy))
+ if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
+ !FieldTy->isVectorTy())
SrcField = new BitCastInst(SrcField,
PointerType::getUnqual(FieldIntTy),
"", LI);
@@ -1182,7 +1183,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
return;
}
} else if (In->isFloatTy() || In->isDoubleTy() ||
- (isa<IntegerType>(In) && In->getPrimitiveSizeInBits() >= 8 &&
+ (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
// If we're accessing something that could be an element of a vector, see
// if the implied vector agrees with what we already have and if Offset is
@@ -1226,7 +1227,7 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
return false;
MergeInType(LI->getType(), Offset, VecTy,
AllocaSize, *TD, V->getContext());
- SawVec |= isa<VectorType>(LI->getType());
+ SawVec |= LI->getType()->isVectorTy();
continue;
}
@@ -1235,7 +1236,7 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
if (SI->getOperand(0) == V || SI->isVolatile()) return 0;
MergeInType(SI->getOperand(0)->getType(), Offset,
VecTy, AllocaSize, *TD, V->getContext());
- SawVec |= isa<VectorType>(SI->getOperand(0)->getType());
+ SawVec |= SI->getOperand(0)->getType()->isVectorTy();
continue;
}
@@ -1437,7 +1438,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
// If the result alloca is a vector type, this is either an element
// access or a bitcast to another vector type of the same size.
if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
- if (isa<VectorType>(ToType))
+ if (ToType->isVectorTy())
return Builder.CreateBitCast(FromVal, ToType, "tmp");
// Otherwise it must be an element access.
@@ -1520,9 +1521,9 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
LIBitWidth), "tmp");
// If the result is an integer, this is a trunc or bitcast.
- if (isa<IntegerType>(ToType)) {
+ if (ToType->isIntegerTy()) {
// Should be done.
- } else if (ToType->isFloatingPoint() || isa<VectorType>(ToType)) {
+ } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
// Just do a bitcast, we know the sizes match up.
FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
} else {
@@ -1600,10 +1601,10 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
unsigned DestWidth = TD->getTypeSizeInBits(AllocaType);
unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType());
unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType);
- if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType()))
+ if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
SV = Builder.CreateBitCast(SV,
IntegerType::get(SV->getContext(),SrcWidth), "tmp");
- else if (isa<PointerType>(SV->getType()))
+ else if (SV->getType()->isPointerTy())
SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp");
// Zero extend or truncate the value if needed.
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 4216e8f..05027ae 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -17,6 +17,7 @@
#define DEBUG_TYPE "simplify-libcalls"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Intrinsics.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
@@ -67,496 +68,14 @@ public:
Context = &CI->getCalledFunction()->getContext();
return CallOptimizer(CI->getCalledFunction(), CI, B);
}
-
- /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
- Value *CastToCStr(Value *V, IRBuilder<> &B);
-
- /// EmitStrLen - Emit a call to the strlen function to the builder, for the
- /// specified pointer. Ptr is required to be some pointer type, and the
- /// return value has 'intptr_t' type.
- Value *EmitStrLen(Value *Ptr, IRBuilder<> &B);
-
- /// EmitStrChr - Emit a call to the strchr function to the builder, for the
- /// specified pointer and character. Ptr is required to be some pointer type,
- /// and the return value has 'i8*' type.
- Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B);
-
- /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
- /// specified pointer arguments.
- Value *EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B);
-
- /// EmitMemCpy - Emit a call to the memcpy function to the builder. This
- /// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
- Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len,
- unsigned Align, IRBuilder<> &B);
-
- /// EmitMemMove - Emit a call to the memmove function to the builder. This
- /// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
- Value *EmitMemMove(Value *Dst, Value *Src, Value *Len,
- unsigned Align, IRBuilder<> &B);
-
- /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
- /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
- Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B);
-
- /// EmitMemCmp - Emit a call to the memcmp function.
- Value *EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B);
-
- /// EmitMemSet - Emit a call to the memset function
- Value *EmitMemSet(Value *Dst, Value *Val, Value *Len, IRBuilder<> &B);
-
- /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name'
- /// (e.g. 'floor'). This function is known to take a single of type matching
- /// 'Op' and returns one value with the same type. If 'Op' is a long double,
- /// 'l' is added as the suffix of name, if 'Op' is a float, we add a 'f'
- /// suffix.
- Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B,
- const AttrListPtr &Attrs);
-
- /// EmitPutChar - Emit a call to the putchar function. This assumes that Char
- /// is an integer.
- Value *EmitPutChar(Value *Char, IRBuilder<> &B);
-
- /// EmitPutS - Emit a call to the puts function. This assumes that Str is
- /// some pointer.
- void EmitPutS(Value *Str, IRBuilder<> &B);
-
- /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
- /// an i32, and File is a pointer to FILE.
- void EmitFPutC(Value *Char, Value *File, IRBuilder<> &B);
-
- /// EmitFPutS - Emit a call to the puts function. Str is required to be a
- /// pointer and File is a pointer to FILE.
- void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B);
-
- /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
- /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
- void EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B);
-
};
} // End anonymous namespace.
-/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
-Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder<> &B) {
- return B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr");
-}
-
-/// EmitStrLen - Emit a call to the strlen function to the builder, for the
-/// specified pointer. This always returns an integer value of size intptr_t.
-Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) {
- Module *M = Caller->getParent();
- AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
- Attribute::NoUnwind);
-
- Constant *StrLen =M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2),
- TD->getIntPtrType(*Context),
- Type::getInt8PtrTy(*Context),
- NULL);
- CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
- if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
-}
-
-/// EmitStrChr - Emit a call to the strchr function to the builder, for the
-/// specified pointer and character. Ptr is required to be some pointer type,
-/// and the return value has 'i8*' type.
-Value *LibCallOptimization::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B) {
- Module *M = Caller->getParent();
- AttributeWithIndex AWI =
- AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
-
- const Type *I8Ptr = Type::getInt8PtrTy(*Context);
- const Type *I32Ty = Type::getInt32Ty(*Context);
- Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1),
- I8Ptr, I8Ptr, I32Ty, NULL);
- CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
- ConstantInt::get(I32Ty, C), "strchr");
- if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
- return CI;
-}
-
-/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
-/// specified pointer arguments.
-Value *LibCallOptimization::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B) {
- Module *M = Caller->getParent();
- AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
- const Type *I8Ptr = Type::getInt8PtrTy(*Context);
- Value *StrCpy = M->getOrInsertFunction("strcpy", AttrListPtr::get(AWI, 2),
- I8Ptr, I8Ptr, I8Ptr, NULL);
- CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
- "strcpy");
- if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
- return CI;
-}
-
-/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always
-/// expects that the size has type 'intptr_t' and Dst/Src are pointers.
-Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len,
- unsigned Align, IRBuilder<> &B) {
- Module *M = Caller->getParent();
- const Type *Ty = Len->getType();
- Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, &Ty, 1);
- Dst = CastToCStr(Dst, B);
- Src = CastToCStr(Src, B);
- return B.CreateCall4(MemCpy, Dst, Src, Len,
- ConstantInt::get(Type::getInt32Ty(*Context), Align));
-}
-
-/// EmitMemMove - Emit a call to the memmove function to the builder. This
-/// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
-Value *LibCallOptimization::EmitMemMove(Value *Dst, Value *Src, Value *Len,
- unsigned Align, IRBuilder<> &B) {
- Module *M = Caller->getParent();
- const Type *Ty = TD->getIntPtrType(*Context);
- Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, &Ty, 1);
- Dst = CastToCStr(Dst, B);
- Src = CastToCStr(Src, B);
- Value *A = ConstantInt::get(Type::getInt32Ty(*Context), Align);
- return B.CreateCall4(MemMove, Dst, Src, Len, A);
-}
-
-/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
-/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
-Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val,
- Value *Len, IRBuilder<> &B) {
- Module *M = Caller->getParent();
- AttributeWithIndex AWI;
- AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
-
- Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1),
- Type::getInt8PtrTy(*Context),
- Type::getInt8PtrTy(*Context),
- Type::getInt32Ty(*Context),
- TD->getIntPtrType(*Context),
- NULL);
- CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
-
- if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
-}
-
-/// EmitMemCmp - Emit a call to the memcmp function.
-Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2,
- Value *Len, IRBuilder<> &B) {
- Module *M = Caller->getParent();
- AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
- AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
- Attribute::NoUnwind);
-
- Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3),
- Type::getInt32Ty(*Context),
- Type::getInt8PtrTy(*Context),
- Type::getInt8PtrTy(*Context),
- TD->getIntPtrType(*Context), NULL);
- CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
- Len, "memcmp");
-
- if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
-}
-
-/// EmitMemSet - Emit a call to the memset function
-Value *LibCallOptimization::EmitMemSet(Value *Dst, Value *Val,
- Value *Len, IRBuilder<> &B) {
- Module *M = Caller->getParent();
- Intrinsic::ID IID = Intrinsic::memset;
- const Type *Tys[1];
- Tys[0] = Len->getType();
- Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1);
- Value *Align = ConstantInt::get(Type::getInt32Ty(*Context), 1);
- return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align);
-}
-
-/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
-/// 'floor'). This function is known to take a single of type matching 'Op' and
-/// returns one value with the same type. If 'Op' is a long double, 'l' is
-/// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix.
-Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,
- IRBuilder<> &B,
- const AttrListPtr &Attrs) {
- char NameBuffer[20];
- if (!Op->getType()->isDoubleTy()) {
- // If we need to add a suffix, copy into NameBuffer.
- unsigned NameLen = strlen(Name);
- assert(NameLen < sizeof(NameBuffer)-2);
- memcpy(NameBuffer, Name, NameLen);
- if (Op->getType()->isFloatTy())
- NameBuffer[NameLen] = 'f'; // floorf
- else
- NameBuffer[NameLen] = 'l'; // floorl
- NameBuffer[NameLen+1] = 0;
- Name = NameBuffer;
- }
-
- Module *M = Caller->getParent();
- Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
- Op->getType(), NULL);
- CallInst *CI = B.CreateCall(Callee, Op, Name);
- CI->setAttributes(Attrs);
- if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
-}
-
-/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
-/// is an integer.
-Value *LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) {
- Module *M = Caller->getParent();
- Value *PutChar = M->getOrInsertFunction("putchar", Type::getInt32Ty(*Context),
- Type::getInt32Ty(*Context), NULL);
- CallInst *CI = B.CreateCall(PutChar,
- B.CreateIntCast(Char,
- Type::getInt32Ty(*Context),
- /*isSigned*/true,
- "chari"),
- "putchar");
-
- if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
- return CI;
-}
-
-/// EmitPutS - Emit a call to the puts function. This assumes that Str is
-/// some pointer.
-void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) {
- Module *M = Caller->getParent();
- AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
-
- Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2),
- Type::getInt32Ty(*Context),
- Type::getInt8PtrTy(*Context),
- NULL);
- CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
- if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
-}
-
-/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
-/// an integer and File is a pointer to FILE.
-void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) {
- Module *M = Caller->getParent();
- AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
- Constant *F;
- if (isa<PointerType>(File->getType()))
- F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2),
- Type::getInt32Ty(*Context),
- Type::getInt32Ty(*Context), File->getType(),
- NULL);
- else
- F = M->getOrInsertFunction("fputc",
- Type::getInt32Ty(*Context),
- Type::getInt32Ty(*Context),
- File->getType(), NULL);
- Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), /*isSigned*/true,
- "chari");
- CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
-
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
- CI->setCallingConv(Fn->getCallingConv());
-}
-
-/// EmitFPutS - Emit a call to the puts function. Str is required to be a
-/// pointer and File is a pointer to FILE.
-void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) {
- Module *M = Caller->getParent();
- AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
- AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
- Constant *F;
- if (isa<PointerType>(File->getType()))
- F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3),
- Type::getInt32Ty(*Context),
- Type::getInt8PtrTy(*Context),
- File->getType(), NULL);
- else
- F = M->getOrInsertFunction("fputs", Type::getInt32Ty(*Context),
- Type::getInt8PtrTy(*Context),
- File->getType(), NULL);
- CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
-
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
- CI->setCallingConv(Fn->getCallingConv());
-}
-
-/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
-/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
-void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File,
- IRBuilder<> &B) {
- Module *M = Caller->getParent();
- AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture);
- AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
- Constant *F;
- if (isa<PointerType>(File->getType()))
- F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3),
- TD->getIntPtrType(*Context),
- Type::getInt8PtrTy(*Context),
- TD->getIntPtrType(*Context),
- TD->getIntPtrType(*Context),
- File->getType(), NULL);
- else
- F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(*Context),
- Type::getInt8PtrTy(*Context),
- TD->getIntPtrType(*Context),
- TD->getIntPtrType(*Context),
- File->getType(), NULL);
- CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
- ConstantInt::get(TD->getIntPtrType(*Context), 1), File);
-
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
- CI->setCallingConv(Fn->getCallingConv());
-}
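The emitters removed in this block now take the TargetData pointer explicitly at every call site, which is why the surviving hunks below read EmitStrLen(Dst, B, TD), EmitMemCpy(..., 1, B, TD) and so on. A minimal sketch of the updated call-site pattern; the wrapper name emitNulTerminatedCopy is hypothetical, and it assumes only the EmitMemCpy signature that those call sites use.

static Value *emitNulTerminatedCopy(Value *Dst, Value *Src, uint64_t Len,
                                    IRBuilder<> &B, const TargetData *TD,
                                    LLVMContext &Context) {
  // Copy Len bytes plus the trailing nul, one-byte aligned, threading TD
  // through so the emitter can pick the right intptr type.
  EmitMemCpy(Dst, Src,
             ConstantInt::get(TD->getIntPtrType(Context), Len + 1),
             /*Align=*/1, B, TD);
  return Dst;
}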
//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//
-/// GetStringLengthH - If we can compute the length of the string pointed to by
-/// the specified pointer, return 'len+1'. If we can't, return 0.
-static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
- // Look through noop bitcast instructions.
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
- return GetStringLengthH(BCI->getOperand(0), PHIs);
-
- // If this is a PHI node, there are two cases: either we have already seen it
- // or we haven't.
- if (PHINode *PN = dyn_cast<PHINode>(V)) {
- if (!PHIs.insert(PN))
- return ~0ULL; // already in the set.
-
- // If it was new, see if all the input strings are the same length.
- uint64_t LenSoFar = ~0ULL;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs);
- if (Len == 0) return 0; // Unknown length -> unknown.
-
- if (Len == ~0ULL) continue;
-
- if (Len != LenSoFar && LenSoFar != ~0ULL)
- return 0; // Disagree -> unknown.
- LenSoFar = Len;
- }
-
- // Success, all agree.
- return LenSoFar;
- }
-
- // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
- if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
- uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
- if (Len1 == 0) return 0;
- uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
- if (Len2 == 0) return 0;
- if (Len1 == ~0ULL) return Len2;
- if (Len2 == ~0ULL) return Len1;
- if (Len1 != Len2) return 0;
- return Len1;
- }
-
- // If the value is not a GEP instruction nor a constant expression with a
- // GEP instruction, then return unknown.
- User *GEP = 0;
- if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
- GEP = GEPI;
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (CE->getOpcode() != Instruction::GetElementPtr)
- return 0;
- GEP = CE;
- } else {
- return 0;
- }
-
- // Make sure the GEP has exactly three arguments.
- if (GEP->getNumOperands() != 3)
- return 0;
-
- // Check to make sure that the first operand of the GEP is an integer and
- // has value 0 so that we are sure we're indexing into the initializer.
- if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
- if (!Idx->isZero())
- return 0;
- } else
- return 0;
-
- // If the second index isn't a ConstantInt, then this is a variable index
- // into the array. If this occurs, we can't say anything meaningful about
- // the string.
- uint64_t StartIdx = 0;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
- StartIdx = CI->getZExtValue();
- else
- return 0;
-
- // The GEP instruction, constant or instruction, must reference a global
- // variable that is a constant and is initialized. The referenced constant
- // initializer is the array that we'll use for optimization.
- GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
- if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
- GV->mayBeOverridden())
- return 0;
- Constant *GlobalInit = GV->getInitializer();
-
- // Handle the ConstantAggregateZero case, which is a degenerate case. The
- // initializer is constant zero so the length of the string must be zero.
- if (isa<ConstantAggregateZero>(GlobalInit))
- return 1; // Len = 0 offset by 1.
-
- // Must be a Constant Array
- ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
- if (!Array || !Array->getType()->getElementType()->isInteger(8))
-    return 0; // Not a constant i8 array; length is unknown.
-
- // Get the number of elements in the array
- uint64_t NumElts = Array->getType()->getNumElements();
-
- // Traverse the constant array from StartIdx (derived above) which is
- // the place the GEP refers to in the array.
- for (unsigned i = StartIdx; i != NumElts; ++i) {
- Constant *Elt = Array->getOperand(i);
- ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
- if (!CI) // This array isn't suitable, non-int initializer.
- return 0;
- if (CI->isZero())
- return i-StartIdx+1; // We found end of string, success!
- }
-
- return 0; // The array isn't null terminated, conservatively return 'unknown'.
-}
-
-/// GetStringLength - If we can compute the length of the string pointed to by
-/// the specified pointer, return 'len+1'. If we can't, return 0.
-static uint64_t GetStringLength(Value *V) {
- if (!isa<PointerType>(V->getType())) return 0;
-
- SmallPtrSet<PHINode*, 32> PHIs;
- uint64_t Len = GetStringLengthH(V, PHIs);
- // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
- // an empty string as a length.
- return Len == ~0ULL ? 1 : Len;
-}
-
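The removed GetStringLengthH peels bitcasts, PHIs, selects and a constant GEP down to a constant i8 array, then scans that array for the terminating nul. A standalone sketch of just the final scan, in plain C++, with a byte vector standing in for the ConstantArray initializer and StartIdx for the GEP's second index:

#include <cstdint>
#include <vector>

// Returns strlen+1 (the convention used above), or 0 when the bytes are not
// nul-terminated from StartIdx onward and the length is therefore unknown.
uint64_t scanForNul(const std::vector<uint8_t> &Init, uint64_t StartIdx) {
  for (uint64_t i = StartIdx; i < Init.size(); ++i)
    if (Init[i] == 0)
      return i - StartIdx + 1;
  return 0;
}

Reporting len+1 keeps 0 free to mean "unknown" while an empty string still yields a nonzero value.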
/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
/// value is equal or not-equal to zero.
static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
@@ -613,7 +132,7 @@ struct StrCatOpt : public LibCallOptimization {
void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
// We need to find the end of the destination string. That's where the
// memory is to be moved to. We just generate a call to strlen.
- Value *DstLen = EmitStrLen(Dst, B);
+ Value *DstLen = EmitStrLen(Dst, B, TD);
// Now that we have the destination's length, we must index into the
// destination's pointer to get the actual memcpy destination (end of
@@ -623,7 +142,7 @@ struct StrCatOpt : public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
EmitMemCpy(CpyDst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B);
+ ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B, TD);
}
};
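The repeated one-line changes in this file (and in AddrModeMatcher.cpp further down) migrate type tests from isa<IntegerType>(T), T->isInteger(32) and isa<PointerType>(T) to the Type member predicates isIntegerTy(), isIntegerTy(32), isPointerTy() and isFloatingPointTy(). A minimal sketch of a prototype check in the new style; the helper name looksLikeStrCmp is hypothetical, and the predicates are the ones used throughout this patch.

static bool looksLikeStrCmp(const FunctionType *FT) {
  // Two pointer parameters and an i32 result, written with the new predicates.
  return FT->getNumParams() == 2 &&
         FT->getReturnType()->isIntegerTy(32) &&
         FT->getParamType(0)->isPointerTy() &&
         FT->getParamType(1)->isPointerTy();
}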
@@ -638,7 +157,7 @@ struct StrNCatOpt : public StrCatOpt {
FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
FT->getParamType(0) != FT->getReturnType() ||
FT->getParamType(1) != FT->getReturnType() ||
- !isa<IntegerType>(FT->getParamType(2)))
+ !FT->getParamType(2)->isIntegerTy())
return 0;
// Extract some information from the instruction
@@ -697,11 +216,12 @@ struct StrChrOpt : public LibCallOptimization {
if (!TD) return 0;
uint64_t Len = GetStringLength(SrcStr);
- if (Len == 0 || !FT->getParamType(1)->isInteger(32)) // memchr needs i32.
+    if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
return 0;
return EmitMemChr(SrcStr, CI->getOperand(2), // include nul.
- ConstantInt::get(TD->getIntPtrType(*Context), Len), B);
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ B, TD);
}
// Otherwise, the character is a constant, see if the first argument is
@@ -739,7 +259,7 @@ struct StrCmpOpt : public LibCallOptimization {
// Verify the "strcmp" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
- !FT->getReturnType()->isInteger(32) ||
+ !FT->getReturnType()->isIntegerTy(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(*Context))
return 0;
@@ -772,7 +292,7 @@ struct StrCmpOpt : public LibCallOptimization {
return EmitMemCmp(Str1P, Str2P,
ConstantInt::get(TD->getIntPtrType(*Context),
- std::min(Len1, Len2)), B);
+ std::min(Len1, Len2)), B, TD);
}
return 0;
@@ -787,10 +307,10 @@ struct StrNCmpOpt : public LibCallOptimization {
// Verify the "strncmp" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 ||
- !FT->getReturnType()->isInteger(32) ||
+ !FT->getReturnType()->isIntegerTy(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
- !isa<IntegerType>(FT->getParamType(2)))
+ !FT->getParamType(2)->isIntegerTy())
return 0;
Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
@@ -852,7 +372,7 @@ struct StrCpyOpt : public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
EmitMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B);
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD);
return Dst;
}
};
@@ -866,7 +386,7 @@ struct StrNCpyOpt : public LibCallOptimization {
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
- !isa<IntegerType>(FT->getParamType(2)))
+ !FT->getParamType(2)->isIntegerTy())
return 0;
Value *Dst = CI->getOperand(1);
@@ -881,7 +401,7 @@ struct StrNCpyOpt : public LibCallOptimization {
if (SrcLen == 0) {
// strncpy(x, "", y) -> memset(x, '\0', y, 1)
EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), LenOp,
- B);
+ B, TD);
return Dst;
}
@@ -901,7 +421,7 @@ struct StrNCpyOpt : public LibCallOptimization {
// strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
EmitMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B);
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD);
return Dst;
}
@@ -915,7 +435,7 @@ struct StrLenOpt : public LibCallOptimization {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 1 ||
FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
- !isa<IntegerType>(FT->getReturnType()))
+ !FT->getReturnType()->isIntegerTy())
return 0;
Value *Src = CI->getOperand(1);
@@ -939,8 +459,8 @@ struct StrToOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
- !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)))
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy())
return 0;
Value *EndPtr = CI->getOperand(2);
@@ -960,9 +480,9 @@ struct StrStrOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
- !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)) ||
- !isa<PointerType>(FT->getReturnType()))
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isPointerTy())
return 0;
// fold strstr(x, x) -> x.
@@ -993,7 +513,7 @@ struct StrStrOpt : public LibCallOptimization {
// fold strstr(x, "y") -> strchr(x, 'y').
if (HasStr2 && ToFindStr.size() == 1)
- return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B),
+ return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B, TD),
CI->getType());
return 0;
}
@@ -1006,9 +526,9 @@ struct StrStrOpt : public LibCallOptimization {
struct MemCmpOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)) ||
- !FT->getReturnType()->isInteger(32))
+ if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy(32))
return 0;
Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2);
@@ -1055,13 +575,14 @@ struct MemCpyOpt : public LibCallOptimization {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
FT->getParamType(2) != TD->getIntPtrType(*Context))
return 0;
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
- EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B);
+ EmitMemCpy(CI->getOperand(1), CI->getOperand(2),
+ CI->getOperand(3), 1, B, TD);
return CI->getOperand(1);
}
};
@@ -1076,13 +597,14 @@ struct MemMoveOpt : public LibCallOptimization {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
FT->getParamType(2) != TD->getIntPtrType(*Context))
return 0;
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
- EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B);
+ EmitMemMove(CI->getOperand(1), CI->getOperand(2),
+ CI->getOperand(3), 1, B, TD);
return CI->getOperand(1);
}
};
@@ -1097,137 +619,20 @@ struct MemSetOpt : public LibCallOptimization {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- !isa<PointerType>(FT->getParamType(0)) ||
- !isa<IntegerType>(FT->getParamType(1)) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
FT->getParamType(2) != TD->getIntPtrType(*Context))
return 0;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context),
false);
- EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B);
+ EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B, TD);
return CI->getOperand(1);
}
};
//===----------------------------------------------------------------------===//
-// Object Size Checking Optimizations
-//===----------------------------------------------------------------------===//
-
-//===---------------------------------------===//
-// 'memcpy_chk' Optimizations
-
-struct MemCpyChkOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // These optimizations require TargetData.
- if (!TD) return 0;
-
- const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
- !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)) ||
- !isa<IntegerType>(FT->getParamType(3)) ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
- return 0;
-
- ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
- if (!SizeCI)
- return 0;
- if (SizeCI->isAllOnesValue()) {
- EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B);
- return CI->getOperand(1);
- }
-
- return 0;
- }
-};
-
-//===---------------------------------------===//
-// 'memset_chk' Optimizations
-
-struct MemSetChkOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // These optimizations require TargetData.
- if (!TD) return 0;
-
- const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
- !isa<PointerType>(FT->getParamType(0)) ||
- !isa<IntegerType>(FT->getParamType(1)) ||
- !isa<IntegerType>(FT->getParamType(3)) ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
- return 0;
-
- ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
- if (!SizeCI)
- return 0;
- if (SizeCI->isAllOnesValue()) {
- Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context),
- false);
- EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B);
- return CI->getOperand(1);
- }
-
- return 0;
- }
-};
-
-//===---------------------------------------===//
-// 'memmove_chk' Optimizations
-
-struct MemMoveChkOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // These optimizations require TargetData.
- if (!TD) return 0;
-
- const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
- !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)) ||
- !isa<IntegerType>(FT->getParamType(3)) ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
- return 0;
-
- ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
- if (!SizeCI)
- return 0;
- if (SizeCI->isAllOnesValue()) {
- EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
- 1, B);
- return CI->getOperand(1);
- }
-
- return 0;
- }
-};
-
-struct StrCpyChkOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)))
- return 0;
-
- ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(3));
- if (!SizeCI)
- return 0;
-
- // If a) we don't have any length information, or b) we know this will
- // fit then just lower to a plain strcpy. Otherwise we'll keep our
- // strcpy_chk call which may fail at runtime if the size is too long.
- // TODO: It might be nice to get a maximum length out of the possible
- // string lengths for varying inputs.
- if (SizeCI->isAllOnesValue() ||
- SizeCI->getZExtValue() >= GetStringLength(CI->getOperand(2)))
- return EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B);
-
- return 0;
- }
-};
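The removed memcpy_chk, memset_chk and memmove_chk optimizers all share one gate: the trailing object-size argument must be the all-ones sentinel (size unknown, checking effectively disabled) before the call is lowered to its unchecked form; StrCpyChkOpt additionally accepts a size known to cover the source string. A minimal sketch of that shared gate, using only the dyn_cast and isAllOnesValue calls visible above; the helper name hasUnknownObjectSize is hypothetical.

static bool hasUnknownObjectSize(Value *SizeArg) {
  // A non-constant size, or anything other than the -1 sentinel, means the
  // runtime check must stay.
  ConstantInt *SizeCI = dyn_cast<ConstantInt>(SizeArg);
  return SizeCI && SizeCI->isAllOnesValue();
}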
-
-
-//===----------------------------------------------------------------------===//
// Math Library Optimizations
//===----------------------------------------------------------------------===//
@@ -1241,7 +646,7 @@ struct PowOpt : public LibCallOptimization {
// result type.
if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
- !FT->getParamType(0)->isFloatingPoint())
+ !FT->getParamType(0)->isFloatingPointTy())
return 0;
Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2);
@@ -1295,7 +700,7 @@ struct Exp2Opt : public LibCallOptimization {
// Just make sure this has 1 argument of FP type, which matches the
// result type.
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isFloatingPoint())
+ !FT->getParamType(0)->isFloatingPointTy())
return 0;
Value *Op = CI->getOperand(1);
@@ -1375,8 +780,8 @@ struct FFSOpt : public LibCallOptimization {
// Just make sure this has 1 integer argument and an i32 result, which is
// what ffs expects.
if (FT->getNumParams() != 1 ||
- !FT->getReturnType()->isInteger(32) ||
- !isa<IntegerType>(FT->getParamType(0)))
+ !FT->getReturnType()->isIntegerTy(32) ||
+ !FT->getParamType(0)->isIntegerTy())
return 0;
Value *Op = CI->getOperand(1);
@@ -1410,8 +815,8 @@ struct IsDigitOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
// We require integer(i32)
- if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
- !FT->getParamType(0)->isInteger(32))
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ !FT->getParamType(0)->isIntegerTy(32))
return 0;
// isdigit(c) -> (c-'0') <u 10
@@ -1431,8 +836,8 @@ struct IsAsciiOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
// We require integer(i32)
- if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
- !FT->getParamType(0)->isInteger(32))
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ !FT->getParamType(0)->isIntegerTy(32))
return 0;
// isascii(c) -> c <u 128
@@ -1450,7 +855,7 @@ struct AbsOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
// We require integer(integer) where the types agree.
- if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
FT->getParamType(0) != FT->getReturnType())
return 0;
@@ -1473,7 +878,7 @@ struct ToAsciiOpt : public LibCallOptimization {
const FunctionType *FT = Callee->getFunctionType();
// We require i32(i32)
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isInteger(32))
+ !FT->getParamType(0)->isIntegerTy(32))
return 0;
// toascii(c) -> c & 0x7f
@@ -1493,8 +898,8 @@ struct PrintFOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Require one fixed pointer argument and an integer/void result.
const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() < 1 || !isa<PointerType>(FT->getParamType(0)) ||
- !(isa<IntegerType>(FT->getReturnType()) ||
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() ||
FT->getReturnType()->isVoidTy()))
return 0;
@@ -1512,7 +917,7 @@ struct PrintFOpt : public LibCallOptimization {
// in case there is an error writing to stdout.
if (FormatStr.size() == 1) {
Value *Res = EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context),
- FormatStr[0]), B);
+ FormatStr[0]), B, TD);
if (CI->use_empty()) return CI;
return B.CreateIntCast(Res, CI->getType(), true);
}
@@ -1526,7 +931,7 @@ struct PrintFOpt : public LibCallOptimization {
Constant *C = ConstantArray::get(*Context, FormatStr, true);
C = new GlobalVariable(*Callee->getParent(), C->getType(), true,
GlobalVariable::InternalLinkage, C, "str");
- EmitPutS(C, B);
+ EmitPutS(C, B, TD);
return CI->use_empty() ? (Value*)CI :
ConstantInt::get(CI->getType(), FormatStr.size()+1);
}
@@ -1534,8 +939,8 @@ struct PrintFOpt : public LibCallOptimization {
// Optimize specific format strings.
// printf("%c", chr) --> putchar(*(i8*)dst)
if (FormatStr == "%c" && CI->getNumOperands() > 2 &&
- isa<IntegerType>(CI->getOperand(2)->getType())) {
- Value *Res = EmitPutChar(CI->getOperand(2), B);
+ CI->getOperand(2)->getType()->isIntegerTy()) {
+ Value *Res = EmitPutChar(CI->getOperand(2), B, TD);
if (CI->use_empty()) return CI;
return B.CreateIntCast(Res, CI->getType(), true);
@@ -1543,9 +948,9 @@ struct PrintFOpt : public LibCallOptimization {
// printf("%s\n", str) --> puts(str)
if (FormatStr == "%s\n" && CI->getNumOperands() > 2 &&
- isa<PointerType>(CI->getOperand(2)->getType()) &&
+ CI->getOperand(2)->getType()->isPointerTy() &&
CI->use_empty()) {
- EmitPutS(CI->getOperand(2), B);
+ EmitPutS(CI->getOperand(2), B, TD);
return CI;
}
return 0;
@@ -1559,9 +964,9 @@ struct SPrintFOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Require two fixed pointer arguments and an integer result.
const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)) ||
- !isa<IntegerType>(FT->getReturnType()))
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
return 0;
// Check for a fixed format string.
@@ -1582,8 +987,8 @@ struct SPrintFOpt : public LibCallOptimization {
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte.
- ConstantInt::get
- (TD->getIntPtrType(*Context), FormatStr.size()+1),1,B);
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ FormatStr.size()+1), 1, B, TD);
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1595,7 +1000,7 @@ struct SPrintFOpt : public LibCallOptimization {
// Decode the second character of the format string.
if (FormatStr[1] == 'c') {
// sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
- if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0;
+ if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0;
Value *V = B.CreateTrunc(CI->getOperand(3),
Type::getInt8Ty(*Context), "char");
Value *Ptr = CastToCStr(CI->getOperand(1), B);
@@ -1612,13 +1017,13 @@ struct SPrintFOpt : public LibCallOptimization {
if (!TD) return 0;
// sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
- if (!isa<PointerType>(CI->getOperand(3)->getType())) return 0;
+ if (!CI->getOperand(3)->getType()->isPointerTy()) return 0;
- Value *Len = EmitStrLen(CI->getOperand(3), B);
+ Value *Len = EmitStrLen(CI->getOperand(3), B, TD);
Value *IncLen = B.CreateAdd(Len,
ConstantInt::get(Len->getType(), 1),
"leninc");
- EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B);
+ EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B, TD);
// The sprintf result is the unincremented number of bytes in the string.
return B.CreateIntCast(Len, CI->getType(), false);
@@ -1634,11 +1039,11 @@ struct FWriteOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Require a pointer, an integer, an integer, a pointer, returning integer.
const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 4 || !isa<PointerType>(FT->getParamType(0)) ||
- !isa<IntegerType>(FT->getParamType(1)) ||
- !isa<IntegerType>(FT->getParamType(2)) ||
- !isa<PointerType>(FT->getParamType(3)) ||
- !isa<IntegerType>(FT->getReturnType()))
+ if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ !FT->getParamType(3)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
return 0;
// Get the element size and count.
@@ -1654,7 +1059,7 @@ struct FWriteOpt : public LibCallOptimization {
// If this is writing one byte, turn it into fputc.
if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char");
- EmitFPutC(Char, CI->getOperand(4), B);
+ EmitFPutC(Char, CI->getOperand(4), B, TD);
return ConstantInt::get(CI->getType(), 1);
}
@@ -1672,8 +1077,8 @@ struct FPutsOpt : public LibCallOptimization {
// Require two pointers. Also, we can't optimize if return value is used.
const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)) ||
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
!CI->use_empty())
return 0;
@@ -1682,7 +1087,7 @@ struct FPutsOpt : public LibCallOptimization {
if (!Len) return 0;
EmitFWrite(CI->getOperand(1),
ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
- CI->getOperand(2), B);
+ CI->getOperand(2), B, TD);
return CI; // Known to have no uses (see above).
}
};
@@ -1694,9 +1099,9 @@ struct FPrintFOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Require two fixed pointer parameters and an integer result.
const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)) ||
- !isa<IntegerType>(FT->getReturnType()))
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
return 0;
// All the optimizations depend on the format string.
@@ -1716,7 +1121,7 @@ struct FPrintFOpt : public LibCallOptimization {
EmitFWrite(CI->getOperand(2),
ConstantInt::get(TD->getIntPtrType(*Context),
FormatStr.size()),
- CI->getOperand(1), B);
+ CI->getOperand(1), B, TD);
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1728,16 +1133,16 @@ struct FPrintFOpt : public LibCallOptimization {
// Decode the second character of the format string.
if (FormatStr[1] == 'c') {
// fprintf(F, "%c", chr) --> *(i8*)dst = chr
- if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0;
- EmitFPutC(CI->getOperand(3), CI->getOperand(1), B);
+ if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0;
+ EmitFPutC(CI->getOperand(3), CI->getOperand(1), B, TD);
return ConstantInt::get(CI->getType(), 1);
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) -> fputs(str, F)
- if (!isa<PointerType>(CI->getOperand(3)->getType()) || !CI->use_empty())
+ if (!CI->getOperand(3)->getType()->isPointerTy() || !CI->use_empty())
return 0;
- EmitFPutS(CI->getOperand(3), CI->getOperand(1), B);
+ EmitFPutS(CI->getOperand(3), CI->getOperand(1), B, TD);
return CI;
}
return 0;
@@ -1769,10 +1174,6 @@ namespace {
SPrintFOpt SPrintF; PrintFOpt PrintF;
FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
- // Object Size Checking
- MemCpyChkOpt MemCpyChk; MemSetChkOpt MemSetChk; MemMoveChkOpt MemMoveChk;
- StrCpyChkOpt StrCpyChk;
-
bool Modified; // This is only used by doInitialization.
public:
static char ID; // Pass identification
@@ -1878,12 +1279,6 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["fwrite"] = &FWrite;
Optimizations["fputs"] = &FPuts;
Optimizations["fprintf"] = &FPrintF;
-
- // Object Size Checking
- Optimizations["__memcpy_chk"] = &MemCpyChk;
- Optimizations["__memset_chk"] = &MemSetChk;
- Optimizations["__memmove_chk"] = &MemMoveChk;
- Optimizations["__strcpy_chk"] = &StrCpyChk;
}
@@ -2000,7 +1395,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 's':
if (Name == "strlen") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setOnlyReadsMemory(F);
setDoesNotThrow(F);
@@ -2018,14 +1413,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "strncpy" ||
Name == "strtoull") {
if (FTy->getNumParams() < 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
} else if (Name == "strxfrm") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2038,8 +1433,8 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "strcasecmp" ||
Name == "strncasecmp") {
if (FTy->getNumParams() < 2 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setOnlyReadsMemory(F);
setDoesNotThrow(F);
@@ -2048,7 +1443,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
} else if (Name == "strstr" ||
Name == "strpbrk") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setOnlyReadsMemory(F);
setDoesNotThrow(F);
@@ -2056,7 +1451,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
} else if (Name == "strtok" ||
Name == "strtok_r") {
if (FTy->getNumParams() < 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
@@ -2064,15 +1459,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "setbuf" ||
Name == "setvbuf") {
if (FTy->getNumParams() < 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "strdup" ||
Name == "strndup") {
if (FTy->getNumParams() < 1 ||
- !isa<PointerType>(FTy->getReturnType()) ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
@@ -2082,31 +1477,31 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "sprintf" ||
Name == "statvfs") {
if (FTy->getNumParams() < 2 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
} else if (Name == "snprintf") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(2)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 3);
} else if (Name == "setitimer") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(1)) ||
- !isa<PointerType>(FTy->getParamType(2)))
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
setDoesNotCapture(F, 3);
} else if (Name == "system") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
// May throw; "system" is a valid pthread cancellation point.
setDoesNotCapture(F, 1);
@@ -2115,14 +1510,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'm':
if (Name == "malloc") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getReturnType()))
+ !FTy->getReturnType()->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
} else if (Name == "memcmp") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setOnlyReadsMemory(F);
setDoesNotThrow(F);
@@ -2141,18 +1536,18 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "memccpy" ||
Name == "memmove") {
if (FTy->getNumParams() < 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
} else if (Name == "memalign") {
- if (!isa<PointerType>(FTy->getReturnType()))
+ if (!FTy->getReturnType()->isPointerTy())
continue;
setDoesNotAlias(F, 0);
} else if (Name == "mkdir" ||
Name == "mktime") {
if (FTy->getNumParams() == 0 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2161,15 +1556,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'r':
if (Name == "realloc") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getReturnType()))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getReturnType()->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
setDoesNotCapture(F, 1);
} else if (Name == "read") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
// May throw; "read" is a valid pthread cancellation point.
setDoesNotCapture(F, 2);
@@ -2178,15 +1573,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "remove" ||
Name == "realpath") {
if (FTy->getNumParams() < 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "rename" ||
Name == "readlink") {
if (FTy->getNumParams() < 2 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2196,7 +1591,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'w':
if (Name == "write") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
// May throw; "write" is a valid pthread cancellation point.
setDoesNotCapture(F, 2);
@@ -2205,16 +1600,16 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'b':
if (Name == "bcopy") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
} else if (Name == "bcmp") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setOnlyReadsMemory(F);
@@ -2222,7 +1617,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotCapture(F, 2);
} else if (Name == "bzero") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2231,7 +1626,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'c':
if (Name == "calloc") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getReturnType()))
+ !FTy->getReturnType()->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
@@ -2241,7 +1636,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "clearerr" ||
Name == "closedir") {
if (FTy->getNumParams() == 0 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2253,14 +1648,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "atof" ||
Name == "atoll") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setOnlyReadsMemory(F);
setDoesNotCapture(F, 1);
} else if (Name == "access") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2269,9 +1664,9 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'f':
if (Name == "fopen") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getReturnType()) ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
@@ -2279,8 +1674,8 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotCapture(F, 2);
} else if (Name == "fdopen") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getReturnType()) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
@@ -2300,13 +1695,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "funlockfile" ||
Name == "ftrylockfile") {
if (FTy->getNumParams() == 0 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "ferror") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2318,22 +1713,22 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "frexpl" ||
Name == "fstatvfs") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
} else if (Name == "fgets") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(2)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 3);
} else if (Name == "fread" ||
Name == "fwrite") {
if (FTy->getNumParams() != 4 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(3)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2343,8 +1738,8 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "fprintf" ||
Name == "fgetpos") {
if (FTy->getNumParams() < 2 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2356,13 +1751,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "getlogin_r" ||
Name == "getc_unlocked") {
if (FTy->getNumParams() == 0 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "getenv") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setOnlyReadsMemory(F);
@@ -2372,13 +1767,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
} else if (Name == "getitimer") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
} else if (Name == "getpwnam") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2387,7 +1782,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'u':
if (Name == "ungetc") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
@@ -2395,15 +1790,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "unlink" ||
Name == "unsetenv") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "utime" ||
Name == "utimes") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2413,7 +1808,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'p':
if (Name == "putc") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
@@ -2421,14 +1816,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "printf" ||
Name == "perror") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "pread" ||
Name == "pwrite") {
if (FTy->getNumParams() != 4 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
// May throw; these are valid pthread cancellation points.
setDoesNotCapture(F, 2);
@@ -2436,9 +1831,9 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
} else if (Name == "popen") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getReturnType()) ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
@@ -2446,7 +1841,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotCapture(F, 2);
} else if (Name == "pclose") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2455,43 +1850,43 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'v':
if (Name == "vscanf") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "vsscanf" ||
Name == "vfscanf") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(1)) ||
- !isa<PointerType>(FTy->getParamType(2)))
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
} else if (Name == "valloc") {
- if (!isa<PointerType>(FTy->getReturnType()))
+ if (!FTy->getReturnType()->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
} else if (Name == "vprintf") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "vfprintf" ||
Name == "vsprintf") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
} else if (Name == "vsnprintf") {
if (FTy->getNumParams() != 4 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(2)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2501,14 +1896,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'o':
if (Name == "open") {
if (FTy->getNumParams() < 2 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
// May throw; "open" is a valid pthread cancellation point.
setDoesNotCapture(F, 1);
} else if (Name == "opendir") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getReturnType()) ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
@@ -2517,13 +1912,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
break;
case 't':
if (Name == "tmpfile") {
- if (!isa<PointerType>(FTy->getReturnType()))
+ if (!FTy->getReturnType()->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
} else if (Name == "times") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2546,15 +1941,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'l':
if (Name == "lstat") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
} else if (Name == "lchown") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2563,7 +1958,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'q':
if (Name == "qsort") {
if (FTy->getNumParams() != 4 ||
- !isa<PointerType>(FTy->getParamType(3)))
+ !FTy->getParamType(3)->isPointerTy())
continue;
// May throw; places call through function pointer.
setDoesNotCapture(F, 4);
@@ -2573,27 +1968,27 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
if (Name == "__strdup" ||
Name == "__strndup") {
if (FTy->getNumParams() < 1 ||
- !isa<PointerType>(FTy->getReturnType()) ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
setDoesNotCapture(F, 1);
} else if (Name == "__strtok_r") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
} else if (Name == "_IO_getc") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "_IO_putc") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
@@ -2602,7 +1997,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 1:
if (Name == "\1__isoc99_scanf") {
if (FTy->getNumParams() < 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2611,17 +2006,17 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "\1statvfs64" ||
Name == "\1__isoc99_sscanf") {
if (FTy->getNumParams() < 1 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
} else if (Name == "\1fopen64") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getReturnType()) ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
@@ -2630,25 +2025,25 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
} else if (Name == "\1fseeko64" ||
Name == "\1ftello64") {
if (FTy->getNumParams() == 0 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "\1tmpfile64") {
- if (!isa<PointerType>(FTy->getReturnType()))
+ if (!FTy->getReturnType()->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
} else if (Name == "\1fstat64" ||
Name == "\1fstatvfs64") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
} else if (Name == "\1open64") {
if (FTy->getNumParams() < 2 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
// May throw; "open" is a valid pthread cancellation point.
setDoesNotCapture(F, 1);
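Every branch of doInitialization above follows the same shape: verify the declaration's prototype (arity plus the pointer/integer predicates), then mark it with nounwind, nocapture and read-only style attributes through the pass's setter helpers. A sketch of that pattern for a strlen-like prototype; annotateStrlenLike is a hypothetical name, and the setters are assumed to be the same member helpers called in the hunks above.

void annotateStrlenLike(Function &F, const FunctionType *FTy) {
  if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
    return;                  // prototype doesn't match, leave it untouched
  setOnlyReadsMemory(F);     // only reads the string it is handed
  setDoesNotThrow(F);
  setDoesNotCapture(F, 1);   // the string pointer does not escape
}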
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index 8c4aa59..be6b383 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -125,7 +125,7 @@ static bool MightBeFoldableInst(Instruction *I) {
// Don't touch identity bitcasts.
if (I->getType() == I->getOperand(0)->getType())
return false;
- return isa<PointerType>(I->getType()) || isa<IntegerType>(I->getType());
+ return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
case Instruction::PtrToInt:
// PtrToInt is always a noop, as we know that the int type is pointer sized.
return true;
@@ -167,8 +167,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
case Instruction::BitCast:
// BitCast is always a noop, and we can handle it as long as it is
// int->int or pointer->pointer (we don't want int<->fp or something).
- if ((isa<PointerType>(AddrInst->getOperand(0)->getType()) ||
- isa<IntegerType>(AddrInst->getOperand(0)->getType())) &&
+ if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
+ AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
// Don't touch identity bitcasts. These were probably put here by LSR,
// and we don't want to mess around with them. Assume it knows what it
// is doing.
@@ -569,7 +569,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// Get the access type of this use. If the use isn't a pointer, we don't
// know what it accesses.
Value *Address = User->getOperand(OpNo);
- if (!isa<PointerType>(Address->getType()))
+ if (!Address->getType()->isPointerTy())
return false;
const Type *AddressAccessTy =
cast<PointerType>(Address->getType())->getElementType();
diff --git a/lib/Transforms/Utils/Android.mk b/lib/Transforms/Utils/Android.mk
new file mode 100644
index 0000000..d9f31d7
--- /dev/null
+++ b/lib/Transforms/Utils/Android.mk
@@ -0,0 +1,49 @@
+LOCAL_PATH:= $(call my-dir)
+
+transforms_utils_SRC_FILES := \
+ AddrModeMatcher.cpp \
+ BasicBlockUtils.cpp \
+ BasicInliner.cpp \
+ BreakCriticalEdges.cpp \
+ CloneFunction.cpp \
+ CloneLoop.cpp \
+ CloneModule.cpp \
+ CodeExtractor.cpp \
+ DemoteRegToStack.cpp \
+ InlineFunction.cpp \
+ InstructionNamer.cpp \
+ LCSSA.cpp \
+ Local.cpp \
+ LoopSimplify.cpp \
+ LoopUnroll.cpp \
+ LowerInvoke.cpp \
+ LowerSwitch.cpp \
+ Mem2Reg.cpp \
+ PromoteMemoryToRegister.cpp \
+ SSAUpdater.cpp \
+ SSI.cpp \
+ SimplifyCFG.cpp \
+ UnifyFunctionExitNodes.cpp \
+ ValueMapper.cpp
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(transforms_utils_SRC_FILES)
+LOCAL_MODULE:= libLLVMTransformUtils
+
+include $(LLVM_HOST_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(transforms_utils_SRC_FILES)
+LOCAL_MODULE:= libLLVMTransformUtils
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 7bc4fcd..1f62dab 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -274,24 +274,31 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
ReplaceInstWithInst(TI, NewTI);
}
-/// SplitEdge - Split the edge connecting specified block. Pass P must
-/// not be NULL.
-BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
- TerminatorInst *LatchTerm = BB->getTerminator();
- unsigned SuccNum = 0;
+/// GetSuccessorNumber - Search for the specified successor of basic block BB
+/// and return its position in the terminator instruction's list of
+/// successors. It is an error to call this with a block that is not a
+/// successor.
+unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) {
+ TerminatorInst *Term = BB->getTerminator();
#ifndef NDEBUG
- unsigned e = LatchTerm->getNumSuccessors();
+ unsigned e = Term->getNumSuccessors();
#endif
for (unsigned i = 0; ; ++i) {
assert(i != e && "Didn't find edge?");
- if (LatchTerm->getSuccessor(i) == Succ) {
- SuccNum = i;
- break;
- }
+ if (Term->getSuccessor(i) == Succ)
+ return i;
}
+ return 0;
+}
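GetSuccessorNumber factors the successor search out of SplitEdge so other callers can map a (block, successor) pair to an operand index. A minimal usage sketch, assuming BB and Succ are BasicBlock pointers and Succ really is a successor of BB, as the assertion above requires:

// Translate the edge BB -> Succ into the terminator's successor slot.
unsigned SuccNum = GetSuccessorNumber(BB, Succ);
TerminatorInst *Term = BB->getTerminator();
assert(Term->getSuccessor(SuccNum) == Succ && "index should round-trip");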
+
+/// SplitEdge - Split the edge connecting specified block. Pass P must
+/// not be NULL.
+BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
+ unsigned SuccNum = GetSuccessorNumber(BB, Succ);
// If this is a critical edge, let SplitCriticalEdge do it.
- if (SplitCriticalEdge(BB->getTerminator(), SuccNum, P))
+ TerminatorInst *LatchTerm = BB->getTerminator();
+ if (SplitCriticalEdge(LatchTerm, SuccNum, P))
return LatchTerm->getSuccessor(SuccNum);
// If the edge isn't critical, then BB has a single successor or Succ has a
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 19c7206..3657390 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -179,7 +179,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// Create a new basic block, linking it into the CFG.
BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
- // Create our unconditional branch...
+ // Create our unconditional branch.
BranchInst::Create(DestBB, NewBB);
// Branch to the new block, breaking the edge.
@@ -192,16 +192,47 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// If there are any PHI nodes in DestBB, we need to update them so that they
// merge incoming values from NewBB instead of from TIBB.
- //
- for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- // We no longer enter through TIBB, now we come in through NewBB. Revector
- // exactly one entry in the PHI node that used to come from TIBB to come
- // from NewBB.
- int BBIdx = PN->getBasicBlockIndex(TIBB);
- PN->setIncomingBlock(BBIdx, NewBB);
+ if (PHINode *APHI = dyn_cast<PHINode>(DestBB->begin())) {
+ // This conceptually does:
+ // foreach (PHINode *PN in DestBB)
+ // PN->setIncomingBlock(PN->getIncomingBlock(TIBB), NewBB);
+ // but is optimized for two cases.
+
+ if (APHI->getNumIncomingValues() <= 8) { // Small # preds case.
+ unsigned BBIdx = 0;
+ for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
+ // We no longer enter through TIBB; now we come in through NewBB.
+ // Revector exactly one entry in the PHI node that used to come from
+ // TIBB to come from NewBB.
+ PHINode *PN = cast<PHINode>(I);
+
+ // Reuse the previous value of BBIdx if it lines up. In cases where we
+ // have multiple phi nodes with *lots* of predecessors, this is a speed
+ // win because we don't have to scan the PHI looking for TIBB. This
+ // happens because the BB lists of PHI nodes are usually in the same
+ // order.
+ if (PN->getIncomingBlock(BBIdx) != TIBB)
+ BBIdx = PN->getBasicBlockIndex(TIBB);
+ PN->setIncomingBlock(BBIdx, NewBB);
+ }
+ } else {
+ // However, the foreach loop is slow for blocks with lots of predecessors
+ // because PHINode::getIncomingBlock is O(n) in # preds. Instead, walk
+ // the user list of TIBB to find the PHI nodes.
+ SmallPtrSet<PHINode*, 16> UpdatedPHIs;
+
+ for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end();
+ UI != E; ) {
+ Value::use_iterator Use = UI++;
+ if (PHINode *PN = dyn_cast<PHINode>(Use)) {
+ // Remove one entry from each PHI.
+ if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN))
+ PN->setOperand(Use.getOperandNo(), NewBB);
+ }
+ }
+ }
}
-
+
// If there are any other edges from TIBB to DestBB, update those to go
// through the split block, making those edges non-critical as well (and
// reducing the number of phi entries in the DestBB if relevant).
@@ -221,6 +252,15 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// If we don't have a pass object, we can't update anything...
if (P == 0) return NewBB;
+
+ DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+ DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>();
+ LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
+ ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
+
+ // If we have nothing to update, just return.
+ if (DT == 0 && DF == 0 && LI == 0 && PI == 0)
+ return NewBB;
// Now update analysis information. Since the only predecessor of NewBB is
// the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate
@@ -229,14 +269,23 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// loop header) then NewBB dominates DestBB.
SmallVector<BasicBlock*, 8> OtherPreds;
- for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; ++I)
- if (*I != NewBB)
- OtherPreds.push_back(*I);
+ // If there is a PHI in the block, loop over predecessors with it, which is
+ // faster than iterating pred_begin/end.
+ if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingBlock(i) != NewBB)
+ OtherPreds.push_back(PN->getIncomingBlock(i));
+ } else {
+ for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB);
+ I != E; ++I)
+ if (*I != NewBB)
+ OtherPreds.push_back(*I);
+ }
bool NewBBDominatesDestBB = true;
// Should we update DominatorTree information?
- if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+ if (DT) {
DomTreeNode *TINode = DT->getNode(TIBB);
// The new block is not the immediate dominator for any other nodes, but
@@ -267,7 +316,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
}
// Should we update DominanceFrontier information?
- if (DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>()) {
+ if (DF) {
// If NewBBDominatesDestBB hasn't been computed yet, do so with DF.
if (!OtherPreds.empty()) {
// FIXME: IMPLEMENT THIS!
@@ -301,7 +350,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
}
// Update LoopInfo if it is around.
- if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) {
+ if (LI) {
if (Loop *TIL = LI->getLoopFor(TIBB)) {
// If one or the other blocks were not in a loop, the new block is not
// either, and thus LI doesn't need to be updated.
@@ -382,9 +431,8 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
}
// Update ProfileInfo if it is around.
- if (ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>()) {
- PI->splitEdge(TIBB,DestBB,NewBB,MergeIdenticalEdges);
- }
+ if (PI)
+ PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges);
return NewBB;
}
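
An aside on the analysis-caching change above, with everything below being an assumption of the example rather than part of the patch: the caller-visible contract of SplitCriticalEdge does not change. A pass still hands itself in as the Pass* argument so that whichever of DominatorTree, DominanceFrontier, LoopInfo and ProfileInfo it keeps alive get updated by the code above. A minimal, hypothetical pass using it that way might look like this (2010-era pass idiom, with a made-up name):

  #include "llvm/Function.h"
  #include "llvm/Instructions.h"
  #include "llvm/Pass.h"
  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  using namespace llvm;

  namespace {
    // Hypothetical pass: attempts to split every outgoing edge in the function.
    struct SplitAllCriticalEdges : public FunctionPass {
      static char ID;
      SplitAllCriticalEdges() : FunctionPass(&ID) {}

      virtual bool runOnFunction(Function &F) {
        bool Changed = false;
        for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
          TerminatorInst *TI = BB->getTerminator();
          for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
            // Passing 'this' lets SplitCriticalEdge update any analyses this
            // pass keeps available; passing 0 would skip those updates.
            if (SplitCriticalEdge(TI, i, this))
              Changed = true;
        }
        return Changed;
      }
    };
  }
  char SplitAllCriticalEdges::ID = 0;
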
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
new file mode 100644
index 0000000..2ea4bb6
--- /dev/null
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -0,0 +1,324 @@
+//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some functions that will create standard C libcalls.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Type.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Intrinsics.h"
+
+using namespace llvm;
+
+/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
+Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
+ return B.CreateBitCast(V, B.getInt8PtrTy(), "cstr");
+}
+
+/// EmitStrLen - Emit a call to the strlen function to the builder, for the
+/// specified pointer. This always returns an integer value of size intptr_t.
+Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+ Attribute::NoUnwind);
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2),
+ TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ NULL);
+ CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+ if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitStrChr - Emit a call to the strchr function to the builder, for the
+/// specified pointer and character. Ptr is required to be some pointer type,
+/// and the return value has 'i8*' type.
+Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+ const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI =
+ AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
+
+ const Type *I8Ptr = B.getInt8PtrTy();
+ const Type *I32Ty = B.getInt32Ty();
+ Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1),
+ I8Ptr, I8Ptr, I32Ty, NULL);
+ CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
+ ConstantInt::get(I32Ty, C), "strchr");
+ if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
+/// specified pointer arguments.
+Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+ const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ const Type *I8Ptr = B.getInt8PtrTy();
+ Value *StrCpy = M->getOrInsertFunction("strcpy", AttrListPtr::get(AWI, 2),
+ I8Ptr, I8Ptr, I8Ptr, NULL);
+ CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
+ "strcpy");
+ if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always
+/// expects that the size has type 'intptr_t' and Dst/Src are pointers.
+Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len,
+ unsigned Align, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ const Type *Ty = Len->getType();
+ Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, &Ty, 1);
+ Dst = CastToCStr(Dst, B);
+ Src = CastToCStr(Src, B);
+ return B.CreateCall4(MemCpy, Dst, Src, Len,
+ ConstantInt::get(B.getInt32Ty(), Align));
+}
+
+/// EmitMemMove - Emit a call to the memmove function to the builder. This
+/// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
+Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len,
+ unsigned Align, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ const Type *Ty = TD->getIntPtrType(Context);
+ Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, &Ty, 1);
+ Dst = CastToCStr(Dst, B);
+ Src = CastToCStr(Src, B);
+ Value *A = ConstantInt::get(B.getInt32Ty(), Align);
+ return B.CreateCall4(MemMove, Dst, Src, Len, A);
+}
+
+/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
+/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
+Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
+ Value *Len, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI;
+ AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ B.getInt32Ty(),
+ TD->getIntPtrType(Context),
+ NULL);
+ CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
+
+ if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitMemCmp - Emit a call to the memcmp function.
+Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
+ Value *Len, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+ Attribute::NoUnwind);
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context), NULL);
+ CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
+ Len, "memcmp");
+
+ if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitMemSet - Emit a call to the memset function.
+Value *llvm::EmitMemSet(Value *Dst, Value *Val,
+ Value *Len, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Intrinsic::ID IID = Intrinsic::memset;
+ const Type *Tys[1];
+ Tys[0] = Len->getType();
+ Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1);
+ Value *Align = ConstantInt::get(B.getInt32Ty(), 1);
+ return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align);
+}
+
+/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
+/// 'floor'). This function is known to take a single argument of type matching
+/// 'Op' and return one value with the same type. If 'Op' is a long double, an
+/// 'l' suffix is appended to the name; if 'Op' is a float, an 'f' suffix is
+/// appended.
+Value *llvm::EmitUnaryFloatFnCall(Value *Op, const char *Name,
+ IRBuilder<> &B, const AttrListPtr &Attrs) {
+ char NameBuffer[20];
+ if (!Op->getType()->isDoubleTy()) {
+ // If we need to add a suffix, copy into NameBuffer.
+ unsigned NameLen = strlen(Name);
+ assert(NameLen < sizeof(NameBuffer)-2);
+ memcpy(NameBuffer, Name, NameLen);
+ if (Op->getType()->isFloatTy())
+ NameBuffer[NameLen] = 'f'; // floorf
+ else
+ NameBuffer[NameLen] = 'l'; // floorl
+ NameBuffer[NameLen+1] = 0;
+ Name = NameBuffer;
+ }
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Op->getType(), NULL);
+ CallInst *CI = B.CreateCall(Callee, Op, Name);
+ CI->setAttributes(Attrs);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
+/// is an integer.
+Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(),
+ B.getInt32Ty(), NULL);
+ CallInst *CI = B.CreateCall(PutChar,
+ B.CreateIntCast(Char,
+ B.getInt32Ty(),
+ /*isSigned*/true,
+ "chari"),
+ "putchar");
+
+ if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
+/// EmitPutS - Emit a call to the puts function. This assumes that Str is
+/// some pointer.
+void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+
+ Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ NULL);
+ CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
+ if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+}
+
+/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
+/// an integer and File is a pointer to FILE.
+void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
+ const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ Constant *F;
+ if (File->getType()->isPointerTy())
+ F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2),
+ B.getInt32Ty(),
+ B.getInt32Ty(), File->getType(),
+ NULL);
+ else
+ F = M->getOrInsertFunction("fputc",
+ B.getInt32Ty(),
+ B.getInt32Ty(),
+ File->getType(), NULL);
+ Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
+ "chari");
+ CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+}
+
+/// EmitFPutS - Emit a call to the fputs function. Str is required to be a
+/// pointer and File is a pointer to FILE.
+void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
+ const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ Constant *F;
+ if (File->getType()->isPointerTy())
+ F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ File->getType(), NULL);
+ else
+ F = M->getOrInsertFunction("fputs", B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ File->getType(), NULL);
+ CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+}
+
+/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
+/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
+void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
+ IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Constant *F;
+ if (File->getType()->isPointerTy())
+ F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3),
+ TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ TD->getIntPtrType(Context),
+ File->getType(), NULL);
+ else
+ F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ TD->getIntPtrType(Context),
+ File->getType(), NULL);
+ CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
+ ConstantInt::get(TD->getIntPtrType(Context), 1), File);
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
+}
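
To make the intent of the new file concrete, here is a small usage sketch (illustrative only; the helper name, the InsertBefore instruction and the requirement that TD be non-null are assumptions of the example, not something the patch adds):

  #include "llvm/Instructions.h"
  #include "llvm/Support/IRBuilder.h"
  #include "llvm/Target/TargetData.h"
  #include "llvm/Transforms/Utils/BuildLibCalls.h"
  using namespace llvm;

  // Hypothetical helper: emit "strlen(Str)" immediately before InsertBefore and
  // return the intptr_t-sized length value. TD must be non-null because
  // EmitStrLen uses it to pick the strlen return type.
  static Value *emitLengthOf(Value *Str, Instruction *InsertBefore,
                             const TargetData *TD) {
    IRBuilder<> B(InsertBefore->getParent()->getContext());
    // Position the builder so the call is inserted right before InsertBefore.
    B.SetInsertPoint(InsertBefore->getParent(), InsertBefore);
    return EmitStrLen(Str, B, TD); // declares strlen in the module if needed
  }
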
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 93577b4..dec227a 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -3,6 +3,7 @@ add_llvm_library(LLVMTransformUtils
BasicBlockUtils.cpp
BasicInliner.cpp
BreakCriticalEdges.cpp
+ BuildLibCalls.cpp
CloneFunction.cpp
CloneLoop.cpp
CloneModule.cpp
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 7e7973a..d03f7a6 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -46,7 +46,7 @@ using namespace llvm;
static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD,
uint64_t &ByteOffset,
unsigned MaxLookup = 6) {
- if (!isa<PointerType>(V->getType()))
+ if (!V->getType()->isPointerTy())
return V;
for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
@@ -65,7 +65,7 @@ static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD,
} else {
return V;
}
- assert(isa<PointerType>(V->getType()) && "Unexpected operand type!");
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
}
return V;
}
@@ -490,6 +490,17 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
// Splice all the instructions from PredBB to DestBB.
PredBB->getTerminator()->eraseFromParent();
DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
+
+ // Zap anything that took the address of DestBB. Not doing this will give the
+ // address an invalid value.
+ if (DestBB->hasAddressTaken()) {
+ BlockAddress *BA = BlockAddress::get(DestBB);
+ Constant *Replacement =
+ ConstantInt::get(llvm::Type::getInt32Ty(BA->getContext()), 1);
+ BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement,
+ BA->getType()));
+ BA->destroyConstant();
+ }
// Anything that branched to PredBB now branches to DestBB.
PredBB->replaceAllUsesWith(DestBB);
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 57bab60..924b744 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -51,6 +51,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
@@ -147,6 +148,11 @@ ReprocessLoop:
// Delete each unique out-of-loop (and thus dead) predecessor.
for (SmallPtrSet<BasicBlock *, 4>::iterator I = BadPreds.begin(),
E = BadPreds.end(); I != E; ++I) {
+
+ DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor ";
+ WriteAsOperand(dbgs(), *I, false);
+ dbgs() << "\n");
+
// Inform each successor of each dead pred.
for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
(*SI)->removePredecessor(*I);
@@ -159,6 +165,27 @@ ReprocessLoop:
}
}
+ // If there are exiting blocks with branches on undef, resolve the undef in
+ // the direction which will exit the loop. This will help simplify loop
+ // trip count computations.
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
+ E = ExitingBlocks.end(); I != E; ++I)
+ if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator()))
+ if (BI->isConditional()) {
+ if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
+
+ DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in ";
+ WriteAsOperand(dbgs(), *I, false);
+ dbgs() << "\n");
+
+ BI->setCondition(ConstantInt::get(Cond->getType(),
+ !L->contains(BI->getSuccessor(0))));
+ Changed = true;
+ }
+ }
+
// Does the loop already have a preheader? If so, don't insert one.
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
@@ -250,8 +277,6 @@ ReprocessLoop:
break;
}
if (UniqueExit) {
- SmallVector<BasicBlock*, 8> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BasicBlock *ExitingBlock = ExitingBlocks[i];
if (!ExitingBlock->getSinglePredecessor()) continue;
@@ -282,6 +307,11 @@ ReprocessLoop:
// Success. The block is now dead, so remove it from the loop,
// update the dominator tree and dominance frontier, and delete it.
+
+ DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block ";
+ WriteAsOperand(dbgs(), ExitingBlock, false);
+ dbgs() << "\n");
+
assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
Changed = true;
LI->removeBlock(ExitingBlock);
@@ -335,6 +365,10 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
".preheader", this);
+ DEBUG(dbgs() << "LoopSimplify: Creating pre-header ";
+ WriteAsOperand(dbgs(), NewBB, false);
+ dbgs() << "\n");
+
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L);
@@ -360,6 +394,10 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
LoopBlocks.size(), ".loopexit",
this);
+ DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block ";
+ WriteAsOperand(dbgs(), NewBB, false);
+ dbgs() << "\n");
+
return NewBB;
}
@@ -480,6 +518,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
OuterLoopPreds.push_back(PN->getIncomingBlock(i));
}
+ DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
+
BasicBlock *Header = L->getHeader();
BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0],
OuterLoopPreds.size(),
@@ -574,6 +614,10 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
Header->getName()+".backedge", F);
BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
+ DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block ";
+ WriteAsOperand(dbgs(), BEBlock, false);
+ dbgs() << "\n");
+
// Move the new backedge block to right after the last backedge block.
Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos;
F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 544e20b..4f5a70b 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -518,7 +518,7 @@ void PromoteMem2Reg::run() {
// If this PHI node merges one value and/or undefs, get the value.
if (Value *V = PN->hasConstantValue(&DT)) {
- if (AST && isa<PointerType>(PN->getType()))
+ if (AST && PN->getType()->isPointerTy())
AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
@@ -780,7 +780,7 @@ void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
if (ReplVal == LI)
ReplVal = UndefValue::get(LI->getType());
LI->replaceAllUsesWith(ReplVal);
- if (AST && isa<PointerType>(LI->getType()))
+ if (AST && LI->getType()->isPointerTy())
AST->deleteValue(LI);
LI->eraseFromParent();
LBI.deleteValue(LI);
@@ -838,7 +838,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;)
if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) {
LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
- if (AST && isa<PointerType>(LI->getType()))
+ if (AST && LI->getType()->isPointerTy())
AST->deleteValue(LI);
LBI.deleteValue(LI);
LI->eraseFromParent();
@@ -874,7 +874,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
// Otherwise, there was a store before this load, the load takes its value.
--I;
LI->replaceAllUsesWith(I->second->getOperand(0));
- if (AST && isa<PointerType>(LI->getType()))
+ if (AST && LI->getType()->isPointerTy())
AST->deleteValue(LI);
LI->eraseFromParent();
LBI.deleteValue(LI);
@@ -922,7 +922,7 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
InsertedPHINodes.insert(PN);
- if (AST && isa<PointerType>(PN->getType()))
+ if (AST && PN->getType()->isPointerTy())
AST->copyValue(PointerAllocaValues[AllocaNo], PN);
return true;
@@ -996,7 +996,7 @@ NextIteration:
// Anything using the load now uses the current value.
LI->replaceAllUsesWith(V);
- if (AST && isa<PointerType>(LI->getType()))
+ if (AST && LI->getType()->isPointerTy())
AST->deleteValue(LI);
BB->getInstList().erase(LI);
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 795b6bf..f343c38 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -271,7 +271,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
ConstantInt *SimplifyCFGOpt::GetConstantInt(Value *V) {
// Normal constant int.
ConstantInt *CI = dyn_cast<ConstantInt>(V);
- if (CI || !TD || !isa<Constant>(V) || !isa<PointerType>(V->getType()))
+ if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy())
return CI;
// This is some kind of pointer constant. Turn it into a pointer-sized
@@ -701,7 +701,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI) {
AddPredecessorToBlock(NewSuccessors[i], Pred, BB);
// Convert pointer to int before we switch.
- if (isa<PointerType>(CV->getType())) {
+ if (CV->getType()->isPointerTy()) {
assert(TD && "Cannot switch on pointer without TargetData");
CV = new PtrToIntInst(CV, TD->getIntPtrType(CV->getContext()),
"magicptr", PTI);
@@ -915,7 +915,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
case Instruction::Add:
case Instruction::Sub:
// Not worth doing for vector ops.
- if (isa<VectorType>(HInst->getType()))
+ if (HInst->getType()->isVectorTy())
return false;
break;
case Instruction::And:
@@ -925,7 +925,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
case Instruction::LShr:
case Instruction::AShr:
// Don't mess with vector operations.
- if (isa<VectorType>(HInst->getType()))
+ if (HInst->getType()->isVectorTy())
return false;
break; // These are all cheap and non-trapping instructions.
}
@@ -1077,7 +1077,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
ConstantInt *CB;
if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) &&
- CB->getType()->isInteger(1)) {
+ CB->getType()->isIntegerTy(1)) {
// Okay, we now know that all edges from PredBB should be revectored to
// branch to RealDest.
BasicBlock *PredBB = PN->getIncomingBlock(i);
@@ -2068,7 +2068,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
// Convert pointer to int before we switch.
- if (isa<PointerType>(CompVal->getType())) {
+ if (CompVal->getType()->isPointerTy()) {
assert(TD && "Cannot switch on pointer without TargetData");
CompVal = new PtrToIntInst(CompVal,
TD->getIntPtrType(CompVal->getContext()),
diff --git a/lib/VMCore/Android.mk b/lib/VMCore/Android.mk
new file mode 100644
index 0000000..4784684
--- /dev/null
+++ b/lib/VMCore/Android.mk
@@ -0,0 +1,61 @@
+LOCAL_PATH:= $(call my-dir)
+
+vmcore_SRC_FILES := \
+ AsmWriter.cpp \
+ Attributes.cpp \
+ AutoUpgrade.cpp \
+ BasicBlock.cpp \
+ ConstantFold.cpp \
+ Constants.cpp \
+ Core.cpp \
+ Dominators.cpp \
+ Function.cpp \
+ GVMaterializer.cpp \
+ Globals.cpp \
+ IRBuilder.cpp \
+ InlineAsm.cpp \
+ Instruction.cpp \
+ Instructions.cpp \
+ IntrinsicInst.cpp \
+ LLVMContext.cpp \
+ LeakDetector.cpp \
+ Metadata.cpp \
+ Module.cpp \
+ Pass.cpp \
+ PassManager.cpp \
+ PrintModulePass.cpp \
+ Type.cpp \
+ TypeSymbolTable.cpp \
+ Use.cpp \
+ Value.cpp \
+ ValueSymbolTable.cpp \
+ ValueTypes.cpp \
+ Verifier.cpp
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+REQUIRES_RTTI := 1
+
+LOCAL_SRC_FILES := $(vmcore_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMCore
+
+include $(LLVM_HOST_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+
+REQUIRES_RTTI := 1
+
+LOCAL_SRC_FILES := $(vmcore_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMCore
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index ab5f45a..fd74241 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -17,6 +17,7 @@
#include "llvm/Assembly/Writer.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Assembly/AsmAnnotationWriter.h"
+#include "llvm/LLVMContext.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -239,6 +240,19 @@ void TypePrinting::CalcTypeName(const Type *Ty,
OS << '>';
break;
}
+ case Type::UnionTyID: {
+ const UnionType *UTy = cast<UnionType>(Ty);
+ OS << "union { ";
+ for (StructType::element_iterator I = UTy->element_begin(),
+ E = UTy->element_end(); I != E; ++I) {
+ CalcTypeName(*I, TypeStack, OS);
+ if (next(I) != UTy->element_end())
+ OS << ',';
+ OS << ' ';
+ }
+ OS << '}';
+ break;
+ }
case Type::PointerTyID: {
const PointerType *PTy = cast<PointerType>(Ty);
CalcTypeName(PTy->getElementType(), TypeStack, OS);
@@ -363,8 +377,8 @@ namespace {
return;
// If this is a structure or opaque type, add a name for the type.
- if (((isa<StructType>(Ty) && cast<StructType>(Ty)->getNumElements())
- || isa<OpaqueType>(Ty)) && !TP.hasTypeName(Ty)) {
+ if (((Ty->isStructTy() && cast<StructType>(Ty)->getNumElements())
+ || Ty->isOpaqueTy()) && !TP.hasTypeName(Ty)) {
TP.addTypeName(Ty, "%"+utostr(unsigned(NumberedTypes.size())));
NumberedTypes.push_back(Ty);
}
@@ -418,13 +432,13 @@ static void AddModuleTypesToPrinter(TypePrinting &TP,
// they are used too often to have a single useful name.
if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
const Type *PETy = PTy->getElementType();
- if ((PETy->isPrimitiveType() || PETy->isInteger()) &&
- !isa<OpaqueType>(PETy))
+ if ((PETy->isPrimitiveType() || PETy->isIntegerTy()) &&
+ !PETy->isOpaqueTy())
continue;
}
// Likewise don't insert primitives either.
- if (Ty->isInteger() || Ty->isPrimitiveType())
+ if (Ty->isIntegerTy() || Ty->isPrimitiveType())
continue;
// Get the name as a string and insert it into TypeNames.
@@ -836,7 +850,7 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
static void WriteConstantInt(raw_ostream &Out, const Constant *CV,
TypePrinting &TypePrinter, SlotTracker *Machine) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
- if (CI->getType()->isInteger(1)) {
+ if (CI->getType()->isIntegerTy(1)) {
Out << (CI->getZExtValue() ? "true" : "false");
return;
}
@@ -1223,7 +1237,6 @@ class AssemblyWriter {
TypePrinting TypePrinter;
AssemblyAnnotationWriter *AnnotationWriter;
std::vector<const Type*> NumberedTypes;
- SmallVector<StringRef, 8> MDNames;
public:
inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
@@ -1231,8 +1244,6 @@ public:
AssemblyAnnotationWriter *AAW)
: Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) {
AddModuleTypesToPrinter(TypePrinter, NumberedTypes, M);
- if (M)
- M->getMDKindNames(MDNames);
}
void printMDNodeBody(const MDNode *MD);
@@ -1252,15 +1263,14 @@ public:
void printArgument(const Argument *FA, Attributes Attrs);
void printBasicBlock(const BasicBlock *BB);
void printInstruction(const Instruction &I);
-private:
+private:
// printInfoComment - Print a little comment after the instruction indicating
// which slot it occupies.
void printInfoComment(const Value &V);
};
} // end of anonymous namespace
-
void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
if (Operand == 0) {
Out << "<null operand!>";
@@ -1689,11 +1699,15 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out);
}
-
/// printInfoComment - Print a little comment after the instruction indicating
/// which slot it occupies.
///
void AssemblyWriter::printInfoComment(const Value &V) {
+ if (AnnotationWriter) {
+ AnnotationWriter->printInfoComment(V, Out);
+ return;
+ }
+
if (V.getType()->isVoidTy()) return;
Out.PadToColumn(50);
@@ -1834,8 +1848,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
//
Out << ' ';
if (!FTy->isVarArg() &&
- (!isa<PointerType>(RetTy) ||
- !isa<FunctionType>(cast<PointerType>(RetTy)->getElementType()))) {
+ (!RetTy->isPointerTy() ||
+ !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) {
TypePrinter.print(RetTy, Out);
Out << ' ';
writeOperand(Operand, false);
@@ -1880,8 +1894,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
//
Out << ' ';
if (!FTy->isVarArg() &&
- (!isa<PointerType>(RetTy) ||
- !isa<FunctionType>(cast<PointerType>(RetTy)->getElementType()))) {
+ (!RetTy->isPointerTy() ||
+ !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) {
TypePrinter.print(RetTy, Out);
Out << ' ';
writeOperand(Operand, false);
@@ -1972,12 +1986,20 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
}
// Print Metadata info.
- if (!MDNames.empty()) {
- SmallVector<std::pair<unsigned, MDNode*>, 4> InstMD;
- I.getAllMetadata(InstMD);
- for (unsigned i = 0, e = InstMD.size(); i != e; ++i)
- Out << ", !" << MDNames[InstMD[i].first]
- << " !" << Machine.getMetadataSlot(InstMD[i].second);
+ SmallVector<std::pair<unsigned, MDNode*>, 4> InstMD;
+ I.getAllMetadata(InstMD);
+ if (!InstMD.empty()) {
+ SmallVector<StringRef, 8> MDNames;
+ I.getType()->getContext().getMDKindNames(MDNames);
+ for (unsigned i = 0, e = InstMD.size(); i != e; ++i) {
+ unsigned Kind = InstMD[i].first;
+ if (Kind < MDNames.size()) {
+ Out << ", !" << MDNames[Kind];
+ } else {
+ Out << ", !<unknown kind #" << Kind << ">";
+ }
+ Out << " !" << Machine.getMetadataSlot(InstMD[i].second);
+ }
}
printInfoComment(I);
}
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
index a371c6f..a000aee 100644
--- a/lib/VMCore/Attributes.cpp
+++ b/lib/VMCore/Attributes.cpp
@@ -70,6 +70,11 @@ std::string Attribute::getAsString(Attributes Attrs) {
Result += "noimplicitfloat ";
if (Attrs & Attribute::Naked)
Result += "naked ";
+ if (Attrs & Attribute::StackAlignment) {
+ Result += "alignstack(";
+ Result += utostr(Attribute::getStackAlignmentFromAttrs(Attrs));
+ Result += ") ";
+ }
if (Attrs & Attribute::Alignment) {
Result += "align ";
Result += utostr(Attribute::getAlignmentFromAttrs(Attrs));
@@ -84,11 +89,11 @@ std::string Attribute::getAsString(Attributes Attrs) {
Attributes Attribute::typeIncompatible(const Type *Ty) {
Attributes Incompatible = None;
- if (!Ty->isInteger())
+ if (!Ty->isIntegerTy())
// Attributes that only apply to integers.
Incompatible |= SExt | ZExt;
- if (!isa<PointerType>(Ty))
+ if (!Ty->isPointerTy())
// Attributes that only apply to pointers.
Incompatible |= ByVal | Nest | NoAlias | StructRet | NoCapture;
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 5c117d8..549977c 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -112,7 +112,7 @@ static Constant *FoldBitCast(Constant *V, const Type *DestTy) {
IdxList.push_back(Zero);
} else if (const SequentialType *STy =
dyn_cast<SequentialType>(ElTy)) {
- if (isa<PointerType>(ElTy)) break; // Can't index into pointers!
+ if (ElTy->isPointerTy()) break; // Can't index into pointers!
ElTy = STy->getElementType();
IdxList.push_back(Zero);
} else {
@@ -155,12 +155,12 @@ static Constant *FoldBitCast(Constant *V, const Type *DestTy) {
// Handle integral constant input.
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- if (DestTy->isInteger())
+ if (DestTy->isIntegerTy())
// Integral -> Integral. This is a no-op because the bit widths must
// be the same. Consequently, we just fold to V.
return V;
- if (DestTy->isFloatingPoint())
+ if (DestTy->isFloatingPointTy())
return ConstantFP::get(DestTy->getContext(),
APFloat(CI->getValue(),
!DestTy->isPPC_FP128Ty()));
@@ -189,7 +189,7 @@ static Constant *FoldBitCast(Constant *V, const Type *DestTy) {
///
static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
unsigned ByteSize) {
- assert(isa<IntegerType>(C->getType()) &&
+ assert(C->getType()->isIntegerTy() &&
(cast<IntegerType>(C->getType())->getBitWidth() & 7) == 0 &&
"Non-byte sized integer input");
unsigned CSize = cast<IntegerType>(C->getType())->getBitWidth()/8;
@@ -334,11 +334,7 @@ static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy,
Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
return ConstantExpr::getNUWMul(E, N);
}
- if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
- Constant *N = ConstantInt::get(DestTy, VTy->getNumElements());
- Constant *E = getFoldedSizeOf(VTy->getElementType(), DestTy, true);
- return ConstantExpr::getNUWMul(E, N);
- }
+
if (const StructType *STy = dyn_cast<StructType>(Ty))
if (!STy->isPacked()) {
unsigned NumElems = STy->getNumElements();
@@ -361,10 +357,26 @@ static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy,
}
}
+ if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) {
+ unsigned NumElems = UTy->getNumElements();
+ // Check for a union with all members having the same size.
+ Constant *MemberSize =
+ getFoldedSizeOf(UTy->getElementType(0), DestTy, true);
+ bool AllSame = true;
+ for (unsigned i = 1; i != NumElems; ++i)
+ if (MemberSize !=
+ getFoldedSizeOf(UTy->getElementType(i), DestTy, true)) {
+ AllSame = false;
+ break;
+ }
+ if (AllSame)
+ return MemberSize;
+ }
+
// Pointer size doesn't depend on the pointee type, so canonicalize them
// to an arbitrary pointee.
if (const PointerType *PTy = dyn_cast<PointerType>(Ty))
- if (!PTy->getElementType()->isInteger(1))
+ if (!PTy->getElementType()->isIntegerTy(1))
return
getFoldedSizeOf(PointerType::get(IntegerType::get(PTy->getContext(), 1),
PTy->getAddressSpace()),
@@ -426,10 +438,28 @@ static Constant *getFoldedAlignOf(const Type *Ty, const Type *DestTy,
return MemberAlign;
}
+ if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) {
+ // Union alignment is the maximum alignment of any member.
+ // Without target data, we can't compare much, but we can check to see
+ // if all the members have the same alignment.
+ unsigned NumElems = UTy->getNumElements();
+ // Check for a union with all members having the same alignment.
+ Constant *MemberAlign =
+ getFoldedAlignOf(UTy->getElementType(0), DestTy, true);
+ bool AllSame = true;
+ for (unsigned i = 1; i != NumElems; ++i)
+ if (MemberAlign != getFoldedAlignOf(UTy->getElementType(i), DestTy, true)) {
+ AllSame = false;
+ break;
+ }
+ if (AllSame)
+ return MemberAlign;
+ }
+
// Pointer alignment doesn't depend on the pointee type, so canonicalize them
// to an arbitrary pointee.
if (const PointerType *PTy = dyn_cast<PointerType>(Ty))
- if (!PTy->getElementType()->isInteger(1))
+ if (!PTy->getElementType()->isIntegerTy(1))
return
getFoldedAlignOf(PointerType::get(IntegerType::get(PTy->getContext(),
1),
@@ -464,13 +494,7 @@ static Constant *getFoldedOffsetOf(const Type *Ty, Constant *FieldNo,
Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
return ConstantExpr::getNUWMul(E, N);
}
- if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
- Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false,
- DestTy, false),
- FieldNo, DestTy);
- Constant *E = getFoldedSizeOf(VTy->getElementType(), DestTy, true);
- return ConstantExpr::getNUWMul(E, N);
- }
+
if (const StructType *STy = dyn_cast<StructType>(Ty))
if (!STy->isPacked()) {
unsigned NumElems = STy->getNumElements();
@@ -551,7 +575,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
// operating on each element. In the cast of bitcasts, the element
// count may be mismatched; don't attempt to handle that here.
if (ConstantVector *CV = dyn_cast<ConstantVector>(V))
- if (isa<VectorType>(DestTy) &&
+ if (DestTy->isVectorTy() &&
cast<VectorType>(DestTy)->getNumElements() ==
CV->getType()->getNumElements()) {
std::vector<Constant*> res;
@@ -629,12 +653,12 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
ConstantInt *CI = cast<ConstantInt>(CE->getOperand(2));
if (CI->isOne() &&
STy->getNumElements() == 2 &&
- STy->getElementType(0)->isInteger(1)) {
+ STy->getElementType(0)->isIntegerTy(1)) {
return getFoldedAlignOf(STy->getElementType(1), DestTy, false);
}
}
// Handle an offsetof-like expression.
- if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)){
+ if (Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()){
if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2),
DestTy, false))
return C;
@@ -885,6 +909,8 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
unsigned numOps;
if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy))
numOps = AR->getNumElements();
+ else if (AggTy->isUnionTy())
+ numOps = 1;
else
numOps = cast<StructType>(AggTy)->getNumElements();
@@ -901,6 +927,10 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
if (const StructType* ST = dyn_cast<StructType>(AggTy))
return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked());
+ if (const UnionType* UT = dyn_cast<UnionType>(AggTy)) {
+ assert(Ops.size() == 1 && "Union can only contain a single value!");
+ return ConstantUnion::get(UT, Ops[0]);
+ }
return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
}
@@ -1099,6 +1129,10 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
return ConstantExpr::getLShr(C1, C2);
break;
}
+ } else if (isa<ConstantInt>(C1)) {
+ // If C1 is a ConstantInt and C2 is not, swap the operands.
+ if (Instruction::isCommutative(Opcode))
+ return ConstantExpr::get(Opcode, C2, C1);
}
// At this point we know neither constant is an UndefValue.
@@ -1358,35 +1392,12 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
} else if (isa<ConstantExpr>(C2)) {
// If C2 is a constant expr and C1 isn't, flop them around and fold the
// other way if possible.
- switch (Opcode) {
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- // No change of opcode required.
+ if (Instruction::isCommutative(Opcode))
return ConstantFoldBinaryInstruction(Opcode, C2, C1);
-
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::SDiv:
- case Instruction::UDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- default: // These instructions cannot be flopped around.
- break;
- }
}
// i1 can be simplified in many cases.
- if (C1->getType()->isInteger(1)) {
+ if (C1->getType()->isIntegerTy(1)) {
switch (Opcode) {
case Instruction::Add:
case Instruction::Sub:
@@ -1421,7 +1432,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
/// isZeroSizedType - This type is zero sized if it's an array or structure of
/// zero sized types. The only leaf zero sized type is an empty structure.
static bool isMaybeZeroSizedType(const Type *Ty) {
- if (isa<OpaqueType>(Ty)) return true; // Can't say.
+ if (Ty->isOpaqueTy()) return true; // Can't say.
if (const StructType *STy = dyn_cast<StructType>(Ty)) {
// If all of elements have zero size, this does too.
@@ -1452,10 +1463,10 @@ static int IdxCompare(Constant *C1, Constant *C2, const Type *ElTy) {
// Ok, we have two differing integer indices. Sign extend them to be the same
// type. Long is always big enough, so we use it.
- if (!C1->getType()->isInteger(64))
+ if (!C1->getType()->isIntegerTy(64))
C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(C1->getContext()));
- if (!C2->getType()->isInteger(64))
+ if (!C2->getType()->isIntegerTy(64))
C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(C1->getContext()));
if (C1 == C2) return 0; // They are equal
@@ -1661,7 +1672,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
// If the cast is not actually changing bits, and the second operand is a
// null pointer, do the comparison with the pre-casted value.
if (V2->isNullValue() &&
- (isa<PointerType>(CE1->getType()) || CE1->getType()->isInteger())) {
+ (CE1->getType()->isPointerTy() || CE1->getType()->isIntegerTy())) {
if (CE1->getOpcode() == Instruction::ZExt) isSigned = false;
if (CE1->getOpcode() == Instruction::SExt) isSigned = true;
return evaluateICmpRelation(CE1Op0,
@@ -1807,7 +1818,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
// Handle some degenerate cases first
if (isa<UndefValue>(C1) || isa<UndefValue>(C2))
- return UndefValue::get(ResultTy);
+ return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred));
// No compile-time operations on this type yet.
if (C1->getType()->isPPC_FP128Ty())
@@ -1836,7 +1847,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
}
// If the comparison is a comparison between two i1's, simplify it.
- if (C1->getType()->isInteger(1)) {
+ if (C1->getType()->isIntegerTy(1)) {
switch(pred) {
case ICmpInst::ICMP_EQ:
if (isa<ConstantInt>(C2))
@@ -1908,7 +1919,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan ||
R==APFloat::cmpEqual);
}
- } else if (isa<VectorType>(C1->getType())) {
+ } else if (C1->getType()->isVectorTy()) {
SmallVector<Constant*, 16> C1Elts, C2Elts;
C1->getVectorElements(C1Elts);
C2->getVectorElements(C2Elts);
@@ -1925,7 +1936,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
return ConstantVector::get(&ResElts[0], ResElts.size());
}
- if (C1->getType()->isFloatingPoint()) {
+ if (C1->getType()->isFloatingPointTy()) {
int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
switch (evaluateFCmpRelation(C1, C2)) {
default: llvm_unreachable("Unknown relation!");
@@ -2059,7 +2070,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
if (ConstantExpr *CE2 = dyn_cast<ConstantExpr>(C2)) {
Constant *CE2Op0 = CE2->getOperand(0);
if (CE2->getOpcode() == Instruction::BitCast &&
- isa<VectorType>(CE2->getType())==isa<VectorType>(CE2Op0->getType())) {
+ CE2->getType()->isVectorTy() == CE2Op0->getType()->isVectorTy()) {
Constant *Inverse = ConstantExpr::getBitCast(C1, CE2Op0->getType());
return ConstantExpr::getICmp(pred, Inverse, CE2Op0);
}
@@ -2067,8 +2078,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
// If the left hand side is an extension, try eliminating it.
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
- if (CE1->getOpcode() == Instruction::SExt ||
- CE1->getOpcode() == Instruction::ZExt) {
+ if ((CE1->getOpcode() == Instruction::SExt && ICmpInst::isSigned(pred)) ||
+ (CE1->getOpcode() == Instruction::ZExt && !ICmpInst::isSigned(pred))){
Constant *CE1Op0 = CE1->getOperand(0);
Constant *CE1Inverse = ConstantExpr::getTrunc(CE1, CE1Op0->getType());
if (CE1Inverse == CE1Op0) {
@@ -2086,27 +2097,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
// If C2 is a constant expr and C1 isn't, flip them around and fold the
// other way if possible.
// Also, if C1 is null and C2 isn't, flip them around.
- switch (pred) {
- case ICmpInst::ICMP_EQ:
- case ICmpInst::ICMP_NE:
- // No change of predicate required.
- return ConstantExpr::getICmp(pred, C2, C1);
-
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_ULE:
- case ICmpInst::ICMP_SLE:
- case ICmpInst::ICMP_UGE:
- case ICmpInst::ICMP_SGE:
- // Change the predicate as necessary to swap the operands.
- pred = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)pred);
- return ConstantExpr::getICmp(pred, C2, C1);
-
- default: // These predicates cannot be flopped around.
- break;
- }
+ pred = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)pred);
+ return ConstantExpr::getICmp(pred, C2, C1);
}
}
return 0;
@@ -2178,7 +2170,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
I != E; ++I)
LastTy = *I;
- if ((LastTy && isa<ArrayType>(LastTy)) || Idx0->isNullValue()) {
+ if ((LastTy && LastTy->isArrayTy()) || Idx0->isNullValue()) {
SmallVector<Value*, 16> NewIndices;
NewIndices.reserve(NumIdx + CE->getNumOperands());
for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i)
@@ -2260,10 +2252,10 @@ Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
// Before adding, extend both operands to i64 to avoid
// overflow trouble.
- if (!PrevIdx->getType()->isInteger(64))
+ if (!PrevIdx->getType()->isIntegerTy(64))
PrevIdx = ConstantExpr::getSExt(PrevIdx,
Type::getInt64Ty(Div->getContext()));
- if (!Div->getType()->isInteger(64))
+ if (!Div->getType()->isIntegerTy(64))
Div = ConstantExpr::getSExt(Div,
Type::getInt64Ty(Div->getContext()));
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 2250626..10f8879 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -229,7 +229,7 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const {
/// This handles breaking down a vector undef into undef elements, etc. For
/// constant exprs and other cases we can't handle, we return an empty vector.
void Constant::getVectorElements(SmallVectorImpl<Constant*> &Elts) const {
- assert(isa<VectorType>(getType()) && "Not a vector constant!");
+ assert(getType()->isVectorTy() && "Not a vector constant!");
if (const ConstantVector *CV = dyn_cast<ConstantVector>(this)) {
for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i)
@@ -404,13 +404,13 @@ ConstantFP* ConstantFP::getNegativeZero(const Type* Ty) {
Constant* ConstantFP::getZeroValueForNegation(const Type* Ty) {
if (const VectorType *PTy = dyn_cast<VectorType>(Ty))
- if (PTy->getElementType()->isFloatingPoint()) {
+ if (PTy->getElementType()->isFloatingPointTy()) {
std::vector<Constant*> zeros(PTy->getNumElements(),
getNegativeZero(PTy->getElementType()));
return ConstantVector::get(PTy, zeros);
}
- if (Ty->isFloatingPoint())
+ if (Ty->isFloatingPointTy())
return getNegativeZero(Ty);
return Constant::getNullValue(Ty);
@@ -585,6 +585,27 @@ Constant* ConstantStruct::get(LLVMContext &Context,
return get(Context, std::vector<Constant*>(Vals, Vals+NumVals), Packed);
}
+ConstantUnion::ConstantUnion(const UnionType *T, Constant* V)
+ : Constant(T, ConstantUnionVal,
+ OperandTraits<ConstantUnion>::op_end(this) - 1, 1) {
+ Use *OL = OperandList;
+ assert(T->getElementTypeIndex(V->getType()) >= 0 &&
+ "Initializer for union element isn't a member of union type!");
+ *OL = V;
+}
+
+// ConstantUnion accessors.
+Constant* ConstantUnion::get(const UnionType* T, Constant* V) {
+ LLVMContextImpl* pImpl = T->getContext().pImpl;
+
+ // Create a ConstantAggregateZero value if all elements are zeros...
+ if (!V->isNullValue())
+ return pImpl->UnionConstants.getOrCreate(T, V);
+
+ return ConstantAggregateZero::get(T);
+}
+
+
ConstantVector::ConstantVector(const VectorType *T,
const std::vector<Constant*> &V)
: Constant(T, ConstantVectorVal,
@@ -640,13 +661,13 @@ Constant* ConstantVector::get(Constant* const* Vals, unsigned NumVals) {
}
Constant* ConstantExpr::getNSWNeg(Constant* C) {
- assert(C->getType()->isIntOrIntVector() &&
+ assert(C->getType()->isIntOrIntVectorTy() &&
"Cannot NEG a nonintegral value!");
return getNSWSub(ConstantFP::getZeroValueForNegation(C->getType()), C);
}
Constant* ConstantExpr::getNUWNeg(Constant* C) {
- assert(C->getType()->isIntOrIntVector() &&
+ assert(C->getType()->isIntOrIntVectorTy() &&
"Cannot NEG a nonintegral value!");
return getNUWSub(ConstantFP::getZeroValueForNegation(C->getType()), C);
}
@@ -923,7 +944,7 @@ bool ConstantFP::isValueValidForType(const Type *Ty, const APFloat& Val) {
// Factory Function Implementation
ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) {
- assert((isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) &&
+ assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) &&
"Cannot create an aggregate zero of non-aggregate type!");
LLVMContextImpl *pImpl = Ty->getContext().pImpl;
@@ -948,7 +969,7 @@ void ConstantArray::destroyConstant() {
/// if the elements of the array are all ConstantInt's.
bool ConstantArray::isString() const {
// Check the element type for i8...
- if (!getType()->getElementType()->isInteger(8))
+ if (!getType()->getElementType()->isIntegerTy(8))
return false;
// Check the elements to make sure they are all integers, not constant
// expressions.
@@ -963,7 +984,7 @@ bool ConstantArray::isString() const {
/// null bytes except its terminator.
bool ConstantArray::isCString() const {
// Check the element type for i8...
- if (!getType()->getElementType()->isInteger(8))
+ if (!getType()->getElementType()->isIntegerTy(8))
return false;
// Last element must be a null.
@@ -1010,6 +1031,13 @@ void ConstantStruct::destroyConstant() {
// destroyConstant - Remove the constant from the constant table...
//
+void ConstantUnion::destroyConstant() {
+ getType()->getContext().pImpl->UnionConstants.remove(this);
+ destroyConstantImpl();
+}
+
+// destroyConstant - Remove the constant from the constant table...
+//
void ConstantVector::destroyConstant() {
getType()->getContext().pImpl->VectorConstants.remove(this);
destroyConstantImpl();
@@ -1211,18 +1239,18 @@ Constant *ConstantExpr::getTruncOrBitCast(Constant *C, const Type *Ty) {
}
Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) {
- assert(isa<PointerType>(S->getType()) && "Invalid cast");
- assert((Ty->isInteger() || isa<PointerType>(Ty)) && "Invalid cast");
+ assert(S->getType()->isPointerTy() && "Invalid cast");
+ assert((Ty->isIntegerTy() || Ty->isPointerTy()) && "Invalid cast");
- if (Ty->isInteger())
+ if (Ty->isIntegerTy())
return getCast(Instruction::PtrToInt, S, Ty);
return getCast(Instruction::BitCast, S, Ty);
}
Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty,
bool isSigned) {
- assert(C->getType()->isIntOrIntVector() &&
- Ty->isIntOrIntVector() && "Invalid cast");
+ assert(C->getType()->isIntOrIntVectorTy() &&
+ Ty->isIntOrIntVectorTy() && "Invalid cast");
unsigned SrcBits = C->getType()->getScalarSizeInBits();
unsigned DstBits = Ty->getScalarSizeInBits();
Instruction::CastOps opcode =
@@ -1233,7 +1261,7 @@ Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty,
}
Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) {
- assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() &&
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
"Invalid cast");
unsigned SrcBits = C->getType()->getScalarSizeInBits();
unsigned DstBits = Ty->getScalarSizeInBits();
@@ -1250,8 +1278,8 @@ Constant *ConstantExpr::getTrunc(Constant *C, const Type *Ty) {
bool toVec = Ty->getTypeID() == Type::VectorTyID;
#endif
assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isIntOrIntVector() && "Trunc operand must be integer");
- assert(Ty->isIntOrIntVector() && "Trunc produces only integral");
+ assert(C->getType()->isIntOrIntVectorTy() && "Trunc operand must be integer");
+ assert(Ty->isIntOrIntVectorTy() && "Trunc produces only integral");
assert(C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&&
"SrcTy must be larger than DestTy for Trunc!");
@@ -1264,8 +1292,8 @@ Constant *ConstantExpr::getSExt(Constant *C, const Type *Ty) {
bool toVec = Ty->getTypeID() == Type::VectorTyID;
#endif
assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isIntOrIntVector() && "SExt operand must be integral");
- assert(Ty->isIntOrIntVector() && "SExt produces only integer");
+ assert(C->getType()->isIntOrIntVectorTy() && "SExt operand must be integral");
+ assert(Ty->isIntOrIntVectorTy() && "SExt produces only integer");
assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
"SrcTy must be smaller than DestTy for SExt!");
@@ -1278,8 +1306,8 @@ Constant *ConstantExpr::getZExt(Constant *C, const Type *Ty) {
bool toVec = Ty->getTypeID() == Type::VectorTyID;
#endif
assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isIntOrIntVector() && "ZEXt operand must be integral");
- assert(Ty->isIntOrIntVector() && "ZExt produces only integer");
+ assert(C->getType()->isIntOrIntVectorTy() && "ZEXt operand must be integral");
+ assert(Ty->isIntOrIntVectorTy() && "ZExt produces only integer");
assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
"SrcTy must be smaller than DestTy for ZExt!");
@@ -1292,7 +1320,7 @@ Constant *ConstantExpr::getFPTrunc(Constant *C, const Type *Ty) {
bool toVec = Ty->getTypeID() == Type::VectorTyID;
#endif
assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() &&
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&&
"This is an illegal floating point truncation!");
return getFoldedCast(Instruction::FPTrunc, C, Ty);
@@ -1304,7 +1332,7 @@ Constant *ConstantExpr::getFPExtend(Constant *C, const Type *Ty) {
bool toVec = Ty->getTypeID() == Type::VectorTyID;
#endif
assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() &&
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
"This is an illegal floating point extension!");
return getFoldedCast(Instruction::FPExt, C, Ty);
@@ -1316,7 +1344,7 @@ Constant *ConstantExpr::getUIToFP(Constant *C, const Type *Ty) {
bool toVec = Ty->getTypeID() == Type::VectorTyID;
#endif
assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isIntOrIntVector() && Ty->isFPOrFPVector() &&
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() &&
"This is an illegal uint to floating point cast!");
return getFoldedCast(Instruction::UIToFP, C, Ty);
}
@@ -1327,7 +1355,7 @@ Constant *ConstantExpr::getSIToFP(Constant *C, const Type *Ty) {
bool toVec = Ty->getTypeID() == Type::VectorTyID;
#endif
assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isIntOrIntVector() && Ty->isFPOrFPVector() &&
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() &&
"This is an illegal sint to floating point cast!");
return getFoldedCast(Instruction::SIToFP, C, Ty);
}
@@ -1338,7 +1366,7 @@ Constant *ConstantExpr::getFPToUI(Constant *C, const Type *Ty) {
bool toVec = Ty->getTypeID() == Type::VectorTyID;
#endif
assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isFPOrFPVector() && Ty->isIntOrIntVector() &&
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() &&
"This is an illegal floating point to uint cast!");
return getFoldedCast(Instruction::FPToUI, C, Ty);
}
@@ -1349,20 +1377,20 @@ Constant *ConstantExpr::getFPToSI(Constant *C, const Type *Ty) {
bool toVec = Ty->getTypeID() == Type::VectorTyID;
#endif
assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isFPOrFPVector() && Ty->isIntOrIntVector() &&
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() &&
"This is an illegal floating point to sint cast!");
return getFoldedCast(Instruction::FPToSI, C, Ty);
}
Constant *ConstantExpr::getPtrToInt(Constant *C, const Type *DstTy) {
- assert(isa<PointerType>(C->getType()) && "PtrToInt source must be pointer");
- assert(DstTy->isInteger() && "PtrToInt destination must be integral");
+ assert(C->getType()->isPointerTy() && "PtrToInt source must be pointer");
+ assert(DstTy->isIntegerTy() && "PtrToInt destination must be integral");
return getFoldedCast(Instruction::PtrToInt, C, DstTy);
}
Constant *ConstantExpr::getIntToPtr(Constant *C, const Type *DstTy) {
- assert(C->getType()->isInteger() && "IntToPtr source must be integral");
- assert(isa<PointerType>(DstTy) && "IntToPtr destination must be a pointer");
+ assert(C->getType()->isIntegerTy() && "IntToPtr source must be integral");
+ assert(DstTy->isPointerTy() && "IntToPtr destination must be a pointer");
return getFoldedCast(Instruction::IntToPtr, C, DstTy);
}
@@ -1421,7 +1449,7 @@ Constant *ConstantExpr::getCompareTy(unsigned short predicate,
Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
unsigned Flags) {
// API compatibility: Adjust integer opcodes to floating-point opcodes.
- if (C1->getType()->isFPOrFPVector()) {
+ if (C1->getType()->isFPOrFPVectorTy()) {
if (Opcode == Instruction::Add) Opcode = Instruction::FAdd;
else if (Opcode == Instruction::Sub) Opcode = Instruction::FSub;
else if (Opcode == Instruction::Mul) Opcode = Instruction::FMul;
@@ -1432,51 +1460,51 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
case Instruction::Sub:
case Instruction::Mul:
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isIntOrIntVector() &&
+ assert(C1->getType()->isIntOrIntVectorTy() &&
"Tried to create an integer operation on a non-integer type!");
break;
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul:
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isFPOrFPVector() &&
+ assert(C1->getType()->isFPOrFPVectorTy() &&
"Tried to create a floating-point operation on a "
"non-floating-point type!");
break;
case Instruction::UDiv:
case Instruction::SDiv:
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isIntOrIntVector() &&
+ assert(C1->getType()->isIntOrIntVectorTy() &&
"Tried to create an arithmetic operation on a non-arithmetic type!");
break;
case Instruction::FDiv:
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isFPOrFPVector() &&
+ assert(C1->getType()->isFPOrFPVectorTy() &&
"Tried to create an arithmetic operation on a non-arithmetic type!");
break;
case Instruction::URem:
case Instruction::SRem:
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isIntOrIntVector() &&
+ assert(C1->getType()->isIntOrIntVectorTy() &&
"Tried to create an arithmetic operation on a non-arithmetic type!");
break;
case Instruction::FRem:
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isFPOrFPVector() &&
+ assert(C1->getType()->isFPOrFPVectorTy() &&
"Tried to create an arithmetic operation on a non-arithmetic type!");
break;
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isIntOrIntVector() &&
+ assert(C1->getType()->isIntOrIntVectorTy() &&
"Tried to create a logical operation on a non-integral type!");
break;
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isIntOrIntVector() &&
+ assert(C1->getType()->isIntOrIntVectorTy() &&
"Tried to create a shift operation on a non-integer type!");
break;
default:
@@ -1564,7 +1592,7 @@ Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C,
(Constant**)Idxs, NumIdx))
return FC; // Fold a few common cases...
- assert(isa<PointerType>(C->getType()) &&
+ assert(C->getType()->isPointerTy() &&
"Non-pointer type for constant GetElementPtr expression");
// Look up the constant in the table first to ensure uniqueness
std::vector<Constant*> ArgVec;
@@ -1591,7 +1619,7 @@ Constant *ConstantExpr::getInBoundsGetElementPtrTy(const Type *ReqTy,
(Constant**)Idxs, NumIdx))
return FC; // Fold a few common cases...
- assert(isa<PointerType>(C->getType()) &&
+ assert(C->getType()->isPointerTy() &&
"Non-pointer type for constant GetElementPtr expression");
// Look up the constant in the table first to ensure uniqueness
std::vector<Constant*> ArgVec;
@@ -1699,9 +1727,9 @@ Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val,
}
Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
- assert(isa<VectorType>(Val->getType()) &&
+ assert(Val->getType()->isVectorTy() &&
"Tried to create extractelement operation on non-vector type!");
- assert(Idx->getType()->isInteger(32) &&
+ assert(Idx->getType()->isIntegerTy(32) &&
"Extractelement index must be i32 type!");
return getExtractElementTy(cast<VectorType>(Val->getType())->getElementType(),
Val, Idx);
@@ -1723,11 +1751,11 @@ Constant *ConstantExpr::getInsertElementTy(const Type *ReqTy, Constant *Val,
Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
Constant *Idx) {
- assert(isa<VectorType>(Val->getType()) &&
+ assert(Val->getType()->isVectorTy() &&
"Tried to create insertelement operation on non-vector type!");
assert(Elt->getType() == cast<VectorType>(Val->getType())->getElementType()
&& "Insertelement types must match!");
- assert(Idx->getType()->isInteger(32) &&
+ assert(Idx->getType()->isIntegerTy(32) &&
"Insertelement index must be i32 type!");
return getInsertElementTy(Val->getType(), Val, Elt, Idx);
}
@@ -1811,9 +1839,9 @@ Constant *ConstantExpr::getExtractValue(Constant *Agg,
Constant* ConstantExpr::getNeg(Constant* C) {
// API compatibility: Adjust integer opcodes to floating-point opcodes.
- if (C->getType()->isFPOrFPVector())
+ if (C->getType()->isFPOrFPVectorTy())
return getFNeg(C);
- assert(C->getType()->isIntOrIntVector() &&
+ assert(C->getType()->isIntOrIntVectorTy() &&
"Cannot NEG a nonintegral value!");
return get(Instruction::Sub,
ConstantFP::getZeroValueForNegation(C->getType()),
@@ -1821,7 +1849,7 @@ Constant* ConstantExpr::getNeg(Constant* C) {
}
Constant* ConstantExpr::getFNeg(Constant* C) {
- assert(C->getType()->isFPOrFPVector() &&
+ assert(C->getType()->isFPOrFPVectorTy() &&
"Cannot FNEG a non-floating-point value!");
return get(Instruction::FSub,
ConstantFP::getZeroValueForNegation(C->getType()),
@@ -1829,7 +1857,7 @@ Constant* ConstantExpr::getFNeg(Constant* C) {
}
Constant* ConstantExpr::getNot(Constant* C) {
- assert(C->getType()->isIntOrIntVector() &&
+ assert(C->getType()->isIntOrIntVectorTy() &&
"Cannot NOT a nonintegral value!");
return get(Instruction::Xor, C, Constant::getAllOnesValue(C->getType()));
}
@@ -2083,6 +2111,56 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
destroyConstant();
}
+void ConstantUnion::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
+ Constant *ToC = cast<Constant>(To);
+
+ assert(U == OperandList && "Union constants can only have one use!");
+ assert(getNumOperands() == 1 && "Union constants can only have one use!");
+ assert(getOperand(0) == From && "ReplaceAllUsesWith broken!");
+
+ std::pair<LLVMContextImpl::UnionConstantsTy::MapKey, ConstantUnion*> Lookup;
+ Lookup.first.first = getType();
+ Lookup.second = this;
+ Lookup.first.second = ToC;
+
+ LLVMContext &Context = getType()->getContext();
+ LLVMContextImpl *pImpl = Context.pImpl;
+
+ Constant *Replacement = 0;
+ if (ToC->isNullValue()) {
+ Replacement = ConstantAggregateZero::get(getType());
+ } else {
+ // Check to see if we have this union type already.
+ bool Exists;
+ LLVMContextImpl::UnionConstantsTy::MapTy::iterator I =
+ pImpl->UnionConstants.InsertOrGetItem(Lookup, Exists);
+
+ if (Exists) {
+ Replacement = I->second;
+ } else {
+ // Okay, the new shape doesn't exist in the system yet. Instead of
+ // creating a new constant union, inserting it, replaceallusesof'ing the
+ // old with the new, then deleting the old... just update the current one
+ // in place!
+ pImpl->UnionConstants.MoveConstantToNewSlot(this, I);
+
+ // Update to the new value.
+ setOperand(0, ToC);
+ return;
+ }
+ }
+
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ uncheckedReplaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
Use *U) {
assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
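The Constants.cpp hunks above are dominated by the rename of the Type query helpers: isInteger/isIntOrIntVector/isFPOrFPVector and isa<PointerType>/isa<VectorType> checks become isIntegerTy, isIntOrIntVectorTy, isFPOrFPVectorTy, isPointerTy and isVectorTy. A minimal sketch of the new spelling, using a hypothetical helper that is not part of this patch:

    #include "llvm/Type.h"
    using namespace llvm;

    // Sketch only: the renamed predicates as used throughout this diff.
    static bool isI32OrPointer(const Type *Ty) {
      // Old spellings: Ty->isInteger(32), isa<PointerType>(Ty)
      return Ty->isIntegerTy(32) || Ty->isPointerTy();
    }
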
diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h
index 08224e4..c798ba2 100644
--- a/lib/VMCore/ConstantsContext.h
+++ b/lib/VMCore/ConstantsContext.h
@@ -341,6 +341,13 @@ struct ConstantTraits< std::vector<T, Alloc> > {
}
};
+template<>
+struct ConstantTraits<Constant *> {
+ static unsigned uses(Constant * const & v) {
+ return 1;
+ }
+};
+
template<class ConstantClass, class TypeClass, class ValType>
struct ConstantCreator {
static ConstantClass *create(const TypeClass *Ty, const ValType &V) {
@@ -470,6 +477,14 @@ struct ConstantKeyData<ConstantStruct> {
}
};
+template<>
+struct ConstantKeyData<ConstantUnion> {
+ typedef Constant* ValType;
+ static ValType getValType(ConstantUnion *CU) {
+ return cast<Constant>(CU->getOperand(0));
+ }
+};
+
// ConstantPointerNull does not take extra "value" argument...
template<class ValType>
struct ConstantCreator<ConstantPointerNull, PointerType, ValType> {
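The ConstantsContext.h hunk above wires ConstantUnion into the ConstantUniqueMap machinery, keying each union constant on its type and its single operand. A minimal sketch of what that uniquing buys, assuming UnionTy (a UnionType*) and C (a Constant*) already exist; ConstantUnion::get itself appears later in this patch:

    // Sketch only: equal (type, operand) pairs map to one shared constant.
    Constant *U1 = ConstantUnion::get(UnionTy, C);
    Constant *U2 = ConstantUnion::get(UnionTy, C);
    assert(U1 == U2 && "union constants are uniqued");
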
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index 1755cd2..f4f65c5 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -55,6 +55,15 @@ void LLVMContextDispose(LLVMContextRef C) {
delete unwrap(C);
}
+unsigned LLVMGetMDKindIDInContext(LLVMContextRef C, const char* Name,
+ unsigned SLen) {
+ return unwrap(C)->getMDKindID(StringRef(Name, SLen));
+}
+
+unsigned LLVMGetMDKindID(const char* Name, unsigned SLen) {
+ return LLVMGetMDKindIDInContext(LLVMGetGlobalContext(), Name, SLen);
+}
+
/*===-- Operations on modules ---------------------------------------------===*/
@@ -141,6 +150,8 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
return LLVMFunctionTypeKind;
case Type::StructTyID:
return LLVMStructTypeKind;
+ case Type::UnionTyID:
+ return LLVMUnionTypeKind;
case Type::ArrayTyID:
return LLVMArrayTypeKind;
case Type::PointerTyID:
@@ -299,6 +310,35 @@ LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) {
return unwrap<StructType>(StructTy)->isPacked();
}
+/*--.. Operations on union types ..........................................--*/
+
+LLVMTypeRef LLVMUnionTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
+ unsigned ElementCount) {
+ SmallVector<const Type*, 8> Tys;
+ for (LLVMTypeRef *I = ElementTypes,
+ *E = ElementTypes + ElementCount; I != E; ++I)
+ Tys.push_back(unwrap(*I));
+
+ return wrap(UnionType::get(&Tys[0], Tys.size()));
+}
+
+LLVMTypeRef LLVMUnionType(LLVMTypeRef *ElementTypes,
+ unsigned ElementCount, int Packed) {
+ return LLVMUnionTypeInContext(LLVMGetGlobalContext(), ElementTypes,
+ ElementCount);
+}
+
+unsigned LLVMCountUnionElementTypes(LLVMTypeRef UnionTy) {
+ return unwrap<UnionType>(UnionTy)->getNumElements();
+}
+
+void LLVMGetUnionElementTypes(LLVMTypeRef UnionTy, LLVMTypeRef *Dest) {
+ UnionType *Ty = unwrap<UnionType>(UnionTy);
+ for (FunctionType::param_iterator I = Ty->element_begin(),
+ E = Ty->element_end(); I != E; ++I)
+ *Dest++ = wrap(*I);
+}
+
/*--.. Operations on array, pointer, and vector types (sequence types) .....--*/
LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) {
@@ -394,6 +434,18 @@ void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal) {
unwrap(OldVal)->replaceAllUsesWith(unwrap(NewVal));
}
+int LLVMHasMetadata(LLVMValueRef Inst) {
+ return unwrap<Instruction>(Inst)->hasMetadata();
+}
+
+LLVMValueRef LLVMGetMetadata(LLVMValueRef Inst, unsigned KindID) {
+ return wrap(unwrap<Instruction>(Inst)->getMetadata(KindID));
+}
+
+void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef MD) {
+ unwrap<Instruction>(Inst)->setMetadata(KindID, MD? unwrap<MDNode>(MD) : NULL);
+}
+
/*--.. Conversion functions ................................................--*/
#define LLVM_DEFINE_VALUE_CAST(name) \
@@ -404,7 +456,7 @@ void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal) {
LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DEFINE_VALUE_CAST)
/*--.. Operations on Uses ..................................................--*/
-LLVMUseIteratorRef LLVMGetFirstUse(LLVMValueRef Val) {
+LLVMUseRef LLVMGetFirstUse(LLVMValueRef Val) {
Value *V = unwrap(Val);
Value::use_iterator I = V->use_begin();
if (I == V->use_end())
@@ -412,16 +464,19 @@ LLVMUseIteratorRef LLVMGetFirstUse(LLVMValueRef Val) {
return wrap(&(I.getUse()));
}
-LLVMUseIteratorRef LLVMGetNextUse(LLVMUseIteratorRef UR) {
- return wrap(unwrap(UR)->getNext());
+LLVMUseRef LLVMGetNextUse(LLVMUseRef U) {
+ Use *Next = unwrap(U)->getNext();
+ if (Next)
+ return wrap(Next);
+ return 0;
}
-LLVMValueRef LLVMGetUser(LLVMUseIteratorRef UR) {
- return wrap(unwrap(UR)->getUser());
+LLVMValueRef LLVMGetUser(LLVMUseRef U) {
+ return wrap(unwrap(U)->getUser());
}
-LLVMValueRef LLVMGetUsedValue(LLVMUseIteratorRef UR) {
- return wrap(unwrap(UR)->get());
+LLVMValueRef LLVMGetUsedValue(LLVMUseRef U) {
+ return wrap(unwrap(U)->get());
}
/*--.. Operations on Users .................................................--*/
@@ -462,6 +517,26 @@ LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty) {
wrap(ConstantPointerNull::get(unwrap<PointerType>(Ty)));
}
+/*--.. Operations on metadata nodes ........................................--*/
+
+LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str,
+ unsigned SLen) {
+ return wrap(MDString::get(*unwrap(C), StringRef(Str, SLen)));
+}
+
+LLVMValueRef LLVMMDString(const char *Str, unsigned SLen) {
+ return LLVMMDStringInContext(LLVMGetGlobalContext(), Str, SLen);
+}
+
+LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals,
+ unsigned Count) {
+ return wrap(MDNode::get(*unwrap(C), unwrap<Value>(Vals, Count), Count));
+}
+
+LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count) {
+ return LLVMMDNodeInContext(LLVMGetGlobalContext(), Vals, Count);
+}
+
/*--.. Operations on scalar constants ......................................--*/
LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
@@ -536,11 +611,13 @@ LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
return LLVMConstStructInContext(LLVMGetGlobalContext(), ConstantVals, Count,
Packed);
}
-
LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) {
return wrap(ConstantVector::get(
unwrap<Constant>(ScalarConstantVals, Size), Size));
}
+LLVMValueRef LLVMConstUnion(LLVMTypeRef Ty, LLVMValueRef Val) {
+ return wrap(ConstantUnion::get(unwrap<UnionType>(Ty), unwrap<Constant>(Val)));
+}
/*--.. Constant expressions ................................................--*/
@@ -561,6 +638,17 @@ LLVMValueRef LLVMConstNeg(LLVMValueRef ConstantVal) {
unwrap<Constant>(ConstantVal)));
}
+LLVMValueRef LLVMConstNSWNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNSWNeg(
+ unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstNUWNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNUWNeg(
+ unwrap<Constant>(ConstantVal)));
+}
+
+
LLVMValueRef LLVMConstFNeg(LLVMValueRef ConstantVal) {
return wrap(ConstantExpr::getFNeg(
unwrap<Constant>(ConstantVal)));
@@ -584,6 +672,13 @@ LLVMValueRef LLVMConstNSWAdd(LLVMValueRef LHSConstant,
unwrap<Constant>(RHSConstant)));
}
+LLVMValueRef LLVMConstNUWAdd(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNUWAdd(
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
return wrap(ConstantExpr::getFAdd(
unwrap<Constant>(LHSConstant),
@@ -596,6 +691,20 @@ LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
unwrap<Constant>(RHSConstant)));
}
+LLVMValueRef LLVMConstNSWSub(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNSWSub(
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNUWSub(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNUWSub(
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
return wrap(ConstantExpr::getFSub(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
@@ -607,6 +716,20 @@ LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
unwrap<Constant>(RHSConstant)));
}
+LLVMValueRef LLVMConstNSWMul(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNSWMul(
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNUWMul(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNUWMul(
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
return wrap(ConstantExpr::getFMul(
unwrap<Constant>(LHSConstant),
@@ -893,6 +1016,10 @@ LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString,
Constraints, HasSideEffects, IsAlignStack));
}
+LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB) {
+ return wrap(BlockAddress::get(unwrap<Function>(F), unwrap(BB)));
+}
+
/*--.. Operations on global variables, functions, and aliases (globals) ....--*/
LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global) {
@@ -1029,6 +1156,14 @@ LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name) {
GlobalValue::ExternalLinkage, 0, Name));
}
+LLVMValueRef LLVMAddGlobalInAddressSpace(LLVMModuleRef M, LLVMTypeRef Ty,
+ const char *Name,
+ unsigned AddressSpace) {
+ return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false,
+ GlobalValue::ExternalLinkage, 0, Name, 0,
+ false, AddressSpace));
+}
+
LLVMValueRef LLVMGetNamedGlobal(LLVMModuleRef M, const char *Name) {
return wrap(unwrap(M)->getNamedGlobal(Name));
}
@@ -1184,14 +1319,14 @@ void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
Function *Func = unwrap<Function>(Fn);
const AttrListPtr PAL = Func->getAttributes();
- const AttrListPtr PALnew = PAL.addAttr(0, PA);
+ const AttrListPtr PALnew = PAL.addAttr(~0U, PA);
Func->setAttributes(PALnew);
}
void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
Function *Func = unwrap<Function>(Fn);
const AttrListPtr PAL = Func->getAttributes();
- const AttrListPtr PALnew = PAL.removeAttr(0, PA);
+ const AttrListPtr PALnew = PAL.removeAttr(~0U, PA);
Func->setAttributes(PALnew);
}
@@ -1532,6 +1667,21 @@ void LLVMDisposeBuilder(LLVMBuilderRef Builder) {
delete unwrap(Builder);
}
+/*--.. Metadata builders ...................................................--*/
+
+void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) {
+ unwrap(Builder)->SetCurrentDebugLocation(L? unwrap<MDNode>(L) : NULL);
+}
+
+LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder) {
+ return wrap(unwrap(Builder)->getCurrentDebugLocation());
+}
+
+void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) {
+ unwrap(Builder)->SetInstDebugLocation(unwrap<Instruction>(Inst));
+}
+
+
/*--.. Instruction builders ................................................--*/
LLVMValueRef LLVMBuildRetVoid(LLVMBuilderRef B) {
@@ -1561,6 +1711,11 @@ LLVMValueRef LLVMBuildSwitch(LLVMBuilderRef B, LLVMValueRef V,
return wrap(unwrap(B)->CreateSwitch(unwrap(V), unwrap(Else), NumCases));
}
+LLVMValueRef LLVMBuildIndirectBr(LLVMBuilderRef B, LLVMValueRef Addr,
+ unsigned NumDests) {
+ return wrap(unwrap(B)->CreateIndirectBr(unwrap(Addr), NumDests));
+}
+
LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn,
LLVMValueRef *Args, unsigned NumArgs,
LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
@@ -1583,6 +1738,10 @@ void LLVMAddCase(LLVMValueRef Switch, LLVMValueRef OnVal,
unwrap<SwitchInst>(Switch)->addCase(unwrap<ConstantInt>(OnVal), unwrap(Dest));
}
+void LLVMAddDestination(LLVMValueRef IndirectBr, LLVMBasicBlockRef Dest) {
+ unwrap<IndirectBrInst>(IndirectBr)->addDestination(unwrap(Dest));
+}
+
/*--.. Arithmetic ..........................................................--*/
LLVMValueRef LLVMBuildAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
@@ -1595,6 +1754,11 @@ LLVMValueRef LLVMBuildNSWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RH
return wrap(unwrap(B)->CreateNSWAdd(unwrap(LHS), unwrap(RHS), Name));
}
+LLVMValueRef LLVMBuildNUWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
LLVMValueRef LLVMBuildFAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name) {
return wrap(unwrap(B)->CreateFAdd(unwrap(LHS), unwrap(RHS), Name));
@@ -1605,6 +1769,16 @@ LLVMValueRef LLVMBuildSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
return wrap(unwrap(B)->CreateSub(unwrap(LHS), unwrap(RHS), Name));
}
+LLVMValueRef LLVMBuildNSWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNUWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
LLVMValueRef LLVMBuildFSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name) {
return wrap(unwrap(B)->CreateFSub(unwrap(LHS), unwrap(RHS), Name));
@@ -1615,6 +1789,16 @@ LLVMValueRef LLVMBuildMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
return wrap(unwrap(B)->CreateMul(unwrap(LHS), unwrap(RHS), Name));
}
+LLVMValueRef LLVMBuildNSWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNUWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
LLVMValueRef LLVMBuildFMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name) {
return wrap(unwrap(B)->CreateFMul(unwrap(LHS), unwrap(RHS), Name));
@@ -1685,10 +1869,27 @@ LLVMValueRef LLVMBuildXor(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
return wrap(unwrap(B)->CreateXor(unwrap(LHS), unwrap(RHS), Name));
}
+LLVMValueRef LLVMBuildBinOp(LLVMBuilderRef B, LLVMOpcode Op,
+ LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateBinOp(Instruction::BinaryOps(Op), unwrap(LHS),
+ unwrap(RHS), Name));
+}
+
LLVMValueRef LLVMBuildNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
return wrap(unwrap(B)->CreateNeg(unwrap(V), Name));
}
+LLVMValueRef LLVMBuildNSWNeg(LLVMBuilderRef B, LLVMValueRef V,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWNeg(unwrap(V), Name));
+}
+
+LLVMValueRef LLVMBuildNUWNeg(LLVMBuilderRef B, LLVMValueRef V,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWNeg(unwrap(V), Name));
+}
+
LLVMValueRef LLVMBuildFNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
return wrap(unwrap(B)->CreateFNeg(unwrap(V), Name));
}
@@ -1856,6 +2057,12 @@ LLVMValueRef LLVMBuildTruncOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
Name));
}
+LLVMValueRef LLVMBuildCast(LLVMBuilderRef B, LLVMOpcode Op, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateCast(Instruction::CastOps(Op), unwrap(Val),
+ unwrap(DestTy), Name));
+}
+
LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef B, LLVMValueRef Val,
LLVMTypeRef DestTy, const char *Name) {
return wrap(unwrap(B)->CreatePointerCast(unwrap(Val), unwrap(DestTy), Name));
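The Core.cpp hunks above also rework use-list traversal in the C bindings: LLVMGetFirstUse and LLVMGetNextUse now hand back a plain LLVMUseRef, with a null reference marking the end of the list. A minimal sketch of walking a value's uses with these functions; countUses is a hypothetical helper, not part of the patch:

    #include "llvm-c/Core.h"

    /* Sketch only: iterate the use list of V with the revised LLVMUseRef API. */
    static unsigned countUses(LLVMValueRef V) {
      unsigned N = 0;
      for (LLVMUseRef U = LLVMGetFirstUse(V); U; U = LLVMGetNextUse(U)) {
        LLVMValueRef Usr = LLVMGetUser(U);   /* the user holding this use */
        (void)Usr;
        ++N;
      }
      return N;
    }
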
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index f00f6ee..dbc283e 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -73,35 +73,35 @@ unsigned Argument::getArgNo() const {
/// hasByValAttr - Return true if this argument has the byval attribute on it
/// in its containing function.
bool Argument::hasByValAttr() const {
- if (!isa<PointerType>(getType())) return false;
+ if (!getType()->isPointerTy()) return false;
return getParent()->paramHasAttr(getArgNo()+1, Attribute::ByVal);
}
/// hasNestAttr - Return true if this argument has the nest attribute on
/// it in its containing function.
bool Argument::hasNestAttr() const {
- if (!isa<PointerType>(getType())) return false;
+ if (!getType()->isPointerTy()) return false;
return getParent()->paramHasAttr(getArgNo()+1, Attribute::Nest);
}
/// hasNoAliasAttr - Return true if this argument has the noalias attribute on
/// it in its containing function.
bool Argument::hasNoAliasAttr() const {
- if (!isa<PointerType>(getType())) return false;
+ if (!getType()->isPointerTy()) return false;
return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoAlias);
}
/// hasNoCaptureAttr - Return true if this argument has the nocapture attribute
/// on it in its containing function.
bool Argument::hasNoCaptureAttr() const {
- if (!isa<PointerType>(getType())) return false;
+ if (!getType()->isPointerTy()) return false;
return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoCapture);
}
/// hasSRetAttr - Return true if this argument has the sret attribute on
/// it in its containing function.
bool Argument::hasStructRetAttr() const {
- if (!isa<PointerType>(getType())) return false;
+ if (!getType()->isPointerTy()) return false;
if (this != getParent()->arg_begin())
return false; // StructRet param must be first param
return getParent()->paramHasAttr(1, Attribute::StructRet);
@@ -155,7 +155,7 @@ Function::Function(const FunctionType *Ty, LinkageTypes Linkage,
: GlobalValue(PointerType::getUnqual(Ty),
Value::FunctionVal, 0, 0, Linkage, name) {
assert(FunctionType::isValidReturnType(getReturnType()) &&
- !isa<OpaqueType>(getReturnType()) && "invalid return type");
+ !getReturnType()->isOpaqueTy() && "invalid return type");
SymTab = new ValueSymbolTable();
// If the function has arguments, mark them as lazily built.
diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp
index f149c44..489ec65 100644
--- a/lib/VMCore/Globals.cpp
+++ b/lib/VMCore/Globals.cpp
@@ -44,10 +44,10 @@ static bool removeDeadUsersOfConstant(const Constant *C) {
}
bool GlobalValue::isMaterializable() const {
- return getParent()->isMaterializable(this);
+ return getParent() && getParent()->isMaterializable(this);
}
bool GlobalValue::isDematerializable() const {
- return getParent()->isDematerializable(this);
+ return getParent() && getParent()->isDematerializable(this);
}
bool GlobalValue::Materialize(std::string *ErrInfo) {
return getParent()->Materialize(this, ErrInfo);
diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp
index 4bc3cbb..9f2786e 100644
--- a/lib/VMCore/IRBuilder.cpp
+++ b/lib/VMCore/IRBuilder.cpp
@@ -19,7 +19,7 @@
using namespace llvm;
/// CreateGlobalString - Make a new global variable with an initializer that
-/// has array of i8 type filled in the nul terminated string value
+/// has array of i8 type filled in with the nul terminated string value
/// specified. If Name is specified, it is the name of the global variable
/// created.
Value *IRBuilderBase::CreateGlobalString(const char *Str, const Twine &Name) {
diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp
index ec21773..6355834 100644
--- a/lib/VMCore/InlineAsm.cpp
+++ b/lib/VMCore/InlineAsm.cpp
@@ -220,7 +220,7 @@ bool InlineAsm::Verify(const FunctionType *Ty, StringRef ConstStr) {
if (!Ty->getReturnType()->isVoidTy()) return false;
break;
case 1:
- if (isa<StructType>(Ty->getReturnType())) return false;
+ if (Ty->getReturnType()->isStructTy()) return false;
break;
default:
const StructType *STy = dyn_cast<StructType>(Ty->getReturnType());
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 4ec8295..8f4763f 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -562,7 +562,7 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore,
BasicBlock *InsertAtEnd) {
assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
"createFree needs either InsertBefore or InsertAtEnd");
- assert(isa<PointerType>(Source->getType()) &&
+ assert(Source->getType()->isPointerTy() &&
"Can not free something of nonpointer type!");
BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
@@ -787,7 +787,7 @@ BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const {
void BranchInst::AssertOK() {
if (isConditional())
- assert(getCondition()->getType()->isInteger(1) &&
+ assert(getCondition()->getType()->isIntegerTy(1) &&
"May only branch on boolean predicates!");
}
@@ -892,7 +892,7 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) {
else {
assert(!isa<BasicBlock>(Amt) &&
"Passed basic block into allocation size parameter! Use other ctor");
- assert(Amt->getType()->isInteger(32) &&
+ assert(Amt->getType()->isIntegerTy(32) &&
"Allocation array size is not a 32-bit integer!");
}
return Amt;
@@ -989,7 +989,7 @@ bool AllocaInst::isStaticAlloca() const {
//===----------------------------------------------------------------------===//
void LoadInst::AssertOK() {
- assert(isa<PointerType>(getOperand(0)->getType()) &&
+ assert(getOperand(0)->getType()->isPointerTy() &&
"Ptr must have pointer type.");
}
@@ -1103,7 +1103,7 @@ void LoadInst::setAlignment(unsigned Align) {
void StoreInst::AssertOK() {
assert(getOperand(0) && getOperand(1) && "Both operands must be non-null!");
- assert(isa<PointerType>(getOperand(1)->getType()) &&
+ assert(getOperand(1)->getType()->isPointerTy() &&
"Ptr must have pointer type!");
assert(getOperand(0)->getType() ==
cast<PointerType>(getOperand(1)->getType())->getElementType()
@@ -1285,7 +1285,7 @@ static const Type* getIndexedTypeInternal(const Type *Ptr, IndexTy const *Idxs,
unsigned CurIdx = 1;
for (; CurIdx != NumIdx; ++CurIdx) {
const CompositeType *CT = dyn_cast<CompositeType>(Agg);
- if (!CT || isa<PointerType>(CT)) return 0;
+ if (!CT || CT->isPointerTy()) return 0;
IndexTy Index = Idxs[CurIdx];
if (!CT->indexValid(Index)) return 0;
Agg = CT->getTypeAtIndex(Index);
@@ -1391,7 +1391,7 @@ ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) {
- if (!isa<VectorType>(Val->getType()) || !Index->getType()->isInteger(32))
+ if (!Val->getType()->isVectorTy() || !Index->getType()->isIntegerTy(32))
return false;
return true;
}
@@ -1432,13 +1432,13 @@ InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
const Value *Index) {
- if (!isa<VectorType>(Vec->getType()))
+ if (!Vec->getType()->isVectorTy())
return false; // First operand of insertelement must be vector type.
if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType())
return false;// Second operand of insertelement must be vector element type.
- if (!Index->getType()->isInteger(32))
+ if (!Index->getType()->isIntegerTy(32))
return false; // Third operand of insertelement must be i32.
return true;
}
@@ -1485,12 +1485,12 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
const Value *Mask) {
- if (!isa<VectorType>(V1->getType()) || V1->getType() != V2->getType())
+ if (!V1->getType()->isVectorTy() || V1->getType() != V2->getType())
return false;
const VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
if (!isa<Constant>(Mask) || MaskTy == 0 ||
- !MaskTy->getElementType()->isInteger(32))
+ !MaskTy->getElementType()->isIntegerTy(32))
return false;
return true;
}
@@ -1602,7 +1602,7 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg,
unsigned CurIdx = 0;
for (; CurIdx != NumIdx; ++CurIdx) {
const CompositeType *CT = dyn_cast<CompositeType>(Agg);
- if (!CT || isa<PointerType>(CT) || isa<VectorType>(CT)) return 0;
+ if (!CT || CT->isPointerTy() || CT->isVectorTy()) return 0;
unsigned Index = Idxs[CurIdx];
if (!CT->indexValid(Index)) return 0;
Agg = CT->getTypeAtIndex(Index);
@@ -1632,7 +1632,7 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg,
static BinaryOperator::BinaryOps AdjustIType(BinaryOperator::BinaryOps iType,
const Type *Ty) {
// API compatibility: Adjust integer opcodes to floating-point opcodes.
- if (Ty->isFPOrFPVector()) {
+ if (Ty->isFPOrFPVectorTy()) {
if (iType == BinaryOperator::Add) iType = BinaryOperator::FAdd;
else if (iType == BinaryOperator::Sub) iType = BinaryOperator::FSub;
else if (iType == BinaryOperator::Mul) iType = BinaryOperator::FMul;
@@ -1678,14 +1678,14 @@ void BinaryOperator::init(BinaryOps iType) {
case Mul:
assert(getType() == LHS->getType() &&
"Arithmetic operation should return same type as operands!");
- assert(getType()->isIntOrIntVector() &&
+ assert(getType()->isIntOrIntVectorTy() &&
"Tried to create an integer operation on a non-integer type!");
break;
case FAdd: case FSub:
case FMul:
assert(getType() == LHS->getType() &&
"Arithmetic operation should return same type as operands!");
- assert(getType()->isFPOrFPVector() &&
+ assert(getType()->isFPOrFPVectorTy() &&
"Tried to create a floating-point operation on a "
"non-floating-point type!");
break;
@@ -1693,28 +1693,28 @@ void BinaryOperator::init(BinaryOps iType) {
case SDiv:
assert(getType() == LHS->getType() &&
"Arithmetic operation should return same type as operands!");
- assert((getType()->isInteger() || (isa<VectorType>(getType()) &&
- cast<VectorType>(getType())->getElementType()->isInteger())) &&
+ assert((getType()->isIntegerTy() || (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
"Incorrect operand type (not integer) for S/UDIV");
break;
case FDiv:
assert(getType() == LHS->getType() &&
"Arithmetic operation should return same type as operands!");
- assert(getType()->isFPOrFPVector() &&
+ assert(getType()->isFPOrFPVectorTy() &&
"Incorrect operand type (not floating point) for FDIV");
break;
case URem:
case SRem:
assert(getType() == LHS->getType() &&
"Arithmetic operation should return same type as operands!");
- assert((getType()->isInteger() || (isa<VectorType>(getType()) &&
- cast<VectorType>(getType())->getElementType()->isInteger())) &&
+ assert((getType()->isIntegerTy() || (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
"Incorrect operand type (not integer) for S/UREM");
break;
case FRem:
assert(getType() == LHS->getType() &&
"Arithmetic operation should return same type as operands!");
- assert(getType()->isFPOrFPVector() &&
+ assert(getType()->isFPOrFPVectorTy() &&
"Incorrect operand type (not floating point) for FREM");
break;
case Shl:
@@ -1722,18 +1722,18 @@ void BinaryOperator::init(BinaryOps iType) {
case AShr:
assert(getType() == LHS->getType() &&
"Shift operation should return same type as operands!");
- assert((getType()->isInteger() ||
- (isa<VectorType>(getType()) &&
- cast<VectorType>(getType())->getElementType()->isInteger())) &&
+ assert((getType()->isIntegerTy() ||
+ (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
"Tried to create a shift operation on a non-integral type!");
break;
case And: case Or:
case Xor:
assert(getType() == LHS->getType() &&
"Logical operation should return same type as operands!");
- assert((getType()->isInteger() ||
- (isa<VectorType>(getType()) &&
- cast<VectorType>(getType())->getElementType()->isInteger())) &&
+ assert((getType()->isIntegerTy() ||
+ (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
"Tried to create a logical operation on a non-integral type!");
break;
default:
@@ -1960,7 +1960,8 @@ bool CastInst::isIntegerCast() const {
case Instruction::Trunc:
return true;
case Instruction::BitCast:
- return getOperand(0)->getType()->isInteger() && getType()->isInteger();
+ return getOperand(0)->getType()->isIntegerTy() &&
+ getType()->isIntegerTy();
}
}
@@ -1976,8 +1977,8 @@ bool CastInst::isLosslessCast() const {
return true;
// Pointer to pointer is always lossless.
- if (isa<PointerType>(SrcTy))
- return isa<PointerType>(DstTy);
+ if (SrcTy->isPointerTy())
+ return DstTy->isPointerTy();
return false; // Other types have no identity values
}
@@ -2093,25 +2094,25 @@ unsigned CastInst::isEliminableCastPair(
// no-op cast in second op implies firstOp as long as the DestTy
// is integer and we are not converting between a vector and a
// non vector type.
- if (!isa<VectorType>(SrcTy) && DstTy->isInteger())
+ if (!SrcTy->isVectorTy() && DstTy->isIntegerTy())
return firstOp;
return 0;
case 4:
// no-op cast in second op implies firstOp as long as the DestTy
// is floating point.
- if (DstTy->isFloatingPoint())
+ if (DstTy->isFloatingPointTy())
return firstOp;
return 0;
case 5:
// no-op cast in first op implies secondOp as long as the SrcTy
// is an integer.
- if (SrcTy->isInteger())
+ if (SrcTy->isIntegerTy())
return secondOp;
return 0;
case 6:
// no-op cast in first op implies secondOp as long as the SrcTy
// is a floating point.
- if (SrcTy->isFloatingPoint())
+ if (SrcTy->isFloatingPointTy())
return secondOp;
return 0;
case 7: {
@@ -2147,12 +2148,12 @@ unsigned CastInst::isEliminableCastPair(
case 11:
// bitcast followed by ptrtoint is allowed as long as the bitcast
// is a pointer to pointer cast.
- if (isa<PointerType>(SrcTy) && isa<PointerType>(MidTy))
+ if (SrcTy->isPointerTy() && MidTy->isPointerTy())
return secondOp;
return 0;
case 12:
// inttoptr, bitcast -> intptr if bitcast is a ptr to ptr cast
- if (isa<PointerType>(MidTy) && isa<PointerType>(DstTy))
+ if (MidTy->isPointerTy() && DstTy->isPointerTy())
return firstOp;
return 0;
case 13: {
@@ -2273,11 +2274,11 @@ CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
const Twine &Name,
BasicBlock *InsertAtEnd) {
- assert(isa<PointerType>(S->getType()) && "Invalid cast");
- assert((Ty->isInteger() || isa<PointerType>(Ty)) &&
+ assert(S->getType()->isPointerTy() && "Invalid cast");
+ assert((Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Invalid cast");
- if (Ty->isInteger())
+ if (Ty->isIntegerTy())
return Create(Instruction::PtrToInt, S, Ty, Name, InsertAtEnd);
return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
}
@@ -2286,11 +2287,11 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
const Twine &Name,
Instruction *InsertBefore) {
- assert(isa<PointerType>(S->getType()) && "Invalid cast");
- assert((Ty->isInteger() || isa<PointerType>(Ty)) &&
+ assert(S->getType()->isPointerTy() && "Invalid cast");
+ assert((Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Invalid cast");
- if (Ty->isInteger())
+ if (Ty->isIntegerTy())
return Create(Instruction::PtrToInt, S, Ty, Name, InsertBefore);
return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
}
@@ -2298,7 +2299,7 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
bool isSigned, const Twine &Name,
Instruction *InsertBefore) {
- assert(C->getType()->isIntOrIntVector() && Ty->isIntOrIntVector() &&
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
"Invalid integer cast");
unsigned SrcBits = C->getType()->getScalarSizeInBits();
unsigned DstBits = Ty->getScalarSizeInBits();
@@ -2312,7 +2313,7 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
bool isSigned, const Twine &Name,
BasicBlock *InsertAtEnd) {
- assert(C->getType()->isIntOrIntVector() && Ty->isIntOrIntVector() &&
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
"Invalid cast");
unsigned SrcBits = C->getType()->getScalarSizeInBits();
unsigned DstBits = Ty->getScalarSizeInBits();
@@ -2326,7 +2327,7 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
const Twine &Name,
Instruction *InsertBefore) {
- assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() &&
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
"Invalid cast");
unsigned SrcBits = C->getType()->getScalarSizeInBits();
unsigned DstBits = Ty->getScalarSizeInBits();
@@ -2339,7 +2340,7 @@ CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
const Twine &Name,
BasicBlock *InsertAtEnd) {
- assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() &&
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
"Invalid cast");
unsigned SrcBits = C->getType()->getScalarSizeInBits();
unsigned DstBits = Ty->getScalarSizeInBits();
@@ -2363,21 +2364,21 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) {
unsigned DestBits = DestTy->getScalarSizeInBits(); // 0 for ptr
// Run through the possibilities ...
- if (DestTy->isInteger()) { // Casting to integral
- if (SrcTy->isInteger()) { // Casting from integral
+ if (DestTy->isIntegerTy()) { // Casting to integral
+ if (SrcTy->isIntegerTy()) { // Casting from integral
return true;
- } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
return true;
} else if (const VectorType *PTy = dyn_cast<VectorType>(SrcTy)) {
// Casting from vector
return DestBits == PTy->getBitWidth();
} else { // Casting from something else
- return isa<PointerType>(SrcTy);
+ return SrcTy->isPointerTy();
}
- } else if (DestTy->isFloatingPoint()) { // Casting to floating pt
- if (SrcTy->isInteger()) { // Casting from integral
+ } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt
+ if (SrcTy->isIntegerTy()) { // Casting from integral
return true;
- } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
return true;
} else if (const VectorType *PTy = dyn_cast<VectorType>(SrcTy)) {
// Casting from vector
@@ -2393,10 +2394,10 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) {
} else { // Casting from something else
return DestPTy->getBitWidth() == SrcBits;
}
- } else if (isa<PointerType>(DestTy)) { // Casting to pointer
- if (isa<PointerType>(SrcTy)) { // Casting from pointer
+ } else if (DestTy->isPointerTy()) { // Casting to pointer
+ if (SrcTy->isPointerTy()) { // Casting from pointer
return true;
- } else if (SrcTy->isInteger()) { // Casting from integral
+ } else if (SrcTy->isIntegerTy()) { // Casting from integral
return true;
} else { // Casting from something else
return false;
@@ -2425,8 +2426,8 @@ CastInst::getCastOpcode(
"Only first class types are castable!");
// Run through the possibilities ...
- if (DestTy->isInteger()) { // Casting to integral
- if (SrcTy->isInteger()) { // Casting from integral
+ if (DestTy->isIntegerTy()) { // Casting to integral
+ if (SrcTy->isIntegerTy()) { // Casting from integral
if (DestBits < SrcBits)
return Trunc; // int -> smaller int
else if (DestBits > SrcBits) { // its an extension
@@ -2437,7 +2438,7 @@ CastInst::getCastOpcode(
} else {
return BitCast; // Same size, No-op cast
}
- } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
if (DestIsSigned)
return FPToSI; // FP -> sint
else
@@ -2448,17 +2449,17 @@ CastInst::getCastOpcode(
PTy = NULL;
return BitCast; // Same size, no-op cast
} else {
- assert(isa<PointerType>(SrcTy) &&
+ assert(SrcTy->isPointerTy() &&
"Casting from a value that is not first-class type");
return PtrToInt; // ptr -> int
}
- } else if (DestTy->isFloatingPoint()) { // Casting to floating pt
- if (SrcTy->isInteger()) { // Casting from integral
+ } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt
+ if (SrcTy->isIntegerTy()) { // Casting from integral
if (SrcIsSigned)
return SIToFP; // sint -> FP
else
return UIToFP; // uint -> FP
- } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
if (DestBits < SrcBits) {
return FPTrunc; // FP -> smaller FP
} else if (DestBits > SrcBits) {
@@ -2485,10 +2486,10 @@ CastInst::getCastOpcode(
} else {
assert(!"Illegal cast to vector (wrong type or size)");
}
- } else if (isa<PointerType>(DestTy)) {
- if (isa<PointerType>(SrcTy)) {
+ } else if (DestTy->isPointerTy()) {
+ if (SrcTy->isPointerTy()) {
return BitCast; // ptr -> ptr
- } else if (SrcTy->isInteger()) {
+ } else if (SrcTy->isIntegerTy()) {
return IntToPtr; // int -> ptr
} else {
assert(!"Casting pointer to other than pointer or int");
@@ -2528,50 +2529,50 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) {
switch (op) {
default: return false; // This is an input error
case Instruction::Trunc:
- return SrcTy->isIntOrIntVector() &&
- DstTy->isIntOrIntVector()&& SrcBitSize > DstBitSize;
+ return SrcTy->isIntOrIntVectorTy() &&
+ DstTy->isIntOrIntVectorTy()&& SrcBitSize > DstBitSize;
case Instruction::ZExt:
- return SrcTy->isIntOrIntVector() &&
- DstTy->isIntOrIntVector()&& SrcBitSize < DstBitSize;
+ return SrcTy->isIntOrIntVectorTy() &&
+ DstTy->isIntOrIntVectorTy()&& SrcBitSize < DstBitSize;
case Instruction::SExt:
- return SrcTy->isIntOrIntVector() &&
- DstTy->isIntOrIntVector()&& SrcBitSize < DstBitSize;
+ return SrcTy->isIntOrIntVectorTy() &&
+ DstTy->isIntOrIntVectorTy()&& SrcBitSize < DstBitSize;
case Instruction::FPTrunc:
- return SrcTy->isFPOrFPVector() &&
- DstTy->isFPOrFPVector() &&
+ return SrcTy->isFPOrFPVectorTy() &&
+ DstTy->isFPOrFPVectorTy() &&
SrcBitSize > DstBitSize;
case Instruction::FPExt:
- return SrcTy->isFPOrFPVector() &&
- DstTy->isFPOrFPVector() &&
+ return SrcTy->isFPOrFPVectorTy() &&
+ DstTy->isFPOrFPVectorTy() &&
SrcBitSize < DstBitSize;
case Instruction::UIToFP:
case Instruction::SIToFP:
if (const VectorType *SVTy = dyn_cast<VectorType>(SrcTy)) {
if (const VectorType *DVTy = dyn_cast<VectorType>(DstTy)) {
- return SVTy->getElementType()->isIntOrIntVector() &&
- DVTy->getElementType()->isFPOrFPVector() &&
+ return SVTy->getElementType()->isIntOrIntVectorTy() &&
+ DVTy->getElementType()->isFPOrFPVectorTy() &&
SVTy->getNumElements() == DVTy->getNumElements();
}
}
- return SrcTy->isIntOrIntVector() && DstTy->isFPOrFPVector();
+ return SrcTy->isIntOrIntVectorTy() && DstTy->isFPOrFPVectorTy();
case Instruction::FPToUI:
case Instruction::FPToSI:
if (const VectorType *SVTy = dyn_cast<VectorType>(SrcTy)) {
if (const VectorType *DVTy = dyn_cast<VectorType>(DstTy)) {
- return SVTy->getElementType()->isFPOrFPVector() &&
- DVTy->getElementType()->isIntOrIntVector() &&
+ return SVTy->getElementType()->isFPOrFPVectorTy() &&
+ DVTy->getElementType()->isIntOrIntVectorTy() &&
SVTy->getNumElements() == DVTy->getNumElements();
}
}
- return SrcTy->isFPOrFPVector() && DstTy->isIntOrIntVector();
+ return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy();
case Instruction::PtrToInt:
- return isa<PointerType>(SrcTy) && DstTy->isInteger();
+ return SrcTy->isPointerTy() && DstTy->isIntegerTy();
case Instruction::IntToPtr:
- return SrcTy->isInteger() && isa<PointerType>(DstTy);
+ return SrcTy->isIntegerTy() && DstTy->isPointerTy();
case Instruction::BitCast:
// BitCast implies a no-op cast of type only. No bits change.
// However, you can't cast pointers to anything but pointers.
- if (isa<PointerType>(SrcTy) != isa<PointerType>(DstTy))
+ if (SrcTy->isPointerTy() != DstTy->isPointerTy())
return false;
// Now we know we're not dealing with a pointer/non-pointer mismatch. In all
@@ -3149,7 +3150,7 @@ void SwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
//===----------------------------------------------------------------------===//
void IndirectBrInst::init(Value *Address, unsigned NumDests) {
- assert(Address && isa<PointerType>(Address->getType()) &&
+ assert(Address && Address->getType()->isPointerTy() &&
"Address of indirectbr must be a pointer");
ReservedSpace = 1+NumDests;
NumOperands = 1;
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index ccca789..9887f28 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -116,6 +116,10 @@ public:
ConstantStruct, true /*largekey*/> StructConstantsTy;
StructConstantsTy StructConstants;
+ typedef ConstantUniqueMap<Constant*, UnionType, ConstantUnion>
+ UnionConstantsTy;
+ UnionConstantsTy UnionConstants;
+
typedef ConstantUniqueMap<std::vector<Constant*>, VectorType,
ConstantVector> VectorConstantsTy;
VectorConstantsTy VectorConstants;
@@ -159,12 +163,16 @@ public:
TypeMap<PointerValType, PointerType> PointerTypes;
TypeMap<FunctionValType, FunctionType> FunctionTypes;
TypeMap<StructValType, StructType> StructTypes;
+ TypeMap<UnionValType, UnionType> UnionTypes;
TypeMap<IntegerValType, IntegerType> IntegerTypes;
// Opaque types are not structurally uniqued, so don't use TypeMap.
typedef SmallPtrSet<const OpaqueType*, 8> OpaqueTypesTy;
OpaqueTypesTy OpaqueTypes;
-
+
+ /// Used as an abstract type that will never be resolved.
+ OpaqueType *const AlwaysOpaqueTy;
+
/// ValueHandles - This map keeps track of all of the value handles that are
/// watching a Value*. The Value::HasValueHandle bit is used to know
@@ -196,7 +204,12 @@ public:
Int8Ty(C, 8),
Int16Ty(C, 16),
Int32Ty(C, 32),
- Int64Ty(C, 64) { }
+ Int64Ty(C, 64),
+ AlwaysOpaqueTy(new OpaqueType(C)) {
+ // Make sure the AlwaysOpaqueTy stays alive as long as the Context.
+ AlwaysOpaqueTy->addRef();
+ OpaqueTypes.insert(AlwaysOpaqueTy);
+ }
~LLVMContextImpl() {
ExprConstants.freeConstants();
@@ -216,12 +229,28 @@ public:
if (I->second->use_empty())
delete I->second;
}
- MDNodeSet.clear();
+ AlwaysOpaqueTy->dropRef();
for (OpaqueTypesTy::iterator I = OpaqueTypes.begin(), E = OpaqueTypes.end();
I != E; ++I) {
(*I)->AbstractTypeUsers.clear();
delete *I;
}
+ // Destroy MDNode operands first.
+ for (FoldingSetIterator<MDNode> I = MDNodeSet.begin(), E = MDNodeSet.end();
+ I != E;) {
+ MDNode *N = &(*I);
+ ++I;
+ N->replaceAllOperandsWithNull();
+ }
+ while (!MDNodeSet.empty()) {
+ MDNode *N = &(*MDNodeSet.begin());
+ N->destroy();
+ }
+ // Destroy MDStrings.
+ for (StringMap<MDString*>::iterator I = MDStringCache.begin(),
+ E = MDStringCache.end(); I != E; ++I) {
+ delete I->second;
+ }
}
};
diff --git a/lib/VMCore/Makefile b/lib/VMCore/Makefile
index bc5e77d..4395ecf 100644
--- a/lib/VMCore/Makefile
+++ b/lib/VMCore/Makefile
@@ -30,5 +30,5 @@ $(GENFILE): $(ObjDir)/Intrinsics.gen.tmp
changed significantly. )
install-local:: $(GENFILE)
- $(Echo) Installing $(PROJ_includedir)/llvm/Intrinsics.gen
- $(Verb) $(DataInstall) $(GENFILE) $(PROJ_includedir)/llvm/Intrinsics.gen
+ $(Echo) Installing $(DESTDIR)$(PROJ_includedir)/llvm/Intrinsics.gen
+ $(Verb) $(DataInstall) $(GENFILE) $(DESTDIR)$(PROJ_includedir)/llvm/Intrinsics.gen
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index 07a5f3c..a08c454 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -257,6 +257,13 @@ void MDNode::Profile(FoldingSetNodeID &ID) const {
ID.AddPointer(getOperand(i));
}
+// replaceAllOperandsWithNull - This is used while destroying llvm context to
+// gracefully delete all nodes. This method replaces all operands with null.
+void MDNode::replaceAllOperandsWithNull() {
+ for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands;
+ Op != E; ++Op)
+ replaceOperand(Op, 0);
+}
// Replace value from this node's operand list.
void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index 2b0b235..a782e5a 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -194,6 +194,9 @@ PassManagerType BasicBlockPass::getPotentialPassManagerType() const {
//
namespace {
class PassRegistrar {
+ /// Guards the contents of this class.
+ mutable sys::SmartMutex<true> Lock;
+
/// PassInfoMap - Keep track of the passinfo object for each registered llvm
/// pass.
typedef std::map<intptr_t, const PassInfo*> MapType;
@@ -213,16 +216,19 @@ class PassRegistrar {
public:
const PassInfo *GetPassInfo(intptr_t TI) const {
+ sys::SmartScopedLock<true> Guard(Lock);
MapType::const_iterator I = PassInfoMap.find(TI);
return I != PassInfoMap.end() ? I->second : 0;
}
const PassInfo *GetPassInfo(StringRef Arg) const {
+ sys::SmartScopedLock<true> Guard(Lock);
StringMapType::const_iterator I = PassInfoStringMap.find(Arg);
return I != PassInfoStringMap.end() ? I->second : 0;
}
void RegisterPass(const PassInfo &PI) {
+ sys::SmartScopedLock<true> Guard(Lock);
bool Inserted =
PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
assert(Inserted && "Pass registered multiple times!"); Inserted=Inserted;
@@ -230,6 +236,7 @@ public:
}
void UnregisterPass(const PassInfo &PI) {
+ sys::SmartScopedLock<true> Guard(Lock);
MapType::iterator I = PassInfoMap.find(PI.getTypeInfo());
assert(I != PassInfoMap.end() && "Pass registered but not in map!");
@@ -239,6 +246,7 @@ public:
}
void EnumerateWith(PassRegistrationListener *L) {
+ sys::SmartScopedLock<true> Guard(Lock);
for (MapType::const_iterator I = PassInfoMap.begin(),
E = PassInfoMap.end(); I != E; ++I)
L->passEnumerate(I->second);
@@ -249,6 +257,7 @@ public:
void RegisterAnalysisGroup(PassInfo *InterfaceInfo,
const PassInfo *ImplementationInfo,
bool isDefault) {
+ sys::SmartScopedLock<true> Guard(Lock);
AnalysisGroupInfo &AGI = AnalysisGroupInfoMap[InterfaceInfo];
assert(AGI.Implementations.count(ImplementationInfo) == 0 &&
"Cannot add a pass to the same analysis group more than once!");
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index a1d554e..c4dfe14 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -1118,6 +1118,7 @@ bool BBPassManager::runOnFunction(Function &F) {
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
BasicBlockPass *BP = getContainedPass(Index);
+ bool LocalChanged = false;
dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, I->getName());
dumpRequiredSet(BP);
@@ -1129,11 +1130,12 @@ bool BBPassManager::runOnFunction(Function &F) {
PassManagerPrettyStackEntry X(BP, *I);
Timer *T = StartPassTimer(BP);
- Changed |= BP->runOnBasicBlock(*I);
+ LocalChanged |= BP->runOnBasicBlock(*I);
StopPassTimer(BP, T);
}
- if (Changed)
+ Changed |= LocalChanged;
+ if (LocalChanged)
dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG,
I->getName());
dumpPreservedSet(BP);
@@ -1220,9 +1222,11 @@ void FunctionPassManager::add(Pass *P) {
/// so, return true.
///
bool FunctionPassManager::run(Function &F) {
- std::string errstr;
- if (F.Materialize(&errstr)) {
- llvm_report_error("Error reading bitcode file: " + errstr);
+ if (F.isMaterializable()) {
+ std::string errstr;
+ if (F.Materialize(&errstr)) {
+ llvm_report_error("Error reading bitcode file: " + errstr);
+ }
}
return FPM->run(F);
}
@@ -1332,6 +1336,7 @@ bool FPPassManager::runOnFunction(Function &F) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
FunctionPass *FP = getContainedPass(Index);
+ bool LocalChanged = false;
dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getName());
dumpRequiredSet(FP);
@@ -1342,11 +1347,12 @@ bool FPPassManager::runOnFunction(Function &F) {
PassManagerPrettyStackEntry X(FP, F);
Timer *T = StartPassTimer(FP);
- Changed |= FP->runOnFunction(F);
+ LocalChanged |= FP->runOnFunction(F);
StopPassTimer(FP, T);
}
- if (Changed)
+ Changed |= LocalChanged;
+ if (LocalChanged)
dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getName());
dumpPreservedSet(FP);
@@ -1405,6 +1411,7 @@ MPPassManager::runOnModule(Module &M) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
ModulePass *MP = getContainedPass(Index);
+ bool LocalChanged = false;
dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG, M.getModuleIdentifier());
dumpRequiredSet(MP);
@@ -1414,11 +1421,12 @@ MPPassManager::runOnModule(Module &M) {
{
PassManagerPrettyStackEntry X(MP, M);
Timer *T = StartPassTimer(MP);
- Changed |= MP->runOnModule(M);
+ LocalChanged |= MP->runOnModule(M);
StopPassTimer(MP, T);
}
- if (Changed)
+ Changed |= LocalChanged;
+ if (LocalChanged)
dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG,
M.getModuleIdentifier());
dumpPreservedSet(MP);
@@ -1704,8 +1712,13 @@ LLVMPassManagerRef LLVMCreatePassManager() {
return wrap(new PassManager());
}
+LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M) {
+ return wrap(new FunctionPassManager(unwrap(M)));
+}
+
LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) {
- return wrap(new FunctionPassManager(unwrap(P)));
+ return LLVMCreateFunctionPassManagerForModule(
+ reinterpret_cast<LLVMModuleRef>(P));
}
LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) {
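LLVMCreateFunctionPassManagerForModule, added in the hunk above, supersedes the ModuleProvider-based LLVMCreateFunctionPassManager. A minimal sketch of driving it from the C API, assuming the standard LLVM-C calls LLVMInitializeFunctionPassManager, LLVMRunFunctionPassManager, LLVMFinalizeFunctionPassManager, LLVMGetFirstFunction, LLVMGetNextFunction and LLVMDisposePassManager; the wrapper name runFunctionPasses is illustrative only, not part of this patch:
#include "llvm-c/Core.h"
/* Sketch only: run the function pass manager over every function in a module,
   then release it. */
static void runFunctionPasses(LLVMModuleRef M) {
  LLVMPassManagerRef FPM = LLVMCreateFunctionPassManagerForModule(M);
  LLVMInitializeFunctionPassManager(FPM);
  for (LLVMValueRef F = LLVMGetFirstFunction(M); F; F = LLVMGetNextFunction(F))
    LLVMRunFunctionPassManager(FPM, F);
  LLVMFinalizeFunctionPassManager(FPM);
  LLVMDisposePassManager(FPM);
}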
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 044de4f..9b2c2ca 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -50,8 +50,8 @@ void AbstractTypeUser::setType(Value *V, const Type *NewTy) {
/// Because of the way Type subclasses are allocated, this function is necessary
/// to use the correct kind of "delete" operator to deallocate the Type object.
-/// Some type objects (FunctionTy, StructTy) allocate additional space after
-/// the space for their derived type to hold the contained types array of
+/// Some type objects (FunctionTy, StructTy, UnionTy) allocate additional space
+/// after the space for their derived type to hold the contained types array of
/// PATypeHandles. Using this allocation scheme means all the PATypeHandles are
/// allocated with the type object, decreasing allocations and eliminating the
/// need for a std::vector to be used in the Type class itself.
@@ -61,7 +61,8 @@ void Type::destroy() const {
// Structures and Functions allocate their contained types past the end of
// the type object itself. These need to be destroyed differently than the
// other types.
- if (isa<FunctionType>(this) || isa<StructType>(this)) {
+ if (this->isFunctionTy() || this->isStructTy() ||
+ this->isUnionTy()) {
// First, make sure we destruct any PATypeHandles allocated by these
// subclasses. They must be manually destructed.
for (unsigned i = 0; i < NumContainedTys; ++i)
@@ -69,10 +70,12 @@ void Type::destroy() const {
// Now call the destructor for the subclass directly because we're going
// to delete this as an array of char.
- if (isa<FunctionType>(this))
+ if (this->isFunctionTy())
static_cast<const FunctionType*>(this)->FunctionType::~FunctionType();
- else
+ else if (this->isStructTy())
static_cast<const StructType*>(this)->StructType::~StructType();
+ else
+ static_cast<const UnionType*>(this)->UnionType::~UnionType();
// Finally, remove the memory as an array deallocation of the chars it was
// constructed from.
@@ -124,32 +127,32 @@ const Type *Type::getScalarType() const {
return this;
}
-/// isInteger - Return true if this is an IntegerType of the specified width.
-bool Type::isInteger(unsigned Bitwidth) const {
- return isInteger() && cast<IntegerType>(this)->getBitWidth() == Bitwidth;
+/// isIntegerTy - Return true if this is an IntegerType of the specified width.
+bool Type::isIntegerTy(unsigned Bitwidth) const {
+ return isIntegerTy() && cast<IntegerType>(this)->getBitWidth() == Bitwidth;
}
-/// isIntOrIntVector - Return true if this is an integer type or a vector of
+/// isIntOrIntVectorTy - Return true if this is an integer type or a vector of
/// integer types.
///
-bool Type::isIntOrIntVector() const {
- if (isInteger())
+bool Type::isIntOrIntVectorTy() const {
+ if (isIntegerTy())
return true;
if (ID != Type::VectorTyID) return false;
- return cast<VectorType>(this)->getElementType()->isInteger();
+ return cast<VectorType>(this)->getElementType()->isIntegerTy();
}
-/// isFPOrFPVector - Return true if this is a FP type or a vector of FP types.
+/// isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP types.
///
-bool Type::isFPOrFPVector() const {
+bool Type::isFPOrFPVectorTy() const {
if (ID == Type::FloatTyID || ID == Type::DoubleTyID ||
ID == Type::FP128TyID || ID == Type::X86_FP80TyID ||
ID == Type::PPC_FP128TyID)
return true;
if (ID != Type::VectorTyID) return false;
- return cast<VectorType>(this)->getElementType()->isFloatingPoint();
+ return cast<VectorType>(this)->getElementType()->isFloatingPointTy();
}
// canLosslesslyBitCastTo - Return true if this type can be converted to
@@ -173,8 +176,8 @@ bool Type::canLosslesslyBitCastTo(const Type *Ty) const {
// At this point we have only various mismatches of the first class types
// remaining and ptr->ptr. Just select the lossless conversions. Everything
// else is not lossless.
- if (isa<PointerType>(this))
- return isa<PointerType>(Ty);
+ if (this->isPointerTy())
+ return Ty->isPointerTy();
return false; // Other types have no identity values
}
@@ -204,7 +207,7 @@ unsigned Type::getScalarSizeInBits() const {
int Type::getFPMantissaWidth() const {
if (const VectorType *VTy = dyn_cast<VectorType>(this))
return VTy->getElementType()->getFPMantissaWidth();
- assert(isFloatingPoint() && "Not a floating point type!");
+ assert(isFloatingPointTy() && "Not a floating point type!");
if (ID == FloatTyID) return 24;
if (ID == DoubleTyID) return 53;
if (ID == X86_FP80TyID) return 64;
@@ -217,7 +220,7 @@ int Type::getFPMantissaWidth() const {
/// iff all of the members of the type are sized as well. Since asking for
/// their size is relatively uncommon, move this operation out of line.
bool Type::isSizedDerivedType() const {
- if (isa<IntegerType>(this))
+ if (this->isIntegerTy())
return true;
if (const ArrayType *ATy = dyn_cast<ArrayType>(this))
@@ -226,7 +229,7 @@ bool Type::isSizedDerivedType() const {
if (const VectorType *PTy = dyn_cast<VectorType>(this))
return PTy->getElementType()->isSized();
- if (!isa<StructType>(this))
+ if (!this->isStructTy() && !this->isUnionTy())
return false;
// Okay, our struct is sized if all of the elements are...
@@ -285,7 +288,7 @@ std::string Type::getDescription() const {
bool StructType::indexValid(const Value *V) const {
// Structure indexes require 32-bit integer constants.
- if (V->getType()->isInteger(32))
+ if (V->getType()->isIntegerTy(32))
if (const ConstantInt *CU = dyn_cast<ConstantInt>(V))
return indexValid(CU->getZExtValue());
return false;
@@ -308,6 +311,32 @@ const Type *StructType::getTypeAtIndex(unsigned Idx) const {
return ContainedTys[Idx];
}
+
+bool UnionType::indexValid(const Value *V) const {
+ // Union indexes require 32-bit integer constants.
+ if (V->getType()->isIntegerTy(32))
+ if (const ConstantInt *CU = dyn_cast<ConstantInt>(V))
+ return indexValid(CU->getZExtValue());
+ return false;
+}
+
+bool UnionType::indexValid(unsigned V) const {
+ return V < NumContainedTys;
+}
+
+// getTypeAtIndex - Given an index value into the type, return the type of the
+// element. For a union type, this must be a constant value...
+//
+const Type *UnionType::getTypeAtIndex(const Value *V) const {
+ unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue();
+ return getTypeAtIndex(Idx);
+}
+
+const Type *UnionType::getTypeAtIndex(unsigned Idx) const {
+ assert(indexValid(Idx) && "Invalid union index!");
+ return ContainedTys[Idx];
+}
+
//===----------------------------------------------------------------------===//
// Primitive 'Type' data
//===----------------------------------------------------------------------===//
@@ -418,7 +447,7 @@ bool FunctionType::isValidReturnType(const Type *RetTy) {
/// isValidArgumentType - Return true if the specified type is valid as an
/// argument type.
bool FunctionType::isValidArgumentType(const Type *ArgTy) {
- return ArgTy->isFirstClassType() || isa<OpaqueType>(ArgTy);
+ return ArgTy->isFirstClassType() || ArgTy->isOpaqueTy();
}
FunctionType::FunctionType(const Type *Result,
@@ -463,6 +492,23 @@ StructType::StructType(LLVMContext &C,
setAbstract(isAbstract);
}
+UnionType::UnionType(LLVMContext &C,const Type* const* Types, unsigned NumTypes)
+ : CompositeType(C, UnionTyID) {
+ ContainedTys = reinterpret_cast<PATypeHandle*>(this + 1);
+ NumContainedTys = NumTypes;
+ bool isAbstract = false;
+ for (unsigned i = 0; i < NumTypes; ++i) {
+ assert(Types[i] && "<null> type for union field!");
+ assert(isValidElementType(Types[i]) &&
+ "Invalid type for union element!");
+ new (&ContainedTys[i]) PATypeHandle(Types[i], this);
+ isAbstract |= Types[i]->isAbstract();
+ }
+
+ // Calculate whether or not this type is abstract
+ setAbstract(isAbstract);
+}
+
ArrayType::ArrayType(const Type *ElType, uint64_t NumEl)
: SequentialType(ArrayTyID, ElType) {
NumElements = NumEl;
@@ -507,30 +553,7 @@ void DerivedType::dropAllTypeUses() {
if (NumContainedTys != 0) {
// The type must stay abstract. To do this, we insert a pointer to a type
// that will never get resolved, thus will always be abstract.
- static Type *AlwaysOpaqueTy = 0;
- static PATypeHolder* Holder = 0;
- Type *tmp = AlwaysOpaqueTy;
- if (llvm_is_multithreaded()) {
- sys::MemoryFence();
- if (!tmp) {
- llvm_acquire_global_lock();
- tmp = AlwaysOpaqueTy;
- if (!tmp) {
- tmp = OpaqueType::get(getContext());
- PATypeHolder* tmp2 = new PATypeHolder(tmp);
- sys::MemoryFence();
- AlwaysOpaqueTy = tmp;
- Holder = tmp2;
- }
-
- llvm_release_global_lock();
- }
- } else if (!AlwaysOpaqueTy) {
- AlwaysOpaqueTy = OpaqueType::get(getContext());
- Holder = new PATypeHolder(AlwaysOpaqueTy);
- }
-
- ContainedTys[0] = AlwaysOpaqueTy;
+ ContainedTys[0] = getContext().pImpl->AlwaysOpaqueTy;
// Change the rest of the types to be Int32Ty's. It doesn't matter what we
// pick so long as it doesn't point back to this type. We choose something
@@ -590,7 +613,7 @@ void Type::PromoteAbstractToConcrete() {
// Concrete types are leaves in the tree. Since an SCC will either be all
// abstract or all concrete, we only need to check one type.
if (SCC[0]->isAbstract()) {
- if (isa<OpaqueType>(SCC[0]))
+ if (SCC[0]->isOpaqueTy())
return; // Not going to be concrete, sorry.
// If all of the children of all of the types in this SCC are concrete,
@@ -637,7 +660,7 @@ static bool TypesEqual(const Type *Ty, const Type *Ty2,
std::map<const Type *, const Type *> &EqTypes) {
if (Ty == Ty2) return true;
if (Ty->getTypeID() != Ty2->getTypeID()) return false;
- if (isa<OpaqueType>(Ty))
+ if (Ty->isOpaqueTy())
return false; // Two unequal opaque types are never equal
std::map<const Type*, const Type*>::iterator It = EqTypes.find(Ty);
@@ -667,6 +690,13 @@ static bool TypesEqual(const Type *Ty, const Type *Ty2,
if (!TypesEqual(STy->getElementType(i), STy2->getElementType(i), EqTypes))
return false;
return true;
+ } else if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) {
+ const UnionType *UTy2 = cast<UnionType>(Ty2);
+ if (UTy->getNumElements() != UTy2->getNumElements()) return false;
+ for (unsigned i = 0, e = UTy2->getNumElements(); i != e; ++i)
+ if (!TypesEqual(UTy->getElementType(i), UTy2->getElementType(i), EqTypes))
+ return false;
+ return true;
} else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
const ArrayType *ATy2 = cast<ArrayType>(Ty2);
return ATy->getNumElements() == ATy2->getNumElements() &&
@@ -858,7 +888,7 @@ ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) {
bool ArrayType::isValidElementType(const Type *ElemTy) {
return ElemTy->getTypeID() != VoidTyID && ElemTy->getTypeID() != LabelTyID &&
- ElemTy->getTypeID() != MetadataTyID && !isa<FunctionType>(ElemTy);
+ ElemTy->getTypeID() != MetadataTyID && !ElemTy->isFunctionTy();
}
VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) {
@@ -881,8 +911,8 @@ VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) {
}
bool VectorType::isValidElementType(const Type *ElemTy) {
- return ElemTy->isInteger() || ElemTy->isFloatingPoint() ||
- isa<OpaqueType>(ElemTy);
+ return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy() ||
+ ElemTy->isOpaqueTy();
}
//===----------------------------------------------------------------------===//
@@ -924,12 +954,66 @@ StructType *StructType::get(LLVMContext &Context, const Type *type, ...) {
}
bool StructType::isValidElementType(const Type *ElemTy) {
- return ElemTy->getTypeID() != VoidTyID && ElemTy->getTypeID() != LabelTyID &&
- ElemTy->getTypeID() != MetadataTyID && !isa<FunctionType>(ElemTy);
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
}
//===----------------------------------------------------------------------===//
+// Union Type Factory...
+//
+
+UnionType *UnionType::get(const Type* const* Types, unsigned NumTypes) {
+ assert(NumTypes > 0 && "union must have at least one member type!");
+ UnionValType UTV(Types, NumTypes);
+ UnionType *UT = 0;
+
+ LLVMContextImpl *pImpl = Types[0]->getContext().pImpl;
+
+ UT = pImpl->UnionTypes.get(UTV);
+
+ if (!UT) {
+ // Value not found. Derive a new type!
+ UT = (UnionType*) operator new(sizeof(UnionType) +
+ sizeof(PATypeHandle) * NumTypes);
+ new (UT) UnionType(Types[0]->getContext(), Types, NumTypes);
+ pImpl->UnionTypes.add(UTV, UT);
+ }
+#ifdef DEBUG_MERGE_TYPES
+ DEBUG(dbgs() << "Derived new type: " << *UT << "\n");
+#endif
+ return UT;
+}
+
+UnionType *UnionType::get(const Type *type, ...) {
+ va_list ap;
+ SmallVector<const llvm::Type*, 8> UnionFields;
+ va_start(ap, type);
+ while (type) {
+ UnionFields.push_back(type);
+ type = va_arg(ap, llvm::Type*);
+ }
+ unsigned NumTypes = UnionFields.size();
+ assert(NumTypes > 0 && "union must have at least one member type!");
+ return llvm::UnionType::get(&UnionFields[0], NumTypes);
+}
+
+bool UnionType::isValidElementType(const Type *ElemTy) {
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
+}
+
+int UnionType::getElementTypeIndex(const Type *ElemTy) const {
+ int index = 0;
+ for (UnionType::element_iterator I = element_begin(), E = element_end();
+ I != E; ++I, ++index) {
+ if (ElemTy == *I) return index;
+ }
+
+ return -1;
+}
+
+//===----------------------------------------------------------------------===//
// Pointer Type Factory...
//
@@ -1192,6 +1276,21 @@ void StructType::typeBecameConcrete(const DerivedType *AbsTy) {
// concrete - this could potentially change us from an abstract type to a
// concrete type.
//
+void UnionType::refineAbstractType(const DerivedType *OldType,
+ const Type *NewType) {
+ LLVMContextImpl *pImpl = OldType->getContext().pImpl;
+ pImpl->UnionTypes.RefineAbstractType(this, OldType, NewType);
+}
+
+void UnionType::typeBecameConcrete(const DerivedType *AbsTy) {
+ LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
+ pImpl->UnionTypes.TypeBecameConcrete(this, AbsTy);
+}
+
+// refineAbstractType - Called when a contained type is found to be more
+// concrete - this could potentially change us from an abstract type to a
+// concrete type.
+//
void PointerType::refineAbstractType(const DerivedType *OldType,
const Type *NewType) {
LLVMContextImpl *pImpl = OldType->getContext().pImpl;
@@ -1204,7 +1303,7 @@ void PointerType::typeBecameConcrete(const DerivedType *AbsTy) {
}
bool SequentialType::indexValid(const Value *V) const {
- if (isa<IntegerType>(V->getType()))
+ if (V->getType()->isIntegerTy())
return true;
return false;
}
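The Type.cpp hunks above add a UnionType factory with an array form and a NULL-terminated varargs form, plus element queries. A minimal sketch of building and querying a union type with that API, assuming the usual LLVM headers of this era; the function and variable names are illustrative only, not part of this patch:
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
using namespace llvm;
// Sketch only: build a union of { i32, float } and look up a member's index.
static const UnionType *makeExampleUnion(LLVMContext &Ctx) {
  UnionType *UT = UnionType::get(Type::getInt32Ty(Ctx),
                                 Type::getFloatTy(Ctx),
                                 (const Type *)0);   // varargs list is NULL-terminated
  int FloatIdx = UT->getElementTypeIndex(Type::getFloatTy(Ctx)); // expected: 1
  (void)FloatIdx;
  return UT;
}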
diff --git a/lib/VMCore/TypesContext.h b/lib/VMCore/TypesContext.h
index 4842845..02ab113 100644
--- a/lib/VMCore/TypesContext.h
+++ b/lib/VMCore/TypesContext.h
@@ -180,6 +180,32 @@ public:
}
};
+// UnionValType - Define a class to hold the key that goes into the TypeMap
+//
+class UnionValType {
+ std::vector<const Type*> ElTypes;
+public:
+ UnionValType(const Type* const* Types, unsigned NumTypes)
+ : ElTypes(&Types[0], &Types[NumTypes]) {}
+
+ static UnionValType get(const UnionType *UT) {
+ std::vector<const Type *> ElTypes;
+ ElTypes.reserve(UT->getNumElements());
+ for (unsigned i = 0, e = UT->getNumElements(); i != e; ++i)
+ ElTypes.push_back(UT->getElementType(i));
+
+ return UnionValType(&ElTypes[0], ElTypes.size());
+ }
+
+ static unsigned hashTypeStructure(const UnionType *UT) {
+ return UT->getNumElements();
+ }
+
+ inline bool operator<(const UnionValType &UTV) const {
+ return (ElTypes < UTV.ElTypes);
+ }
+};
+
// FunctionValType - Define a class to hold the key that goes into the TypeMap
//
class FunctionValType {
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 3759b8a..a36d262 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -45,11 +45,11 @@ Value::Value(const Type *ty, unsigned scid)
UseList(0), Name(0) {
if (isa<CallInst>(this) || isa<InvokeInst>(this))
assert((VTy->isFirstClassType() || VTy->isVoidTy() ||
- isa<OpaqueType>(ty) || VTy->getTypeID() == Type::StructTyID) &&
+ ty->isOpaqueTy() || VTy->isStructTy()) &&
"invalid CallInst type!");
else if (!isa<Constant>(this) && !isa<BasicBlock>(this))
assert((VTy->isFirstClassType() || VTy->isVoidTy() ||
- isa<OpaqueType>(ty)) &&
+ ty->isOpaqueTy()) &&
"Cannot create non-first-class values except for constants!");
}
@@ -320,7 +320,7 @@ void Value::replaceAllUsesWith(Value *New) {
}
Value *Value::stripPointerCasts() {
- if (!isa<PointerType>(getType()))
+ if (!getType()->isPointerTy())
return this;
Value *V = this;
do {
@@ -337,12 +337,12 @@ Value *Value::stripPointerCasts() {
} else {
return V;
}
- assert(isa<PointerType>(V->getType()) && "Unexpected operand type!");
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
} while (1);
}
Value *Value::getUnderlyingObject(unsigned MaxLookup) {
- if (!isa<PointerType>(getType()))
+ if (!getType()->isPointerTy())
return this;
Value *V = this;
for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
@@ -357,7 +357,7 @@ Value *Value::getUnderlyingObject(unsigned MaxLookup) {
} else {
return V;
}
- assert(isa<PointerType>(V->getType()) && "Unexpected operand type!");
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
}
return V;
}
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
index 7f9a6cd..a092cd1 100644
--- a/lib/VMCore/ValueTypes.cpp
+++ b/lib/VMCore/ValueTypes.cpp
@@ -36,17 +36,17 @@ EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT,
bool EVT::isExtendedFloatingPoint() const {
assert(isExtended() && "Type is not extended!");
- return LLVMTy->isFPOrFPVector();
+ return LLVMTy->isFPOrFPVectorTy();
}
bool EVT::isExtendedInteger() const {
assert(isExtended() && "Type is not extended!");
- return LLVMTy->isIntOrIntVector();
+ return LLVMTy->isIntOrIntVectorTy();
}
bool EVT::isExtendedVector() const {
assert(isExtended() && "Type is not extended!");
- return isa<VectorType>(LLVMTy);
+ return LLVMTy->isVectorTy();
}
bool EVT::isExtended64BitVector() const {
@@ -126,6 +126,7 @@ std::string EVT::getEVTString() const {
case MVT::v8f32: return "v8f32";
case MVT::v2f64: return "v2f64";
case MVT::v4f64: return "v4f64";
+ case MVT::Metadata:return "Metadata";
}
}
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index d0e8d30..721e96a 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -161,7 +161,8 @@ namespace {
VerifierFailureAction action;
// What to do if verification fails.
Module *Mod; // Module we are verifying right now
- DominatorTree *DT; // Dominator Tree, caution can be null!
+ LLVMContext *Context; // Context within which we are verifying
+ DominatorTree *DT; // Dominator Tree, caution can be null!
std::string Messages;
raw_string_ostream MessagesStr;
@@ -178,24 +179,25 @@ namespace {
Verifier()
: FunctionPass(&ID),
Broken(false), RealPass(true), action(AbortProcessAction),
- DT(0), MessagesStr(Messages) {}
+ Mod(0), Context(0), DT(0), MessagesStr(Messages) {}
explicit Verifier(VerifierFailureAction ctn)
: FunctionPass(&ID),
- Broken(false), RealPass(true), action(ctn), DT(0),
+ Broken(false), RealPass(true), action(ctn), Mod(0), Context(0), DT(0),
MessagesStr(Messages) {}
explicit Verifier(bool AB)
: FunctionPass(&ID),
Broken(false), RealPass(true),
- action( AB ? AbortProcessAction : PrintMessageAction), DT(0),
- MessagesStr(Messages) {}
+ action( AB ? AbortProcessAction : PrintMessageAction), Mod(0),
+ Context(0), DT(0), MessagesStr(Messages) {}
explicit Verifier(DominatorTree &dt)
: FunctionPass(&ID),
- Broken(false), RealPass(false), action(PrintMessageAction),
- DT(&dt), MessagesStr(Messages) {}
+ Broken(false), RealPass(false), action(PrintMessageAction), Mod(0),
+ Context(0), DT(&dt), MessagesStr(Messages) {}
bool doInitialization(Module &M) {
Mod = &M;
+ Context = &M.getContext();
verifyTypeSymbolTable(M.getTypeSymbolTable());
// If this is a real pass, in a pass manager, we must abort before
@@ -211,6 +213,7 @@ namespace {
if (RealPass) DT = &getAnalysis<DominatorTree>();
Mod = F.getParent();
+ if (!Context) Context = &F.getContext();
visit(F);
InstsInThisBlock.clear();
@@ -314,6 +317,7 @@ namespace {
void visitStoreInst(StoreInst &SI);
void visitInstruction(Instruction &I);
void visitTerminatorInst(TerminatorInst &I);
+ void visitBranchInst(BranchInst &BI);
void visitReturnInst(ReturnInst &RI);
void visitSwitchInst(SwitchInst &SI);
void visitSelectInst(SelectInst &SI);
@@ -429,7 +433,7 @@ void Verifier::visitGlobalValue(GlobalValue &GV) {
if (GV.hasAppendingLinkage()) {
GlobalVariable *GVar = dyn_cast<GlobalVariable>(&GV);
- Assert1(GVar && isa<ArrayType>(GVar->getType()->getElementType()),
+ Assert1(GVar && GVar->getType()->getElementType()->isArrayTy(),
"Only global arrays can have appending linkage!", GVar);
}
}
@@ -596,13 +600,16 @@ void Verifier::visitFunction(Function &F) {
const FunctionType *FT = F.getFunctionType();
unsigned NumArgs = F.arg_size();
+ Assert1(Context == &F.getContext(),
+ "Function context does not match Module context!", &F);
+
Assert1(!F.hasCommonLinkage(), "Functions may not have common linkage", &F);
Assert2(FT->getNumParams() == NumArgs,
"# formal arguments must match # of arguments for function type!",
&F, FT);
Assert1(F.getReturnType()->isFirstClassType() ||
F.getReturnType()->isVoidTy() ||
- isa<StructType>(F.getReturnType()),
+ F.getReturnType()->isStructTy(),
"Functions cannot return aggregate values!", &F);
Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
@@ -743,6 +750,14 @@ void Verifier::visitTerminatorInst(TerminatorInst &I) {
visitInstruction(I);
}
+void Verifier::visitBranchInst(BranchInst &BI) {
+ if (BI.isConditional()) {
+ Assert2(BI.getCondition()->getType()->isIntegerTy(1),
+ "Branch condition is not 'i1' type!", &BI, BI.getCondition());
+ }
+ visitTerminatorInst(BI);
+}
+
void Verifier::visitReturnInst(ReturnInst &RI) {
Function *F = RI.getParent()->getParent();
unsigned N = RI.getNumOperands();
@@ -821,9 +836,9 @@ void Verifier::visitTruncInst(TruncInst &I) {
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- Assert1(SrcTy->isIntOrIntVector(), "Trunc only operates on integer", &I);
- Assert1(DestTy->isIntOrIntVector(), "Trunc only produces integer", &I);
- Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy),
+ Assert1(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I);
+ Assert1(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
"trunc source and destination must both be a vector or neither", &I);
Assert1(SrcBitSize > DestBitSize,"DestTy too big for Trunc", &I);
@@ -836,9 +851,9 @@ void Verifier::visitZExtInst(ZExtInst &I) {
const Type *DestTy = I.getType();
// Get the size of the types in bits, we'll need this later
- Assert1(SrcTy->isIntOrIntVector(), "ZExt only operates on integer", &I);
- Assert1(DestTy->isIntOrIntVector(), "ZExt only produces an integer", &I);
- Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy),
+ Assert1(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I);
+ Assert1(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
"zext source and destination must both be a vector or neither", &I);
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
@@ -857,9 +872,9 @@ void Verifier::visitSExtInst(SExtInst &I) {
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- Assert1(SrcTy->isIntOrIntVector(), "SExt only operates on integer", &I);
- Assert1(DestTy->isIntOrIntVector(), "SExt only produces an integer", &I);
- Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy),
+ Assert1(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I);
+ Assert1(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
"sext source and destination must both be a vector or neither", &I);
Assert1(SrcBitSize < DestBitSize,"Type too small for SExt", &I);
@@ -874,9 +889,9 @@ void Verifier::visitFPTruncInst(FPTruncInst &I) {
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- Assert1(SrcTy->isFPOrFPVector(),"FPTrunc only operates on FP", &I);
- Assert1(DestTy->isFPOrFPVector(),"FPTrunc only produces an FP", &I);
- Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy),
+ Assert1(SrcTy->isFPOrFPVectorTy(),"FPTrunc only operates on FP", &I);
+ Assert1(DestTy->isFPOrFPVectorTy(),"FPTrunc only produces an FP", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
"fptrunc source and destination must both be a vector or neither",&I);
Assert1(SrcBitSize > DestBitSize,"DestTy too big for FPTrunc", &I);
@@ -892,9 +907,9 @@ void Verifier::visitFPExtInst(FPExtInst &I) {
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- Assert1(SrcTy->isFPOrFPVector(),"FPExt only operates on FP", &I);
- Assert1(DestTy->isFPOrFPVector(),"FPExt only produces an FP", &I);
- Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy),
+ Assert1(SrcTy->isFPOrFPVectorTy(),"FPExt only operates on FP", &I);
+ Assert1(DestTy->isFPOrFPVectorTy(),"FPExt only produces an FP", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
"fpext source and destination must both be a vector or neither", &I);
Assert1(SrcBitSize < DestBitSize,"DestTy too small for FPExt", &I);
@@ -906,14 +921,14 @@ void Verifier::visitUIToFPInst(UIToFPInst &I) {
const Type *SrcTy = I.getOperand(0)->getType();
const Type *DestTy = I.getType();
- bool SrcVec = isa<VectorType>(SrcTy);
- bool DstVec = isa<VectorType>(DestTy);
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
Assert1(SrcVec == DstVec,
"UIToFP source and dest must both be vector or scalar", &I);
- Assert1(SrcTy->isIntOrIntVector(),
+ Assert1(SrcTy->isIntOrIntVectorTy(),
"UIToFP source must be integer or integer vector", &I);
- Assert1(DestTy->isFPOrFPVector(),
+ Assert1(DestTy->isFPOrFPVectorTy(),
"UIToFP result must be FP or FP vector", &I);
if (SrcVec && DstVec)
@@ -929,14 +944,14 @@ void Verifier::visitSIToFPInst(SIToFPInst &I) {
const Type *SrcTy = I.getOperand(0)->getType();
const Type *DestTy = I.getType();
- bool SrcVec = isa<VectorType>(SrcTy);
- bool DstVec = isa<VectorType>(DestTy);
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
Assert1(SrcVec == DstVec,
"SIToFP source and dest must both be vector or scalar", &I);
- Assert1(SrcTy->isIntOrIntVector(),
+ Assert1(SrcTy->isIntOrIntVectorTy(),
"SIToFP source must be integer or integer vector", &I);
- Assert1(DestTy->isFPOrFPVector(),
+ Assert1(DestTy->isFPOrFPVectorTy(),
"SIToFP result must be FP or FP vector", &I);
if (SrcVec && DstVec)
@@ -952,13 +967,14 @@ void Verifier::visitFPToUIInst(FPToUIInst &I) {
const Type *SrcTy = I.getOperand(0)->getType();
const Type *DestTy = I.getType();
- bool SrcVec = isa<VectorType>(SrcTy);
- bool DstVec = isa<VectorType>(DestTy);
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
Assert1(SrcVec == DstVec,
"FPToUI source and dest must both be vector or scalar", &I);
- Assert1(SrcTy->isFPOrFPVector(), "FPToUI source must be FP or FP vector", &I);
- Assert1(DestTy->isIntOrIntVector(),
+ Assert1(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector",
+ &I);
+ Assert1(DestTy->isIntOrIntVectorTy(),
"FPToUI result must be integer or integer vector", &I);
if (SrcVec && DstVec)
@@ -974,14 +990,14 @@ void Verifier::visitFPToSIInst(FPToSIInst &I) {
const Type *SrcTy = I.getOperand(0)->getType();
const Type *DestTy = I.getType();
- bool SrcVec = isa<VectorType>(SrcTy);
- bool DstVec = isa<VectorType>(DestTy);
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
Assert1(SrcVec == DstVec,
"FPToSI source and dest must both be vector or scalar", &I);
- Assert1(SrcTy->isFPOrFPVector(),
+ Assert1(SrcTy->isFPOrFPVectorTy(),
"FPToSI source must be FP or FP vector", &I);
- Assert1(DestTy->isIntOrIntVector(),
+ Assert1(DestTy->isIntOrIntVectorTy(),
"FPToSI result must be integer or integer vector", &I);
if (SrcVec && DstVec)
@@ -997,8 +1013,8 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
const Type *SrcTy = I.getOperand(0)->getType();
const Type *DestTy = I.getType();
- Assert1(isa<PointerType>(SrcTy), "PtrToInt source must be pointer", &I);
- Assert1(DestTy->isInteger(), "PtrToInt result must be integral", &I);
+ Assert1(SrcTy->isPointerTy(), "PtrToInt source must be pointer", &I);
+ Assert1(DestTy->isIntegerTy(), "PtrToInt result must be integral", &I);
visitInstruction(I);
}
@@ -1008,8 +1024,8 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
const Type *SrcTy = I.getOperand(0)->getType();
const Type *DestTy = I.getType();
- Assert1(SrcTy->isInteger(), "IntToPtr source must be an integral", &I);
- Assert1(isa<PointerType>(DestTy), "IntToPtr result must be a pointer",&I);
+ Assert1(SrcTy->isIntegerTy(), "IntToPtr source must be an integral", &I);
+ Assert1(DestTy->isPointerTy(), "IntToPtr result must be a pointer",&I);
visitInstruction(I);
}
@@ -1025,7 +1041,7 @@ void Verifier::visitBitCastInst(BitCastInst &I) {
// BitCast implies a no-op cast of type only. No bits change.
// However, you can't cast pointers to anything but pointers.
- Assert1(isa<PointerType>(DestTy) == isa<PointerType>(DestTy),
+ Assert1(SrcTy->isPointerTy() == DestTy->isPointerTy(),
"Bitcast requires both operands to be pointer or neither", &I);
Assert1(SrcBitSize == DestBitSize, "Bitcast requires types of same width",&I);
@@ -1068,11 +1084,11 @@ void Verifier::visitPHINode(PHINode &PN) {
void Verifier::VerifyCallSite(CallSite CS) {
Instruction *I = CS.getInstruction();
- Assert1(isa<PointerType>(CS.getCalledValue()->getType()),
+ Assert1(CS.getCalledValue()->getType()->isPointerTy(),
"Called function must be a pointer!", I);
const PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType());
- Assert1(isa<FunctionType>(FPTy->getElementType()),
+ Assert1(FPTy->getElementType()->isFunctionTy(),
"Called function is not pointer to function type!", I);
const FunctionType *FTy = cast<FunctionType>(FPTy->getElementType());
@@ -1151,7 +1167,7 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
case Instruction::UDiv:
case Instruction::SRem:
case Instruction::URem:
- Assert1(B.getType()->isIntOrIntVector(),
+ Assert1(B.getType()->isIntOrIntVectorTy(),
"Integer arithmetic operators only work with integral types!", &B);
Assert1(B.getType() == B.getOperand(0)->getType(),
"Integer arithmetic operators must have same type "
@@ -1164,7 +1180,7 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
case Instruction::FMul:
case Instruction::FDiv:
case Instruction::FRem:
- Assert1(B.getType()->isFPOrFPVector(),
+ Assert1(B.getType()->isFPOrFPVectorTy(),
"Floating-point arithmetic operators only work with "
"floating-point types!", &B);
Assert1(B.getType() == B.getOperand(0)->getType(),
@@ -1175,7 +1191,7 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
- Assert1(B.getType()->isIntOrIntVector(),
+ Assert1(B.getType()->isIntOrIntVectorTy(),
"Logical operators only work with integral types!", &B);
Assert1(B.getType() == B.getOperand(0)->getType(),
"Logical operators must have same type for operands and result!",
@@ -1184,7 +1200,7 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
- Assert1(B.getType()->isIntOrIntVector(),
+ Assert1(B.getType()->isIntOrIntVectorTy(),
"Shifts only work with integral types!", &B);
Assert1(B.getType() == B.getOperand(0)->getType(),
"Shift return type must be same as operands!", &B);
@@ -1203,7 +1219,7 @@ void Verifier::visitICmpInst(ICmpInst& IC) {
Assert1(Op0Ty == Op1Ty,
"Both operands to ICmp instruction are not of the same type!", &IC);
// Check that the operands are the right type
- Assert1(Op0Ty->isIntOrIntVector() || isa<PointerType>(Op0Ty),
+ Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPointerTy(),
"Invalid operand types for ICmp instruction", &IC);
visitInstruction(IC);
@@ -1216,7 +1232,7 @@ void Verifier::visitFCmpInst(FCmpInst& FC) {
Assert1(Op0Ty == Op1Ty,
"Both operands to FCmp instruction are not of the same type!", &FC);
// Check that the operands are the right type
- Assert1(Op0Ty->isFPOrFPVector(),
+ Assert1(Op0Ty->isFPOrFPVectorTy(),
"Invalid operand types for FCmp instruction", &FC);
visitInstruction(FC);
}
@@ -1270,7 +1286,7 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
GetElementPtrInst::getIndexedType(GEP.getOperand(0)->getType(),
Idxs.begin(), Idxs.end());
Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP);
- Assert2(isa<PointerType>(GEP.getType()) &&
+ Assert2(GEP.getType()->isPointerTy() &&
cast<PointerType>(GEP.getType())->getElementType() == ElTy,
"GEP is not of right type for indices!", &GEP, ElTy);
visitInstruction(GEP);
@@ -1302,7 +1318,7 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
&AI);
Assert1(PTy->getElementType()->isSized(), "Cannot allocate unsized type",
&AI);
- Assert1(AI.getArraySize()->getType()->isInteger(32),
+ Assert1(AI.getArraySize()->getType()->isIntegerTy(32),
"Alloca array size must be i32", &AI);
visitInstruction(AI);
}
@@ -1481,7 +1497,7 @@ void Verifier::visitInstruction(Instruction &I) {
void Verifier::VerifyType(const Type *Ty) {
if (!Types.insert(Ty)) return;
- Assert1(&Mod->getContext() == &Ty->getContext(),
+ Assert1(Context == &Ty->getContext(),
"Type context does not match Module context!", Ty);
switch (Ty->getTypeID()) {
@@ -1509,6 +1525,15 @@ void Verifier::VerifyType(const Type *Ty) {
VerifyType(ElTy);
}
} break;
+ case Type::UnionTyID: {
+ const UnionType *UTy = cast<UnionType>(Ty);
+ for (unsigned i = 0, e = UTy->getNumElements(); i != e; ++i) {
+ const Type *ElTy = UTy->getElementType(i);
+ Assert2(UnionType::isValidElementType(ElTy),
+ "Union type with invalid element type", ElTy, UTy);
+ VerifyType(ElTy);
+ }
+ } break;
case Type::ArrayTyID: {
const ArrayType *ATy = cast<ArrayType>(Ty);
Assert1(ArrayType::isValidElementType(ATy->getElementType()),
@@ -1616,7 +1641,7 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
if (ID == Intrinsic::gcroot) {
AllocaInst *AI =
dyn_cast<AllocaInst>(CI.getOperand(1)->stripPointerCasts());
- Assert1(AI && isa<PointerType>(AI->getType()->getElementType()),
+ Assert1(AI && AI->getType()->getElementType()->isPointerTy(),
"llvm.gcroot parameter #1 must be a pointer alloca.", &CI);
Assert1(isa<Constant>(CI.getOperand(2)),
"llvm.gcroot parameter #2 must be a constant.", &CI);
@@ -1734,7 +1759,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
}
}
} else if (VT == MVT::iAny) {
- if (!EltTy->isInteger()) {
+ if (!EltTy->isIntegerTy()) {
CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not "
"an integer type.", F);
return false;
@@ -1759,7 +1784,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
break;
}
} else if (VT == MVT::fAny) {
- if (!EltTy->isFloatingPoint()) {
+ if (!EltTy->isFloatingPointTy()) {
CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not "
"a floating-point type.", F);
return false;
@@ -1778,7 +1803,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
}
Suffix += ".v" + utostr(NumElts) + EVT::getEVT(EltTy).getEVTString();
} else if (VT == MVT::iPTR) {
- if (!isa<PointerType>(Ty)) {
+ if (!Ty->isPointerTy()) {
CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not a "
"pointer and a pointer is required.", F);
return false;