author     Dan Gohman <djg@cray.com>  2007-07-18 16:29:46 +0000
committer  Dan Gohman <djg@cray.com>  2007-07-18 16:29:46 +0000
commit     f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc (patch)
tree       ebb79ea1ee5e3bc1fdf38541a811a8b804f0679a  /lib/CodeGen
It's not necessary to do rounding for alloca operations when the requested
alignment is equal to the stack alignment.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@40004 91177308-0d34-0410-b5e6-96231b3b80d8
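As a hedged illustration of the rationale (a sketch, not the code this commit actually changes): dynamic alloca lowering typically rounds the requested size up to a multiple of the stack alignment, and only over-allocates when the alloca requests more alignment than the stack already guarantees.

// Hypothetical sketch of the size computation for a dynamic alloca.
uint64_t StackAlign = 16;  // assumed target stack alignment, in bytes
uint64_t Align      = 16;  // alignment requested by the alloca
uint64_t Size       = 40;  // requested allocation size, in bytes
uint64_t Rounded    = (Size + StackAlign - 1) & ~(StackAlign - 1);  // 48
if (Align > StackAlign)
  Rounded += Align;        // extra space is only needed to re-align the pointer
// When the requested alignment equals the stack alignment, no rounding beyond
// the normal stack-alignment round-up is required, which is the case the
// commit message describes.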
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/AsmPrinter.cpp  1210
-rw-r--r--  lib/CodeGen/BranchFolding.cpp  1093
-rw-r--r--  lib/CodeGen/DwarfWriter.cpp  3616
-rw-r--r--  lib/CodeGen/ELFWriter.cpp  547
-rw-r--r--  lib/CodeGen/ELFWriter.h  228
-rw-r--r--  lib/CodeGen/IfConversion.cpp  1226
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp  799
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp  205
-rw-r--r--  lib/CodeGen/LiveInterval.cpp  526
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp  692
-rw-r--r--  lib/CodeGen/LiveVariables.cpp  643
-rw-r--r--  lib/CodeGen/MachOWriter.cpp  945
-rw-r--r--  lib/CodeGen/MachOWriter.h  627
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp  287
-rw-r--r--  lib/CodeGen/MachineFunction.cpp  483
-rw-r--r--  lib/CodeGen/MachineInstr.cpp  426
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp  1905
-rw-r--r--  lib/CodeGen/MachinePassRegistry.cpp  41
-rw-r--r--  lib/CodeGen/Makefile  22
-rw-r--r--  lib/CodeGen/PHIElimination.cpp  342
-rw-r--r--  lib/CodeGen/Passes.cpp  56
-rw-r--r--  lib/CodeGen/PhysRegTracker.h  73
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp  81
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp  549
-rw-r--r--  lib/CodeGen/README.txt  145
-rw-r--r--  lib/CodeGen/RegAllocBigBlock.cpp  893
-rw-r--r--  lib/CodeGen/RegAllocLinearScan.cpp  828
-rw-r--r--  lib/CodeGen/RegAllocLocal.cpp  830
-rw-r--r--  lib/CodeGen/RegAllocSimple.cpp  253
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp  292
-rw-r--r--  lib/CodeGen/SelectionDAG/CallingConvLower.cpp  102
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp  4749
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp  5755
-rw-r--r--  lib/CodeGen/SelectionDAG/Makefile  15
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAG.cpp  725
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp  531
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp  944
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSimple.cpp  1156
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp  3694
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp  4904
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp  245
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp  1753
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.cpp  1138
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp  228
-rw-r--r--  lib/CodeGen/UnreachableBlockElim.cpp  81
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp  1118
-rw-r--r--  lib/CodeGen/VirtRegMap.h  211
47 files changed, 47212 insertions, 0 deletions
diff --git a/lib/CodeGen/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter.cpp
new file mode 100644
index 0000000..586472c
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter.cpp
@@ -0,0 +1,1210 @@
+//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cerrno>
+using namespace llvm;
+
+static cl::opt<bool>
+AsmVerbose("asm-verbose", cl::Hidden, cl::desc("Add comments to directives."));
+
+char AsmPrinter::ID = 0;
+AsmPrinter::AsmPrinter(std::ostream &o, TargetMachine &tm,
+ const TargetAsmInfo *T)
+ : MachineFunctionPass((intptr_t)&ID), FunctionNumber(0), O(o), TM(tm), TAI(T)
+{}
+
+std::string AsmPrinter::getSectionForFunction(const Function &F) const {
+ return TAI->getTextSection();
+}
+
+
+/// SwitchToTextSection - Switch to the specified text section of the executable
+/// if we are not already in it!
+///
+void AsmPrinter::SwitchToTextSection(const char *NewSection,
+ const GlobalValue *GV) {
+ std::string NS;
+ if (GV && GV->hasSection())
+ NS = TAI->getSwitchToSectionDirective() + GV->getSection();
+ else
+ NS = NewSection;
+
+ // If we're already in this section, we're done.
+ if (CurrentSection == NS) return;
+
+ // Close the current section, if applicable.
+ if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty())
+ O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << "\n";
+
+ CurrentSection = NS;
+
+ if (!CurrentSection.empty())
+ O << CurrentSection << TAI->getTextSectionStartSuffix() << '\n';
+}
+
+/// SwitchToDataSection - Switch to the specified data section of the executable
+/// if we are not already in it!
+///
+void AsmPrinter::SwitchToDataSection(const char *NewSection,
+ const GlobalValue *GV) {
+ std::string NS;
+ if (GV && GV->hasSection())
+ NS = TAI->getSwitchToSectionDirective() + GV->getSection();
+ else
+ NS = NewSection;
+
+ // If we're already in this section, we're done.
+ if (CurrentSection == NS) return;
+
+ // Close the current section, if applicable.
+ if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty())
+ O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << "\n";
+
+ CurrentSection = NS;
+
+ if (!CurrentSection.empty())
+ O << CurrentSection << TAI->getDataSectionStartSuffix() << '\n';
+}
+
+
+bool AsmPrinter::doInitialization(Module &M) {
+ Mang = new Mangler(M, TAI->getGlobalPrefix());
+
+ if (!M.getModuleInlineAsm().empty())
+ O << TAI->getCommentString() << " Start of file scope inline assembly\n"
+ << M.getModuleInlineAsm()
+ << "\n" << TAI->getCommentString()
+ << " End of file scope inline assembly\n";
+
+ SwitchToDataSection(""); // Reset back to no section.
+
+ if (MachineModuleInfo *MMI = getAnalysisToUpdate<MachineModuleInfo>()) {
+ MMI->AnalyzeModule(M);
+ }
+
+ return false;
+}
+
+bool AsmPrinter::doFinalization(Module &M) {
+ if (TAI->getWeakRefDirective()) {
+ if (!ExtWeakSymbols.empty())
+ SwitchToDataSection("");
+
+ for (std::set<const GlobalValue*>::iterator i = ExtWeakSymbols.begin(),
+ e = ExtWeakSymbols.end(); i != e; ++i) {
+ const GlobalValue *GV = *i;
+ std::string Name = Mang->getValueName(GV);
+ O << TAI->getWeakRefDirective() << Name << "\n";
+ }
+ }
+
+ if (TAI->getSetDirective()) {
+ if (!M.alias_empty())
+ SwitchToTextSection(TAI->getTextSection());
+
+ O << "\n";
+ for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I!=E; ++I) {
+ std::string Name = Mang->getValueName(I);
+ std::string Target;
+
+ if (const GlobalValue *GV = I->getAliasedGlobal())
+ Target = Mang->getValueName(GV);
+ else
+ assert(0 && "Unsupported aliasee");
+
+ if (I->hasExternalLinkage())
+ O << "\t.globl\t" << Name << "\n";
+ else if (I->hasWeakLinkage())
+ O << TAI->getWeakRefDirective() << Name << "\n";
+ else if (!I->hasInternalLinkage())
+ assert(0 && "Invalid alias linkage");
+
+ O << TAI->getSetDirective() << Name << ", " << Target << "\n";
+ }
+ }
+
+ delete Mang; Mang = 0;
+ return false;
+}
+
+void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
+ // What's my mangled name?
+ CurrentFnName = Mang->getValueName(MF.getFunction());
+ IncrementFunctionNumber();
+}
+
+/// EmitConstantPool - Print to the current output stream assembly
+/// representations of the constants in the constant pool MCP. This is
+/// used to print out constants which have been "spilled to memory" by
+/// the code generator.
+///
+void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) {
+ const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+ if (CP.empty()) return;
+
+  // Some targets require 4-, 8-, and 16-byte constant literals to be placed
+ // in special sections.
+ std::vector<std::pair<MachineConstantPoolEntry,unsigned> > FourByteCPs;
+ std::vector<std::pair<MachineConstantPoolEntry,unsigned> > EightByteCPs;
+ std::vector<std::pair<MachineConstantPoolEntry,unsigned> > SixteenByteCPs;
+ std::vector<std::pair<MachineConstantPoolEntry,unsigned> > OtherCPs;
+ std::vector<std::pair<MachineConstantPoolEntry,unsigned> > TargetCPs;
+ for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+ MachineConstantPoolEntry CPE = CP[i];
+ const Type *Ty = CPE.getType();
+ if (TAI->getFourByteConstantSection() &&
+ TM.getTargetData()->getTypeSize(Ty) == 4)
+ FourByteCPs.push_back(std::make_pair(CPE, i));
+ else if (TAI->getEightByteConstantSection() &&
+ TM.getTargetData()->getTypeSize(Ty) == 8)
+ EightByteCPs.push_back(std::make_pair(CPE, i));
+ else if (TAI->getSixteenByteConstantSection() &&
+ TM.getTargetData()->getTypeSize(Ty) == 16)
+ SixteenByteCPs.push_back(std::make_pair(CPE, i));
+ else
+ OtherCPs.push_back(std::make_pair(CPE, i));
+ }
+
+ unsigned Alignment = MCP->getConstantPoolAlignment();
+ EmitConstantPool(Alignment, TAI->getFourByteConstantSection(), FourByteCPs);
+ EmitConstantPool(Alignment, TAI->getEightByteConstantSection(), EightByteCPs);
+ EmitConstantPool(Alignment, TAI->getSixteenByteConstantSection(),
+ SixteenByteCPs);
+ EmitConstantPool(Alignment, TAI->getConstantPoolSection(), OtherCPs);
+}
+
+void AsmPrinter::EmitConstantPool(unsigned Alignment, const char *Section,
+ std::vector<std::pair<MachineConstantPoolEntry,unsigned> > &CP) {
+ if (CP.empty()) return;
+
+ SwitchToDataSection(Section);
+ EmitAlignment(Alignment);
+ for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+ << CP[i].second << ":\t\t\t\t\t" << TAI->getCommentString() << " ";
+ WriteTypeSymbolic(O, CP[i].first.getType(), 0) << '\n';
+ if (CP[i].first.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(CP[i].first.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(CP[i].first.Val.ConstVal);
+ if (i != e-1) {
+ const Type *Ty = CP[i].first.getType();
+ unsigned EntSize =
+ TM.getTargetData()->getTypeSize(Ty);
+ unsigned ValEnd = CP[i].first.getOffset() + EntSize;
+ // Emit inter-object padding for alignment.
+ EmitZeros(CP[i+1].first.getOffset()-ValEnd);
+ }
+ }
+}
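// Illustrative worked example (hypothetical values, not part of this diff) of
// the inter-entry padding emitted above: suppose entry i is a 4-byte float at
// offset 0 and entry i+1 is a 16-byte vector that the layout placed at offset
// 16 to satisfy its alignment.  Then EntSize = 4, ValEnd = 0 + 4 = 4, and
// EmitZeros(16 - 4) emits 12 bytes of zero padding between the two entries.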
+
+/// EmitJumpTableInfo - Print assembly representations of the jump tables used
+/// by the current function to the current output stream.
+///
+void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI,
+ MachineFunction &MF) {
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+ bool IsPic = TM.getRelocationModel() == Reloc::PIC_;
+
+  // Use JumpTableDirective if the target defines one; otherwise honor the
+  // entry size from the jump table info.
+ const char *JTEntryDirective = TAI->getJumpTableDirective();
+ bool HadJTEntryDirective = JTEntryDirective != NULL;
+ if (!HadJTEntryDirective) {
+ JTEntryDirective = MJTI->getEntrySize() == 4 ?
+ TAI->getData32bitsDirective() : TAI->getData64bitsDirective();
+ }
+
+ // Pick the directive to use to print the jump table entries, and switch to
+ // the appropriate section.
+ TargetLowering *LoweringInfo = TM.getTargetLowering();
+
+ const char* JumpTableDataSection = TAI->getJumpTableDataSection();
+ if ((IsPic && !(LoweringInfo && LoweringInfo->usesGlobalOffsetTable())) ||
+ !JumpTableDataSection) {
+    // In PIC mode, we need to emit the jump table to the same section as the
+    // function body itself; otherwise the label differences won't make sense.
+    // We should also do this if the section name is NULL.
+ const Function *F = MF.getFunction();
+ SwitchToTextSection(getSectionForFunction(*F).c_str(), F);
+ } else {
+ SwitchToDataSection(JumpTableDataSection);
+ }
+
+ EmitAlignment(Log2_32(MJTI->getAlignment()));
+
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[i].MBBs;
+
+ // If this jump table was deleted, ignore it.
+ if (JTBBs.empty()) continue;
+
+ // For PIC codegen, if possible we want to use the SetDirective to reduce
+ // the number of relocations the assembler will generate for the jump table.
+ // Set directives are all printed before the jump table itself.
+ std::set<MachineBasicBlock*> EmittedSets;
+ if (TAI->getSetDirective() && IsPic)
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
+ if (EmittedSets.insert(JTBBs[ii]).second)
+ printSetLabel(i, JTBBs[ii]);
+
+    // On some targets (e.g. darwin) we want to emit two consecutive labels
+ // before each jump table. The first label is never referenced, but tells
+ // the assembler and linker the extents of the jump table object. The
+ // second label is actually referenced by the code.
+ if (const char *JTLabelPrefix = TAI->getJumpTableSpecialLabelPrefix())
+ O << JTLabelPrefix << "JTI" << getFunctionNumber() << '_' << i << ":\n";
+
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << i << ":\n";
+
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
+ O << JTEntryDirective << ' ';
+ // If we have emitted set directives for the jump table entries, print
+ // them rather than the entries themselves. If we're emitting PIC, then
+ // emit the table entries as differences between two text section labels.
+ // If we're emitting non-PIC code, then emit the entries as direct
+ // references to the target basic blocks.
+ if (!EmittedSets.empty()) {
+ O << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ << '_' << i << "_set_" << JTBBs[ii]->getNumber();
+ } else if (IsPic) {
+ printBasicBlockLabel(JTBBs[ii], false, false);
+      // If the arch uses custom jump table directives, don't compute the
+      // entry relative to the jump table label.
+ if (!HadJTEntryDirective)
+ O << '-' << TAI->getPrivateGlobalPrefix() << "JTI"
+ << getFunctionNumber() << '_' << i;
+ } else {
+ printBasicBlockLabel(JTBBs[ii], false, false);
+ }
+ O << '\n';
+ }
+ }
+}
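// Illustrative output (hypothetical; assumes a Darwin-like target whose
// private label prefix is "L" and whose set directive is ".set") for function
// 0, jump table 0, with destination blocks 3 and 5.  The PIC path above emits
// label differences, so the table needs no load-time relocations:
//   .set L0_0_set_3,LBB0_3-LJTI0_0
//   .set L0_0_set_5,LBB0_5-LJTI0_0
//   LJTI0_0:
//     .long L0_0_set_3
//     .long L0_0_set_5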
+
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM. If so, emit it and return true, otherwise
+/// do nothing and return false.
+bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
+ // Ignore debug and non-emitted data.
+ if (GV->getSection() == "llvm.metadata") return true;
+
+ if (!GV->hasAppendingLinkage()) return false;
+
+ assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+ if (GV->getName() == "llvm.used") {
+ if (TAI->getUsedDirective() != 0) // No need to emit this at all.
+ EmitLLVMUsedList(GV->getInitializer());
+ return true;
+ }
+
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = Log2_32(TD->getPointerPrefAlignment());
+ if (GV->getName() == "llvm.global_ctors" && GV->use_empty()) {
+ SwitchToDataSection(TAI->getStaticCtorsSection());
+ EmitAlignment(Align, 0);
+ EmitXXStructorList(GV->getInitializer());
+ return true;
+ }
+
+ if (GV->getName() == "llvm.global_dtors" && GV->use_empty()) {
+ SwitchToDataSection(TAI->getStaticDtorsSection());
+ EmitAlignment(Align, 0);
+ EmitXXStructorList(GV->getInitializer());
+ return true;
+ }
+
+ return false;
+}
+
+/// EmitLLVMUsedList - For targets that define a TAI::UsedDirective, mark each
+/// global in the specified llvm.used list as being used with this directive.
+void AsmPrinter::EmitLLVMUsedList(Constant *List) {
+ const char *Directive = TAI->getUsedDirective();
+
+ // Should be an array of 'sbyte*'.
+ ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ O << Directive;
+ EmitConstantValueOnly(InitList->getOperand(i));
+ O << "\n";
+ }
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the
+/// function pointers, ignoring the init priority.
+void AsmPrinter::EmitXXStructorList(Constant *List) {
+ // Should be an array of '{ int, void ()* }' structs. The first value is the
+ // init priority, which we ignore.
+ if (!isa<ConstantArray>(List)) return;
+ ConstantArray *InitList = cast<ConstantArray>(List);
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
+ if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
+
+ if (CS->getOperand(1)->isNullValue())
+ return; // Found a null terminator, exit printing.
+ // Emit the function pointer.
+ EmitGlobalConstant(CS->getOperand(1));
+ }
+}
+
+/// getGlobalLinkName - Returns the asm/link name of the specified
+/// global variable. Should be overridden by each target asm printer to
+/// generate the appropriate value.
+const std::string AsmPrinter::getGlobalLinkName(const GlobalVariable *GV) const{
+ std::string LinkName;
+
+ if (isa<Function>(GV)) {
+ LinkName += TAI->getFunctionAddrPrefix();
+ LinkName += Mang->getValueName(GV);
+ LinkName += TAI->getFunctionAddrSuffix();
+ } else {
+ LinkName += TAI->getGlobalVarAddrPrefix();
+ LinkName += Mang->getValueName(GV);
+ LinkName += TAI->getGlobalVarAddrSuffix();
+ }
+
+ return LinkName;
+}
+
+/// EmitExternalGlobal - Emit the external reference to a global variable.
+/// Should be overridden if an indirect reference should be used.
+void AsmPrinter::EmitExternalGlobal(const GlobalVariable *GV) {
+ O << getGlobalLinkName(GV);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+/// LEB 128 number encoding.
+
+/// PrintULEB128 - Print a series of hexadecimal values (separated by commas)
+/// representing an unsigned leb128 value.
+void AsmPrinter::PrintULEB128(unsigned Value) const {
+ do {
+ unsigned Byte = Value & 0x7f;
+ Value >>= 7;
+ if (Value) Byte |= 0x80;
+ O << "0x" << std::hex << Byte << std::dec;
+ if (Value) O << ", ";
+ } while (Value);
+}
+
+/// SizeULEB128 - Compute the number of bytes required for an unsigned leb128
+/// value.
+unsigned AsmPrinter::SizeULEB128(unsigned Value) {
+ unsigned Size = 0;
+ do {
+ Value >>= 7;
+ Size += sizeof(int8_t);
+ } while (Value);
+ return Size;
+}
+
+/// PrintSLEB128 - Print a series of hexadecimal values (separated by commas)
+/// representing a signed leb128 value.
+void AsmPrinter::PrintSLEB128(int Value) const {
+ int Sign = Value >> (8 * sizeof(Value) - 1);
+ bool IsMore;
+
+ do {
+ unsigned Byte = Value & 0x7f;
+ Value >>= 7;
+ IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+ if (IsMore) Byte |= 0x80;
+ O << "0x" << std::hex << Byte << std::dec;
+ if (IsMore) O << ", ";
+ } while (IsMore);
+}
+
+/// SizeSLEB128 - Compute the number of bytes required for a signed leb128
+/// value.
+unsigned AsmPrinter::SizeSLEB128(int Value) {
+ unsigned Size = 0;
+ int Sign = Value >> (8 * sizeof(Value) - 1);
+ bool IsMore;
+
+ do {
+ unsigned Byte = Value & 0x7f;
+ Value >>= 7;
+ IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+ Size += sizeof(int8_t);
+ } while (IsMore);
+ return Size;
+}
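// Illustrative worked example (not part of this diff): encoding 624485 with
// the ULEB128 scheme implemented above.
//   624485 & 0x7f        = 0x65, more bytes follow -> emit 0xe5
//   (624485 >> 7) & 0x7f = 0x0e, more bytes follow -> emit 0x8e
//   (624485 >> 14)       = 0x26, no continuation   -> emit 0x26
// PrintULEB128(624485) therefore prints "0xe5, 0x8e, 0x26", and
// SizeULEB128(624485) returns 3.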
+
+//===--------------------------------------------------------------------===//
+// Emission and print routines
+//
+
+/// PrintHex - Print a value as a hexadecimal value.
+///
+void AsmPrinter::PrintHex(int Value) const {
+ O << "0x" << std::hex << Value << std::dec;
+}
+
+/// EOL - Print a newline character to asm stream. If a comment is present
+/// then it will be printed first. Comments should not contain '\n'.
+void AsmPrinter::EOL() const {
+ O << "\n";
+}
+void AsmPrinter::EOL(const std::string &Comment) const {
+ if (AsmVerbose && !Comment.empty()) {
+ O << "\t"
+ << TAI->getCommentString()
+ << " "
+ << Comment;
+ }
+ O << "\n";
+}
+
+/// EmitULEB128Bytes - Emit an assembler byte data directive to compose an
+/// unsigned leb128 value.
+void AsmPrinter::EmitULEB128Bytes(unsigned Value) const {
+ if (TAI->hasLEB128()) {
+ O << "\t.uleb128\t"
+ << Value;
+ } else {
+ O << TAI->getData8bitsDirective();
+ PrintULEB128(Value);
+ }
+}
+
+/// EmitSLEB128Bytes - print an assembler byte data directive to compose a
+/// signed leb128 value.
+void AsmPrinter::EmitSLEB128Bytes(int Value) const {
+ if (TAI->hasLEB128()) {
+ O << "\t.sleb128\t"
+ << Value;
+ } else {
+ O << TAI->getData8bitsDirective();
+ PrintSLEB128(Value);
+ }
+}
+
+/// EmitInt8 - Emit a byte directive and value.
+///
+void AsmPrinter::EmitInt8(int Value) const {
+ O << TAI->getData8bitsDirective();
+ PrintHex(Value & 0xFF);
+}
+
+/// EmitInt16 - Emit a short directive and value.
+///
+void AsmPrinter::EmitInt16(int Value) const {
+ O << TAI->getData16bitsDirective();
+ PrintHex(Value & 0xFFFF);
+}
+
+/// EmitInt32 - Emit a long directive and value.
+///
+void AsmPrinter::EmitInt32(int Value) const {
+ O << TAI->getData32bitsDirective();
+ PrintHex(Value);
+}
+
+/// EmitInt64 - Emit a long long directive and value.
+///
+void AsmPrinter::EmitInt64(uint64_t Value) const {
+ if (TAI->getData64bitsDirective()) {
+ O << TAI->getData64bitsDirective();
+ PrintHex(Value);
+ } else {
+ if (TM.getTargetData()->isBigEndian()) {
+ EmitInt32(unsigned(Value >> 32)); O << "\n";
+ EmitInt32(unsigned(Value));
+ } else {
+ EmitInt32(unsigned(Value)); O << "\n";
+ EmitInt32(unsigned(Value >> 32));
+ }
+ }
+}
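// Illustrative example (hypothetical value): on a 32-bit target without a
// 64-bit data directive, EmitInt64(0x1122334455667788ULL) falls back to two
// 32-bit emissions: a big-endian target gets 0x11223344 then 0x55667788
// (most significant word first); a little-endian target gets the reverse.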
+
+/// toOctal - Convert the low order bits of X into an octal digit.
+///
+static inline char toOctal(int X) {
+ return (X&7)+'0';
+}
+
+/// printStringChar - Print a char, escaped if necessary.
+///
+static void printStringChar(std::ostream &O, unsigned char C) {
+ if (C == '"') {
+ O << "\\\"";
+ } else if (C == '\\') {
+ O << "\\\\";
+ } else if (isprint(C)) {
+ O << C;
+ } else {
+ switch(C) {
+ case '\b': O << "\\b"; break;
+ case '\f': O << "\\f"; break;
+ case '\n': O << "\\n"; break;
+ case '\r': O << "\\r"; break;
+ case '\t': O << "\\t"; break;
+ default:
+ O << '\\';
+ O << toOctal(C >> 6);
+ O << toOctal(C >> 3);
+ O << toOctal(C >> 0);
+ break;
+ }
+ }
+}
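// Illustrative example: a byte that is neither printable nor a named escape is
// written as a three-digit octal escape.  For the byte 0x9b:
//   toOctal(0x9b >> 6) = '2', toOctal(0x9b >> 3) = '3', toOctal(0x9b) = '3'
// so printStringChar emits "\233" (octal 233 == 0x9b).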
+
+/// EmitString - Emit a string with quotes and a null terminator.
+/// Special characters are emitted properly.
+/// \literal (e.g. '\t') \endliteral
+void AsmPrinter::EmitString(const std::string &String) const {
+ const char* AscizDirective = TAI->getAscizDirective();
+ if (AscizDirective)
+ O << AscizDirective;
+ else
+ O << TAI->getAsciiDirective();
+ O << "\"";
+ for (unsigned i = 0, N = String.size(); i < N; ++i) {
+ unsigned char C = String[i];
+ printStringChar(O, C);
+ }
+ if (AscizDirective)
+ O << "\"";
+ else
+ O << "\\0\"";
+}
+
+
+//===----------------------------------------------------------------------===//
+
+// EmitAlignment - Emit an alignment directive to the specified power of
+// two boundary. For example, if you pass in 3 here, you will get an 8
+// byte alignment. If a global value is specified, and if that global has
+// an explicit alignment requested, it will unconditionally override the
+// alignment request. However, if ForcedAlignBits is specified, this value
+// has final say: the ultimate alignment will be the max of ForcedAlignBits
+// and the alignment computed with NumBits and the global.
+//
+// The algorithm is:
+// Align = NumBits;
+// if (GV && GV->hasalignment) Align = GV->getalignment();
+// Align = std::max(Align, ForcedAlignBits);
+//
+void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV,
+ unsigned ForcedAlignBits) const {
+ if (GV && GV->getAlignment())
+ NumBits = Log2_32(GV->getAlignment());
+ NumBits = std::max(NumBits, ForcedAlignBits);
+
+ if (NumBits == 0) return; // No need to emit alignment.
+ if (TAI->getAlignmentIsInBytes()) NumBits = 1 << NumBits;
+ O << TAI->getAlignDirective() << NumBits << "\n";
+}
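// Illustrative worked example (hypothetical values): EmitAlignment(3, GV),
// where GV carries an explicit 16-byte alignment and ForcedAlignBits is 0:
//   NumBits = Log2_32(16) = 4;  std::max(4, 0) = 4
// so ".align 4" is emitted, or ".align 16" on targets whose alignment
// directive takes a byte count rather than a power of two.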
+
+
+/// EmitZeros - Emit a block of zeros.
+///
+void AsmPrinter::EmitZeros(uint64_t NumZeros) const {
+ if (NumZeros) {
+ if (TAI->getZeroDirective()) {
+ O << TAI->getZeroDirective() << NumZeros;
+ if (TAI->getZeroDirectiveSuffix())
+ O << TAI->getZeroDirectiveSuffix();
+ O << "\n";
+ } else {
+ for (; NumZeros; --NumZeros)
+ O << TAI->getData8bitsDirective() << "0\n";
+ }
+ }
+}
+
+// Print out the specified constant, without a storage class. Only the
+// constants valid in constant expressions can occur here.
+void AsmPrinter::EmitConstantValueOnly(const Constant *CV) {
+ if (CV->isNullValue() || isa<UndefValue>(CV))
+ O << "0";
+ else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ O << CI->getZExtValue();
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+ // This is a constant address for a global variable or function. Use the
+ // name of the variable or function as the address value, possibly
+ // decorating it with GlobalVarAddrPrefix/Suffix or
+ // FunctionAddrPrefix/Suffix (these all default to "" )
+ if (isa<Function>(GV)) {
+ O << TAI->getFunctionAddrPrefix()
+ << Mang->getValueName(GV)
+ << TAI->getFunctionAddrSuffix();
+ } else {
+ O << TAI->getGlobalVarAddrPrefix()
+ << Mang->getValueName(GV)
+ << TAI->getGlobalVarAddrSuffix();
+ }
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ const TargetData *TD = TM.getTargetData();
+ unsigned Opcode = CE->getOpcode();
+ switch (Opcode) {
+ case Instruction::GetElementPtr: {
+ // generate a symbolic expression for the byte address
+ const Constant *ptrVal = CE->getOperand(0);
+ SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
+ if (int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0],
+ idxVec.size())) {
+ if (Offset)
+ O << "(";
+ EmitConstantValueOnly(ptrVal);
+ if (Offset > 0)
+ O << ") + " << Offset;
+ else if (Offset < 0)
+ O << ") - " << -Offset;
+ } else {
+ EmitConstantValueOnly(ptrVal);
+ }
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ assert(0 && "FIXME: Don't yet support this kind of constant cast expr");
+ break;
+ case Instruction::BitCast:
+ return EmitConstantValueOnly(CE->getOperand(0));
+
+ case Instruction::IntToPtr: {
+ // Handle casts to pointers by changing them into casts to the appropriate
+ // integer type. This promotes constant folding and simplifies this code.
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(), false/*ZExt*/);
+ return EmitConstantValueOnly(Op);
+ }
+
+
+ case Instruction::PtrToInt: {
+ // Support only foldable casts to/from pointers that can be eliminated by
+ // changing the pointer to the appropriately sized integer type.
+ Constant *Op = CE->getOperand(0);
+ const Type *Ty = CE->getType();
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot greater or equal to the size of the pointer.
+ if (Ty->isInteger() &&
+ TD->getTypeSize(Ty) >= TD->getTypeSize(Op->getType()))
+ return EmitConstantValueOnly(Op);
+
+ assert(0 && "FIXME: Don't yet support this kind of constant cast expr");
+ EmitConstantValueOnly(Op);
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::Sub:
+ O << "(";
+ EmitConstantValueOnly(CE->getOperand(0));
+ O << (Opcode==Instruction::Add ? ") + (" : ") - (");
+ EmitConstantValueOnly(CE->getOperand(1));
+ O << ")";
+ break;
+ default:
+ assert(0 && "Unsupported operator!");
+ }
+ } else {
+ assert(0 && "Unknown constant value!");
+ }
+}
+
+/// printAsCString - Print the specified array as a C compatible string, only if
+/// the predicate isString is true.
+///
+static void printAsCString(std::ostream &O, const ConstantArray *CVA,
+ unsigned LastElt) {
+ assert(CVA->isString() && "Array is not string compatible!");
+
+ O << "\"";
+ for (unsigned i = 0; i != LastElt; ++i) {
+ unsigned char C =
+ (unsigned char)cast<ConstantInt>(CVA->getOperand(i))->getZExtValue();
+ printStringChar(O, C);
+ }
+ O << "\"";
+}
+
+/// EmitString - Emit a zero-byte-terminated string constant.
+///
+void AsmPrinter::EmitString(const ConstantArray *CVA) const {
+ unsigned NumElts = CVA->getNumOperands();
+ if (TAI->getAscizDirective() && NumElts &&
+ cast<ConstantInt>(CVA->getOperand(NumElts-1))->getZExtValue() == 0) {
+ O << TAI->getAscizDirective();
+ printAsCString(O, CVA, NumElts-1);
+ } else {
+ O << TAI->getAsciiDirective();
+ printAsCString(O, CVA, NumElts);
+ }
+ O << "\n";
+}
+
+/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
+///
+void AsmPrinter::EmitGlobalConstant(const Constant *CV) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (CV->isNullValue() || isa<UndefValue>(CV)) {
+ EmitZeros(TD->getTypeSize(CV->getType()));
+ return;
+ } else if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
+ if (CVA->isString()) {
+ EmitString(CVA);
+ } else { // Not a string. Print the values in successive locations
+ for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i)
+ EmitGlobalConstant(CVA->getOperand(i));
+ }
+ return;
+ } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
+ // Print the fields in successive locations. Pad to align if needed!
+ const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType());
+ uint64_t sizeSoFar = 0;
+ for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) {
+ const Constant* field = CVS->getOperand(i);
+
+ // Check if padding is needed and insert one or more 0s.
+ uint64_t fieldSize = TD->getTypeSize(field->getType());
+ uint64_t padSize = ((i == e-1? cvsLayout->getSizeInBytes()
+ : cvsLayout->getElementOffset(i+1))
+ - cvsLayout->getElementOffset(i)) - fieldSize;
+ sizeSoFar += fieldSize + padSize;
+
+ // Now print the actual field value
+ EmitGlobalConstant(field);
+
+ // Insert the field padding unless it's zero bytes...
+ EmitZeros(padSize);
+ }
+ assert(sizeSoFar == cvsLayout->getSizeInBytes() &&
+ "Layout of constant struct may be incorrect!");
+ return;
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ // FP Constants are printed as integer constants to avoid losing
+ // precision...
+ double Val = CFP->getValue();
+ if (CFP->getType() == Type::DoubleTy) {
+ if (TAI->getData64bitsDirective())
+ O << TAI->getData64bitsDirective() << DoubleToBits(Val) << "\t"
+ << TAI->getCommentString() << " double value: " << Val << "\n";
+ else if (TD->isBigEndian()) {
+ O << TAI->getData32bitsDirective() << unsigned(DoubleToBits(Val) >> 32)
+ << "\t" << TAI->getCommentString()
+ << " double most significant word " << Val << "\n";
+ O << TAI->getData32bitsDirective() << unsigned(DoubleToBits(Val))
+ << "\t" << TAI->getCommentString()
+ << " double least significant word " << Val << "\n";
+ } else {
+ O << TAI->getData32bitsDirective() << unsigned(DoubleToBits(Val))
+ << "\t" << TAI->getCommentString()
+ << " double least significant word " << Val << "\n";
+ O << TAI->getData32bitsDirective() << unsigned(DoubleToBits(Val) >> 32)
+ << "\t" << TAI->getCommentString()
+ << " double most significant word " << Val << "\n";
+ }
+ return;
+ } else {
+ O << TAI->getData32bitsDirective() << FloatToBits(Val)
+ << "\t" << TAI->getCommentString() << " float " << Val << "\n";
+ return;
+ }
+ } else if (CV->getType() == Type::Int64Ty) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ uint64_t Val = CI->getZExtValue();
+
+ if (TAI->getData64bitsDirective())
+ O << TAI->getData64bitsDirective() << Val << "\n";
+ else if (TD->isBigEndian()) {
+ O << TAI->getData32bitsDirective() << unsigned(Val >> 32)
+ << "\t" << TAI->getCommentString()
+ << " Double-word most significant word " << Val << "\n";
+ O << TAI->getData32bitsDirective() << unsigned(Val)
+ << "\t" << TAI->getCommentString()
+ << " Double-word least significant word " << Val << "\n";
+ } else {
+ O << TAI->getData32bitsDirective() << unsigned(Val)
+ << "\t" << TAI->getCommentString()
+ << " Double-word least significant word " << Val << "\n";
+ O << TAI->getData32bitsDirective() << unsigned(Val >> 32)
+ << "\t" << TAI->getCommentString()
+ << " Double-word most significant word " << Val << "\n";
+ }
+ return;
+ }
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ const VectorType *PTy = CP->getType();
+
+ for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I)
+ EmitGlobalConstant(CP->getOperand(I));
+
+ return;
+ }
+
+ const Type *type = CV->getType();
+ printDataDirective(type);
+ EmitConstantValueOnly(CV);
+ O << "\n";
+}
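// Illustrative worked example (hypothetical struct) of the padding computation
// in the ConstantStruct case above: for { i8, i32 } with the usual layout the
// fields sit at offsets 0 and 4 and the struct is 8 bytes, so
//   field 0: fieldSize = 1, padSize = (4 - 0) - 1 = 3  -> three zero bytes
//   field 1: fieldSize = 4, padSize = (8 - 4) - 4 = 0  -> no padding
// and sizeSoFar ends at 8, matching cvsLayout->getSizeInBytes().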
+
+void
+AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ // Target doesn't support this yet!
+ abort();
+}
+
+/// PrintSpecial - Print information related to the specified machine instr
+/// that is independent of the operand, and may be independent of the instr
+/// itself. This can be useful for portably encoding the comment character
+/// or other bits of target-specific knowledge into the asmstrings. The
+/// syntax used is ${:comment}. Targets can override this to add support
+/// for their own strange codes.
+void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) {
+ if (!strcmp(Code, "private")) {
+ O << TAI->getPrivateGlobalPrefix();
+ } else if (!strcmp(Code, "comment")) {
+ O << TAI->getCommentString();
+ } else if (!strcmp(Code, "uid")) {
+ // Assign a unique ID to this machine instruction.
+ static const MachineInstr *LastMI = 0;
+ static const Function *F = 0;
+ static unsigned Counter = 0U-1;
+
+ // Comparing the address of MI isn't sufficient, because machineinstrs may
+ // be allocated to the same address across functions.
+ const Function *ThisF = MI->getParent()->getParent()->getFunction();
+
+ // If this is a new machine instruction, bump the counter.
+ if (LastMI != MI || F != ThisF) {
+ ++Counter;
+ LastMI = MI;
+ F = ThisF;
+ }
+ O << Counter;
+ } else {
+ cerr << "Unknown special formatter '" << Code
+ << "' for machine instr: " << *MI;
+ exit(1);
+ }
+}
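// Illustrative example (hypothetical asm string): for an instruction asm
// string containing "${:comment} spill ${:uid}", PrintSpecial(MI, "comment")
// prints the target's comment string and PrintSpecial(MI, "uid") prints a
// counter that is bumped for each new machine instruction, so a target whose
// comment string is "#" would see something like "# spill 0".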
+
+
+/// printInlineAsm - This method formats and prints the specified machine
+/// instruction that is an inline asm.
+void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
+ unsigned NumOperands = MI->getNumOperands();
+
+ // Count the number of register definitions.
+ unsigned NumDefs = 0;
+ for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+ ++NumDefs)
+ assert(NumDefs != NumOperands-1 && "No asm string?");
+
+ assert(MI->getOperand(NumDefs).isExternalSymbol() && "No asm string?");
+
+ // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
+ const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+
+ // If this asmstr is empty, don't bother printing the #APP/#NOAPP markers.
+ if (AsmStr[0] == 0) {
+ O << "\n"; // Tab already printed, avoid double indenting next instr.
+ return;
+ }
+
+ O << TAI->getInlineAsmStart() << "\n\t";
+
+ // The variant of the current asmprinter.
+ int AsmPrinterVariant = TAI->getAssemblerDialect();
+
+ int CurVariant = -1; // The number of the {.|.|.} region we are in.
+ const char *LastEmitted = AsmStr; // One past the last character emitted.
+
+ while (*LastEmitted) {
+ switch (*LastEmitted) {
+ default: {
+ // Not a special case, emit the string section literally.
+ const char *LiteralEnd = LastEmitted+1;
+ while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+ *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+ ++LiteralEnd;
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ O.write(LastEmitted, LiteralEnd-LastEmitted);
+ LastEmitted = LiteralEnd;
+ break;
+ }
+ case '\n':
+ ++LastEmitted; // Consume newline character.
+ O << "\n"; // Indent code with newline.
+ break;
+ case '$': {
+ ++LastEmitted; // Consume '$' character.
+ bool Done = true;
+
+ // Handle escapes.
+ switch (*LastEmitted) {
+ default: Done = false; break;
+ case '$': // $$ -> $
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ O << '$';
+ ++LastEmitted; // Consume second '$' character.
+ break;
+ case '(': // $( -> same as GCC's { character.
+ ++LastEmitted; // Consume '(' character.
+ if (CurVariant != -1) {
+ cerr << "Nested variants found in inline asm string: '"
+ << AsmStr << "'\n";
+ exit(1);
+ }
+ CurVariant = 0; // We're in the first variant now.
+ break;
+ case '|':
+ ++LastEmitted; // consume '|' character.
+ if (CurVariant == -1) {
+ cerr << "Found '|' character outside of variant in inline asm "
+ << "string: '" << AsmStr << "'\n";
+ exit(1);
+ }
+ ++CurVariant; // We're in the next variant.
+ break;
+ case ')': // $) -> same as GCC's } char.
+ ++LastEmitted; // consume ')' character.
+ if (CurVariant == -1) {
+ cerr << "Found '}' character outside of variant in inline asm "
+ << "string: '" << AsmStr << "'\n";
+ exit(1);
+ }
+ CurVariant = -1;
+ break;
+ }
+ if (Done) break;
+
+ bool HasCurlyBraces = false;
+ if (*LastEmitted == '{') { // ${variable}
+ ++LastEmitted; // Consume '{' character.
+ HasCurlyBraces = true;
+ }
+
+ const char *IDStart = LastEmitted;
+ char *IDEnd;
+ errno = 0;
+ long Val = strtol(IDStart, &IDEnd, 10); // We only accept numbers for IDs.
+ if (!isdigit(*IDStart) || (Val == 0 && errno == EINVAL)) {
+ cerr << "Bad $ operand number in inline asm string: '"
+ << AsmStr << "'\n";
+ exit(1);
+ }
+ LastEmitted = IDEnd;
+
+ char Modifier[2] = { 0, 0 };
+
+ if (HasCurlyBraces) {
+ // If we have curly braces, check for a modifier character. This
+        // supports syntax like ${0:u}, which corresponds to "%u0" in GCC asm.
+ if (*LastEmitted == ':') {
+ ++LastEmitted; // Consume ':' character.
+ if (*LastEmitted == 0) {
+ cerr << "Bad ${:} expression in inline asm string: '"
+ << AsmStr << "'\n";
+ exit(1);
+ }
+
+ Modifier[0] = *LastEmitted;
+ ++LastEmitted; // Consume modifier character.
+ }
+
+ if (*LastEmitted != '}') {
+ cerr << "Bad ${} expression in inline asm string: '"
+ << AsmStr << "'\n";
+ exit(1);
+ }
+ ++LastEmitted; // Consume '}' character.
+ }
+
+ if ((unsigned)Val >= NumOperands-1) {
+ cerr << "Invalid $ operand number in inline asm string: '"
+ << AsmStr << "'\n";
+ exit(1);
+ }
+
+ // Okay, we finally have a value number. Ask the target to print this
+ // operand!
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
+ unsigned OpNo = 1;
+
+ bool Error = false;
+
+ // Scan to find the machine operand number for the operand.
+ for (; Val; --Val) {
+ if (OpNo >= MI->getNumOperands()) break;
+ unsigned OpFlags = MI->getOperand(OpNo).getImmedValue();
+ OpNo += (OpFlags >> 3) + 1;
+ }
+
+ if (OpNo >= MI->getNumOperands()) {
+ Error = true;
+ } else {
+ unsigned OpFlags = MI->getOperand(OpNo).getImmedValue();
+ ++OpNo; // Skip over the ID number.
+
+ AsmPrinter *AP = const_cast<AsmPrinter*>(this);
+ if ((OpFlags & 7) == 4 /*ADDR MODE*/) {
+ Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant,
+ Modifier[0] ? Modifier : 0);
+ } else {
+ Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant,
+ Modifier[0] ? Modifier : 0);
+ }
+ }
+ if (Error) {
+ cerr << "Invalid operand found in inline asm: '"
+ << AsmStr << "'\n";
+ MI->dump();
+ exit(1);
+ }
+ }
+ break;
+ }
+ }
+ }
+ O << "\n\t" << TAI->getInlineAsmEnd() << "\n";
+}
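// Illustrative example (hypothetical asm string): the template
//   "$(movl $1, $0$|mov $0, $1$)"
// contains two dialect variants, introduced by "$(", separated by "$|" and
// closed by "$)".  Only the variant whose index matches the current
// AsmPrinterVariant is printed, and $0/$1 are replaced by the printed machine
// operands (with ${0:u}-style modifiers passed through to PrintAsmOperand).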
+
+/// printLabel - This method prints a local label used by debug and
+/// exception handling tables.
+void AsmPrinter::printLabel(const MachineInstr *MI) const {
+ O << "\n"
+ << TAI->getPrivateGlobalPrefix()
+ << "label"
+ << MI->getOperand(0).getImmedValue()
+ << ":\n";
+}
+
+/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
+/// instruction, using the specified assembler variant. Targets should
+/// override this to format as appropriate.
+bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode) {
+ // Target doesn't support this yet!
+ return true;
+}
+
+bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ // Target doesn't support this yet!
+ return true;
+}
+
+/// printBasicBlockLabel - This method prints the label for the specified
+/// MachineBasicBlock
+void AsmPrinter::printBasicBlockLabel(const MachineBasicBlock *MBB,
+ bool printColon,
+ bool printComment) const {
+ O << TAI->getPrivateGlobalPrefix() << "BB" << FunctionNumber << "_"
+ << MBB->getNumber();
+ if (printColon)
+ O << ':';
+ if (printComment && MBB->getBasicBlock())
+ O << '\t' << TAI->getCommentString() << MBB->getBasicBlock()->getName();
+}
+
+/// printSetLabel - This method prints a set label for the specified
+/// MachineBasicBlock
+void AsmPrinter::printSetLabel(unsigned uid,
+ const MachineBasicBlock *MBB) const {
+ if (!TAI->getSetDirective())
+ return;
+
+ O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
+ << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ',';
+ printBasicBlockLabel(MBB, false, false);
+ O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << uid << '\n';
+}
+
+void AsmPrinter::printSetLabel(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const {
+ if (!TAI->getSetDirective())
+ return;
+
+ O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
+ << getFunctionNumber() << '_' << uid << '_' << uid2
+ << "_set_" << MBB->getNumber() << ',';
+ printBasicBlockLabel(MBB, false, false);
+ O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << uid << '_' << uid2 << '\n';
+}
+
+/// printDataDirective - This method prints the asm directive for the
+/// specified type.
+void AsmPrinter::printDataDirective(const Type *type) {
+ const TargetData *TD = TM.getTargetData();
+ switch (type->getTypeID()) {
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(type)->getBitWidth();
+ if (BitWidth <= 8)
+ O << TAI->getData8bitsDirective();
+ else if (BitWidth <= 16)
+ O << TAI->getData16bitsDirective();
+ else if (BitWidth <= 32)
+ O << TAI->getData32bitsDirective();
+ else if (BitWidth <= 64) {
+ assert(TAI->getData64bitsDirective() &&
+ "Target cannot handle 64-bit constant exprs!");
+ O << TAI->getData64bitsDirective();
+ }
+ break;
+ }
+ case Type::PointerTyID:
+ if (TD->getPointerSize() == 8) {
+ assert(TAI->getData64bitsDirective() &&
+ "Target cannot handle 64-bit pointer exprs!");
+ O << TAI->getData64bitsDirective();
+ } else {
+ O << TAI->getData32bitsDirective();
+ }
+ break;
+ case Type::FloatTyID: case Type::DoubleTyID:
+ assert (0 && "Should have already output floating point constant.");
+ default:
+ assert (0 && "Can't handle printing this type of thing");
+ break;
+ }
+}
+
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
new file mode 100644
index 0000000..0fca985
--- /dev/null
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -0,0 +1,1093 @@
+//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass forwards branches to unconditional branches to make them branch
+// directly to the target block. This pass often results in dead MBB's, which
+// it then removes.
+//
+// Note that this pass must be run after register allocation; it cannot handle
+// SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "branchfolding"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumBranchOpts, "Number of branches optimized");
+STATISTIC(NumTailMerge , "Number of block tails merged");
+static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
+ cl::init(cl::BOU_UNSET), cl::Hidden);
+namespace {
+ // Throttle for huge numbers of predecessors (compile speed problems)
+ cl::opt<unsigned>
+ TailMergeThreshold("tail-merge-threshold",
+ cl::desc("Max number of predecessors to consider tail merging"),
+ cl::init(100), cl::Hidden);
+
+ struct BranchFolder : public MachineFunctionPass {
+ static char ID;
+ BranchFolder(bool defaultEnableTailMerge) :
+ MachineFunctionPass((intptr_t)&ID) {
+ switch (FlagEnableTailMerge) {
+ case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
+ case cl::BOU_TRUE: EnableTailMerge = true; break;
+ case cl::BOU_FALSE: EnableTailMerge = false; break;
+ }
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "Control Flow Optimizer"; }
+ const TargetInstrInfo *TII;
+ MachineModuleInfo *MMI;
+ bool MadeChange;
+ private:
+ // Tail Merging.
+ bool EnableTailMerge;
+ bool TailMergeBlocks(MachineFunction &MF);
+ bool TryMergeBlocks(MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest);
+ MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1);
+
+ std::vector<std::pair<unsigned,MachineBasicBlock*> > MergePotentials;
+ const MRegisterInfo *RegInfo;
+ RegScavenger *RS;
+    // Branch optimization.
+ bool OptimizeBranches(MachineFunction &MF);
+ void OptimizeBlock(MachineBasicBlock *MBB);
+ void RemoveDeadBlock(MachineBasicBlock *MBB);
+
+ bool CanFallThrough(MachineBasicBlock *CurBB);
+ bool CanFallThrough(MachineBasicBlock *CurBB, bool BranchUnAnalyzable,
+ MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ const std::vector<MachineOperand> &Cond);
+ };
+ char BranchFolder::ID = 0;
+}
+
+FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
+ return new BranchFolder(DefaultEnableTailMerge); }
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
+ assert(MBB->pred_empty() && "MBB must be dead!");
+ DOUT << "\nRemoving MBB: " << *MBB;
+
+ MachineFunction *MF = MBB->getParent();
+ // drop all successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end()-1);
+
+  // If DWARF info is being generated, check to see if there are any LABEL
+ // records in the basic block. If so, unregister them from MachineModuleInfo.
+ if (MMI && !MBB->empty()) {
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if ((unsigned)I->getOpcode() == TargetInstrInfo::LABEL) {
+ // The label ID # is always operand #0, an immediate.
+ MMI->InvalidateLabel(I->getOperand(0).getImm());
+ }
+ }
+ }
+
+ // Remove the block.
+ MF->getBasicBlockList().erase(MBB);
+}
+
+bool BranchFolder::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ if (!TII) return false;
+
+ // Fix CFG. The later algorithms expect it to be right.
+ bool EverMadeChange = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) {
+ MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0;
+ std::vector<MachineOperand> Cond;
+ if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
+ EverMadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+ }
+
+ RegInfo = MF.getTarget().getRegisterInfo();
+ RS = RegInfo->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL;
+
+ MMI = getAnalysisToUpdate<MachineModuleInfo>();
+
+ bool MadeChangeThisIteration = true;
+ while (MadeChangeThisIteration) {
+ MadeChangeThisIteration = false;
+ MadeChangeThisIteration |= TailMergeBlocks(MF);
+ MadeChangeThisIteration |= OptimizeBranches(MF);
+ EverMadeChange |= MadeChangeThisIteration;
+ }
+
+  // See if any jump tables have become mergeable or dead as the code generator
+ // did its thing.
+ MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JTs = JTI->getJumpTables();
+ if (!JTs.empty()) {
+ // Figure out how these jump tables should be merged.
+ std::vector<unsigned> JTMapping;
+ JTMapping.reserve(JTs.size());
+
+ // We always keep the 0th jump table.
+ JTMapping.push_back(0);
+
+ // Scan the jump tables, seeing if there are any duplicates. Note that this
+ // is N^2, which should be fixed someday.
+ for (unsigned i = 1, e = JTs.size(); i != e; ++i)
+ JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs));
+
+    // If a jump table was merged with another one, walk the function rewriting
+    // references to jump tables to reference the new JT IDs.  Keep track of
+    // whether we see a jump table idx; if not, we can delete the JT.
+ std::vector<bool> JTIsLive;
+ JTIsLive.resize(JTs.size());
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
+ BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I)
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+ MachineOperand &Op = I->getOperand(op);
+ if (!Op.isJumpTableIndex()) continue;
+ unsigned NewIdx = JTMapping[Op.getJumpTableIndex()];
+ Op.setJumpTableIndex(NewIdx);
+
+ // Remember that this JT is live.
+ JTIsLive[NewIdx] = true;
+ }
+ }
+
+ // Finally, remove dead jump tables. This happens either because the
+ // indirect jump was unreachable (and thus deleted) or because the jump
+ // table was merged with some other one.
+ for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
+ if (!JTIsLive[i]) {
+ JTI->RemoveJumpTable(i);
+ EverMadeChange = true;
+ }
+ }
+
+ delete RS;
+ return EverMadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Tail Merging of Blocks
+//===----------------------------------------------------------------------===//
+
+/// HashMachineInstr - Compute a hash value for MI and its operands.
+static unsigned HashMachineInstr(const MachineInstr *MI) {
+ unsigned Hash = MI->getOpcode();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &Op = MI->getOperand(i);
+
+ // Merge in bits from the operand if easy.
+ unsigned OperandHash = 0;
+ switch (Op.getType()) {
+ case MachineOperand::MO_Register: OperandHash = Op.getReg(); break;
+ case MachineOperand::MO_Immediate: OperandHash = Op.getImm(); break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OperandHash = Op.getMachineBasicBlock()->getNumber();
+ break;
+ case MachineOperand::MO_FrameIndex: OperandHash = Op.getFrameIndex(); break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ OperandHash = Op.getConstantPoolIndex();
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ OperandHash = Op.getJumpTableIndex();
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ // Global address / external symbol are too hard, don't bother, but do
+ // pull in the offset.
+ OperandHash = Op.getOffset();
+ break;
+ default: break;
+ }
+
+ Hash += ((OperandHash << 3) | Op.getType()) << (i&31);
+ }
+ return Hash;
+}
+
+/// HashEndOfMBB - Hash the last few instructions in the MBB. For blocks
+/// with no successors, we hash two instructions, because cross-jumping
+/// only saves code when at least two instructions are removed (since a
+/// branch must be inserted). For blocks with a successor, one of the
+/// two blocks to be tail-merged will end with a branch already, so
+/// it pays to cross-jump even for a single instruction.
+
+static unsigned HashEndOfMBB(const MachineBasicBlock *MBB,
+ unsigned minCommonTailLength) {
+ MachineBasicBlock::const_iterator I = MBB->end();
+ if (I == MBB->begin())
+ return 0; // Empty MBB.
+
+ --I;
+ unsigned Hash = HashMachineInstr(I);
+
+ if (I == MBB->begin() || minCommonTailLength == 1)
+ return Hash; // Single instr MBB.
+
+ --I;
+ // Hash in the second-to-last instruction.
+ Hash ^= HashMachineInstr(I) << 2;
+ return Hash;
+}
+
+/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
+/// of instructions they actually have in common at their end.  Return
+/// iterators for the first shared instruction in each block.
+static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2) {
+ I1 = MBB1->end();
+ I2 = MBB2->end();
+
+ unsigned TailLen = 0;
+ while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
+ --I1; --I2;
+ if (!I1->isIdenticalTo(I2)) {
+ ++I1; ++I2;
+ break;
+ }
+ ++TailLen;
+ }
+ return TailLen;
+}
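// Illustrative example (hypothetical x86 blocks): if MBB1 ends with
//   ...; movl %eax, 4(%esp); ret
// and MBB2 ends with
//   ...; movl %eax, 4(%esp); ret
// but the instructions before the movl differ, ComputeCommonTailLength returns
// 2 and leaves I1/I2 pointing at the two movl instructions, the first shared
// instruction in each block.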
+
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing the removed tail with an unconditional branch to
+/// NewDest and updating the successor list of OldInst's block accordingly.
+void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest) {
+ MachineBasicBlock *OldBB = OldInst->getParent();
+
+ // Remove all the old successors of OldBB from the CFG.
+ while (!OldBB->succ_empty())
+ OldBB->removeSuccessor(OldBB->succ_begin());
+
+ // Remove all the dead instructions from the end of OldBB.
+ OldBB->erase(OldInst, OldBB->end());
+
+  // If OldBB isn't immediately before NewDest, insert a branch to NewDest.
+ if (++MachineFunction::iterator(OldBB) != MachineFunction::iterator(NewDest))
+ TII->InsertBranch(*OldBB, NewDest, 0, std::vector<MachineOperand>());
+ OldBB->addSuccessor(NewDest);
+ ++NumTailMerge;
+}
+
+/// SplitMBBAt - Given a machine basic block and an iterator into it, split the
+/// MBB so that everything from the iterator onward is moved into a new block
+/// that immediately follows the original one.  This returns the new MBB.
+MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1) {
+ // Create the fall-through block.
+ MachineFunction::iterator MBBI = &CurMBB;
+ MachineBasicBlock *NewMBB = new MachineBasicBlock(CurMBB.getBasicBlock());
+ CurMBB.getParent()->getBasicBlockList().insert(++MBBI, NewMBB);
+
+ // Move all the successors of this block to the specified block.
+ while (!CurMBB.succ_empty()) {
+ MachineBasicBlock *S = *(CurMBB.succ_end()-1);
+ NewMBB->addSuccessor(S);
+ CurMBB.removeSuccessor(S);
+ }
+
+ // Add an edge from CurMBB to NewMBB for the fall-through.
+ CurMBB.addSuccessor(NewMBB);
+
+ // Splice the code over.
+ NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
+
+ // For targets that use the register scavenger, we must maintain LiveIns.
+ if (RS) {
+ RS->enterBasicBlock(&CurMBB);
+ if (!CurMBB.empty())
+ RS->forward(prior(CurMBB.end()));
+ BitVector RegsLiveAtExit(RegInfo->getNumRegs());
+ RS->getRegsUsed(RegsLiveAtExit, false);
+ for (unsigned int i=0, e=RegInfo->getNumRegs(); i!=e; i++)
+ if (RegsLiveAtExit[i])
+ NewMBB->addLiveIn(i);
+ }
+
+ return NewMBB;
+}
+
+/// EstimateRuntime - Make a rough estimate for how long it will take to run
+/// the specified code.
+static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ const TargetInstrInfo *TII) {
+ unsigned Time = 0;
+ for (; I != E; ++I) {
+ const TargetInstrDescriptor &TID = TII->get(I->getOpcode());
+ if (TID.Flags & M_CALL_FLAG)
+ Time += 10;
+ else if (TID.Flags & (M_LOAD_FLAG|M_STORE_FLAG))
+ Time += 2;
+ else
+ ++Time;
+ }
+ return Time;
+}
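// Illustrative example: a block prefix containing one call, two loads, and
// three other instructions is costed at 10 + 2*2 + 3*1 = 17 by the heuristic
// above; ShouldSplitFirstBlock splits whichever prefix has the smaller
// estimate, so that block becomes the fall-through into the common tail.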
+
+/// ShouldSplitFirstBlock - We need to either split MBB1 at MBB1I or MBB2 at
+/// MBB2I and then insert an unconditional branch in the other block. Determine
+/// which is the better one to split.
+static bool ShouldSplitFirstBlock(MachineBasicBlock *MBB1,
+ MachineBasicBlock::iterator MBB1I,
+ MachineBasicBlock *MBB2,
+ MachineBasicBlock::iterator MBB2I,
+ const TargetInstrInfo *TII,
+ MachineBasicBlock *PredBB) {
+ // If one block is the entry block, split the other one; we can't generate
+ // a branch to the entry block, as its label is not emitted.
+ MachineBasicBlock *Entry = MBB1->getParent()->begin();
+ if (MBB1 == Entry)
+ return false;
+ if (MBB2 == Entry)
+ return true;
+
+ // If one block falls through into the common successor, choose that
+  // one to split; doing so costs one instruction less.
+ if (PredBB) {
+ if (MBB1 == PredBB)
+ return true;
+ else if (MBB2 == PredBB)
+ return false;
+ }
+ // TODO: if we had some notion of which block was hotter, we could split
+  // the hot block, so it is the fall-through.  Since we don't have profile
+  // info, make a decision based on which block will be hurt most by splitting.
+ unsigned MBB1Time = EstimateRuntime(MBB1->begin(), MBB1I, TII);
+ unsigned MBB2Time = EstimateRuntime(MBB2->begin(), MBB2I, TII);
+
+ // If the MBB1 prefix takes "less time" to run than the MBB2 prefix, split the
+ // MBB1 block so it falls through. This will penalize the MBB2 path, but will
+ // have a lower overall impact on the program execution.
+ return MBB1Time < MBB2Time;
+}
+
+// CurMBB needs to add an unconditional branch to SuccMBB (we removed these
+// branches temporarily for tail merging). In the case where CurMBB ends
+// with a conditional branch to the next block, optimize by reversing the
+// test and conditionally branching to SuccMBB instead.
+
+static void FixTail(MachineBasicBlock* CurMBB, MachineBasicBlock *SuccBB,
+ const TargetInstrInfo *TII) {
+ MachineFunction *MF = CurMBB->getParent();
+ MachineFunction::iterator I = next(MachineFunction::iterator(CurMBB));
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ std::vector<MachineOperand> Cond;
+ if (I != MF->end() &&
+ !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond)) {
+ MachineBasicBlock *NextBB = I;
+ if (TBB == NextBB && Cond.size() && !FBB) {
+ if (!TII->ReverseBranchCondition(Cond)) {
+ TII->RemoveBranch(*CurMBB);
+ TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond);
+ return;
+ }
+ }
+ }
+ TII->InsertBranch(*CurMBB, SuccBB, NULL, std::vector<MachineOperand>());
+}
+
+static bool MergeCompare(const std::pair<unsigned,MachineBasicBlock*> &p,
+ const std::pair<unsigned,MachineBasicBlock*> &q) {
+ if (p.first < q.first)
+ return true;
+ else if (p.first > q.first)
+ return false;
+ else if (p.second->getNumber() < q.second->getNumber())
+ return true;
+ else if (p.second->getNumber() > q.second->getNumber())
+ return false;
+ else {
+ // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
+ // an object with itself.
+#ifndef _GLIBCXX_DEBUG
+ assert(0 && "Predecessor appears twice");
+#endif
+ return(false);
+ }
+}
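
MergeCompare gives std::stable_sort a strict weak ordering over (hash, block) pairs: the primary key is the tail hash, tie-broken by block number so the result is deterministic. A minimal standalone illustration, with plain integers standing in for MachineBasicBlock pointers:

#include <algorithm>
#include <utility>
#include <vector>

typedef std::pair<unsigned, int> HashedBlock;  // (tail hash, block number)

static bool CompareHashThenNumber(const HashedBlock &p, const HashedBlock &q) {
  if (p.first != q.first)
    return p.first < q.first;   // group blocks with identical tail hashes
  return p.second < q.second;   // deterministic tie-break on block number
}

int main() {
  std::vector<HashedBlock> Potentials;
  Potentials.push_back(std::make_pair(7u, 3));
  Potentials.push_back(std::make_pair(2u, 5));
  Potentials.push_back(std::make_pair(7u, 1));
  std::stable_sort(Potentials.begin(), Potentials.end(), CompareHashThenNumber);
  // Potentials is now (2,5), (7,1), (7,3): equal hashes are adjacent.
  return 0;
}
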
+
+// See if any of the blocks in MergePotentials (which all have a common single
+// successor, or all have no successor) can be tail-merged. If there is a
+// successor, any blocks in MergePotentials that are not tail-merged and
+// are not immediately before Succ must have an unconditional branch to
+// Succ added (but the predecessor/successor lists need no adjustment).
+// The lone predecessor of Succ that falls through into Succ,
+// if any, is given in PredBB.
+
+bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
+ MachineBasicBlock* PredBB) {
+ unsigned minCommonTailLength = (SuccBB ? 1 : 2);
+ MadeChange = false;
+
+ // Sort by hash value so that blocks with identical end sequences sort
+ // together.
+ std::stable_sort(MergePotentials.begin(), MergePotentials.end(), MergeCompare);
+
+ // Walk through equivalence sets looking for actual exact matches.
+ while (MergePotentials.size() > 1) {
+ unsigned CurHash = (MergePotentials.end()-1)->first;
+ unsigned PrevHash = (MergePotentials.end()-2)->first;
+ MachineBasicBlock *CurMBB = (MergePotentials.end()-1)->second;
+
+ // If there is nothing that matches the hash of the current basic block,
+ // give up.
+ if (CurHash != PrevHash) {
+ if (SuccBB && CurMBB != PredBB)
+ FixTail(CurMBB, SuccBB, TII);
+ MergePotentials.pop_back();
+ continue;
+ }
+
+ // Look through all the pairs of blocks that have the same hash as this
+ // one, and find the pair that has the largest number of instructions in
+ // common.
+ // Since instructions may get combined later (e.g. single stores into
+ // store multiple) this measure is not particularly accurate.
+ MachineBasicBlock::iterator BBI1, BBI2;
+
+ unsigned FoundI = ~0U, FoundJ = ~0U;
+ unsigned maxCommonTailLength = 0U;
+ for (int i = MergePotentials.size()-1;
+ i != -1 && MergePotentials[i].first == CurHash; --i) {
+ for (int j = i-1;
+ j != -1 && MergePotentials[j].first == CurHash; --j) {
+ MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
+ unsigned CommonTailLen = ComputeCommonTailLength(
+ MergePotentials[i].second,
+ MergePotentials[j].second,
+ TrialBBI1, TrialBBI2);
+ if (CommonTailLen >= minCommonTailLength &&
+ CommonTailLen > maxCommonTailLength) {
+ FoundI = i;
+ FoundJ = j;
+ maxCommonTailLength = CommonTailLen;
+ BBI1 = TrialBBI1;
+ BBI2 = TrialBBI2;
+ }
+ }
+ }
+
+ // If we didn't find any pair that has at least minCommonTailLength
+ // instructions in common, bail out. All entries with this
+ // hash code can go away now.
+ if (FoundI == ~0U) {
+ for (int i = MergePotentials.size()-1;
+ i != -1 && MergePotentials[i].first == CurHash; --i) {
+ // Put the unconditional branch back, if we need one.
+ CurMBB = MergePotentials[i].second;
+ if (SuccBB && CurMBB != PredBB)
+ FixTail(CurMBB, SuccBB, TII);
+ MergePotentials.pop_back();
+ }
+ continue;
+ }
+
+ // Otherwise, move the blocks to the right positions so that BBI1/BBI2 stay
+ // valid: the last entry must be I and the next-to-last must be J.
+ if (FoundI != MergePotentials.size()-1)
+ std::swap(MergePotentials[FoundI], *(MergePotentials.end()-1));
+ if (FoundJ != MergePotentials.size()-2)
+ std::swap(MergePotentials[FoundJ], *(MergePotentials.end()-2));
+
+ CurMBB = (MergePotentials.end()-1)->second;
+ MachineBasicBlock *MBB2 = (MergePotentials.end()-2)->second;
+
+ // If neither block is the entire common tail, split the tail of one block
+ // to make it redundant with the other tail. Also, we cannot jump to the
+ // entry block, so if one block is the entry block, split the other one.
+ MachineBasicBlock *Entry = CurMBB->getParent()->begin();
+ if (CurMBB->begin() == BBI1 && CurMBB != Entry)
+ ; // CurMBB is common tail
+ else if (MBB2->begin() == BBI2 && MBB2 != Entry)
+ ; // MBB2 is common tail
+ else {
+ if (0) { // Enable this to disable partial tail merges.
+ MergePotentials.pop_back();
+ continue;
+ }
+
+ // Decide whether we want to split CurMBB or MBB2.
+ if (ShouldSplitFirstBlock(CurMBB, BBI1, MBB2, BBI2, TII, PredBB)) {
+ CurMBB = SplitMBBAt(*CurMBB, BBI1);
+ BBI1 = CurMBB->begin();
+ MergePotentials.back().second = CurMBB;
+ } else {
+ MBB2 = SplitMBBAt(*MBB2, BBI2);
+ BBI2 = MBB2->begin();
+ (MergePotentials.end()-2)->second = MBB2;
+ }
+ }
+
+ if (MBB2->begin() == BBI2 && MBB2 != Entry) {
+ // Hack the end off CurMBB, making it jump to MBB2 instead.
+ ReplaceTailWithBranchTo(BBI1, MBB2);
+ // This modifies CurMBB, so remove it from the worklist.
+ MergePotentials.pop_back();
+ } else {
+ assert(CurMBB->begin() == BBI1 && CurMBB != Entry &&
+ "Didn't split block correctly?");
+ // Hack the end off MBB2, making it jump to CurMBB instead.
+ ReplaceTailWithBranchTo(BBI2, CurMBB);
+ // This modifies MBB2, so remove it from the worklist.
+ MergePotentials.erase(MergePotentials.end()-2);
+ }
+ MadeChange = true;
+ }
+ return MadeChange;
+}
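
The ComputeCommonTailLength helper used in the loop above walks two blocks backwards from their ends and counts how many trailing instructions are identical; that count is what maxCommonTailLength tracks. The core idea, sketched standalone on character sequences rather than MachineInstrs:

#include <algorithm>
#include <string>

// Length of the longest common suffix of two "instruction" sequences,
// modeled here as strings of opcode characters.
static size_t CommonTailLength(const std::string &A, const std::string &B) {
  size_t Len = 0, Max = std::min(A.size(), B.size());
  while (Len < Max && A[A.size() - 1 - Len] == B[B.size() - 1 - Len])
    ++Len;
  return Len;
}
// CommonTailLength("xyzabc", "qqabc") == 3: only the trailing "abc" is shared.
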
+
+bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
+
+ if (!EnableTailMerge) return false;
+
+ MadeChange = false;
+
+ // First find blocks with no successors.
+ MergePotentials.clear();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ if (I->succ_empty())
+ MergePotentials.push_back(std::make_pair(HashEndOfMBB(I, 2U), I));
+ }
+ // See if we can do any tail merging on those.
+ if (MergePotentials.size() < TailMergeThreshold)
+ MadeChange |= TryMergeBlocks(NULL, NULL);
+
+ // Look at blocks (IBB) with multiple predecessors (PBB).
+ // We change each predecessor to a canonical form, by
+ // (1) temporarily removing any unconditional branch from the predecessor
+ // to IBB, and
+ // (2) altering conditional branches so that they branch to the other block,
+ // not IBB; this may require adding back an unconditional branch to IBB
+ // later, where there wasn't one coming in. E.g.
+ // Bcc IBB
+ // fallthrough to QBB
+ // here becomes
+ // Bncc QBB
+ // with a conceptual B to IBB after that, which never actually exists.
+ // With those changes, we see whether the predecessors' tails match,
+ // and merge them if so. We change things out of canonical form and
+ // back to the way they were later in the process. (OptimizeBranches
+ // would undo some of this, but we can't use it, because we'd get into
+ // a compile-time infinite loop repeatedly doing and undoing the same
+ // transformations.)
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ if (!I->succ_empty() && I->pred_size() >= 2 &&
+ I->pred_size() < TailMergeThreshold) {
+ MachineBasicBlock *IBB = I;
+ MachineBasicBlock *PredBB = prior(I);
+ MergePotentials.clear();
+ for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
+ E2 = I->pred_end();
+ P != E2; ++P) {
+ MachineBasicBlock* PBB = *P;
+ // Skip blocks that loop to themselves; we can't tail merge these.
+ if (PBB==IBB)
+ continue;
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ std::vector<MachineOperand> Cond;
+ if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond)) {
+ // Failing case: IBB is the target of a cbr, and
+ // we cannot reverse the branch.
+ std::vector<MachineOperand> NewCond(Cond);
+ if (Cond.size() && TBB==IBB) {
+ if (TII->ReverseBranchCondition(NewCond))
+ continue;
+ // This is the QBB case described above
+ if (!FBB)
+ FBB = next(MachineFunction::iterator(PBB));
+ }
+ // Failing case: the only way IBB can be reached from PBB is via
+ // exception handling. Happens for landing pads. Would be nice
+ // to have a bit in the edge so we didn't have to do all this.
+ if (IBB->isLandingPad()) {
+ MachineFunction::iterator IP = PBB; IP++;
+ MachineBasicBlock* PredNextBB = NULL;
+ if (IP!=MF.end())
+ PredNextBB = IP;
+ if (TBB==NULL) {
+ if (IBB!=PredNextBB) // fallthrough
+ continue;
+ } else if (FBB) {
+ if (TBB!=IBB && FBB!=IBB) // cbr then ubr
+ continue;
+ } else if (Cond.size() == 0) {
+ if (TBB!=IBB) // ubr
+ continue;
+ } else {
+ if (TBB!=IBB && IBB!=PredNextBB) // cbr
+ continue;
+ }
+ }
+ // Remove the unconditional branch at the end, if any.
+ if (TBB && (Cond.size()==0 || FBB)) {
+ TII->RemoveBranch(*PBB);
+ if (Cond.size())
+ // reinsert conditional branch only, for now
+ TII->InsertBranch(*PBB, (TBB==IBB) ? FBB : TBB, 0, NewCond);
+ }
+ MergePotentials.push_back(std::make_pair(HashEndOfMBB(PBB, 1U), *P));
+ }
+ }
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryMergeBlocks(I, PredBB);
+ // Reinsert an unconditional branch if needed.
+ // The 1 below can be either an original single predecessor, or a result
+ // of removing blocks in TryMergeBlocks.
+ PredBB = prior(I); // this may have been changed in TryMergeBlocks
+ if (MergePotentials.size()==1 &&
+ (MergePotentials.begin())->second != PredBB)
+ FixTail((MergePotentials.begin())->second, I, TII);
+ }
+ }
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Optimization
+//===----------------------------------------------------------------------===//
+
+bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
+ MadeChange = false;
+
+ // Make sure blocks are numbered in order
+ MF.RenumberBlocks();
+
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = I++;
+ OptimizeBlock(MBB);
+
+ // If it is dead, remove it.
+ if (MBB->pred_empty()) {
+ RemoveDeadBlock(MBB);
+ MadeChange = true;
+ ++NumDeadBlocks;
+ }
+ }
+ return MadeChange;
+}
+
+
+/// CanFallThrough - Return true if the specified block (with the specified
+/// branch condition) can implicitly transfer control to the block after it by
+/// falling off the end of it. This should return false if it can reach the
+/// block after it, but it uses an explicit branch to do so (e.g. a table jump).
+///
+/// True is a conservative answer.
+///
+bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB,
+ bool BranchUnAnalyzable,
+ MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ const std::vector<MachineOperand> &Cond) {
+ MachineFunction::iterator Fallthrough = CurBB;
+ ++Fallthrough;
+ // If FallthroughBlock is off the end of the function, it can't fall through.
+ if (Fallthrough == CurBB->getParent()->end())
+ return false;
+
+ // If FallthroughBlock isn't a successor of CurBB, no fallthrough is possible.
+ if (!CurBB->isSuccessor(Fallthrough))
+ return false;
+
+ // If we couldn't analyze the branch, assume it could fall through.
+ if (BranchUnAnalyzable) return true;
+
+ // If there is no branch, control always falls through.
+ if (TBB == 0) return true;
+
+ // If there is some explicit branch to the fallthrough block, it can obviously
+ // reach it, even though the branch should get folded to fall through implicitly.
+ if (MachineFunction::iterator(TBB) == Fallthrough ||
+ MachineFunction::iterator(FBB) == Fallthrough)
+ return true;
+
+ // If it's an unconditional branch to some block not the fall through, it
+ // doesn't fall through.
+ if (Cond.empty()) return false;
+
+ // Otherwise, if it is conditional and has no explicit false block, it falls
+ // through.
+ return FBB == 0;
+}
+
+/// CanFallThrough - Return true if the specified block can implicitly transfer
+/// control to the block after it by falling off the end of it. This should
+/// return false if it can reach the block after it, but it uses an explicit
+/// branch to do so (e.g. a table jump).
+///
+/// True is a conservative answer.
+///
+bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB) {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ std::vector<MachineOperand> Cond;
+ bool CurUnAnalyzable = TII->AnalyzeBranch(*CurBB, TBB, FBB, Cond);
+ return CanFallThrough(CurBB, CurUnAnalyzable, TBB, FBB, Cond);
+}
+
+/// IsBetterFallthrough - Return true if it would be clearly better to
+/// fall-through to MBB1 than to fall through into MBB2. This has to return
+/// a strict ordering; returning true for both (MBB1,MBB2) and (MBB2,MBB1) would
+/// result in infinite loops.
+static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2,
+ const TargetInstrInfo &TII) {
+ // Right now, we use a simple heuristic. If MBB2 ends with a call, and
+ // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
+ // optimize branches that branch to either a return block or an assert block
+ // into a fallthrough to the return.
+ if (MBB1->empty() || MBB2->empty()) return false;
+
+ MachineInstr *MBB1I = --MBB1->end();
+ MachineInstr *MBB2I = --MBB2->end();
+ return TII.isCall(MBB2I->getOpcode()) && !TII.isCall(MBB1I->getOpcode());
+}
+
+/// OptimizeBlock - Analyze and optimize control flow related to the specified
+/// block. This is never called on the entry block.
+void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
+ MachineFunction::iterator FallThrough = MBB;
+ ++FallThrough;
+
+ // If this block is empty, make all of its predecessors use its fall-through
+ // block instead of branching to it explicitly. Landing pads should not do this
+ // since the landing-pad table points to this block.
+ if (MBB->empty() && !MBB->isLandingPad()) {
+ // Dead block? Leave for cleanup later.
+ if (MBB->pred_empty()) return;
+
+ if (FallThrough == MBB->getParent()->end()) {
+ // TODO: Simplify preds to not branch here if possible!
+ } else {
+ // Rewrite all predecessors of the old block to go to the fallthrough
+ // instead.
+ while (!MBB->pred_empty()) {
+ MachineBasicBlock *Pred = *(MBB->pred_end()-1);
+ Pred->ReplaceUsesOfBlockWith(MBB, FallThrough);
+ }
+
+ // If MBB was the target of a jump table, update jump tables to go to the
+ // fallthrough instead.
+ MBB->getParent()->getJumpTableInfo()->
+ ReplaceMBBInJumpTables(MBB, FallThrough);
+ MadeChange = true;
+ }
+ return;
+ }
+
+ // Check to see if we can simplify the terminator of the block before this
+ // one.
+ MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(MBB));
+
+ MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+ std::vector<MachineOperand> PriorCond;
+ bool PriorUnAnalyzable =
+ TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond);
+ if (!PriorUnAnalyzable) {
+ // If the CFG for the prior block has extra edges, remove them.
+ MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
+ !PriorCond.empty());
+
+ // If the previous branch is conditional and both conditions go to the same
+ // destination, remove the branch, replacing it with an unconditional one or
+ // a fall-through.
+ if (PriorTBB && PriorTBB == PriorFBB) {
+ TII->RemoveBranch(PrevBB);
+ PriorCond.clear();
+ if (PriorTBB != MBB)
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
+ MadeChange = true;
+ ++NumBranchOpts;
+ return OptimizeBlock(MBB);
+ }
+
+ // If the previous branch *only* branches to *this* block (conditional or
+ // not), remove the branch.
+ if (PriorTBB == MBB && PriorFBB == 0) {
+ TII->RemoveBranch(PrevBB);
+ MadeChange = true;
+ ++NumBranchOpts;
+ return OptimizeBlock(MBB);
+ }
+
+ // If the prior block branches somewhere else on the condition and here if
+ // the condition is false, remove the uncond second branch.
+ if (PriorFBB == MBB) {
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
+ MadeChange = true;
+ ++NumBranchOpts;
+ return OptimizeBlock(MBB);
+ }
+
+ // If the prior block branches here on true and somewhere else on false, and
+ // if the branch condition is reversible, reverse the branch to create a
+ // fall-through.
+ if (PriorTBB == MBB) {
+ std::vector<MachineOperand> NewPriorCond(PriorCond);
+ if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond);
+ MadeChange = true;
+ ++NumBranchOpts;
+ return OptimizeBlock(MBB);
+ }
+ }
+
+ // If this block doesn't fall through (e.g. it ends with an uncond branch or
+ // has no successors) and if the pred falls through into this block, and if
+ // it would otherwise fall through into the block after this, move this
+ // block to the end of the function.
+ //
+ // We consider it more likely that execution will stay in the function (e.g.
+ // due to loops) than it is to exit it. This helps code such as asserts in
+ // loops, by moving the assert's no-return block out of the loop body.
+ if (!PriorCond.empty() && PriorFBB == 0 &&
+ MachineFunction::iterator(PriorTBB) == FallThrough &&
+ !CanFallThrough(MBB)) {
+ bool DoTransform = true;
+
+ // We have to be careful that the succs of PredBB aren't both no-successor
+ // blocks. If neither has successors and PredBB is the second-to-last block
+ // in the function, we'd just keep swapping which of the two blocks comes
+ // last. Only do the swap if one is clearly better to fall through than
+ // the other.
+ if (FallThrough == --MBB->getParent()->end() &&
+ !IsBetterFallthrough(PriorTBB, MBB, *TII))
+ DoTransform = false;
+
+ // We don't want to do this transformation if we have control flow like:
+ // br cond BB2
+ // BB1:
+ // ..
+ // jmp BBX
+ // BB2:
+ // ..
+ // ret
+ //
+ // In this case, we could actually be moving the return block *into* a
+ // loop!
+ if (DoTransform && !MBB->succ_empty() &&
+ (!CanFallThrough(PriorTBB) || PriorTBB->empty()))
+ DoTransform = false;
+
+
+ if (DoTransform) {
+ // Reverse the branch so we will fall through on the previous true cond.
+ std::vector<MachineOperand> NewPriorCond(PriorCond);
+ if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ DOUT << "\nMoving MBB: " << *MBB;
+ DOUT << "To make fallthrough to: " << *PriorTBB << "\n";
+
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond);
+
+ // Move this block to the end of the function.
+ MBB->moveAfter(--MBB->getParent()->end());
+ MadeChange = true;
+ ++NumBranchOpts;
+ return;
+ }
+ }
+ }
+ }
+
+ // Analyze the branch in the current block.
+ MachineBasicBlock *CurTBB = 0, *CurFBB = 0;
+ std::vector<MachineOperand> CurCond;
+ bool CurUnAnalyzable = TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond);
+ if (!CurUnAnalyzable) {
+ // If the CFG for the prior block has extra edges, remove them.
+ MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
+
+ // If this is a two-way branch, and the FBB branches to this block, reverse
+ // the condition so the single-basic-block loop is faster. Instead of:
+ // Loop: xxx; jcc Out; jmp Loop
+ // we want:
+ // Loop: xxx; jncc Loop; jmp Out
+ if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
+ std::vector<MachineOperand> NewCond(CurCond);
+ if (!TII->ReverseBranchCondition(NewCond)) {
+ TII->RemoveBranch(*MBB);
+ TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond);
+ MadeChange = true;
+ ++NumBranchOpts;
+ return OptimizeBlock(MBB);
+ }
+ }
+
+
+ // If this branch is the only thing in its block, see if we can forward
+ // other blocks across it.
+ if (CurTBB && CurCond.empty() && CurFBB == 0 &&
+ TII->isBranch(MBB->begin()->getOpcode()) && CurTBB != MBB) {
+ // This block may contain just an unconditional branch. Because there can
+ // be 'non-branch terminators' in the block, try removing the branch and
+ // then seeing if the block is empty.
+ TII->RemoveBranch(*MBB);
+
+ // If this block is just an unconditional branch to CurTBB, we can
+ // usually completely eliminate the block. The only case we cannot
+ // completely eliminate the block is when the block before this one
+ // falls through into MBB and we can't understand the prior block's branch
+ // condition.
+ if (MBB->empty()) {
+ bool PredHasNoFallThrough = TII->BlockHasNoFallThrough(PrevBB);
+ if (PredHasNoFallThrough || !PriorUnAnalyzable ||
+ !PrevBB.isSuccessor(MBB)) {
+ // If the prior block falls through into us, turn it into an
+ // explicit branch to us to make updates simpler.
+ if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
+ PriorTBB != MBB && PriorFBB != MBB) {
+ if (PriorTBB == 0) {
+ assert(PriorCond.empty() && PriorFBB == 0 &&
+ "Bad branch analysis");
+ PriorTBB = MBB;
+ } else {
+ assert(PriorFBB == 0 && "Machine CFG out of date!");
+ PriorFBB = MBB;
+ }
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond);
+ }
+
+ // Iterate through all the predecessors, revectoring each in turn.
+ size_t PI = 0;
+ bool DidChange = false;
+ bool HasBranchToSelf = false;
+ while(PI != MBB->pred_size()) {
+ MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI);
+ if (PMBB == MBB) {
+ // If this block has an uncond branch to itself, leave it.
+ ++PI;
+ HasBranchToSelf = true;
+ } else {
+ DidChange = true;
+ PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);
+ }
+ }
+
+ // Change any jumptables to go to the new MBB.
+ MBB->getParent()->getJumpTableInfo()->
+ ReplaceMBBInJumpTables(MBB, CurTBB);
+ if (DidChange) {
+ ++NumBranchOpts;
+ MadeChange = true;
+ if (!HasBranchToSelf) return;
+ }
+ }
+ }
+
+ // Add the branch back if the block is more than just an uncond branch.
+ TII->InsertBranch(*MBB, CurTBB, 0, CurCond);
+ }
+ }
+
+ // If the prior block doesn't fall through into this block, and if this
+ // block doesn't fall through into some other block, see if we can find a
+ // place to move this block where a fall-through will happen.
+ if (!CanFallThrough(&PrevBB, PriorUnAnalyzable,
+ PriorTBB, PriorFBB, PriorCond)) {
+ // Now we know that there was no fall-through into this block, check to
+ // see if it has a fall-through into its successor.
+ bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB,
+ CurCond);
+
+ if (!MBB->isLandingPad()) {
+ // Check all the predecessors of this block. If one of them has no fall
+ // throughs, move this block right after it.
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ E = MBB->pred_end(); PI != E; ++PI) {
+ // Analyze the branch at the end of the pred.
+ MachineBasicBlock *PredBB = *PI;
+ MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
+ if (PredBB != MBB && !CanFallThrough(PredBB)
+ && (!CurFallsThru || !CurTBB || !CurFBB)
+ && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
+ // If the current block doesn't fall through, just move it.
+ // If the current block can fall through and does not end with a
+ // conditional branch, we need to append an unconditional jump to
+ // the (current) next block. To avoid a possible compile-time
+ // infinite loop, move blocks only backward in this case.
+ // Also, if there are already 2 branches here, we cannot add a third;
+ // this means we have the case
+ // Bcc next
+ // B elsewhere
+ // next:
+ if (CurFallsThru) {
+ MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB));
+ CurCond.clear();
+ TII->InsertBranch(*MBB, NextBB, 0, CurCond);
+ }
+ MBB->moveAfter(PredBB);
+ MadeChange = true;
+ return OptimizeBlock(MBB);
+ }
+ }
+ }
+
+ if (!CurFallsThru) {
+ // Check all successors to see if we can move this block before it.
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ E = MBB->succ_end(); SI != E; ++SI) {
+ // Analyze the branch at the end of the block before the succ.
+ MachineBasicBlock *SuccBB = *SI;
+ MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev;
+ std::vector<MachineOperand> SuccPrevCond;
+
+ // If this block doesn't already fall-through to that successor, and if
+ // the succ doesn't already have a block that can fall through into it,
+ // and if the successor isn't an EH destination, we can arrange for the
+ // fallthrough to happen.
+ if (SuccBB != MBB && !CanFallThrough(SuccPrev) &&
+ !SuccBB->isLandingPad()) {
+ MBB->moveBefore(SuccBB);
+ MadeChange = true;
+ return OptimizeBlock(MBB);
+ }
+ }
+
+ // Okay, there is no really great place to put this block. If, however,
+ // the block before this one would be a fall-through if this block were
+ // removed, move this block to the end of the function.
+ if (FallThrough != MBB->getParent()->end() &&
+ PrevBB.isSuccessor(FallThrough)) {
+ MBB->moveAfter(--MBB->getParent()->end());
+ MadeChange = true;
+ return;
+ }
+ }
+ }
+}
diff --git a/lib/CodeGen/DwarfWriter.cpp b/lib/CodeGen/DwarfWriter.cpp
new file mode 100644
index 0000000..257a776
--- /dev/null
+++ b/lib/CodeGen/DwarfWriter.cpp
@@ -0,0 +1,3616 @@
+//===-- llvm/CodeGen/DwarfWriter.cpp - Dwarf Framework ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/DwarfWriter.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <ostream>
+#include <string>
+using namespace llvm;
+using namespace llvm::dwarf;
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+
+/// Configuration values for initial hash set sizes (log2).
+///
+static const unsigned InitDiesSetSize = 9; // 512
+static const unsigned InitAbbreviationsSetSize = 9; // 512
+static const unsigned InitValuesSetSize = 9; // 512
+
+//===----------------------------------------------------------------------===//
+/// Forward declarations.
+///
+class DIE;
+class DIEValue;
+
+//===----------------------------------------------------------------------===//
+/// DWLabel - Labels are used to track locations in the assembler file.
+/// Labels appear in the form <prefix><Tag><Number>, where the tag is a
+/// category of label (e.g. location) and number is a value unique in that
+/// category.
+class DWLabel {
+public:
+ /// Tag - Label category tag. Should always be a statically declared C string.
+ ///
+ const char *Tag;
+
+ /// Number - Value to make label unique.
+ ///
+ unsigned Number;
+
+ DWLabel(const char *T, unsigned N) : Tag(T), Number(N) {}
+
+ void Profile(FoldingSetNodeID &ID) const {
+ ID.AddString(std::string(Tag));
+ ID.AddInteger(Number);
+ }
+
+#ifndef NDEBUG
+ void print(std::ostream *O) const {
+ if (O) print(*O);
+ }
+ void print(std::ostream &O) const {
+ O << "." << Tag;
+ if (Number) O << Number;
+ }
+#endif
+};
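
A DWLabel is just a (tag, number) pair; PrintLabelName in the Dwarf class further below turns it into an assembler-local symbol by gluing the target's private prefix, the tag, and the number together (a zero number is omitted). A small standalone sketch of that naming scheme, using a hypothetical "L" prefix:

#include <iostream>
#include <sstream>
#include <string>

// Builds <prefix><Tag><Number>, omitting a zero number as DWLabel::print does.
static std::string MakeLabelName(const std::string &Prefix,
                                 const std::string &Tag, unsigned Number) {
  std::ostringstream OS;
  OS << Prefix << Tag;
  if (Number)
    OS << Number;
  return OS.str();
}

int main() {
  std::cout << MakeLabelName("L", "debug_begin", 1) << "\n";  // Ldebug_begin1
  std::cout << MakeLabelName("L", "section_end", 0) << "\n";  // Lsection_end
  return 0;
}
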
+
+//===----------------------------------------------------------------------===//
+/// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a
+/// Dwarf abbreviation.
+class DIEAbbrevData {
+private:
+ /// Attribute - Dwarf attribute code.
+ ///
+ unsigned Attribute;
+
+ /// Form - Dwarf form code.
+ ///
+ unsigned Form;
+
+public:
+ DIEAbbrevData(unsigned A, unsigned F)
+ : Attribute(A)
+ , Form(F)
+ {}
+
+ // Accessors.
+ unsigned getAttribute() const { return Attribute; }
+ unsigned getForm() const { return Form; }
+
+ /// Profile - Used to gather unique data for the abbreviation folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Attribute);
+ ID.AddInteger(Form);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
+/// information object.
+class DIEAbbrev : public FoldingSetNode {
+private:
+ /// Tag - Dwarf tag code.
+ ///
+ unsigned Tag;
+
+ /// Unique number for node.
+ ///
+ unsigned Number;
+
+ /// ChildrenFlag - Dwarf children flag.
+ ///
+ unsigned ChildrenFlag;
+
+ /// Data - Raw data bytes for abbreviation.
+ ///
+ std::vector<DIEAbbrevData> Data;
+
+public:
+
+ DIEAbbrev(unsigned T, unsigned C)
+ : Tag(T)
+ , ChildrenFlag(C)
+ , Data()
+ {}
+ ~DIEAbbrev() {}
+
+ // Accessors.
+ unsigned getTag() const { return Tag; }
+ unsigned getNumber() const { return Number; }
+ unsigned getChildrenFlag() const { return ChildrenFlag; }
+ const std::vector<DIEAbbrevData> &getData() const { return Data; }
+ void setTag(unsigned T) { Tag = T; }
+ void setChildrenFlag(unsigned CF) { ChildrenFlag = CF; }
+ void setNumber(unsigned N) { Number = N; }
+
+ /// AddAttribute - Adds another set of attribute information to the
+ /// abbreviation.
+ void AddAttribute(unsigned Attribute, unsigned Form) {
+ Data.push_back(DIEAbbrevData(Attribute, Form));
+ }
+
+ /// AddFirstAttribute - Adds a set of attribute information to the front
+ /// of the abbreviation.
+ void AddFirstAttribute(unsigned Attribute, unsigned Form) {
+ Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form));
+ }
+
+ /// Profile - Used to gather unique data for the abbreviation folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) {
+ ID.AddInteger(Tag);
+ ID.AddInteger(ChildrenFlag);
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i)
+ Data[i].Profile(ID);
+ }
+
+ /// Emit - Print the abbreviation using the specified Dwarf writer.
+ ///
+ void Emit(const DwarfDebug &DD) const;
+
+#ifndef NDEBUG
+ void print(std::ostream *O) {
+ if (O) print(*O);
+ }
+ void print(std::ostream &O);
+ void dump();
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIE - A structured debug information entry. Has an abbreviation which
+/// describes its organization.
+class DIE : public FoldingSetNode {
+protected:
+ /// Abbrev - Buffer for constructing abbreviation.
+ ///
+ DIEAbbrev Abbrev;
+
+ /// Offset - Offset in debug info section.
+ ///
+ unsigned Offset;
+
+ /// Size - Size of instance + children.
+ ///
+ unsigned Size;
+
+ /// Children DIEs.
+ ///
+ std::vector<DIE *> Children;
+
+ /// Attributes values.
+ ///
+ std::vector<DIEValue *> Values;
+
+public:
+ DIE(unsigned Tag)
+ : Abbrev(Tag, DW_CHILDREN_no)
+ , Offset(0)
+ , Size(0)
+ , Children()
+ , Values()
+ {}
+ virtual ~DIE();
+
+ // Accessors.
+ DIEAbbrev &getAbbrev() { return Abbrev; }
+ unsigned getAbbrevNumber() const {
+ return Abbrev.getNumber();
+ }
+ unsigned getTag() const { return Abbrev.getTag(); }
+ unsigned getOffset() const { return Offset; }
+ unsigned getSize() const { return Size; }
+ const std::vector<DIE *> &getChildren() const { return Children; }
+ std::vector<DIEValue *> &getValues() { return Values; }
+ void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
+ void setOffset(unsigned O) { Offset = O; }
+ void setSize(unsigned S) { Size = S; }
+
+ /// AddValue - Add a value and attributes to a DIE.
+ ///
+ void AddValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
+ Abbrev.AddAttribute(Attribute, Form);
+ Values.push_back(Value);
+ }
+
+ /// SiblingOffset - Return the offset of the debug information entry's
+ /// sibling.
+ unsigned SiblingOffset() const { return Offset + Size; }
+
+ /// AddSiblingOffset - Add a sibling offset field to the front of the DIE.
+ ///
+ void AddSiblingOffset();
+
+ /// AddChild - Add a child to the DIE.
+ ///
+ void AddChild(DIE *Child) {
+ Abbrev.setChildrenFlag(DW_CHILDREN_yes);
+ Children.push_back(Child);
+ }
+
+ /// Detach - Detaches objects connected to it after copying.
+ ///
+ void Detach() {
+ Children.clear();
+ }
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID);
+
+#ifndef NDEBUG
+ void print(std::ostream *O, unsigned IncIndent = 0) {
+ if (O) print(*O, IncIndent);
+ }
+ void print(std::ostream &O, unsigned IncIndent = 0);
+ void dump();
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEValue - A debug information entry value.
+///
+class DIEValue : public FoldingSetNode {
+public:
+ enum {
+ isInteger,
+ isString,
+ isLabel,
+ isAsIsLabel,
+ isDelta,
+ isEntry,
+ isBlock
+ };
+
+ /// Type - Type of data stored in the value.
+ ///
+ unsigned Type;
+
+ DIEValue(unsigned T)
+ : Type(T)
+ {}
+ virtual ~DIEValue() {}
+
+ // Accessors
+ unsigned getType() const { return Type; }
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEValue *) { return true; }
+
+ /// EmitValue - Emit value via the Dwarf writer.
+ ///
+ virtual void EmitValue(DwarfDebug &DD, unsigned Form) = 0;
+
+ /// SizeOf - Return the size of a value in bytes.
+ ///
+ virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const = 0;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ virtual void Profile(FoldingSetNodeID &ID) = 0;
+
+#ifndef NDEBUG
+ void print(std::ostream *O) {
+ if (O) print(*O);
+ }
+ virtual void print(std::ostream &O) = 0;
+ void dump();
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEInteger - An integer value DIE.
+///
+class DIEInteger : public DIEValue {
+private:
+ uint64_t Integer;
+
+public:
+ DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {}
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEInteger *) { return true; }
+ static bool classof(const DIEValue *I) { return I->Type == isInteger; }
+
+ /// BestForm - Choose the best form for integer.
+ ///
+ static unsigned BestForm(bool IsSigned, uint64_t Integer) {
+ if (IsSigned) {
+ if ((char)Integer == (signed)Integer) return DW_FORM_data1;
+ if ((short)Integer == (signed)Integer) return DW_FORM_data2;
+ if ((int)Integer == (signed)Integer) return DW_FORM_data4;
+ } else {
+ if ((unsigned char)Integer == Integer) return DW_FORM_data1;
+ if ((unsigned short)Integer == Integer) return DW_FORM_data2;
+ if ((unsigned int)Integer == Integer) return DW_FORM_data4;
+ }
+ return DW_FORM_data8;
+ }
+
+ /// EmitValue - Emit integer of appropriate size.
+ ///
+ virtual void EmitValue(DwarfDebug &DD, unsigned Form);
+
+ /// SizeOf - Determine size of integer value in bytes.
+ ///
+ virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, unsigned Integer) {
+ ID.AddInteger(isInteger);
+ ID.AddInteger(Integer);
+ }
+ virtual void Profile(FoldingSetNodeID &ID) { Profile(ID, Integer); }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O) {
+ O << "Int: " << (int64_t)Integer
+ << " 0x" << std::hex << Integer << std::dec;
+ }
+#endif
+};
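
BestForm picks the narrowest fixed-size DWARF form that still round-trips the value, falling back to DW_FORM_data8. The unsigned case, written as a standalone sketch over fixed-width types (the numeric codes are the standard DWARF form values, listed here only so the sketch compiles on its own):

#include <stdint.h>

// Standard DWARF fixed-size data form codes.
enum DwarfForm {
  DW_FORM_data2 = 0x05, DW_FORM_data4 = 0x06,
  DW_FORM_data8 = 0x07, DW_FORM_data1 = 0x0b
};

// Smallest fixed-size form whose width can hold the unsigned value.
static DwarfForm BestUnsignedForm(uint64_t V) {
  if (V == (uint8_t)V)  return DW_FORM_data1;
  if (V == (uint16_t)V) return DW_FORM_data2;
  if (V == (uint32_t)V) return DW_FORM_data4;
  return DW_FORM_data8;
}
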
+
+//===----------------------------------------------------------------------===//
+/// DIEString - A string value DIE.
+///
+class DIEString : public DIEValue {
+public:
+ const std::string String;
+
+ DIEString(const std::string &S) : DIEValue(isString), String(S) {}
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEString *) { return true; }
+ static bool classof(const DIEValue *S) { return S->Type == isString; }
+
+ /// EmitValue - Emit string value.
+ ///
+ virtual void EmitValue(DwarfDebug &DD, unsigned Form);
+
+ /// SizeOf - Determine size of string value in bytes.
+ ///
+ virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const {
+ return String.size() + sizeof(char); // sizeof('\0');
+ }
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, const std::string &String) {
+ ID.AddInteger(isString);
+ ID.AddString(String);
+ }
+ virtual void Profile(FoldingSetNodeID &ID) { Profile(ID, String); }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O) {
+ O << "Str: \"" << String << "\"";
+ }
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEDwarfLabel - A Dwarf internal label expression DIE.
+//
+class DIEDwarfLabel : public DIEValue {
+public:
+
+ const DWLabel Label;
+
+ DIEDwarfLabel(const DWLabel &L) : DIEValue(isLabel), Label(L) {}
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEDwarfLabel *) { return true; }
+ static bool classof(const DIEValue *L) { return L->Type == isLabel; }
+
+ /// EmitValue - Emit label value.
+ ///
+ virtual void EmitValue(DwarfDebug &DD, unsigned Form);
+
+ /// SizeOf - Determine size of label value in bytes.
+ ///
+ virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, const DWLabel &Label) {
+ ID.AddInteger(isLabel);
+ Label.Profile(ID);
+ }
+ virtual void Profile(FoldingSetNodeID &ID) { Profile(ID, Label); }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O) {
+ O << "Lbl: ";
+ Label.print(O);
+ }
+#endif
+};
+
+
+//===----------------------------------------------------------------------===//
+/// DIEObjectLabel - A label to an object in code or data.
+//
+class DIEObjectLabel : public DIEValue {
+public:
+ const std::string Label;
+
+ DIEObjectLabel(const std::string &L) : DIEValue(isAsIsLabel), Label(L) {}
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEObjectLabel *) { return true; }
+ static bool classof(const DIEValue *L) { return L->Type == isAsIsLabel; }
+
+ /// EmitValue - Emit label value.
+ ///
+ virtual void EmitValue(DwarfDebug &DD, unsigned Form);
+
+ /// SizeOf - Determine size of label value in bytes.
+ ///
+ virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, const std::string &Label) {
+ ID.AddInteger(isAsIsLabel);
+ ID.AddString(Label);
+ }
+ virtual void Profile(FoldingSetNodeID &ID) { Profile(ID, Label); }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O) {
+ O << "Obj: " << Label;
+ }
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEDelta - A simple label difference DIE.
+///
+class DIEDelta : public DIEValue {
+public:
+ const DWLabel LabelHi;
+ const DWLabel LabelLo;
+
+ DIEDelta(const DWLabel &Hi, const DWLabel &Lo)
+ : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {}
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEDelta *) { return true; }
+ static bool classof(const DIEValue *D) { return D->Type == isDelta; }
+
+ /// EmitValue - Emit delta value.
+ ///
+ virtual void EmitValue(DwarfDebug &DD, unsigned Form);
+
+ /// SizeOf - Determine size of delta value in bytes.
+ ///
+ virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, const DWLabel &LabelHi,
+ const DWLabel &LabelLo) {
+ ID.AddInteger(isDelta);
+ LabelHi.Profile(ID);
+ LabelLo.Profile(ID);
+ }
+ virtual void Profile(FoldingSetNodeID &ID) { Profile(ID, LabelHi, LabelLo); }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O) {
+ O << "Del: ";
+ LabelHi.print(O);
+ O << "-";
+ LabelLo.print(O);
+ }
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEntry - A pointer to another debug information entry. An instance of this
+/// class can also be used as a proxy for a debug information entry not yet
+/// defined (i.e. types).
+class DIEntry : public DIEValue {
+public:
+ DIE *Entry;
+
+ DIEntry(DIE *E) : DIEValue(isEntry), Entry(E) {}
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEntry *) { return true; }
+ static bool classof(const DIEValue *E) { return E->Type == isEntry; }
+
+ /// EmitValue - Emit debug information entry offset.
+ ///
+ virtual void EmitValue(DwarfDebug &DD, unsigned Form);
+
+ /// SizeOf - Determine size of debug information entry in bytes.
+ ///
+ virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const {
+ return sizeof(int32_t);
+ }
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, DIE *Entry) {
+ ID.AddInteger(isEntry);
+ ID.AddPointer(Entry);
+ }
+ virtual void Profile(FoldingSetNodeID &ID) {
+ ID.AddInteger(isEntry);
+
+ if (Entry) {
+ ID.AddPointer(Entry);
+ } else {
+ ID.AddPointer(this);
+ }
+ }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O) {
+ O << "Die: 0x" << std::hex << (intptr_t)Entry << std::dec;
+ }
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEBlock - A block of values. Primarily used for location expressions.
+//
+class DIEBlock : public DIEValue, public DIE {
+public:
+ unsigned Size; // Size in bytes excluding size header.
+
+ DIEBlock()
+ : DIEValue(isBlock)
+ , DIE(0)
+ , Size(0)
+ {}
+ ~DIEBlock() {
+ }
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEBlock *) { return true; }
+ static bool classof(const DIEValue *E) { return E->Type == isBlock; }
+
+ /// ComputeSize - Calculate the size of the block.
+ ///
+ unsigned ComputeSize(DwarfDebug &DD);
+
+ /// BestForm - Choose the best form for data.
+ ///
+ unsigned BestForm() const {
+ if ((unsigned char)Size == Size) return DW_FORM_block1;
+ if ((unsigned short)Size == Size) return DW_FORM_block2;
+ if ((unsigned int)Size == Size) return DW_FORM_block4;
+ return DW_FORM_block;
+ }
+
+ /// EmitValue - Emit block data.
+ ///
+ virtual void EmitValue(DwarfDebug &DD, unsigned Form);
+
+ /// SizeOf - Determine size of block data in bytes.
+ ///
+ virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const;
+
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ virtual void Profile(FoldingSetNodeID &ID) {
+ ID.AddInteger(isBlock);
+ DIE::Profile(ID);
+ }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O) {
+ O << "Blk: ";
+ DIE::print(O, 5);
+ }
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information associated
+/// with a source file.
+class CompileUnit {
+private:
+ /// Desc - Compile unit debug descriptor.
+ ///
+ CompileUnitDesc *Desc;
+
+ /// ID - File identifier for source.
+ ///
+ unsigned ID;
+
+ /// Die - Compile unit debug information entry.
+ ///
+ DIE *Die;
+
+ /// DescToDieMap - Tracks the mapping of unit level debug information
+ /// descriptors to debug information entries.
+ std::map<DebugInfoDesc *, DIE *> DescToDieMap;
+
+ /// DescToDIEntryMap - Tracks the mapping of unit level debug information
+ /// descriptors to debug information entries using a DIEntry proxy.
+ std::map<DebugInfoDesc *, DIEntry *> DescToDIEntryMap;
+
+ /// Globals - A map of globally visible named entities for this unit.
+ ///
+ std::map<std::string, DIE *> Globals;
+
+ /// DiesSet - Used to uniquely define dies within the compile unit.
+ ///
+ FoldingSet<DIE> DiesSet;
+
+ /// Dies - List of all dies in the compile unit.
+ ///
+ std::vector<DIE *> Dies;
+
+public:
+ CompileUnit(CompileUnitDesc *CUD, unsigned I, DIE *D)
+ : Desc(CUD)
+ , ID(I)
+ , Die(D)
+ , DescToDieMap()
+ , DescToDIEntryMap()
+ , Globals()
+ , DiesSet(InitDiesSetSize)
+ , Dies()
+ {}
+
+ ~CompileUnit() {
+ delete Die;
+
+ for (unsigned i = 0, N = Dies.size(); i < N; ++i)
+ delete Dies[i];
+ }
+
+ // Accessors.
+ CompileUnitDesc *getDesc() const { return Desc; }
+ unsigned getID() const { return ID; }
+ DIE* getDie() const { return Die; }
+ std::map<std::string, DIE *> &getGlobals() { return Globals; }
+
+ /// hasContent - Return true if this compile unit has something to write out.
+ ///
+ bool hasContent() const {
+ return !Die->getChildren().empty();
+ }
+
+ /// AddGlobal - Add a new global entity to the compile unit.
+ ///
+ void AddGlobal(const std::string &Name, DIE *Die) {
+ Globals[Name] = Die;
+ }
+
+ /// getDieMapSlotFor - Returns the debug information entry map slot for the
+ /// specified debug descriptor.
+ DIE *&getDieMapSlotFor(DebugInfoDesc *DID) {
+ return DescToDieMap[DID];
+ }
+
+ /// getDIEntrySlotFor - Returns the debug information entry proxy slot for the
+ /// specified debug descriptor.
+ DIEntry *&getDIEntrySlotFor(DebugInfoDesc *DID) {
+ return DescToDIEntryMap[DID];
+ }
+
+ /// AddDie - Adds or interns the DIE to the compile unit.
+ ///
+ DIE *AddDie(DIE &Buffer) {
+ FoldingSetNodeID ID;
+ Buffer.Profile(ID);
+ void *Where;
+ DIE *Die = DiesSet.FindNodeOrInsertPos(ID, Where);
+
+ if (!Die) {
+ Die = new DIE(Buffer);
+ DiesSet.InsertNode(Die, Where);
+ this->Die->AddChild(Die);
+ Buffer.Detach();
+ }
+
+ return Die;
+ }
+};
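
The FoldingSet plus the two descriptor maps above implement a simple interning discipline: Profile() turns a DIE into a hash key, FindNodeOrInsertPos either returns the already-built node or an insertion point, and getDieMapSlotFor hands out a by-reference slot so callers fill it in exactly once. A stripped-down sketch of the same pattern, with a std::map keyed by a profile string in place of the FoldingSet:

#include <map>
#include <string>

struct Entry {
  std::string Profile;        // stands in for the FoldingSetNodeID key
  explicit Entry(const std::string &P) : Profile(P) {}
};

// Interns entries by key: structurally identical entries are built once
// and shared afterwards, much like CompileUnit::AddDie does for DIEs.
class InternTable {
  std::map<std::string, Entry *> Table;
public:
  Entry *&getSlotFor(const std::string &Key) { return Table[Key]; }
  Entry *getOrCreate(const std::string &Key) {
    Entry *&Slot = getSlotFor(Key);
    if (!Slot)
      Slot = new Entry(Key);
    return Slot;
  }
  ~InternTable() {
    for (std::map<std::string, Entry *>::iterator I = Table.begin(),
         E = Table.end(); I != E; ++I)
      delete I->second;
  }
};
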
+
+//===----------------------------------------------------------------------===//
+/// Dwarf - Emits general Dwarf directives.
+///
+class Dwarf {
+
+protected:
+
+ //===--------------------------------------------------------------------===//
+ // Core attributes used by the Dwarf writer.
+ //
+
+ //
+ /// O - Stream to .s file.
+ ///
+ std::ostream &O;
+
+ /// Asm - Target of Dwarf emission.
+ ///
+ AsmPrinter *Asm;
+
+ /// TAI - Target asm information.
+ const TargetAsmInfo *TAI;
+
+ /// TD - Target data.
+ const TargetData *TD;
+
+ /// RI - Register Information.
+ const MRegisterInfo *RI;
+
+ /// M - Current module.
+ ///
+ Module *M;
+
+ /// MF - Current machine function.
+ ///
+ MachineFunction *MF;
+
+ /// MMI - Collected machine module information.
+ ///
+ MachineModuleInfo *MMI;
+
+ /// SubprogramCount - The running count of functions being compiled.
+ ///
+ unsigned SubprogramCount;
+
+ unsigned SetCounter;
+ Dwarf(std::ostream &OS, AsmPrinter *A, const TargetAsmInfo *T)
+ : O(OS)
+ , Asm(A)
+ , TAI(T)
+ , TD(Asm->TM.getTargetData())
+ , RI(Asm->TM.getRegisterInfo())
+ , M(NULL)
+ , MF(NULL)
+ , MMI(NULL)
+ , SubprogramCount(0)
+ , SetCounter(1)
+ {
+ }
+
+public:
+
+ //===--------------------------------------------------------------------===//
+ // Accessors.
+ //
+ AsmPrinter *getAsm() const { return Asm; }
+ MachineModuleInfo *getMMI() const { return MMI; }
+ const TargetAsmInfo *getTargetAsmInfo() const { return TAI; }
+
+ /// PrintLabelName - Print label name in form used by Dwarf writer.
+ ///
+ void PrintLabelName(DWLabel Label) const {
+ PrintLabelName(Label.Tag, Label.Number);
+ }
+ void PrintLabelName(const char *Tag, unsigned Number,
+ bool isInSection = false) const {
+ if (isInSection && TAI->getDwarfSectionOffsetDirective())
+ O << TAI->getDwarfSectionOffsetDirective() << Tag;
+ else
+ O << TAI->getPrivateGlobalPrefix() << Tag;
+ if (Number) O << Number;
+ }
+
+ /// EmitLabel - Emit location label for internal use by Dwarf.
+ ///
+ void EmitLabel(DWLabel Label) const {
+ EmitLabel(Label.Tag, Label.Number);
+ }
+ void EmitLabel(const char *Tag, unsigned Number) const {
+ PrintLabelName(Tag, Number);
+ O << ":\n";
+ }
+
+ /// EmitReference - Emit a reference to a label.
+ ///
+ void EmitReference(DWLabel Label, bool IsPCRelative = false) const {
+ EmitReference(Label.Tag, Label.Number, IsPCRelative);
+ }
+ void EmitReference(const char *Tag, unsigned Number,
+ bool IsPCRelative = false) const {
+ if (TAI->getAddressSize() == sizeof(int32_t))
+ O << TAI->getData32bitsDirective();
+ else
+ O << TAI->getData64bitsDirective();
+
+ PrintLabelName(Tag, Number);
+
+ if (IsPCRelative) O << "-" << TAI->getPCSymbol();
+ }
+ void EmitReference(const std::string &Name, bool IsPCRelative = false) const {
+ if (TAI->getAddressSize() == sizeof(int32_t))
+ O << TAI->getData32bitsDirective();
+ else
+ O << TAI->getData64bitsDirective();
+
+ O << Name;
+
+ if (IsPCRelative) O << "-" << TAI->getPCSymbol();
+ }
+
+ /// EmitDifference - Emit the difference between two labels. Some
+ /// assemblers do not handle absolute expressions with data directives well,
+ /// so there is an option (needsSet) to use an intermediary set expression.
+ void EmitDifference(DWLabel LabelHi, DWLabel LabelLo,
+ bool IsSmall = false) {
+ EmitDifference(LabelHi.Tag, LabelHi.Number,
+ LabelLo.Tag, LabelLo.Number,
+ IsSmall);
+ }
+ void EmitDifference(const char *TagHi, unsigned NumberHi,
+ const char *TagLo, unsigned NumberLo,
+ bool IsSmall = false) {
+ if (TAI->needsSet()) {
+ O << "\t.set\t";
+ PrintLabelName("set", SetCounter);
+ O << ",";
+ PrintLabelName(TagHi, NumberHi);
+ O << "-";
+ PrintLabelName(TagLo, NumberLo);
+ O << "\n";
+
+ if (IsSmall || TAI->getAddressSize() == sizeof(int32_t))
+ O << TAI->getData32bitsDirective();
+ else
+ O << TAI->getData64bitsDirective();
+
+ PrintLabelName("set", SetCounter);
+
+ ++SetCounter;
+ } else {
+ if (IsSmall || TAI->getAddressSize() == sizeof(int32_t))
+ O << TAI->getData32bitsDirective();
+ else
+ O << TAI->getData64bitsDirective();
+
+ PrintLabelName(TagHi, NumberHi);
+ O << "-";
+ PrintLabelName(TagLo, NumberLo);
+ }
+ }
+
+ void EmitSectionOffset(const char* Label, const char* Section,
+ unsigned LabelNumber, unsigned SectionNumber,
+ bool IsSmall = false, bool isEH = false) {
+ bool printAbsolute = false;
+ if (TAI->needsSet()) {
+ O << "\t.set\t";
+ PrintLabelName("set", SetCounter);
+ O << ",";
+ PrintLabelName(Label, LabelNumber, true);
+
+ if (isEH)
+ printAbsolute = TAI->isAbsoluteEHSectionOffsets();
+ else
+ printAbsolute = TAI->isAbsoluteDebugSectionOffsets();
+
+ if (!printAbsolute) {
+ O << "-";
+ PrintLabelName(Section, SectionNumber);
+ }
+ O << "\n";
+
+ if (IsSmall || TAI->getAddressSize() == sizeof(int32_t))
+ O << TAI->getData32bitsDirective();
+ else
+ O << TAI->getData64bitsDirective();
+
+ PrintLabelName("set", SetCounter);
+ ++SetCounter;
+ } else {
+ if (IsSmall || TAI->getAddressSize() == sizeof(int32_t))
+ O << TAI->getData32bitsDirective();
+ else
+ O << TAI->getData64bitsDirective();
+
+ PrintLabelName(Label, LabelNumber, true);
+
+ if (isEH)
+ printAbsolute = TAI->isAbsoluteEHSectionOffsets();
+ else
+ printAbsolute = TAI->isAbsoluteDebugSectionOffsets();
+
+ if (!printAbsolute) {
+ O << "-";
+ PrintLabelName(Section, SectionNumber);
+ }
+ }
+ }
+
+ /// EmitFrameMoves - Emit frame instructions to describe the layout of the
+ /// frame.
+ void EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
+ const std::vector<MachineMove> &Moves) {
+ int stackGrowth =
+ Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
+ TargetFrameInfo::StackGrowsUp ?
+ TAI->getAddressSize() : -TAI->getAddressSize();
+ bool IsLocal = BaseLabel && strcmp(BaseLabel, "label") == 0;
+
+ for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
+ const MachineMove &Move = Moves[i];
+ unsigned LabelID = Move.getLabelID();
+
+ if (LabelID) {
+ LabelID = MMI->MappedLabel(LabelID);
+
+ // Throw out move if the label is invalid.
+ if (!LabelID) continue;
+ }
+
+ const MachineLocation &Dst = Move.getDestination();
+ const MachineLocation &Src = Move.getSource();
+
+ // Advance row if new location.
+ if (BaseLabel && LabelID && (BaseLabelID != LabelID || !IsLocal)) {
+ Asm->EmitInt8(DW_CFA_advance_loc4);
+ Asm->EOL("DW_CFA_advance_loc4");
+ EmitDifference("label", LabelID, BaseLabel, BaseLabelID, true);
+ Asm->EOL();
+
+ BaseLabelID = LabelID;
+ BaseLabel = "label";
+ IsLocal = true;
+ }
+
+ // If advancing cfa.
+ if (Dst.isRegister() && Dst.getRegister() == MachineLocation::VirtualFP) {
+ if (!Src.isRegister()) {
+ if (Src.getRegister() == MachineLocation::VirtualFP) {
+ Asm->EmitInt8(DW_CFA_def_cfa_offset);
+ Asm->EOL("DW_CFA_def_cfa_offset");
+ } else {
+ Asm->EmitInt8(DW_CFA_def_cfa);
+ Asm->EOL("DW_CFA_def_cfa");
+ Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Src.getRegister()));
+ Asm->EOL("Register");
+ }
+
+ int Offset = -Src.getOffset();
+
+ Asm->EmitULEB128Bytes(Offset);
+ Asm->EOL("Offset");
+ } else {
+ assert(0 && "Machine move no supported yet.");
+ }
+ } else if (Src.isRegister() &&
+ Src.getRegister() == MachineLocation::VirtualFP) {
+ if (Dst.isRegister()) {
+ Asm->EmitInt8(DW_CFA_def_cfa_register);
+ Asm->EOL("DW_CFA_def_cfa_register");
+ Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Dst.getRegister()));
+ Asm->EOL("Register");
+ } else {
+ assert(0 && "Machine move no supported yet.");
+ }
+ } else {
+ unsigned Reg = RI->getDwarfRegNum(Src.getRegister());
+ int Offset = Dst.getOffset() / stackGrowth;
+
+ if (Offset < 0) {
+ Asm->EmitInt8(DW_CFA_offset_extended_sf);
+ Asm->EOL("DW_CFA_offset_extended_sf");
+ Asm->EmitULEB128Bytes(Reg);
+ Asm->EOL("Reg");
+ Asm->EmitSLEB128Bytes(Offset);
+ Asm->EOL("Offset");
+ } else if (Reg < 64) {
+ Asm->EmitInt8(DW_CFA_offset + Reg);
+ Asm->EOL("DW_CFA_offset + Reg");
+ Asm->EmitULEB128Bytes(Offset);
+ Asm->EOL("Offset");
+ } else {
+ Asm->EmitInt8(DW_CFA_offset_extended);
+ Asm->EOL("DW_CFA_offset_extended");
+ Asm->EmitULEB128Bytes(Reg);
+ Asm->EOL("Reg");
+ Asm->EmitULEB128Bytes(Offset);
+ Asm->EOL("Offset");
+ }
+ }
+ }
+ }
+
+};
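
EmitDifference and EmitSectionOffset above share one trick: when TAI->needsSet() reports that the assembler cannot fold a label difference inside a data directive, the difference is first bound to a numbered temporary with .set, and the data directive then references that temporary. A standalone sketch of the two output shapes (the .long spelling is illustrative; the real code asks TargetAsmInfo for the 32/64-bit data directive):

#include <iostream>
#include <string>

// Emits a 32-bit label difference either directly or via a ".set" temporary,
// depending on whether the assembler can fold the expression itself.
static void EmitDifference32(std::ostream &O, const std::string &Hi,
                             const std::string &Lo, bool NeedsSet,
                             unsigned &SetCounter) {
  if (NeedsSet) {
    O << "\t.set\tLset" << SetCounter << "," << Hi << "-" << Lo << "\n";
    O << "\t.long\tLset" << SetCounter << "\n";
    ++SetCounter;
  } else {
    O << "\t.long\t" << Hi << "-" << Lo << "\n";
  }
}

int main() {
  unsigned SetCounter = 1;
  EmitDifference32(std::cout, "Ldebug_end1", "Ldebug_begin1", true, SetCounter);
  EmitDifference32(std::cout, "Ldebug_end1", "Ldebug_begin1", false, SetCounter);
  return 0;
}
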
+
+//===----------------------------------------------------------------------===//
+/// DwarfDebug - Emits Dwarf debug directives.
+///
+class DwarfDebug : public Dwarf {
+
+private:
+ //===--------------------------------------------------------------------===//
+ // Attributes used to construct specific Dwarf sections.
+ //
+
+ /// CompileUnits - All the compile units involved in this build. The index
+ /// of each entry in this vector corresponds to the sources in MMI.
+ std::vector<CompileUnit *> CompileUnits;
+
+ /// AbbreviationsSet - Used to uniquely define abbreviations.
+ ///
+ FoldingSet<DIEAbbrev> AbbreviationsSet;
+
+ /// Abbreviations - A list of all the unique abbreviations in use.
+ ///
+ std::vector<DIEAbbrev *> Abbreviations;
+
+ /// ValuesSet - Used to uniquely define values.
+ ///
+ FoldingSet<DIEValue> ValuesSet;
+
+ /// Values - A list of all the unique values in use.
+ ///
+ std::vector<DIEValue *> Values;
+
+ /// StringPool - A UniqueVector of strings used by indirect references.
+ ///
+ UniqueVector<std::string> StringPool;
+
+ /// UnitMap - Map debug information descriptor to compile unit.
+ ///
+ std::map<DebugInfoDesc *, CompileUnit *> DescToUnitMap;
+
+ /// SectionMap - Provides a unique id per text section.
+ ///
+ UniqueVector<std::string> SectionMap;
+
+ /// SectionSourceLines - Tracks line numbers per text section.
+ ///
+ std::vector<std::vector<SourceLineInfo> > SectionSourceLines;
+
+ /// didInitial - Flag to indicate if initial emission has been done.
+ ///
+ bool didInitial;
+
+ /// shouldEmit - Flag to indicate if debug information should be emitted.
+ ///
+ bool shouldEmit;
+
+ struct FunctionDebugFrameInfo {
+ unsigned Number;
+ std::vector<MachineMove> Moves;
+
+ FunctionDebugFrameInfo(unsigned Num, const std::vector<MachineMove> &M):
+ Number(Num), Moves(M) { };
+ };
+
+ std::vector<FunctionDebugFrameInfo> DebugFrames;
+
+public:
+
+ /// ShouldEmitDwarf - Returns true if Dwarf declarations should be made.
+ ///
+ bool ShouldEmitDwarf() const { return shouldEmit; }
+
+ /// AssignAbbrevNumber - Define a unique number for the abbreviation.
+ ///
+ void AssignAbbrevNumber(DIEAbbrev &Abbrev) {
+ // Profile the node so that we can make it unique.
+ FoldingSetNodeID ID;
+ Abbrev.Profile(ID);
+
+ // Check the set for priors.
+ DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
+
+ // If it's newly added.
+ if (InSet == &Abbrev) {
+ // Add to abbreviation list.
+ Abbreviations.push_back(&Abbrev);
+ // Assign the vector position + 1 as its number.
+ Abbrev.setNumber(Abbreviations.size());
+ } else {
+ // Assign existing abbreviation number.
+ Abbrev.setNumber(InSet->getNumber());
+ }
+ }
+
+ /// NewString - Add a string to the constant pool and return a label.
+ ///
+ DWLabel NewString(const std::string &String) {
+ unsigned StringID = StringPool.insert(String);
+ return DWLabel("string", StringID);
+ }
+
+ /// NewDIEntry - Creates a new DIEntry to be a proxy for a debug information
+ /// entry.
+ DIEntry *NewDIEntry(DIE *Entry = NULL) {
+ DIEntry *Value;
+
+ if (Entry) {
+ FoldingSetNodeID ID;
+ DIEntry::Profile(ID, Entry);
+ void *Where;
+ Value = static_cast<DIEntry *>(ValuesSet.FindNodeOrInsertPos(ID, Where));
+
+ if (Value) return Value;
+
+ Value = new DIEntry(Entry);
+ ValuesSet.InsertNode(Value, Where);
+ } else {
+ Value = new DIEntry(Entry);
+ }
+
+ Values.push_back(Value);
+ return Value;
+ }
+
+ /// SetDIEntry - Set a DIEntry once the debug information entry is defined.
+ ///
+ void SetDIEntry(DIEntry *Value, DIE *Entry) {
+ Value->Entry = Entry;
+ // Add to values set if not already there. If it is, we merely have a
+ // duplicate in the values list (no harm.)
+ ValuesSet.GetOrInsertNode(Value);
+ }
+
+ /// AddUInt - Add an unsigned integer attribute data and value.
+ ///
+ void AddUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(false, Integer);
+
+ FoldingSetNodeID ID;
+ DIEInteger::Profile(ID, Integer);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+ if (!Value) {
+ Value = new DIEInteger(Integer);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+ }
+
+ /// AddSInt - Add a signed integer attribute data and value.
+ ///
+ void AddSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(true, Integer);
+
+ FoldingSetNodeID ID;
+ DIEInteger::Profile(ID, (uint64_t)Integer);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+ if (!Value) {
+ Value = new DIEInteger(Integer);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+ }
+
+ /// AddString - Add a std::string attribute data and value.
+ ///
+ void AddString(DIE *Die, unsigned Attribute, unsigned Form,
+ const std::string &String) {
+ FoldingSetNodeID ID;
+ DIEString::Profile(ID, String);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+ if (!Value) {
+ Value = new DIEString(String);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+ }
+
+ /// AddLabel - Add a Dwarf label attribute data and value.
+ ///
+ void AddLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Label) {
+ FoldingSetNodeID ID;
+ DIEDwarfLabel::Profile(ID, Label);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+ if (!Value) {
+ Value = new DIEDwarfLabel(Label);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+ }
+
+ /// AddObjectLabel - Add a non-Dwarf label attribute data and value.
+ ///
+ void AddObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const std::string &Label) {
+ FoldingSetNodeID ID;
+ DIEObjectLabel::Profile(ID, Label);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+ if (!Value) {
+ Value = new DIEObjectLabel(Label);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+ }
+
+ /// AddDelta - Add a label delta attribute data and value.
+ ///
+ void AddDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Hi, const DWLabel &Lo) {
+ FoldingSetNodeID ID;
+ DIEDelta::Profile(ID, Hi, Lo);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+ if (!Value) {
+ Value = new DIEDelta(Hi, Lo);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+ }
+
+ /// AddDIEntry - Add a DIE attribute data and value.
+ ///
+ void AddDIEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry) {
+ Die->AddValue(Attribute, Form, NewDIEntry(Entry));
+ }
+
+ /// AddBlock - Add block data.
+ ///
+ void AddBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block) {
+ Block->ComputeSize(*this);
+ FoldingSetNodeID ID;
+ Block->Profile(ID);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+ if (!Value) {
+ Value = Block;
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ } else {
+ delete Block;
+ }
+
+ Die->AddValue(Attribute, Block->BestForm(), Value);
+ }
+
+private:
+
+ /// AddSourceLine - Add location information to specified debug information
+ /// entry.
+ void AddSourceLine(DIE *Die, CompileUnitDesc *File, unsigned Line) {
+ if (File && Line) {
+ CompileUnit *FileUnit = FindCompileUnit(File);
+ unsigned FileID = FileUnit->getID();
+ AddUInt(Die, DW_AT_decl_file, 0, FileID);
+ AddUInt(Die, DW_AT_decl_line, 0, Line);
+ }
+ }
+
+ /// AddAddress - Add an address attribute to a die based on the location
+ /// provided.
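+ /// As a rough illustration (hypothetical values): a variable held in Dwarf
+ /// register 3 yields the block { DW_OP_reg0 + 3 }, registers numbered 32 or
+ /// above use { DW_OP_regx, ULEB128(reg) }, and a variable stored at
+ /// [register - 8] yields { DW_OP_breg0 + reg, SLEB128(-8) }.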
+ void AddAddress(DIE *Die, unsigned Attribute,
+ const MachineLocation &Location) {
+ unsigned Reg = RI->getDwarfRegNum(Location.getRegister());
+ DIEBlock *Block = new DIEBlock();
+
+ if (Location.isRegister()) {
+ if (Reg < 32) {
+ AddUInt(Block, 0, DW_FORM_data1, DW_OP_reg0 + Reg);
+ } else {
+ AddUInt(Block, 0, DW_FORM_data1, DW_OP_regx);
+ AddUInt(Block, 0, DW_FORM_udata, Reg);
+ }
+ } else {
+ if (Reg < 32) {
+ AddUInt(Block, 0, DW_FORM_data1, DW_OP_breg0 + Reg);
+ } else {
+ AddUInt(Block, 0, DW_FORM_data1, DW_OP_bregx);
+ AddUInt(Block, 0, DW_FORM_udata, Reg);
+ }
+ AddUInt(Block, 0, DW_FORM_sdata, Location.getOffset());
+ }
+
+ AddBlock(Die, Attribute, 0, Block);
+ }
+
+ /// AddBasicType - Add a new basic type attribute to the specified entity.
+ ///
+ void AddBasicType(DIE *Entity, CompileUnit *Unit,
+ const std::string &Name,
+ unsigned Encoding, unsigned Size) {
+ DIE *Die = ConstructBasicType(Unit, Name, Encoding, Size);
+ AddDIEntry(Entity, DW_AT_type, DW_FORM_ref4, Die);
+ }
+
+ /// ConstructBasicType - Construct a new basic type.
+ ///
+ DIE *ConstructBasicType(CompileUnit *Unit,
+ const std::string &Name,
+ unsigned Encoding, unsigned Size) {
+ DIE Buffer(DW_TAG_base_type);
+ AddUInt(&Buffer, DW_AT_byte_size, 0, Size);
+ AddUInt(&Buffer, DW_AT_encoding, DW_FORM_data1, Encoding);
+ if (!Name.empty()) AddString(&Buffer, DW_AT_name, DW_FORM_string, Name);
+ return Unit->AddDie(Buffer);
+ }
+
+ /// AddPointerType - Add a new pointer type attribute to the specified entity.
+ ///
+ void AddPointerType(DIE *Entity, CompileUnit *Unit, const std::string &Name) {
+ DIE *Die = ConstructPointerType(Unit, Name);
+ AddDIEntry(Entity, DW_AT_type, DW_FORM_ref4, Die);
+ }
+
+ /// ConstructPointerType - Construct a new pointer type.
+ ///
+ DIE *ConstructPointerType(CompileUnit *Unit, const std::string &Name) {
+ DIE Buffer(DW_TAG_pointer_type);
+ AddUInt(&Buffer, DW_AT_byte_size, 0, TAI->getAddressSize());
+ if (!Name.empty()) AddString(&Buffer, DW_AT_name, DW_FORM_string, Name);
+ return Unit->AddDie(Buffer);
+ }
+
+ /// AddType - Add a new type attribute to the specified entity.
+ ///
+ void AddType(DIE *Entity, TypeDesc *TyDesc, CompileUnit *Unit) {
+ if (!TyDesc) {
+ AddBasicType(Entity, Unit, "", DW_ATE_signed, sizeof(int32_t));
+ } else {
+ // Check for pre-existence.
+ DIEntry *&Slot = Unit->getDIEntrySlotFor(TyDesc);
+
+ // If it exists then use the existing value.
+ if (Slot) {
+ Entity->AddValue(DW_AT_type, DW_FORM_ref4, Slot);
+ return;
+ }
+
+ if (SubprogramDesc *SubprogramTy = dyn_cast<SubprogramDesc>(TyDesc)) {
+ // FIXME - Not sure why programs and variables are coming through here.
+ // Short cut for handling subprogram types (not really a TyDesc.)
+ AddPointerType(Entity, Unit, SubprogramTy->getName());
+ } else if (GlobalVariableDesc *GlobalTy =
+ dyn_cast<GlobalVariableDesc>(TyDesc)) {
+ // FIXME - Not sure why programs and variables are coming through here.
+ // Short cut for handling global variable types (not really a TyDesc.)
+ AddPointerType(Entity, Unit, GlobalTy->getName());
+ } else {
+ // Set up proxy.
+ Slot = NewDIEntry();
+
+ // Construct type.
+ DIE Buffer(DW_TAG_base_type);
+ ConstructType(Buffer, TyDesc, Unit);
+
+ // Add debug information entry to entity and unit.
+ DIE *Die = Unit->AddDie(Buffer);
+ SetDIEntry(Slot, Die);
+ Entity->AddValue(DW_AT_type, DW_FORM_ref4, Slot);
+ }
+ }
+ }
+
+ /// ConstructType - Adds all the required attributes to the type.
+ ///
+ void ConstructType(DIE &Buffer, TypeDesc *TyDesc, CompileUnit *Unit) {
+ // Get core information.
+ const std::string &Name = TyDesc->getName();
+ uint64_t Size = TyDesc->getSize() >> 3;
+
+ if (BasicTypeDesc *BasicTy = dyn_cast<BasicTypeDesc>(TyDesc)) {
+ // Fundamental types like int, float, bool
+ Buffer.setTag(DW_TAG_base_type);
+ AddUInt(&Buffer, DW_AT_encoding, DW_FORM_data1, BasicTy->getEncoding());
+ } else if (DerivedTypeDesc *DerivedTy = dyn_cast<DerivedTypeDesc>(TyDesc)) {
+ // Fetch tag.
+ unsigned Tag = DerivedTy->getTag();
+ // FIXME - Workaround for templates.
+ if (Tag == DW_TAG_inheritance) Tag = DW_TAG_reference_type;
+ // Pointers, typedefs et al.
+ Buffer.setTag(Tag);
+ // Map to main type, void will not have a type.
+ if (TypeDesc *FromTy = DerivedTy->getFromType())
+ AddType(&Buffer, FromTy, Unit);
+ } else if (CompositeTypeDesc *CompTy = dyn_cast<CompositeTypeDesc>(TyDesc)){
+ // Fetch tag.
+ unsigned Tag = CompTy->getTag();
+
+ // Set tag accordingly.
+ if (Tag == DW_TAG_vector_type)
+ Buffer.setTag(DW_TAG_array_type);
+ else
+ Buffer.setTag(Tag);
+
+ std::vector<DebugInfoDesc *> &Elements = CompTy->getElements();
+
+ switch (Tag) {
+ case DW_TAG_vector_type:
+ AddUInt(&Buffer, DW_AT_GNU_vector, DW_FORM_flag, 1);
+ // Fall thru
+ case DW_TAG_array_type: {
+ // Add element type.
+ if (TypeDesc *FromTy = CompTy->getFromType())
+ AddType(&Buffer, FromTy, Unit);
+
+ // Don't emit size attribute.
+ Size = 0;
+
+ // Construct an anonymous type for index type.
+ DIE *IndexTy = ConstructBasicType(Unit, "", DW_ATE_signed,
+ sizeof(int32_t));
+
+ // Add subranges to array type.
+ for(unsigned i = 0, N = Elements.size(); i < N; ++i) {
+ SubrangeDesc *SRD = cast<SubrangeDesc>(Elements[i]);
+ int64_t Lo = SRD->getLo();
+ int64_t Hi = SRD->getHi();
+ DIE *Subrange = new DIE(DW_TAG_subrange_type);
+
+ // If a range is available.
+ if (Lo != Hi) {
+ AddDIEntry(Subrange, DW_AT_type, DW_FORM_ref4, IndexTy);
+ // Only add low if non-zero.
+ if (Lo) AddSInt(Subrange, DW_AT_lower_bound, 0, Lo);
+ AddSInt(Subrange, DW_AT_upper_bound, 0, Hi);
+ }
+
+ Buffer.AddChild(Subrange);
+ }
+ break;
+ }
+ case DW_TAG_structure_type:
+ case DW_TAG_union_type: {
+ // Add elements to structure type.
+ for(unsigned i = 0, N = Elements.size(); i < N; ++i) {
+ DebugInfoDesc *Element = Elements[i];
+
+ if (DerivedTypeDesc *MemberDesc = dyn_cast<DerivedTypeDesc>(Element)){
+ // Add field or base class.
+
+ unsigned Tag = MemberDesc->getTag();
+
+ // Extract the basic information.
+ const std::string &Name = MemberDesc->getName();
+ uint64_t Size = MemberDesc->getSize();
+ uint64_t Align = MemberDesc->getAlign();
+ uint64_t Offset = MemberDesc->getOffset();
+
+ // Construct member debug information entry.
+ DIE *Member = new DIE(Tag);
+
+ // Add name if not "".
+ if (!Name.empty())
+ AddString(Member, DW_AT_name, DW_FORM_string, Name);
+ // Add location if available.
+ AddSourceLine(Member, MemberDesc->getFile(), MemberDesc->getLine());
+
+ // Most of the time the field info is the same as the member's.
+ uint64_t FieldSize = Size;
+ uint64_t FieldAlign = Align;
+ uint64_t FieldOffset = Offset;
+
+ // Set the member type.
+ TypeDesc *FromTy = MemberDesc->getFromType();
+ AddType(Member, FromTy, Unit);
+
+ // Walk up typedefs until a real size is found.
+ while (FromTy) {
+ if (FromTy->getTag() != DW_TAG_typedef) {
+ FieldSize = FromTy->getSize();
+ FieldAlign = FromTy->getSize();
+ break;
+ }
+
+ FromTy = dyn_cast<DerivedTypeDesc>(FromTy)->getFromType();
+ }
+
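+ // Rough trace of the bit field handling below (hypothetical member): a
+ // 3-bit field at bit offset 5 whose underlying type is a 32-bit int gives
+ // AlignMask = ~31, HiMark = (5 + 32) & ~31 = 32, FieldOffset = 0 and a
+ // normalized Offset of 5 (remapped to 32 - (5 + 3) = 24 on little-endian),
+ // so the member gets byte size 4, bit size 3 and bit offset 24 (or 5).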
+ // Unless we have a bit field.
+ if (Tag == DW_TAG_member && FieldSize != Size) {
+ // Construct the alignment mask.
+ uint64_t AlignMask = ~(FieldAlign - 1);
+ // Determine the high bit + 1 of the declared size.
+ uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+ // Work backwards to determine the base offset of the field.
+ FieldOffset = HiMark - FieldSize;
+ // Now normalize offset to the field.
+ Offset -= FieldOffset;
+
+ // Maybe we need to work from the other end.
+ if (TD->isLittleEndian()) Offset = FieldSize - (Offset + Size);
+
+ // Add size and offset.
+ AddUInt(Member, DW_AT_byte_size, 0, FieldSize >> 3);
+ AddUInt(Member, DW_AT_bit_size, 0, Size);
+ AddUInt(Member, DW_AT_bit_offset, 0, Offset);
+ }
+
+ // Add computation for offset.
+ DIEBlock *Block = new DIEBlock();
+ AddUInt(Block, 0, DW_FORM_data1, DW_OP_plus_uconst);
+ AddUInt(Block, 0, DW_FORM_udata, FieldOffset >> 3);
+ AddBlock(Member, DW_AT_data_member_location, 0, Block);
+
+ // Add accessibility (public by default unless it is a base class).
+ if (MemberDesc->isProtected()) {
+ AddUInt(Member, DW_AT_accessibility, 0, DW_ACCESS_protected);
+ } else if (MemberDesc->isPrivate()) {
+ AddUInt(Member, DW_AT_accessibility, 0, DW_ACCESS_private);
+ } else if (Tag == DW_TAG_inheritance) {
+ AddUInt(Member, DW_AT_accessibility, 0, DW_ACCESS_public);
+ }
+
+ Buffer.AddChild(Member);
+ } else if (GlobalVariableDesc *StaticDesc =
+ dyn_cast<GlobalVariableDesc>(Element)) {
+ // Add static member.
+
+ // Construct member debug information entry.
+ DIE *Static = new DIE(DW_TAG_variable);
+
+ // Add name and mangled name.
+ const std::string &Name = StaticDesc->getName();
+ const std::string &LinkageName = StaticDesc->getLinkageName();
+ AddString(Static, DW_AT_name, DW_FORM_string, Name);
+ if (!LinkageName.empty()) {
+ AddString(Static, DW_AT_MIPS_linkage_name, DW_FORM_string,
+ LinkageName);
+ }
+
+ // Add location.
+ AddSourceLine(Static, StaticDesc->getFile(), StaticDesc->getLine());
+
+ // Add type.
+ if (TypeDesc *StaticTy = StaticDesc->getType())
+ AddType(Static, StaticTy, Unit);
+
+ // Add flags.
+ if (!StaticDesc->isStatic())
+ AddUInt(Static, DW_AT_external, DW_FORM_flag, 1);
+ AddUInt(Static, DW_AT_declaration, DW_FORM_flag, 1);
+
+ Buffer.AddChild(Static);
+ } else if (SubprogramDesc *MethodDesc =
+ dyn_cast<SubprogramDesc>(Element)) {
+ // Add member function.
+
+ // Construct member debug information entry.
+ DIE *Method = new DIE(DW_TAG_subprogram);
+
+ // Add name and mangled name.
+ const std::string &Name = MethodDesc->getName();
+ const std::string &LinkageName = MethodDesc->getLinkageName();
+
+ AddString(Method, DW_AT_name, DW_FORM_string, Name);
+ bool IsCTor = TyDesc->getName() == Name;
+
+ if (!LinkageName.empty()) {
+ AddString(Method, DW_AT_MIPS_linkage_name, DW_FORM_string,
+ LinkageName);
+ }
+
+ // Add location.
+ AddSourceLine(Method, MethodDesc->getFile(), MethodDesc->getLine());
+
+ // Add type.
+ if (CompositeTypeDesc *MethodTy =
+ dyn_cast_or_null<CompositeTypeDesc>(MethodDesc->getType())) {
+ // Get argument information.
+ std::vector<DebugInfoDesc *> &Args = MethodTy->getElements();
+
+ // If not a ctor.
+ if (!IsCTor) {
+ // Add return type.
+ AddType(Method, dyn_cast<TypeDesc>(Args[0]), Unit);
+ }
+
+ // Add arguments.
+ for(unsigned i = 1, N = Args.size(); i < N; ++i) {
+ DIE *Arg = new DIE(DW_TAG_formal_parameter);
+ AddType(Arg, cast<TypeDesc>(Args[i]), Unit);
+ AddUInt(Arg, DW_AT_artificial, DW_FORM_flag, 1);
+ Method->AddChild(Arg);
+ }
+ }
+
+ // Add flags.
+ if (!MethodDesc->isStatic())
+ AddUInt(Method, DW_AT_external, DW_FORM_flag, 1);
+ AddUInt(Method, DW_AT_declaration, DW_FORM_flag, 1);
+
+ Buffer.AddChild(Method);
+ }
+ }
+ break;
+ }
+ case DW_TAG_enumeration_type: {
+ // Add enumerators to enumeration type.
+ for(unsigned i = 0, N = Elements.size(); i < N; ++i) {
+ EnumeratorDesc *ED = cast<EnumeratorDesc>(Elements[i]);
+ const std::string &Name = ED->getName();
+ int64_t Value = ED->getValue();
+ DIE *Enumerator = new DIE(DW_TAG_enumerator);
+ AddString(Enumerator, DW_AT_name, DW_FORM_string, Name);
+ AddSInt(Enumerator, DW_AT_const_value, DW_FORM_sdata, Value);
+ Buffer.AddChild(Enumerator);
+ }
+
+ break;
+ }
+ case DW_TAG_subroutine_type: {
+ // Add prototype flag.
+ AddUInt(&Buffer, DW_AT_prototyped, DW_FORM_flag, 1);
+ // Add return type.
+ AddType(&Buffer, dyn_cast<TypeDesc>(Elements[0]), Unit);
+
+ // Add arguments.
+ for(unsigned i = 1, N = Elements.size(); i < N; ++i) {
+ DIE *Arg = new DIE(DW_TAG_formal_parameter);
+ AddType(Arg, cast<TypeDesc>(Elements[i]), Unit);
+ Buffer.AddChild(Arg);
+ }
+
+ break;
+ }
+ default: break;
+ }
+ }
+
+ // Add size if non-zero (derived types don't have a size.)
+ if (Size) AddUInt(&Buffer, DW_AT_byte_size, 0, Size);
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty()) AddString(&Buffer, DW_AT_name, DW_FORM_string, Name);
+ // Add source line info if available.
+ AddSourceLine(&Buffer, TyDesc->getFile(), TyDesc->getLine());
+ }
+
+ /// NewCompileUnit - Create a new compile unit and its debug information entry.
+ ///
+ CompileUnit *NewCompileUnit(CompileUnitDesc *UnitDesc, unsigned ID) {
+ // Construct debug information entry.
+ DIE *Die = new DIE(DW_TAG_compile_unit);
+ if (TAI->isAbsoluteDebugSectionOffsets())
+ AddLabel(Die, DW_AT_stmt_list, DW_FORM_data4, DWLabel("section_line", 0));
+ else
+ AddDelta(Die, DW_AT_stmt_list, DW_FORM_data4, DWLabel("section_line", 0),
+ DWLabel("section_line", 0));
+ AddString(Die, DW_AT_producer, DW_FORM_string, UnitDesc->getProducer());
+ AddUInt (Die, DW_AT_language, DW_FORM_data1, UnitDesc->getLanguage());
+ AddString(Die, DW_AT_name, DW_FORM_string, UnitDesc->getFileName());
+ AddString(Die, DW_AT_comp_dir, DW_FORM_string, UnitDesc->getDirectory());
+
+ // Construct compile unit.
+ CompileUnit *Unit = new CompileUnit(UnitDesc, ID, Die);
+
+ // Add Unit to compile unit map.
+ DescToUnitMap[UnitDesc] = Unit;
+
+ return Unit;
+ }
+
+ /// GetBaseCompileUnit - Get the main compile unit.
+ ///
+ CompileUnit *GetBaseCompileUnit() const {
+ CompileUnit *Unit = CompileUnits[0];
+ assert(Unit && "Missing compile unit.");
+ return Unit;
+ }
+
+ /// FindCompileUnit - Get the compile unit for the given descriptor.
+ ///
+ CompileUnit *FindCompileUnit(CompileUnitDesc *UnitDesc) {
+ CompileUnit *Unit = DescToUnitMap[UnitDesc];
+ assert(Unit && "Missing compile unit.");
+ return Unit;
+ }
+
+ /// NewGlobalVariable - Add a new global variable DIE.
+ ///
+ DIE *NewGlobalVariable(GlobalVariableDesc *GVD) {
+ // Get the compile unit context.
+ CompileUnitDesc *UnitDesc =
+ static_cast<CompileUnitDesc *>(GVD->getContext());
+ CompileUnit *Unit = GetBaseCompileUnit();
+
+ // Check for pre-existence.
+ DIE *&Slot = Unit->getDieMapSlotFor(GVD);
+ if (Slot) return Slot;
+
+ // Get the global variable itself.
+ GlobalVariable *GV = GVD->getGlobalVariable();
+
+ const std::string &Name = GVD->getName();
+ const std::string &FullName = GVD->getFullName();
+ const std::string &LinkageName = GVD->getLinkageName();
+ // Create the global's variable DIE.
+ DIE *VariableDie = new DIE(DW_TAG_variable);
+ AddString(VariableDie, DW_AT_name, DW_FORM_string, Name);
+ if (!LinkageName.empty()) {
+ AddString(VariableDie, DW_AT_MIPS_linkage_name, DW_FORM_string,
+ LinkageName);
+ }
+ AddType(VariableDie, GVD->getType(), Unit);
+ if (!GVD->isStatic())
+ AddUInt(VariableDie, DW_AT_external, DW_FORM_flag, 1);
+
+ // Add source line info if available.
+ AddSourceLine(VariableDie, UnitDesc, GVD->getLine());
+
+ // Add address.
+ DIEBlock *Block = new DIEBlock();
+ AddUInt(Block, 0, DW_FORM_data1, DW_OP_addr);
+ AddObjectLabel(Block, 0, DW_FORM_udata, Asm->getGlobalLinkName(GV));
+ AddBlock(VariableDie, DW_AT_location, 0, Block);
+
+ // Add to map.
+ Slot = VariableDie;
+
+ // Add to context owner.
+ Unit->getDie()->AddChild(VariableDie);
+
+ // Expose as global.
+ // FIXME - need to check external flag.
+ Unit->AddGlobal(FullName, VariableDie);
+
+ return VariableDie;
+ }
+
+ /// NewSubprogram - Add a new subprogram DIE.
+ ///
+ DIE *NewSubprogram(SubprogramDesc *SPD) {
+ // Get the compile unit context.
+ CompileUnitDesc *UnitDesc =
+ static_cast<CompileUnitDesc *>(SPD->getContext());
+ CompileUnit *Unit = GetBaseCompileUnit();
+
+ // Check for pre-existence.
+ DIE *&Slot = Unit->getDieMapSlotFor(SPD);
+ if (Slot) return Slot;
+
+ // Gather the details (simplify add attribute code.)
+ const std::string &Name = SPD->getName();
+ const std::string &FullName = SPD->getFullName();
+ const std::string &LinkageName = SPD->getLinkageName();
+
+ DIE *SubprogramDie = new DIE(DW_TAG_subprogram);
+ AddString(SubprogramDie, DW_AT_name, DW_FORM_string, Name);
+ if (!LinkageName.empty()) {
+ AddString(SubprogramDie, DW_AT_MIPS_linkage_name, DW_FORM_string,
+ LinkageName);
+ }
+ if (SPD->getType()) AddType(SubprogramDie, SPD->getType(), Unit);
+ if (!SPD->isStatic())
+ AddUInt(SubprogramDie, DW_AT_external, DW_FORM_flag, 1);
+ AddUInt(SubprogramDie, DW_AT_prototyped, DW_FORM_flag, 1);
+
+ // Add source line info if available.
+ AddSourceLine(SubprogramDie, UnitDesc, SPD->getLine());
+
+ // Add to map.
+ Slot = SubprogramDie;
+
+ // Add to context owner.
+ Unit->getDie()->AddChild(SubprogramDie);
+
+ // Expose as global.
+ Unit->AddGlobal(FullName, SubprogramDie);
+
+ return SubprogramDie;
+ }
+
+ /// NewScopeVariable - Create a new scope variable.
+ ///
+ DIE *NewScopeVariable(DebugVariable *DV, CompileUnit *Unit) {
+ // Get the descriptor.
+ VariableDesc *VD = DV->getDesc();
+
+ // Translate tag to proper Dwarf tag. The result variable is dropped for
+ // now.
+ unsigned Tag;
+ switch (VD->getTag()) {
+ case DW_TAG_return_variable: return NULL;
+ case DW_TAG_arg_variable: Tag = DW_TAG_formal_parameter; break;
+ case DW_TAG_auto_variable: // fall thru
+ default: Tag = DW_TAG_variable; break;
+ }
+
+ // Define variable debug information entry.
+ DIE *VariableDie = new DIE(Tag);
+ AddString(VariableDie, DW_AT_name, DW_FORM_string, VD->getName());
+
+ // Add source line info if available.
+ AddSourceLine(VariableDie, VD->getFile(), VD->getLine());
+
+ // Add variable type.
+ AddType(VariableDie, VD->getType(), Unit);
+
+ // Add variable address.
+ MachineLocation Location;
+ RI->getLocation(*MF, DV->getFrameIndex(), Location);
+ AddAddress(VariableDie, DW_AT_location, Location);
+
+ return VariableDie;
+ }
+
+ /// ConstructScope - Construct the components of a scope.
+ ///
+ void ConstructScope(DebugScope *ParentScope,
+ unsigned ParentStartID, unsigned ParentEndID,
+ DIE *ParentDie, CompileUnit *Unit) {
+ // Add variables to scope.
+ std::vector<DebugVariable *> &Variables = ParentScope->getVariables();
+ for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
+ DIE *VariableDie = NewScopeVariable(Variables[i], Unit);
+ if (VariableDie) ParentDie->AddChild(VariableDie);
+ }
+
+ // Add nested scopes.
+ std::vector<DebugScope *> &Scopes = ParentScope->getScopes();
+ for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
+ // Define the Scope debug information entry.
+ DebugScope *Scope = Scopes[j];
+ // FIXME - Ignore inlined functions for the time being.
+ if (!Scope->getParent()) continue;
+
+ unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
+ unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
+
+ // Ignore empty scopes.
+ if (StartID == EndID && StartID != 0) continue;
+ if (Scope->getScopes().empty() && Scope->getVariables().empty()) continue;
+
+ if (StartID == ParentStartID && EndID == ParentEndID) {
+ // Just add stuff to the parent scope.
+ ConstructScope(Scope, ParentStartID, ParentEndID, ParentDie, Unit);
+ } else {
+ DIE *ScopeDie = new DIE(DW_TAG_lexical_block);
+
+ // Add the scope bounds.
+ if (StartID) {
+ AddLabel(ScopeDie, DW_AT_low_pc, DW_FORM_addr,
+ DWLabel("label", StartID));
+ } else {
+ AddLabel(ScopeDie, DW_AT_low_pc, DW_FORM_addr,
+ DWLabel("func_begin", SubprogramCount));
+ }
+ if (EndID) {
+ AddLabel(ScopeDie, DW_AT_high_pc, DW_FORM_addr,
+ DWLabel("label", EndID));
+ } else {
+ AddLabel(ScopeDie, DW_AT_high_pc, DW_FORM_addr,
+ DWLabel("func_end", SubprogramCount));
+ }
+
+ // Add the scope contents.
+ ConstructScope(Scope, StartID, EndID, ScopeDie, Unit);
+ ParentDie->AddChild(ScopeDie);
+ }
+ }
+ }
+
+ /// ConstructRootScope - Construct the scope for the subprogram.
+ ///
+ void ConstructRootScope(DebugScope *RootScope) {
+ // Exit if there is no root scope.
+ if (!RootScope) return;
+
+ // Get the subprogram debug information entry.
+ SubprogramDesc *SPD = cast<SubprogramDesc>(RootScope->getDesc());
+
+ // Get the compile unit context.
+ CompileUnit *Unit = GetBaseCompileUnit();
+
+ // Get the subprogram die.
+ DIE *SPDie = Unit->getDieMapSlotFor(SPD);
+ assert(SPDie && "Missing subprogram descriptor");
+
+ // Add the function bounds.
+ AddLabel(SPDie, DW_AT_low_pc, DW_FORM_addr,
+ DWLabel("func_begin", SubprogramCount));
+ AddLabel(SPDie, DW_AT_high_pc, DW_FORM_addr,
+ DWLabel("func_end", SubprogramCount));
+ MachineLocation Location(RI->getFrameRegister(*MF));
+ AddAddress(SPDie, DW_AT_frame_base, Location);
+
+ ConstructScope(RootScope, 0, 0, SPDie, Unit);
+ }
+
+ /// EmitInitial - Emit initial Dwarf declarations. This is necessary for cc
+ /// tools to recognize that the object file contains Dwarf information.
+ void EmitInitial() {
+ // Check to see if we already emitted initial headers.
+ if (didInitial) return;
+ didInitial = true;
+
+ // Dwarf sections base addresses.
+ if (TAI->doesDwarfRequireFrameSection()) {
+ Asm->SwitchToDataSection(TAI->getDwarfFrameSection());
+ EmitLabel("section_debug_frame", 0);
+ }
+ Asm->SwitchToDataSection(TAI->getDwarfInfoSection());
+ EmitLabel("section_info", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection());
+ EmitLabel("section_abbrev", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfARangesSection());
+ EmitLabel("section_aranges", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+ EmitLabel("section_macinfo", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfLineSection());
+ EmitLabel("section_line", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfLocSection());
+ EmitLabel("section_loc", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection());
+ EmitLabel("section_pubnames", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfStrSection());
+ EmitLabel("section_str", 0);
+ Asm->SwitchToDataSection(TAI->getDwarfRangesSection());
+ EmitLabel("section_ranges", 0);
+
+ Asm->SwitchToTextSection(TAI->getTextSection());
+ EmitLabel("text_begin", 0);
+ Asm->SwitchToDataSection(TAI->getDataSection());
+ EmitLabel("data_begin", 0);
+ }
+
+ /// EmitDIE - Recursively emits a debug information entry.
+ ///
+ void EmitDIE(DIE *Die) {
+ // Get the abbreviation for this DIE.
+ unsigned AbbrevNumber = Die->getAbbrevNumber();
+ const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+ Asm->EOL();
+
+ // Emit the code (index) for the abbreviation.
+ Asm->EmitULEB128Bytes(AbbrevNumber);
+ Asm->EOL(std::string("Abbrev [" +
+ utostr(AbbrevNumber) +
+ "] 0x" + utohexstr(Die->getOffset()) +
+ ":0x" + utohexstr(Die->getSize()) + " " +
+ TagString(Abbrev->getTag())));
+
+ std::vector<DIEValue *> &Values = Die->getValues();
+ const std::vector<DIEAbbrevData> &AbbrevData = Abbrev->getData();
+
+ // Emit the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+ unsigned Attr = AbbrevData[i].getAttribute();
+ unsigned Form = AbbrevData[i].getForm();
+ assert(Form && "Too many attributes for DIE (check abbreviation)");
+
+ switch (Attr) {
+ case DW_AT_sibling: {
+ Asm->EmitInt32(Die->SiblingOffset());
+ break;
+ }
+ default: {
+ // Emit an attribute using the defined form.
+ Values[i]->EmitValue(*this, Form);
+ break;
+ }
+ }
+
+ Asm->EOL(AttributeString(Attr));
+ }
+
+ // Emit the DIE children if any.
+ if (Abbrev->getChildrenFlag() == DW_CHILDREN_yes) {
+ const std::vector<DIE *> &Children = Die->getChildren();
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j) {
+ EmitDIE(Children[j]);
+ }
+
+ Asm->EmitInt8(0); Asm->EOL("End Of Children Mark");
+ }
+ }
+
+ /// SizeAndOffsetDie - Compute the size and offset of a DIE.
+ ///
+ unsigned SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) {
+ // Get the children.
+ const std::vector<DIE *> &Children = Die->getChildren();
+
+ // If this is not the last sibling and it has children, add a sibling offset attribute.
+ if (!Last && !Children.empty()) Die->AddSiblingOffset();
+
+ // Record the abbreviation.
+ AssignAbbrevNumber(Die->getAbbrev());
+
+ // Get the abbreviation for this DIE.
+ unsigned AbbrevNumber = Die->getAbbrevNumber();
+ const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+ // Set DIE offset
+ Die->setOffset(Offset);
+
+ // Start the size with the size of abbreviation code.
+ Offset += Asm->SizeULEB128(AbbrevNumber);
+
+ const std::vector<DIEValue *> &Values = Die->getValues();
+ const std::vector<DIEAbbrevData> &AbbrevData = Abbrev->getData();
+
+ // Size the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+ // Size attribute value.
+ Offset += Values[i]->SizeOf(*this, AbbrevData[i].getForm());
+ }
+
+ // Size the DIE children if any.
+ if (!Children.empty()) {
+ assert(Abbrev->getChildrenFlag() == DW_CHILDREN_yes &&
+ "Children flag not set");
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j) {
+ Offset = SizeAndOffsetDie(Children[j], Offset, (j + 1) == M);
+ }
+
+ // End of children marker.
+ Offset += sizeof(int8_t);
+ }
+
+ Die->setSize(Offset - Die->getOffset());
+ return Offset;
+ }
+
+ /// SizeAndOffsets - Compute the size and offset of all the DIEs.
+ ///
+ void SizeAndOffsets() {
+ // Process base compile unit.
+ CompileUnit *Unit = GetBaseCompileUnit();
+ // Compute size of compile unit header
+ unsigned Offset = sizeof(int32_t) + // Length of Compilation Unit Info
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t); // Pointer Size (in bytes)
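+ // These four fields total 11 bytes, so the unit's root DIE starts at offset 11.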
+ SizeAndOffsetDie(Unit->getDie(), Offset, true);
+ }
+
+ /// EmitDebugInfo - Emit the debug info section.
+ ///
+ void EmitDebugInfo() {
+ // Start debug info section.
+ Asm->SwitchToDataSection(TAI->getDwarfInfoSection());
+
+ CompileUnit *Unit = GetBaseCompileUnit();
+ DIE *Die = Unit->getDie();
+ // Emit the compile units header.
+ EmitLabel("info_begin", Unit->getID());
+ // Emit size of content not including length itself
+ unsigned ContentSize = Die->getSize() +
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t) + // Pointer Size (in bytes)
+ sizeof(int32_t); // FIXME - extra pad for gdb bug.
+
+ Asm->EmitInt32(ContentSize); Asm->EOL("Length of Compilation Unit Info");
+ Asm->EmitInt16(DWARF_VERSION); Asm->EOL("DWARF version number");
+ EmitSectionOffset("abbrev_begin", "section_abbrev", 0, 0, true, false);
+ Asm->EOL("Offset Into Abbrev. Section");
+ Asm->EmitInt8(TAI->getAddressSize()); Asm->EOL("Address Size (in bytes)");
+
+ EmitDIE(Die);
+ // FIXME - extra padding for gdb bug.
+ Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+ Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+ Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+ Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+ EmitLabel("info_end", Unit->getID());
+
+ Asm->EOL();
+ }
+
+ /// EmitAbbreviations - Emit the abbreviation section.
+ ///
+ void EmitAbbreviations() const {
+ // Check to see if it is worth the effort.
+ if (!Abbreviations.empty()) {
+ // Start the debug abbrev section.
+ Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection());
+
+ EmitLabel("abbrev_begin", 0);
+
+ // For each abbreviation.
+ for (unsigned i = 0, N = Abbreviations.size(); i < N; ++i) {
+ // Get abbreviation data
+ const DIEAbbrev *Abbrev = Abbreviations[i];
+
+ // Emit the abbreviation's code (base 1 index.)
+ Asm->EmitULEB128Bytes(Abbrev->getNumber());
+ Asm->EOL("Abbreviation Code");
+
+ // Emit the abbreviations data.
+ Abbrev->Emit(*this);
+
+ Asm->EOL();
+ }
+
+ // Mark end of abbreviations.
+ Asm->EmitULEB128Bytes(0); Asm->EOL("EOM(3)");
+
+ EmitLabel("abbrev_end", 0);
+
+ Asm->EOL();
+ }
+ }
+
+ /// EmitDebugLines - Emit source line information.
+ ///
+ void EmitDebugLines() {
+ // Minimum line delta, thus ranging from -10..(255-10).
+ const int MinLineDelta = -(DW_LNS_fixed_advance_pc + 1);
+ // Maximum line delta, the upper end of the same -10..(255-10) range.
+ const int MaxLineDelta = 255 + MinLineDelta;
+
+ // Start the dwarf line section.
+ Asm->SwitchToDataSection(TAI->getDwarfLineSection());
+
+ // Construct the section header.
+
+ EmitDifference("line_end", 0, "line_begin", 0, true);
+ Asm->EOL("Length of Source Line Info");
+ EmitLabel("line_begin", 0);
+
+ Asm->EmitInt16(DWARF_VERSION); Asm->EOL("DWARF version number");
+
+ EmitDifference("line_prolog_end", 0, "line_prolog_begin", 0, true);
+ Asm->EOL("Prolog Length");
+ EmitLabel("line_prolog_begin", 0);
+
+ Asm->EmitInt8(1); Asm->EOL("Minimum Instruction Length");
+
+ Asm->EmitInt8(1); Asm->EOL("Default is_stmt_start flag");
+
+ Asm->EmitInt8(MinLineDelta); Asm->EOL("Line Base Value (Special Opcodes)");
+
+ Asm->EmitInt8(MaxLineDelta); Asm->EOL("Line Range Value (Special Opcodes)");
+
+ Asm->EmitInt8(-MinLineDelta); Asm->EOL("Special Opcode Base");
+
+ // Line number standard opcode encodings argument count
+ Asm->EmitInt8(0); Asm->EOL("DW_LNS_copy arg count");
+ Asm->EmitInt8(1); Asm->EOL("DW_LNS_advance_pc arg count");
+ Asm->EmitInt8(1); Asm->EOL("DW_LNS_advance_line arg count");
+ Asm->EmitInt8(1); Asm->EOL("DW_LNS_set_file arg count");
+ Asm->EmitInt8(1); Asm->EOL("DW_LNS_set_column arg count");
+ Asm->EmitInt8(0); Asm->EOL("DW_LNS_negate_stmt arg count");
+ Asm->EmitInt8(0); Asm->EOL("DW_LNS_set_basic_block arg count");
+ Asm->EmitInt8(0); Asm->EOL("DW_LNS_const_add_pc arg count");
+ Asm->EmitInt8(1); Asm->EOL("DW_LNS_fixed_advance_pc arg count");
+
+ const UniqueVector<std::string> &Directories = MMI->getDirectories();
+ const UniqueVector<SourceFileInfo>
+ &SourceFiles = MMI->getSourceFiles();
+
+ // Emit directories.
+ for (unsigned DirectoryID = 1, NDID = Directories.size();
+ DirectoryID <= NDID; ++DirectoryID) {
+ Asm->EmitString(Directories[DirectoryID]); Asm->EOL("Directory");
+ }
+ Asm->EmitInt8(0); Asm->EOL("End of directories");
+
+ // Emit files.
+ for (unsigned SourceID = 1, NSID = SourceFiles.size();
+ SourceID <= NSID; ++SourceID) {
+ const SourceFileInfo &SourceFile = SourceFiles[SourceID];
+ Asm->EmitString(SourceFile.getName());
+ Asm->EOL("Source");
+ Asm->EmitULEB128Bytes(SourceFile.getDirectoryID());
+ Asm->EOL("Directory #");
+ Asm->EmitULEB128Bytes(0);
+ Asm->EOL("Mod date");
+ Asm->EmitULEB128Bytes(0);
+ Asm->EOL("File size");
+ }
+ Asm->EmitInt8(0); Asm->EOL("End of files");
+
+ EmitLabel("line_prolog_end", 0);
+
+ // A sequence for each text section.
+ for (unsigned j = 0, M = SectionSourceLines.size(); j < M; ++j) {
+ // Isolate current sections line info.
+ const std::vector<SourceLineInfo> &LineInfos = SectionSourceLines[j];
+
+ Asm->EOL(std::string("Section ") + SectionMap[j + 1]);
+
+ // Dwarf assumes we start with first line of first source file.
+ unsigned Source = 1;
+ unsigned Line = 1;
+
+ // Construct rows of the address, source, line, column matrix.
+ for (unsigned i = 0, N = LineInfos.size(); i < N; ++i) {
+ const SourceLineInfo &LineInfo = LineInfos[i];
+ unsigned LabelID = MMI->MappedLabel(LineInfo.getLabelID());
+ if (!LabelID) continue;
+
+ unsigned SourceID = LineInfo.getSourceID();
+ const SourceFileInfo &SourceFile = SourceFiles[SourceID];
+ unsigned DirectoryID = SourceFile.getDirectoryID();
+ Asm->EOL(Directories[DirectoryID]
+ + SourceFile.getName()
+ + ":"
+ + utostr_32(LineInfo.getLine()));
+
+ // Define the line address.
+ Asm->EmitInt8(0); Asm->EOL("Extended Op");
+ Asm->EmitInt8(TAI->getAddressSize() + 1); Asm->EOL("Op size");
+ Asm->EmitInt8(DW_LNE_set_address); Asm->EOL("DW_LNE_set_address");
+ EmitReference("label", LabelID); Asm->EOL("Location label");
+
+ // If change of source, then switch to the new source.
+ if (Source != LineInfo.getSourceID()) {
+ Source = LineInfo.getSourceID();
+ Asm->EmitInt8(DW_LNS_set_file); Asm->EOL("DW_LNS_set_file");
+ Asm->EmitULEB128Bytes(Source); Asm->EOL("New Source");
+ }
+
+ // If change of line.
+ if (Line != LineInfo.getLine()) {
+ // Determine offset.
+ int Offset = LineInfo.getLine() - Line;
+ int Delta = Offset - MinLineDelta;
+
+ // Update line.
+ Line = LineInfo.getLine();
+
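+ // Worked example (hypothetical line numbers): DW_LNS_fixed_advance_pc is
+ // opcode 9, so MinLineDelta is -10 and the opcode base emitted above is 10.
+ // A line advance of 3 gives Delta = 13, which is in range, so the single
+ // special opcode 23 (= 13 - MinLineDelta) is emitted instead of the longer
+ // DW_LNS_advance_line sequence.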
+ // If delta is small enough and in range...
+ if (Delta >= 0 && Delta < (MaxLineDelta - 1)) {
+ // ... then use fast opcode.
+ Asm->EmitInt8(Delta - MinLineDelta); Asm->EOL("Line Delta");
+ } else {
+ // ... otherwise use long hand.
+ Asm->EmitInt8(DW_LNS_advance_line); Asm->EOL("DW_LNS_advance_line");
+ Asm->EmitSLEB128Bytes(Offset); Asm->EOL("Line Offset");
+ Asm->EmitInt8(DW_LNS_copy); Asm->EOL("DW_LNS_copy");
+ }
+ } else {
+ // Copy the previous row (different address or source)
+ Asm->EmitInt8(DW_LNS_copy); Asm->EOL("DW_LNS_copy");
+ }
+ }
+
+ // Define last address of section.
+ Asm->EmitInt8(0); Asm->EOL("Extended Op");
+ Asm->EmitInt8(TAI->getAddressSize() + 1); Asm->EOL("Op size");
+ Asm->EmitInt8(DW_LNE_set_address); Asm->EOL("DW_LNE_set_address");
+ EmitReference("section_end", j + 1); Asm->EOL("Section end label");
+
+ // Mark end of matrix.
+ Asm->EmitInt8(0); Asm->EOL("DW_LNE_end_sequence");
+ Asm->EmitULEB128Bytes(1); Asm->EOL();
+ Asm->EmitInt8(1); Asm->EOL();
+ }
+
+ EmitLabel("line_end", 0);
+
+ Asm->EOL();
+ }
+
+ /// EmitCommonDebugFrame - Emit common frame info into a debug frame section.
+ ///
+ void EmitCommonDebugFrame() {
+ if (!TAI->doesDwarfRequireFrameSection())
+ return;
+
+ int stackGrowth =
+ Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
+ TargetFrameInfo::StackGrowsUp ?
+ TAI->getAddressSize() : -TAI->getAddressSize();
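+ // For example, a 32-bit target whose stack grows down gets -4 here; the same
+ // value doubles as the CIE data alignment factor emitted below.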
+
+ // Start the dwarf frame section.
+ Asm->SwitchToDataSection(TAI->getDwarfFrameSection());
+
+ EmitLabel("debug_frame_common", 0);
+ EmitDifference("debug_frame_common_end", 0,
+ "debug_frame_common_begin", 0, true);
+ Asm->EOL("Length of Common Information Entry");
+
+ EmitLabel("debug_frame_common_begin", 0);
+ Asm->EmitInt32((int)DW_CIE_ID);
+ Asm->EOL("CIE Identifier Tag");
+ Asm->EmitInt8(DW_CIE_VERSION);
+ Asm->EOL("CIE Version");
+ Asm->EmitString("");
+ Asm->EOL("CIE Augmentation");
+ Asm->EmitULEB128Bytes(1);
+ Asm->EOL("CIE Code Alignment Factor");
+ Asm->EmitSLEB128Bytes(stackGrowth);
+ Asm->EOL("CIE Data Alignment Factor");
+ Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister()));
+ Asm->EOL("CIE RA Column");
+
+ std::vector<MachineMove> Moves;
+ RI->getInitialFrameState(Moves);
+
+ EmitFrameMoves(NULL, 0, Moves);
+
+ Asm->EmitAlignment(2);
+ EmitLabel("debug_frame_common_end", 0);
+
+ Asm->EOL();
+ }
+
+ /// EmitFunctionDebugFrame - Emit per function frame info into a debug frame
+ /// section.
+ void EmitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) {
+ if (!TAI->doesDwarfRequireFrameSection())
+ return;
+
+ // Start the dwarf frame section.
+ Asm->SwitchToDataSection(TAI->getDwarfFrameSection());
+
+ EmitDifference("debug_frame_end", DebugFrameInfo.Number,
+ "debug_frame_begin", DebugFrameInfo.Number, true);
+ Asm->EOL("Length of Frame Information Entry");
+
+ EmitLabel("debug_frame_begin", DebugFrameInfo.Number);
+
+ EmitSectionOffset("debug_frame_common", "section_debug_frame",
+ 0, 0, true, false);
+ Asm->EOL("FDE CIE offset");
+
+ EmitReference("func_begin", DebugFrameInfo.Number);
+ Asm->EOL("FDE initial location");
+ EmitDifference("func_end", DebugFrameInfo.Number,
+ "func_begin", DebugFrameInfo.Number);
+ Asm->EOL("FDE address range");
+
+ EmitFrameMoves("func_begin", DebugFrameInfo.Number, DebugFrameInfo.Moves);
+
+ Asm->EmitAlignment(2);
+ EmitLabel("debug_frame_end", DebugFrameInfo.Number);
+
+ Asm->EOL();
+ }
+
+ /// EmitDebugPubNames - Emit visible names into a debug pubnames section.
+ ///
+ void EmitDebugPubNames() {
+ // Start the dwarf pubnames section.
+ Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection());
+
+ CompileUnit *Unit = GetBaseCompileUnit();
+
+ EmitDifference("pubnames_end", Unit->getID(),
+ "pubnames_begin", Unit->getID(), true);
+ Asm->EOL("Length of Public Names Info");
+
+ EmitLabel("pubnames_begin", Unit->getID());
+
+ Asm->EmitInt16(DWARF_VERSION); Asm->EOL("DWARF Version");
+
+ EmitSectionOffset("info_begin", "section_info",
+ Unit->getID(), 0, true, false);
+ Asm->EOL("Offset of Compilation Unit Info");
+
+ EmitDifference("info_end", Unit->getID(), "info_begin", Unit->getID(),true);
+ Asm->EOL("Compilation Unit Length");
+
+ std::map<std::string, DIE *> &Globals = Unit->getGlobals();
+
+ for (std::map<std::string, DIE *>::iterator GI = Globals.begin(),
+ GE = Globals.end();
+ GI != GE; ++GI) {
+ const std::string &Name = GI->first;
+ DIE * Entity = GI->second;
+
+ Asm->EmitInt32(Entity->getOffset()); Asm->EOL("DIE offset");
+ Asm->EmitString(Name); Asm->EOL("External Name");
+ }
+
+ Asm->EmitInt32(0); Asm->EOL("End Mark");
+ EmitLabel("pubnames_end", Unit->getID());
+
+ Asm->EOL();
+ }
+
+ /// EmitDebugStr - Emit visible names into a debug str section.
+ ///
+ void EmitDebugStr() {
+ // Check to see if it is worth the effort.
+ if (!StringPool.empty()) {
+ // Start the dwarf str section.
+ Asm->SwitchToDataSection(TAI->getDwarfStrSection());
+
+ // For each of strings in the string pool.
+ for (unsigned StringID = 1, N = StringPool.size();
+ StringID <= N; ++StringID) {
+ // Emit a label for reference from debug information entries.
+ EmitLabel("string", StringID);
+ // Emit the string itself.
+ const std::string &String = StringPool[StringID];
+ Asm->EmitString(String); Asm->EOL();
+ }
+
+ Asm->EOL();
+ }
+ }
+
+ /// EmitDebugLoc - Emit visible names into a debug loc section.
+ ///
+ void EmitDebugLoc() {
+ // Start the dwarf loc section.
+ Asm->SwitchToDataSection(TAI->getDwarfLocSection());
+
+ Asm->EOL();
+ }
+
+ /// EmitDebugARanges - Emit visible names into a debug aranges section.
+ ///
+ void EmitDebugARanges() {
+ // Start the dwarf aranges section.
+ Asm->SwitchToDataSection(TAI->getDwarfARangesSection());
+
+ // FIXME - Mock up
+ #if 0
+ CompileUnit *Unit = GetBaseCompileUnit();
+
+ // Don't include size of length
+ Asm->EmitInt32(0x1c); Asm->EOL("Length of Address Ranges Info");
+
+ Asm->EmitInt16(DWARF_VERSION); Asm->EOL("Dwarf Version");
+
+ EmitReference("info_begin", Unit->getID());
+ Asm->EOL("Offset of Compilation Unit Info");
+
+ Asm->EmitInt8(TAI->getAddressSize()); Asm->EOL("Size of Address");
+
+ Asm->EmitInt8(0); Asm->EOL("Size of Segment Descriptor");
+
+ Asm->EmitInt16(0); Asm->EOL("Pad (1)");
+ Asm->EmitInt16(0); Asm->EOL("Pad (2)");
+
+ // Range 1
+ EmitReference("text_begin", 0); Asm->EOL("Address");
+ EmitDifference("text_end", 0, "text_begin", 0, true); Asm->EOL("Length");
+
+ Asm->EmitInt32(0); Asm->EOL("EOM (1)");
+ Asm->EmitInt32(0); Asm->EOL("EOM (2)");
+
+ Asm->EOL();
+ #endif
+ }
+
+ /// EmitDebugRanges - Emit visible names into a debug ranges section.
+ ///
+ void EmitDebugRanges() {
+ // Start the dwarf ranges section.
+ Asm->SwitchToDataSection(TAI->getDwarfRangesSection());
+
+ Asm->EOL();
+ }
+
+ /// EmitDebugMacInfo - Emit visible names into a debug macinfo section.
+ ///
+ void EmitDebugMacInfo() {
+ // Start the dwarf macinfo section.
+ Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+
+ Asm->EOL();
+ }
+
+ /// ConstructCompileUnitDIEs - Create a compile unit DIE for each source and
+ /// header file.
+ void ConstructCompileUnitDIEs() {
+ const UniqueVector<CompileUnitDesc *> CUW = MMI->getCompileUnits();
+
+ for (unsigned i = 1, N = CUW.size(); i <= N; ++i) {
+ unsigned ID = MMI->RecordSource(CUW[i]);
+ CompileUnit *Unit = NewCompileUnit(CUW[i], ID);
+ CompileUnits.push_back(Unit);
+ }
+ }
+
+ /// ConstructGlobalDIEs - Create DIEs for each of the externally visible
+ /// global variables.
+ void ConstructGlobalDIEs() {
+ std::vector<GlobalVariableDesc *> GlobalVariables =
+ MMI->getAnchoredDescriptors<GlobalVariableDesc>(*M);
+
+ for (unsigned i = 0, N = GlobalVariables.size(); i < N; ++i) {
+ GlobalVariableDesc *GVD = GlobalVariables[i];
+ NewGlobalVariable(GVD);
+ }
+ }
+
+ /// ConstructSubprogramDIEs - Create DIEs for each of the externally visible
+ /// subprograms.
+ void ConstructSubprogramDIEs() {
+ std::vector<SubprogramDesc *> Subprograms =
+ MMI->getAnchoredDescriptors<SubprogramDesc>(*M);
+
+ for (unsigned i = 0, N = Subprograms.size(); i < N; ++i) {
+ SubprogramDesc *SPD = Subprograms[i];
+ NewSubprogram(SPD);
+ }
+ }
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfDebug(std::ostream &OS, AsmPrinter *A, const TargetAsmInfo *T)
+ : Dwarf(OS, A, T)
+ , CompileUnits()
+ , AbbreviationsSet(InitAbbreviationsSetSize)
+ , Abbreviations()
+ , ValuesSet(InitValuesSetSize)
+ , Values()
+ , StringPool()
+ , DescToUnitMap()
+ , SectionMap()
+ , SectionSourceLines()
+ , didInitial(false)
+ , shouldEmit(false)
+ {
+ }
+ virtual ~DwarfDebug() {
+ for (unsigned i = 0, N = CompileUnits.size(); i < N; ++i)
+ delete CompileUnits[i];
+ for (unsigned j = 0, M = Values.size(); j < M; ++j)
+ delete Values[j];
+ }
+
+ /// SetModuleInfo - Set machine module information when it's known that pass
+ /// manager has created it. Set by the target AsmPrinter.
+ void SetModuleInfo(MachineModuleInfo *mmi) {
+ // Make sure initial declarations are made.
+ if (!MMI && mmi->hasDebugInfo()) {
+ MMI = mmi;
+ shouldEmit = true;
+
+ // Emit initial sections
+ EmitInitial();
+
+ // Create all the compile unit DIEs.
+ ConstructCompileUnitDIEs();
+
+ // Create DIEs for each of the externally visible global variables.
+ ConstructGlobalDIEs();
+
+ // Create DIEs for each of the externally visible subprograms.
+ ConstructSubprogramDIEs();
+
+ // Prime section data.
+ SectionMap.insert(TAI->getTextSection());
+ }
+ }
+
+ /// BeginModule - Emit all Dwarf sections that should come prior to the
+ /// content.
+ void BeginModule(Module *M) {
+ this->M = M;
+
+ if (!ShouldEmitDwarf()) return;
+ }
+
+ /// EndModule - Emit all Dwarf sections that should come after the content.
+ ///
+ void EndModule() {
+ if (!ShouldEmitDwarf()) return;
+
+ // Standard sections final addresses.
+ Asm->SwitchToTextSection(TAI->getTextSection());
+ EmitLabel("text_end", 0);
+ Asm->SwitchToDataSection(TAI->getDataSection());
+ EmitLabel("data_end", 0);
+
+ // End text sections.
+ for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) {
+ Asm->SwitchToTextSection(SectionMap[i].c_str());
+ EmitLabel("section_end", i);
+ }
+
+ // Emit common frame information.
+ EmitCommonDebugFrame();
+
+ // Emit function debug frame information
+ for (std::vector<FunctionDebugFrameInfo>::iterator I = DebugFrames.begin(),
+ E = DebugFrames.end(); I != E; ++I)
+ EmitFunctionDebugFrame(*I);
+
+ // Compute DIE offsets and sizes.
+ SizeAndOffsets();
+
+ // Emit all the DIEs into a debug info section
+ EmitDebugInfo();
+
+ // Corresponding abbreviations into a abbrev section.
+ EmitAbbreviations();
+
+ // Emit source line correspondence into a debug line section.
+ EmitDebugLines();
+
+ // Emit info into a debug pubnames section.
+ EmitDebugPubNames();
+
+ // Emit info into a debug str section.
+ EmitDebugStr();
+
+ // Emit info into a debug loc section.
+ EmitDebugLoc();
+
+ // Emit info into a debug aranges section.
+ EmitDebugARanges();
+
+ // Emit info into a debug ranges section.
+ EmitDebugRanges();
+
+ // Emit info into a debug macinfo section.
+ EmitDebugMacInfo();
+ }
+
+ /// BeginFunction - Gather pre-function debug information. Assumes being
+ /// emitted immediately after the function entry point.
+ void BeginFunction(MachineFunction *MF) {
+ this->MF = MF;
+
+ if (!ShouldEmitDwarf()) return;
+
+ // Begin accumulating function debug information.
+ MMI->BeginFunction(MF);
+
+ // Assumes in correct section after the entry point.
+ EmitLabel("func_begin", ++SubprogramCount);
+ }
+
+ /// EndFunction - Gather and emit post-function debug information.
+ ///
+ void EndFunction() {
+ if (!ShouldEmitDwarf()) return;
+
+ // Define end label for subprogram.
+ EmitLabel("func_end", SubprogramCount);
+
+ // Get function line info.
+ const std::vector<SourceLineInfo> &LineInfos = MMI->getSourceLines();
+
+ if (!LineInfos.empty()) {
+ // Get section line info.
+ unsigned ID = SectionMap.insert(Asm->CurrentSection);
+ if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID);
+ std::vector<SourceLineInfo> &SectionLineInfos = SectionSourceLines[ID-1];
+ // Append the function info to section info.
+ SectionLineInfos.insert(SectionLineInfos.end(),
+ LineInfos.begin(), LineInfos.end());
+ }
+
+ // Construct scopes for subprogram.
+ ConstructRootScope(MMI->getRootScope());
+
+ DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount,
+ MMI->getFrameMoves()));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// DwarfException - Emits Dwarf exception handling directives.
+///
+class DwarfException : public Dwarf {
+
+private:
+ struct FunctionEHFrameInfo {
+ std::string FnName;
+ unsigned Number;
+ unsigned PersonalityIndex;
+ bool hasCalls;
+ bool hasLandingPads;
+ std::vector<MachineMove> Moves;
+
+ FunctionEHFrameInfo(const std::string &FN, unsigned Num, unsigned P,
+ bool hC, bool hL,
+ const std::vector<MachineMove> &M):
+ FnName(FN), Number(Num), PersonalityIndex(P),
+ hasCalls(hC), hasLandingPads(hL), Moves(M) { };
+ };
+
+ std::vector<FunctionEHFrameInfo> EHFrames;
+
+ /// shouldEmit - Flag to indicate if debug information should be emitted.
+ ///
+ bool shouldEmit;
+
+ /// EmitCommonEHFrame - Emit the common eh unwind frame.
+ ///
+ void EmitCommonEHFrame(const Function *Personality, unsigned Index) {
+ // Size and sign of stack growth.
+ int stackGrowth =
+ Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
+ TargetFrameInfo::StackGrowsUp ?
+ TAI->getAddressSize() : -TAI->getAddressSize();
+
+ // Begin eh frame section.
+ Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection());
+ O << "EH_frame" << Index << ":\n";
+ EmitLabel("section_eh_frame", Index);
+
+ // Define base labels.
+ EmitLabel("eh_frame_common", Index);
+
+ // Define the eh frame length.
+ EmitDifference("eh_frame_common_end", Index,
+ "eh_frame_common_begin", Index, true);
+ Asm->EOL("Length of Common Information Entry");
+
+ // EH frame header.
+ EmitLabel("eh_frame_common_begin", Index);
+ Asm->EmitInt32((int)0);
+ Asm->EOL("CIE Identifier Tag");
+ Asm->EmitInt8(DW_CIE_VERSION);
+ Asm->EOL("CIE Version");
+
+ // The personality presence indicates that language specific information
+ // will show up in the eh frame.
+ Asm->EmitString(Personality ? "zPLR" : "zR");
+ Asm->EOL("CIE Augmentation");
+
+ // Round out reader.
+ Asm->EmitULEB128Bytes(1);
+ Asm->EOL("CIE Code Alignment Factor");
+ Asm->EmitSLEB128Bytes(stackGrowth);
+ Asm->EOL("CIE Data Alignment Factor");
+ Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister()));
+ Asm->EOL("CIE RA Column");
+
+ // If there is a personality, we need to indicate the function's location.
+ if (Personality) {
+ Asm->EmitULEB128Bytes(7);
+ Asm->EOL("Augmentation Size");
+ Asm->EmitInt8(DW_EH_PE_pcrel | DW_EH_PE_sdata4);
+ Asm->EOL("Personality (pcrel sdata4)");
+
+ O << TAI->getData32bitsDirective();
+ Asm->EmitExternalGlobal((const GlobalVariable *)(Personality));
+ O << "-" << TAI->getPCSymbol();
+ Asm->EOL("Personality");
+
+ Asm->EmitULEB128Bytes(DW_EH_PE_pcrel);
+ Asm->EOL("LSDA Encoding (pcrel)");
+ Asm->EmitULEB128Bytes(DW_EH_PE_pcrel);
+ Asm->EOL("FDE Encoding (pcrel)");
+ } else {
+ Asm->EmitULEB128Bytes(1);
+ Asm->EOL("Augmentation Size");
+ Asm->EmitULEB128Bytes(DW_EH_PE_pcrel);
+ Asm->EOL("FDE Encoding (pcrel)");
+ }
+
+ // Indicate locations of general callee saved registers in frame.
+ std::vector<MachineMove> Moves;
+ RI->getInitialFrameState(Moves);
+ EmitFrameMoves(NULL, 0, Moves);
+
+ Asm->EmitAlignment(2);
+ EmitLabel("eh_frame_common_end", Index);
+
+ Asm->EOL();
+ }
+
+ /// EmitEHFrame - Emit function exception frame information.
+ ///
+ void EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) {
+ Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection());
+
+ // Externally visible entry into the function's eh frame info.
+ if (const char *GlobalDirective = TAI->getGlobalDirective())
+ O << GlobalDirective << EHFrameInfo.FnName << ".eh\n";
+
+ // If there are no calls then you can't unwind.
+ if (!EHFrameInfo.hasCalls) {
+ O << EHFrameInfo.FnName << ".eh = 0\n";
+ } else {
+ O << EHFrameInfo.FnName << ".eh:\n";
+
+ // EH frame header.
+ EmitDifference("eh_frame_end", EHFrameInfo.Number,
+ "eh_frame_begin", EHFrameInfo.Number, true);
+ Asm->EOL("Length of Frame Information Entry");
+
+ EmitLabel("eh_frame_begin", EHFrameInfo.Number);
+
+ EmitSectionOffset("eh_frame_begin", "eh_frame_common",
+ EHFrameInfo.Number, EHFrameInfo.PersonalityIndex,
+ true, true);
+ Asm->EOL("FDE CIE offset");
+
+ EmitReference("eh_func_begin", EHFrameInfo.Number, true);
+ Asm->EOL("FDE initial location");
+ EmitDifference("eh_func_end", EHFrameInfo.Number,
+ "eh_func_begin", EHFrameInfo.Number);
+ Asm->EOL("FDE address range");
+
+ // If there is a personality and landing pads then point to the language
+ // specific data area in the exception table.
+ if (EHFrameInfo.PersonalityIndex) {
+ Asm->EmitULEB128Bytes(4);
+ Asm->EOL("Augmentation size");
+
+ if (EHFrameInfo.hasLandingPads) {
+ EmitReference("exception", EHFrameInfo.Number, true);
+ } else if(TAI->getAddressSize() == 8) {
+ Asm->EmitInt64((int)0);
+ } else {
+ Asm->EmitInt32((int)0);
+ }
+ Asm->EOL("Language Specific Data Area");
+ } else {
+ Asm->EmitULEB128Bytes(0);
+ Asm->EOL("Augmentation size");
+ }
+
+ // Indicate locations of function specific callee saved registers in
+ // frame.
+ EmitFrameMoves("eh_func_begin", EHFrameInfo.Number, EHFrameInfo.Moves);
+
+ Asm->EmitAlignment(2);
+ EmitLabel("eh_frame_end", EHFrameInfo.Number);
+ }
+
+ if (const char *UsedDirective = TAI->getUsedDirective())
+ O << UsedDirective << EHFrameInfo.FnName << ".eh\n\n";
+ }
+
+ /// EmitExceptionTable - Emit landing pads and actions.
+ ///
+ /// The general organization of the table is complex, but the basic concepts
+ /// are easy. First there is a header which describes the location and
+ /// organization of the three components that follow.
+ /// 1. The landing pad site information describes the range of code covered
+ /// by the try. In our case it's an accumulation of the ranges covered
+ /// by the invokes in the try. There is also a reference to the landing
+ /// pad that handles the exception once processed. Finally an index into
+ /// the actions table.
+ /// 2. The action table, in our case, is composed of pairs of type ids
+ /// and next action offset. Starting with the action index from the
+ /// landing pad site, each type Id is checked for a match to the current
+ /// exception. If it matches then the exception and type id are passed
+ /// on to the landing pad. Otherwise the next action is looked up. This
+ /// chain is terminated with a next action of zero. If no type id is
+ /// found then the frame is unwound and handling continues.
+ /// 3. Type id table contains references to all the C++ typeinfo for all
+ /// catches in the function. This table is reverse indexed base 1.
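+ ///
+ /// A rough sketch of the layout (illustrative only, encodings elided):
+ /// header
+ /// call sites: [ region start | region length | landing pad | action index ]*
+ /// actions: [ type id value | next action offset ]* (0 ends a chain)
+ /// type infos: [ typeinfo N ... typeinfo 1 ] (referenced base 1 from the end)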
+
+ /// SharedTypeIds - How many leading type ids two landing pads have in common.
+ static unsigned SharedTypeIds(const LandingPadInfo *L,
+ const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+ unsigned Count = 0;
+
+ for (; Count != MinSize; ++Count)
+ if (LIds[Count] != RIds[Count])
+ return Count;
+
+ return Count;
+ }
+
+ /// PadLT - Order landing pads lexicographically by type id.
+ static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+ for (unsigned i = 0; i != MinSize; ++i)
+ if (LIds[i] != RIds[i])
+ return LIds[i] < RIds[i];
+
+ return LSize < RSize;
+ }
+
+ struct KeyInfo {
+ static inline unsigned getEmptyKey() { return -1U; }
+ static inline unsigned getTombstoneKey() { return -2U; }
+ static unsigned getHashValue(const unsigned &Key) { return Key; }
+ static bool isPod() { return true; }
+ };
+
+ struct PadSite {
+ unsigned PadIndex;
+ unsigned SiteIndex;
+ };
+
+ typedef DenseMap<unsigned, PadSite, KeyInfo> PadMapType;
+
+ struct ActionEntry {
+ int ValueForTypeID; // The value to write - may not be equal to the type id.
+ int NextAction;
+ struct ActionEntry *Previous;
+ };
+
+ void EmitExceptionTable() {
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+ if (PadInfos.empty()) return;
+
+ // Sort the landing pads in order of their type ids. This is used to fold
+ // duplicate actions.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+ for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+ LandingPads.push_back(&PadInfos[i]);
+ std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+ // Gather first action index for each landing pad site.
+ SmallVector<unsigned, 64> FirstActions;
+ FirstActions.reserve(PadInfos.size());
+
+ // The actions table.
+ SmallVector<ActionEntry, 32> Actions;
+
+ // Negative type ids index into FilterIds, positive type ids index into
+ // TypeInfos. The value written for a positive type id is just the type
+ // id itself. For a negative type id, however, the value written is the
+ // (negative) byte offset of the corresponding FilterIds entry. The byte
+ // offset is usually equal to the type id, because the FilterIds entries
+ // are written using a variable width encoding that outputs one byte per
+ // entry as long as the value written is small, but the two can differ
+ // (a worked example follows the loop below).
+ // This kind of complication does not occur for positive type ids because
+ // type infos are output using a fixed width encoding.
+ // FilterOffsets[i] holds the byte offset corresponding to FilterIds[i].
+ SmallVector<int, 16> FilterOffsets;
+ FilterOffsets.reserve(FilterIds.size());
+ int Offset = -1;
+ for (std::vector<unsigned>::const_iterator I = FilterIds.begin(),
+ E = FilterIds.end(); I != E; ++I) {
+ FilterOffsets.push_back(Offset);
+ Offset -= Asm->SizeULEB128(*I);
+ }
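+ // Worked example (illustrative, not from this patch): with
+ // FilterIds = {1, 2, 200} the ULEB128 sizes are 1, 1 and 2 bytes, so
+ // FilterOffsets becomes {-1, -2, -3}.  A fourth filter added after 200
+ // would get offset -5 rather than -4 (the value of the corresponding
+ // negative type id); that is the case where the two differ.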
+
+ // Compute sizes for exception table.
+ unsigned SizeSites = 0;
+ unsigned SizeActions = 0;
+
+ // Look at each landing pad site to compute size. We need the size of each
+ // landing pad site info and the size of the landing pad's actions.
+ int FirstAction = 0;
+
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LP = LandingPads[i];
+ const std::vector<int> &TypeIds = LP->TypeIds;
+ const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0;
+ unsigned SizeSiteActions = 0;
+
+ if (NumShared < TypeIds.size()) {
+ unsigned SizeAction = 0;
+ ActionEntry *PrevAction = 0;
+
+ if (NumShared) {
+ const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size();
+ assert(Actions.size());
+ PrevAction = &Actions.back();
+ SizeAction = Asm->SizeSLEB128(PrevAction->NextAction) +
+ Asm->SizeSLEB128(PrevAction->ValueForTypeID);
+ for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+ SizeAction -= Asm->SizeSLEB128(PrevAction->ValueForTypeID);
+ SizeAction += -PrevAction->NextAction;
+ PrevAction = PrevAction->Previous;
+ }
+ }
+
+ // Compute the actions.
+ for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) {
+ int TypeID = TypeIds[I];
+ assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+ int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+ unsigned SizeTypeID = Asm->SizeSLEB128(ValueForTypeID);
+
+ int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+ SizeAction = SizeTypeID + Asm->SizeSLEB128(NextAction);
+ SizeSiteActions += SizeAction;
+
+ ActionEntry Action = {ValueForTypeID, NextAction, PrevAction};
+ Actions.push_back(Action);
+
+ PrevAction = &Actions.back();
+ }
+
+ // Record the first action of the landing pad site.
+ FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+ } // else identical - re-use previous FirstAction
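+ // Worked example (illustrative): for a landing pad with TypeIds {1, 2}
+ // and nothing shared, the inner loop above emits record A = {filter 1,
+ // next 0} (2 bytes) and record B = {filter 2, next -3} (2 bytes); B's -3
+ // points back at A.  FirstAction becomes 0 + 4 - 2 + 1 = 3, so the call
+ // site enters the chain at record B, follows -3 back to record A, and
+ // stops at A's next-action of zero.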
+
+ FirstActions.push_back(FirstAction);
+
+ // Compute this site's contribution to the size.
+ SizeActions += SizeSiteActions;
+ unsigned M = LP->BeginLabels.size();
+ SizeSites += M*(sizeof(int32_t) + // Site start.
+ sizeof(int32_t) + // Site length.
+ sizeof(int32_t) + // Landing pad.
+ Asm->SizeULEB128(FirstAction)); // Action.
+ }
+
+ // Final tallies.
+ unsigned SizeTypes = TypeInfos.size() * TAI->getAddressSize();
+
+ unsigned TypeOffset = sizeof(int8_t) + // Call site format
+ Asm->SizeULEB128(SizeSites) + // Call-site table length
+ SizeSites + SizeActions + SizeTypes;
+
+ unsigned TotalSize = sizeof(int8_t) + // LPStart format
+ sizeof(int8_t) + // TType format
+ Asm->SizeULEB128(TypeOffset) + // TType base offset
+ TypeOffset;
+
+ unsigned SizeAlign = (4 - TotalSize) & 3;
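+ // Illustrative arithmetic: SizeAlign rounds the table up to the next
+ // 4-byte boundary by padding in front of it. If TotalSize were 13, then
+ // SizeAlign = (4 - 13) & 3 = 3, and three zero bytes are emitted below
+ // before the "exception" label.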
+
+ // Begin the exception table.
+ Asm->SwitchToDataSection(TAI->getDwarfExceptionSection());
+ O << "GCC_except_table" << SubprogramCount << ":\n";
+ Asm->EmitAlignment(2);
+ for (unsigned i = 0; i != SizeAlign; ++i) {
+ Asm->EmitInt8(0);
+ Asm->EOL("Padding");
+ }
+ EmitLabel("exception", SubprogramCount);
+
+ // Emit the header.
+ Asm->EmitInt8(DW_EH_PE_omit);
+ Asm->EOL("LPStart format (DW_EH_PE_omit)");
+ Asm->EmitInt8(DW_EH_PE_absptr);
+ Asm->EOL("TType format (DW_EH_PE_absptr)");
+ Asm->EmitULEB128Bytes(TypeOffset);
+ Asm->EOL("TType base offset");
+ Asm->EmitInt8(DW_EH_PE_udata4);
+ Asm->EOL("Call site format (DW_EH_PE_udata4)");
+ Asm->EmitULEB128Bytes(SizeSites);
+ Asm->EOL("Call-site table length");
+
+ // Emit the landing pad site information in order of address.
+ PadMapType PadMap;
+
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LandingPad = LandingPads[i];
+ for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+ unsigned BeginLabel = LandingPad->BeginLabels[j];
+ assert(!PadMap.count(BeginLabel) && "duplicate landing pad labels!");
+ PadSite P = { i, j };
+ PadMap[BeginLabel] = P;
+ }
+ }
+
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+ MI != E; ++MI) {
+ if (MI->getOpcode() != TargetInstrInfo::LABEL)
+ continue;
+
+ unsigned BeginLabel = MI->getOperand(0).getImmedValue();
+ PadMapType::iterator L = PadMap.find(BeginLabel);
+
+ if (L == PadMap.end())
+ continue;
+
+ PadSite P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+
+ assert(BeginLabel == LandingPad->BeginLabels[P.SiteIndex] &&
+ "Inconsistent landing pad map!");
+
+ EmitSectionOffset("label", "eh_func_begin", BeginLabel, SubprogramCount,
+ false, true);
+ Asm->EOL("Region start");
+
+ EmitDifference("label", LandingPad->EndLabels[P.SiteIndex],
+ "label", BeginLabel);
+ Asm->EOL("Region length");
+
+ if (LandingPad->TypeIds.empty()) {
+ if (TAI->getAddressSize() == sizeof(int32_t))
+ Asm->EmitInt32(0);
+ else
+ Asm->EmitInt64(0);
+ } else {
+ EmitSectionOffset("label", "eh_func_begin",
+ LandingPad->LandingPadLabel, SubprogramCount,
+ false, true);
+ }
+ Asm->EOL("Landing pad");
+
+ Asm->EmitULEB128Bytes(FirstActions[P.PadIndex]);
+ Asm->EOL("Action");
+ }
+ }
+
+ // Emit the actions.
+ for (unsigned I = 0, N = Actions.size(); I != N; ++I) {
+ ActionEntry &Action = Actions[I];
+
+ Asm->EmitSLEB128Bytes(Action.ValueForTypeID);
+ Asm->EOL("TypeInfo index");
+ Asm->EmitSLEB128Bytes(Action.NextAction);
+ Asm->EOL("Next action");
+ }
+
+ // Emit the type ids.
+ for (unsigned M = TypeInfos.size(); M; --M) {
+ GlobalVariable *GV = TypeInfos[M - 1];
+
+ if (TAI->getAddressSize() == sizeof(int32_t))
+ O << TAI->getData32bitsDirective();
+ else
+ O << TAI->getData64bitsDirective();
+
+ if (GV)
+ O << Asm->getGlobalLinkName(GV);
+ else
+ O << "0";
+
+ Asm->EOL("TypeInfo");
+ }
+
+ // Emit the filter typeids.
+ for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) {
+ unsigned TypeID = FilterIds[j];
+ Asm->EmitULEB128Bytes(TypeID);
+ Asm->EOL("Filter TypeInfo index");
+ }
+
+ Asm->EmitAlignment(2);
+ }
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfException(std::ostream &OS, AsmPrinter *A, const TargetAsmInfo *T)
+ : Dwarf(OS, A, T)
+ , shouldEmit(false)
+ {}
+
+ virtual ~DwarfException() {}
+
+ /// SetModuleInfo - Set machine module information when it's known that the
+ /// pass manager has created it. Set by the target AsmPrinter.
+ void SetModuleInfo(MachineModuleInfo *mmi) {
+ MMI = mmi;
+ }
+
+ /// BeginModule - Emit all exception information that should come prior to the
+ /// content.
+ void BeginModule(Module *M) {
+ this->M = M;
+ }
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ void EndModule() {
+ if (!shouldEmit) return;
+
+ const std::vector<Function *> Personalities = MMI->getPersonalities();
+ for (unsigned i = 0; i < Personalities.size(); ++i)
+ EmitCommonEHFrame(Personalities[i], i);
+
+ for (std::vector<FunctionEHFrameInfo>::iterator I = EHFrames.begin(),
+ E = EHFrames.end(); I != E; ++I)
+ EmitEHFrame(*I);
+ }
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ void BeginFunction(MachineFunction *MF) {
+ this->MF = MF;
+
+ if (MMI &&
+ ExceptionHandling &&
+ TAI->doesSupportExceptionHandling()) {
+ shouldEmit = true;
+ // Assumes in correct section after the entry point.
+ EmitLabel("eh_func_begin", ++SubprogramCount);
+ }
+ }
+
+ /// EndFunction - Gather and emit post-function exception information.
+ ///
+ void EndFunction() {
+ if (!shouldEmit) return;
+
+ EmitLabel("eh_func_end", SubprogramCount);
+ EmitExceptionTable();
+
+ // Save EH frame information
+ EHFrames.push_back(FunctionEHFrameInfo(getAsm()->CurrentFnName,
+ SubprogramCount,
+ MMI->getPersonalityIndex(),
+ MF->getFrameInfo()->hasCalls(),
+ !MMI->getLandingPads().empty(),
+ MMI->getFrameMoves()));
+ }
+};
+
+} // End of namespace llvm
+
+//===----------------------------------------------------------------------===//
+
+/// Emit - Print the abbreviation using the specified Dwarf writer.
+///
+void DIEAbbrev::Emit(const DwarfDebug &DD) const {
+ // Emit its Dwarf tag type.
+ DD.getAsm()->EmitULEB128Bytes(Tag);
+ DD.getAsm()->EOL(TagString(Tag));
+
+ // Emit whether it has children DIEs.
+ DD.getAsm()->EmitULEB128Bytes(ChildrenFlag);
+ DD.getAsm()->EOL(ChildrenString(ChildrenFlag));
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ const DIEAbbrevData &AttrData = Data[i];
+
+ // Emit attribute type.
+ DD.getAsm()->EmitULEB128Bytes(AttrData.getAttribute());
+ DD.getAsm()->EOL(AttributeString(AttrData.getAttribute()));
+
+ // Emit form type.
+ DD.getAsm()->EmitULEB128Bytes(AttrData.getForm());
+ DD.getAsm()->EOL(FormEncodingString(AttrData.getForm()));
+ }
+
+ // Mark end of abbreviation.
+ DD.getAsm()->EmitULEB128Bytes(0); DD.getAsm()->EOL("EOM(1)");
+ DD.getAsm()->EmitULEB128Bytes(0); DD.getAsm()->EOL("EOM(2)");
+}
+
+#ifndef NDEBUG
+void DIEAbbrev::print(std::ostream &O) {
+ O << "Abbreviation @"
+ << std::hex << (intptr_t)this << std::dec
+ << " "
+ << TagString(Tag)
+ << " "
+ << ChildrenString(ChildrenFlag)
+ << "\n";
+
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << " "
+ << AttributeString(Data[i].getAttribute())
+ << " "
+ << FormEncodingString(Data[i].getForm())
+ << "\n";
+ }
+}
+void DIEAbbrev::dump() { print(cerr); }
+#endif
+
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+void DIEValue::dump() {
+ print(cerr);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit integer of appropriate size.
+///
+void DIEInteger::EmitValue(DwarfDebug &DD, unsigned Form) {
+ switch (Form) {
+ case DW_FORM_flag: // Fall thru
+ case DW_FORM_ref1: // Fall thru
+ case DW_FORM_data1: DD.getAsm()->EmitInt8(Integer); break;
+ case DW_FORM_ref2: // Fall thru
+ case DW_FORM_data2: DD.getAsm()->EmitInt16(Integer); break;
+ case DW_FORM_ref4: // Fall thru
+ case DW_FORM_data4: DD.getAsm()->EmitInt32(Integer); break;
+ case DW_FORM_ref8: // Fall thru
+ case DW_FORM_data8: DD.getAsm()->EmitInt64(Integer); break;
+ case DW_FORM_udata: DD.getAsm()->EmitULEB128Bytes(Integer); break;
+ case DW_FORM_sdata: DD.getAsm()->EmitSLEB128Bytes(Integer); break;
+ default: assert(0 && "DIE Value form not supported yet"); break;
+ }
+}
+
+/// SizeOf - Determine size of integer value in bytes.
+///
+unsigned DIEInteger::SizeOf(const DwarfDebug &DD, unsigned Form) const {
+ switch (Form) {
+ case DW_FORM_flag: // Fall thru
+ case DW_FORM_ref1: // Fall thru
+ case DW_FORM_data1: return sizeof(int8_t);
+ case DW_FORM_ref2: // Fall thru
+ case DW_FORM_data2: return sizeof(int16_t);
+ case DW_FORM_ref4: // Fall thru
+ case DW_FORM_data4: return sizeof(int32_t);
+ case DW_FORM_ref8: // Fall thru
+ case DW_FORM_data8: return sizeof(int64_t);
+ case DW_FORM_udata: return DD.getAsm()->SizeULEB128(Integer);
+ case DW_FORM_sdata: return DD.getAsm()->SizeSLEB128(Integer);
+ default: assert(0 && "DIE Value form not supported yet"); break;
+ }
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit string value.
+///
+void DIEString::EmitValue(DwarfDebug &DD, unsigned Form) {
+ DD.getAsm()->EmitString(String);
+}
+
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit label value.
+///
+void DIEDwarfLabel::EmitValue(DwarfDebug &DD, unsigned Form) {
+ DD.EmitReference(Label);
+}
+
+/// SizeOf - Determine size of label value in bytes.
+///
+unsigned DIEDwarfLabel::SizeOf(const DwarfDebug &DD, unsigned Form) const {
+ return DD.getTargetAsmInfo()->getAddressSize();
+}
+
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit label value.
+///
+void DIEObjectLabel::EmitValue(DwarfDebug &DD, unsigned Form) {
+ DD.EmitReference(Label);
+}
+
+/// SizeOf - Determine size of label value in bytes.
+///
+unsigned DIEObjectLabel::SizeOf(const DwarfDebug &DD, unsigned Form) const {
+ return DD.getTargetAsmInfo()->getAddressSize();
+}
+
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit delta value.
+///
+void DIEDelta::EmitValue(DwarfDebug &DD, unsigned Form) {
+ bool IsSmall = Form == DW_FORM_data4;
+ DD.EmitDifference(LabelHi, LabelLo, IsSmall);
+}
+
+/// SizeOf - Determine size of delta value in bytes.
+///
+unsigned DIEDelta::SizeOf(const DwarfDebug &DD, unsigned Form) const {
+ if (Form == DW_FORM_data4) return 4;
+ return DD.getTargetAsmInfo()->getAddressSize();
+}
+
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit debug information entry offset.
+///
+void DIEntry::EmitValue(DwarfDebug &DD, unsigned Form) {
+ DD.getAsm()->EmitInt32(Entry->getOffset());
+}
+
+//===----------------------------------------------------------------------===//
+
+/// ComputeSize - calculate the size of the block.
+///
+unsigned DIEBlock::ComputeSize(DwarfDebug &DD) {
+ if (!Size) {
+ const std::vector<DIEAbbrevData> &AbbrevData = Abbrev.getData();
+
+ for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+ Size += Values[i]->SizeOf(DD, AbbrevData[i].getForm());
+ }
+ }
+ return Size;
+}
+
+/// EmitValue - Emit block data.
+///
+void DIEBlock::EmitValue(DwarfDebug &DD, unsigned Form) {
+ switch (Form) {
+ case DW_FORM_block1: DD.getAsm()->EmitInt8(Size); break;
+ case DW_FORM_block2: DD.getAsm()->EmitInt16(Size); break;
+ case DW_FORM_block4: DD.getAsm()->EmitInt32(Size); break;
+ case DW_FORM_block: DD.getAsm()->EmitULEB128Bytes(Size); break;
+ default: assert(0 && "Improper form for block"); break;
+ }
+
+ const std::vector<DIEAbbrevData> &AbbrevData = Abbrev.getData();
+
+ for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+ DD.getAsm()->EOL();
+ Values[i]->EmitValue(DD, AbbrevData[i].getForm());
+ }
+}
+
+/// SizeOf - Determine size of block data in bytes.
+///
+unsigned DIEBlock::SizeOf(const DwarfDebug &DD, unsigned Form) const {
+ switch (Form) {
+ case DW_FORM_block1: return Size + sizeof(int8_t);
+ case DW_FORM_block2: return Size + sizeof(int16_t);
+ case DW_FORM_block4: return Size + sizeof(int32_t);
+ case DW_FORM_block: return Size + DD.getAsm()->SizeULEB128(Size);
+ default: assert(0 && "Improper form for block"); break;
+ }
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+/// DIE Implementation
+
+DIE::~DIE() {
+ for (unsigned i = 0, N = Children.size(); i < N; ++i)
+ delete Children[i];
+}
+
+/// AddSiblingOffset - Add a sibling offset field to the front of the DIE.
+///
+void DIE::AddSiblingOffset() {
+ DIEInteger *DI = new DIEInteger(0);
+ Values.insert(Values.begin(), DI);
+ Abbrev.AddFirstAttribute(DW_AT_sibling, DW_FORM_ref4);
+}
+
+/// Profile - Used to gather unique data for the value folding set.
+///
+void DIE::Profile(FoldingSetNodeID &ID) {
+ Abbrev.Profile(ID);
+
+ for (unsigned i = 0, N = Children.size(); i < N; ++i)
+ ID.AddPointer(Children[i]);
+
+ for (unsigned j = 0, M = Values.size(); j < M; ++j)
+ ID.AddPointer(Values[j]);
+}
+
+#ifndef NDEBUG
+void DIE::print(std::ostream &O, unsigned IncIndent) {
+ static unsigned IndentCount = 0;
+ IndentCount += IncIndent;
+ const std::string Indent(IndentCount, ' ');
+ bool isBlock = Abbrev.getTag() == 0;
+
+ if (!isBlock) {
+ O << Indent
+ << "Die: "
+ << "0x" << std::hex << (intptr_t)this << std::dec
+ << ", Offset: " << Offset
+ << ", Size: " << Size
+ << "\n";
+
+ O << Indent
+ << TagString(Abbrev.getTag())
+ << " "
+ << ChildrenString(Abbrev.getChildrenFlag());
+ } else {
+ O << "Size: " << Size;
+ }
+ O << "\n";
+
+ const std::vector<DIEAbbrevData> &Data = Abbrev.getData();
+
+ IndentCount += 2;
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << Indent;
+ if (!isBlock) {
+ O << AttributeString(Data[i].getAttribute());
+ } else {
+ O << "Blk[" << i << "]";
+ }
+ O << " "
+ << FormEncodingString(Data[i].getForm())
+ << " ";
+ Values[i]->print(O);
+ O << "\n";
+ }
+ IndentCount -= 2;
+
+ for (unsigned j = 0, M = Children.size(); j < M; ++j) {
+ Children[j]->print(O, 4);
+ }
+
+ if (!isBlock) O << "\n";
+ IndentCount -= IncIndent;
+}
+
+void DIE::dump() {
+ print(cerr);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+/// DwarfWriter Implementation
+///
+
+DwarfWriter::DwarfWriter(std::ostream &OS, AsmPrinter *A,
+ const TargetAsmInfo *T) {
+ DE = new DwarfException(OS, A, T);
+ DD = new DwarfDebug(OS, A, T);
+}
+
+DwarfWriter::~DwarfWriter() {
+ delete DE;
+ delete DD;
+}
+
+/// SetModuleInfo - Set machine module info when it's known that the pass
+/// manager has created it. Set by the target AsmPrinter.
+void DwarfWriter::SetModuleInfo(MachineModuleInfo *MMI) {
+ DD->SetModuleInfo(MMI);
+ DE->SetModuleInfo(MMI);
+}
+
+/// BeginModule - Emit all Dwarf sections that should come prior to the
+/// content.
+void DwarfWriter::BeginModule(Module *M) {
+ DE->BeginModule(M);
+ DD->BeginModule(M);
+}
+
+/// EndModule - Emit all Dwarf sections that should come after the content.
+///
+void DwarfWriter::EndModule() {
+ DE->EndModule();
+ DD->EndModule();
+}
+
+/// BeginFunction - Gather pre-function debug information. Assumes being
+/// emitted immediately after the function entry point.
+void DwarfWriter::BeginFunction(MachineFunction *MF) {
+ DE->BeginFunction(MF);
+ DD->BeginFunction(MF);
+}
+
+/// EndFunction - Gather and emit post-function debug information.
+///
+void DwarfWriter::EndFunction() {
+ DD->EndFunction();
+ DE->EndFunction();
+
+ if (MachineModuleInfo *MMI = DD->getMMI() ? DD->getMMI() : DE->getMMI()) {
+ // Clear function debug information.
+ MMI->EndFunction();
+ }
+}
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
new file mode 100644
index 0000000..8ecddb8
--- /dev/null
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -0,0 +1,547 @@
+//===-- ELFWriter.cpp - Target-independent ELF Writer code ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the target-independent ELF writer. This file writes out
+// the ELF file in the following order:
+//
+// #1. ELF Header
+// #2. '.text' section
+// #3. '.data' section
+// #4. '.bss' section (conceptual position in file)
+// ...
+// #X. '.shstrtab' section
+// #Y. Section Table
+//
+// The entries in the section table are laid out as:
+// #0. Null entry [required]
+// #1. ".text" entry - the program code
+// #2. ".data" entry - global variables with initializers. [ if needed ]
+// #3. ".bss" entry - global variables without initializers. [ if needed ]
+// ...
+// #N. ".shstrtab" entry - String table for the section names.
+//
+// NOTE: This code should eventually be extended to support 64-bit ELF (this
+// won't be hard), but we haven't done so yet!
+//
+//===----------------------------------------------------------------------===//
+
+#include "ELFWriter.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/FileWriters.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/OutputBuffer.h"
+#include "llvm/Support/Streams.h"
+#include <list>
+using namespace llvm;
+
+char ELFWriter::ID = 0;
+/// AddELFWriter - Concrete function to add the ELF writer to the function pass
+/// manager.
+MachineCodeEmitter *llvm::AddELFWriter(FunctionPassManager &FPM,
+ std::ostream &O,
+ TargetMachine &TM) {
+ ELFWriter *EW = new ELFWriter(O, TM);
+ FPM.add(EW);
+ return &EW->getMachineCodeEmitter();
+}
+
+//===----------------------------------------------------------------------===//
+// ELFCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+ /// ELFCodeEmitter - This class is used by the ELFWriter to emit the code for
+ /// functions to the ELF file.
+ class ELFCodeEmitter : public MachineCodeEmitter {
+ ELFWriter &EW;
+ TargetMachine &TM;
+ ELFWriter::ELFSection *ES; // Section to write to.
+ std::vector<unsigned char> *OutBuffer;
+ size_t FnStart;
+ public:
+ ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM), OutBuffer(0) {}
+
+ void startFunction(MachineFunction &F);
+ bool finishFunction(MachineFunction &F);
+
+ void addRelocation(const MachineRelocation &MR) {
+ assert(0 && "relo not handled yet!");
+ }
+
+ virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+ }
+
+ virtual intptr_t getConstantPoolEntryAddress(unsigned Index) const {
+ assert(0 && "CP not implementated yet!");
+ return 0;
+ }
+ virtual intptr_t getJumpTableEntryAddress(unsigned Index) const {
+ assert(0 && "JT not implementated yet!");
+ return 0;
+ }
+
+ virtual intptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+ assert(0 && "JT not implementated yet!");
+ return 0;
+ }
+
+ /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
+ void startFunctionStub(unsigned StubSize, unsigned Alignment = 1) {
+ assert(0 && "JIT specific function called!");
+ abort();
+ }
+ void *finishFunctionStub(const Function *F) {
+ assert(0 && "JIT specific function called!");
+ abort();
+ return 0;
+ }
+ };
+}
+
+/// startFunction - This callback is invoked when a new machine function is
+/// about to be emitted.
+void ELFCodeEmitter::startFunction(MachineFunction &F) {
+ // Align the output buffer to the appropriate alignment.
+ unsigned Align = 16; // FIXME: GENERICIZE!!
+ // Get the ELF Section that this function belongs in.
+ ES = &EW.getSection(".text", ELFWriter::ELFSection::SHT_PROGBITS,
+ ELFWriter::ELFSection::SHF_EXECINSTR |
+ ELFWriter::ELFSection::SHF_ALLOC);
+ OutBuffer = &ES->SectionData;
+ cerr << "FIXME: This code needs to be updated for changes in the "
+ << "CodeEmitter interfaces. In particular, this should set "
+ << "BufferBegin/BufferEnd/CurBufferPtr, not deal with OutBuffer!";
+ abort();
+
+ // Upgrade the section alignment if required.
+ if (ES->Align < Align) ES->Align = Align;
+
+ // Add padding zeros to the end of the buffer to make sure that the
+ // function will start on the correct byte alignment within the section.
+ OutputBuffer OB(*OutBuffer,
+ TM.getTargetData()->getPointerSizeInBits() == 64,
+ TM.getTargetData()->isLittleEndian());
+ OB.align(Align);
+ FnStart = OutBuffer->size();
+}
+
+/// finishFunction - This callback is invoked after the function is completely
+/// finished.
+bool ELFCodeEmitter::finishFunction(MachineFunction &F) {
+ // We now know the size of the function, add a symbol to represent it.
+ ELFWriter::ELFSym FnSym(F.getFunction());
+
+ // Figure out the binding (linkage) of the symbol.
+ switch (F.getFunction()->getLinkage()) {
+ default:
+ // appending linkage is illegal for functions.
+ assert(0 && "Unknown linkage type!");
+ case GlobalValue::ExternalLinkage:
+ FnSym.SetBind(ELFWriter::ELFSym::STB_GLOBAL);
+ break;
+ case GlobalValue::LinkOnceLinkage:
+ case GlobalValue::WeakLinkage:
+ FnSym.SetBind(ELFWriter::ELFSym::STB_WEAK);
+ break;
+ case GlobalValue::InternalLinkage:
+ FnSym.SetBind(ELFWriter::ELFSym::STB_LOCAL);
+ break;
+ }
+
+ ES->Size = OutBuffer->size();
+
+ FnSym.SetType(ELFWriter::ELFSym::STT_FUNC);
+ FnSym.SectionIdx = ES->SectionIdx;
+ FnSym.Value = FnStart; // Value = Offset from start of Section.
+ FnSym.Size = OutBuffer->size()-FnStart;
+
+ // Finally, add it to the symtab.
+ EW.SymbolTable.push_back(FnSym);
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// ELFWriter Implementation
+//===----------------------------------------------------------------------===//
+
+ELFWriter::ELFWriter(std::ostream &o, TargetMachine &tm)
+ : MachineFunctionPass((intptr_t)&ID), O(o), TM(tm) {
+ e_flags = 0; // e_flags defaults to 0, no flags.
+
+ is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+ isLittleEndian = TM.getTargetData()->isLittleEndian();
+
+ // Create the machine code emitter object for this target.
+ MCE = new ELFCodeEmitter(*this);
+ NumSections = 0;
+}
+
+ELFWriter::~ELFWriter() {
+ delete MCE;
+}
+
+// doInitialization - Emit the file header and all of the global variables for
+// the module to the ELF file.
+bool ELFWriter::doInitialization(Module &M) {
+ Mang = new Mangler(M);
+
+ // Local alias to shorten the code that follows.
+ std::vector<unsigned char> &FH = FileHeader;
+ OutputBuffer FHOut(FH, is64Bit, isLittleEndian);
+
+ FHOut.outbyte(0x7F); // EI_MAG0
+ FHOut.outbyte('E'); // EI_MAG1
+ FHOut.outbyte('L'); // EI_MAG2
+ FHOut.outbyte('F'); // EI_MAG3
+ FHOut.outbyte(is64Bit ? 2 : 1); // EI_CLASS
+ FHOut.outbyte(isLittleEndian ? 1 : 2); // EI_DATA
+ FHOut.outbyte(1); // EI_VERSION
+ FH.resize(16); // EI_PAD up to 16 bytes.
+
+ // This should change for shared objects.
+ FHOut.outhalf(1); // e_type = ET_REL
+ FHOut.outword(TM.getELFWriterInfo()->getEMachine()); // target-defined
+ FHOut.outword(1); // e_version = 1
+ FHOut.outaddr(0); // e_entry = 0 -> no entry point in .o file
+ FHOut.outaddr(0); // e_phoff = 0 -> no program header for .o
+
+ ELFHeader_e_shoff_Offset = FH.size();
+ FHOut.outaddr(0); // e_shoff
+ FHOut.outword(e_flags); // e_flags = whatever the target wants
+
+ FHOut.outhalf(is64Bit ? 64 : 52); // e_ehsize = ELF header size
+ FHOut.outhalf(0); // e_phentsize = prog header entry size
+ FHOut.outhalf(0); // e_phnum = # prog header entries = 0
+ FHOut.outhalf(is64Bit ? 64 : 40); // e_shentsize = sect hdr entry size
+
+
+ ELFHeader_e_shnum_Offset = FH.size();
+ FHOut.outhalf(0); // e_shnum = # of section header ents
+ ELFHeader_e_shstrndx_Offset = FH.size();
+ FHOut.outhalf(0); // e_shstrndx = Section # of '.shstrtab'
+
+ // Add the null section, which is required to be first in the file.
+ getSection("", 0, 0);
+
+ // Start up the symbol table. The first entry in the symtab is the null
+ // entry.
+ SymbolTable.push_back(ELFSym(0));
+
+ return false;
+}
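+// Illustrative only (not part of this patch): for a 32-bit little-endian
+// target, the 16 e_ident bytes built above are
+//
+//   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
+//
+// i.e. the "\177ELF" magic, EI_CLASS = 1 (ELFCLASS32), EI_DATA = 1
+// (ELFDATA2LSB), EI_VERSION = 1, and nine bytes of EI_PAD. They are
+// followed by e_type = ET_REL (1), since this writer only produces
+// relocatable .o files.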
+
+void ELFWriter::EmitGlobal(GlobalVariable *GV) {
+ // If this is an external global, emit it now. TODO: Note that it would be
+ // better to ignore the symbol here and only add it to the symbol table if
+ // referenced.
+ if (!GV->hasInitializer()) {
+ ELFSym ExternalSym(GV);
+ ExternalSym.SetBind(ELFSym::STB_GLOBAL);
+ ExternalSym.SetType(ELFSym::STT_NOTYPE);
+ ExternalSym.SectionIdx = ELFSection::SHN_UNDEF;
+ SymbolTable.push_back(ExternalSym);
+ return;
+ }
+
+ const Type *GVType = (const Type*)GV->getType();
+ unsigned Align = TM.getTargetData()->getPrefTypeAlignment(GVType);
+ unsigned Size = TM.getTargetData()->getTypeSize(GVType);
+
+ // If this global has a zero initializer, it is part of the .bss or common
+ // section.
+ if (GV->getInitializer()->isNullValue()) {
+ // If this global is part of the common block, add it now. Variables are
+ // part of the common block if they are zero initialized and allowed to be
+ // merged with other symbols.
+ if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage()) {
+ ELFSym CommonSym(GV);
+ // Value for common symbols is the alignment required.
+ CommonSym.Value = Align;
+ CommonSym.Size = Size;
+ CommonSym.SetBind(ELFSym::STB_GLOBAL);
+ CommonSym.SetType(ELFSym::STT_OBJECT);
+ // TODO SOMEDAY: add ELF visibility.
+ CommonSym.SectionIdx = ELFSection::SHN_COMMON;
+ SymbolTable.push_back(CommonSym);
+ return;
+ }
+
+ // Otherwise, this symbol is part of the .bss section. Emit it now.
+
+ // Handle alignment. Ensure section is aligned at least as much as required
+ // by this symbol.
+ ELFSection &BSSSection = getBSSSection();
+ BSSSection.Align = std::max(BSSSection.Align, Align);
+
+ // Within the section, emit enough virtual padding to get us to an alignment
+ // boundary.
+ if (Align)
+ BSSSection.Size = (BSSSection.Size + Align - 1) & ~(Align-1);
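+ // The statement above is the usual round-up-to-alignment idiom; e.g. a
+ // current .bss size of 10 with Align == 8 becomes (10 + 7) & ~7 = 16.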
+
+ ELFSym BSSSym(GV);
+ BSSSym.Value = BSSSection.Size;
+ BSSSym.Size = Size;
+ BSSSym.SetType(ELFSym::STT_OBJECT);
+
+ switch (GV->getLinkage()) {
+ default: // weak/linkonce handled above
+ assert(0 && "Unexpected linkage type!");
+ case GlobalValue::AppendingLinkage: // FIXME: This should be improved!
+ case GlobalValue::ExternalLinkage:
+ BSSSym.SetBind(ELFSym::STB_GLOBAL);
+ break;
+ case GlobalValue::InternalLinkage:
+ BSSSym.SetBind(ELFSym::STB_LOCAL);
+ break;
+ }
+
+ // Set the idx of the .bss section
+ BSSSym.SectionIdx = BSSSection.SectionIdx;
+ SymbolTable.push_back(BSSSym);
+
+ // Reserve space in the .bss section for this symbol.
+ BSSSection.Size += Size;
+ return;
+ }
+
+ // FIXME: handle .rodata
+ //assert(!GV->isConstant() && "unimp");
+
+ // FIXME: handle .data
+ //assert(0 && "unimp");
+}
+
+
+bool ELFWriter::runOnMachineFunction(MachineFunction &MF) {
+ // Nothing to do here, this is all done through the MCE object above.
+ return false;
+}
+
+/// doFinalization - Now that the module has been completely processed, emit
+/// the ELF file to 'O'.
+bool ELFWriter::doFinalization(Module &M) {
+ // Okay, the ELF header and .text sections have been completed, build the
+ // .data, .bss, and "common" sections next.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ EmitGlobal(I);
+
+ // Emit the symbol table now, if non-empty.
+ EmitSymbolTable();
+
+ // FIXME: Emit the relocations now.
+
+ // Emit the string table for the sections in the ELF file we have.
+ EmitSectionTableStringTable();
+
+ // Emit the sections to the .o file, and emit the section table for the file.
+ OutputSectionsAndSectionTable();
+
+ // We are done with the abstract symbols.
+ SectionList.clear();
+ NumSections = 0;
+
+ // Release the name mangler object.
+ delete Mang; Mang = 0;
+ return false;
+}
+
+/// EmitSymbolTable - If the current symbol table is non-empty, emit the string
+/// table for it and then the symbol table itself.
+void ELFWriter::EmitSymbolTable() {
+ if (SymbolTable.size() == 1) return; // Only the null entry.
+
+ // FIXME: compact all local symbols to the start of the symtab.
+ unsigned FirstNonLocalSymbol = 1;
+
+ ELFSection &StrTab = getSection(".strtab", ELFSection::SHT_STRTAB, 0);
+ StrTab.Align = 1;
+
+ DataBuffer &StrTabBuf = StrTab.SectionData;
+ OutputBuffer StrTabOut(StrTabBuf, is64Bit, isLittleEndian);
+
+ // Emit the required null byte at string table offset zero and point the
+ // null symbol's name at it.
+ StrTabOut.outbyte(0);
+ SymbolTable[0].NameIdx = 0;
+ unsigned Index = 1;
+ for (unsigned i = 1, e = SymbolTable.size(); i != e; ++i) {
+ // Use the name mangler to uniquify the LLVM symbol.
+ std::string Name = Mang->getValueName(SymbolTable[i].GV);
+
+ if (Name.empty()) {
+ SymbolTable[i].NameIdx = 0;
+ } else {
+ SymbolTable[i].NameIdx = Index;
+
+ // Add the name to the output buffer, including the null terminator.
+ StrTabBuf.insert(StrTabBuf.end(), Name.begin(), Name.end());
+
+ // Add a null terminator.
+ StrTabBuf.push_back(0);
+
+ // Keep track of the number of bytes emitted to this section.
+ Index += Name.size()+1;
+ }
+ }
+ assert(Index == StrTabBuf.size());
+ StrTab.Size = Index;
+
+ // Now that we have emitted the string table and know the offset into the
+ // string table of each symbol, emit the symbol table itself.
+ ELFSection &SymTab = getSection(".symtab", ELFSection::SHT_SYMTAB, 0);
+ SymTab.Align = is64Bit ? 8 : 4;
+ SymTab.Link = StrTab.SectionIdx; // sh_link = section index of .strtab.
+ SymTab.Info = FirstNonLocalSymbol; // First non-STB_LOCAL symbol.
+ SymTab.EntSize = 16; // Size of each symtab entry. FIXME: wrong for ELF64
+ DataBuffer &SymTabBuf = SymTab.SectionData;
+ OutputBuffer SymTabOut(SymTabBuf, is64Bit, isLittleEndian);
+
+ if (!is64Bit) { // 32-bit and 64-bit formats are shuffled a bit.
+ for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) {
+ ELFSym &Sym = SymbolTable[i];
+ SymTabOut.outword(Sym.NameIdx);
+ SymTabOut.outaddr32(Sym.Value);
+ SymTabOut.outword(Sym.Size);
+ SymTabOut.outbyte(Sym.Info);
+ SymTabOut.outbyte(Sym.Other);
+ SymTabOut.outhalf(Sym.SectionIdx);
+ }
+ } else {
+ for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) {
+ ELFSym &Sym = SymbolTable[i];
+ SymTabOut.outword(Sym.NameIdx);
+ SymTabOut.outbyte(Sym.Info);
+ SymTabOut.outbyte(Sym.Other);
+ SymTabOut.outhalf(Sym.SectionIdx);
+ SymTabOut.outaddr64(Sym.Value);
+ SymTabOut.outxword(Sym.Size);
+ }
+ }
+
+ SymTab.Size = SymTabBuf.size();
+}
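+// Illustrative only (not part of this patch): the two loops above follow the
+// standard ELF symbol entry layouts, which differ in field order and size:
+//
+//   Elf32_Sym: { word name; addr value; word size; byte info, other;
+//                half shndx }                                   // 16 bytes
+//   Elf64_Sym: { word name; byte info, other; half shndx;
+//                addr value; xword size }                       // 24 bytes
+//
+// which is why the 64-bit loop emits the fields in a different order and why
+// the EntSize of 16 set above is flagged as wrong for ELF64.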
+
+/// EmitSectionTableStringTable - This method adds and emits a section for the
+/// ELF Section Table string table: the string table that holds all of the
+/// section names.
+void ELFWriter::EmitSectionTableStringTable() {
+ // First step: add the section for the string table to the list of sections:
+ ELFSection &SHStrTab = getSection(".shstrtab", ELFSection::SHT_STRTAB, 0);
+
+ // Now that we know which section number is the .shstrtab section, update the
+ // e_shstrndx entry in the ELF header.
+ OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian);
+ FHOut.fixhalf(SHStrTab.SectionIdx, ELFHeader_e_shstrndx_Offset);
+
+ // Set the NameIdx of each section in the string table and emit the bytes for
+ // the string table.
+ unsigned Index = 0;
+ DataBuffer &Buf = SHStrTab.SectionData;
+
+ for (std::list<ELFSection>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I) {
+ // Set the index into the table. Note if we have lots of entries with
+ // common suffixes, we could memoize them here if we cared.
+ I->NameIdx = Index;
+
+ // Add the name to the output buffer, including the null terminator.
+ Buf.insert(Buf.end(), I->Name.begin(), I->Name.end());
+
+ // Add a null terminator.
+ Buf.push_back(0);
+
+ // Keep track of the number of bytes emitted to this section.
+ Index += I->Name.size()+1;
+ }
+
+ // Set the size of .shstrtab now that we know what it is.
+ assert(Index == Buf.size());
+ SHStrTab.Size = Index;
+}
+
+/// OutputSectionsAndSectionTable - Now that we have constructed the file header
+/// and all of the sections, emit these to the ostream destination and emit the
+/// SectionTable.
+void ELFWriter::OutputSectionsAndSectionTable() {
+ // Pass #1: Compute the file offset for each section.
+ size_t FileOff = FileHeader.size(); // File header first.
+
+ // Emit all of the section data in order.
+ for (std::list<ELFSection>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I) {
+ // Align FileOff to whatever the alignment restrictions of the section are.
+ if (I->Align)
+ FileOff = (FileOff+I->Align-1) & ~(I->Align-1);
+ I->Offset = FileOff;
+ FileOff += I->SectionData.size();
+ }
+
+ // Align Section Header.
+ unsigned TableAlign = is64Bit ? 8 : 4;
+ FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
+
+ // Now that we know where all of the sections will be emitted, set the e_shnum
+ // entry in the ELF header.
+ OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian);
+ FHOut.fixhalf(NumSections, ELFHeader_e_shnum_Offset);
+
+ // Now that we know the offset in the file of the section table, update the
+ // e_shoff address in the ELF header.
+ FHOut.fixaddr(FileOff, ELFHeader_e_shoff_Offset);
+
+ // Now that we know all of the data in the file header, emit it and all of the
+ // sections!
+ O.write((char*)&FileHeader[0], FileHeader.size());
+ FileOff = FileHeader.size();
+ DataBuffer().swap(FileHeader);
+
+ DataBuffer Table;
+ OutputBuffer TableOut(Table, is64Bit, isLittleEndian);
+
+ // Emit all of the section data and build the section table itself.
+ while (!SectionList.empty()) {
+ const ELFSection &S = *SectionList.begin();
+
+ // Align FileOff to whatever the alignment restrictions of the section are.
+ if (S.Align)
+ for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1);
+ FileOff != NewFileOff; ++FileOff)
+ O.put((char)0xAB);
+ O.write((char*)&S.SectionData[0], S.SectionData.size());
+ FileOff += S.SectionData.size();
+
+ TableOut.outword(S.NameIdx); // sh_name - Symbol table name idx
+ TableOut.outword(S.Type); // sh_type - Section contents & semantics
+ TableOut.outword(S.Flags); // sh_flags - Section flags.
+ TableOut.outaddr(S.Addr); // sh_addr - The mem addr this section is in.
+ TableOut.outaddr(S.Offset); // sh_offset - Offset from the file start.
+ TableOut.outword(S.Size); // sh_size - The section size.
+ TableOut.outword(S.Link); // sh_link - Section header table index link.
+ TableOut.outword(S.Info); // sh_info - Auxiliary information.
+ TableOut.outword(S.Align); // sh_addralign - Alignment of section.
+ TableOut.outword(S.EntSize); // sh_entsize - Size of entries in the section
+
+ SectionList.pop_front();
+ }
+
+ // Align output for the section table.
+ for (size_t NewFileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
+ FileOff != NewFileOff; ++FileOff)
+ O.put((char)0xAB);
+
+ // Emit the section table itself.
+ O.write((char*)&Table[0], Table.size());
+}
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
new file mode 100644
index 0000000..f27d78f
--- /dev/null
+++ b/lib/CodeGen/ELFWriter.h
@@ -0,0 +1,228 @@
+//===-- ELFWriter.h - Target-independent ELF writer support -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ELFWriter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ELFWRITER_H
+#define ELFWRITER_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <list>
+
+namespace llvm {
+ class GlobalVariable;
+ class Mangler;
+ class MachineCodeEmitter;
+ class ELFCodeEmitter;
+
+ /// ELFWriter - This class implements the common target-independent code for
+ /// writing ELF files. Targets should derive a class from this to
+ /// parameterize the output format.
+ ///
+ class ELFWriter : public MachineFunctionPass {
+ friend class ELFCodeEmitter;
+ public:
+ static char ID;
+
+ MachineCodeEmitter &getMachineCodeEmitter() const {
+ return *(MachineCodeEmitter*)MCE;
+ }
+
+ ELFWriter(std::ostream &O, TargetMachine &TM);
+ ~ELFWriter();
+
+ typedef std::vector<unsigned char> DataBuffer;
+
+ protected:
+ /// Output stream to send the resultant object file to.
+ ///
+ std::ostream &O;
+
+ /// Target machine description.
+ ///
+ TargetMachine &TM;
+
+ /// Mang - The object used to perform name mangling for this module.
+ ///
+ Mangler *Mang;
+
+ /// MCE - The MachineCodeEmitter object that we are exposing to emit machine
+ /// code for functions to the .o file.
+ ELFCodeEmitter *MCE;
+
+ //===------------------------------------------------------------------===//
+ // Properties to be set by the derived class ctor, used to configure the
+ // ELFWriter.
+
+ // e_machine - This field is the target specific value to emit as the
+ // e_machine member of the ELF header.
+ unsigned short e_machine;
+
+ // e_flags - The machine flags for the target. This defaults to zero.
+ unsigned e_flags;
+
+ //===------------------------------------------------------------------===//
+ // Properties inferred automatically from the target machine.
+ //
+
+ /// is64Bit/isLittleEndian - This information is inferred from the target
+ /// machine directly, indicating whether to emit a 32- or 64-bit ELF file.
+ bool is64Bit, isLittleEndian;
+
+ /// doInitialization - Emit the file header and all of the global variables
+ /// for the module to the ELF file.
+ bool doInitialization(Module &M);
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+
+ /// doFinalization - Now that the module has been completely processed, emit
+ /// the ELF file to 'O'.
+ bool doFinalization(Module &M);
+
+ private:
+ // The buffer we accumulate the file header into. Note that this should be
+ // changed into something much more efficient later (and the bitcode writer
+ // as well!).
+ DataBuffer FileHeader;
+
+ /// ELFSection - This struct contains information about each section that is
+ /// emitted to the file. This is eventually turned into the section header
+ /// table at the end of the file.
+ struct ELFSection {
+ std::string Name; // Name of the section.
+ unsigned NameIdx; // Index in .shstrtab of name, once emitted.
+ unsigned Type;
+ unsigned Flags;
+ uint64_t Addr;
+ unsigned Offset;
+ unsigned Size;
+ unsigned Link;
+ unsigned Info;
+ unsigned Align;
+ unsigned EntSize;
+
+ /// SectionIdx - The number of the section in the Section Table.
+ ///
+ unsigned short SectionIdx;
+
+ /// SectionData - The actual data for this section which we are building
+ /// up for emission to the file.
+ DataBuffer SectionData;
+
+ enum { SHT_NULL = 0, SHT_PROGBITS = 1, SHT_SYMTAB = 2, SHT_STRTAB = 3,
+ SHT_RELA = 4, SHT_HASH = 5, SHT_DYNAMIC = 6, SHT_NOTE = 7,
+ SHT_NOBITS = 8, SHT_REL = 9, SHT_SHLIB = 10, SHT_DYNSYM = 11 };
+ enum { SHN_UNDEF = 0, SHN_ABS = 0xFFF1, SHN_COMMON = 0xFFF2 };
+ enum { // SHF - ELF Section Header Flags
+ SHF_WRITE = 1 << 0, // Writable
+ SHF_ALLOC = 1 << 1, // Mapped into the process addr space
+ SHF_EXECINSTR = 1 << 2, // Executable
+ SHF_MERGE = 1 << 4, // Might be merged if equal
+ SHF_STRINGS = 1 << 5, // Contains null-terminated strings
+ SHF_INFO_LINK = 1 << 6, // 'sh_info' contains SHT index
+ SHF_LINK_ORDER = 1 << 7, // Preserve order after combining
+ SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required
+ SHF_GROUP = 1 << 9, // Section is a member of a group
+ SHF_TLS = 1 << 10 // Section holds thread-local data
+ };
+
+ ELFSection(const std::string &name)
+ : Name(name), Type(0), Flags(0), Addr(0), Offset(0), Size(0),
+ Link(0), Info(0), Align(0), EntSize(0) {
+ }
+ };
+
+ /// SectionList - This is the list of sections that we have emitted to the
+ /// file. Once the file has been completely built, the section header table
+ /// is constructed from this info.
+ std::list<ELFSection> SectionList;
+ unsigned NumSections; // Always = SectionList.size()
+
+ /// SectionLookup - This is a mapping from section name to the corresponding
+ /// section in SectionList.
+ std::map<std::string, ELFSection*> SectionLookup;
+
+ /// getSection - Return the section with the specified name, creating a new
+ /// section if one does not already exist.
+ ELFSection &getSection(const std::string &Name,
+ unsigned Type, unsigned Flags = 0) {
+ ELFSection *&SN = SectionLookup[Name];
+ if (SN) return *SN;
+
+ SectionList.push_back(Name);
+ SN = &SectionList.back();
+ SN->SectionIdx = NumSections++;
+ SN->Type = Type;
+ SN->Flags = Flags;
+ return *SN;
+ }
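+    // Illustrative usage (hypothetical section name, not used by this patch):
+    //   ELFSection &RO = getSection(".rodata", ELFSection::SHT_PROGBITS,
+    //                               ELFSection::SHF_ALLOC);
+    // returns the existing ".rodata" entry if one was already created, or
+    // appends a new ELFSection and assigns it the next SectionIdx.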
+
+ ELFSection &getDataSection() {
+ return getSection(".data", ELFSection::SHT_PROGBITS,
+ ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC);
+ }
+ ELFSection &getBSSSection() {
+ return getSection(".bss", ELFSection::SHT_NOBITS,
+ ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC);
+ }
+
+ /// ELFSym - This struct contains information about each symbol that is
+ /// added to the logical symbol table for the module. This is eventually
+ /// turned into a real symbol table in the file.
+ struct ELFSym {
+ const GlobalValue *GV; // The global value this corresponds to.
+ unsigned NameIdx; // Index in .strtab of name, once emitted.
+ uint64_t Value;
+ unsigned Size;
+ unsigned char Info;
+ unsigned char Other;
+ unsigned short SectionIdx;
+
+ enum { STB_LOCAL = 0, STB_GLOBAL = 1, STB_WEAK = 2 };
+ enum { STT_NOTYPE = 0, STT_OBJECT = 1, STT_FUNC = 2, STT_SECTION = 3,
+ STT_FILE = 4 };
+ ELFSym(const GlobalValue *gv) : GV(gv), Value(0), Size(0), Info(0),
+ Other(0), SectionIdx(0) {}
+
+ void SetBind(unsigned X) {
+ assert(X == (X & 0xF) && "Bind value out of range!");
+ Info = (Info & 0x0F) | (X << 4);
+ }
+ void SetType(unsigned X) {
+ assert(X == (X & 0xF) && "Type value out of range!");
+ Info = (Info & 0xF0) | X;
+ }
+ };
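+    // Illustrative use of the struct above: a global function symbol is
+    // encoded as
+    //   Sym.SetBind(STB_GLOBAL);   // high nibble = 1
+    //   Sym.SetType(STT_FUNC);     // low nibble  = 2
+    // leaving Info == 0x12, the standard ELF st_info packing
+    // ((bind << 4) | type).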
+
+ /// SymbolTable - This is the list of symbols we have emitted to the file.
+ /// This actually gets rearranged before emission to the file (to put the
+ /// local symbols first in the list).
+ std::vector<ELFSym> SymbolTable;
+
+ // As we complete the ELF file, we need to update fields in the ELF header
+ // (e.g. the location of the section table). These members keep track of
+ // the offset in ELFHeader of these various pieces to update and other
+ // locations in the file.
+ unsigned ELFHeader_e_shoff_Offset; // e_shoff in ELF header.
+ unsigned ELFHeader_e_shstrndx_Offset; // e_shstrndx in ELF header.
+ unsigned ELFHeader_e_shnum_Offset; // e_shnum in ELF header.
+ private:
+ void EmitGlobal(GlobalVariable *GV);
+
+ void EmitSymbolTable();
+
+ void EmitSectionTableStringTable();
+ void OutputSectionsAndSectionTable();
+ };
+}
+
+#endif
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
new file mode 100644
index 0000000..3bddc77
--- /dev/null
+++ b/lib/CodeGen/IfConversion.cpp
@@ -0,0 +1,1226 @@
+//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level if-conversion pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ifcvt"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+namespace {
+ // Hidden options to help with debugging.
+ cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
+ cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
+ cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden);
+ cl::opt<bool> DisableSimple("disable-ifcvt-simple",
+ cl::init(false), cl::Hidden);
+ cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
+ cl::init(false), cl::Hidden);
+ cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
+ cl::init(false), cl::Hidden);
+ cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
+ cl::init(false), cl::Hidden);
+ cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
+ cl::init(false), cl::Hidden);
+ cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
+ cl::init(false), cl::Hidden);
+ cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
+ cl::init(false), cl::Hidden);
+}
+
+STATISTIC(NumSimple, "Number of simple if-conversions performed");
+STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed");
+STATISTIC(NumTriangle, "Number of triangle if-conversions performed");
+STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed");
+STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
+STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
+STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
+STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
+STATISTIC(NumDupBBs, "Number of duplicated blocks");
+
+namespace {
+ class IfConverter : public MachineFunctionPass {
+ enum IfcvtKind {
+ ICNotClassfied, // BB data valid, but not classified.
+ ICSimpleFalse, // Same as ICSimple, but on the false path.
+ ICSimple, // BB is entry of a one-split, no-rejoin sub-CFG.
+ ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition.
+ ICTriangleRev, // Same as ICTriangle, but true path rev condition.
+ ICTriangleFalse, // Same as ICTriangle, but on the false path.
+ ICTriangle, // BB is entry of a triangle sub-CFG.
+ ICDiamond // BB is entry of a diamond sub-CFG.
+ };
+
+ /// BBInfo - One per MachineBasicBlock, this is used to cache the result
+ /// of the if-conversion feasibility analysis. This includes results from
+ /// TargetInstrInfo::AnalyzeBranch() (i.e. TBB, FBB, and Cond), the block's
+ /// classification, the common tail block of its successors (if it's a
+ /// diamond shape), its size, whether it's predicable, and whether any
+ /// instruction can clobber the 'would-be' predicate.
+ ///
+ /// IsDone - True if BB is not to be considered for ifcvt.
+ /// IsBeingAnalyzed - True if BB is currently being analyzed.
+ /// IsAnalyzed - True if BB has been analyzed (info is still valid).
+ /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed.
+ /// IsBrAnalyzable - True if AnalyzeBranch() returns false, i.e. the
+ /// branch was successfully analyzed.
+ /// HasFallThrough - True if BB may fallthrough to the following BB.
+ /// IsUnpredicable - True if BB is known to be unpredicable.
+ /// ClobbersPred - True if BB could modify predicates (e.g. has
+ /// cmp, call, etc.)
+ /// NonPredSize - Number of non-predicated instructions.
+ /// BB - Corresponding MachineBasicBlock.
+ /// TrueBB / FalseBB- See AnalyzeBranch().
+ /// BrCond - Conditions for end of block conditional branches.
+ /// Predicate - Predicate used in the BB.
+ struct BBInfo {
+ bool IsDone : 1;
+ bool IsBeingAnalyzed : 1;
+ bool IsAnalyzed : 1;
+ bool IsEnqueued : 1;
+ bool IsBrAnalyzable : 1;
+ bool HasFallThrough : 1;
+ bool IsUnpredicable : 1;
+ bool CannotBeCopied : 1;
+ bool ClobbersPred : 1;
+ unsigned NonPredSize;
+ MachineBasicBlock *BB;
+ MachineBasicBlock *TrueBB;
+ MachineBasicBlock *FalseBB;
+ std::vector<MachineOperand> BrCond;
+ std::vector<MachineOperand> Predicate;
+ BBInfo() : IsDone(false), IsBeingAnalyzed(false),
+ IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
+ HasFallThrough(false), IsUnpredicable(false),
+ CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
+ BB(0), TrueBB(0), FalseBB(0) {}
+ };
+
+ /// IfcvtToken - Record information about pending if-conversions to attempt:
+ /// BBI - Corresponding BBInfo.
+ /// Kind - Type of block. See IfcvtKind.
+ /// NeedSubsumsion - True if the to-be-predicated BB has already been
+ /// predicated.
+ /// NumDups - Number of instructions that would be duplicated due
+ /// to this if-conversion. (For diamonds, the number of
+ /// identical instructions at the beginnings of both
+ /// paths).
+ /// NumDups2 - For diamonds, the number of identical instructions
+ /// at the ends of both paths.
+ struct IfcvtToken {
+ BBInfo &BBI;
+ IfcvtKind Kind;
+ bool NeedSubsumsion;
+ unsigned NumDups;
+ unsigned NumDups2;
+ IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0)
+ : BBI(b), Kind(k), NeedSubsumsion(s), NumDups(d), NumDups2(d2) {}
+ };
+
+ /// Roots - Basic blocks that do not have successors. These are the starting
+ /// points of the graph traversal.
+ std::vector<MachineBasicBlock*> Roots;
+
+ /// BBAnalysis - Results of if-conversion feasibility analysis indexed by
+ /// basic block number.
+ std::vector<BBInfo> BBAnalysis;
+
+ const TargetLowering *TLI;
+ const TargetInstrInfo *TII;
+ bool MadeChange;
+ public:
+ static char ID;
+ IfConverter() : MachineFunctionPass((intptr_t)&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "If converter"; }
+
+ private:
+ bool ReverseBranchCondition(BBInfo &BBI);
+ bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const;
+ bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups) const;
+ bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const;
+ void ScanInstructions(BBInfo &BBI);
+ BBInfo &AnalyzeBlock(MachineBasicBlock *BB,
+ std::vector<IfcvtToken*> &Tokens);
+ bool FeasibilityAnalysis(BBInfo &BBI, std::vector<MachineOperand> &Cond,
+ bool isTriangle = false, bool RevBranch = false);
+ bool AnalyzeBlocks(MachineFunction &MF,
+ std::vector<IfcvtToken*> &Tokens);
+ void InvalidatePreds(MachineBasicBlock *BB);
+ void RemoveExtraEdges(BBInfo &BBI);
+ bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2);
+ void PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ std::vector<MachineOperand> &Cond);
+ void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ std::vector<MachineOperand> &Cond,
+ bool IgnoreBr = false);
+ void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI);
+
+ bool MeetIfcvtSizeLimit(unsigned Size) const {
+ return Size > 0 && Size <= TLI->getIfCvtBlockSizeLimit();
+ }
+
+ // blockAlwaysFallThrough - Block ends without a terminator.
+ bool blockAlwaysFallThrough(BBInfo &BBI) const {
+ return BBI.IsBrAnalyzable && BBI.TrueBB == NULL;
+ }
+
+ // IfcvtTokenCmp - Used to sort if-conversion candidates.
+ static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) {
+ int Incr1 = (C1->Kind == ICDiamond)
+ ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups;
+ int Incr2 = (C2->Kind == ICDiamond)
+ ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups;
+ if (Incr1 > Incr2)
+ return true;
+ else if (Incr1 == Incr2) {
+ // Favors subsumption.
+ if (C1->NeedSubsumsion == false && C2->NeedSubsumsion == true)
+ return true;
+ else if (C1->NeedSubsumsion == C2->NeedSubsumsion) {
+ // Favors diamond over triangle, etc.
+ if ((unsigned)C1->Kind < (unsigned)C2->Kind)
+ return true;
+ else if (C1->Kind == C2->Kind)
+ return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber();
+ }
+ }
+ return false;
+ }
+ };
+
+ char IfConverter::ID = 0;
+}
+
+FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
+
+bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
+ TLI = MF.getTarget().getTargetLowering();
+ TII = MF.getTarget().getInstrInfo();
+ if (!TII) return false;
+
+ static int FnNum = -1;
+ DOUT << "\nIfcvt: function (" << ++FnNum << ") \'"
+ << MF.getFunction()->getName() << "\'";
+
+ if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
+ DOUT << " skipped\n";
+ return false;
+ }
+ DOUT << "\n";
+
+ MF.RenumberBlocks();
+ BBAnalysis.resize(MF.getNumBlockIDs());
+
+ // Look for root nodes, i.e. blocks without successors.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ if (I->succ_size() == 0)
+ Roots.push_back(I);
+
+ std::vector<IfcvtToken*> Tokens;
+ MadeChange = false;
+ unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
+ NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
+ // Do an initial analysis of each basic block, finding all of the potential
+ // candidates for if-conversion.
+ bool Change = AnalyzeBlocks(MF, Tokens);
+ while (!Tokens.empty()) {
+ IfcvtToken *Token = Tokens.back();
+ Tokens.pop_back();
+ BBInfo &BBI = Token->BBI;
+ IfcvtKind Kind = Token->Kind;
+
+ // If the block has been evicted out of the queue or it has already been
+ // marked dead (due to it being predicated), then skip it.
+ if (BBI.IsDone)
+ BBI.IsEnqueued = false;
+ if (!BBI.IsEnqueued)
+ continue;
+
+ BBI.IsEnqueued = false;
+
+ bool RetVal = false;
+ switch (Kind) {
+ default: assert(false && "Unexpected!");
+ break;
+ case ICSimple:
+ case ICSimpleFalse: {
+ bool isFalse = Kind == ICSimpleFalse;
+ if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
+ DOUT << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"")
+ << "): BB#" << BBI.BB->getNumber() << " ("
+ << ((Kind == ICSimpleFalse)
+ ? BBI.FalseBB->getNumber()
+ : BBI.TrueBB->getNumber()) << ") ";
+ RetVal = IfConvertSimple(BBI, Kind);
+ DOUT << (RetVal ? "succeeded!" : "failed!") << "\n";
+ if (RetVal)
+ if (isFalse) NumSimpleFalse++;
+ else NumSimple++;
+ break;
+ }
+ case ICTriangle:
+ case ICTriangleRev:
+ case ICTriangleFalse:
+ case ICTriangleFRev: {
+ bool isFalse = Kind == ICTriangleFalse;
+ bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev);
+ if (DisableTriangle && !isFalse && !isRev) break;
+ if (DisableTriangleR && !isFalse && isRev) break;
+ if (DisableTriangleF && isFalse && !isRev) break;
+ if (DisableTriangleFR && isFalse && isRev) break;
+ DOUT << "Ifcvt (Triangle";
+ if (isFalse)
+ DOUT << " false";
+ if (isRev)
+ DOUT << " rev";
+ DOUT << "): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ";
+ RetVal = IfConvertTriangle(BBI, Kind);
+ DOUT << (RetVal ? "succeeded!" : "failed!") << "\n";
+ if (RetVal) {
+ if (isFalse) {
+ if (isRev) NumTriangleFRev++;
+ else NumTriangleFalse++;
+ } else {
+ if (isRev) NumTriangleRev++;
+ else NumTriangle++;
+ }
+ }
+ break;
+ }
+ case ICDiamond: {
+ if (DisableDiamond) break;
+ DOUT << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ";
+ RetVal = IfConvertDiamond(BBI, Kind, Token->NumDups, Token->NumDups2);
+ DOUT << (RetVal ? "succeeded!" : "failed!") << "\n";
+ if (RetVal) NumDiamonds++;
+ break;
+ }
+ }
+
+ Change |= RetVal;
+
+ NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev +
+ NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit)
+ break;
+ }
+
+ if (!Change)
+ break;
+ MadeChange |= Change;
+ }
+
+ // Delete tokens in case of early exit.
+ while (!Tokens.empty()) {
+ IfcvtToken *Token = Tokens.back();
+ Tokens.pop_back();
+ delete Token;
+ }
+
+ Tokens.clear();
+ Roots.clear();
+ BBAnalysis.clear();
+
+ return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *TrueBB) {
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ E = BB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ if (SuccBB != TrueBB)
+ return SuccBB;
+ }
+ return NULL;
+}
+
+/// ReverseBranchCondition - Reverse the condition of the branch at the end
+/// of the block and swap the block's 'true' and 'false' successors.
+bool IfConverter::ReverseBranchCondition(BBInfo &BBI) {
+ if (!TII->ReverseBranchCondition(BBI.BrCond)) {
+ TII->RemoveBranch(*BBI.BB);
+ TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond);
+ std::swap(BBI.TrueBB, BBI.FalseBB);
+ return true;
+ }
+ return false;
+}
+
+/// getNextBlock - Returns the next block in the function blocks ordering. If
+/// it is the end, returns NULL.
+static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
+ MachineFunction::iterator I = BB;
+ MachineFunction::iterator E = BB->getParent()->end();
+ if (++I == E)
+ return NULL;
+ return I;
+}
+
+/// ValidSimple - Returns true if the 'true' block (along with its
+/// predecessor) forms a valid simple shape for ifcvt. It also returns in
+/// 'Dups' the number of instructions that the ifcvt would need to duplicate
+/// if it is performed.
+bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.IsBrAnalyzable)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied ||
+ TrueBBI.NonPredSize > TLI->getIfCvtDupBlockSizeLimit())
+ return false;
+ Dups = TrueBBI.NonPredSize;
+ }
+
+ return true;
+}
+
+/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a valid triangle shape for ifcvt.
+/// If 'FalseBranch' is true, it checks whether the 'true' block's false
+/// branch branches to the 'false' block rather than the other way around.
+/// It also returns in 'Dups' the number of instructions that the ifcvt
+/// would need to duplicate if it is performed.
+bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied)
+ return false;
+
+ unsigned Size = TrueBBI.NonPredSize;
+ if (TrueBBI.IsBrAnalyzable) {
+ if (TrueBBI.TrueBB && TrueBBI.BrCond.size() == 0)
+ // End with an unconditional branch. It will be removed.
+ --Size;
+ else {
+ MachineBasicBlock *FExit = FalseBranch
+ ? TrueBBI.TrueBB : TrueBBI.FalseBB;
+ if (FExit)
+ // A conditional branch to the early exit will also have to be duplicated.
+ ++Size;
+ }
+ }
+ if (Size > TLI->getIfCvtDupBlockSizeLimit())
+ return false;
+ Dups = Size;
+ }
+
+ MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB;
+ if (!TExit && blockAlwaysFallThrough(TrueBBI)) {
+ MachineFunction::iterator I = TrueBBI.BB;
+ if (++I == TrueBBI.BB->getParent()->end())
+ return false;
+ TExit = I;
+ }
+ return TExit && TExit == FalseBBI.BB;
+}
+
+static
+MachineBasicBlock::iterator firstNonBranchInst(MachineBasicBlock *BB,
+ const TargetInstrInfo *TII) {
+ MachineBasicBlock::iterator I = BB->end();
+ while (I != BB->begin()) {
+ --I;
+ const TargetInstrDescriptor *TID = I->getInstrDescriptor();
+ if ((TID->Flags & M_BRANCH_FLAG) == 0)
+ break;
+ }
+ return I;
+}
+
+/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a valid diamond shape for ifcvt.
+bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const {
+ Dups1 = Dups2 = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
+ FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
+ return false;
+
+ MachineBasicBlock *TT = TrueBBI.TrueBB;
+ MachineBasicBlock *FT = FalseBBI.TrueBB;
+
+ if (!TT && blockAlwaysFallThrough(TrueBBI))
+ TT = getNextBlock(TrueBBI.BB);
+ if (!FT && blockAlwaysFallThrough(FalseBBI))
+ FT = getNextBlock(FalseBBI.BB);
+ if (TT != FT)
+ return false;
+ if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
+ return false;
+ if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
+ return false;
+
+ // FIXME: Allow true block to have an early exit?
+ if (TrueBBI.FalseBB || FalseBBI.FalseBB ||
+ (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
+ return false;
+
+ MachineBasicBlock::iterator TI = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FI = FalseBBI.BB->begin();
+ while (TI != TrueBBI.BB->end() && FI != FalseBBI.BB->end()) {
+ if (!TI->isIdenticalTo(FI))
+ break;
+ ++Dups1;
+ ++TI;
+ ++FI;
+ }
+
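+ // Dups1 now counts the identical instructions shared at the start of both
+ // blocks. Next count the identical instructions shared at the end of the
+ // blocks (not counting the branches) into Dups2.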
+ TI = firstNonBranchInst(TrueBBI.BB, TII);
+ FI = firstNonBranchInst(FalseBBI.BB, TII);
+ while (TI != TrueBBI.BB->begin() && FI != FalseBBI.BB->begin()) {
+ if (!TI->isIdenticalTo(FI))
+ break;
+ ++Dups2;
+ --TI;
+ --FI;
+ }
+
+ return true;
+}
+
+/// ScanInstructions - Scan all the instructions in the block to determine if
+/// the block is predicable. In most cases, that means all the instructions
+/// in the block have the M_PREDICABLE flag. It also checks if the block
+/// contains any instruction which can clobber a predicate (e.g. the condition
+/// code register). If so, the block is not predicable unless the clobbering
+/// instruction is the last one in the block.
+void IfConverter::ScanInstructions(BBInfo &BBI) {
+ if (BBI.IsDone)
+ return;
+
+ bool AlreadyPredicated = BBI.Predicate.size() > 0;
+ // First analyze the end of BB branches.
+ BBI.TrueBB = BBI.FalseBB = NULL;
+ BBI.BrCond.clear();
+ BBI.IsBrAnalyzable =
+ !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
+ BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == NULL;
+
+ if (BBI.BrCond.size()) {
+ // No false branch. This BB must end with a conditional branch and a
+ // fallthrough.
+ if (!BBI.FalseBB)
+ BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
+ assert(BBI.FalseBB && "Expected to find the fallthrough block!");
+ }
+
+ // Then scan all the instructions.
+ BBI.NonPredSize = 0;
+ BBI.ClobbersPred = false;
+ bool SeenCondBr = false;
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
+ I != E; ++I) {
+ const TargetInstrDescriptor *TID = I->getInstrDescriptor();
+ if ((TID->Flags & M_NOT_DUPLICABLE) != 0)
+ BBI.CannotBeCopied = true;
+
+ bool isPredicated = TII->isPredicated(I);
+ bool isCondBr = BBI.IsBrAnalyzable &&
+ (TID->Flags & M_BRANCH_FLAG) != 0 && (TID->Flags & M_BARRIER_FLAG) == 0;
+
+ if (!isCondBr) {
+ if (!isPredicated)
+ BBI.NonPredSize++;
+ else if (!AlreadyPredicated) {
+ // FIXME: This instruction is already predicated before the
+ // if-conversion pass. It's probably something like a conditional move.
+ // Mark this block unpredicable for now.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+
+ }
+
+ if (BBI.ClobbersPred && !isPredicated) {
+ // A predicate-modifying instruction should end the block (except for
+ // already predicated instructions and end-of-block branches).
+ if (isCondBr) {
+ SeenCondBr = true;
+
+ // A conditional branch is not predicable, but it may be eliminated.
+ continue;
+ }
+
+ // Predicate may have been modified, the subsequent (currently)
+ // unpredicated instructions cannot be correctly predicated.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+
+ // FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are
+ // still potentially predicable.
+ std::vector<MachineOperand> PredDefs;
+ if (TII->DefinesPredicate(I, PredDefs))
+ BBI.ClobbersPred = true;
+
+ if ((TID->Flags & M_PREDICABLE) == 0) {
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+}
+
+/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be
+/// predicated by the specified predicate.
+bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
+ std::vector<MachineOperand> &Pred,
+ bool isTriangle, bool RevBranch) {
+ // If the block is dead or unpredicable, then it cannot be predicated.
+ if (BBI.IsDone || BBI.IsUnpredicable)
+ return false;
+
+ // If it is already predicated, check if its predicate subsumes the new
+ // predicate.
+ if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred))
+ return false;
+
+ if (BBI.BrCond.size()) {
+ if (!isTriangle)
+ return false;
+
+ // Test predicate subsumption.
+ std::vector<MachineOperand> RevPred(Pred);
+ std::vector<MachineOperand> Cond(BBI.BrCond);
+ if (RevBranch) {
+ if (TII->ReverseBranchCondition(Cond))
+ return false;
+ }
+ if (TII->ReverseBranchCondition(RevPred) ||
+ !TII->SubsumesPredicate(Cond, RevPred))
+ return false;
+ }
+
+ return true;
+}
+
+/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from
+/// the specified block. Record its successors and whether it looks like an
+/// if-conversion candidate.
+IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
+ std::vector<IfcvtToken*> &Tokens) {
+ BBInfo &BBI = BBAnalysis[BB->getNumber()];
+
+ if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed)
+ return BBI;
+
+ BBI.BB = BB;
+ BBI.IsBeingAnalyzed = true;
+
+ ScanInstructions(BBI);
+
+ // Unanalyzable, or ends with a fallthrough or an unconditional branch.
+ if (!BBI.IsBrAnalyzable || BBI.BrCond.size() == 0) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ // Do not ifcvt if either path is a back edge to the entry block.
+ if (BBI.TrueBB == BB || BBI.FalseBB == BB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ BBInfo &TrueBBI = AnalyzeBlock(BBI.TrueBB, Tokens);
+ BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens);
+
+ if (TrueBBI.IsDone && FalseBBI.IsDone) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ std::vector<MachineOperand> RevCond(BBI.BrCond);
+ bool CanRevCond = !TII->ReverseBranchCondition(RevCond);
+
+ unsigned Dups = 0;
+ unsigned Dups2 = 0;
+ bool TNeedSub = TrueBBI.Predicate.size() > 0;
+ bool FNeedSub = FalseBBI.Predicate.size() > 0;
+ bool Enqueued = false;
+ if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize - (Dups + Dups2)) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize - (Dups + Dups2)) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ // Diamond:
+ // EBB
+ // / \_
+ // | |
+ // TBB FBB
+ // \ /
+ // TailBB
+ // Note TailBB can be empty.
+ Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups,
+ Dups2));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
+ // Triangle:
+ // EBB
+ // | \_
+ // | |
+ // | TBB
+ // | /
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(TrueBBI, Dups) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
+ // Simple (split, no rejoin):
+ // EBB
+ // | \_
+ // | |
+ // | TBB---> exit
+ // |
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (CanRevCond) {
+ // Try the other path...
+ if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(FalseBBI, Dups) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+ }
+
+ BBI.IsEnqueued = Enqueued;
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+}
+
+/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
+/// candidates. It returns true if any CFG restructuring is done to expose more
+/// if-conversion opportunities.
+bool IfConverter::AnalyzeBlocks(MachineFunction &MF,
+ std::vector<IfcvtToken*> &Tokens) {
+ bool Change = false;
+ std::set<MachineBasicBlock*> Visited;
+ for (unsigned i = 0, e = Roots.size(); i != e; ++i) {
+ for (idf_ext_iterator<MachineBasicBlock*> I=idf_ext_begin(Roots[i],Visited),
+ E = idf_ext_end(Roots[i], Visited); I != E; ++I) {
+ MachineBasicBlock *BB = *I;
+ AnalyzeBlock(BB, Tokens);
+ }
+ }
+
+ // Sort to favor more complex ifcvt scheme.
+ std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
+
+ return Change;
+}
+
+/// canFallThroughTo - Returns true if ToBB is the next block after BB, or if
+/// all the intervening blocks are empty (assuming BB can fall through to its
+/// next block).
+static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
+ MachineFunction::iterator I = BB;
+ MachineFunction::iterator TI = ToBB;
+ MachineFunction::iterator E = BB->getParent()->end();
+ while (++I != TI)
+ if (I == E || !I->empty())
+ return false;
+ return true;
+}
+
+/// InvalidatePreds - Invalidate predecessor BB info so it will be re-analyzed
+/// to determine if it can be if-converted. If a predecessor is already
+/// enqueued, dequeue it.
+void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI) {
+ BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()];
+ if (PBBI.IsDone || PBBI.BB == BB)
+ continue;
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+}
+
+/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB.
+///
+static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
+ const TargetInstrInfo *TII) {
+ std::vector<MachineOperand> NoCond;
+ TII->InsertBranch(*BB, ToBB, NULL, NoCond);
+}
+
+/// RemoveExtraEdges - Remove true / false edges if either / both are no longer
+/// successors.
+void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
+ MachineBasicBlock *TBB = NULL, *FBB = NULL;
+ std::vector<MachineOperand> Cond;
+ if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond))
+ BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+}
+
+/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
+///
+bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+
+ std::vector<MachineOperand> Cond(BBI.BrCond);
+ if (Kind == ICSimpleFalse)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICSimpleFalse)
+ TII->ReverseBranchCondition(Cond);
+
+ if (CvtBBI->BB->pred_size() > 1) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
+ } else {
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+
+ // Merge converted block into entry block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI);
+ }
+
+ bool IterIfcvt = true;
+ if (!canFallThroughTo(BBI.BB, NextBBI->BB)) {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ // Now ifcvt'd block will look like this:
+ // BB:
+ // ...
+ // t, f = cmp
+ // if t op
+ // b BBf
+ //
+ // We cannot further ifcvt this block because the unconditional branch
+ // will have to be predicated on the new condition, that will not be
+ // available if cmp executes.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertTriangle - If convert a triangle sub-CFG.
+///
+bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+
+ std::vector<MachineOperand> Cond(BBI.BrCond);
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ TII->ReverseBranchCondition(Cond);
+
+ if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
+ ReverseBranchCondition(*CvtBBI);
+ // BB has been changed, modify its predecessors (except for this
+ // one) so they don't get ifcvt'ed based on stale information.
+ for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(),
+ E = CvtBBI->BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PBB = *PI;
+ if (PBB == BBI.BB)
+ continue;
+ BBInfo &PBBI = BBAnalysis[PBB->getNumber()];
+ if (PBBI.IsEnqueued) {
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+ }
+ }
+
+ bool HasEarlyExit = CvtBBI->FalseBB != NULL;
+ bool DupBB = CvtBBI->BB->pred_size() > 1;
+ if (DupBB) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
+ } else {
+ // Predicate the 'true' block after removing its branch.
+ CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+ }
+
+ if (!DupBB) {
+ // Now merge the entry of the triangle with the true block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI);
+ }
+
+ // If 'true' block has a 'false' successor, add an exit branch to it.
+ if (HasEarlyExit) {
+ std::vector<MachineOperand> RevCond(CvtBBI->BrCond);
+ if (TII->ReverseBranchCondition(RevCond))
+ assert(false && "Unable to reverse branch condition!");
+ TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond);
+ BBI.BB->addSuccessor(CvtBBI->FalseBB);
+ }
+
+ // Merge in the 'false' block if the 'false' block has no other
+ // predecessors. Otherwise, add an unconditional branch to the 'false' block.
+ bool FalseBBDead = false;
+ bool IterIfcvt = true;
+ bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB);
+ if (!isFallThrough) {
+ // Only merge them if the true block does not fallthrough to the false
+ // block. By not merging them, we make it possible to iteratively
+ // ifcvt the blocks.
+ if (!HasEarlyExit &&
+ NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) {
+ MergeBlocks(BBI, *NextBBI);
+ FalseBBDead = true;
+ } else {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ }
+ // Mixed predicated and unpredicated code. This cannot be iteratively
+ // predicated.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+ if (FalseBBDead)
+ NextBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertDiamond - If convert a diamond sub-CFG.
+///
+bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ MachineBasicBlock *TailBB = TrueBBI.TrueBB;
+ // The true block must fall through or end with an unanalyzable terminator.
+ if (!TailBB) {
+ if (blockAlwaysFallThrough(TrueBBI))
+ TailBB = FalseBBI.TrueBB;
+ assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
+ }
+
+ if (TrueBBI.IsDone || FalseBBI.IsDone ||
+ TrueBBI.BB->pred_size() > 1 ||
+ FalseBBI.BB->pred_size() > 1) {
+ // Something has changed. It's no longer safe to predicate these blocks.
+ BBI.IsAnalyzed = false;
+ TrueBBI.IsAnalyzed = false;
+ FalseBBI.IsAnalyzed = false;
+ return false;
+ }
+
+ // Merge the 'true' and 'false' blocks by copying the instructions
+ // from the 'false' block to the 'true' block. That is, unless the true
+ // block would clobber the predicate, in which case do the opposite.
+ BBInfo *BBI1 = &TrueBBI;
+ BBInfo *BBI2 = &FalseBBI;
+ std::vector<MachineOperand> RevCond(BBI.BrCond);
+ TII->ReverseBranchCondition(RevCond);
+ std::vector<MachineOperand> *Cond1 = &BBI.BrCond;
+ std::vector<MachineOperand> *Cond2 = &RevCond;
+
+ // Figure out the more profitable ordering.
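+ // If one side clobbers the predicate, it must be placed second so that the
+ // other side's newly predicated instructions still execute under the
+ // original, unclobbered condition; otherwise prefer the smaller block first.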
+ bool DoSwap = false;
+ if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)
+ DoSwap = true;
+ else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {
+ if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
+ DoSwap = true;
+ }
+ if (DoSwap) {
+ std::swap(BBI1, BBI2);
+ std::swap(Cond1, Cond2);
+ }
+
+ // Remove the conditional branch from entry to the blocks.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+
+ // Remove the duplicated instructions at the beginnings of both paths.
+ MachineBasicBlock::iterator DI1 = BBI1->BB->begin();
+ MachineBasicBlock::iterator DI2 = BBI2->BB->begin();
+ BBI1->NonPredSize -= NumDups1;
+ BBI2->NonPredSize -= NumDups1;
+ while (NumDups1 != 0) {
+ ++DI1;
+ ++DI2;
+ --NumDups1;
+ }
+ BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
+ BBI2->BB->erase(BBI2->BB->begin(), DI2);
+
+ // Predicate the 'true' block after removing its branch.
+ BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
+ DI1 = BBI1->BB->end();
+ for (unsigned i = 0; i != NumDups2; ++i)
+ --DI1;
+ BBI1->BB->erase(DI1, BBI1->BB->end());
+ PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1);
+
+ // Predicate the 'false' block.
+ BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
+ DI2 = BBI2->BB->end();
+ while (NumDups2 != 0) {
+ --DI2;
+ --NumDups2;
+ }
+ PredicateBlock(*BBI2, DI2, *Cond2);
+
+ // Merge the true block into the entry of the diamond.
+ MergeBlocks(BBI, *BBI1);
+ MergeBlocks(BBI, *BBI2);
+
+ // If the if-converted block falls through or unconditionally branches into
+ // the tail block, and the tail block does not have other predecessors, then
+ // fold the tail block in as well. Otherwise, unless it falls through to the
+ // tail, add an unconditional branch to it.
+ if (TailBB) {
+ BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
+ if (TailBB->pred_size() == 1 && !TailBBI.HasFallThrough) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, TailBBI);
+ TailBBI.IsDone = true;
+ } else {
+ InsertUncondBranch(BBI.BB, TailBB, TII);
+ BBI.HasFallThrough = false;
+ }
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info.
+ BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// PredicateBlock - Predicate instructions from the start of the block to the
+/// specified end with the specified condition.
+void IfConverter::PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ std::vector<MachineOperand> &Cond) {
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
+ if (TII->isPredicated(I))
+ continue;
+ if (!TII->PredicateInstruction(I, Cond)) {
+ cerr << "Unable to predicate " << *I << "!\n";
+ abort();
+ }
+ }
+
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
+
+ BBI.IsAnalyzed = false;
+ BBI.NonPredSize = 0;
+
+ NumIfConvBBs++;
+}
+
+/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
+/// the destination block. Skip end of block branches if IgnoreBr is true.
+void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ std::vector<MachineOperand> &Cond,
+ bool IgnoreBr) {
+ for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
+ E = FromBBI.BB->end(); I != E; ++I) {
+ const TargetInstrDescriptor *TID = I->getInstrDescriptor();
+ bool isPredicated = TII->isPredicated(I);
+ // Do not copy the end of the block branches.
+ if (IgnoreBr && !isPredicated && (TID->Flags & M_BRANCH_FLAG) != 0)
+ break;
+
+ MachineInstr *MI = I->clone();
+ ToBBI.BB->insert(ToBBI.BB->end(), MI);
+ ToBBI.NonPredSize++;
+
+ if (!isPredicated)
+ if (!TII->PredicateInstruction(MI, Cond)) {
+ cerr << "Unable to predicate " << *MI << "!\n";
+ abort();
+ }
+ }
+
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ if (!ToBBI.BB->isSuccessor(Succ))
+ ToBBI.BB->addSuccessor(Succ);
+ }
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate));
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.IsAnalyzed = false;
+
+ NumDupBBs++;
+}
+
+/// MergeBlocks - Move all instructions from FromBB to the end of ToBB.
+///
+void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI) {
+ ToBBI.BB->splice(ToBBI.BB->end(),
+ FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
+
+ // Redirect all predecessor branches that target FromBB to target ToBB instead.
+ std::vector<MachineBasicBlock *> Preds(FromBBI.BB->pred_begin(),
+ FromBBI.BB->pred_end());
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ MachineBasicBlock *Pred = Preds[i];
+ if (Pred == ToBBI.BB)
+ continue;
+ Pred->ReplaceUsesOfBlockWith(FromBBI.BB, ToBBI.BB);
+ }
+
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ FromBBI.BB->removeSuccessor(Succ);
+ if (!ToBBI.BB->isSuccessor(Succ))
+ ToBBI.BB->addSuccessor(Succ);
+ }
+
+ // Now FromBBI always falls through to the next block.
+ if (NBB && !FromBBI.BB->isSuccessor(NBB))
+ FromBBI.BB->addSuccessor(NBB);
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ FromBBI.Predicate.clear();
+
+ ToBBI.NonPredSize += FromBBI.NonPredSize;
+ FromBBI.NonPredSize = 0;
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.HasFallThrough = FromBBI.HasFallThrough;
+ ToBBI.IsAnalyzed = false;
+ FromBBI.IsAnalyzed = false;
+}
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
new file mode 100644
index 0000000..8ae4df6
--- /dev/null
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -0,0 +1,799 @@
+//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IntrinsicLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+template <class ArgIt>
+static void EnsureFunctionExists(Module &M, const char *Name,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ const Type *RetTy) {
+ // Insert a correctly-typed definition now.
+ std::vector<const Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back(I->getType());
+ M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
+}
+
+/// ReplaceCallWith - This function is used when we want to lower an intrinsic
+/// call to a call of an external function. This handles hard cases such as
+/// when there was already a prototype for the external function, and if that
+/// prototype doesn't match the arguments we expect to pass in.
+template <class ArgIt>
+static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ const Type *RetTy, Constant *&FCache) {
+ if (!FCache) {
+ // If we haven't already looked up this function, check to see if the
+ // program already contains a function with this name.
+ Module *M = CI->getParent()->getParent()->getParent();
+ // Get or insert the definition now.
+ std::vector<const Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back((*I)->getType());
+ FCache = M->getOrInsertFunction(NewFn,
+ FunctionType::get(RetTy, ParamTys, false));
+ }
+
+ SmallVector<Value*, 8> Operands(ArgBegin, ArgEnd);
+ CallInst *NewCI = new CallInst(FCache, &Operands[0], Operands.size(),
+ CI->getName(), CI);
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(NewCI);
+ return NewCI;
+}
+
+void IntrinsicLowering::AddPrototypes(Module &M) {
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->isDeclaration() && !I->use_empty())
+ switch (I->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::setjmp:
+ EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
+ Type::Int32Ty);
+ break;
+ case Intrinsic::longjmp:
+ EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
+ Type::VoidTy);
+ break;
+ case Intrinsic::siglongjmp:
+ EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
+ Type::VoidTy);
+ break;
+ case Intrinsic::memcpy_i32:
+ case Intrinsic::memcpy_i64:
+ M.getOrInsertFunction("memcpy", PointerType::get(Type::Int8Ty),
+ PointerType::get(Type::Int8Ty),
+ PointerType::get(Type::Int8Ty),
+ TD.getIntPtrType(), (Type *)0);
+ break;
+ case Intrinsic::memmove_i32:
+ case Intrinsic::memmove_i64:
+ M.getOrInsertFunction("memmove", PointerType::get(Type::Int8Ty),
+ PointerType::get(Type::Int8Ty),
+ PointerType::get(Type::Int8Ty),
+ TD.getIntPtrType(), (Type *)0);
+ break;
+ case Intrinsic::memset_i32:
+ case Intrinsic::memset_i64:
+ M.getOrInsertFunction("memset", PointerType::get(Type::Int8Ty),
+ PointerType::get(Type::Int8Ty), Type::Int32Ty,
+ TD.getIntPtrType(), (Type *)0);
+ break;
+ case Intrinsic::sqrt_f32:
+ case Intrinsic::sqrt_f64:
+ if(I->arg_begin()->getType() == Type::FloatTy)
+ EnsureFunctionExists(M, "sqrtf", I->arg_begin(), I->arg_end(),
+ Type::FloatTy);
+ else
+ EnsureFunctionExists(M, "sqrt", I->arg_begin(), I->arg_end(),
+ Type::DoubleTy);
+ break;
+ }
+}
+
+/// LowerBSWAP - Emit the code to lower bswap of V before the specified
+/// instruction IP.
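+/// For example, a 32-bit value 0xAABBCCDD becomes 0xDDCCBBAA; the swap is
+/// built entirely out of shifts, masks, and ors.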
+static Value *LowerBSWAP(Value *V, Instruction *IP) {
+ assert(V->getType()->isInteger() && "Can't bswap a non-integer type!");
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+
+ switch(BitSize) {
+ default: assert(0 && "Unhandled type size of value to byteswap!");
+ case 16: {
+ Value *Tmp1 = BinaryOperator::createShl(V,
+ ConstantInt::get(V->getType(),8),"bswap.2",IP);
+ Value *Tmp2 = BinaryOperator::createLShr(V,
+ ConstantInt::get(V->getType(),8),"bswap.1",IP);
+ V = BinaryOperator::createOr(Tmp1, Tmp2, "bswap.i16", IP);
+ break;
+ }
+ case 32: {
+ Value *Tmp4 = BinaryOperator::createShl(V,
+ ConstantInt::get(V->getType(),24),"bswap.4", IP);
+ Value *Tmp3 = BinaryOperator::createShl(V,
+ ConstantInt::get(V->getType(),8),"bswap.3",IP);
+ Value *Tmp2 = BinaryOperator::createLShr(V,
+ ConstantInt::get(V->getType(),8),"bswap.2",IP);
+ Value *Tmp1 = BinaryOperator::createLShr(V,
+ ConstantInt::get(V->getType(),24),"bswap.1", IP);
+ Tmp3 = BinaryOperator::createAnd(Tmp3,
+ ConstantInt::get(Type::Int32Ty, 0xFF0000),
+ "bswap.and3", IP);
+ Tmp2 = BinaryOperator::createAnd(Tmp2,
+ ConstantInt::get(Type::Int32Ty, 0xFF00),
+ "bswap.and2", IP);
+ Tmp4 = BinaryOperator::createOr(Tmp4, Tmp3, "bswap.or1", IP);
+ Tmp2 = BinaryOperator::createOr(Tmp2, Tmp1, "bswap.or2", IP);
+ V = BinaryOperator::createOr(Tmp4, Tmp2, "bswap.i32", IP);
+ break;
+ }
+ case 64: {
+ Value *Tmp8 = BinaryOperator::createShl(V,
+ ConstantInt::get(V->getType(),56),"bswap.8", IP);
+ Value *Tmp7 = BinaryOperator::createShl(V,
+ ConstantInt::get(V->getType(),40),"bswap.7", IP);
+ Value *Tmp6 = BinaryOperator::createShl(V,
+ ConstantInt::get(V->getType(),24),"bswap.6", IP);
+ Value *Tmp5 = BinaryOperator::createShl(V,
+ ConstantInt::get(V->getType(),8),"bswap.5", IP);
+ Value* Tmp4 = BinaryOperator::createLShr(V,
+ ConstantInt::get(V->getType(),8),"bswap.4", IP);
+ Value* Tmp3 = BinaryOperator::createLShr(V,
+ ConstantInt::get(V->getType(),24),"bswap.3", IP);
+ Value* Tmp2 = BinaryOperator::createLShr(V,
+ ConstantInt::get(V->getType(),40),"bswap.2", IP);
+ Value* Tmp1 = BinaryOperator::createLShr(V,
+ ConstantInt::get(V->getType(),56),"bswap.1", IP);
+ Tmp7 = BinaryOperator::createAnd(Tmp7,
+ ConstantInt::get(Type::Int64Ty,
+ 0xFF000000000000ULL),
+ "bswap.and7", IP);
+ Tmp6 = BinaryOperator::createAnd(Tmp6,
+ ConstantInt::get(Type::Int64Ty, 0xFF0000000000ULL),
+ "bswap.and6", IP);
+ Tmp5 = BinaryOperator::createAnd(Tmp5,
+ ConstantInt::get(Type::Int64Ty, 0xFF00000000ULL),
+ "bswap.and5", IP);
+ Tmp4 = BinaryOperator::createAnd(Tmp4,
+ ConstantInt::get(Type::Int64Ty, 0xFF000000ULL),
+ "bswap.and4", IP);
+ Tmp3 = BinaryOperator::createAnd(Tmp3,
+ ConstantInt::get(Type::Int64Ty, 0xFF0000ULL),
+ "bswap.and3", IP);
+ Tmp2 = BinaryOperator::createAnd(Tmp2,
+ ConstantInt::get(Type::Int64Ty, 0xFF00ULL),
+ "bswap.and2", IP);
+ Tmp8 = BinaryOperator::createOr(Tmp8, Tmp7, "bswap.or1", IP);
+ Tmp6 = BinaryOperator::createOr(Tmp6, Tmp5, "bswap.or2", IP);
+ Tmp4 = BinaryOperator::createOr(Tmp4, Tmp3, "bswap.or3", IP);
+ Tmp2 = BinaryOperator::createOr(Tmp2, Tmp1, "bswap.or4", IP);
+ Tmp8 = BinaryOperator::createOr(Tmp8, Tmp6, "bswap.or5", IP);
+ Tmp4 = BinaryOperator::createOr(Tmp4, Tmp2, "bswap.or6", IP);
+ V = BinaryOperator::createOr(Tmp8, Tmp4, "bswap.i64", IP);
+ break;
+ }
+ }
+ return V;
+}
+
+/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
+/// instruction IP.
+static Value *LowerCTPOP(Value *V, Instruction *IP) {
+ assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!");
+
+ static const uint64_t MaskValues[6] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+ };
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ unsigned WordSize = (BitSize + 63) / 64;
+ Value *Count = ConstantInt::get(V->getType(), 0);
+
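+ // Standard parallel bit count: at each step, mask out alternating groups of
+ // bits and add adjacent groups together, doubling the group width, until
+ // each word (up to 64 bits) holds its own population count; the per-word
+ // counts are then accumulated into Count.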
+ for (unsigned n = 0; n < WordSize; ++n) {
+ Value *PartValue = V;
+ for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
+ i <<= 1, ++ct) {
+ Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
+ Value *LHS = BinaryOperator::createAnd(
+ PartValue, MaskCst, "cppop.and1", IP);
+ Value *VShift = BinaryOperator::createLShr(PartValue,
+ ConstantInt::get(V->getType(), i), "ctpop.sh", IP);
+ Value *RHS = BinaryOperator::createAnd(VShift, MaskCst, "cppop.and2", IP);
+ PartValue = BinaryOperator::createAdd(LHS, RHS, "ctpop.step", IP);
+ }
+ Count = BinaryOperator::createAdd(PartValue, Count, "ctpop.part", IP);
+ if (BitSize > 64) {
+ V = BinaryOperator::createLShr(V, ConstantInt::get(V->getType(), 64),
+ "ctpop.part.sh", IP);
+ BitSize -= 64;
+ }
+ }
+
+ return CastInst::createIntegerCast(Count, Type::Int32Ty, false, "ctpop", IP);
+}
+
+/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
+/// instruction IP.
+static Value *LowerCTLZ(Value *V, Instruction *IP) {
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ for (unsigned i = 1; i < BitSize; i <<= 1) {
+ Value *ShVal = ConstantInt::get(V->getType(), i);
+ ShVal = BinaryOperator::createLShr(V, ShVal, "ctlz.sh", IP);
+ V = BinaryOperator::createOr(V, ShVal, "ctlz.step", IP);
+ }
+
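+ // V now has every bit at and below its original highest set bit set, so
+ // its complement has exactly ctlz(V) set bits; count them with ctpop.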
+ V = BinaryOperator::createNot(V, "", IP);
+ return LowerCTPOP(V, IP);
+}
+
+/// Convert the llvm.part.select.iX.iY intrinsic. This intrinsic takes
+/// three integer arguments. The first argument is the Value from which the
+/// bits will be selected. It may be of any bit width. The second and third
+/// arguments specify a range of bits to select with the second argument
+/// specifying the low bit and the third argument specifying the high bit. Both
+/// must be type i32. The result is the corresponding selected bits from the
+/// Value in the same width as the Value (first argument). If the low bit index
+/// is higher than the high bit index then the inverse selection is done and
+/// the bits are returned in inverse order.
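+/// For example (illustrative), selecting with a low bit of 8 and a high bit
+/// of 15 from an i32 value returns bits 8..15 in the low bits of the result,
+/// while a low bit of 15 and a high bit of 8 returns those bits reversed.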
+/// @brief Lowering of llvm.part.select intrinsic.
+static Instruction *LowerPartSelect(CallInst *CI) {
+ // Make sure we're dealing with a part select intrinsic here
+ Function *F = CI->getCalledFunction();
+ const FunctionType *FT = F->getFunctionType();
+ if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
+ FT->getNumParams() != 3 || !FT->getParamType(0)->isInteger() ||
+ !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger())
+ return CI;
+
+ // Get the intrinsic implementation function by converting all the . to _
+ // in the intrinsic's function name and then reconstructing the function
+ // declaration.
+ std::string Name(F->getName());
+ for (unsigned i = 4; i < Name.length(); ++i)
+ if (Name[i] == '.')
+ Name[i] = '_';
+ Module* M = F->getParent();
+ F = cast<Function>(M->getOrInsertFunction(Name, FT));
+ F->setLinkage(GlobalValue::WeakLinkage);
+
+ // If we haven't defined the impl function yet, do so now
+ if (F->isDeclaration()) {
+
+ // Get the arguments to the function
+ Function::arg_iterator args = F->arg_begin();
+ Value* Val = args++; Val->setName("Val");
+ Value* Lo = args++; Lo->setName("Lo");
+ Value* Hi = args++; Hi->setName("High");
+
+ // We want to select a range of bits here such that [Hi, Lo] is shifted
+ // down to the low bits. However, it is quite possible that Hi is smaller
+ // than Lo in which case the bits have to be reversed.
+
+ // Create the blocks we will need for the two cases (forward, reverse)
+ BasicBlock* CurBB = new BasicBlock("entry", F);
+ BasicBlock *RevSize = new BasicBlock("revsize", CurBB->getParent());
+ BasicBlock *FwdSize = new BasicBlock("fwdsize", CurBB->getParent());
+ BasicBlock *Compute = new BasicBlock("compute", CurBB->getParent());
+ BasicBlock *Reverse = new BasicBlock("reverse", CurBB->getParent());
+ BasicBlock *RsltBlk = new BasicBlock("result", CurBB->getParent());
+
+ // Cast Hi and Lo to the size of Val so the widths are all the same
+ if (Hi->getType() != Val->getType())
+ Hi = CastInst::createIntegerCast(Hi, Val->getType(), false,
+ "tmp", CurBB);
+ if (Lo->getType() != Val->getType())
+ Lo = CastInst::createIntegerCast(Lo, Val->getType(), false,
+ "tmp", CurBB);
+
+ // Compute a few things that both cases will need, up front.
+ Constant* Zero = ConstantInt::get(Val->getType(), 0);
+ Constant* One = ConstantInt::get(Val->getType(), 1);
+ Constant* AllOnes = ConstantInt::getAllOnesValue(Val->getType());
+
+ // Compare the Hi and Lo bit positions. This is used to determine
+ // which case we have (forward or reverse)
+ ICmpInst *Cmp = new ICmpInst(ICmpInst::ICMP_ULT, Hi, Lo, "less",CurBB);
+ new BranchInst(RevSize, FwdSize, Cmp, CurBB);
+
+ // First, compute the number of bits in the forward case.
+ Instruction* FBitSize =
+ BinaryOperator::createSub(Hi, Lo,"fbits", FwdSize);
+ new BranchInst(Compute, FwdSize);
+
+ // Second, compute the number of bits in the reverse case.
+ Instruction* RBitSize =
+ BinaryOperator::createSub(Lo, Hi, "rbits", RevSize);
+ new BranchInst(Compute, RevSize);
+
+ // Now, compute the bit range. Start by getting the bitsize and the shift
+ // amount (either Hi or Lo) from PHI nodes. Then we compute a mask for
+ // the number of bits we want in the range. We shift the bits down to the
+ // least significant bits, apply the mask to zero out unwanted high bits,
+ // and we have computed the "forward" result. It may still need to be
+ // reversed.
+
+ // Get the BitSize from one of the two subtractions
+ PHINode *BitSize = new PHINode(Val->getType(), "bits", Compute);
+ BitSize->reserveOperandSpace(2);
+ BitSize->addIncoming(FBitSize, FwdSize);
+ BitSize->addIncoming(RBitSize, RevSize);
+
+ // Get the ShiftAmount as the smaller of Hi/Lo
+ PHINode *ShiftAmt = new PHINode(Val->getType(), "shiftamt", Compute);
+ ShiftAmt->reserveOperandSpace(2);
+ ShiftAmt->addIncoming(Lo, FwdSize);
+ ShiftAmt->addIncoming(Hi, RevSize);
+
+ // Increment the bit size
+ Instruction *BitSizePlusOne =
+ BinaryOperator::createAdd(BitSize, One, "bits", Compute);
+
+ // Create a Mask to zero out the high order bits.
+ Instruction* Mask =
+ BinaryOperator::createShl(AllOnes, BitSizePlusOne, "mask", Compute);
+ Mask = BinaryOperator::createNot(Mask, "mask", Compute);
+
+ // Shift the bits down and apply the mask
+ Instruction* FRes =
+ BinaryOperator::createLShr(Val, ShiftAmt, "fres", Compute);
+ FRes = BinaryOperator::createAnd(FRes, Mask, "fres", Compute);
+ new BranchInst(Reverse, RsltBlk, Cmp, Compute);
+
+ // In the Reverse block we already have the forward result in FRes, but we
+ // must reverse it by shifting bits out of FRes from the right and shifting
+ // them into RRes from the left.
+
+ // First set up our loop counters
+ PHINode *Count = new PHINode(Val->getType(), "count", Reverse);
+ Count->reserveOperandSpace(2);
+ Count->addIncoming(BitSizePlusOne, Compute);
+
+ // Next, get the value that we are shifting.
+ PHINode *BitsToShift = new PHINode(Val->getType(), "val", Reverse);
+ BitsToShift->reserveOperandSpace(2);
+ BitsToShift->addIncoming(FRes, Compute);
+
+ // Finally, get the result of the last computation
+ PHINode *RRes = new PHINode(Val->getType(), "rres", Reverse);
+ RRes->reserveOperandSpace(2);
+ RRes->addIncoming(Zero, Compute);
+
+ // Decrement the counter
+ Instruction *Decr = BinaryOperator::createSub(Count, One, "decr", Reverse);
+ Count->addIncoming(Decr, Reverse);
+
+ // Compute the Bit that we want to move
+ Instruction *Bit =
+ BinaryOperator::createAnd(BitsToShift, One, "bit", Reverse);
+
+ // Compute the new value for next iteration.
+ Instruction *NewVal =
+ BinaryOperator::createLShr(BitsToShift, One, "rshift", Reverse);
+ BitsToShift->addIncoming(NewVal, Reverse);
+
+ // Shift the bit into the low bits of the result.
+ Instruction *NewRes =
+ BinaryOperator::createShl(RRes, One, "lshift", Reverse);
+ NewRes = BinaryOperator::createOr(NewRes, Bit, "addbit", Reverse);
+ RRes->addIncoming(NewRes, Reverse);
+
+ // Terminate loop if we've moved all the bits.
+ ICmpInst *Cond =
+ new ICmpInst(ICmpInst::ICMP_EQ, Decr, Zero, "cond", Reverse);
+ new BranchInst(RsltBlk, Reverse, Cond, Reverse);
+
+ // Finally, in the result block, select one of the two results with a PHI
+ // node and return the result.
+ CurBB = RsltBlk;
+ PHINode *BitSelect = new PHINode(Val->getType(), "part_select", CurBB);
+ BitSelect->reserveOperandSpace(2);
+ BitSelect->addIncoming(FRes, Compute);
+ BitSelect->addIncoming(NewRes, Reverse);
+ new ReturnInst(BitSelect, CurBB);
+ }
+
+ // Return a call to the implementation function
+ Value *Args[] = {
+ CI->getOperand(1),
+ CI->getOperand(2),
+ CI->getOperand(3)
+ };
+ return new CallInst(F, Args, sizeof(Args)/sizeof(Args[0]), CI->getName(), CI);
+}
+
+/// Convert the llvm.part.set.iX.iY.iZ intrinsic. This intrinsic takes
+/// four integer arguments (iAny %Value, iAny %Replacement, i32 %Low, i32 %High)
+/// The first two arguments can be any bit width. The result is the same width
+/// as %Value. The operation replaces bits between %Low and %High with the value
+/// in %Replacement. If %Replacement is not the same width, it is truncated or
+/// zero extended as appropriate to fit the bits being replaced. If %Low is
+/// greater than %High then the inverse set of bits are replaced.
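+/// For example (illustrative), setting with %Low = 8 and %High = 15 replaces
+/// bits 8..15 of %Value with the low bits of %Replacement and leaves the
+/// remaining bits of %Value unchanged.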
+/// @brief Lowering of llvm.part.set intrinsic.
+static Instruction *LowerPartSet(CallInst *CI) {
+ // Make sure we're dealing with a part set intrinsic here
+ Function *F = CI->getCalledFunction();
+ const FunctionType *FT = F->getFunctionType();
+ if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
+ FT->getNumParams() != 4 || !FT->getParamType(0)->isInteger() ||
+ !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger() ||
+ !FT->getParamType(3)->isInteger())
+ return CI;
+
+ // Get the intrinsic implementation function by converting all the . to _
+ // in the intrinsic's function name and then reconstructing the function
+ // declaration.
+ std::string Name(F->getName());
+ for (unsigned i = 4; i < Name.length(); ++i)
+ if (Name[i] == '.')
+ Name[i] = '_';
+ Module* M = F->getParent();
+ F = cast<Function>(M->getOrInsertFunction(Name, FT));
+ F->setLinkage(GlobalValue::WeakLinkage);
+
+ // If we haven't defined the impl function yet, do so now
+ if (F->isDeclaration()) {
+ // Get the arguments for the function.
+ Function::arg_iterator args = F->arg_begin();
+ Value* Val = args++; Val->setName("Val");
+ Value* Rep = args++; Rep->setName("Rep");
+ Value* Lo = args++; Lo->setName("Lo");
+ Value* Hi = args++; Hi->setName("Hi");
+
+ // Get some types we need
+ const IntegerType* ValTy = cast<IntegerType>(Val->getType());
+ const IntegerType* RepTy = cast<IntegerType>(Rep->getType());
+ uint32_t ValBits = ValTy->getBitWidth();
+ uint32_t RepBits = RepTy->getBitWidth();
+
+ // Constant Definitions
+ ConstantInt* RepBitWidth = ConstantInt::get(Type::Int32Ty, RepBits);
+ ConstantInt* RepMask = ConstantInt::getAllOnesValue(RepTy);
+ ConstantInt* ValMask = ConstantInt::getAllOnesValue(ValTy);
+ ConstantInt* One = ConstantInt::get(Type::Int32Ty, 1);
+ ConstantInt* ValOne = ConstantInt::get(ValTy, 1);
+ ConstantInt* Zero = ConstantInt::get(Type::Int32Ty, 0);
+ ConstantInt* ValZero = ConstantInt::get(ValTy, 0);
+
+ // Basic blocks we fill in below.
+ BasicBlock* entry = new BasicBlock("entry", F, 0);
+ BasicBlock* large = new BasicBlock("large", F, 0);
+ BasicBlock* small = new BasicBlock("small", F, 0);
+ BasicBlock* reverse = new BasicBlock("reverse", F, 0);
+ BasicBlock* result = new BasicBlock("result", F, 0);
+
+ // BASIC BLOCK: entry
+ // First, compute the number of bits we are replacing, as an i32.
+ ICmpInst* is_forward =
+ new ICmpInst(ICmpInst::ICMP_ULT, Lo, Hi, "", entry);
+ SelectInst* Hi_pn = new SelectInst(is_forward, Hi, Lo, "", entry);
+ SelectInst* Lo_pn = new SelectInst(is_forward, Lo, Hi, "", entry);
+ BinaryOperator* NumBits = BinaryOperator::createSub(Hi_pn, Lo_pn, "",entry);
+ NumBits = BinaryOperator::createAdd(NumBits, One, "", entry);
+ // Now, convert Lo to the bit width of ValTy.
+ if (ValBits > 32) {
+ Lo = new ZExtInst(Lo_pn, ValTy, "", entry);
+ } else if (ValBits < 32) {
+ Lo = new TruncInst(Lo_pn, ValTy, "", entry);
+ }
+ // Determine whether the replacement value is wider than the number of bits
+ // being replaced, and if so mask off its excess high bits.
+ ICmpInst* is_large =
+ new ICmpInst(ICmpInst::ICMP_ULT, NumBits, RepBitWidth, "", entry);
+ new BranchInst(large, small, is_large, entry);
+
+ // BASIC BLOCK: large
+ Instruction* MaskBits =
+ BinaryOperator::createSub(RepBitWidth, NumBits, "", large);
+ MaskBits = CastInst::createIntegerCast(MaskBits, RepMask->getType(),
+ false, "", large);
+ BinaryOperator* Mask1 =
+ BinaryOperator::createLShr(RepMask, MaskBits, "", large);
+ BinaryOperator* Rep2 = BinaryOperator::createAnd(Mask1, Rep, "", large);
+ new BranchInst(small, large);
+
+ // BASIC BLOCK: small
+ PHINode* Rep3 = new PHINode(RepTy, "", small);
+ Rep3->reserveOperandSpace(2);
+ Rep3->addIncoming(Rep2, large);
+ Rep3->addIncoming(Rep, entry);
+ Value* Rep4 = Rep3;
+ if (ValBits > RepBits)
+ Rep4 = new ZExtInst(Rep3, ValTy, "", small);
+ else if (ValBits < RepBits)
+ Rep4 = new TruncInst(Rep3, ValTy, "", small);
+ new BranchInst(result, reverse, is_forward, small);
+
+ // BASIC BLOCK: reverse (reverses the bits of the replacement)
+ // Set up our loop counter as a PHI so we can decrement on each iteration.
+ // We will loop for the number of bits being replaced.
+ PHINode *Count = new PHINode(Type::Int32Ty, "count", reverse);
+ Count->reserveOperandSpace(2);
+ Count->addIncoming(NumBits, small);
+
+ // Get the value that we are shifting bits out of as a PHI because
+ // we'll change this with each iteration.
+ PHINode *BitsToShift = new PHINode(Val->getType(), "val", reverse);
+ BitsToShift->reserveOperandSpace(2);
+ BitsToShift->addIncoming(Rep4, small);
+
+ // Get the result of the last computation or zero on first iteration
+ PHINode *RRes = new PHINode(Val->getType(), "rres", reverse);
+ RRes->reserveOperandSpace(2);
+ RRes->addIncoming(ValZero, small);
+
+ // Decrement the loop counter by one
+ Instruction *Decr = BinaryOperator::createSub(Count, One, "", reverse);
+ Count->addIncoming(Decr, reverse);
+
+ // Get the bit that we want to move into the result
+ Value *Bit = BinaryOperator::createAnd(BitsToShift, ValOne, "", reverse);
+
+ // Compute the new value of the bits to shift for the next iteration.
+ Value *NewVal = BinaryOperator::createLShr(BitsToShift, ValOne,"", reverse);
+ BitsToShift->addIncoming(NewVal, reverse);
+
+ // Shift the bit we extracted into the low bit of the result.
+ Instruction *NewRes = BinaryOperator::createShl(RRes, ValOne, "", reverse);
+ NewRes = BinaryOperator::createOr(NewRes, Bit, "", reverse);
+ RRes->addIncoming(NewRes, reverse);
+
+ // Terminate loop if we've moved all the bits.
+ ICmpInst *Cond = new ICmpInst(ICmpInst::ICMP_EQ, Decr, Zero, "", reverse);
+ new BranchInst(result, reverse, Cond, reverse);
+
+ // BASIC BLOCK: result
+ PHINode *Rplcmnt = new PHINode(Val->getType(), "", result);
+ Rplcmnt->reserveOperandSpace(2);
+ Rplcmnt->addIncoming(NewRes, reverse);
+ Rplcmnt->addIncoming(Rep4, small);
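+ // Build a mask that is zero over bits [Lo, Lo+NumBits) and one elsewhere,
+ // clear that field in Val, then or in the (possibly reversed) replacement
+ // shifted into position.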
+ Value* t0 = CastInst::createIntegerCast(NumBits,ValTy,false,"",result);
+ Value* t1 = BinaryOperator::createShl(ValMask, Lo, "", result);
+ Value* t2 = BinaryOperator::createNot(t1, "", result);
+ Value* t3 = BinaryOperator::createShl(t1, t0, "", result);
+ Value* t4 = BinaryOperator::createOr(t2, t3, "", result);
+ Value* t5 = BinaryOperator::createAnd(t4, Val, "", result);
+ Value* t6 = BinaryOperator::createShl(Rplcmnt, Lo, "", result);
+ Value* Rslt = BinaryOperator::createOr(t5, t6, "part_set", result);
+ new ReturnInst(Rslt, result);
+ }
+
+ // Return a call to the implementation function
+ Value *Args[] = {
+ CI->getOperand(1),
+ CI->getOperand(2),
+ CI->getOperand(3),
+ CI->getOperand(4)
+ };
+ return new CallInst(F, Args, sizeof(Args)/sizeof(Args[0]), CI->getName(), CI);
+}
+
+
+void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
+ Function *Callee = CI->getCalledFunction();
+ assert(Callee && "Cannot lower an indirect call!");
+
+ switch (Callee->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ cerr << "Cannot lower a call to a non-intrinsic function '"
+ << Callee->getName() << "'!\n";
+ abort();
+ default:
+ cerr << "Error: Code generator does not support intrinsic function '"
+ << Callee->getName() << "'!\n";
+ abort();
+
+ // The setjmp/longjmp intrinsics should only exist in the code if it was
+ // never optimized (i.e., right out of the CFE), or if it has been hacked on
+ // by the lowerinvoke pass. In both cases, the right thing to do is to
+ // convert the call to an explicit setjmp or longjmp call.
+ case Intrinsic::setjmp: {
+ static Constant *SetjmpFCache = 0;
+ Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin()+1, CI->op_end(),
+ Type::Int32Ty, SetjmpFCache);
+ if (CI->getType() != Type::VoidTy)
+ CI->replaceAllUsesWith(V);
+ break;
+ }
+ case Intrinsic::sigsetjmp:
+ if (CI->getType() != Type::VoidTy)
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+
+ case Intrinsic::longjmp: {
+ static Constant *LongjmpFCache = 0;
+ ReplaceCallWith("longjmp", CI, CI->op_begin()+1, CI->op_end(),
+ Type::VoidTy, LongjmpFCache);
+ break;
+ }
+
+ case Intrinsic::siglongjmp: {
+ // Insert the call to abort
+ static Constant *AbortFCache = 0;
+ ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(),
+ Type::VoidTy, AbortFCache);
+ break;
+ }
+ case Intrinsic::ctpop:
+ CI->replaceAllUsesWith(LowerCTPOP(CI->getOperand(1), CI));
+ break;
+
+ case Intrinsic::bswap:
+ CI->replaceAllUsesWith(LowerBSWAP(CI->getOperand(1), CI));
+ break;
+
+ case Intrinsic::ctlz:
+ CI->replaceAllUsesWith(LowerCTLZ(CI->getOperand(1), CI));
+ break;
+
+ case Intrinsic::cttz: {
+ // cttz(x) -> ctpop(~X & (X-1))
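+ // ~X & (X-1) yields a mask with a one for each trailing zero bit of X, so
+ // counting its set bits gives cttz(X).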
+ Value *Src = CI->getOperand(1);
+ Value *NotSrc = BinaryOperator::createNot(Src, Src->getName()+".not", CI);
+ Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
+ SrcM1 = BinaryOperator::createSub(Src, SrcM1, "", CI);
+ Src = LowerCTPOP(BinaryOperator::createAnd(NotSrc, SrcM1, "", CI), CI);
+ CI->replaceAllUsesWith(Src);
+ break;
+ }
+
+ case Intrinsic::part_select:
+ CI->replaceAllUsesWith(LowerPartSelect(CI));
+ break;
+
+ case Intrinsic::part_set:
+ CI->replaceAllUsesWith(LowerPartSet(CI));
+ break;
+
+ case Intrinsic::stacksave:
+ case Intrinsic::stackrestore: {
+ static bool Warned = false;
+ if (!Warned)
+ cerr << "WARNING: this target does not support the llvm.stack"
+ << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
+ "save" : "restore") << " intrinsic.\n";
+ Warned = true;
+ if (Callee->getIntrinsicID() == Intrinsic::stacksave)
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+ }
+
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ cerr << "WARNING: this target does not support the llvm."
+ << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
+ "return" : "frame") << "address intrinsic.\n";
+ CI->replaceAllUsesWith(ConstantPointerNull::get(
+ cast<PointerType>(CI->getType())));
+ break;
+
+ case Intrinsic::prefetch:
+ break; // Simply strip out prefetches on unsupported architectures
+
+ case Intrinsic::pcmarker:
+ break; // Simply strip out pcmarker on unsupported architectures
+ case Intrinsic::readcyclecounter: {
+ cerr << "WARNING: this target does not support the llvm.readcyclecoun"
+ << "ter intrinsic. It is being lowered to a constant 0\n";
+ CI->replaceAllUsesWith(ConstantInt::get(Type::Int64Ty, 0));
+ break;
+ }
+
+ case Intrinsic::dbg_stoppoint:
+ case Intrinsic::dbg_region_start:
+ case Intrinsic::dbg_region_end:
+ case Intrinsic::dbg_func_start:
+ case Intrinsic::dbg_declare:
+ break; // Simply strip out debugging intrinsics
+
+ case Intrinsic::eh_exception:
+ case Intrinsic::eh_selector:
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+
+ case Intrinsic::eh_typeid_for:
+    // Return something different from what eh_selector returns.
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+
+ case Intrinsic::var_annotation:
+ break; // Strip out annotate intrinsic
+
+ case Intrinsic::memcpy_i32:
+ case Intrinsic::memcpy_i64: {
+ static Constant *MemcpyFCache = 0;
+ Value *Size = CI->getOperand(3);
+ const Type *IntPtr = TD.getIntPtrType();
+ if (Size->getType()->getPrimitiveSizeInBits() <
+ IntPtr->getPrimitiveSizeInBits())
+ Size = new ZExtInst(Size, IntPtr, "", CI);
+ else if (Size->getType()->getPrimitiveSizeInBits() >
+ IntPtr->getPrimitiveSizeInBits())
+ Size = new TruncInst(Size, IntPtr, "", CI);
+ Value *Ops[3];
+ Ops[0] = CI->getOperand(1);
+ Ops[1] = CI->getOperand(2);
+ Ops[2] = Size;
+ ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
+ MemcpyFCache);
+ break;
+ }
+ case Intrinsic::memmove_i32:
+ case Intrinsic::memmove_i64: {
+ static Constant *MemmoveFCache = 0;
+ Value *Size = CI->getOperand(3);
+ const Type *IntPtr = TD.getIntPtrType();
+ if (Size->getType()->getPrimitiveSizeInBits() <
+ IntPtr->getPrimitiveSizeInBits())
+ Size = new ZExtInst(Size, IntPtr, "", CI);
+ else if (Size->getType()->getPrimitiveSizeInBits() >
+ IntPtr->getPrimitiveSizeInBits())
+ Size = new TruncInst(Size, IntPtr, "", CI);
+ Value *Ops[3];
+ Ops[0] = CI->getOperand(1);
+ Ops[1] = CI->getOperand(2);
+ Ops[2] = Size;
+ ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
+ MemmoveFCache);
+ break;
+ }
+ case Intrinsic::memset_i32:
+ case Intrinsic::memset_i64: {
+ static Constant *MemsetFCache = 0;
+ Value *Size = CI->getOperand(3);
+ const Type *IntPtr = TD.getIntPtrType();
+ if (Size->getType()->getPrimitiveSizeInBits() <
+ IntPtr->getPrimitiveSizeInBits())
+ Size = new ZExtInst(Size, IntPtr, "", CI);
+ else if (Size->getType()->getPrimitiveSizeInBits() >
+ IntPtr->getPrimitiveSizeInBits())
+ Size = new TruncInst(Size, IntPtr, "", CI);
+ Value *Ops[3];
+ Ops[0] = CI->getOperand(1);
+ // Extend the amount to i32.
+ Ops[1] = new ZExtInst(CI->getOperand(2), Type::Int32Ty, "", CI);
+ Ops[2] = Size;
+ ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
+ MemsetFCache);
+ break;
+ }
+ case Intrinsic::sqrt_f32: {
+ static Constant *sqrtfFCache = 0;
+ ReplaceCallWith("sqrtf", CI, CI->op_begin()+1, CI->op_end(),
+ Type::FloatTy, sqrtfFCache);
+ break;
+ }
+ case Intrinsic::sqrt_f64: {
+ static Constant *sqrtFCache = 0;
+ ReplaceCallWith("sqrt", CI, CI->op_begin()+1, CI->op_end(),
+ Type::DoubleTy, sqrtFCache);
+ break;
+ }
+ }
+
+ assert(CI->use_empty() &&
+ "Lowering should have eliminated any uses of the intrinsic call!");
+ CI->eraseFromParent();
+}
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
new file mode 100644
index 0000000..b72704b
--- /dev/null
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -0,0 +1,205 @@
+//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVMTargetMachine class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
+ cl::desc("Print LLVM IR produced by the loop-reduce pass"));
+static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
+ cl::desc("Print LLVM IR input to isel pass"));
+
+FileModel::Model
+LLVMTargetMachine::addPassesToEmitFile(FunctionPassManager &PM,
+ std::ostream &Out,
+ CodeGenFileType FileType,
+ bool Fast) {
+ // Standard LLVM-Level Passes.
+
+ // Run loop strength reduction before anything else.
+ if (!Fast) {
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ if (PrintLSR)
+ PM.add(new PrintFunctionPass("\n\n*** Code after LSR *** \n", &cerr));
+ }
+
+ // FIXME: Implement efficient support for garbage collection intrinsics.
+ PM.add(createLowerGCPass());
+
+ if (!ExceptionHandling)
+ PM.add(createLowerInvokePass(getTargetLowering()));
+
+ // Make sure that no unreachable blocks are instruction selected.
+ PM.add(createUnreachableBlockEliminationPass());
+
+ if (!Fast)
+ PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+ if (PrintISelInput)
+ PM.add(new PrintFunctionPass("\n\n*** Final LLVM Code input to ISel *** \n",
+ &cerr));
+
+ // Ask the target for an isel.
+ if (addInstSelector(PM, Fast))
+ return FileModel::Error;
+
+ // Print the instruction selected machine code...
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+  // Perform register allocation to convert to a concrete machine representation
+ PM.add(createRegisterAllocator());
+
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ // Run post-ra passes.
+ if (addPostRegAlloc(PM, Fast) && PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ PM.add(createPrologEpilogCodeInserter());
+
+ // Second pass scheduler.
+ if (!Fast)
+ PM.add(createPostRAScheduler());
+
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ if (!Fast)
+ PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
+
+ // Fold redundant debug labels.
+ PM.add(createDebugLabelFoldingPass());
+
+ if (PrintMachineCode) // Print the register-allocated code
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ if (addPreEmitPass(PM, Fast) && PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ switch (FileType) {
+ default:
+ break;
+ case TargetMachine::AssemblyFile:
+ if (addAssemblyEmitter(PM, Fast, Out))
+ return FileModel::Error;
+ return FileModel::AsmFile;
+ case TargetMachine::ObjectFile:
+ if (getMachOWriterInfo())
+ return FileModel::MachOFile;
+ else if (getELFWriterInfo())
+ return FileModel::ElfFile;
+ }
+
+ return FileModel::Error;
+}
+
+/// addPassesToEmitFileFinish - If the passes to emit the specified file had to
+/// be split up (e.g., to add an object writer pass), this method can be used to
+/// finish up adding passes to emit the file, if necessary.
+bool LLVMTargetMachine::addPassesToEmitFileFinish(FunctionPassManager &PM,
+ MachineCodeEmitter *MCE,
+ bool Fast) {
+ if (MCE)
+ addSimpleCodeEmitter(PM, Fast, *MCE);
+
+ // Delete machine code for this function
+ PM.add(createMachineCodeDeleter());
+
+ return false; // success!
+}
+
+/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
+/// get machine code emitted. This uses a MachineCodeEmitter object to handle
+/// actually outputting the machine code and resolving things like the address
+/// of functions. This method should return true if machine code emission is
+/// not supported.
+///
+bool LLVMTargetMachine::addPassesToEmitMachineCode(FunctionPassManager &PM,
+ MachineCodeEmitter &MCE,
+ bool Fast) {
+ // Standard LLVM-Level Passes.
+
+ // Run loop strength reduction before anything else.
+ if (!Fast) {
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ if (PrintLSR)
+ PM.add(new PrintFunctionPass("\n\n*** Code after LSR *** \n", &cerr));
+ }
+
+ // FIXME: Implement efficient support for garbage collection intrinsics.
+ PM.add(createLowerGCPass());
+
+ // FIXME: Implement the invoke/unwind instructions!
+ PM.add(createLowerInvokePass(getTargetLowering()));
+
+ // Make sure that no unreachable blocks are instruction selected.
+ PM.add(createUnreachableBlockEliminationPass());
+
+ if (!Fast)
+ PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+ if (PrintISelInput)
+ PM.add(new PrintFunctionPass("\n\n*** Final LLVM Code input to ISel *** \n",
+ &cerr));
+
+ // Ask the target for an isel.
+ if (addInstSelector(PM, Fast))
+ return true;
+
+ // Print the instruction selected machine code...
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+  // Perform register allocation to convert to a concrete machine representation
+ PM.add(createRegisterAllocator());
+
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ // Run post-ra passes.
+ if (addPostRegAlloc(PM, Fast) && PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ PM.add(createPrologEpilogCodeInserter());
+
+ if (PrintMachineCode) // Print the register-allocated code
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ // Second pass scheduler.
+ if (!Fast)
+ PM.add(createPostRAScheduler());
+
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ if (!Fast)
+ PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
+
+ if (addPreEmitPass(PM, Fast) && PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ addCodeEmitter(PM, Fast, MCE);
+
+ // Delete machine code for this function
+ PM.add(createMachineCodeDeleter());
+
+ return false; // success!
+}
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
new file mode 100644
index 0000000..45c1dd0
--- /dev/null
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -0,0 +1,526 @@
+//===-- LiveInterval.cpp - Live Interval Representation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveRange and LiveInterval classes. Given some
+// numbering of the machine instructions, an interval [i, j) is said to be a
+// live interval for register v if there is no instruction with number j' > j
+// such that v is live at j' and there is no instruction with number i' < i such
+// that v is live at i'. In this implementation intervals can have holes,
+// i.e. an interval might look like [1,20), [50,65), [1000,1001). Each
+// individual range is represented as an instance of LiveRange, and the whole
+// interval is represented as an instance of LiveInterval.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include <algorithm>
+#include <map>
+#include <ostream>
+using namespace llvm;
+
+// An example for liveAt():
+//
+// this = [1,4), liveAt(0) will return false. The instruction defining this
+// spans slots [0,3]. The interval belongs to a spilled definition of the
+// variable it represents. This is because slot 1 is used (def slot) and spans
+// up to slot 3 (store slot).
+//
+bool LiveInterval::liveAt(unsigned I) const {
+ Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
+
+ if (r == ranges.begin())
+ return false;
+
+ --r;
+ return r->contains(I);
+}
+
+// overlaps - Return true if the intersection of the two live intervals is
+// not empty.
+//
+// An example for overlaps():
+//
+// 0: A = ...
+// 4: B = ...
+// 8: C = A + B ;; last use of A
+//
+// The live intervals should look like:
+//
+// A = [3, 11)
+// B = [7, x)
+// C = [11, y)
+//
+// A->overlaps(C) should return false since we want to be able to join
+// A and C.
+//
+bool LiveInterval::overlapsFrom(const LiveInterval& other,
+ const_iterator StartPos) const {
+ const_iterator i = begin();
+ const_iterator ie = end();
+ const_iterator j = StartPos;
+ const_iterator je = other.end();
+
+ assert((StartPos->start <= i->start || StartPos == other.begin()) &&
+ StartPos != other.end() && "Bogus start position hint!");
+
+ if (i->start < j->start) {
+ i = std::upper_bound(i, ie, j->start);
+ if (i != ranges.begin()) --i;
+ } else if (j->start < i->start) {
+ ++StartPos;
+ if (StartPos != other.end() && StartPos->start <= i->start) {
+ assert(StartPos < other.end() && i < end());
+ j = std::upper_bound(j, je, i->start);
+ if (j != other.ranges.begin()) --j;
+ }
+ } else {
+ return true;
+ }
+
+ if (j == je) return false;
+
+ while (i != ie) {
+ if (i->start > j->start) {
+ std::swap(i, j);
+ std::swap(ie, je);
+ }
+
+ if (i->end > j->start)
+ return true;
+ ++i;
+ }
+
+ return false;
+}
+
+/// extendIntervalEndTo - This method is used when we want to extend the range
+/// specified by I to end at the specified endpoint. To do this, we should
+/// merge and eliminate all ranges that this will overlap with. The iterator is
+/// not invalidated.
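+///
+/// For example, extending the first range of [4,8:0)[10,14:0) to end at 12
+/// first grows it to [4,12:0); since it then touches [10,14:0) and the value
+/// numbers match, the two ranges are merged into a single [4,14:0).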
+void LiveInterval::extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd) {
+ assert(I != ranges.end() && "Not a valid interval!");
+ unsigned ValId = I->ValId;
+
+ // Search for the first interval that we can't merge with.
+ Ranges::iterator MergeTo = next(I);
+ for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) {
+ assert(MergeTo->ValId == ValId && "Cannot merge with differing values!");
+ }
+
+ // If NewEnd was in the middle of an interval, make sure to get its endpoint.
+ I->end = std::max(NewEnd, prior(MergeTo)->end);
+
+ // Erase any dead ranges.
+ ranges.erase(next(I), MergeTo);
+
+ // If the newly formed range now touches the range after it and if they have
+ // the same value number, merge the two ranges into one range.
+ Ranges::iterator Next = next(I);
+ if (Next != ranges.end() && Next->start <= I->end && Next->ValId == ValId) {
+ I->end = Next->end;
+ ranges.erase(Next);
+ }
+}
+
+
+/// extendIntervalStartTo - This method is used when we want to extend the range
+/// specified by I to start at the specified endpoint. To do this, we should
+/// merge and eliminate all ranges that this will overlap with.
+LiveInterval::Ranges::iterator
+LiveInterval::extendIntervalStartTo(Ranges::iterator I, unsigned NewStart) {
+ assert(I != ranges.end() && "Not a valid interval!");
+ unsigned ValId = I->ValId;
+
+ // Search for the first interval that we can't merge with.
+ Ranges::iterator MergeTo = I;
+ do {
+ if (MergeTo == ranges.begin()) {
+ I->start = NewStart;
+ ranges.erase(MergeTo, I);
+ return I;
+ }
+ assert(MergeTo->ValId == ValId && "Cannot merge with differing values!");
+ --MergeTo;
+ } while (NewStart <= MergeTo->start);
+
+ // If we start in the middle of another interval, just delete a range and
+ // extend that interval.
+ if (MergeTo->end >= NewStart && MergeTo->ValId == ValId) {
+ MergeTo->end = I->end;
+ } else {
+ // Otherwise, extend the interval right after.
+ ++MergeTo;
+ MergeTo->start = NewStart;
+ MergeTo->end = I->end;
+ }
+
+ ranges.erase(next(MergeTo), next(I));
+ return MergeTo;
+}
+
+LiveInterval::iterator
+LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
+ unsigned Start = LR.start, End = LR.end;
+ iterator it = std::upper_bound(From, ranges.end(), Start);
+
+ // If the inserted interval starts in the middle or right at the end of
+ // another interval, just extend that interval to contain the range of LR.
+ if (it != ranges.begin()) {
+ iterator B = prior(it);
+ if (LR.ValId == B->ValId) {
+ if (B->start <= Start && B->end >= Start) {
+ extendIntervalEndTo(B, End);
+ return B;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live ranges with
+ // different ValId's.
+ assert(B->end <= Start &&
+ "Cannot overlap two LiveRanges with differing ValID's"
+ " (did you def the same reg twice in a MachineInstr?)");
+ }
+ }
+
+ // Otherwise, if this range ends in the middle of, or right next to, another
+ // interval, merge it into that interval.
+ if (it != ranges.end())
+ if (LR.ValId == it->ValId) {
+ if (it->start <= End) {
+ it = extendIntervalStartTo(it, Start);
+
+ // If LR is a complete superset of an interval, we may need to grow its
+ // endpoint as well.
+ if (End > it->end)
+ extendIntervalEndTo(it, End);
+ return it;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live ranges with
+ // different ValId's.
+ assert(it->start >= End &&
+ "Cannot overlap two LiveRanges with differing ValID's");
+ }
+
+ // Otherwise, this is just a new range that doesn't interact with anything.
+ // Insert it.
+ return ranges.insert(it, LR);
+}
+
+
+/// removeRange - Remove the specified range from this interval. Note that
+/// the range must already be in this interval in its entirety.
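+///
+/// For example, removing [8,12) from a LiveRange [4,20) splits it into the
+/// two pieces [4,8) and [12,20), both of which keep the original value
+/// number.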
+void LiveInterval::removeRange(unsigned Start, unsigned End) {
+ // Find the LiveRange containing this span.
+ Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
+ assert(I != ranges.begin() && "Range is not in interval!");
+ --I;
+ assert(I->contains(Start) && I->contains(End-1) &&
+ "Range is not entirely in interval!");
+
+ // If the span we are removing is at the start of the LiveRange, adjust it.
+ if (I->start == Start) {
+ if (I->end == End)
+ ranges.erase(I); // Removed the whole LiveRange.
+ else
+ I->start = End;
+ return;
+ }
+
+ // Otherwise if the span we are removing is at the end of the LiveRange,
+ // adjust the other way.
+ if (I->end == End) {
+ I->end = Start;
+ return;
+ }
+
+ // Otherwise, we are splitting the LiveRange into two pieces.
+ unsigned OldEnd = I->end;
+ I->end = Start; // Trim the old interval.
+
+ // Insert the new one.
+ ranges.insert(next(I), LiveRange(End, OldEnd, I->ValId));
+}
+
+/// FindLiveRangeContaining - Return an iterator to the live range that
+/// contains the specified index, or end() if there is none.
+LiveInterval::const_iterator
+LiveInterval::FindLiveRangeContaining(unsigned Idx) const {
+ const_iterator It = std::upper_bound(begin(), end(), Idx);
+ if (It != ranges.begin()) {
+ --It;
+ if (It->contains(Idx))
+ return It;
+ }
+
+ return end();
+}
+
+LiveInterval::iterator
+LiveInterval::FindLiveRangeContaining(unsigned Idx) {
+ iterator It = std::upper_bound(begin(), end(), Idx);
+ if (It != begin()) {
+ --It;
+ if (It->contains(Idx))
+ return It;
+ }
+
+ return end();
+}
+
+/// join - Join two live intervals (this, and other) together. This applies
+/// mappings to the value numbers in the LHS/RHS intervals as specified. If
+/// the intervals are not joinable, this aborts.
+void LiveInterval::join(LiveInterval &Other, int *LHSValNoAssignments,
+ int *RHSValNoAssignments,
+ SmallVector<std::pair<unsigned,
+ unsigned>, 16> &NewValueNumberInfo) {
+
+ // Try to do the least amount of work possible. In particular, if there are
+ // more liverange chunks in the other set than there are in the 'this' set,
+  // swap sets to merge in the fewest chunks possible.
+ //
+ // Also, if one range is a physreg and one is a vreg, we always merge from the
+ // vreg into the physreg, which leaves the vreg intervals pristine.
+ if ((Other.ranges.size() > ranges.size() &&
+ MRegisterInfo::isVirtualRegister(reg)) ||
+ MRegisterInfo::isPhysicalRegister(Other.reg)) {
+ swap(Other);
+ std::swap(LHSValNoAssignments, RHSValNoAssignments);
+ }
+
+ // Determine if any of our live range values are mapped. This is uncommon, so
+ // we want to avoid the interval scan if not.
+ bool MustMapCurValNos = false;
+ for (unsigned i = 0, e = getNumValNums(); i != e; ++i) {
+ if (ValueNumberInfo[i].first == ~2U) continue; // tombstone value #
+ if (i != (unsigned)LHSValNoAssignments[i]) {
+ MustMapCurValNos = true;
+ break;
+ }
+ }
+
+ // If we have to apply a mapping to our base interval assignment, rewrite it
+ // now.
+ if (MustMapCurValNos) {
+ // Map the first live range.
+ iterator OutIt = begin();
+ OutIt->ValId = LHSValNoAssignments[OutIt->ValId];
+ ++OutIt;
+ for (iterator I = OutIt, E = end(); I != E; ++I) {
+ OutIt->ValId = LHSValNoAssignments[I->ValId];
+
+ // If this live range has the same value # as its immediate predecessor,
+ // and if they are neighbors, remove one LiveRange. This happens when we
+      // have [0,4:0)[4,7:1) and map 0/1 onto the same value #.
+ if (OutIt->ValId == (OutIt-1)->ValId && (OutIt-1)->end == OutIt->start) {
+ (OutIt-1)->end = OutIt->end;
+ } else {
+ if (I != OutIt) {
+ OutIt->start = I->start;
+ OutIt->end = I->end;
+ }
+
+ // Didn't merge, on to the next one.
+ ++OutIt;
+ }
+ }
+
+ // If we merge some live ranges, chop off the end.
+ ranges.erase(OutIt, end());
+ }
+
+ // Okay, now insert the RHS live ranges into the LHS.
+ iterator InsertPos = begin();
+ for (iterator I = Other.begin(), E = Other.end(); I != E; ++I) {
+ // Map the ValId in the other live range to the current live range.
+ I->ValId = RHSValNoAssignments[I->ValId];
+ InsertPos = addRangeFrom(*I, InsertPos);
+ }
+
+ ValueNumberInfo.clear();
+ ValueNumberInfo.append(NewValueNumberInfo.begin(), NewValueNumberInfo.end());
+ weight += Other.weight;
+ if (Other.preference && !preference)
+ preference = Other.preference;
+}
+
+/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
+/// interval as the specified value number. The LiveRanges in RHS are
+/// allowed to overlap with LiveRanges in the current interval, but only if
+/// the overlapping LiveRanges have the specified value number.
+void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
+ unsigned LHSValNo) {
+ // TODO: Make this more efficient.
+ iterator InsertPos = begin();
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ // Map the ValId in the other live range to the current live range.
+ LiveRange Tmp = *I;
+ Tmp.ValId = LHSValNo;
+ InsertPos = addRangeFrom(Tmp, InsertPos);
+ }
+}
+
+
+/// MergeInClobberRanges - For any live ranges that are not defined in the
+/// current interval, but are defined in the Clobbers interval, mark them
+/// used with an unknown definition value.
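+///
+/// For example, merging a clobber range [2,14) into an interval holding
+/// [0,4:0)[12,16:0) first trims the clobber against both neighbors and then
+/// inserts it, yielding [0,4:0)[4,12:c)[12,16:0), where c is the fresh value
+/// number allocated for clobbers below.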
+void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers) {
+ if (Clobbers.begin() == Clobbers.end()) return;
+
+ // Find a value # to use for the clobber ranges. If there is already a value#
+ // for unknown values, use it.
+  // FIXME: Use a single sentinel number for these!
+ unsigned ClobberValNo = getNextValue(~0U, 0);
+
+ iterator IP = begin();
+ for (const_iterator I = Clobbers.begin(), E = Clobbers.end(); I != E; ++I) {
+ unsigned Start = I->start, End = I->end;
+ IP = std::upper_bound(IP, end(), Start);
+
+ // If the start of this range overlaps with an existing liverange, trim it.
+ if (IP != begin() && IP[-1].end > Start) {
+ Start = IP[-1].end;
+ // Trimmed away the whole range?
+ if (Start >= End) continue;
+ }
+ // If the end of this range overlaps with an existing liverange, trim it.
+ if (IP != end() && End > IP->start) {
+ End = IP->start;
+ // If this trimmed away the whole range, ignore it.
+ if (Start == End) continue;
+ }
+
+ // Insert the clobber interval.
+ IP = addRangeFrom(LiveRange(Start, End, ClobberValNo), IP);
+ }
+}
+
+/// MergeValueNumberInto - This method is called when two value numbers
+/// are found to be equivalent. This eliminates V1, replacing all
+/// LiveRanges that carry the V1 value number with the V2 value number. This
+/// can cause merging of V1/V2 value numbers and compaction of the value space.
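+///
+/// For example, merging value #1 into value #0 on an interval with ranges
+/// [0,4:0)[4,8:1) relabels the second range as value #0 and, because the two
+/// ranges then touch with the same value number, collapses them into [0,8:0).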
+void LiveInterval::MergeValueNumberInto(unsigned V1, unsigned V2) {
+ assert(V1 != V2 && "Identical value#'s are always equivalent!");
+
+ // This code actually merges the (numerically) larger value number into the
+ // smaller value number, which is likely to allow us to compactify the value
+ // space. The only thing we have to be careful of is to preserve the
+ // instruction that defines the result value.
+
+ // Make sure V2 is smaller than V1.
+ if (V1 < V2) {
+ setValueNumberInfo(V1, getValNumInfo(V2));
+ std::swap(V1, V2);
+ }
+
+ // Merge V1 live ranges into V2.
+ for (iterator I = begin(); I != end(); ) {
+ iterator LR = I++;
+ if (LR->ValId != V1) continue; // Not a V1 LiveRange.
+
+ // Okay, we found a V1 live range. If it had a previous, touching, V2 live
+ // range, extend it.
+ if (LR != begin()) {
+ iterator Prev = LR-1;
+ if (Prev->ValId == V2 && Prev->end == LR->start) {
+ Prev->end = LR->end;
+
+ // Erase this live-range.
+ ranges.erase(LR);
+ I = Prev+1;
+ LR = Prev;
+ }
+ }
+
+ // Okay, now we have a V1 or V2 live range that is maximally merged forward.
+ // Ensure that it is a V2 live-range.
+ LR->ValId = V2;
+
+ // If we can merge it into later V2 live ranges, do so now. We ignore any
+ // following V1 live ranges, as they will be merged in subsequent iterations
+ // of the loop.
+ if (I != end()) {
+ if (I->start == LR->end && I->ValId == V2) {
+ LR->end = I->end;
+ ranges.erase(I);
+ I = LR+1;
+ }
+ }
+ }
+
+ // Now that V1 is dead, remove it. If it is the largest value number, just
+ // nuke it (and any other deleted values neighboring it), otherwise mark it as
+ // ~1U so it can be nuked later.
+ if (V1 == getNumValNums()-1) {
+ do {
+ ValueNumberInfo.pop_back();
+ } while (ValueNumberInfo.back().first == ~1U);
+ } else {
+ ValueNumberInfo[V1].first = ~1U;
+ }
+}
+
+unsigned LiveInterval::getSize() const {
+ unsigned Sum = 0;
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ Sum += I->end - I->start;
+ return Sum;
+}
+
+std::ostream& llvm::operator<<(std::ostream& os, const LiveRange &LR) {
+ return os << '[' << LR.start << ',' << LR.end << ':' << LR.ValId << ")";
+}
+
+void LiveRange::dump() const {
+ cerr << *this << "\n";
+}
+
+void LiveInterval::print(std::ostream &OS, const MRegisterInfo *MRI) const {
+ if (MRI && MRegisterInfo::isPhysicalRegister(reg))
+ OS << MRI->getName(reg);
+ else
+ OS << "%reg" << reg;
+
+ OS << ',' << weight;
+
+ if (empty())
+ OS << "EMPTY";
+ else {
+ OS << " = ";
+ for (LiveInterval::Ranges::const_iterator I = ranges.begin(),
+ E = ranges.end(); I != E; ++I)
+ OS << *I;
+ }
+
+ // Print value number info.
+ if (getNumValNums()) {
+ OS << " ";
+ for (unsigned i = 0; i != getNumValNums(); ++i) {
+ if (i) OS << " ";
+ OS << i << "@";
+ if (ValueNumberInfo[i].first == ~0U) {
+ OS << "?";
+ } else {
+ OS << ValueNumberInfo[i].first;
+ }
+ }
+ }
+}
+
+void LiveInterval::dump() const {
+ cerr << *this << "\n";
+}
+
+
+void LiveRange::print(std::ostream &os) const {
+ os << *this;
+}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
new file mode 100644
index 0000000..369493f
--- /dev/null
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -0,0 +1,692 @@
+//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveInterval analysis pass which is used
+// by the Linear Scan Register allocator. This pass linearizes the
+// basic blocks of the function in DFS order and uses the
+// LiveVariables pass to conservatively compute live intervals for
+// each virtual and physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "liveintervals"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "VirtRegMap.h"
+#include "llvm/Value.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+STATISTIC(numIntervals, "Number of original intervals");
+STATISTIC(numIntervalsAfter, "Number of intervals after coalescing");
+STATISTIC(numFolded , "Number of loads/stores folded into instructions");
+
+char LiveIntervals::ID = 0;
+namespace {
+ RegisterPass<LiveIntervals> X("liveintervals", "Live Interval Analysis");
+}
+
+void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<LiveVariables>();
+ AU.addRequired<LiveVariables>();
+ AU.addPreservedID(PHIEliminationID);
+ AU.addRequiredID(PHIEliminationID);
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ AU.addRequired<LoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveIntervals::releaseMemory() {
+ mi2iMap_.clear();
+ i2miMap_.clear();
+ r2iMap_.clear();
+}
+
+/// runOnMachineFunction - Compute live intervals for the whole function
+///
+bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ tm_ = &fn.getTarget();
+ mri_ = tm_->getRegisterInfo();
+ tii_ = tm_->getInstrInfo();
+ lv_ = &getAnalysis<LiveVariables>();
+ allocatableRegs_ = mri_->getAllocatableSet(fn);
+
+ // Number MachineInstrs and MachineBasicBlocks.
+  // Initialize MBB indexes to a sentinel.
+ MBB2IdxMap.resize(mf_->getNumBlockIDs(), ~0U);
+
+ unsigned MIIndex = 0;
+ for (MachineFunction::iterator MBB = mf_->begin(), E = mf_->end();
+ MBB != E; ++MBB) {
+ // Set the MBB2IdxMap entry for this MBB.
+ MBB2IdxMap[MBB->getNumber()] = MIIndex;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ bool inserted = mi2iMap_.insert(std::make_pair(I, MIIndex)).second;
+ assert(inserted && "multiple MachineInstr -> index mappings");
+ i2miMap_.push_back(I);
+ MIIndex += InstrSlots::NUM;
+ }
+ }
+
+ computeIntervals();
+
+ numIntervals += getNumIntervals();
+
+ DOUT << "********** INTERVALS **********\n";
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ I->second.print(DOUT, mri_);
+ DOUT << "\n";
+ }
+
+ numIntervalsAfter += getNumIntervals();
+ DEBUG(dump());
+ return true;
+}
+
+/// print - Implement the dump method.
+void LiveIntervals::print(std::ostream &O, const Module* ) const {
+ O << "********** INTERVALS **********\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ I->second.print(DOUT, mri_);
+ DOUT << "\n";
+ }
+
+ O << "********** MACHINEINSTRS **********\n";
+ for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+ mbbi != mbbe; ++mbbi) {
+ O << ((Value*)mbbi->getBasicBlock())->getName() << ":\n";
+ for (MachineBasicBlock::iterator mii = mbbi->begin(),
+ mie = mbbi->end(); mii != mie; ++mii) {
+ O << getInstructionIndex(mii) << '\t' << *mii;
+ }
+ }
+}
+
+// Not called?
+/// CreateNewLiveInterval - Create a new live interval with the given live
+/// ranges. The new live interval will have an infinite spill weight.
+LiveInterval&
+LiveIntervals::CreateNewLiveInterval(const LiveInterval *LI,
+ const std::vector<LiveRange> &LRs) {
+ const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(LI->reg);
+
+ // Create a new virtual register for the spill interval.
+ unsigned NewVReg = mf_->getSSARegMap()->createVirtualRegister(RC);
+
+ // Replace the old virtual registers in the machine operands with the shiny
+ // new one.
+ for (std::vector<LiveRange>::const_iterator
+ I = LRs.begin(), E = LRs.end(); I != E; ++I) {
+ unsigned Index = getBaseIndex(I->start);
+ unsigned End = getBaseIndex(I->end - 1) + InstrSlots::NUM;
+
+ for (; Index != End; Index += InstrSlots::NUM) {
+ // Skip deleted instructions
+ while (Index != End && !getInstructionFromIndex(Index))
+ Index += InstrSlots::NUM;
+
+ if (Index == End) break;
+
+ MachineInstr *MI = getInstructionFromIndex(Index);
+
+ for (unsigned J = 0, e = MI->getNumOperands(); J != e; ++J) {
+ MachineOperand &MOp = MI->getOperand(J);
+ if (MOp.isRegister() && MOp.getReg() == LI->reg)
+ MOp.setReg(NewVReg);
+ }
+ }
+ }
+
+ LiveInterval &NewLI = getOrCreateInterval(NewVReg);
+
+ // The spill weight is now infinity as it cannot be spilled again
+ NewLI.weight = float(HUGE_VAL);
+
+ for (std::vector<LiveRange>::const_iterator
+ I = LRs.begin(), E = LRs.end(); I != E; ++I) {
+ DOUT << " Adding live range " << *I << " to new interval\n";
+ NewLI.addRange(*I);
+ }
+
+ DOUT << "Created new live interval " << NewLI << "\n";
+ return NewLI;
+}
+
+std::vector<LiveInterval*> LiveIntervals::
+addIntervalsForSpills(const LiveInterval &li, VirtRegMap &vrm, int slot) {
+ // since this is called after the analysis is done we don't know if
+ // LiveVariables is available
+ lv_ = getAnalysisToUpdate<LiveVariables>();
+
+ std::vector<LiveInterval*> added;
+
+ assert(li.weight != HUGE_VALF &&
+ "attempt to spill already spilled interval!");
+
+ DOUT << "\t\t\t\tadding intervals for spills for interval: ";
+ li.print(DOUT, mri_);
+ DOUT << '\n';
+
+ const TargetRegisterClass* rc = mf_->getSSARegMap()->getRegClass(li.reg);
+
+ for (LiveInterval::Ranges::const_iterator
+ i = li.ranges.begin(), e = li.ranges.end(); i != e; ++i) {
+ unsigned index = getBaseIndex(i->start);
+ unsigned end = getBaseIndex(i->end-1) + InstrSlots::NUM;
+ for (; index != end; index += InstrSlots::NUM) {
+ // skip deleted instructions
+ while (index != end && !getInstructionFromIndex(index))
+ index += InstrSlots::NUM;
+ if (index == end) break;
+
+ MachineInstr *MI = getInstructionFromIndex(index);
+
+ RestartInstruction:
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& mop = MI->getOperand(i);
+ if (mop.isRegister() && mop.getReg() == li.reg) {
+ MachineInstr *fmi = li.remat ? NULL
+ : mri_->foldMemoryOperand(MI, i, slot);
+ if (fmi) {
+ // Attempt to fold the memory reference into the instruction. If we
+ // can do this, we don't need to insert spill code.
+ if (lv_)
+ lv_->instructionChanged(MI, fmi);
+ MachineBasicBlock &MBB = *MI->getParent();
+ vrm.virtFolded(li.reg, MI, i, fmi);
+ mi2iMap_.erase(MI);
+ i2miMap_[index/InstrSlots::NUM] = fmi;
+ mi2iMap_[fmi] = index;
+ MI = MBB.insert(MBB.erase(MI), fmi);
+ ++numFolded;
+ // Folding the load/store can completely change the instruction in
+ // unpredictable ways, rescan it from the beginning.
+ goto RestartInstruction;
+ } else {
+ // Create a new virtual register for the spill interval.
+ unsigned NewVReg = mf_->getSSARegMap()->createVirtualRegister(rc);
+
+ // Scan all of the operands of this instruction rewriting operands
+ // to use NewVReg instead of li.reg as appropriate. We do this for
+ // two reasons:
+ //
+ // 1. If the instr reads the same spilled vreg multiple times, we
+ // want to reuse the NewVReg.
+ // 2. If the instr is a two-addr instruction, we are required to
+ // keep the src/dst regs pinned.
+ //
+ // Keep track of whether we replace a use and/or def so that we can
+ // create the spill interval with the appropriate range.
+ mop.setReg(NewVReg);
+
+ bool HasUse = mop.isUse();
+ bool HasDef = mop.isDef();
+ for (unsigned j = i+1, e = MI->getNumOperands(); j != e; ++j) {
+ if (MI->getOperand(j).isReg() &&
+ MI->getOperand(j).getReg() == li.reg) {
+ MI->getOperand(j).setReg(NewVReg);
+ HasUse |= MI->getOperand(j).isUse();
+ HasDef |= MI->getOperand(j).isDef();
+ }
+ }
+
+ // create a new register for this spill
+ vrm.grow();
+ if (li.remat)
+ vrm.setVirtIsReMaterialized(NewVReg, li.remat);
+ vrm.assignVirt2StackSlot(NewVReg, slot);
+ LiveInterval &nI = getOrCreateInterval(NewVReg);
+ nI.remat = li.remat;
+ assert(nI.empty());
+
+ // the spill weight is now infinity as it
+ // cannot be spilled again
+ nI.weight = HUGE_VALF;
+
+ if (HasUse) {
+ LiveRange LR(getLoadIndex(index), getUseIndex(index),
+ nI.getNextValue(~0U, 0));
+ DOUT << " +" << LR;
+ nI.addRange(LR);
+ }
+ if (HasDef) {
+ LiveRange LR(getDefIndex(index), getStoreIndex(index),
+ nI.getNextValue(~0U, 0));
+ DOUT << " +" << LR;
+ nI.addRange(LR);
+ }
+
+ added.push_back(&nI);
+
+ // update live variables if it is available
+ if (lv_)
+ lv_->addVirtualRegisterKilled(NewVReg, MI);
+
+ DOUT << "\t\t\t\tadded new interval: ";
+ nI.print(DOUT, mri_);
+ DOUT << '\n';
+ }
+ }
+ }
+ }
+ }
+
+ return added;
+}
+
+void LiveIntervals::printRegName(unsigned reg) const {
+ if (MRegisterInfo::isPhysicalRegister(reg))
+ cerr << mri_->getName(reg);
+ else
+ cerr << "%reg" << reg;
+}
+
+/// isReDefinedByTwoAddr - Returns true if the Reg re-definition is due to
+/// two addr elimination.
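+/// (For instance, after two-address elimination an instruction of the form
+/// "R1 = ADD R1, R2" both defines and uses R1, with the use operand tied to
+/// the def operand via the TOI::TIED_TO constraint checked below.)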
+static bool isReDefinedByTwoAddr(MachineInstr *MI, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO1 = MI->getOperand(i);
+ if (MO1.isRegister() && MO1.isDef() && MO1.getReg() == Reg) {
+ for (unsigned j = i+1; j < e; ++j) {
+ MachineOperand &MO2 = MI->getOperand(j);
+ if (MO2.isRegister() && MO2.isUse() && MO2.getReg() == Reg &&
+ MI->getInstrDescriptor()->
+ getOperandConstraint(j, TOI::TIED_TO) == (int)i)
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
+ MachineBasicBlock::iterator mi,
+ unsigned MIIdx,
+ LiveInterval &interval) {
+ DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg));
+ LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
+
+ // Virtual registers may be defined multiple times (due to phi
+ // elimination and 2-addr elimination). Much of what we do only has to be
+ // done once for the vreg. We use an empty interval to detect the first
+ // time we see a vreg.
+ if (interval.empty()) {
+    // Remember if the definition can be rematerialized. All loads from fixed
+ // stack slots are re-materializable. The target may permit other
+ // instructions to be re-materialized as well.
+ int FrameIdx = 0;
+ if (vi.DefInst &&
+ (tii_->isTriviallyReMaterializable(vi.DefInst) ||
+ (tii_->isLoadFromStackSlot(vi.DefInst, FrameIdx) &&
+ mf_->getFrameInfo()->isFixedObjectIndex(FrameIdx))))
+ interval.remat = vi.DefInst;
+
+ // Get the Idx of the defining instructions.
+ unsigned defIndex = getDefIndex(MIIdx);
+
+ unsigned ValNum;
+ unsigned SrcReg, DstReg;
+ if (!tii_->isMoveInstr(*mi, SrcReg, DstReg))
+ ValNum = interval.getNextValue(~0U, 0);
+ else
+ ValNum = interval.getNextValue(defIndex, SrcReg);
+
+ assert(ValNum == 0 && "First value in interval is not 0?");
+ ValNum = 0; // Clue in the optimizer.
+
+ // Loop over all of the blocks that the vreg is defined in. There are
+ // two cases we have to handle here. The most common case is a vreg
+ // whose lifetime is contained within a basic block. In this case there
+ // will be a single kill, in MBB, which comes after the definition.
+ if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) {
+ // FIXME: what about dead vars?
+ unsigned killIdx;
+ if (vi.Kills[0] != mi)
+ killIdx = getUseIndex(getInstructionIndex(vi.Kills[0]))+1;
+ else
+ killIdx = defIndex+1;
+
+ // If the kill happens after the definition, we have an intra-block
+ // live range.
+ if (killIdx > defIndex) {
+ assert(vi.AliveBlocks.none() &&
+ "Shouldn't be alive across any blocks!");
+ LiveRange LR(defIndex, killIdx, ValNum);
+ interval.addRange(LR);
+ DOUT << " +" << LR << "\n";
+ return;
+ }
+ }
+
+ // The other case we handle is when a virtual register lives to the end
+ // of the defining block, potentially live across some blocks, then is
+ // live into some number of blocks, but gets killed. Start by adding a
+ // range that goes from this definition to the end of the defining block.
+ LiveRange NewLR(defIndex,
+ getInstructionIndex(&mbb->back()) + InstrSlots::NUM,
+ ValNum);
+ DOUT << " +" << NewLR;
+ interval.addRange(NewLR);
+
+ // Iterate over all of the blocks that the variable is completely
+    // live in, adding [instrIndex(begin), instrIndex(end)+4) to the
+ // live interval.
+ for (unsigned i = 0, e = vi.AliveBlocks.size(); i != e; ++i) {
+ if (vi.AliveBlocks[i]) {
+ MachineBasicBlock *MBB = mf_->getBlockNumbered(i);
+ if (!MBB->empty()) {
+ LiveRange LR(getMBBStartIdx(i),
+ getInstructionIndex(&MBB->back()) + InstrSlots::NUM,
+ ValNum);
+ interval.addRange(LR);
+ DOUT << " +" << LR;
+ }
+ }
+ }
+
+ // Finally, this virtual register is live from the start of any killing
+ // block to the 'use' slot of the killing instruction.
+ for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
+ MachineInstr *Kill = vi.Kills[i];
+ LiveRange LR(getMBBStartIdx(Kill->getParent()),
+ getUseIndex(getInstructionIndex(Kill))+1,
+ ValNum);
+ interval.addRange(LR);
+ DOUT << " +" << LR;
+ }
+
+ } else {
+ // Can no longer safely assume definition is rematerializable.
+ interval.remat = NULL;
+
+ // If this is the second time we see a virtual register definition, it
+ // must be due to phi elimination or two addr elimination. If this is
+ // the result of two address elimination, then the vreg is one of the
+    // def-and-use register operands.
+ if (isReDefinedByTwoAddr(mi, interval.reg, tii_)) {
+ // If this is a two-address definition, then we have already processed
+ // the live range. The only problem is that we didn't realize there
+ // are actually two values in the live interval. Because of this we
+      // need to take the LiveRange that defines this register and split it
+ // into two values.
+ unsigned DefIndex = getDefIndex(getInstructionIndex(vi.DefInst));
+ unsigned RedefIndex = getDefIndex(MIIdx);
+
+ // Delete the initial value, which should be short and continuous,
+ // because the 2-addr copy must be in the same MBB as the redef.
+ interval.removeRange(DefIndex, RedefIndex);
+
+ // Two-address vregs should always only be redefined once. This means
+ // that at this point, there should be exactly one value number in it.
+ assert(interval.containsOneValue() && "Unexpected 2-addr liveint!");
+
+ // The new value number (#1) is defined by the instruction we claimed
+ // defined value #0.
+ unsigned ValNo = interval.getNextValue(0, 0);
+ interval.setValueNumberInfo(1, interval.getValNumInfo(0));
+
+ // Value#0 is now defined by the 2-addr instruction.
+ interval.setValueNumberInfo(0, std::make_pair(~0U, 0U));
+
+ // Add the new live interval which replaces the range for the input copy.
+ LiveRange LR(DefIndex, RedefIndex, ValNo);
+ DOUT << " replace range with " << LR;
+ interval.addRange(LR);
+
+ // If this redefinition is dead, we need to add a dummy unit live
+ // range covering the def slot.
+ if (lv_->RegisterDefIsDead(mi, interval.reg))
+ interval.addRange(LiveRange(RedefIndex, RedefIndex+1, 0));
+
+ DOUT << " RESULT: ";
+ interval.print(DOUT, mri_);
+
+ } else {
+ // Otherwise, this must be because of phi elimination. If this is the
+ // first redefinition of the vreg that we have seen, go back and change
+ // the live range in the PHI block to be a different value number.
+ if (interval.containsOneValue()) {
+ assert(vi.Kills.size() == 1 &&
+ "PHI elimination vreg should have one kill, the PHI itself!");
+
+ // Remove the old range that we now know has an incorrect number.
+ MachineInstr *Killer = vi.Kills[0];
+ unsigned Start = getMBBStartIdx(Killer->getParent());
+ unsigned End = getUseIndex(getInstructionIndex(Killer))+1;
+ DOUT << " Removing [" << Start << "," << End << "] from: ";
+ interval.print(DOUT, mri_); DOUT << "\n";
+ interval.removeRange(Start, End);
+ DOUT << " RESULT: "; interval.print(DOUT, mri_);
+
+ // Replace the interval with one of a NEW value number. Note that this
+ // value number isn't actually defined by an instruction, weird huh? :)
+ LiveRange LR(Start, End, interval.getNextValue(~0U, 0));
+ DOUT << " replace range with " << LR;
+ interval.addRange(LR);
+ DOUT << " RESULT: "; interval.print(DOUT, mri_);
+ }
+
+ // In the case of PHI elimination, each variable definition is only
+ // live until the end of the block. We've already taken care of the
+ // rest of the live range.
+ unsigned defIndex = getDefIndex(MIIdx);
+
+ unsigned ValNum;
+ unsigned SrcReg, DstReg;
+ if (!tii_->isMoveInstr(*mi, SrcReg, DstReg))
+ ValNum = interval.getNextValue(~0U, 0);
+ else
+ ValNum = interval.getNextValue(defIndex, SrcReg);
+
+ LiveRange LR(defIndex,
+ getInstructionIndex(&mbb->back()) + InstrSlots::NUM, ValNum);
+ interval.addRange(LR);
+ DOUT << " +" << LR;
+ }
+ }
+
+ DOUT << '\n';
+}
+
+void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator mi,
+ unsigned MIIdx,
+ LiveInterval &interval,
+ unsigned SrcReg) {
+  // A physical register cannot be live across basic blocks, so its
+ // lifetime must end somewhere in its defining basic block.
+ DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg));
+
+ unsigned baseIndex = MIIdx;
+ unsigned start = getDefIndex(baseIndex);
+ unsigned end = start;
+
+ // If it is not used after definition, it is considered dead at
+ // the instruction defining it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
+ if (lv_->RegisterDefIsDead(mi, interval.reg)) {
+ DOUT << " dead";
+ end = getDefIndex(start) + 1;
+ goto exit;
+ }
+
+ // If it is not dead on definition, it must be killed by a
+ // subsequent instruction. Hence its interval is:
+ // [defSlot(def), useSlot(kill)+1)
+ while (++mi != MBB->end()) {
+ baseIndex += InstrSlots::NUM;
+ if (lv_->KillsRegister(mi, interval.reg)) {
+ DOUT << " killed";
+ end = getUseIndex(baseIndex) + 1;
+ goto exit;
+ } else if (lv_->ModifiesRegister(mi, interval.reg)) {
+ // Another instruction redefines the register before it is ever read.
+ // Then the register is essentially dead at the instruction that defines
+ // it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
+ DOUT << " dead";
+ end = getDefIndex(start) + 1;
+ goto exit;
+ }
+ }
+
+  // The only case in which we should have a dead physreg here without a
+  // killing instruction, or an instruction where we know it's dead, is if it
+  // is live-in to the function and never used.
+ assert(!SrcReg && "physreg was not killed in defining block!");
+ end = getDefIndex(start) + 1; // It's dead.
+
+exit:
+ assert(start < end && "did not find end of interval?");
+
+ // Already exists? Extend old live interval.
+ LiveInterval::iterator OldLR = interval.FindLiveRangeContaining(start);
+ unsigned Id = (OldLR != interval.end())
+ ? OldLR->ValId
+ : interval.getNextValue(SrcReg != 0 ? start : ~0U, SrcReg);
+ LiveRange LR(start, end, Id);
+ interval.addRange(LR);
+ DOUT << " +" << LR << '\n';
+}
+
+void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned MIIdx,
+ unsigned reg) {
+ if (MRegisterInfo::isVirtualRegister(reg))
+ handleVirtualRegisterDef(MBB, MI, MIIdx, getOrCreateInterval(reg));
+ else if (allocatableRegs_[reg]) {
+ unsigned SrcReg, DstReg;
+ if (!tii_->isMoveInstr(*MI, SrcReg, DstReg))
+ SrcReg = 0;
+ handlePhysicalRegisterDef(MBB, MI, MIIdx, getOrCreateInterval(reg), SrcReg);
+ // Def of a register also defines its sub-registers.
+ for (const unsigned* AS = mri_->getSubRegisters(reg); *AS; ++AS)
+ // Avoid processing some defs more than once.
+ if (!MI->findRegisterDefOperand(*AS))
+ handlePhysicalRegisterDef(MBB, MI, MIIdx, getOrCreateInterval(*AS), 0);
+ }
+}
+
+void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
+ unsigned MIIdx,
+ LiveInterval &interval, bool isAlias) {
+ DOUT << "\t\tlivein register: "; DEBUG(printRegName(interval.reg));
+
+  // Look for kills; if the register reaches a def before it's killed, then it
+  // shouldn't be considered a live-in.
+ MachineBasicBlock::iterator mi = MBB->begin();
+ unsigned baseIndex = MIIdx;
+ unsigned start = baseIndex;
+ unsigned end = start;
+ while (mi != MBB->end()) {
+ if (lv_->KillsRegister(mi, interval.reg)) {
+ DOUT << " killed";
+ end = getUseIndex(baseIndex) + 1;
+ goto exit;
+ } else if (lv_->ModifiesRegister(mi, interval.reg)) {
+ // Another instruction redefines the register before it is ever read.
+ // Then the register is essentially dead at the instruction that defines
+ // it. Hence its interval is:
+ // [defSlot(def), defSlot(def)+1)
+ DOUT << " dead";
+ end = getDefIndex(start) + 1;
+ goto exit;
+ }
+
+ baseIndex += InstrSlots::NUM;
+ ++mi;
+ }
+
+exit:
+ // Live-in register might not be used at all.
+ if (end == MIIdx) {
+ if (isAlias) {
+ DOUT << " dead";
+ end = getDefIndex(MIIdx) + 1;
+ } else {
+ DOUT << " live through";
+ end = baseIndex;
+ }
+ }
+
+ LiveRange LR(start, end, interval.getNextValue(~0U, 0));
+ DOUT << " +" << LR << '\n';
+ interval.addRange(LR);
+}
+
+/// computeIntervals - computes the live intervals for virtual
+/// registers. For some ordering of the machine instructions [1,N], a
+/// live interval is an interval [i, j) where 1 <= i <= j < N for
+/// which a variable is live.
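+///
+/// The numbering used here mirrors the one set up in runOnMachineFunction:
+/// each MachineInstr occupies InstrSlots::NUM consecutive indices, so base
+/// indices advance by that stride, leaving room for the per-instruction
+/// load/use/def/store sub-slots used by getLoadIndex, getUseIndex,
+/// getDefIndex and getStoreIndex.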
+void LiveIntervals::computeIntervals() {
+ DOUT << "********** COMPUTING LIVE INTERVALS **********\n"
+ << "********** Function: "
+ << ((Value*)mf_->getFunction())->getName() << '\n';
+ // Track the index of the current machine instr.
+ unsigned MIIndex = 0;
+ for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n";
+
+ MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
+
+ if (MBB->livein_begin() != MBB->livein_end()) {
+ // Create intervals for live-ins to this BB first.
+ for (MachineBasicBlock::const_livein_iterator LI = MBB->livein_begin(),
+ LE = MBB->livein_end(); LI != LE; ++LI) {
+ handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI));
+ // Multiple live-ins can alias the same register.
+ for (const unsigned* AS = mri_->getSubRegisters(*LI); *AS; ++AS)
+ if (!hasInterval(*AS))
+ handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS),
+ true);
+ }
+ }
+
+ for (; MI != miEnd; ++MI) {
+ DOUT << MIIndex << "\t" << *MI;
+
+ // Handle defs.
+ for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
+ MachineOperand &MO = MI->getOperand(i);
+ // handle register defs - build intervals
+ if (MO.isRegister() && MO.getReg() && MO.isDef())
+ handleRegisterDef(MBB, MI, MIIndex, MO.getReg());
+ }
+
+ MIIndex += InstrSlots::NUM;
+ }
+ }
+}
+
+LiveInterval LiveIntervals::createInterval(unsigned reg) {
+ float Weight = MRegisterInfo::isPhysicalRegister(reg) ?
+ HUGE_VALF : 0.0F;
+ return LiveInterval(reg, Weight);
+}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
new file mode 100644
index 0000000..504b607
--- /dev/null
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -0,0 +1,643 @@
+//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveVariable analysis pass. For each machine
+// instruction in the function, this pass calculates the set of registers that
+// are immediately dead after the instruction (i.e., the instruction calculates
+// the value, but it is never used) and the set of registers that are used by
+// the instruction, but are never used after the instruction (i.e., they are
+// killed).
+//
+// This class computes live variables using a sparse implementation based on
+// the machine code SSA form. This class computes live variable information for
+// each virtual and _register allocatable_ physical register in a function. It
+// uses the dominance properties of SSA form to efficiently compute live
+// variables for virtual registers, and assumes that physical registers are only
+// live within a single basic block (allowing it to do a single local analysis
+// to resolve physical register lifetimes in each basic block). If a physical
+// register is not register allocatable, it is not tracked. This is useful for
+// things like the stack pointer and condition codes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Config/alloca.h"
+#include <algorithm>
+using namespace llvm;
+
+char LiveVariables::ID = 0;
+static RegisterPass<LiveVariables> X("livevars", "Live Variable Analysis");
+
+void LiveVariables::VarInfo::dump() const {
+ cerr << "Register Defined by: ";
+ if (DefInst)
+ cerr << *DefInst;
+ else
+ cerr << "<null>\n";
+ cerr << " Alive in blocks: ";
+ for (unsigned i = 0, e = AliveBlocks.size(); i != e; ++i)
+ if (AliveBlocks[i]) cerr << i << ", ";
+ cerr << "\n Killed by:";
+ if (Kills.empty())
+ cerr << " No instructions.\n";
+ else {
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ cerr << "\n #" << i << ": " << *Kills[i];
+ cerr << "\n";
+ }
+}
+
+LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
+ assert(MRegisterInfo::isVirtualRegister(RegIdx) &&
+ "getVarInfo: not a virtual register!");
+ RegIdx -= MRegisterInfo::FirstVirtualRegister;
+ if (RegIdx >= VirtRegInfo.size()) {
+ if (RegIdx >= 2*VirtRegInfo.size())
+ VirtRegInfo.resize(RegIdx*2);
+ else
+ VirtRegInfo.resize(2*VirtRegInfo.size());
+ }
+ VarInfo &VI = VirtRegInfo[RegIdx];
+ VI.AliveBlocks.resize(MF->getNumBlockIDs());
+ return VI;
+}
+
+bool LiveVariables::KillsRegister(MachineInstr *MI, unsigned Reg) const {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isKill()) {
+ if ((MO.getReg() == Reg) ||
+ (MRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ MRegisterInfo::isPhysicalRegister(Reg) &&
+ RegInfo->isSubRegister(MO.getReg(), Reg)))
+ return true;
+ }
+ }
+ return false;
+}
+
+bool LiveVariables::RegisterDefIsDead(MachineInstr *MI, unsigned Reg) const {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDead()) {
+ if ((MO.getReg() == Reg) ||
+ (MRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ MRegisterInfo::isPhysicalRegister(Reg) &&
+ RegInfo->isSubRegister(MO.getReg(), Reg)))
+ return true;
+ }
+ }
+ return false;
+}
+
+bool LiveVariables::ModifiesRegister(MachineInstr *MI, unsigned Reg) const {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == Reg)
+ return true;
+ }
+ return false;
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
+ MachineBasicBlock *MBB,
+ std::vector<MachineBasicBlock*> &WorkList) {
+ unsigned BBNum = MBB->getNumber();
+
+ // Check to see if this basic block is one of the killing blocks. If so,
+ // remove it...
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+ if (VRInfo.Kills[i]->getParent() == MBB) {
+ VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry
+ break;
+ }
+
+ if (MBB == VRInfo.DefInst->getParent()) return; // Terminate recursion
+
+ if (VRInfo.AliveBlocks[BBNum])
+ return; // We already know the block is live
+
+ // Mark the variable known alive in this bb
+ VRInfo.AliveBlocks[BBNum] = true;
+
+ for (MachineBasicBlock::const_pred_reverse_iterator PI = MBB->pred_rbegin(),
+ E = MBB->pred_rend(); PI != E; ++PI)
+ WorkList.push_back(*PI);
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
+ MachineBasicBlock *MBB) {
+ std::vector<MachineBasicBlock*> WorkList;
+ MarkVirtRegAliveInBlock(VRInfo, MBB, WorkList);
+ while (!WorkList.empty()) {
+ MachineBasicBlock *Pred = WorkList.back();
+ WorkList.pop_back();
+ MarkVirtRegAliveInBlock(VRInfo, Pred, WorkList);
+ }
+}
+
+
+void LiveVariables::HandleVirtRegUse(VarInfo &VRInfo, MachineBasicBlock *MBB,
+ MachineInstr *MI) {
+ assert(VRInfo.DefInst && "Register use before def!");
+
+ VRInfo.NumUses++;
+
+ // Check to see if this basic block is already a kill block...
+ if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
+ // Yes, this register is killed in this basic block already. Increase the
+ // live range by updating the kill instruction.
+ VRInfo.Kills.back() = MI;
+ return;
+ }
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+ assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!");
+#endif
+
+ assert(MBB != VRInfo.DefInst->getParent() &&
+ "Should have kill for defblock!");
+
+ // Add a new kill entry for this basic block.
+ // If this virtual register is already marked as alive in this basic block,
+ // that means it is alive in at least one of the successor blocks, so it's not
+ // a kill.
+ if (!VRInfo.AliveBlocks[MBB->getNumber()])
+ VRInfo.Kills.push_back(MI);
+
+ // Mark every block on a path from the definition to this use as known live.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ E = MBB->pred_end(); PI != E; ++PI)
+ MarkVirtRegAliveInBlock(VRInfo, *PI);
+}
+
+bool LiveVariables::addRegisterKilled(unsigned IncomingReg, MachineInstr *MI,
+ bool AddIfNotFound) {
+ bool Found = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse()) {
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (Reg == IncomingReg) {
+ MO.setIsKill();
+ Found = true;
+ break;
+ } else if (MRegisterInfo::isPhysicalRegister(Reg) &&
+ MRegisterInfo::isPhysicalRegister(IncomingReg) &&
+ RegInfo->isSuperRegister(IncomingReg, Reg) &&
+ MO.isKill())
+ // A super-register kill already exists.
+ return true;
+ }
+ }
+
+ // If not found, this means an alias of one of the operands is killed. Add a
+ // new implicit operand if required.
+ if (!Found && AddIfNotFound) {
+ MI->addRegOperand(IncomingReg, false/*IsDef*/,true/*IsImp*/,true/*IsKill*/);
+ return true;
+ }
+ return Found;
+}
+
+bool LiveVariables::addRegisterDead(unsigned IncomingReg, MachineInstr *MI,
+ bool AddIfNotFound) {
+ bool Found = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (Reg == IncomingReg) {
+ MO.setIsDead();
+ Found = true;
+ break;
+ } else if (MRegisterInfo::isPhysicalRegister(Reg) &&
+ MRegisterInfo::isPhysicalRegister(IncomingReg) &&
+ RegInfo->isSuperRegister(IncomingReg, Reg) &&
+ MO.isDead())
+ // There exists a super-register that's marked dead.
+ return true;
+ }
+ }
+
+ // If not found, this means an alias of one of the operands is dead. Add a
+ // new implicit operand.
+ if (!Found && AddIfNotFound) {
+ MI->addRegOperand(IncomingReg, true/*IsDef*/,true/*IsImp*/,false/*IsKill*/,
+ true/*IsDead*/);
+ return true;
+ }
+ return Found;
+}
+
+void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
+ // There is now a proper use; forget about the last partial use.
+ PhysRegPartUse[Reg] = NULL;
+
+ // Turn previous partial def's into read/mod/write.
+ for (unsigned i = 0, e = PhysRegPartDef[Reg].size(); i != e; ++i) {
+ MachineInstr *Def = PhysRegPartDef[Reg][i];
+ // First one is just a def. This means the use is reading some undef bits.
+ if (i != 0)
+ Def->addRegOperand(Reg, false/*IsDef*/,true/*IsImp*/,true/*IsKill*/);
+ Def->addRegOperand(Reg, true/*IsDef*/,true/*IsImp*/);
+ }
+ PhysRegPartDef[Reg].clear();
+
+ // There was an earlier def of a super-register. Add implicit def to that MI.
+ // A: EAX = ...
+ // B: = AX
+ // Add implicit def to A.
+ if (PhysRegInfo[Reg] && !PhysRegUsed[Reg]) {
+ MachineInstr *Def = PhysRegInfo[Reg];
+ if (!Def->findRegisterDefOperand(Reg))
+ Def->addRegOperand(Reg, true/*IsDef*/,true/*IsImp*/);
+ }
+
+ PhysRegInfo[Reg] = MI;
+ PhysRegUsed[Reg] = true;
+
+ for (const unsigned *SubRegs = RegInfo->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ PhysRegInfo[SubReg] = MI;
+ PhysRegUsed[SubReg] = true;
+ }
+
+ // Remember the partial uses.
+ for (const unsigned *SuperRegs = RegInfo->getSuperRegisters(Reg);
+ unsigned SuperReg = *SuperRegs; ++SuperRegs)
+ PhysRegPartUse[SuperReg] = MI;
+}
+
+bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *RefMI,
+ SmallSet<unsigned, 4> &SubKills) {
+ for (const unsigned *SubRegs = RegInfo->getImmediateSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ MachineInstr *LastRef = PhysRegInfo[SubReg];
+ if (LastRef != RefMI)
+ SubKills.insert(SubReg);
+ else if (!HandlePhysRegKill(SubReg, RefMI, SubKills))
+ SubKills.insert(SubReg);
+ }
+
+ if (*RegInfo->getImmediateSubRegisters(Reg) == 0) {
+ // No sub-registers, just check if reg is killed by RefMI.
+ if (PhysRegInfo[Reg] == RefMI)
+ return true;
+ } else if (SubKills.empty())
+ // None of the sub-registers are killed elsewhere...
+ return true;
+ return false;
+}
+
+void LiveVariables::addRegisterKills(unsigned Reg, MachineInstr *MI,
+ SmallSet<unsigned, 4> &SubKills) {
+ if (SubKills.count(Reg) == 0)
+ addRegisterKilled(Reg, MI, true);
+ else {
+ for (const unsigned *SubRegs = RegInfo->getImmediateSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ addRegisterKills(SubReg, MI, SubKills);
+ }
+}
+
+bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *RefMI) {
+ SmallSet<unsigned, 4> SubKills;
+ if (HandlePhysRegKill(Reg, RefMI, SubKills)) {
+ addRegisterKilled(Reg, RefMI);
+ return true;
+ } else {
+ // Some sub-registers are killed by another MI.
+ for (const unsigned *SubRegs = RegInfo->getImmediateSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs)
+ addRegisterKills(SubReg, RefMI, SubKills);
+ return false;
+ }
+}
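+
+// For example, on a target such as X86: if one instruction reads EAX and a
+// later instruction then reads only AX, the last references of EAX and AX
+// differ, so HandlePhysRegKill returns false and addRegisterKills distributes
+// the kill markers among the sub-registers rather than marking EAX killed as
+// a whole.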
+
+void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) {
+ // Does this kill a previous version of this register?
+ if (MachineInstr *LastRef = PhysRegInfo[Reg]) {
+ if (PhysRegUsed[Reg]) {
+ if (!HandlePhysRegKill(Reg, LastRef)) {
+ if (PhysRegPartUse[Reg])
+ addRegisterKilled(Reg, PhysRegPartUse[Reg], true);
+ }
+ } else if (PhysRegPartUse[Reg])
+ // Add implicit use / kill to last use of a sub-register.
+ addRegisterKilled(Reg, PhysRegPartUse[Reg], true);
+ else
+ addRegisterDead(Reg, LastRef);
+ }
+
+ for (const unsigned *SubRegs = RegInfo->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (MachineInstr *LastRef = PhysRegInfo[SubReg]) {
+ if (PhysRegUsed[SubReg]) {
+ if (!HandlePhysRegKill(SubReg, LastRef)) {
+ if (PhysRegPartUse[SubReg])
+ addRegisterKilled(SubReg, PhysRegPartUse[SubReg], true);
+ }
+ } else if (PhysRegPartUse[SubReg])
+ // Add implicit use / kill to last use of a sub-register.
+ addRegisterKilled(SubReg, PhysRegPartUse[SubReg], true);
+ else
+ addRegisterDead(SubReg, LastRef);
+ }
+ }
+
+ if (MI) {
+ for (const unsigned *SuperRegs = RegInfo->getSuperRegisters(Reg);
+ unsigned SuperReg = *SuperRegs; ++SuperRegs) {
+ if (PhysRegInfo[SuperReg]) {
+ // The larger register is previously defined. Now a smaller part is
+ // being re-defined. Treat it as read/mod/write.
+ // EAX =
+ // AX = EAX<imp-use,kill>, EAX<imp-def>
+ MI->addRegOperand(SuperReg, false/*IsDef*/,true/*IsImp*/,true/*IsKill*/);
+ MI->addRegOperand(SuperReg, true/*IsDef*/,true/*IsImp*/);
+ PhysRegInfo[SuperReg] = MI;
+ PhysRegUsed[SuperReg] = false;
+ PhysRegPartUse[SuperReg] = NULL;
+ } else {
+ // Remember this partial def.
+ PhysRegPartDef[SuperReg].push_back(MI);
+ }
+ }
+
+ PhysRegInfo[Reg] = MI;
+ PhysRegUsed[Reg] = false;
+ PhysRegPartUse[Reg] = NULL;
+ for (const unsigned *SubRegs = RegInfo->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ PhysRegInfo[SubReg] = MI;
+ PhysRegUsed[SubReg] = false;
+ PhysRegPartUse[SubReg] = NULL;
+ }
+ }
+}
+
+bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ RegInfo = MF->getTarget().getRegisterInfo();
+ assert(RegInfo && "Target doesn't have register information?");
+
+ ReservedRegisters = RegInfo->getReservedRegs(mf);
+
+ unsigned NumRegs = RegInfo->getNumRegs();
+ PhysRegInfo = new MachineInstr*[NumRegs];
+ PhysRegUsed = new bool[NumRegs];
+ PhysRegPartUse = new MachineInstr*[NumRegs];
+ PhysRegPartDef = new SmallVector<MachineInstr*,4>[NumRegs];
+ PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
+ std::fill(PhysRegInfo, PhysRegInfo + NumRegs, (MachineInstr*)0);
+ std::fill(PhysRegUsed, PhysRegUsed + NumRegs, false);
+ std::fill(PhysRegPartUse, PhysRegPartUse + NumRegs, (MachineInstr*)0);
+
+ /// Get some space for a respectable number of registers...
+ VirtRegInfo.resize(64);
+
+ analyzePHINodes(mf);
+
+ // Calculate live variable information in depth first order on the CFG of the
+ // function. This guarantees that we will see the definition of a virtual
+ // register before its uses due to dominance properties of SSA (except for PHI
+ // nodes, which are treated as a special case).
+ //
+ MachineBasicBlock *Entry = MF->begin();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
+ DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+ DFI != E; ++DFI) {
+ MachineBasicBlock *MBB = *DFI;
+
+ // Mark live-in registers as live-in.
+ for (MachineBasicBlock::const_livein_iterator II = MBB->livein_begin(),
+ EE = MBB->livein_end(); II != EE; ++II) {
+ assert(MRegisterInfo::isPhysicalRegister(*II) &&
+ "Cannot have a live-in virtual register!");
+ HandlePhysRegDef(*II, 0);
+ }
+
+ // Loop over all of the instructions, processing them.
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ MachineInstr *MI = I;
+
+ // Process all of the operands of the instruction...
+ unsigned NumOperandsToProcess = MI->getNumOperands();
+
+ // Unless it is a PHI node. In this case, ONLY process the DEF, not any
+ // of the uses. They will be handled in other basic blocks.
+ if (MI->getOpcode() == TargetInstrInfo::PHI)
+ NumOperandsToProcess = 1;
+
+ // Process all uses...
+ for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegister() && MO.isUse() && MO.getReg()) {
+ if (MRegisterInfo::isVirtualRegister(MO.getReg())){
+ HandleVirtRegUse(getVarInfo(MO.getReg()), MBB, MI);
+ } else if (MRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ !ReservedRegisters[MO.getReg()]) {
+ HandlePhysRegUse(MO.getReg(), MI);
+ }
+ }
+ }
+
+ // Process all defs...
+ for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegister() && MO.isDef() && MO.getReg()) {
+ if (MRegisterInfo::isVirtualRegister(MO.getReg())) {
+ VarInfo &VRInfo = getVarInfo(MO.getReg());
+
+ assert(VRInfo.DefInst == 0 && "Variable multiply defined!");
+ VRInfo.DefInst = MI;
+ // Defaults to dead
+ VRInfo.Kills.push_back(MI);
+ } else if (MRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ !ReservedRegisters[MO.getReg()]) {
+ HandlePhysRegDef(MO.getReg(), MI);
+ }
+ }
+ }
+ }
+
+ // Handle any virtual assignments from PHI nodes which might be at the
+ // bottom of this basic block. We check all of our successor blocks to see
+ // if they have PHI nodes, and if so, we simulate an assignment at the end
+ // of the current block.
+ if (!PHIVarInfo[MBB->getNumber()].empty()) {
+ SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()];
+
+ for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(),
+ E = VarInfoVec.end(); I != E; ++I) {
+ VarInfo& VRInfo = getVarInfo(*I);
+ assert(VRInfo.DefInst && "Register use before def (or no def)!");
+
+ // Only mark it alive in the block we are representing.
+ MarkVirtRegAliveInBlock(VRInfo, MBB);
+ }
+ }
+
+ // Finally, if the last instruction in the block is a return, make sure to mark
+ // it as using all of the live-out values in the function.
+ if (!MBB->empty() && TII.isReturn(MBB->back().getOpcode())) {
+ MachineInstr *Ret = &MBB->back();
+ for (MachineFunction::liveout_iterator I = MF->liveout_begin(),
+ E = MF->liveout_end(); I != E; ++I) {
+ assert(MRegisterInfo::isPhysicalRegister(*I) &&
+ "Cannot have a live-in virtual register!");
+ HandlePhysRegUse(*I, Ret);
+ // Add live-out registers as implicit uses.
+ if (Ret->findRegisterUseOperandIdx(*I) == -1)
+ Ret->addRegOperand(*I, false, true);
+ }
+ }
+
+ // Loop over PhysRegInfo, killing any registers that are still live at the
+ // end of the basic block. This also resets the PhysRegInfo map.
+ for (unsigned i = 0; i != NumRegs; ++i)
+ if (PhysRegInfo[i])
+ HandlePhysRegDef(i, 0);
+
+ // Clear some state between BBs. This is purely local information.
+ for (unsigned i = 0; i != NumRegs; ++i)
+ PhysRegPartDef[i].clear();
+ std::fill(PhysRegInfo, PhysRegInfo + NumRegs, (MachineInstr*)0);
+ std::fill(PhysRegUsed, PhysRegUsed + NumRegs, false);
+ std::fill(PhysRegPartUse, PhysRegPartUse + NumRegs, (MachineInstr*)0);
+ }
+
+ // Convert and transfer the dead / killed information we have gathered into
+ // VirtRegInfo onto the MIs.
+ //
+ for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i)
+ for (unsigned j = 0, e2 = VirtRegInfo[i].Kills.size(); j != e2; ++j) {
+ if (VirtRegInfo[i].Kills[j] == VirtRegInfo[i].DefInst)
+ addRegisterDead(i + MRegisterInfo::FirstVirtualRegister,
+ VirtRegInfo[i].Kills[j]);
+ else
+ addRegisterKilled(i + MRegisterInfo::FirstVirtualRegister,
+ VirtRegInfo[i].Kills[j]);
+ }
+
+ // Check to make sure there are no unreachable blocks in the MC CFG for the
+ // function. If there are, it is due to a bug in the instruction selector or
+ // some other part of the code generator.
+#ifndef NDEBUG
+ for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
+ assert(Visited.count(&*i) != 0 && "unreachable basic block found");
+#endif
+
+ delete[] PhysRegInfo;
+ delete[] PhysRegUsed;
+ delete[] PhysRegPartUse;
+ delete[] PhysRegPartDef;
+ delete[] PHIVarInfo;
+
+ return false;
+}
+
+/// instructionChanged - When the address of an instruction changes, this
+/// method should be called so that live variables can update its internal
+/// data structures. This removes the records for OldMI, transferring them to
+/// the records for NewMI.
+void LiveVariables::instructionChanged(MachineInstr *OldMI,
+ MachineInstr *NewMI) {
+ // If the instruction defines any virtual registers, update the VarInfo,
+ // kill and dead information for the instruction.
+ for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = OldMI->getOperand(i);
+ if (MO.isRegister() && MO.getReg() &&
+ MRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+ VarInfo &VI = getVarInfo(Reg);
+ if (MO.isDef()) {
+ if (MO.isDead()) {
+ MO.unsetIsDead();
+ addVirtualRegisterDead(Reg, NewMI);
+ }
+ // Update the defining instruction.
+ if (VI.DefInst == OldMI)
+ VI.DefInst = NewMI;
+ }
+ if (MO.isUse()) {
+ if (MO.isKill()) {
+ MO.unsetIsKill();
+ addVirtualRegisterKilled(Reg, NewMI);
+ }
+ // If this is a kill of the value, update the VI kills list.
+ if (VI.removeKill(OldMI))
+ VI.Kills.push_back(NewMI); // Yes, there was a kill of it
+ }
+ }
+ }
+}
+
+/// removeVirtualRegistersKilled - Remove all killed info for the specified
+/// instruction.
+void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isKill()) {
+ MO.unsetIsKill();
+ unsigned Reg = MO.getReg();
+ if (MRegisterInfo::isVirtualRegister(Reg)) {
+ bool removed = getVarInfo(Reg).removeKill(MI);
+ assert(removed && "kill not in register's VarInfo?");
+ }
+ }
+ }
+}
+
+/// removeVirtualRegistersDead - Remove all of the dead registers for the
+/// specified instruction from the live variable information.
+void LiveVariables::removeVirtualRegistersDead(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDead()) {
+ MO.unsetIsDead();
+ unsigned Reg = MO.getReg();
+ if (MRegisterInfo::isVirtualRegister(Reg)) {
+ bool removed = getVarInfo(Reg).removeKill(MI);
+ assert(removed && "kill not in register's VarInfo?");
+ }
+ }
+ }
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in this function.
+/// In particular, for each virtual register used by a PHI node, record that
+/// register against the basic block the incoming value comes from, so the
+/// assignment can later be simulated at the end of that block.
+///
+void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
+ for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI)
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ PHIVarInfo[BBI->getOperand(i + 1).getMachineBasicBlock()->getNumber()].
+ push_back(BBI->getOperand(i).getReg());
+}
diff --git a/lib/CodeGen/MachOWriter.cpp b/lib/CodeGen/MachOWriter.cpp
new file mode 100644
index 0000000..36060e1
--- /dev/null
+++ b/lib/CodeGen/MachOWriter.cpp
@@ -0,0 +1,945 @@
+//===-- MachOWriter.cpp - Target-independent Mach-O Writer code -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Nate Begeman and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the target-independent Mach-O writer. This file writes
+// out the Mach-O file in the following order:
+//
+// #1 FatHeader (universal-only)
+// #2 FatArch (universal-only, 1 per universal arch)
+// Per arch:
+// #3 Header
+// #4 Load Commands
+// #5 Sections
+// #6 Relocations
+// #7 Symbols
+// #8 Strings
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOWriter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/FileWriters.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/OutputBuffer.h"
+#include "llvm/Support/Streams.h"
+#include <algorithm>
+using namespace llvm;
+
+/// AddMachOWriter - Concrete function to add the Mach-O writer to the function
+/// pass manager.
+MachineCodeEmitter *llvm::AddMachOWriter(FunctionPassManager &FPM,
+ std::ostream &O,
+ TargetMachine &TM) {
+ MachOWriter *MOW = new MachOWriter(O, TM);
+ FPM.add(MOW);
+ return &MOW->getMachineCodeEmitter();
+}
+
+//===----------------------------------------------------------------------===//
+// MachOCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+ /// MachOCodeEmitter - This class is used by the MachOWriter to emit the code
+ /// for functions to the Mach-O file.
+ class MachOCodeEmitter : public MachineCodeEmitter {
+ MachOWriter &MOW;
+
+ /// Target machine description.
+ TargetMachine &TM;
+
+ /// is64Bit/isLittleEndian - This information is inferred from the target
+ /// machine directly, indicating what header values and flags to set.
+ bool is64Bit, isLittleEndian;
+
+ /// Relocations - These are the relocations that the function needs, as
+ /// emitted.
+ std::vector<MachineRelocation> Relocations;
+
+ /// CPLocations - This is a map of constant pool indices to offsets from the
+ /// start of the section for that constant pool index.
+ std::vector<intptr_t> CPLocations;
+
+ /// CPSections - This is a map of constant pool indices to the MachOSection
+ /// containing the constant pool entry for that index.
+ std::vector<unsigned> CPSections;
+
+ /// JTLocations - This is a map of jump table indices to offsets from the
+ /// start of the section for that jump table index.
+ std::vector<intptr_t> JTLocations;
+
+ /// MBBLocations - This vector is a mapping from MBB ID's to their address.
+ /// It is filled in by the StartMachineBasicBlock callback and queried by
+ /// the getMachineBasicBlockAddress callback.
+ std::vector<intptr_t> MBBLocations;
+
+ public:
+ MachOCodeEmitter(MachOWriter &mow) : MOW(mow), TM(MOW.TM) {
+ is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+ isLittleEndian = TM.getTargetData()->isLittleEndian();
+ }
+
+ virtual void startFunction(MachineFunction &MF);
+ virtual bool finishFunction(MachineFunction &MF);
+
+ virtual void addRelocation(const MachineRelocation &MR) {
+ Relocations.push_back(MR);
+ }
+
+ void emitConstantPool(MachineConstantPool *MCP);
+ void emitJumpTables(MachineJumpTableInfo *MJTI);
+
+ virtual intptr_t getConstantPoolEntryAddress(unsigned Index) const {
+ assert(CPLocations.size() > Index && "CP not emitted!");
+ return CPLocations[Index];
+ }
+ virtual intptr_t getJumpTableEntryAddress(unsigned Index) const {
+ assert(JTLocations.size() > Index && "JT not emitted!");
+ return JTLocations[Index];
+ }
+
+ virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+ if (MBBLocations.size() <= (unsigned)MBB->getNumber())
+ MBBLocations.resize((MBB->getNumber()+1)*2);
+ MBBLocations[MBB->getNumber()] = getCurrentPCOffset();
+ }
+
+ virtual intptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+ assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
+ MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+ return MBBLocations[MBB->getNumber()];
+ }
+
+ /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
+ virtual void startFunctionStub(unsigned StubSize, unsigned Alignment = 1) {
+ assert(0 && "JIT specific function called!");
+ abort();
+ }
+ virtual void *finishFunctionStub(const Function *F) {
+ assert(0 && "JIT specific function called!");
+ abort();
+ return 0;
+ }
+ };
+}
+
+/// startFunction - This callback is invoked when a new machine function is
+/// about to be emitted.
+void MachOCodeEmitter::startFunction(MachineFunction &MF) {
+ const TargetData *TD = TM.getTargetData();
+ const Function *F = MF.getFunction();
+
+ // Align the output buffer to the appropriate alignment, power of 2.
+ unsigned FnAlign = F->getAlignment();
+ unsigned TDAlign = TD->getPrefTypeAlignment(F->getType());
+ unsigned Align = Log2_32(std::max(FnAlign, TDAlign));
+ assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
+
+ // Get the Mach-O Section that this function belongs in.
+ MachOWriter::MachOSection *MOS = MOW.getTextSection();
+
+ // FIXME: better memory management
+ MOS->SectionData.reserve(4096);
+ BufferBegin = &MOS->SectionData[0];
+ BufferEnd = BufferBegin + MOS->SectionData.capacity();
+
+ // Upgrade the section alignment if required.
+ if (MOS->align < Align) MOS->align = Align;
+
+ // Round the size up to the correct alignment for starting the new function.
+ if ((MOS->size & ((1 << Align) - 1)) != 0) {
+ MOS->size += (1 << Align);
+ MOS->size &= ~((1 << Align) - 1);
+ }
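+ // For example, with Align == 4 (a 16-byte boundary) and a current section
+ // size of 20, the size is first bumped to 36 and then masked down to 32,
+ // the next 16-byte boundary.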
+
+ // FIXME: Using MOS->size directly here instead of calculating it from the
+ // output buffer size (impossible because the code emitter deals only in raw
+ // bytes) forces us to manually synchronize size and write padding zero bytes
+ // to the output buffer for all non-text sections. For text sections, we do
+ // not synchronize the output buffer, and we just blow up if anyone tries to
+ // write non-code to it. An assert should probably be added to
+ // AddSymbolToSection to prevent calling it on the text section.
+ CurBufferPtr = BufferBegin + MOS->size;
+
+ // Clear per-function data structures.
+ CPLocations.clear();
+ CPSections.clear();
+ JTLocations.clear();
+ MBBLocations.clear();
+}
+
+/// finishFunction - This callback is invoked after the function is completely
+/// finished.
+bool MachOCodeEmitter::finishFunction(MachineFunction &MF) {
+ // Get the Mach-O Section that this function belongs in.
+ MachOWriter::MachOSection *MOS = MOW.getTextSection();
+
+ // Get a symbol for the function to add to the symbol table
+ // FIXME: it seems like we should call something like AddSymbolToSection
+ // in startFunction rather than changing the section size and symbol n_value
+ // here.
+ const GlobalValue *FuncV = MF.getFunction();
+ MachOSym FnSym(FuncV, MOW.Mang->getValueName(FuncV), MOS->Index, TM);
+ FnSym.n_value = MOS->size;
+ MOS->size = CurBufferPtr - BufferBegin;
+
+ // Emit constant pool to appropriate section(s)
+ emitConstantPool(MF.getConstantPool());
+
+ // Emit jump tables to appropriate section
+ emitJumpTables(MF.getJumpTableInfo());
+
+ // If we have emitted any relocations to function-specific objects such as
+ // basic blocks, constant pools entries, or jump tables, record their
+ // addresses now so that we can rewrite them with the correct addresses
+ // later.
+ for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
+ MachineRelocation &MR = Relocations[i];
+ intptr_t Addr;
+
+ if (MR.isBasicBlock()) {
+ Addr = getMachineBasicBlockAddress(MR.getBasicBlock());
+ MR.setConstantVal(MOS->Index);
+ MR.setResultPointer((void*)Addr);
+ } else if (MR.isJumpTableIndex()) {
+ Addr = getJumpTableEntryAddress(MR.getJumpTableIndex());
+ MR.setConstantVal(MOW.getJumpTableSection()->Index);
+ MR.setResultPointer((void*)Addr);
+ } else if (MR.isConstantPoolIndex()) {
+ Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex());
+ MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]);
+ MR.setResultPointer((void*)Addr);
+ } else if (MR.isGlobalValue()) {
+ // FIXME: This should be a set or something that uniques
+ MOW.PendingGlobals.push_back(MR.getGlobalValue());
+ } else {
+ assert(0 && "Unhandled relocation type");
+ }
+ MOS->Relocations.push_back(MR);
+ }
+ Relocations.clear();
+
+ // Finally, add it to the symtab.
+ MOW.SymbolTable.push_back(FnSym);
+ return false;
+}
+
+/// emitConstantPool - For each constant pool entry, figure out which section
+/// the constant should live in, allocate space for it, and emit it to the
+/// Section data buffer.
+void MachOCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
+ const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+ if (CP.empty()) return;
+
+ // FIXME: handle PIC codegen
+ bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
+ assert(!isPIC && "PIC codegen not yet handled for mach-o jump tables!");
+
+ // Although there is no strict necessity that I am aware of, we will do what
+ // gcc for OS X does and put each constant pool entry in a section of constant
+ // objects of a certain size. That means that float constants go in the
+ // literal4 section, and double objects go in literal8, etc.
+ //
+ // FIXME: revisit this decision if we ever do the "stick everything into one
+ // "giant object for PIC" optimization.
+ for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+ const Type *Ty = CP[i].getType();
+ unsigned Size = TM.getTargetData()->getTypeSize(Ty);
+
+ MachOWriter::MachOSection *Sec = MOW.getConstSection(CP[i].Val.ConstVal);
+ OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+
+ CPLocations.push_back(Sec->SectionData.size());
+ CPSections.push_back(Sec->Index);
+
+ // FIXME: remove when we have unified size + output buffer
+ Sec->size += Size;
+
+ // Allocate space in the section for the global.
+ // FIXME: need alignment?
+ // FIXME: share between here and AddSymbolToSection?
+ for (unsigned j = 0; j < Size; ++j)
+ SecDataOut.outbyte(0);
+
+ MOW.InitMem(CP[i].Val.ConstVal, &Sec->SectionData[0], CPLocations[i],
+ TM.getTargetData(), Sec->Relocations);
+ }
+}
+
+/// emitJumpTables - Emit all the jump tables for a given jump table info
+/// record to the appropriate section.
+void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ // FIXME: handle PIC codegen
+ bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
+ assert(!isPIC && "PIC codegen not yet handled for mach-o jump tables!");
+
+ MachOWriter::MachOSection *Sec = MOW.getJumpTableSection();
+ unsigned TextSecIndex = MOW.getTextSection()->Index;
+ OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ // For each jump table, record its offset from the start of the section,
+ // reserve space for the relocations to the MBBs, and add the relocations.
+ const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+ JTLocations.push_back(Sec->SectionData.size());
+ for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
+ MachineRelocation MR(MOW.GetJTRelocation(Sec->SectionData.size(),
+ MBBs[mi]));
+ MR.setResultPointer((void *)JTLocations[i]);
+ MR.setConstantVal(TextSecIndex);
+ Sec->Relocations.push_back(MR);
+ SecDataOut.outaddr(0);
+ }
+ }
+ // FIXME: remove when we have unified size + output buffer
+ Sec->size = Sec->SectionData.size();
+}
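+
+// In other words, each jump table entry above is emitted as an address-sized
+// zero placeholder paired with a relocation against the text section; the
+// real basic-block addresses are filled in later, when the section
+// relocations are processed and written out.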
+
+//===----------------------------------------------------------------------===//
+// MachOWriter Implementation
+//===----------------------------------------------------------------------===//
+
+char MachOWriter::ID = 0;
+MachOWriter::MachOWriter(std::ostream &o, TargetMachine &tm)
+ : MachineFunctionPass((intptr_t)&ID), O(o), TM(tm) {
+ is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+ isLittleEndian = TM.getTargetData()->isLittleEndian();
+
+ // Create the machine code emitter object for this target.
+ MCE = new MachOCodeEmitter(*this);
+}
+
+MachOWriter::~MachOWriter() {
+ delete MCE;
+}
+
+void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) {
+ const Type *Ty = GV->getType()->getElementType();
+ unsigned Size = TM.getTargetData()->getTypeSize(Ty);
+ unsigned Align = GV->getAlignment();
+ if (Align == 0)
+ Align = TM.getTargetData()->getPrefTypeAlignment(Ty);
+
+ // Reserve space in the section for this symbol while maintaining the
+ // desired section alignment, which must be at least as much as required by
+ // this symbol.
+ OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+
+ if (Align) {
+ uint64_t OrigSize = Sec->size;
+ Align = Log2_32(Align);
+ Sec->align = std::max(unsigned(Sec->align), Align);
+ Sec->size = (Sec->size + (1 << Align) - 1) & ~((1 << Align) - 1);
+
+ // Add alignment padding to buffer as well.
+ // FIXME: remove when we have unified size + output buffer
+ unsigned AlignedSize = Sec->size - OrigSize;
+ for (unsigned i = 0; i < AlignedSize; ++i)
+ SecDataOut.outbyte(0);
+ }
+ // Globals without external linkage apparently do not go in the symbol table.
+ if (GV->getLinkage() != GlobalValue::InternalLinkage) {
+ MachOSym Sym(GV, Mang->getValueName(GV), Sec->Index, TM);
+ Sym.n_value = Sec->size;
+ SymbolTable.push_back(Sym);
+ }
+
+ // Record the offset of the symbol, and then allocate space for it.
+ // FIXME: remove when we have unified size + output buffer
+ Sec->size += Size;
+
+ // Now that we know what section the GlobalVariable is going to be emitted
+ // into, update our mappings.
+ // FIXME: We may also need to update this when outputting non-GlobalVariable
+ // GlobalValues such as functions.
+ GVSection[GV] = Sec;
+ GVOffset[GV] = Sec->SectionData.size();
+
+ // Allocate space in the section for the global.
+ for (unsigned i = 0; i < Size; ++i)
+ SecDataOut.outbyte(0);
+}
+
+void MachOWriter::EmitGlobal(GlobalVariable *GV) {
+ const Type *Ty = GV->getType()->getElementType();
+ unsigned Size = TM.getTargetData()->getTypeSize(Ty);
+ bool NoInit = !GV->hasInitializer();
+
+ // If this global has a zero initializer, it is part of the .bss or common
+ // section.
+ if (NoInit || GV->getInitializer()->isNullValue()) {
+ // If this global is part of the common block, add it now. Variables are
+ // part of the common block if they are zero initialized and allowed to be
+ // merged with other symbols.
+ if (NoInit || GV->hasLinkOnceLinkage() || GV->hasWeakLinkage()) {
+ MachOSym ExtOrCommonSym(GV, Mang->getValueName(GV), MachOSym::NO_SECT,TM);
+ // For undefined (N_UNDF) external (N_EXT) types, n_value is the size in
+ // bytes of the symbol.
+ ExtOrCommonSym.n_value = Size;
+ SymbolTable.push_back(ExtOrCommonSym);
+ // Remember that we've seen this symbol
+ GVOffset[GV] = Size;
+ return;
+ }
+ // Otherwise, this symbol is part of the .bss section.
+ MachOSection *BSS = getBSSSection();
+ AddSymbolToSection(BSS, GV);
+ return;
+ }
+
+ // Scalar read-only data goes in a literal section if the scalar is 4, 8, or
+ // 16 bytes, or a cstring. Other read only data goes into a regular const
+ // section. Read-write data goes in the data section.
+ MachOSection *Sec = GV->isConstant() ? getConstSection(GV->getInitializer()) :
+ getDataSection();
+ AddSymbolToSection(Sec, GV);
+ InitMem(GV->getInitializer(), &Sec->SectionData[0], GVOffset[GV],
+ TM.getTargetData(), Sec->Relocations);
+}
+
+
+bool MachOWriter::runOnMachineFunction(MachineFunction &MF) {
+ // Nothing to do here, this is all done through the MCE object.
+ return false;
+}
+
+bool MachOWriter::doInitialization(Module &M) {
+ // Set the magic value, now that we know the pointer size and endianness
+ Header.setMagic(isLittleEndian, is64Bit);
+
+ // Set the file type
+ // FIXME: this only works for object files, we do not support the creation
+ // of dynamic libraries or executables at this time.
+ Header.filetype = MachOHeader::MH_OBJECT;
+
+ Mang = new Mangler(M);
+ return false;
+}
+
+/// doFinalization - Now that the module has been completely processed, emit
+/// the Mach-O file to 'O'.
+bool MachOWriter::doFinalization(Module &M) {
+ // FIXME: we don't handle debug info yet, we should probably do that.
+
+ // Okay, the .text section has been completed, build the .data, .bss, and
+ // "common" sections next.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ EmitGlobal(I);
+
+ // Emit the header and load commands.
+ EmitHeaderAndLoadCommands();
+
+ // Emit the various sections and their relocation info.
+ EmitSections();
+
+ // Write the symbol table and the string table to the end of the file.
+ O.write((char*)&SymT[0], SymT.size());
+ O.write((char*)&StrT[0], StrT.size());
+
+ // We are done with the abstract symbols.
+ SectionList.clear();
+ SymbolTable.clear();
+ DynamicSymbolTable.clear();
+
+ // Release the name mangler object.
+ delete Mang; Mang = 0;
+ return false;
+}
+
+void MachOWriter::EmitHeaderAndLoadCommands() {
+ // Step #0: Fill in the segment load command size, since we need it to figure
+ // out the rest of the header fields
+ MachOSegment SEG("", is64Bit);
+ SEG.nsects = SectionList.size();
+ SEG.cmdsize = SEG.cmdSize(is64Bit) +
+ SEG.nsects * SectionList[0]->cmdSize(is64Bit);
+
+ // Step #1: calculate the number of load commands. We always have at least
+ // one, for the LC_SEGMENT load command, plus two for the normal
+ // and dynamic symbol tables, if there are any symbols.
+ Header.ncmds = SymbolTable.empty() ? 1 : 3;
+
+ // Step #2: calculate the size of the load commands
+ Header.sizeofcmds = SEG.cmdsize;
+ if (!SymbolTable.empty())
+ Header.sizeofcmds += SymTab.cmdsize + DySymTab.cmdsize;
+
+ // Step #3: write the header to the file
+ // Local alias to shorten the following code.
+ DataBuffer &FH = Header.HeaderData;
+ OutputBuffer FHOut(FH, is64Bit, isLittleEndian);
+
+ FHOut.outword(Header.magic);
+ FHOut.outword(TM.getMachOWriterInfo()->getCPUType());
+ FHOut.outword(TM.getMachOWriterInfo()->getCPUSubType());
+ FHOut.outword(Header.filetype);
+ FHOut.outword(Header.ncmds);
+ FHOut.outword(Header.sizeofcmds);
+ FHOut.outword(Header.flags);
+ if (is64Bit)
+ FHOut.outword(Header.reserved);
+
+ // Step #4: Finish filling in the segment load command and write it out
+ for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I)
+ SEG.filesize += (*I)->size;
+
+ SEG.vmsize = SEG.filesize;
+ SEG.fileoff = Header.cmdSize(is64Bit) + Header.sizeofcmds;
+
+ FHOut.outword(SEG.cmd);
+ FHOut.outword(SEG.cmdsize);
+ FHOut.outstring(SEG.segname, 16);
+ FHOut.outaddr(SEG.vmaddr);
+ FHOut.outaddr(SEG.vmsize);
+ FHOut.outaddr(SEG.fileoff);
+ FHOut.outaddr(SEG.filesize);
+ FHOut.outword(SEG.maxprot);
+ FHOut.outword(SEG.initprot);
+ FHOut.outword(SEG.nsects);
+ FHOut.outword(SEG.flags);
+
+ // Step #5: Finish filling in the fields of the MachOSections
+ uint64_t currentAddr = 0;
+ for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I) {
+ MachOSection *MOS = *I;
+ MOS->addr = currentAddr;
+ MOS->offset = currentAddr + SEG.fileoff;
+
+ // FIXME: do we need to do something with alignment here?
+ currentAddr += MOS->size;
+ }
+
+ // Step #6: Emit the symbol table to temporary buffers, so that we know the
+ // size of the string table when we write the next load command. This also
+ // sorts and assigns indices to each of the symbols, which is necessary for
+ // emitting relocations to externally-defined objects.
+ BufferSymbolAndStringTable();
+
+ // Step #7: Calculate the number of relocations for each section and write out
+ // the section commands for each section
+ currentAddr += SEG.fileoff;
+ for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I) {
+ MachOSection *MOS = *I;
+ // Convert the relocations to target-specific relocations, and fill in the
+ // relocation offset for this section.
+ CalculateRelocations(*MOS);
+ MOS->reloff = MOS->nreloc ? currentAddr : 0;
+ currentAddr += MOS->nreloc * 8;
+
+ // write the finalized section command to the output buffer
+ FHOut.outstring(MOS->sectname, 16);
+ FHOut.outstring(MOS->segname, 16);
+ FHOut.outaddr(MOS->addr);
+ FHOut.outaddr(MOS->size);
+ FHOut.outword(MOS->offset);
+ FHOut.outword(MOS->align);
+ FHOut.outword(MOS->reloff);
+ FHOut.outword(MOS->nreloc);
+ FHOut.outword(MOS->flags);
+ FHOut.outword(MOS->reserved1);
+ FHOut.outword(MOS->reserved2);
+ if (is64Bit)
+ FHOut.outword(MOS->reserved3);
+ }
+
+ // Step #8: Emit LC_SYMTAB/LC_DYSYMTAB load commands
+ SymTab.symoff = currentAddr;
+ SymTab.nsyms = SymbolTable.size();
+ SymTab.stroff = SymTab.symoff + SymT.size();
+ SymTab.strsize = StrT.size();
+ FHOut.outword(SymTab.cmd);
+ FHOut.outword(SymTab.cmdsize);
+ FHOut.outword(SymTab.symoff);
+ FHOut.outword(SymTab.nsyms);
+ FHOut.outword(SymTab.stroff);
+ FHOut.outword(SymTab.strsize);
+
+ // FIXME: set DySymTab fields appropriately
+ // We should probably just update these in BufferSymbolAndStringTable since
+ // that's where we're partitioning up the different kinds of symbols.
+ FHOut.outword(DySymTab.cmd);
+ FHOut.outword(DySymTab.cmdsize);
+ FHOut.outword(DySymTab.ilocalsym);
+ FHOut.outword(DySymTab.nlocalsym);
+ FHOut.outword(DySymTab.iextdefsym);
+ FHOut.outword(DySymTab.nextdefsym);
+ FHOut.outword(DySymTab.iundefsym);
+ FHOut.outword(DySymTab.nundefsym);
+ FHOut.outword(DySymTab.tocoff);
+ FHOut.outword(DySymTab.ntoc);
+ FHOut.outword(DySymTab.modtaboff);
+ FHOut.outword(DySymTab.nmodtab);
+ FHOut.outword(DySymTab.extrefsymoff);
+ FHOut.outword(DySymTab.nextrefsyms);
+ FHOut.outword(DySymTab.indirectsymoff);
+ FHOut.outword(DySymTab.nindirectsyms);
+ FHOut.outword(DySymTab.extreloff);
+ FHOut.outword(DySymTab.nextrel);
+ FHOut.outword(DySymTab.locreloff);
+ FHOut.outword(DySymTab.nlocrel);
+
+ O.write((char*)&FH[0], FH.size());
+}
+
+/// EmitSections - Now that we have constructed the file header and load
+/// commands, emit the data for each section to the file.
+void MachOWriter::EmitSections() {
+ for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I)
+ // Emit the contents of each section
+ O.write((char*)&(*I)->SectionData[0], (*I)->size);
+ for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+ E = SectionList.end(); I != E; ++I)
+ // Emit the relocation entry data for each section.
+ O.write((char*)&(*I)->RelocBuffer[0], (*I)->RelocBuffer.size());
+}
+
+/// PartitionByLocal - Simple boolean predicate that returns true if Sym is
+/// a local symbol rather than an external symbol.
+bool MachOWriter::PartitionByLocal(const MachOSym &Sym) {
+ return (Sym.n_type & (MachOSym::N_EXT | MachOSym::N_PEXT)) == 0;
+}
+
+/// PartitionByDefined - Simple boolean predicate that returns true if Sym is
+/// defined in this module.
+bool MachOWriter::PartitionByDefined(const MachOSym &Sym) {
+ // FIXME: Do N_ABS or N_INDR count as defined?
+ return (Sym.n_type & MachOSym::N_SECT) == MachOSym::N_SECT;
+}
+
+/// BufferSymbolAndStringTable - Sort the symbols we encountered and assign them
+/// each a string table index so that they appear in the correct order in the
+/// output file.
+void MachOWriter::BufferSymbolAndStringTable() {
+ // The order of the symbol table is:
+ // 1. local symbols
+ // 2. defined external symbols (sorted by name)
+ // 3. undefined external symbols (sorted by name)
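+ //
+ // For instance (hypothetical symbols): with local symbols "a" and "b", an
+ // externally visible "_bar" defined in this module, and an undefined
+ // reference to "_printf", the final order is { a, b | _bar | _printf }.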
+
+ // Before sorting the symbols, check the PendingGlobals for any undefined
+ // globals that need to be put in the symbol table.
+ for (std::vector<GlobalValue*>::iterator I = PendingGlobals.begin(),
+ E = PendingGlobals.end(); I != E; ++I) {
+ if (GVOffset[*I] == 0 && GVSection[*I] == 0) {
+ MachOSym UndfSym(*I, Mang->getValueName(*I), MachOSym::NO_SECT, TM);
+ SymbolTable.push_back(UndfSym);
+ GVOffset[*I] = -1;
+ }
+ }
+
+ // Sort the symbols by name, so that when we partition the symbols by scope
+ // of definition, we won't have to sort by name within each partition.
+ std::sort(SymbolTable.begin(), SymbolTable.end(), MachOSymCmp());
+
+ // Partition the symbol table entries so that all local symbols come before
+ // all symbols with external linkage. { 1 | 2 3 }
+ std::partition(SymbolTable.begin(), SymbolTable.end(), PartitionByLocal);
+
+ // Advance iterator to beginning of external symbols and partition so that
+ // all external symbols defined in this module come before all external
+ // symbols defined elsewhere. { 1 | 2 | 3 }
+ for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+ E = SymbolTable.end(); I != E; ++I) {
+ if (!PartitionByLocal(*I)) {
+ std::partition(I, E, PartitionByDefined);
+ break;
+ }
+ }
+
+ // Calculate the starting index for each of the local, extern defined, and
+ // undefined symbols, as well as the number of each to put in the LC_DYSYMTAB
+ // load command.
+ for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+ E = SymbolTable.end(); I != E; ++I) {
+ if (PartitionByLocal(*I)) {
+ ++DySymTab.nlocalsym;
+ ++DySymTab.iextdefsym;
+ ++DySymTab.iundefsym;
+ } else if (PartitionByDefined(*I)) {
+ ++DySymTab.nextdefsym;
+ ++DySymTab.iundefsym;
+ } else {
+ ++DySymTab.nundefsym;
+ }
+ }
+
+ // Write out a leading zero byte when emitting the string table, so that
+ // n_strx == 0 can denote an empty string.
+ OutputBuffer StrTOut(StrT, is64Bit, isLittleEndian);
+ StrTOut.outbyte(0);
+
+ // The order of the string table is:
+ // 1. strings for external symbols
+ // 2. strings for local symbols
+ // Since this is the opposite order from the symbol table, which we have just
+ // sorted, we can walk the symbol table backwards to output the string table.
+ for (std::vector<MachOSym>::reverse_iterator I = SymbolTable.rbegin(),
+ E = SymbolTable.rend(); I != E; ++I) {
+ if (I->GVName == "") {
+ I->n_strx = 0;
+ } else {
+ I->n_strx = StrT.size();
+ StrTOut.outstring(I->GVName, I->GVName.length()+1);
+ }
+ }
+
+ OutputBuffer SymTOut(SymT, is64Bit, isLittleEndian);
+
+ unsigned index = 0;
+ for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+ E = SymbolTable.end(); I != E; ++I, ++index) {
+ // Add the section base address to the section offset in the n_value field
+ // to calculate the full address.
+ // FIXME: handle symbols where the n_value field is not the address
+ GlobalValue *GV = const_cast<GlobalValue*>(I->GV);
+ if (GV && GVSection[GV])
+ I->n_value += GVSection[GV]->addr;
+ if (GV && (GVOffset[GV] == -1))
+ GVOffset[GV] = index;
+
+ // Emit nlist to buffer
+ SymTOut.outword(I->n_strx);
+ SymTOut.outbyte(I->n_type);
+ SymTOut.outbyte(I->n_sect);
+ SymTOut.outhalf(I->n_desc);
+ SymTOut.outaddr(I->n_value);
+ }
+}
+
+/// CalculateRelocations - For each MachineRelocation in the current section,
+/// calculate the index of the section containing the object to be relocated,
+/// and the offset into that section. From this information, create the
+/// appropriate target-specific MachORelocation type and buffer it to be
+/// written out after we are finished writing out sections.
+void MachOWriter::CalculateRelocations(MachOSection &MOS) {
+ for (unsigned i = 0, e = MOS.Relocations.size(); i != e; ++i) {
+ MachineRelocation &MR = MOS.Relocations[i];
+ unsigned TargetSection = MR.getConstantVal();
+ unsigned TargetAddr = 0;
+ unsigned TargetIndex = 0;
+
+ // This is a scattered relocation entry if it points to a global value with
+ // a non-zero offset.
+ bool Scattered = false;
+ bool Extern = false;
+
+ // Since we may not have seen the GlobalValue we were interested in yet at
+ // the time we emitted the relocation for it, fix it up now so that it
+ // points to the offset into the correct section.
+ if (MR.isGlobalValue()) {
+ GlobalValue *GV = MR.getGlobalValue();
+ MachOSection *MOSPtr = GVSection[GV];
+ intptr_t Offset = GVOffset[GV];
+
+ // If we have never seen the global before, it must be to a symbol
+ // defined in another module (N_UNDF).
+ if (!MOSPtr) {
+ // FIXME: need to append stub suffix
+ Extern = true;
+ TargetAddr = 0;
+ TargetIndex = GVOffset[GV];
+ } else {
+ Scattered = TargetSection != 0;
+ TargetSection = MOSPtr->Index;
+ }
+ MR.setResultPointer((void*)Offset);
+ }
+
+ // If the symbol is locally defined, pass in the address of the section and
+ // the section index to the code which will generate the target relocation.
+ if (!Extern) {
+ MachOSection &To = *SectionList[TargetSection - 1];
+ TargetAddr = To.addr;
+ TargetIndex = To.Index;
+ }
+
+ OutputBuffer RelocOut(MOS.RelocBuffer, is64Bit, isLittleEndian);
+ OutputBuffer SecOut(MOS.SectionData, is64Bit, isLittleEndian);
+
+ MOS.nreloc += GetTargetRelocation(MR, MOS.Index, TargetAddr, TargetIndex,
+ RelocOut, SecOut, Scattered, Extern);
+ }
+}
+
+// InitMem - Write the value of a Constant to the specified memory location,
+// converting it into bytes and relocations.
+void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset,
+ const TargetData *TD,
+ std::vector<MachineRelocation> &MRs) {
+ typedef std::pair<const Constant*, intptr_t> CPair;
+ std::vector<CPair> WorkList;
+
+ WorkList.push_back(CPair(C,(intptr_t)Addr + Offset));
+
+ intptr_t ScatteredOffset = 0;
+
+ while (!WorkList.empty()) {
+ const Constant *PC = WorkList.back().first;
+ intptr_t PA = WorkList.back().second;
+ WorkList.pop_back();
+
+ if (isa<UndefValue>(PC)) {
+ continue;
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(PC)) {
+ unsigned ElementSize = TD->getTypeSize(CP->getType()->getElementType());
+ for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
+ WorkList.push_back(CPair(CP->getOperand(i), PA+i*ElementSize));
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(PC)) {
+ //
+ // FIXME: Handle ConstantExpression. See EE::getConstantValue()
+ //
+ switch (CE->getOpcode()) {
+ case Instruction::GetElementPtr: {
+ SmallVector<Value*, 8> Indices(CE->op_begin()+1, CE->op_end());
+ ScatteredOffset = TD->getIndexedOffset(CE->getOperand(0)->getType(),
+ &Indices[0], Indices.size());
+ WorkList.push_back(CPair(CE->getOperand(0), PA));
+ break;
+ }
+ case Instruction::Add:
+ default:
+ cerr << "ConstantExpr not handled as global var init: " << *CE << "\n";
+ abort();
+ break;
+ }
+ } else if (PC->getType()->isFirstClassType()) {
+ unsigned char *ptr = (unsigned char *)PA;
+ switch (PC->getType()->getTypeID()) {
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(PC->getType())->getBitWidth();
+ uint64_t val = cast<ConstantInt>(PC)->getZExtValue();
+ if (NumBits <= 8)
+ ptr[0] = val;
+ else if (NumBits <= 16) {
+ if (TD->isBigEndian())
+ val = ByteSwap_16(val);
+ ptr[0] = val;
+ ptr[1] = val >> 8;
+ } else if (NumBits <= 32) {
+ if (TD->isBigEndian())
+ val = ByteSwap_32(val);
+ ptr[0] = val;
+ ptr[1] = val >> 8;
+ ptr[2] = val >> 16;
+ ptr[3] = val >> 24;
+ } else if (NumBits <= 64) {
+ if (TD->isBigEndian())
+ val = ByteSwap_64(val);
+ ptr[0] = val;
+ ptr[1] = val >> 8;
+ ptr[2] = val >> 16;
+ ptr[3] = val >> 24;
+ ptr[4] = val >> 32;
+ ptr[5] = val >> 40;
+ ptr[6] = val >> 48;
+ ptr[7] = val >> 56;
+ } else {
+ assert(0 && "Not implemented: bit widths > 64");
+ }
+ break;
+ }
+ case Type::FloatTyID: {
+ uint64_t val = FloatToBits(cast<ConstantFP>(PC)->getValue());
+ if (TD->isBigEndian())
+ val = ByteSwap_32(val);
+ ptr[0] = val;
+ ptr[1] = val >> 8;
+ ptr[2] = val >> 16;
+ ptr[3] = val >> 24;
+ break;
+ }
+ case Type::DoubleTyID: {
+ uint64_t val = DoubleToBits(cast<ConstantFP>(PC)->getValue());
+ if (TD->isBigEndian())
+ val = ByteSwap_64(val);
+ ptr[0] = val;
+ ptr[1] = val >> 8;
+ ptr[2] = val >> 16;
+ ptr[3] = val >> 24;
+ ptr[4] = val >> 32;
+ ptr[5] = val >> 40;
+ ptr[6] = val >> 48;
+ ptr[7] = val >> 56;
+ break;
+ }
+ case Type::PointerTyID:
+ if (isa<ConstantPointerNull>(PC))
+ memset(ptr, 0, TD->getPointerSize());
+ else if (const GlobalValue* GV = dyn_cast<GlobalValue>(PC)) {
+ // FIXME: what about function stubs?
+ MRs.push_back(MachineRelocation::getGV(PA-(intptr_t)Addr,
+ MachineRelocation::VANILLA,
+ const_cast<GlobalValue*>(GV),
+ ScatteredOffset));
+ ScatteredOffset = 0;
+ } else
+ assert(0 && "Unknown constant pointer type!");
+ break;
+ default:
+ cerr << "ERROR: Constant unimp for type: " << *PC->getType() << "\n";
+ abort();
+ }
+ } else if (isa<ConstantAggregateZero>(PC)) {
+ memset((void*)PA, 0, (size_t)TD->getTypeSize(PC->getType()));
+ } else if (const ConstantArray *CPA = dyn_cast<ConstantArray>(PC)) {
+ unsigned ElementSize = TD->getTypeSize(CPA->getType()->getElementType());
+ for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
+ WorkList.push_back(CPair(CPA->getOperand(i), PA+i*ElementSize));
+ } else if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(PC)) {
+ const StructLayout *SL =
+ TD->getStructLayout(cast<StructType>(CPS->getType()));
+ for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
+ WorkList.push_back(CPair(CPS->getOperand(i),
+ PA+SL->getElementOffset(i)));
+ } else {
+ cerr << "Bad Type: " << *PC->getType() << "\n";
+ assert(0 && "Unknown constant type to initialize memory with!");
+ }
+ }
+}
+
+MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect,
+ TargetMachine &TM) :
+ GV(gv), n_strx(0), n_type(sect == NO_SECT ? N_UNDF : N_SECT), n_sect(sect),
+ n_desc(0), n_value(0) {
+
+ const TargetAsmInfo *TAI = TM.getTargetAsmInfo();
+
+ switch (GV->getLinkage()) {
+ default:
+ assert(0 && "Unexpected linkage type!");
+ break;
+ case GlobalValue::WeakLinkage:
+ case GlobalValue::LinkOnceLinkage:
+ assert(!isa<Function>(gv) && "Unexpected linkage type for Function!");
+ case GlobalValue::ExternalLinkage:
+ GVName = TAI->getGlobalPrefix() + name;
+ n_type |= GV->hasHiddenVisibility() ? N_PEXT : N_EXT;
+ break;
+ case GlobalValue::InternalLinkage:
+ GVName = TAI->getGlobalPrefix() + name;
+ break;
+ }
+}
diff --git a/lib/CodeGen/MachOWriter.h b/lib/CodeGen/MachOWriter.h
new file mode 100644
index 0000000..6d88832
--- /dev/null
+++ b/lib/CodeGen/MachOWriter.h
@@ -0,0 +1,627 @@
+//=== MachOWriter.h - Target-independent Mach-O writer support --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Nate Begeman and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachOWriter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MACHOWRITER_H
+#define MACHOWRITER_H
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetMachOWriterInfo.h"
+
+namespace llvm {
+ class GlobalVariable;
+ class Mangler;
+ class MachineCodeEmitter;
+ class MachOCodeEmitter;
+ class OutputBuffer;
+
+ /// MachOSym - This struct contains information about each symbol that is
+ /// added to the logical symbol table for the module. This is eventually
+ /// turned into a real symbol table in the file.
+ struct MachOSym {
+ const GlobalValue *GV; // The global value this corresponds to.
+ std::string GVName; // The mangled name of the global value.
+ uint32_t n_strx; // index into the string table
+ uint8_t n_type; // type flag
+ uint8_t n_sect; // section number or NO_SECT
+ int16_t n_desc; // see <mach-o/stab.h>
+ uint64_t n_value; // value for this symbol (or stab offset)
+
+ // Constants for the n_sect field
+ // see <mach-o/nlist.h>
+ enum { NO_SECT = 0 }; // symbol is not in any section
+
+ // Constants for the n_type field
+ // see <mach-o/nlist.h>
+ enum { N_UNDF = 0x0, // undefined, n_sect == NO_SECT
+ N_ABS = 0x2, // absolute, n_sect == NO_SECT
+ N_SECT = 0xe, // defined in section number n_sect
+ N_PBUD = 0xc, // prebound undefined (defined in a dylib)
+ N_INDR = 0xa // indirect
+ };
+ // The following bits are OR'd into the types above. For example, a type
+ // of 0x0f would be an external N_SECT symbol (0x0e | 0x01).
+ enum { N_EXT = 0x01, // external symbol bit
+ N_PEXT = 0x10 // private external symbol bit
+ };
+
+ // Constants for the n_desc field
+ // see <mach-o/loader.h>
+ enum { REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0,
+ REFERENCE_FLAG_UNDEFINED_LAZY = 1,
+ REFERENCE_FLAG_DEFINED = 2,
+ REFERENCE_FLAG_PRIVATE_DEFINED = 3,
+ REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4,
+ REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5
+ };
+ enum { N_NO_DEAD_STRIP = 0x0020, // symbol is not to be dead stripped
+ N_WEAK_REF = 0x0040, // symbol is weak referenced
+ N_WEAK_DEF = 0x0080 // coalesced symbol is a weak definition
+ };
+
+ MachOSym(const GlobalValue *gv, std::string name, uint8_t sect,
+ TargetMachine &TM);
+ };
+
+ /// MachOWriter - This class implements the common target-independent code for
+ /// writing Mach-O files. Targets should derive a class from this to
+ /// parameterize the output format.
+ ///
+ class MachOWriter : public MachineFunctionPass {
+ friend class MachOCodeEmitter;
+ public:
+ static char ID;
+ MachineCodeEmitter &getMachineCodeEmitter() const {
+ return *(MachineCodeEmitter*)MCE;
+ }
+
+ MachOWriter(std::ostream &O, TargetMachine &TM);
+ virtual ~MachOWriter();
+
+ virtual const char *getPassName() const {
+ return "Mach-O Writer";
+ }
+
+ typedef std::vector<unsigned char> DataBuffer;
+ protected:
+ /// Output stream to send the resultant object file to.
+ ///
+ std::ostream &O;
+
+ /// Target machine description.
+ ///
+ TargetMachine &TM;
+
+ /// Mang - The object used to perform name mangling for this module.
+ ///
+ Mangler *Mang;
+
+ /// MCE - The MachineCodeEmitter object that we are exposing to emit machine
+ /// code for functions to the .o file.
+ MachOCodeEmitter *MCE;
+
+ /// is64Bit/isLittleEndian - This information is inferred from the target
+ /// machine directly, indicating what header values and flags to set.
+ bool is64Bit, isLittleEndian;
+
+ /// doInitialization - Set the Mach-O header values for the module and
+ /// create the name mangler; the file itself is emitted in doFinalization.
+ bool doInitialization(Module &M);
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ /// doFinalization - Now that the module has been completely processed, emit
+ /// the Mach-O file to 'O'.
+ bool doFinalization(Module &M);
+
+ /// MachOHeader - This struct contains the header information about a
+ /// specific architecture type/subtype pair that is emitted to the file.
+ struct MachOHeader {
+ uint32_t magic; // mach magic number identifier
+ uint32_t filetype; // type of file
+ uint32_t ncmds; // number of load commands
+ uint32_t sizeofcmds; // the size of all the load commands
+ uint32_t flags; // flags
+ uint32_t reserved; // 64-bit only
+
+ /// HeaderData - The actual data for the header which we are building
+ /// up for emission to the file.
+ DataBuffer HeaderData;
+
+ // Constants for the filetype field
+ // see <mach-o/loader.h> for additional info on the various types
+ enum { MH_OBJECT = 1, // relocatable object file
+ MH_EXECUTE = 2, // demand paged executable file
+ MH_FVMLIB = 3, // fixed VM shared library file
+ MH_CORE = 4, // core file
+ MH_PRELOAD = 5, // preloaded executable file
+ MH_DYLIB = 6, // dynamically bound shared library
+ MH_DYLINKER = 7, // dynamic link editor
+ MH_BUNDLE = 8, // dynamically bound bundle file
+ MH_DYLIB_STUB = 9, // shared library stub for static linking only
+             MH_DSYM       = 10  // companion file with only debug sections
+ };
+
+ // Constants for the flags field
+ enum { MH_NOUNDEFS = 1 << 0,
+ // the object file has no undefined references
+ MH_INCRLINK = 1 << 1,
+ // the object file is the output of an incremental link against
+ // a base file and cannot be link edited again
+ MH_DYLDLINK = 1 << 2,
+ // the object file is input for the dynamic linker and cannot be
+ // statically link edited again.
+ MH_BINDATLOAD = 1 << 3,
+ // the object file's undefined references are bound by the
+ // dynamic linker when loaded.
+ MH_PREBOUND = 1 << 4,
+ // the file has its dynamic undefined references prebound
+ MH_SPLIT_SEGS = 1 << 5,
+ // the file has its read-only and read-write segments split
+ // see <mach/shared_memory_server.h>
+ MH_LAZY_INIT = 1 << 6,
+ // the shared library init routine is to be run lazily via
+ // catching memory faults to its writable segments (obsolete)
+ MH_TWOLEVEL = 1 << 7,
+ // the image is using two-level namespace bindings
+ MH_FORCE_FLAT = 1 << 8,
+ // the executable is forcing all images to use flat namespace
+ // bindings.
+           MH_NOMULTIDEFS = 1 << 9,
+ // this umbrella guarantees no multiple definitions of symbols
+ // in its sub-images so the two-level namespace hints can
+ // always be used.
+ MH_NOFIXPREBINDING = 1 << 10,
+           // do not have dyld notify the prebinding agent about this
+ // executable.
+ MH_PREBINDABLE = 1 << 11,
+ // the binary is not prebound but can have its prebinding
+ // redone. only used when MH_PREBOUND is not set.
+ MH_ALLMODSBOUND = 1 << 12,
+ // indicates that this binary binds to all two-level namespace
+ // modules of its dependent libraries. Only used when
+ // MH_PREBINDABLE and MH_TWOLEVEL are both set.
+ MH_SUBSECTIONS_VIA_SYMBOLS = 1 << 13,
+ // safe to divide up the sections into sub-sections via symbols
+ // for dead code stripping.
+ MH_CANONICAL = 1 << 14,
+ // the binary has been canonicalized via the unprebind operation
+ MH_WEAK_DEFINES = 1 << 15,
+ // the final linked image contains external weak symbols
+ MH_BINDS_TO_WEAK = 1 << 16,
+ // the final linked image uses weak symbols
+ MH_ALLOW_STACK_EXECUTION = 1 << 17
+ // When this bit is set, all stacks in the task will be given
+ // stack execution privilege. Only used in MH_EXECUTE filetype
+ };
+
+ MachOHeader() : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0),
+ reserved(0) { }
+
+    /// cmdSize - This routine returns the size of the MachOHeader as written
+ /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+ unsigned cmdSize(bool is64Bit) const {
+ if (is64Bit)
+ return 8 * sizeof(uint32_t);
+ else
+ return 7 * sizeof(uint32_t);
+ }
+
+ /// setMagic - This routine sets the appropriate value for the 'magic'
+ /// field based on pointer size and endianness.
+ void setMagic(bool isLittleEndian, bool is64Bit) {
+ if (isLittleEndian)
+ if (is64Bit) magic = 0xcffaedfe;
+ else magic = 0xcefaedfe;
+ else
+ if (is64Bit) magic = 0xfeedfacf;
+ else magic = 0xfeedface;
+ }
+ };
+
+ /// Header - An instance of MachOHeader that we will update while we build
+ /// the file, and then emit during finalization.
+ MachOHeader Header;
+
+ /// MachOSegment - This struct contains the necessary information to
+ /// emit the load commands for each section in the file.
+ struct MachOSegment {
+ uint32_t cmd; // LC_SEGMENT or LC_SEGMENT_64
+ uint32_t cmdsize; // Total size of this struct and section commands
+ std::string segname; // segment name
+ uint64_t vmaddr; // address of this segment
+ uint64_t vmsize; // size of this segment, may be larger than filesize
+ uint64_t fileoff; // offset in file
+ uint64_t filesize; // amount to read from file
+ uint32_t maxprot; // maximum VM protection
+ uint32_t initprot; // initial VM protection
+ uint32_t nsects; // number of sections in this segment
+ uint32_t flags; // flags
+
+ // The following constants are getting pulled in by one of the
+ // system headers, which creates a neat clash with the enum.
+#if !defined(VM_PROT_NONE)
+#define VM_PROT_NONE 0x00
+#endif
+#if !defined(VM_PROT_READ)
+#define VM_PROT_READ 0x01
+#endif
+#if !defined(VM_PROT_WRITE)
+#define VM_PROT_WRITE 0x02
+#endif
+#if !defined(VM_PROT_EXECUTE)
+#define VM_PROT_EXECUTE 0x04
+#endif
+#if !defined(VM_PROT_ALL)
+#define VM_PROT_ALL 0x07
+#endif
+
+ // Constants for the vm protection fields
+ // see <mach-o/vm_prot.h>
+ enum { SEG_VM_PROT_NONE = VM_PROT_NONE,
+ SEG_VM_PROT_READ = VM_PROT_READ, // read permission
+ SEG_VM_PROT_WRITE = VM_PROT_WRITE, // write permission
+ SEG_VM_PROT_EXECUTE = VM_PROT_EXECUTE,
+ SEG_VM_PROT_ALL = VM_PROT_ALL
+ };
+
+ // Constants for the cmd field
+ // see <mach-o/loader.h>
+ enum { LC_SEGMENT = 0x01, // segment of this file to be mapped
+ LC_SEGMENT_64 = 0x19 // 64-bit segment of this file to be mapped
+ };
+
+    /// cmdSize - This routine returns the size of the MachOSegment as written
+ /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+ unsigned cmdSize(bool is64Bit) const {
+ if (is64Bit)
+ return 6 * sizeof(uint32_t) + 4 * sizeof(uint64_t) + 16;
+ else
+ return 10 * sizeof(uint32_t) + 16; // addresses only 32 bits
+ }
+
+ MachOSegment(const std::string &seg, bool is64Bit)
+ : cmd(is64Bit ? LC_SEGMENT_64 : LC_SEGMENT), cmdsize(0), segname(seg),
+ vmaddr(0), vmsize(0), fileoff(0), filesize(0), maxprot(VM_PROT_ALL),
+ initprot(VM_PROT_ALL), nsects(0), flags(0) { }
+ };
+
+ /// MachOSection - This struct contains information about each section in a
+ /// particular segment that is emitted to the file. This is eventually
+  /// turned into the SectionCommand in the load command for a particular
+ /// segment.
+ struct MachOSection {
+ std::string sectname; // name of this section,
+ std::string segname; // segment this section goes in
+ uint64_t addr; // memory address of this section
+ uint64_t size; // size in bytes of this section
+ uint32_t offset; // file offset of this section
+ uint32_t align; // section alignment (power of 2)
+ uint32_t reloff; // file offset of relocation entries
+ uint32_t nreloc; // number of relocation entries
+ uint32_t flags; // flags (section type and attributes)
+ uint32_t reserved1; // reserved (for offset or index)
+ uint32_t reserved2; // reserved (for count or sizeof)
+ uint32_t reserved3; // reserved (64 bit only)
+
+ /// A unique number for this section, which will be used to match symbols
+ /// to the correct section.
+ uint32_t Index;
+
+ /// SectionData - The actual data for this section which we are building
+ /// up for emission to the file.
+ DataBuffer SectionData;
+
+ /// RelocBuffer - A buffer to hold the mach-o relocations before we write
+ /// them out at the appropriate location in the file.
+ DataBuffer RelocBuffer;
+
+ /// Relocations - The relocations that we have encountered so far in this
+ /// section that we will need to convert to MachORelocation entries when
+ /// the file is written.
+ std::vector<MachineRelocation> Relocations;
+
+ // Constants for the section types (low 8 bits of flags field)
+ // see <mach-o/loader.h>
+ enum { S_REGULAR = 0,
+ // regular section
+ S_ZEROFILL = 1,
+ // zero fill on demand section
+ S_CSTRING_LITERALS = 2,
+ // section with only literal C strings
+ S_4BYTE_LITERALS = 3,
+ // section with only 4 byte literals
+ S_8BYTE_LITERALS = 4,
+ // section with only 8 byte literals
+ S_LITERAL_POINTERS = 5,
+ // section with only pointers to literals
+ S_NON_LAZY_SYMBOL_POINTERS = 6,
+ // section with only non-lazy symbol pointers
+ S_LAZY_SYMBOL_POINTERS = 7,
+ // section with only lazy symbol pointers
+ S_SYMBOL_STUBS = 8,
+ // section with only symbol stubs
+ // byte size of stub in the reserved2 field
+ S_MOD_INIT_FUNC_POINTERS = 9,
+ // section with only function pointers for initialization
+ S_MOD_TERM_FUNC_POINTERS = 10,
+ // section with only function pointers for termination
+ S_COALESCED = 11,
+ // section contains symbols that are coalesced
+ S_GB_ZEROFILL = 12,
+ // zero fill on demand section (that can be larger than 4GB)
+ S_INTERPOSING = 13,
+ // section with only pairs of function pointers for interposing
+ S_16BYTE_LITERALS = 14
+ // section with only 16 byte literals
+ };
+
+ // Constants for the section flags (high 24 bits of flags field)
+ // see <mach-o/loader.h>
+ enum { S_ATTR_PURE_INSTRUCTIONS = 1 << 31,
+ // section contains only true machine instructions
+ S_ATTR_NO_TOC = 1 << 30,
+ // section contains coalesced symbols that are not to be in a
+ // ranlib table of contents
+ S_ATTR_STRIP_STATIC_SYMS = 1 << 29,
+ // ok to strip static symbols in this section in files with the
+           // MH_DYLDLINK flag
+ S_ATTR_NO_DEAD_STRIP = 1 << 28,
+ // no dead stripping
+ S_ATTR_LIVE_SUPPORT = 1 << 27,
+ // blocks are live if they reference live blocks
+ S_ATTR_SELF_MODIFYING_CODE = 1 << 26,
+ // used with i386 code stubs written on by dyld
+ S_ATTR_DEBUG = 1 << 25,
+ // a debug section
+ S_ATTR_SOME_INSTRUCTIONS = 1 << 10,
+ // section contains some machine instructions
+ S_ATTR_EXT_RELOC = 1 << 9,
+ // section has external relocation entries
+ S_ATTR_LOC_RELOC = 1 << 8
+ // section has local relocation entries
+ };
+
+ /// cmdSize - This routine returns the size of the MachOSection as written
+ /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+ unsigned cmdSize(bool is64Bit) const {
+ if (is64Bit)
+ return 7 * sizeof(uint32_t) + 2 * sizeof(uint64_t) + 32;
+ else
+ return 9 * sizeof(uint32_t) + 32; // addresses only 32 bits
+ }
+
+ MachOSection(const std::string &seg, const std::string &sect)
+ : sectname(sect), segname(seg), addr(0), size(0), offset(0), align(2),
+ reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0),
+ reserved3(0) { }
+ };
+
+ private:
+
+ /// SectionList - This is the list of sections that we have emitted to the
+ /// file. Once the file has been completely built, the segment load command
+ /// SectionCommands are constructed from this info.
+ std::vector<MachOSection*> SectionList;
+
+ /// SectionLookup - This is a mapping from section name to SectionList entry
+ std::map<std::string, MachOSection*> SectionLookup;
+
+ /// GVSection - This is a mapping from a GlobalValue to a MachOSection,
+ /// to aid in emitting relocations.
+ std::map<GlobalValue*, MachOSection*> GVSection;
+
+ /// GVOffset - This is a mapping from a GlobalValue to an offset from the
+ /// start of the section in which the GV resides, to aid in emitting
+ /// relocations.
+ std::map<GlobalValue*, intptr_t> GVOffset;
+
+ /// getSection - Return the section with the specified name, creating a new
+ /// section if one does not already exist.
+ MachOSection *getSection(const std::string &seg, const std::string &sect,
+ unsigned Flags = 0) {
+ MachOSection *MOS = SectionLookup[seg+sect];
+ if (MOS) return MOS;
+
+ MOS = new MachOSection(seg, sect);
+ SectionList.push_back(MOS);
+ MOS->Index = SectionList.size();
+ MOS->flags = MachOSection::S_REGULAR | Flags;
+ SectionLookup[seg+sect] = MOS;
+ return MOS;
+ }
+ MachOSection *getTextSection(bool isCode = true) {
+ if (isCode)
+ return getSection("__TEXT", "__text",
+ MachOSection::S_ATTR_PURE_INSTRUCTIONS |
+ MachOSection::S_ATTR_SOME_INSTRUCTIONS);
+ else
+ return getSection("__TEXT", "__text");
+ }
+ MachOSection *getBSSSection() {
+ return getSection("__DATA", "__bss", MachOSection::S_ZEROFILL);
+ }
+ MachOSection *getDataSection() {
+ return getSection("__DATA", "__data");
+ }
+ MachOSection *getConstSection(Constant *C) {
+ const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+ if (CVA && CVA->isCString())
+ return getSection("__TEXT", "__cstring",
+ MachOSection::S_CSTRING_LITERALS);
+
+ const Type *Ty = C->getType();
+ if (Ty->isPrimitiveType() || Ty->isInteger()) {
+ unsigned Size = TM.getTargetData()->getTypeSize(Ty);
+ switch(Size) {
+ default: break; // Fall through to __TEXT,__const
+ case 4:
+ return getSection("__TEXT", "__literal4",
+ MachOSection::S_4BYTE_LITERALS);
+ case 8:
+ return getSection("__TEXT", "__literal8",
+ MachOSection::S_8BYTE_LITERALS);
+ case 16:
+ return getSection("__TEXT", "__literal16",
+ MachOSection::S_16BYTE_LITERALS);
+ }
+ }
+ return getSection("__TEXT", "__const");
+ }
+ MachOSection *getJumpTableSection() {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return getTextSection(false);
+ else
+ return getSection("__TEXT", "__const");
+ }
+
+  /// MachOSymTab - This struct contains information about the offsets and
+  /// sizes of the symbol table data described by the LC_SYMTAB load
+  /// command.
+ struct MachOSymTab {
+ uint32_t cmd; // LC_SYMTAB
+ uint32_t cmdsize; // sizeof( MachOSymTab )
+ uint32_t symoff; // symbol table offset
+ uint32_t nsyms; // number of symbol table entries
+ uint32_t stroff; // string table offset
+ uint32_t strsize; // string table size in bytes
+
+ // Constants for the cmd field
+ // see <mach-o/loader.h>
+ enum { LC_SYMTAB = 0x02 // link-edit stab symbol table info
+ };
+
+ MachOSymTab() : cmd(LC_SYMTAB), cmdsize(6 * sizeof(uint32_t)), symoff(0),
+ nsyms(0), stroff(0), strsize(0) { }
+ };
+
+  /// MachODySymTab - This struct contains information about the offsets and
+  /// sizes of the dynamic link-edit symbol table data described by the
+  /// LC_DYSYMTAB load command.
+ struct MachODySymTab {
+ uint32_t cmd; // LC_DYSYMTAB
+ uint32_t cmdsize; // sizeof( MachODySymTab )
+ uint32_t ilocalsym; // index to local symbols
+ uint32_t nlocalsym; // number of local symbols
+ uint32_t iextdefsym; // index to externally defined symbols
+ uint32_t nextdefsym; // number of externally defined symbols
+ uint32_t iundefsym; // index to undefined symbols
+ uint32_t nundefsym; // number of undefined symbols
+ uint32_t tocoff; // file offset to table of contents
+ uint32_t ntoc; // number of entries in table of contents
+ uint32_t modtaboff; // file offset to module table
+ uint32_t nmodtab; // number of module table entries
+ uint32_t extrefsymoff; // offset to referenced symbol table
+ uint32_t nextrefsyms; // number of referenced symbol table entries
+ uint32_t indirectsymoff; // file offset to the indirect symbol table
+ uint32_t nindirectsyms; // number of indirect symbol table entries
+ uint32_t extreloff; // offset to external relocation entries
+ uint32_t nextrel; // number of external relocation entries
+ uint32_t locreloff; // offset to local relocation entries
+ uint32_t nlocrel; // number of local relocation entries
+
+ // Constants for the cmd field
+ // see <mach-o/loader.h>
+ enum { LC_DYSYMTAB = 0x0B // dynamic link-edit symbol table info
+ };
+
+ MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)),
+ ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0),
+ iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0),
+ nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0),
+ nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) { }
+ };
+
+ /// SymTab - The "stab" style symbol table information
+ MachOSymTab SymTab;
+ /// DySymTab - symbol table info for the dynamic link editor
+ MachODySymTab DySymTab;
+
+ struct MachOSymCmp {
+ // FIXME: this does not appear to be sorting 'f' after 'F'
+ bool operator()(const MachOSym &LHS, const MachOSym &RHS) {
+ return LHS.GVName < RHS.GVName;
+ }
+ };
+
+ /// PartitionByLocal - Simple boolean predicate that returns true if Sym is
+ /// a local symbol rather than an external symbol.
+ static bool PartitionByLocal(const MachOSym &Sym);
+
+ /// PartitionByDefined - Simple boolean predicate that returns true if Sym
+ /// is defined in this module.
+ static bool PartitionByDefined(const MachOSym &Sym);
+
+ protected:
+
+ /// SymbolTable - This is the list of symbols we have emitted to the file.
+ /// This actually gets rearranged before emission to the file (to put the
+ /// local symbols first in the list).
+ std::vector<MachOSym> SymbolTable;
+
+ /// SymT - A buffer to hold the symbol table before we write it out at the
+ /// appropriate location in the file.
+ DataBuffer SymT;
+
+ /// StrT - A buffer to hold the string table before we write it out at the
+ /// appropriate location in the file.
+ DataBuffer StrT;
+
+    /// PendingGlobals - This is a list of externally defined symbols that we
+    /// have been asked to emit, but have not seen a reference to.  When a
+    /// reference is seen, the symbol will move from this list to SymbolTable.
+ std::vector<GlobalValue*> PendingGlobals;
+
+ /// DynamicSymbolTable - This is just a vector of indices into
+ /// SymbolTable to aid in emitting the DYSYMTAB load command.
+ std::vector<unsigned> DynamicSymbolTable;
+
+ static void InitMem(const Constant *C, void *Addr, intptr_t Offset,
+ const TargetData *TD,
+ std::vector<MachineRelocation> &MRs);
+
+ private:
+ void AddSymbolToSection(MachOSection *MOS, GlobalVariable *GV);
+ void EmitGlobal(GlobalVariable *GV);
+ void EmitHeaderAndLoadCommands();
+ void EmitSections();
+ void BufferSymbolAndStringTable();
+ void CalculateRelocations(MachOSection &MOS);
+
+ MachineRelocation GetJTRelocation(unsigned Offset,
+ MachineBasicBlock *MBB) const {
+ return TM.getMachOWriterInfo()->GetJTRelocation(Offset, MBB);
+ }
+
+    /// GetTargetRelocation - Delegates to the target's MachOWriterInfo and
+    /// returns the number of relocation entries emitted.
+ unsigned GetTargetRelocation(MachineRelocation &MR,
+ unsigned FromIdx,
+ unsigned ToAddr,
+ unsigned ToIndex,
+ OutputBuffer &RelocOut,
+ OutputBuffer &SecOut,
+ bool Scattered,
+ bool Extern) {
+ return TM.getMachOWriterInfo()->GetTargetRelocation(MR, FromIdx, ToAddr,
+ ToIndex, RelocOut,
+ SecOut, Scattered,
+ Extern);
+ }
+ };
+}
+
+#endif
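
A minimal standalone sketch of how the constants in this header compose, mirroring MachOHeader::setMagic and the flags passed to getTextSection/getConstSection above. The helper names below are illustrative only, not part of the writer:

#include <cstdint>
#include <cstdio>

// A section's flags field is one S_* type value in the low 8 bits OR'd with
// S_ATTR_* attribute bits in the high 24 bits, exactly as the enums above
// lay out.
static const uint32_t S_CSTRING_LITERALS       = 2;
static const uint32_t S_ATTR_PURE_INSTRUCTIONS = 1u << 31;
static const uint32_t S_ATTR_SOME_INSTRUCTIONS = 1u << 10;

// Mirrors MachOHeader::setMagic: the magic number is chosen purely from
// pointer size and endianness.
static uint32_t pickMagic(bool isLittleEndian, bool is64Bit) {
  if (isLittleEndian)
    return is64Bit ? 0xcffaedfeu : 0xcefaedfeu;
  return is64Bit ? 0xfeedfacfu : 0xfeedfaceu;
}

int main() {
  uint32_t TextFlags = S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS;
  uint32_t CStrFlags = S_CSTRING_LITERALS;            // type only, no attrs
  printf("__text flags:    0x%08x\n", (unsigned)TextFlags);   // 0x80000400
  printf("__cstring flags: 0x%08x\n", (unsigned)CStrFlags);   // 0x00000002
  printf("64-bit LE magic: 0x%08x\n", (unsigned)pickMagic(true, true));
  return 0;
}
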
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
new file mode 100644
index 0000000..ba428c5
--- /dev/null
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -0,0 +1,287 @@
+//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect the sequence of machine instructions for a basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/LeakDetector.h"
+#include <algorithm>
+using namespace llvm;
+
+MachineBasicBlock::~MachineBasicBlock() {
+ LeakDetector::removeGarbageObject(this);
+}
+
+std::ostream& llvm::operator<<(std::ostream &OS, const MachineBasicBlock &MBB) {
+ MBB.print(OS);
+ return OS;
+}
+
+// MBBs start out as #-1. When an MBB is added to a MachineFunction, it
+// gets the next available unique MBB number. If it is removed from a
+// MachineFunction, it goes back to being #-1.
+void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock* N) {
+  assert(N->Parent == 0 && "machine basic block already in a function");
+ N->Parent = Parent;
+ N->Number = Parent->addToMBBNumbering(N);
+ LeakDetector::removeGarbageObject(N);
+}
+
+void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock* N) {
+  assert(N->Parent != 0 && "machine basic block not in a function");
+ N->Parent->removeFromMBBNumbering(N->Number);
+ N->Number = -1;
+ N->Parent = 0;
+ LeakDetector::addGarbageObject(N);
+}
+
+
+MachineInstr* ilist_traits<MachineInstr>::createSentinel() {
+ MachineInstr* dummy = new MachineInstr();
+ LeakDetector::removeGarbageObject(dummy);
+ return dummy;
+}
+
+void ilist_traits<MachineInstr>::addNodeToList(MachineInstr* N) {
+ assert(N->parent == 0 && "machine instruction already in a basic block");
+ N->parent = parent;
+ LeakDetector::removeGarbageObject(N);
+}
+
+void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr* N) {
+ assert(N->parent != 0 && "machine instruction not in a basic block");
+ N->parent = 0;
+ LeakDetector::addGarbageObject(N);
+}
+
+void ilist_traits<MachineInstr>::transferNodesFromList(
+ iplist<MachineInstr, ilist_traits<MachineInstr> >& fromList,
+ ilist_iterator<MachineInstr> first,
+ ilist_iterator<MachineInstr> last) {
+ if (parent != fromList.parent)
+ for (; first != last; ++first)
+ first->parent = parent;
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
+ const TargetInstrInfo& TII = *getParent()->getTarget().getInstrInfo();
+ iterator I = end();
+ while (I != begin() && TII.isTerminatorInstr((--I)->getOpcode()));
+ if (I != end() && !TII.isTerminatorInstr(I->getOpcode())) ++I;
+ return I;
+}
+
+void MachineBasicBlock::dump() const {
+ print(*cerr.stream());
+}
+
+static inline void OutputReg(std::ostream &os, unsigned RegNo,
+ const MRegisterInfo *MRI = 0) {
+ if (!RegNo || MRegisterInfo::isPhysicalRegister(RegNo)) {
+ if (MRI)
+ os << " %" << MRI->get(RegNo).Name;
+ else
+ os << " %mreg(" << RegNo << ")";
+ } else
+ os << " %reg" << RegNo;
+}
+
+void MachineBasicBlock::print(std::ostream &OS) const {
+ const MachineFunction *MF = getParent();
+ if(!MF) {
+ OS << "Can't print out MachineBasicBlock because parent MachineFunction"
+ << " is null\n";
+ return;
+ }
+
+ const BasicBlock *LBB = getBasicBlock();
+ OS << "\n";
+ if (LBB) OS << LBB->getName() << ": ";
+ OS << (const void*)this
+ << ", LLVM BB @" << (const void*) LBB << ", ID#" << getNumber();
+ if (isLandingPad()) OS << ", EH LANDING PAD";
+ OS << ":\n";
+
+ const MRegisterInfo *MRI = MF->getTarget().getRegisterInfo();
+ if (livein_begin() != livein_end()) {
+ OS << "Live Ins:";
+ for (const_livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
+ OutputReg(OS, *I, MRI);
+ OS << "\n";
+ }
+ // Print the preds of this block according to the CFG.
+ if (!pred_empty()) {
+ OS << " Predecessors according to CFG:";
+ for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
+ OS << " " << *PI << " (#" << (*PI)->getNumber() << ")";
+ OS << "\n";
+ }
+
+ for (const_iterator I = begin(); I != end(); ++I) {
+ OS << "\t";
+ I->print(OS, &getParent()->getTarget());
+ }
+
+ // Print the successors of this block according to the CFG.
+ if (!succ_empty()) {
+ OS << " Successors according to CFG:";
+ for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI)
+ OS << " " << *SI << " (#" << (*SI)->getNumber() << ")";
+ OS << "\n";
+ }
+}
+
+void MachineBasicBlock::removeLiveIn(unsigned Reg) {
+ livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
+ assert(I != livein_end() && "Not a live in!");
+ LiveIns.erase(I);
+}
+
+void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) {
+  MachineFunction::BasicBlockListType &BBList =
+    getParent()->getBasicBlockList();
+ getParent()->getBasicBlockList().splice(NewAfter, BBList, this);
+}
+
+void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
+  MachineFunction::BasicBlockListType &BBList =
+    getParent()->getBasicBlockList();
+ MachineFunction::iterator BBI = NewBefore;
+ getParent()->getBasicBlockList().splice(++BBI, BBList, this);
+}
+
+
+void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) {
+ Successors.push_back(succ);
+ succ->addPredecessor(this);
+}
+
+void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) {
+ succ->removePredecessor(this);
+ succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+ assert(I != Successors.end() && "Not a current successor!");
+ Successors.erase(I);
+}
+
+MachineBasicBlock::succ_iterator
+MachineBasicBlock::removeSuccessor(succ_iterator I) {
+  assert(I != Successors.end() && "Not a current successor!");
+  (*I)->removePredecessor(this);
+  return Successors.erase(I);
+}
+
+void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
+ Predecessors.push_back(pred);
+}
+
+void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
+ std::vector<MachineBasicBlock *>::iterator I =
+ std::find(Predecessors.begin(), Predecessors.end(), pred);
+ assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
+ Predecessors.erase(I);
+}
+
+bool MachineBasicBlock::isSuccessor(MachineBasicBlock *MBB) const {
+ std::vector<MachineBasicBlock *>::const_iterator I =
+ std::find(Successors.begin(), Successors.end(), MBB);
+ return I != Successors.end();
+}
+
+/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to
+/// 'Old', change the code and CFG so that it branches to 'New' instead.
+void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Cannot replace self with self!");
+
+ MachineBasicBlock::iterator I = end();
+ while (I != begin()) {
+ --I;
+ if (!(I->getInstrDescriptor()->Flags & M_TERMINATOR_FLAG)) break;
+
+ // Scan the operands of this machine instruction, replacing any uses of Old
+ // with New.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (I->getOperand(i).isMachineBasicBlock() &&
+ I->getOperand(i).getMachineBasicBlock() == Old)
+ I->getOperand(i).setMachineBasicBlock(New);
+ }
+
+ // Update the successor information. If New was already a successor, just
+ // remove the link to Old instead of creating another one. PR 1444.
+ removeSuccessor(Old);
+ if (!isSuccessor(New))
+ addSuccessor(New);
+}
+
+/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the
+/// CFG to be inserted. If we have proven that MBB can only branch to DestA and
+/// DestB, remove any other MBB successors from the CFG. DestA and DestB can
+/// be null.
+/// Besides DestA and DestB, retain other edges leading to LandingPads (currently
+/// there can be only one; we don't check or require that here).
+/// Note it is possible that DestA and/or DestB are LandingPads.
+bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
+ MachineBasicBlock *DestB,
+ bool isCond) {
+ bool MadeChange = false;
+ bool AddedFallThrough = false;
+
+ MachineBasicBlock *FallThru = getNext();
+
+ // If this block ends with a conditional branch that falls through to its
+ // successor, set DestB as the successor.
+ if (isCond) {
+ if (DestB == 0 && FallThru != getParent()->end()) {
+ DestB = FallThru;
+ AddedFallThrough = true;
+ }
+ } else {
+ // If this is an unconditional branch with no explicit dest, it must just be
+ // a fallthrough into DestB.
+ if (DestA == 0 && FallThru != getParent()->end()) {
+ DestA = FallThru;
+ AddedFallThrough = true;
+ }
+ }
+
+ MachineBasicBlock::succ_iterator SI = succ_begin();
+ MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB;
+ while (SI != succ_end()) {
+ if (*SI == DestA && DestA == DestB) {
+ DestA = DestB = 0;
+ ++SI;
+ } else if (*SI == DestA) {
+ DestA = 0;
+ ++SI;
+ } else if (*SI == DestB) {
+ DestB = 0;
+ ++SI;
+ } else if ((*SI)->isLandingPad() &&
+ *SI!=OrigDestA && *SI!=OrigDestB) {
+ ++SI;
+ } else {
+ // Otherwise, this is a superfluous edge, remove it.
+ SI = removeSuccessor(SI);
+ MadeChange = true;
+ }
+ }
+ if (!AddedFallThrough) {
+ assert(DestA == 0 && DestB == 0 &&
+ "MachineCFG is missing edges!");
+ } else if (isCond) {
+ assert(DestA == 0 && "MachineCFG is missing edges!");
+ }
+ return MadeChange;
+}
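
The successor/predecessor updates above always edit both endpoints of a CFG edge, which is what lets ReplaceUsesOfBlockWith finish with a plain removeSuccessor/isSuccessor/addSuccessor sequence without creating duplicate edges. A self-contained sketch of that invariant, using simplified stand-in types rather than the real MachineBasicBlock API:

#include <algorithm>
#include <cassert>
#include <vector>

// Simplified stand-in for MachineBasicBlock's successor/predecessor lists.
struct Block {
  std::vector<Block*> Succs, Preds;

  void addSuccessor(Block *S) {        // mirrors addSuccessor above
    Succs.push_back(S);
    S->Preds.push_back(this);
  }
  void removeSuccessor(Block *S) {     // mirrors removeSuccessor above
    std::vector<Block*>::iterator PI =
      std::find(S->Preds.begin(), S->Preds.end(), this);
    assert(PI != S->Preds.end() && "Not a predecessor!");
    S->Preds.erase(PI);
    std::vector<Block*>::iterator SI =
      std::find(Succs.begin(), Succs.end(), S);
    assert(SI != Succs.end() && "Not a current successor!");
    Succs.erase(SI);
  }
  bool isSuccessor(Block *S) const {
    return std::find(Succs.begin(), Succs.end(), S) != Succs.end();
  }
  // Mirrors the edge update at the end of ReplaceUsesOfBlockWith: if New is
  // already a successor, only the edge to Old is dropped, so no duplicate
  // edge is created.
  void replaceSuccessor(Block *Old, Block *New) {
    removeSuccessor(Old);
    if (!isSuccessor(New))
      addSuccessor(New);
  }
};

int main() {
  Block A, B, C;
  A.addSuccessor(&B);
  A.addSuccessor(&C);
  A.replaceSuccessor(&B, &C);          // B edge removed, C not duplicated
  assert(A.Succs.size() == 1 && A.Succs[0] == &C);
  assert(B.Preds.empty() && C.Preds.size() == 1);
  return 0;
}
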
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
new file mode 100644
index 0000000..c762ae5
--- /dev/null
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -0,0 +1,483 @@
+//===-- MachineFunction.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect native machine code information for a function. This allows
+// target-specific information about the generated code to be stored with each
+// function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Config/config.h"
+#include <fstream>
+#include <sstream>
+using namespace llvm;
+
+static AnnotationID MF_AID(
+ AnnotationManager::getID("CodeGen::MachineCodeForFunction"));
+
+// Out of line virtual function to home classes.
+void MachineFunctionPass::virtfn() {}
+
+namespace {
+ struct VISIBILITY_HIDDEN Printer : public MachineFunctionPass {
+ static char ID;
+
+ std::ostream *OS;
+ const std::string Banner;
+
+ Printer (std::ostream *_OS, const std::string &_Banner)
+ : MachineFunctionPass((intptr_t)&ID), OS (_OS), Banner (_Banner) { }
+
+ const char *getPassName() const { return "MachineFunction Printer"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ (*OS) << Banner;
+ MF.print (*OS);
+ return false;
+ }
+ };
+ char Printer::ID = 0;
+}
+
+/// Returns a newly-created MachineFunction Printer pass. The default output
+/// stream is std::cerr; the default banner is empty.
+///
+FunctionPass *llvm::createMachineFunctionPrinterPass(std::ostream *OS,
+ const std::string &Banner){
+ return new Printer(OS, Banner);
+}
+
+namespace {
+ struct VISIBILITY_HIDDEN Deleter : public MachineFunctionPass {
+ static char ID;
+ Deleter() : MachineFunctionPass((intptr_t)&ID) {}
+
+ const char *getPassName() const { return "Machine Code Deleter"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ // Delete the annotation from the function now.
+ MachineFunction::destruct(MF.getFunction());
+ return true;
+ }
+ };
+ char Deleter::ID = 0;
+}
+
+/// MachineCodeDeletion Pass - This pass deletes all of the machine code for
+/// the current function, which should happen after the function has been
+/// emitted to a .s file or to memory.
+FunctionPass *llvm::createMachineCodeDeleter() {
+ return new Deleter();
+}
+
+
+
+//===---------------------------------------------------------------------===//
+// MachineFunction implementation
+//===---------------------------------------------------------------------===//
+
+MachineBasicBlock* ilist_traits<MachineBasicBlock>::createSentinel() {
+ MachineBasicBlock* dummy = new MachineBasicBlock();
+ LeakDetector::removeGarbageObject(dummy);
+ return dummy;
+}
+
+void ilist_traits<MachineBasicBlock>::transferNodesFromList(
+ iplist<MachineBasicBlock, ilist_traits<MachineBasicBlock> >& toList,
+ ilist_iterator<MachineBasicBlock> first,
+ ilist_iterator<MachineBasicBlock> last) {
+ if (Parent != toList.Parent)
+ for (; first != last; ++first)
+ first->Parent = toList.Parent;
+}
+
+MachineFunction::MachineFunction(const Function *F,
+ const TargetMachine &TM)
+ : Annotation(MF_AID), Fn(F), Target(TM) {
+ SSARegMapping = new SSARegMap();
+ MFInfo = 0;
+ FrameInfo = new MachineFrameInfo();
+ ConstantPool = new MachineConstantPool(TM.getTargetData());
+ UsedPhysRegs.resize(TM.getRegisterInfo()->getNumRegs());
+
+ // Set up jump table.
+ const TargetData &TD = *TM.getTargetData();
+ bool IsPic = TM.getRelocationModel() == Reloc::PIC_;
+ unsigned EntrySize = IsPic ? 4 : TD.getPointerSize();
+ unsigned Alignment = IsPic ? TD.getABITypeAlignment(Type::Int32Ty)
+ : TD.getPointerABIAlignment();
+ JumpTableInfo = new MachineJumpTableInfo(EntrySize, Alignment);
+
+ BasicBlocks.Parent = this;
+}
+
+MachineFunction::~MachineFunction() {
+ BasicBlocks.clear();
+ delete SSARegMapping;
+ delete MFInfo;
+ delete FrameInfo;
+ delete ConstantPool;
+ delete JumpTableInfo;
+}
+
+
+/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
+/// recomputes them. This guarantees that the MBB numbers are sequential,
+/// dense, and match the ordering of the blocks within the function. If a
+/// specific MachineBasicBlock is specified, only that block and those after
+/// it are renumbered.
+void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
+ if (empty()) { MBBNumbering.clear(); return; }
+ MachineFunction::iterator MBBI, E = end();
+ if (MBB == 0)
+ MBBI = begin();
+ else
+ MBBI = MBB;
+
+ // Figure out the block number this should have.
+ unsigned BlockNo = 0;
+ if (MBBI != begin())
+ BlockNo = prior(MBBI)->getNumber()+1;
+
+ for (; MBBI != E; ++MBBI, ++BlockNo) {
+ if (MBBI->getNumber() != (int)BlockNo) {
+ // Remove use of the old number.
+ if (MBBI->getNumber() != -1) {
+ assert(MBBNumbering[MBBI->getNumber()] == &*MBBI &&
+ "MBB number mismatch!");
+ MBBNumbering[MBBI->getNumber()] = 0;
+ }
+
+ // If BlockNo is already taken, set that block's number to -1.
+ if (MBBNumbering[BlockNo])
+ MBBNumbering[BlockNo]->setNumber(-1);
+
+ MBBNumbering[BlockNo] = MBBI;
+ MBBI->setNumber(BlockNo);
+ }
+ }
+
+ // Okay, all the blocks are renumbered. If we have compactified the block
+ // numbering, shrink MBBNumbering now.
+ assert(BlockNo <= MBBNumbering.size() && "Mismatch!");
+ MBBNumbering.resize(BlockNo);
+}
+
+
+void MachineFunction::dump() const { print(*cerr.stream()); }
+
+void MachineFunction::print(std::ostream &OS) const {
+ OS << "# Machine code for " << Fn->getName () << "():\n";
+
+ // Print Frame Information
+ getFrameInfo()->print(*this, OS);
+
+ // Print JumpTable Information
+ getJumpTableInfo()->print(OS);
+
+ // Print Constant Pool
+ getConstantPool()->print(OS);
+
+ const MRegisterInfo *MRI = getTarget().getRegisterInfo();
+
+ if (livein_begin() != livein_end()) {
+ OS << "Live Ins:";
+ for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I) {
+ if (MRI)
+ OS << " " << MRI->getName(I->first);
+ else
+ OS << " Reg #" << I->first;
+
+ if (I->second)
+ OS << " in VR#" << I->second << " ";
+ }
+ OS << "\n";
+ }
+ if (liveout_begin() != liveout_end()) {
+ OS << "Live Outs:";
+ for (liveout_iterator I = liveout_begin(), E = liveout_end(); I != E; ++I)
+ if (MRI)
+ OS << " " << MRI->getName(*I);
+ else
+ OS << " Reg #" << *I;
+ OS << "\n";
+ }
+
+ for (const_iterator BB = begin(); BB != end(); ++BB)
+ BB->print(OS);
+
+ OS << "\n# End machine code for " << Fn->getName () << "().\n\n";
+}
+
+/// CFGOnly flag - This is used to control whether or not the CFG graph printer
+/// prints out the contents of basic blocks or not. This is acceptable because
+/// this code is only really used for debugging purposes.
+///
+static bool CFGOnly = false;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
+ static std::string getGraphName(const MachineFunction *F) {
+ return "CFG for '" + F->getFunction()->getName() + "' function";
+ }
+
+ static std::string getNodeLabel(const MachineBasicBlock *Node,
+ const MachineFunction *Graph) {
+ if (CFGOnly && Node->getBasicBlock() &&
+ !Node->getBasicBlock()->getName().empty())
+ return Node->getBasicBlock()->getName() + ":";
+
+ std::ostringstream Out;
+ if (CFGOnly) {
+ Out << Node->getNumber() << ':';
+ return Out.str();
+ }
+
+ Node->print(Out);
+
+ std::string OutStr = Out.str();
+ if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
+
+ // Process string output to make it nicer...
+ for (unsigned i = 0; i != OutStr.length(); ++i)
+ if (OutStr[i] == '\n') { // Left justify
+ OutStr[i] = '\\';
+ OutStr.insert(OutStr.begin()+i+1, 'l');
+ }
+ return OutStr;
+ }
+ };
+}
+
+void MachineFunction::viewCFG() const
+{
+#ifndef NDEBUG
+ ViewGraph(this, "mf" + getFunction()->getName());
+#else
+  cerr << "MachineFunction::viewCFG is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+void MachineFunction::viewCFGOnly() const
+{
+ CFGOnly = true;
+ viewCFG();
+ CFGOnly = false;
+}
+
+// The next two methods are used to construct and to retrieve
+// the MachineCodeForFunction object for the given function.
+// construct() -- Allocates and initializes for a given function and target
+// get() -- Returns a handle to the object.
+// This should not be called before "construct()"
+// for a given Function.
+//
+MachineFunction&
+MachineFunction::construct(const Function *Fn, const TargetMachine &Tar)
+{
+ assert(Fn->getAnnotation(MF_AID) == 0 &&
+ "Object already exists for this function!");
+ MachineFunction* mcInfo = new MachineFunction(Fn, Tar);
+ Fn->addAnnotation(mcInfo);
+ return *mcInfo;
+}
+
+void MachineFunction::destruct(const Function *Fn) {
+ bool Deleted = Fn->deleteAnnotation(MF_AID);
+ assert(Deleted && "Machine code did not exist for function!");
+}
+
+MachineFunction& MachineFunction::get(const Function *F)
+{
+ MachineFunction *mc = (MachineFunction*)F->getAnnotation(MF_AID);
+ assert(mc && "Call construct() method first to allocate the object");
+ return *mc;
+}
+
+void MachineFunction::clearSSARegMap() {
+ delete SSARegMapping;
+ SSARegMapping = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// MachineFrameInfo implementation
+//===----------------------------------------------------------------------===//
+
+void MachineFrameInfo::print(const MachineFunction &MF, std::ostream &OS) const{
+ int ValOffset = MF.getTarget().getFrameInfo()->getOffsetOfLocalArea();
+
+ for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
+ const StackObject &SO = Objects[i];
+ OS << " <fi #" << (int)(i-NumFixedObjects) << ">: ";
+ if (SO.Size == 0)
+ OS << "variable sized";
+ else
+ OS << "size is " << SO.Size << " byte" << (SO.Size != 1 ? "s," : ",");
+ OS << " alignment is " << SO.Alignment << " byte"
+ << (SO.Alignment != 1 ? "s," : ",");
+
+ if (i < NumFixedObjects)
+ OS << " fixed";
+ if (i < NumFixedObjects || SO.SPOffset != -1) {
+ int64_t Off = SO.SPOffset - ValOffset;
+ OS << " at location [SP";
+ if (Off > 0)
+ OS << "+" << Off;
+ else if (Off < 0)
+ OS << Off;
+ OS << "]";
+ }
+ OS << "\n";
+ }
+
+ if (HasVarSizedObjects)
+ OS << " Stack frame contains variable sized objects\n";
+}
+
+void MachineFrameInfo::dump(const MachineFunction &MF) const {
+ print(MF, *cerr.stream());
+}
+
+
+//===----------------------------------------------------------------------===//
+// MachineJumpTableInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// getJumpTableIndex - Create a new jump table entry in the jump table info
+/// or return an existing one.
+///
+unsigned MachineJumpTableInfo::getJumpTableIndex(
+ const std::vector<MachineBasicBlock*> &DestBBs) {
+ assert(!DestBBs.empty() && "Cannot create an empty jump table!");
+ for (unsigned i = 0, e = JumpTables.size(); i != e; ++i)
+ if (JumpTables[i].MBBs == DestBBs)
+ return i;
+
+ JumpTables.push_back(MachineJumpTableEntry(DestBBs));
+ return JumpTables.size()-1;
+}
+
+
+void MachineJumpTableInfo::print(std::ostream &OS) const {
+ // FIXME: this is lame, maybe we could print out the MBB numbers or something
+ // like {1, 2, 4, 5, 3, 0}
+ for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
+ OS << " <jt #" << i << "> has " << JumpTables[i].MBBs.size()
+ << " entries\n";
+ }
+}
+
+void MachineJumpTableInfo::dump() const { print(*cerr.stream()); }
+
+
+//===----------------------------------------------------------------------===//
+// MachineConstantPool implementation
+//===----------------------------------------------------------------------===//
+
+const Type *MachineConstantPoolEntry::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
+
+MachineConstantPool::~MachineConstantPool() {
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+ if (Constants[i].isMachineConstantPoolEntry())
+ delete Constants[i].Val.MachineCPVal;
+}
+
+/// getConstantPoolIndex - Create a new entry in the constant pool or return
+/// an existing one. User must specify an alignment in bytes for the object.
+///
+unsigned MachineConstantPool::getConstantPoolIndex(Constant *C,
+ unsigned Alignment) {
+ assert(Alignment && "Alignment must be specified!");
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ unsigned AlignMask = (1 << Alignment)-1;
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+ if (Constants[i].Val.ConstVal == C && (Constants[i].Offset & AlignMask)== 0)
+ return i;
+
+ unsigned Offset = 0;
+ if (!Constants.empty()) {
+ Offset = Constants.back().getOffset();
+ Offset += TD->getTypeSize(Constants.back().getType());
+ Offset = (Offset+AlignMask)&~AlignMask;
+ }
+
+ Constants.push_back(MachineConstantPoolEntry(C, Offset));
+ return Constants.size()-1;
+}
+
+unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
+ unsigned Alignment) {
+ assert(Alignment && "Alignment must be specified!");
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ unsigned AlignMask = (1 << Alignment)-1;
+ int Idx = V->getExistingMachineCPValue(this, Alignment);
+ if (Idx != -1)
+ return (unsigned)Idx;
+
+ unsigned Offset = 0;
+ if (!Constants.empty()) {
+ Offset = Constants.back().getOffset();
+ Offset += TD->getTypeSize(Constants.back().getType());
+ Offset = (Offset+AlignMask)&~AlignMask;
+ }
+
+ Constants.push_back(MachineConstantPoolEntry(V, Offset));
+ return Constants.size()-1;
+}
+
+
+void MachineConstantPool::print(std::ostream &OS) const {
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ OS << " <cp #" << i << "> is";
+ if (Constants[i].isMachineConstantPoolEntry())
+ Constants[i].Val.MachineCPVal->print(OS);
+ else
+ OS << *(Value*)Constants[i].Val.ConstVal;
+ OS << " , offset=" << Constants[i].getOffset();
+ OS << "\n";
+ }
+}
+
+void MachineConstantPool::dump() const { print(*cerr.stream()); }
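
getConstantPoolIndex above advances a running offset and rounds it up with an alignment mask before placing each entry; the (Offset + AlignMask) & ~AlignMask idiom recurs throughout the code generator. A small worked sketch of just that arithmetic, assuming a power-of-two alignment in bytes and independent of how the constant pool derives its mask:

#include <cassert>
#include <cstdint>

// With AlignMask = Align - 1 (Align a power of two),
//   (Offset + AlignMask) & ~AlignMask
// is the smallest multiple of Align that is >= Offset.
static uint64_t roundUp(uint64_t Offset, uint64_t Align) {
  uint64_t AlignMask = Align - 1;
  return (Offset + AlignMask) & ~AlignMask;
}

int main() {
  // Lay out three entries of sizes 4, 8 and 2 with 8-byte alignment, the way
  // the constant pool advances its running offset.
  uint64_t Off = 0;
  Off = roundUp(Off, 8);  assert(Off == 0);   // first entry at offset 0
  Off += 4;
  Off = roundUp(Off, 8);  assert(Off == 8);   // 4 rounds up to 8
  Off += 8;
  Off = roundUp(Off, 8);  assert(Off == 16);  // already aligned
  Off += 2;
  return 0;
}
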
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
new file mode 100644
index 0000000..c5e2ba8
--- /dev/null
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -0,0 +1,426 @@
+//===-- MachineInstr.cpp --------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Methods common to all machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/Streams.h"
+#include <ostream>
+using namespace llvm;
+
+/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
+/// TID NULL and no operands.
+MachineInstr::MachineInstr()
+ : TID(0), NumImplicitOps(0), parent(0) {
+ // Make sure that we get added to a machine basicblock
+ LeakDetector::addGarbageObject(this);
+}
+
+void MachineInstr::addImplicitDefUseOperands() {
+ if (TID->ImplicitDefs)
+ for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs) {
+ MachineOperand Op;
+ Op.opType = MachineOperand::MO_Register;
+ Op.IsDef = true;
+ Op.IsImp = true;
+ Op.IsKill = false;
+ Op.IsDead = false;
+ Op.contents.RegNo = *ImpDefs;
+ Op.auxInfo.subReg = 0;
+ Operands.push_back(Op);
+ }
+ if (TID->ImplicitUses)
+ for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses) {
+ MachineOperand Op;
+ Op.opType = MachineOperand::MO_Register;
+ Op.IsDef = false;
+ Op.IsImp = true;
+ Op.IsKill = false;
+ Op.IsDead = false;
+ Op.contents.RegNo = *ImpUses;
+ Op.auxInfo.subReg = 0;
+ Operands.push_back(Op);
+ }
+}
+
+/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+/// implicit operands. It reserves space for the number of operands specified
+/// by the TargetInstrDescriptor, or for numOperands if that is not zero (for
+/// instructions with a variable number of operands).
+MachineInstr::MachineInstr(const TargetInstrDescriptor &tid)
+ : TID(&tid), NumImplicitOps(0), parent(0) {
+ if (TID->ImplicitDefs)
+ for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+ NumImplicitOps++;
+ if (TID->ImplicitUses)
+ for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses)
+ NumImplicitOps++;
+ Operands.reserve(NumImplicitOps + TID->numOperands);
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basicblock
+ LeakDetector::addGarbageObject(this);
+}
+
+/// MachineInstr ctor - Works exactly the same as the ctor above, except that the
+/// MachineInstr is created and added to the end of the specified basic block.
+///
+MachineInstr::MachineInstr(MachineBasicBlock *MBB,
+ const TargetInstrDescriptor &tid)
+ : TID(&tid), NumImplicitOps(0), parent(0) {
+ assert(MBB && "Cannot use inserting ctor with null basic block!");
+ if (TID->ImplicitDefs)
+ for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+ NumImplicitOps++;
+ if (TID->ImplicitUses)
+ for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses)
+ NumImplicitOps++;
+ Operands.reserve(NumImplicitOps + TID->numOperands);
+ addImplicitDefUseOperands();
+ // Make sure that we get added to a machine basicblock
+ LeakDetector::addGarbageObject(this);
+ MBB->push_back(this); // Add instruction to end of basic block!
+}
+
+/// MachineInstr ctor - Copies MachineInstr arg exactly
+///
+MachineInstr::MachineInstr(const MachineInstr &MI) {
+ TID = MI.getInstrDescriptor();
+ NumImplicitOps = MI.NumImplicitOps;
+ Operands.reserve(MI.getNumOperands());
+
+ // Add operands
+ for (unsigned i = 0; i != MI.getNumOperands(); ++i)
+ Operands.push_back(MI.getOperand(i));
+
+ // Set parent, next, and prev to null
+ parent = 0;
+ prev = 0;
+ next = 0;
+}
+
+
+MachineInstr::~MachineInstr() {
+ LeakDetector::removeGarbageObject(this);
+}
+
+/// getOpcode - Returns the opcode of this MachineInstr.
+///
+const int MachineInstr::getOpcode() const {
+ return TID->Opcode;
+}
+
+/// removeFromParent - This method unlinks 'this' from the containing basic
+/// block, and returns it, but does not delete it.
+MachineInstr *MachineInstr::removeFromParent() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->remove(this);
+ return this;
+}
+
+
+/// OperandsComplete - Return true if it's illegal to add a new operand.
+///
+bool MachineInstr::OperandsComplete() const {
+ unsigned short NumOperands = TID->numOperands;
+ if ((TID->Flags & M_VARIABLE_OPS) == 0 &&
+ getNumOperands()-NumImplicitOps >= NumOperands)
+ return true; // Broken: we have all the operands of this instruction!
+ return false;
+}
+
+/// getNumExplicitOperands - Returns the number of non-implicit operands.
+///
+unsigned MachineInstr::getNumExplicitOperands() const {
+ unsigned NumOperands = TID->numOperands;
+ if ((TID->Flags & M_VARIABLE_OPS) == 0)
+ return NumOperands;
+
+  for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isRegister() || !MO.isImplicit())
+      NumOperands++;
+  }
+ return NumOperands;
+}
+
+/// isIdenticalTo - Return true if this operand is identical to the specified
+/// operand.
+bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
+ if (getType() != Other.getType()) return false;
+
+ switch (getType()) {
+ default: assert(0 && "Unrecognized operand type");
+ case MachineOperand::MO_Register:
+ return getReg() == Other.getReg() && isDef() == Other.isDef();
+ case MachineOperand::MO_Immediate:
+ return getImm() == Other.getImm();
+ case MachineOperand::MO_MachineBasicBlock:
+ return getMBB() == Other.getMBB();
+ case MachineOperand::MO_FrameIndex:
+ return getFrameIndex() == Other.getFrameIndex();
+ case MachineOperand::MO_ConstantPoolIndex:
+ return getConstantPoolIndex() == Other.getConstantPoolIndex() &&
+ getOffset() == Other.getOffset();
+ case MachineOperand::MO_JumpTableIndex:
+ return getJumpTableIndex() == Other.getJumpTableIndex();
+ case MachineOperand::MO_GlobalAddress:
+ return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_ExternalSymbol:
+ return !strcmp(getSymbolName(), Other.getSymbolName()) &&
+ getOffset() == Other.getOffset();
+ }
+}
+
+/// findRegisterUseOperandIdx() - Returns the index of the operand that is a
+/// use of the specified register, or -1 if it is not found. If isKill is
+/// true, the search is restricted to uses that kill the register.
+int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == Reg)
+ if (!isKill || MO.isKill())
+ return i;
+ }
+ return -1;
+}
+
+/// findRegisterDefOperand() - Returns the MachineOperand that is a def of
+/// the specified register, or NULL if it is not found.
+MachineOperand *MachineInstr::findRegisterDefOperand(unsigned Reg) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == Reg)
+ return &MO;
+ }
+ return NULL;
+}
+
+/// findFirstPredOperandIdx() - Find the index of the first operand in the
+/// operand list that is used to represent the predicate. It returns -1 if
+/// none is found.
+int MachineInstr::findFirstPredOperandIdx() const {
+ const TargetInstrDescriptor *TID = getInstrDescriptor();
+ if (TID->Flags & M_PREDICABLE) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if ((TID->OpInfo[i].Flags & M_PREDICATE_OPERAND))
+ return i;
+ }
+
+ return -1;
+}
+
+/// copyKillDeadInfo - Copies kill / dead operand properties from MI.
+///
+void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || (!MO.isKill() && !MO.isDead()))
+ continue;
+ for (unsigned j = 0, ee = getNumOperands(); j != ee; ++j) {
+ MachineOperand &MOp = getOperand(j);
+ if (!MOp.isIdenticalTo(MO))
+ continue;
+ if (MO.isKill())
+ MOp.setIsKill();
+ else
+ MOp.setIsDead();
+ break;
+ }
+ }
+}
+
+/// copyPredicates - Copies predicate operand(s) from MI.
+void MachineInstr::copyPredicates(const MachineInstr *MI) {
+ const TargetInstrDescriptor *TID = MI->getInstrDescriptor();
+ if (TID->Flags & M_PREDICABLE) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if ((TID->OpInfo[i].Flags & M_PREDICATE_OPERAND)) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Predicated operands must be last operands.
+ if (MO.isReg())
+ addRegOperand(MO.getReg(), false);
+ else {
+ addImmOperand(MO.getImm());
+ }
+ }
+ }
+ }
+}
+
+void MachineInstr::dump() const {
+ cerr << " " << *this;
+}
+
+static inline void OutputReg(std::ostream &os, unsigned RegNo,
+ const MRegisterInfo *MRI = 0) {
+ if (!RegNo || MRegisterInfo::isPhysicalRegister(RegNo)) {
+ if (MRI)
+ os << "%" << MRI->get(RegNo).Name;
+ else
+ os << "%mreg(" << RegNo << ")";
+ } else
+ os << "%reg" << RegNo;
+}
+
+static void print(const MachineOperand &MO, std::ostream &OS,
+ const TargetMachine *TM) {
+ const MRegisterInfo *MRI = 0;
+
+ if (TM) MRI = TM->getRegisterInfo();
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ OutputReg(OS, MO.getReg(), MRI);
+ break;
+ case MachineOperand::MO_Immediate:
+ OS << MO.getImmedValue();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OS << "mbb<"
+ << ((Value*)MO.getMachineBasicBlock()->getBasicBlock())->getName()
+ << "," << (void*)MO.getMachineBasicBlock() << ">";
+ break;
+ case MachineOperand::MO_FrameIndex:
+ OS << "<fi#" << MO.getFrameIndex() << ">";
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ OS << "<cp#" << MO.getConstantPoolIndex() << ">";
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ OS << "<jt#" << MO.getJumpTableIndex() << ">";
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ OS << "<ga:" << ((Value*)MO.getGlobal())->getName();
+ if (MO.getOffset()) OS << "+" << MO.getOffset();
+ OS << ">";
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ OS << "<es:" << MO.getSymbolName();
+ if (MO.getOffset()) OS << "+" << MO.getOffset();
+ OS << ">";
+ break;
+ default:
+ assert(0 && "Unrecognized operand type");
+ }
+}
+
+void MachineInstr::print(std::ostream &OS, const TargetMachine *TM) const {
+ unsigned StartOp = 0;
+
+ // Specialize printing if op#0 is definition
+ if (getNumOperands() && getOperand(0).isReg() && getOperand(0).isDef()) {
+ ::print(getOperand(0), OS, TM);
+ if (getOperand(0).isDead())
+ OS << "<dead>";
+ OS << " = ";
+ ++StartOp; // Don't print this operand again!
+ }
+
+ if (TID)
+ OS << TID->Name;
+
+ for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand& mop = getOperand(i);
+ if (i != StartOp)
+ OS << ",";
+ OS << " ";
+ ::print(mop, OS, TM);
+
+ if (mop.isReg()) {
+ if (mop.isDef() || mop.isKill() || mop.isDead() || mop.isImplicit()) {
+ OS << "<";
+ bool NeedComma = false;
+ if (mop.isImplicit()) {
+ OS << (mop.isDef() ? "imp-def" : "imp-use");
+ NeedComma = true;
+ } else if (mop.isDef()) {
+ OS << "def";
+ NeedComma = true;
+ }
+ if (mop.isKill() || mop.isDead()) {
+ if (NeedComma)
+ OS << ",";
+ if (mop.isKill())
+ OS << "kill";
+ if (mop.isDead())
+ OS << "dead";
+ }
+ OS << ">";
+ }
+ }
+ }
+
+ OS << "\n";
+}
+
+void MachineInstr::print(std::ostream &os) const {
+ // If the instruction is embedded into a basic block, we can find the target
+ // info for the instruction.
+ if (const MachineBasicBlock *MBB = getParent()) {
+ const MachineFunction *MF = MBB->getParent();
+ if (MF)
+ print(os, &MF->getTarget());
+ else
+ print(os, 0);
+ }
+
+ // Otherwise, print it out in the "raw" format without symbolic register names
+ // and such.
+ os << getInstrDescriptor()->Name;
+
+ for (unsigned i = 0, N = getNumOperands(); i < N; i++) {
+ os << "\t" << getOperand(i);
+ if (getOperand(i).isReg() && getOperand(i).isDef())
+ os << "<d>";
+ }
+
+ os << "\n";
+}
+
+void MachineOperand::print(std::ostream &OS) const {
+ switch (getType()) {
+ case MO_Register:
+ OutputReg(OS, getReg());
+ break;
+ case MO_Immediate:
+ OS << (long)getImmedValue();
+ break;
+ case MO_MachineBasicBlock:
+ OS << "<mbb:"
+ << ((Value*)getMachineBasicBlock()->getBasicBlock())->getName()
+ << "@" << (void*)getMachineBasicBlock() << ">";
+ break;
+ case MO_FrameIndex:
+ OS << "<fi#" << getFrameIndex() << ">";
+ break;
+ case MO_ConstantPoolIndex:
+ OS << "<cp#" << getConstantPoolIndex() << ">";
+ break;
+ case MO_JumpTableIndex:
+ OS << "<jt#" << getJumpTableIndex() << ">";
+ break;
+ case MO_GlobalAddress:
+ OS << "<ga:" << ((Value*)getGlobal())->getName() << ">";
+ break;
+ case MO_ExternalSymbol:
+ OS << "<es:" << getSymbolName() << ">";
+ break;
+ default:
+ assert(0 && "Unrecognized operand type");
+ break;
+ }
+}
+
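
The MachineInstr constructors above size and populate the operand list by walking the zero-terminated ImplicitDefs/ImplicitUses arrays of the TargetInstrDescriptor. A stripped-down sketch of that counting pattern, with a simplified descriptor and made-up register numbers:

#include <cassert>

// Simplified stand-in for TargetInstrDescriptor: the implicit def/use lists
// are zero-terminated arrays of register numbers, or null when empty.
struct Descriptor {
  const unsigned *ImplicitDefs;
  const unsigned *ImplicitUses;
};

// Mirrors the counting loops in the MachineInstr constructors.
static unsigned countImplicitOps(const Descriptor &D) {
  unsigned N = 0;
  if (D.ImplicitDefs)
    for (const unsigned *P = D.ImplicitDefs; *P; ++P) ++N;
  if (D.ImplicitUses)
    for (const unsigned *P = D.ImplicitUses; *P; ++P) ++N;
  return N;
}

int main() {
  static const unsigned Defs[] = { 5, 6, 0 };   // two implicit defs, 0-terminated
  static const unsigned Uses[] = { 7, 0 };      // one implicit use
  Descriptor D = { Defs, Uses };
  assert(countImplicitOps(D) == 3);

  Descriptor NoImp = { 0, 0 };                  // instruction with none
  assert(countImplicitOps(NoImp) == 0);
  return 0;
}
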
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
new file mode 100644
index 0000000..77fb643
--- /dev/null
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -0,0 +1,1905 @@
+//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Streams.h"
+using namespace llvm;
+using namespace llvm::dwarf;
+
+// Handle the Pass registration stuff necessary to use TargetData's.
+namespace {
+ RegisterPass<MachineModuleInfo> X("machinemoduleinfo", "Module Information");
+}
+char MachineModuleInfo::ID = 0;
+
+//===----------------------------------------------------------------------===//
+
+/// getGlobalVariablesUsing - Return all of the GlobalVariables which have the
+/// specified value in their initializer somewhere.
+static void
+getGlobalVariablesUsing(Value *V, std::vector<GlobalVariable*> &Result) {
+ // Scan though value users.
+ for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(*I)) {
+ // If the user is a GlobalVariable then add to result.
+ Result.push_back(GV);
+ } else if (Constant *C = dyn_cast<Constant>(*I)) {
+ // If the user is a constant variable then scan its users
+ getGlobalVariablesUsing(C, Result);
+ }
+ }
+}
+
+/// getGlobalVariablesUsing - Return all of the GlobalVariables that use the
+/// named GlobalVariable.
+static std::vector<GlobalVariable*>
+getGlobalVariablesUsing(Module &M, const std::string &RootName) {
+ std::vector<GlobalVariable*> Result; // GlobalVariables matching criteria.
+
+ std::vector<const Type*> FieldTypes;
+ FieldTypes.push_back(Type::Int32Ty);
+ FieldTypes.push_back(Type::Int32Ty);
+
+ // Get the GlobalVariable root.
+ GlobalVariable *UseRoot = M.getGlobalVariable(RootName,
+ StructType::get(FieldTypes));
+
+ // If present and linkonce then scan for users.
+ if (UseRoot && UseRoot->hasLinkOnceLinkage()) {
+ getGlobalVariablesUsing(UseRoot, Result);
+ }
+
+ return Result;
+}
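+
+// Usage sketch (mirrors how this file uses the helper further below): a call
+// such as
+//   getGlobalVariablesUsing(M, "llvm.dbg.compile_units");
+// returns every descriptor global whose initializer refers to the
+// "llvm.dbg.compile_units" anchor, i.e. all compile unit descriptors that the
+// front-end emitted into the module.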
+
+/// isStringValue - Return true if the given value can be coerced to a string.
+///
+static bool isStringValue(Value *V) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ if (GV->hasInitializer() && isa<ConstantArray>(GV->getInitializer())) {
+ ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
+ return Init->isString();
+ }
+ } else if (Constant *C = dyn_cast<Constant>(V)) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return isStringValue(GV);
+ else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ if (CE->getNumOperands() == 3 &&
+ cast<Constant>(CE->getOperand(1))->isNullValue() &&
+ isa<ConstantInt>(CE->getOperand(2))) {
+ return isStringValue(CE->getOperand(0));
+ }
+ }
+ }
+ }
+ return false;
+}
+
+/// getGlobalVariable - Return either a direct or cast Global value.
+///
+static GlobalVariable *getGlobalVariable(Value *V) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ return GV;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->getOpcode() == Instruction::BitCast) {
+ return dyn_cast<GlobalVariable>(CE->getOperand(0));
+ }
+ }
+ return NULL;
+}
+
+/// isGlobalVariable - Return true if the given value can be coerced to a
+/// GlobalVariable.
+static bool isGlobalVariable(Value *V) {
+ if (isa<GlobalVariable>(V) || isa<ConstantPointerNull>(V)) {
+ return true;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->getOpcode() == Instruction::BitCast) {
+ return isa<GlobalVariable>(CE->getOperand(0));
+ }
+ }
+ return false;
+}
+
+/// getUIntOperand - Return ith operand if it is an unsigned integer.
+///
+static ConstantInt *getUIntOperand(GlobalVariable *GV, unsigned i) {
+ // Make sure the GlobalVariable has an initializer.
+ if (!GV->hasInitializer()) return NULL;
+
+ // Get the initializer constant.
+ ConstantStruct *CI = dyn_cast<ConstantStruct>(GV->getInitializer());
+ if (!CI) return NULL;
+
+  // Check if there are at least i + 1 operands.
+ unsigned N = CI->getNumOperands();
+ if (i >= N) return NULL;
+
+ // Check constant.
+ return dyn_cast<ConstantInt>(CI->getOperand(i));
+}
+
+//===----------------------------------------------------------------------===//
+
+/// ApplyToFields - Target the visitor to each field of the debug information
+/// descriptor.
+void DIVisitor::ApplyToFields(DebugInfoDesc *DD) {
+ DD->ApplyToFields(this);
+}
+
+//===----------------------------------------------------------------------===//
+/// DICountVisitor - This DIVisitor counts all the fields in the supplied
+/// DebugInfoDesc.
+class DICountVisitor : public DIVisitor {
+private:
+ unsigned Count; // Running count of fields.
+
+public:
+ DICountVisitor() : DIVisitor(), Count(0) {}
+
+ // Accessors.
+ unsigned getCount() const { return Count; }
+
+ /// Apply - Count each of the fields.
+ ///
+ virtual void Apply(int &Field) { ++Count; }
+ virtual void Apply(unsigned &Field) { ++Count; }
+ virtual void Apply(int64_t &Field) { ++Count; }
+ virtual void Apply(uint64_t &Field) { ++Count; }
+ virtual void Apply(bool &Field) { ++Count; }
+ virtual void Apply(std::string &Field) { ++Count; }
+ virtual void Apply(DebugInfoDesc *&Field) { ++Count; }
+ virtual void Apply(GlobalVariable *&Field) { ++Count; }
+ virtual void Apply(std::vector<DebugInfoDesc *> &Field) {
+ ++Count;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// DIDeserializeVisitor - This DIVisitor deserializes all the fields in the
+/// supplied DebugInfoDesc.
+class DIDeserializeVisitor : public DIVisitor {
+private:
+ DIDeserializer &DR; // Active deserializer.
+ unsigned I; // Current operand index.
+ ConstantStruct *CI; // GlobalVariable constant initializer.
+
+public:
+ DIDeserializeVisitor(DIDeserializer &D, GlobalVariable *GV)
+ : DIVisitor()
+ , DR(D)
+ , I(0)
+ , CI(cast<ConstantStruct>(GV->getInitializer()))
+ {}
+
+ /// Apply - Set the value of each of the fields.
+ ///
+ virtual void Apply(int &Field) {
+ Constant *C = CI->getOperand(I++);
+ Field = cast<ConstantInt>(C)->getSExtValue();
+ }
+ virtual void Apply(unsigned &Field) {
+ Constant *C = CI->getOperand(I++);
+ Field = cast<ConstantInt>(C)->getZExtValue();
+ }
+ virtual void Apply(int64_t &Field) {
+ Constant *C = CI->getOperand(I++);
+ Field = cast<ConstantInt>(C)->getSExtValue();
+ }
+ virtual void Apply(uint64_t &Field) {
+ Constant *C = CI->getOperand(I++);
+ Field = cast<ConstantInt>(C)->getZExtValue();
+ }
+ virtual void Apply(bool &Field) {
+ Constant *C = CI->getOperand(I++);
+ Field = cast<ConstantInt>(C)->getZExtValue();
+ }
+ virtual void Apply(std::string &Field) {
+ Constant *C = CI->getOperand(I++);
+ Field = C->getStringValue();
+ }
+ virtual void Apply(DebugInfoDesc *&Field) {
+ Constant *C = CI->getOperand(I++);
+ Field = DR.Deserialize(C);
+ }
+ virtual void Apply(GlobalVariable *&Field) {
+ Constant *C = CI->getOperand(I++);
+ Field = getGlobalVariable(C);
+ }
+ virtual void Apply(std::vector<DebugInfoDesc *> &Field) {
+ Field.resize(0);
+ Constant *C = CI->getOperand(I++);
+ GlobalVariable *GV = getGlobalVariable(C);
+ if (GV->hasInitializer()) {
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer())) {
+ for (unsigned i = 0, N = CA->getNumOperands(); i < N; ++i) {
+ GlobalVariable *GVE = getGlobalVariable(CA->getOperand(i));
+ DebugInfoDesc *DE = DR.Deserialize(GVE);
+ Field.push_back(DE);
+ }
+ } else if (GV->getInitializer()->isNullValue()) {
+ if (const ArrayType *T =
+ dyn_cast<ArrayType>(GV->getType()->getElementType())) {
+ Field.resize(T->getNumElements());
+ }
+ }
+ }
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// DISerializeVisitor - This DIVisitor serializes all the fields in
+/// the supplied DebugInfoDesc.
+class DISerializeVisitor : public DIVisitor {
+private:
+ DISerializer &SR; // Active serializer.
+ std::vector<Constant*> &Elements; // Element accumulator.
+
+public:
+ DISerializeVisitor(DISerializer &S, std::vector<Constant*> &E)
+ : DIVisitor()
+ , SR(S)
+ , Elements(E)
+ {}
+
+ /// Apply - Set the value of each of the fields.
+ ///
+ virtual void Apply(int &Field) {
+ Elements.push_back(ConstantInt::get(Type::Int32Ty, int32_t(Field)));
+ }
+ virtual void Apply(unsigned &Field) {
+ Elements.push_back(ConstantInt::get(Type::Int32Ty, uint32_t(Field)));
+ }
+ virtual void Apply(int64_t &Field) {
+ Elements.push_back(ConstantInt::get(Type::Int64Ty, int64_t(Field)));
+ }
+ virtual void Apply(uint64_t &Field) {
+ Elements.push_back(ConstantInt::get(Type::Int64Ty, uint64_t(Field)));
+ }
+ virtual void Apply(bool &Field) {
+ Elements.push_back(ConstantInt::get(Type::Int1Ty, Field));
+ }
+ virtual void Apply(std::string &Field) {
+ Elements.push_back(SR.getString(Field));
+ }
+ virtual void Apply(DebugInfoDesc *&Field) {
+ GlobalVariable *GV = NULL;
+
+ // If non-NULL then convert to global.
+ if (Field) GV = SR.Serialize(Field);
+
+ // FIXME - At some point should use specific type.
+ const PointerType *EmptyTy = SR.getEmptyStructPtrType();
+
+ if (GV) {
+ // Set to pointer to global.
+ Elements.push_back(ConstantExpr::getBitCast(GV, EmptyTy));
+ } else {
+ // Use NULL.
+ Elements.push_back(ConstantPointerNull::get(EmptyTy));
+ }
+ }
+ virtual void Apply(GlobalVariable *&Field) {
+ const PointerType *EmptyTy = SR.getEmptyStructPtrType();
+ if (Field) {
+ Elements.push_back(ConstantExpr::getBitCast(Field, EmptyTy));
+ } else {
+ Elements.push_back(ConstantPointerNull::get(EmptyTy));
+ }
+ }
+ virtual void Apply(std::vector<DebugInfoDesc *> &Field) {
+ const PointerType *EmptyTy = SR.getEmptyStructPtrType();
+ unsigned N = Field.size();
+ ArrayType *AT = ArrayType::get(EmptyTy, N);
+ std::vector<Constant *> ArrayElements;
+
+ for (unsigned i = 0, N = Field.size(); i < N; ++i) {
+ if (DebugInfoDesc *Element = Field[i]) {
+ GlobalVariable *GVE = SR.Serialize(Element);
+ Constant *CE = ConstantExpr::getBitCast(GVE, EmptyTy);
+ ArrayElements.push_back(cast<Constant>(CE));
+ } else {
+ ArrayElements.push_back(ConstantPointerNull::get(EmptyTy));
+ }
+ }
+
+ Constant *CA = ConstantArray::get(AT, ArrayElements);
+ GlobalVariable *CAGV = new GlobalVariable(AT, true,
+ GlobalValue::InternalLinkage,
+ CA, "llvm.dbg.array",
+ SR.getModule());
+ CAGV->setSection("llvm.metadata");
+ Constant *CAE = ConstantExpr::getBitCast(CAGV, EmptyTy);
+ Elements.push_back(CAE);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// DIGetTypesVisitor - This DIVisitor gathers all the field types in
+/// the supplied DebugInfoDesc.
+class DIGetTypesVisitor : public DIVisitor {
+private:
+ DISerializer &SR; // Active serializer.
+ std::vector<const Type*> &Fields; // Type accumulator.
+
+public:
+ DIGetTypesVisitor(DISerializer &S, std::vector<const Type*> &F)
+ : DIVisitor()
+ , SR(S)
+ , Fields(F)
+ {}
+
+ /// Apply - Set the value of each of the fields.
+ ///
+ virtual void Apply(int &Field) {
+ Fields.push_back(Type::Int32Ty);
+ }
+ virtual void Apply(unsigned &Field) {
+ Fields.push_back(Type::Int32Ty);
+ }
+ virtual void Apply(int64_t &Field) {
+ Fields.push_back(Type::Int64Ty);
+ }
+ virtual void Apply(uint64_t &Field) {
+ Fields.push_back(Type::Int64Ty);
+ }
+ virtual void Apply(bool &Field) {
+ Fields.push_back(Type::Int1Ty);
+ }
+ virtual void Apply(std::string &Field) {
+ Fields.push_back(SR.getStrPtrType());
+ }
+ virtual void Apply(DebugInfoDesc *&Field) {
+ // FIXME - At some point should use specific type.
+ const PointerType *EmptyTy = SR.getEmptyStructPtrType();
+ Fields.push_back(EmptyTy);
+ }
+ virtual void Apply(GlobalVariable *&Field) {
+ const PointerType *EmptyTy = SR.getEmptyStructPtrType();
+ Fields.push_back(EmptyTy);
+ }
+ virtual void Apply(std::vector<DebugInfoDesc *> &Field) {
+ const PointerType *EmptyTy = SR.getEmptyStructPtrType();
+ Fields.push_back(EmptyTy);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// DIVerifyVisitor - This DIVisitor verifies all the field types against
+/// a constant initializer.
+class DIVerifyVisitor : public DIVisitor {
+private:
+ DIVerifier &VR; // Active verifier.
+ bool IsValid; // Validity status.
+ unsigned I; // Current operand index.
+ ConstantStruct *CI; // GlobalVariable constant initializer.
+
+public:
+ DIVerifyVisitor(DIVerifier &V, GlobalVariable *GV)
+ : DIVisitor()
+ , VR(V)
+ , IsValid(true)
+ , I(0)
+ , CI(cast<ConstantStruct>(GV->getInitializer()))
+ {
+ }
+
+ // Accessors.
+ bool isValid() const { return IsValid; }
+
+ /// Apply - Set the value of each of the fields.
+ ///
+ virtual void Apply(int &Field) {
+ Constant *C = CI->getOperand(I++);
+ IsValid = IsValid && isa<ConstantInt>(C);
+ }
+ virtual void Apply(unsigned &Field) {
+ Constant *C = CI->getOperand(I++);
+ IsValid = IsValid && isa<ConstantInt>(C);
+ }
+ virtual void Apply(int64_t &Field) {
+ Constant *C = CI->getOperand(I++);
+ IsValid = IsValid && isa<ConstantInt>(C);
+ }
+ virtual void Apply(uint64_t &Field) {
+ Constant *C = CI->getOperand(I++);
+ IsValid = IsValid && isa<ConstantInt>(C);
+ }
+ virtual void Apply(bool &Field) {
+ Constant *C = CI->getOperand(I++);
+ IsValid = IsValid && isa<ConstantInt>(C) && C->getType() == Type::Int1Ty;
+ }
+ virtual void Apply(std::string &Field) {
+ Constant *C = CI->getOperand(I++);
+ IsValid = IsValid &&
+ (!C || isStringValue(C) || C->isNullValue());
+ }
+ virtual void Apply(DebugInfoDesc *&Field) {
+ // FIXME - Prepare the correct descriptor.
+ Constant *C = CI->getOperand(I++);
+ IsValid = IsValid && isGlobalVariable(C);
+ }
+ virtual void Apply(GlobalVariable *&Field) {
+ Constant *C = CI->getOperand(I++);
+ IsValid = IsValid && isGlobalVariable(C);
+ }
+ virtual void Apply(std::vector<DebugInfoDesc *> &Field) {
+ Constant *C = CI->getOperand(I++);
+ IsValid = IsValid && isGlobalVariable(C);
+ if (!IsValid) return;
+
+ GlobalVariable *GV = getGlobalVariable(C);
+ IsValid = IsValid && GV && GV->hasInitializer();
+ if (!IsValid) return;
+
+ ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+ IsValid = IsValid && CA;
+ if (!IsValid) return;
+
+ for (unsigned i = 0, N = CA->getNumOperands(); IsValid && i < N; ++i) {
+ IsValid = IsValid && isGlobalVariable(CA->getOperand(i));
+ if (!IsValid) return;
+
+ GlobalVariable *GVE = getGlobalVariable(CA->getOperand(i));
+ VR.Verify(GVE);
+ }
+ }
+};
+
+
+//===----------------------------------------------------------------------===//
+
+/// TagFromGlobal - Returns the tag number from a debug info descriptor
+/// GlobalVariable. Return DW_TAG_invalid if the operand is not an unsigned int.
+unsigned DebugInfoDesc::TagFromGlobal(GlobalVariable *GV) {
+ ConstantInt *C = getUIntOperand(GV, 0);
+ return C ? ((unsigned)C->getZExtValue() & ~LLVMDebugVersionMask) :
+ (unsigned)DW_TAG_invalid;
+}
+
+/// VersionFromGlobal - Returns the version number from a debug info
+/// descriptor GlobalVariable. Return DW_TAG_invalid if the operand is not an
+/// unsigned int.
+unsigned DebugInfoDesc::VersionFromGlobal(GlobalVariable *GV) {
+ ConstantInt *C = getUIntOperand(GV, 0);
+ return C ? ((unsigned)C->getZExtValue() & LLVMDebugVersionMask) :
+ (unsigned)DW_TAG_invalid;
+}
+
+/// DescFactory - Create an instance of debug info descriptor based on Tag.
+/// Return NULL if not a recognized Tag.
+DebugInfoDesc *DebugInfoDesc::DescFactory(unsigned Tag) {
+ switch (Tag) {
+ case DW_TAG_anchor: return new AnchorDesc();
+ case DW_TAG_compile_unit: return new CompileUnitDesc();
+ case DW_TAG_variable: return new GlobalVariableDesc();
+ case DW_TAG_subprogram: return new SubprogramDesc();
+ case DW_TAG_lexical_block: return new BlockDesc();
+ case DW_TAG_base_type: return new BasicTypeDesc();
+ case DW_TAG_typedef:
+ case DW_TAG_pointer_type:
+ case DW_TAG_reference_type:
+ case DW_TAG_const_type:
+ case DW_TAG_volatile_type:
+ case DW_TAG_restrict_type:
+ case DW_TAG_member:
+ case DW_TAG_inheritance: return new DerivedTypeDesc(Tag);
+ case DW_TAG_array_type:
+ case DW_TAG_structure_type:
+ case DW_TAG_union_type:
+ case DW_TAG_enumeration_type:
+ case DW_TAG_vector_type:
+ case DW_TAG_subroutine_type: return new CompositeTypeDesc(Tag);
+ case DW_TAG_subrange_type: return new SubrangeDesc();
+ case DW_TAG_enumerator: return new EnumeratorDesc();
+ case DW_TAG_return_variable:
+ case DW_TAG_arg_variable:
+ case DW_TAG_auto_variable: return new VariableDesc(Tag);
+ default: break;
+ }
+ return NULL;
+}
+
+/// getLinkage - get linkage appropriate for this type of descriptor.
+///
+GlobalValue::LinkageTypes DebugInfoDesc::getLinkage() const {
+ return GlobalValue::InternalLinkage;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the descriptor.
+///
+void DebugInfoDesc::ApplyToFields(DIVisitor *Visitor) {
+ Visitor->Apply(Tag);
+}
+
+//===----------------------------------------------------------------------===//
+
+AnchorDesc::AnchorDesc()
+: DebugInfoDesc(DW_TAG_anchor)
+, AnchorTag(0)
+{}
+AnchorDesc::AnchorDesc(AnchoredDesc *D)
+: DebugInfoDesc(DW_TAG_anchor)
+, AnchorTag(D->getTag())
+{}
+
+// Implement isa/cast/dyncast.
+bool AnchorDesc::classof(const DebugInfoDesc *D) {
+ return D->getTag() == DW_TAG_anchor;
+}
+
+/// getLinkage - get linkage appropriate for this type of descriptor.
+///
+GlobalValue::LinkageTypes AnchorDesc::getLinkage() const {
+ return GlobalValue::LinkOnceLinkage;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the AnchorDesc.
+///
+void AnchorDesc::ApplyToFields(DIVisitor *Visitor) {
+ DebugInfoDesc::ApplyToFields(Visitor);
+
+ Visitor->Apply(AnchorTag);
+}
+
+/// getDescString - Return a string used to compose global names and labels. A
+/// global variable name needs to be defined for each debug descriptor that is
+/// anchored. NOTE: each global variable named here also needs to be added
+/// to the list of names left external in the internalizer.
+/// ExternalNames.insert("llvm.dbg.compile_units");
+/// ExternalNames.insert("llvm.dbg.global_variables");
+/// ExternalNames.insert("llvm.dbg.subprograms");
+const char *AnchorDesc::getDescString() const {
+ switch (AnchorTag) {
+ case DW_TAG_compile_unit: return CompileUnitDesc::AnchorString;
+ case DW_TAG_variable: return GlobalVariableDesc::AnchorString;
+ case DW_TAG_subprogram: return SubprogramDesc::AnchorString;
+ default: break;
+ }
+
+ assert(0 && "Tag does not have a case for anchor string");
+ return "";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *AnchorDesc::getTypeString() const {
+ return "llvm.dbg.anchor.type";
+}
+
+#ifndef NDEBUG
+void AnchorDesc::dump() {
+ cerr << getDescString() << " "
+ << "Version(" << getVersion() << "), "
+ << "Tag(" << getTag() << "), "
+ << "AnchorTag(" << AnchorTag << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+AnchoredDesc::AnchoredDesc(unsigned T)
+: DebugInfoDesc(T)
+, Anchor(NULL)
+{}
+
+/// ApplyToFields - Target the visitor to the fields of the AnchoredDesc.
+///
+void AnchoredDesc::ApplyToFields(DIVisitor *Visitor) {
+ DebugInfoDesc::ApplyToFields(Visitor);
+
+ Visitor->Apply(Anchor);
+}
+
+//===----------------------------------------------------------------------===//
+
+CompileUnitDesc::CompileUnitDesc()
+: AnchoredDesc(DW_TAG_compile_unit)
+, Language(0)
+, FileName("")
+, Directory("")
+, Producer("")
+{}
+
+// Implement isa/cast/dyncast.
+bool CompileUnitDesc::classof(const DebugInfoDesc *D) {
+ return D->getTag() == DW_TAG_compile_unit;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the CompileUnitDesc.
+///
+void CompileUnitDesc::ApplyToFields(DIVisitor *Visitor) {
+ AnchoredDesc::ApplyToFields(Visitor);
+
+ // Handle cases out of sync with compiler.
+ if (getVersion() == 0) {
+ unsigned DebugVersion;
+ Visitor->Apply(DebugVersion);
+ }
+
+ Visitor->Apply(Language);
+ Visitor->Apply(FileName);
+ Visitor->Apply(Directory);
+ Visitor->Apply(Producer);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *CompileUnitDesc::getDescString() const {
+ return "llvm.dbg.compile_unit";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *CompileUnitDesc::getTypeString() const {
+ return "llvm.dbg.compile_unit.type";
+}
+
+/// getAnchorString - Return a string used to label this descriptor's anchor.
+///
+const char *CompileUnitDesc::AnchorString = "llvm.dbg.compile_units";
+const char *CompileUnitDesc::getAnchorString() const {
+ return AnchorString;
+}
+
+#ifndef NDEBUG
+void CompileUnitDesc::dump() {
+ cerr << getDescString() << " "
+ << "Version(" << getVersion() << "), "
+ << "Tag(" << getTag() << "), "
+ << "Anchor(" << getAnchor() << "), "
+ << "Language(" << Language << "), "
+ << "FileName(\"" << FileName << "\"), "
+ << "Directory(\"" << Directory << "\"), "
+ << "Producer(\"" << Producer << "\")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+TypeDesc::TypeDesc(unsigned T)
+: DebugInfoDesc(T)
+, Context(NULL)
+, Name("")
+, File(NULL)
+, Line(0)
+, Size(0)
+, Align(0)
+, Offset(0)
+, Flags(0)
+{}
+
+/// ApplyToFields - Target the visitor to the fields of the TypeDesc.
+///
+void TypeDesc::ApplyToFields(DIVisitor *Visitor) {
+ DebugInfoDesc::ApplyToFields(Visitor);
+
+ Visitor->Apply(Context);
+ Visitor->Apply(Name);
+ Visitor->Apply(File);
+ Visitor->Apply(Line);
+ Visitor->Apply(Size);
+ Visitor->Apply(Align);
+ Visitor->Apply(Offset);
+ if (getVersion() > LLVMDebugVersion4) Visitor->Apply(Flags);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *TypeDesc::getDescString() const {
+ return "llvm.dbg.type";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *TypeDesc::getTypeString() const {
+ return "llvm.dbg.type.type";
+}
+
+#ifndef NDEBUG
+void TypeDesc::dump() {
+ cerr << getDescString() << " "
+ << "Version(" << getVersion() << "), "
+ << "Tag(" << getTag() << "), "
+ << "Context(" << Context << "), "
+ << "Name(\"" << Name << "\"), "
+ << "File(" << File << "), "
+ << "Line(" << Line << "), "
+ << "Size(" << Size << "), "
+ << "Align(" << Align << "), "
+ << "Offset(" << Offset << "), "
+ << "Flags(" << Flags << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+BasicTypeDesc::BasicTypeDesc()
+: TypeDesc(DW_TAG_base_type)
+, Encoding(0)
+{}
+
+// Implement isa/cast/dyncast.
+bool BasicTypeDesc::classof(const DebugInfoDesc *D) {
+ return D->getTag() == DW_TAG_base_type;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the BasicTypeDesc.
+///
+void BasicTypeDesc::ApplyToFields(DIVisitor *Visitor) {
+ TypeDesc::ApplyToFields(Visitor);
+
+ Visitor->Apply(Encoding);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *BasicTypeDesc::getDescString() const {
+ return "llvm.dbg.basictype";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *BasicTypeDesc::getTypeString() const {
+ return "llvm.dbg.basictype.type";
+}
+
+#ifndef NDEBUG
+void BasicTypeDesc::dump() {
+ cerr << getDescString() << " "
+ << "Version(" << getVersion() << "), "
+ << "Tag(" << getTag() << "), "
+ << "Context(" << getContext() << "), "
+ << "Name(\"" << getName() << "\"), "
+ << "Size(" << getSize() << "), "
+ << "Encoding(" << Encoding << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+DerivedTypeDesc::DerivedTypeDesc(unsigned T)
+: TypeDesc(T)
+, FromType(NULL)
+{}
+
+// Implement isa/cast/dyncast.
+bool DerivedTypeDesc::classof(const DebugInfoDesc *D) {
+ unsigned T = D->getTag();
+ switch (T) {
+ case DW_TAG_typedef:
+ case DW_TAG_pointer_type:
+ case DW_TAG_reference_type:
+ case DW_TAG_const_type:
+ case DW_TAG_volatile_type:
+ case DW_TAG_restrict_type:
+ case DW_TAG_member:
+ case DW_TAG_inheritance:
+ return true;
+ default: break;
+ }
+ return false;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the DerivedTypeDesc.
+///
+void DerivedTypeDesc::ApplyToFields(DIVisitor *Visitor) {
+ TypeDesc::ApplyToFields(Visitor);
+
+ Visitor->Apply(FromType);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *DerivedTypeDesc::getDescString() const {
+ return "llvm.dbg.derivedtype";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *DerivedTypeDesc::getTypeString() const {
+ return "llvm.dbg.derivedtype.type";
+}
+
+#ifndef NDEBUG
+void DerivedTypeDesc::dump() {
+ cerr << getDescString() << " "
+ << "Version(" << getVersion() << "), "
+ << "Tag(" << getTag() << "), "
+ << "Context(" << getContext() << "), "
+ << "Name(\"" << getName() << "\"), "
+ << "Size(" << getSize() << "), "
+ << "File(" << getFile() << "), "
+ << "Line(" << getLine() << "), "
+ << "FromType(" << FromType << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+CompositeTypeDesc::CompositeTypeDesc(unsigned T)
+: DerivedTypeDesc(T)
+, Elements()
+{}
+
+// Implement isa/cast/dyncast.
+bool CompositeTypeDesc::classof(const DebugInfoDesc *D) {
+ unsigned T = D->getTag();
+ switch (T) {
+ case DW_TAG_array_type:
+ case DW_TAG_structure_type:
+ case DW_TAG_union_type:
+ case DW_TAG_enumeration_type:
+ case DW_TAG_vector_type:
+ case DW_TAG_subroutine_type:
+ return true;
+ default: break;
+ }
+ return false;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the CompositeTypeDesc.
+///
+void CompositeTypeDesc::ApplyToFields(DIVisitor *Visitor) {
+ DerivedTypeDesc::ApplyToFields(Visitor);
+
+ Visitor->Apply(Elements);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *CompositeTypeDesc::getDescString() const {
+ return "llvm.dbg.compositetype";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *CompositeTypeDesc::getTypeString() const {
+ return "llvm.dbg.compositetype.type";
+}
+
+#ifndef NDEBUG
+void CompositeTypeDesc::dump() {
+ cerr << getDescString() << " "
+ << "Version(" << getVersion() << "), "
+ << "Tag(" << getTag() << "), "
+ << "Context(" << getContext() << "), "
+ << "Name(\"" << getName() << "\"), "
+ << "Size(" << getSize() << "), "
+ << "File(" << getFile() << "), "
+ << "Line(" << getLine() << "), "
+ << "FromType(" << getFromType() << "), "
+ << "Elements.size(" << Elements.size() << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+SubrangeDesc::SubrangeDesc()
+: DebugInfoDesc(DW_TAG_subrange_type)
+, Lo(0)
+, Hi(0)
+{}
+
+// Implement isa/cast/dyncast.
+bool SubrangeDesc::classof(const DebugInfoDesc *D) {
+ return D->getTag() == DW_TAG_subrange_type;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the SubrangeDesc.
+///
+void SubrangeDesc::ApplyToFields(DIVisitor *Visitor) {
+ DebugInfoDesc::ApplyToFields(Visitor);
+
+ Visitor->Apply(Lo);
+ Visitor->Apply(Hi);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *SubrangeDesc::getDescString() const {
+ return "llvm.dbg.subrange";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *SubrangeDesc::getTypeString() const {
+ return "llvm.dbg.subrange.type";
+}
+
+#ifndef NDEBUG
+void SubrangeDesc::dump() {
+ cerr << getDescString() << " "
+ << "Version(" << getVersion() << "), "
+ << "Tag(" << getTag() << "), "
+ << "Lo(" << Lo << "), "
+ << "Hi(" << Hi << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+EnumeratorDesc::EnumeratorDesc()
+: DebugInfoDesc(DW_TAG_enumerator)
+, Name("")
+, Value(0)
+{}
+
+// Implement isa/cast/dyncast.
+bool EnumeratorDesc::classof(const DebugInfoDesc *D) {
+ return D->getTag() == DW_TAG_enumerator;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the EnumeratorDesc.
+///
+void EnumeratorDesc::ApplyToFields(DIVisitor *Visitor) {
+ DebugInfoDesc::ApplyToFields(Visitor);
+
+ Visitor->Apply(Name);
+ Visitor->Apply(Value);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *EnumeratorDesc::getDescString() const {
+ return "llvm.dbg.enumerator";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *EnumeratorDesc::getTypeString() const {
+ return "llvm.dbg.enumerator.type";
+}
+
+#ifndef NDEBUG
+void EnumeratorDesc::dump() {
+ cerr << getDescString() << " "
+ << "Version(" << getVersion() << "), "
+ << "Tag(" << getTag() << "), "
+ << "Name(" << Name << "), "
+ << "Value(" << Value << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+VariableDesc::VariableDesc(unsigned T)
+: DebugInfoDesc(T)
+, Context(NULL)
+, Name("")
+, File(NULL)
+, Line(0)
+, TyDesc(0)
+{}
+
+// Implement isa/cast/dyncast.
+bool VariableDesc::classof(const DebugInfoDesc *D) {
+ unsigned T = D->getTag();
+ switch (T) {
+ case DW_TAG_auto_variable:
+ case DW_TAG_arg_variable:
+ case DW_TAG_return_variable:
+ return true;
+ default: break;
+ }
+ return false;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the VariableDesc.
+///
+void VariableDesc::ApplyToFields(DIVisitor *Visitor) {
+ DebugInfoDesc::ApplyToFields(Visitor);
+
+ Visitor->Apply(Context);
+ Visitor->Apply(Name);
+ Visitor->Apply(File);
+ Visitor->Apply(Line);
+ Visitor->Apply(TyDesc);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *VariableDesc::getDescString() const {
+ return "llvm.dbg.variable";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *VariableDesc::getTypeString() const {
+ return "llvm.dbg.variable.type";
+}
+
+#ifndef NDEBUG
+void VariableDesc::dump() {
+ cerr << getDescString() << " "
+ << "Version(" << getVersion() << "), "
+ << "Tag(" << getTag() << "), "
+ << "Context(" << Context << "), "
+ << "Name(\"" << Name << "\"), "
+ << "File(" << File << "), "
+ << "Line(" << Line << "), "
+ << "TyDesc(" << TyDesc << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+GlobalDesc::GlobalDesc(unsigned T)
+: AnchoredDesc(T)
+, Context(0)
+, Name("")
+, FullName("")
+, LinkageName("")
+, File(NULL)
+, Line(0)
+, TyDesc(NULL)
+, IsStatic(false)
+, IsDefinition(false)
+{}
+
+/// ApplyToFields - Target the visitor to the fields of the global.
+///
+void GlobalDesc::ApplyToFields(DIVisitor *Visitor) {
+ AnchoredDesc::ApplyToFields(Visitor);
+
+ Visitor->Apply(Context);
+ Visitor->Apply(Name);
+ Visitor->Apply(FullName);
+ Visitor->Apply(LinkageName);
+ Visitor->Apply(File);
+ Visitor->Apply(Line);
+ Visitor->Apply(TyDesc);
+ Visitor->Apply(IsStatic);
+ Visitor->Apply(IsDefinition);
+}
+
+//===----------------------------------------------------------------------===//
+
+GlobalVariableDesc::GlobalVariableDesc()
+: GlobalDesc(DW_TAG_variable)
+, Global(NULL)
+{}
+
+// Implement isa/cast/dyncast.
+bool GlobalVariableDesc::classof(const DebugInfoDesc *D) {
+ return D->getTag() == DW_TAG_variable;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the GlobalVariableDesc.
+///
+void GlobalVariableDesc::ApplyToFields(DIVisitor *Visitor) {
+ GlobalDesc::ApplyToFields(Visitor);
+
+ Visitor->Apply(Global);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *GlobalVariableDesc::getDescString() const {
+ return "llvm.dbg.global_variable";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *GlobalVariableDesc::getTypeString() const {
+ return "llvm.dbg.global_variable.type";
+}
+
+/// getAnchorString - Return a string used to label this descriptor's anchor.
+///
+const char *GlobalVariableDesc::AnchorString = "llvm.dbg.global_variables";
+const char *GlobalVariableDesc::getAnchorString() const {
+ return AnchorString;
+}
+
+#ifndef NDEBUG
+void GlobalVariableDesc::dump() {
+ cerr << getDescString() << " "
+ << "Version(" << getVersion() << "), "
+ << "Tag(" << getTag() << "), "
+ << "Anchor(" << getAnchor() << "), "
+ << "Name(\"" << getName() << "\"), "
+ << "FullName(\"" << getFullName() << "\"), "
+ << "LinkageName(\"" << getLinkageName() << "\"), "
+ << "File(" << getFile() << "),"
+ << "Line(" << getLine() << "),"
+ << "Type(" << getType() << "), "
+ << "IsStatic(" << (isStatic() ? "true" : "false") << "), "
+ << "IsDefinition(" << (isDefinition() ? "true" : "false") << "), "
+ << "Global(" << Global << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+SubprogramDesc::SubprogramDesc()
+: GlobalDesc(DW_TAG_subprogram)
+{}
+
+// Implement isa/cast/dyncast.
+bool SubprogramDesc::classof(const DebugInfoDesc *D) {
+ return D->getTag() == DW_TAG_subprogram;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the
+/// SubprogramDesc.
+void SubprogramDesc::ApplyToFields(DIVisitor *Visitor) {
+ GlobalDesc::ApplyToFields(Visitor);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *SubprogramDesc::getDescString() const {
+ return "llvm.dbg.subprogram";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *SubprogramDesc::getTypeString() const {
+ return "llvm.dbg.subprogram.type";
+}
+
+/// getAnchorString - Return a string used to label this descriptor's anchor.
+///
+const char *SubprogramDesc::AnchorString = "llvm.dbg.subprograms";
+const char *SubprogramDesc::getAnchorString() const {
+ return AnchorString;
+}
+
+#ifndef NDEBUG
+void SubprogramDesc::dump() {
+ cerr << getDescString() << " "
+ << "Version(" << getVersion() << "), "
+ << "Tag(" << getTag() << "), "
+ << "Anchor(" << getAnchor() << "), "
+ << "Name(\"" << getName() << "\"), "
+ << "FullName(\"" << getFullName() << "\"), "
+ << "LinkageName(\"" << getLinkageName() << "\"), "
+ << "File(" << getFile() << "),"
+ << "Line(" << getLine() << "),"
+ << "Type(" << getType() << "), "
+ << "IsStatic(" << (isStatic() ? "true" : "false") << "), "
+ << "IsDefinition(" << (isDefinition() ? "true" : "false") << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+BlockDesc::BlockDesc()
+: DebugInfoDesc(DW_TAG_lexical_block)
+, Context(NULL)
+{}
+
+// Implement isa/cast/dyncast.
+bool BlockDesc::classof(const DebugInfoDesc *D) {
+ return D->getTag() == DW_TAG_lexical_block;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the BlockDesc.
+///
+void BlockDesc::ApplyToFields(DIVisitor *Visitor) {
+ DebugInfoDesc::ApplyToFields(Visitor);
+
+ Visitor->Apply(Context);
+}
+
+/// getDescString - Return a string used to compose global names and labels.
+///
+const char *BlockDesc::getDescString() const {
+ return "llvm.dbg.block";
+}
+
+/// getTypeString - Return a string used to label this descriptor's type.
+///
+const char *BlockDesc::getTypeString() const {
+ return "llvm.dbg.block.type";
+}
+
+#ifndef NDEBUG
+void BlockDesc::dump() {
+ cerr << getDescString() << " "
+ << "Version(" << getVersion() << "), "
+ << "Tag(" << getTag() << "),"
+ << "Context(" << Context << ")\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+DebugInfoDesc *DIDeserializer::Deserialize(Value *V) {
+ return Deserialize(getGlobalVariable(V));
+}
+DebugInfoDesc *DIDeserializer::Deserialize(GlobalVariable *GV) {
+ // Handle NULL.
+ if (!GV) return NULL;
+
+ // Check to see if it has been already deserialized.
+ DebugInfoDesc *&Slot = GlobalDescs[GV];
+ if (Slot) return Slot;
+
+ // Get the Tag from the global.
+ unsigned Tag = DebugInfoDesc::TagFromGlobal(GV);
+
+ // Create an empty instance of the correct sort.
+ Slot = DebugInfoDesc::DescFactory(Tag);
+
+ // If not a user defined descriptor.
+ if (Slot) {
+ // Deserialize the fields.
+ DIDeserializeVisitor DRAM(*this, GV);
+ DRAM.ApplyToFields(Slot);
+ }
+
+ return Slot;
+}
+
+//===----------------------------------------------------------------------===//
+
+/// getStrPtrType - Return a "sbyte *" type.
+///
+const PointerType *DISerializer::getStrPtrType() {
+ // If not already defined.
+ if (!StrPtrTy) {
+ // Construct the pointer to signed bytes.
+ StrPtrTy = PointerType::get(Type::Int8Ty);
+ }
+
+ return StrPtrTy;
+}
+
+/// getEmptyStructPtrType - Return a "{ }*" type.
+///
+const PointerType *DISerializer::getEmptyStructPtrType() {
+ // If not already defined.
+ if (!EmptyStructPtrTy) {
+ // Construct the empty structure type.
+ const StructType *EmptyStructTy =
+ StructType::get(std::vector<const Type*>());
+ // Construct the pointer to empty structure type.
+ EmptyStructPtrTy = PointerType::get(EmptyStructTy);
+ }
+
+ return EmptyStructPtrTy;
+}
+
+/// getTagType - Return the type describing the specified descriptor (via tag.)
+///
+const StructType *DISerializer::getTagType(DebugInfoDesc *DD) {
+ // Attempt to get the previously defined type.
+ StructType *&Ty = TagTypes[DD->getTag()];
+
+ // If not already defined.
+ if (!Ty) {
+ // Set up fields vector.
+ std::vector<const Type*> Fields;
+ // Get types of fields.
+ DIGetTypesVisitor GTAM(*this, Fields);
+ GTAM.ApplyToFields(DD);
+
+ // Construct structured type.
+ Ty = StructType::get(Fields);
+
+ // Register type name with module.
+ M->addTypeName(DD->getTypeString(), Ty);
+ }
+
+ return Ty;
+}
+
+/// getString - Construct the string as constant string global.
+///
+Constant *DISerializer::getString(const std::string &String) {
+ // Check string cache for previous edition.
+ Constant *&Slot = StringCache[String];
+ // Return Constant if previously defined.
+ if (Slot) return Slot;
+ // If empty string then use a sbyte* null instead.
+ if (String.empty()) {
+ Slot = ConstantPointerNull::get(getStrPtrType());
+ } else {
+ // Construct string as an llvm constant.
+ Constant *ConstStr = ConstantArray::get(String);
+ // Otherwise create and return a new string global.
+ GlobalVariable *StrGV = new GlobalVariable(ConstStr->getType(), true,
+ GlobalVariable::InternalLinkage,
+ ConstStr, ".str", M);
+ StrGV->setSection("llvm.metadata");
+ // Convert to generic string pointer.
+ Slot = ConstantExpr::getBitCast(StrGV, getStrPtrType());
+ }
+ return Slot;
+}
+
+/// Serialize - Recursively cast the specified descriptor into a GlobalVariable
+/// so that it can be serialized to a .bc or .ll file.
+GlobalVariable *DISerializer::Serialize(DebugInfoDesc *DD) {
+ // Check if the DebugInfoDesc is already in the map.
+ GlobalVariable *&Slot = DescGlobals[DD];
+
+ // See if DebugInfoDesc exists, if so return prior GlobalVariable.
+ if (Slot) return Slot;
+
+ // Get the type associated with the Tag.
+ const StructType *Ty = getTagType(DD);
+
+ // Create the GlobalVariable early to prevent infinite recursion.
+ GlobalVariable *GV = new GlobalVariable(Ty, true, DD->getLinkage(),
+ NULL, DD->getDescString(), M);
+ GV->setSection("llvm.metadata");
+
+ // Insert new GlobalVariable in DescGlobals map.
+ Slot = GV;
+
+ // Set up elements vector
+ std::vector<Constant*> Elements;
+ // Add fields.
+ DISerializeVisitor SRAM(*this, Elements);
+ SRAM.ApplyToFields(DD);
+
+ // Set the globals initializer.
+ GV->setInitializer(ConstantStruct::get(Ty, Elements));
+
+ return GV;
+}
+
+//===----------------------------------------------------------------------===//
+
+/// Verify - Return true if the GlobalVariable appears to be a valid
+/// serialization of a DebugInfoDesc.
+bool DIVerifier::Verify(Value *V) {
+ return !V || Verify(getGlobalVariable(V));
+}
+bool DIVerifier::Verify(GlobalVariable *GV) {
+ // NULLs are valid.
+ if (!GV) return true;
+
+ // Check prior validity.
+ unsigned &ValiditySlot = Validity[GV];
+
+ // If visited before then use old state.
+ if (ValiditySlot) return ValiditySlot == Valid;
+
+ // Assume validity for the time being (recursion.)
+ ValiditySlot = Valid;
+
+ // Make sure the global is internal or link once (anchor.)
+ if (GV->getLinkage() != GlobalValue::InternalLinkage &&
+ GV->getLinkage() != GlobalValue::LinkOnceLinkage) {
+ ValiditySlot = Invalid;
+ return false;
+ }
+
+ // Get the Tag.
+ unsigned Tag = DebugInfoDesc::TagFromGlobal(GV);
+
+ // Check for user defined descriptors.
+ if (Tag == DW_TAG_invalid) {
+ ValiditySlot = Valid;
+ return true;
+ }
+
+ // Get the Version.
+ unsigned Version = DebugInfoDesc::VersionFromGlobal(GV);
+
+ // Check for version mismatch.
+ if (Version != LLVMDebugVersion) {
+ ValiditySlot = Invalid;
+ return false;
+ }
+
+ // Construct an empty DebugInfoDesc.
+ DebugInfoDesc *DD = DebugInfoDesc::DescFactory(Tag);
+
+ // Allow for user defined descriptors.
+ if (!DD) return true;
+
+ // Get the initializer constant.
+ ConstantStruct *CI = cast<ConstantStruct>(GV->getInitializer());
+
+ // Get the operand count.
+ unsigned N = CI->getNumOperands();
+
+ // Get the field count.
+ unsigned &CountSlot = Counts[Tag];
+ if (!CountSlot) {
+ // Check the operand count to the field count
+ DICountVisitor CTAM;
+ CTAM.ApplyToFields(DD);
+ CountSlot = CTAM.getCount();
+ }
+
+ // Field count must be at most equal operand count.
+ if (CountSlot > N) {
+ delete DD;
+ ValiditySlot = Invalid;
+ return false;
+ }
+
+ // Check each field for valid type.
+ DIVerifyVisitor VRAM(*this, GV);
+ VRAM.ApplyToFields(DD);
+
+ // Release empty DebugInfoDesc.
+ delete DD;
+
+ // If fields are not valid.
+ if (!VRAM.isValid()) {
+ ValiditySlot = Invalid;
+ return false;
+ }
+
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+
+DebugScope::~DebugScope() {
+ for (unsigned i = 0, N = Scopes.size(); i < N; ++i) delete Scopes[i];
+ for (unsigned j = 0, M = Variables.size(); j < M; ++j) delete Variables[j];
+}
+
+//===----------------------------------------------------------------------===//
+
+MachineModuleInfo::MachineModuleInfo()
+: ImmutablePass((intptr_t)&ID)
+, DR()
+, VR()
+, CompileUnits()
+, Directories()
+, SourceFiles()
+, Lines()
+, LabelIDList()
+, ScopeMap()
+, RootScope(NULL)
+, FrameMoves()
+, LandingPads()
+, Personalities()
+, CallsEHReturn(0)
+, CallsUnwindInit(0)
+{
+ // Always emit "no personality" info
+ Personalities.push_back(NULL);
+}
+MachineModuleInfo::~MachineModuleInfo() {
+
+}
+
+/// doInitialization - Initialize the state for a new module.
+///
+bool MachineModuleInfo::doInitialization() {
+ return false;
+}
+
+/// doFinalization - Tear down the state after completion of a module.
+///
+bool MachineModuleInfo::doFinalization() {
+ return false;
+}
+
+/// BeginFunction - Begin gathering function meta information.
+///
+void MachineModuleInfo::BeginFunction(MachineFunction *MF) {
+ // Coming soon.
+}
+
+/// EndFunction - Discard function meta information.
+///
+void MachineModuleInfo::EndFunction() {
+ // Clean up scope information.
+ if (RootScope) {
+ delete RootScope;
+ ScopeMap.clear();
+ RootScope = NULL;
+ }
+
+ // Clean up line info.
+ Lines.clear();
+
+ // Clean up frame info.
+ FrameMoves.clear();
+
+ // Clean up exception info.
+ LandingPads.clear();
+ TypeInfos.clear();
+ FilterIds.clear();
+ FilterEnds.clear();
+ CallsEHReturn = 0;
+ CallsUnwindInit = 0;
+}
+
+/// getDescFor - Convert a Value to a debug information descriptor.
+///
+// FIXME - use new Value type when available.
+DebugInfoDesc *MachineModuleInfo::getDescFor(Value *V) {
+ return DR.Deserialize(V);
+}
+
+/// Verify - Verify that a Value is debug information descriptor.
+///
+bool MachineModuleInfo::Verify(Value *V) {
+ return VR.Verify(V);
+}
+
+/// AnalyzeModule - Scan the module for global debug information.
+///
+void MachineModuleInfo::AnalyzeModule(Module &M) {
+ SetupCompileUnits(M);
+}
+
+/// needsFrameInfo - Returns true if we need to gather callee-saved register
+/// move info for the frame.
+bool MachineModuleInfo::needsFrameInfo() const {
+ return hasDebugInfo() || ExceptionHandling;
+}
+
+/// SetupCompileUnits - Set up the unique vector of compile units.
+///
+void MachineModuleInfo::SetupCompileUnits(Module &M) {
+ std::vector<CompileUnitDesc *>CU = getAnchoredDescriptors<CompileUnitDesc>(M);
+
+ for (unsigned i = 0, N = CU.size(); i < N; i++) {
+ CompileUnits.insert(CU[i]);
+ }
+}
+
+/// getCompileUnits - Return a vector of debug compile units.
+///
+const UniqueVector<CompileUnitDesc *> MachineModuleInfo::getCompileUnits()const{
+ return CompileUnits;
+}
+
+/// getGlobalVariablesUsing - Return all of the GlobalVariables that use the
+/// named GlobalVariable.
+std::vector<GlobalVariable*>
+MachineModuleInfo::getGlobalVariablesUsing(Module &M,
+ const std::string &RootName) {
+ return ::getGlobalVariablesUsing(M, RootName);
+}
+
+/// RecordLabel - Records location information and associates it with a
+/// debug label. Returns a unique label ID used to generate a label and
+/// provide correspondence to the source line list.
+unsigned MachineModuleInfo::RecordLabel(unsigned Line, unsigned Column,
+ unsigned Source) {
+ unsigned ID = NextLabelID();
+ Lines.push_back(SourceLineInfo(Line, Column, Source, ID));
+ return ID;
+}
+
+/// RecordSource - Register a source file with debug info. Returns a source
+/// ID.
+unsigned MachineModuleInfo::RecordSource(const std::string &Directory,
+ const std::string &Source) {
+ unsigned DirectoryID = Directories.insert(Directory);
+ return SourceFiles.insert(SourceFileInfo(DirectoryID, Source));
+}
+unsigned MachineModuleInfo::RecordSource(const CompileUnitDesc *CompileUnit) {
+ return RecordSource(CompileUnit->getDirectory(),
+ CompileUnit->getFileName());
+}
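+
+// Example (illustrative): RecordSource("/tmp", "a.c") followed by
+// RecordSource("/tmp", "b.c") yields two distinct source-file IDs, while the
+// directory "/tmp" is uniqued and contributes the same directory ID to both
+// SourceFileInfo entries.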
+
+/// RecordRegionStart - Indicate the start of a region.
+///
+unsigned MachineModuleInfo::RecordRegionStart(Value *V) {
+ // FIXME - need to be able to handle split scopes because of bb cloning.
+ DebugInfoDesc *ScopeDesc = DR.Deserialize(V);
+ DebugScope *Scope = getOrCreateScope(ScopeDesc);
+ unsigned ID = NextLabelID();
+ if (!Scope->getStartLabelID()) Scope->setStartLabelID(ID);
+ return ID;
+}
+
+/// RecordRegionEnd - Indicate the end of a region.
+///
+unsigned MachineModuleInfo::RecordRegionEnd(Value *V) {
+ // FIXME - need to be able to handle split scopes because of bb cloning.
+ DebugInfoDesc *ScopeDesc = DR.Deserialize(V);
+ DebugScope *Scope = getOrCreateScope(ScopeDesc);
+ unsigned ID = NextLabelID();
+ Scope->setEndLabelID(ID);
+ return ID;
+}
+
+/// RecordVariable - Indicate the declaration of a local variable.
+///
+void MachineModuleInfo::RecordVariable(Value *V, unsigned FrameIndex) {
+ VariableDesc *VD = cast<VariableDesc>(DR.Deserialize(V));
+ DebugScope *Scope = getOrCreateScope(VD->getContext());
+ DebugVariable *DV = new DebugVariable(VD, FrameIndex);
+ Scope->AddVariable(DV);
+}
+
+/// getOrCreateScope - Returns the scope associated with the given descriptor.
+///
+DebugScope *MachineModuleInfo::getOrCreateScope(DebugInfoDesc *ScopeDesc) {
+ DebugScope *&Slot = ScopeMap[ScopeDesc];
+ if (!Slot) {
+ // FIXME - breaks down when the context is an inlined function.
+ DebugInfoDesc *ParentDesc = NULL;
+ if (BlockDesc *Block = dyn_cast<BlockDesc>(ScopeDesc)) {
+ ParentDesc = Block->getContext();
+ }
+ DebugScope *Parent = ParentDesc ? getOrCreateScope(ParentDesc) : NULL;
+ Slot = new DebugScope(Parent, ScopeDesc);
+ if (Parent) {
+ Parent->AddScope(Slot);
+ } else if (RootScope) {
+ // FIXME - Add inlined function scopes to the root so we can delete
+ // them later. Long term, handle inlined functions properly.
+ RootScope->AddScope(Slot);
+ } else {
+ // First function is top level function.
+ RootScope = Slot;
+ }
+ }
+ return Slot;
+}
+
+//===-EH-------------------------------------------------------------------===//
+
+/// getOrCreateLandingPadInfo - Find or create an LandingPadInfo for the
+/// specified MachineBasicBlock.
+LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo
+ (MachineBasicBlock *LandingPad) {
+ unsigned N = LandingPads.size();
+ for (unsigned i = 0; i < N; ++i) {
+ LandingPadInfo &LP = LandingPads[i];
+ if (LP.LandingPadBlock == LandingPad)
+ return LP;
+ }
+
+ LandingPads.push_back(LandingPadInfo(LandingPad));
+ return LandingPads[N];
+}
+
+/// addInvoke - Provide the begin and end labels of an invoke style call and
+/// associate it with a try landing pad block.
+void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
+ unsigned BeginLabel, unsigned EndLabel) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.BeginLabels.push_back(BeginLabel);
+ LP.EndLabels.push_back(EndLabel);
+}
+
+/// addLandingPad - Provide the label of a try LandingPad block.
+///
+unsigned MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
+ unsigned LandingPadLabel = NextLabelID();
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.LandingPadLabel = LandingPadLabel;
+ return LandingPadLabel;
+}
+
+/// addPersonality - Provide the personality function for the exception
+/// information.
+void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
+ Function *Personality) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.Personality = Personality;
+
+ for (unsigned i = 0; i < Personalities.size(); ++i)
+ if (Personalities[i] == Personality)
+ return;
+
+ Personalities.push_back(Personality);
+}
+
+/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
+///
+void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad,
+ std::vector<GlobalVariable *> &TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ for (unsigned N = TyInfo.size(); N; --N)
+ LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
+}
+
+/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
+///
+void MachineModuleInfo::addFilterTypeInfo(MachineBasicBlock *LandingPad,
+ std::vector<GlobalVariable *> &TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ std::vector<unsigned> IdsInFilter (TyInfo.size());
+ for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
+ IdsInFilter[I] = getTypeIDFor(TyInfo[I]);
+ LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
+}
+
+/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
+/// pads.
+void MachineModuleInfo::TidyLandingPads() {
+ for (unsigned i = 0; i != LandingPads.size(); ) {
+ LandingPadInfo &LandingPad = LandingPads[i];
+ LandingPad.LandingPadLabel = MappedLabel(LandingPad.LandingPadLabel);
+
+ if (!LandingPad.LandingPadBlock)
+ // Must not have cleanups if no landing pad.
+ LandingPad.TypeIds.clear();
+
+    // Special case: we *should* emit LPs with null LP MBB. This indicates
+    // the "rethrow" case.
+ if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ for (unsigned j=0; j != LandingPads[i].BeginLabels.size(); ) {
+ unsigned BeginLabel = MappedLabel(LandingPad.BeginLabels[j]);
+ unsigned EndLabel = MappedLabel(LandingPad.EndLabels[j]);
+
+ if (!BeginLabel || !EndLabel) {
+ LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+ LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+ continue;
+ }
+
+ LandingPad.BeginLabels[j] = BeginLabel;
+ LandingPad.EndLabels[j] = EndLabel;
+ ++j;
+ }
+
+ ++i;
+ }
+}
+
+/// getTypeIDFor - Return the type id for the specified typeinfo. This is
+/// function wide.
+unsigned MachineModuleInfo::getTypeIDFor(GlobalVariable *TI) {
+ for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
+ if (TypeInfos[i] == TI) return i + 1;
+
+ TypeInfos.push_back(TI);
+ return TypeInfos.size();
+}
+
+/// getFilterIDFor - Return the filter id for the specified typeinfos. This is
+/// function wide.
+int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) {
+ // If the new filter coincides with the tail of an existing filter, then
+ // re-use the existing filter. Folding filters more than this requires
+ // re-ordering filters and/or their elements - probably not worth it.
+ for (std::vector<unsigned>::iterator I = FilterEnds.begin(),
+ E = FilterEnds.end(); I != E; ++I) {
+ unsigned i = *I, j = TyIds.size();
+
+ while (i && j)
+ if (FilterIds[--i] != TyIds[--j])
+ goto try_next;
+
+ if (!j)
+ // The new filter coincides with range [i, end) of the existing filter.
+ return -(1 + i);
+
+try_next:;
+ }
+
+ // Add the new filter.
+ int FilterID = -(1 + FilterIds.size());
+ FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
+ for (unsigned I = 0, N = TyIds.size(); I != N; ++I)
+ FilterIds.push_back(TyIds[I]);
+ FilterEnds.push_back(FilterIds.size());
+ FilterIds.push_back(0); // terminator
+ return FilterID;
+}
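+
+// Worked example (illustrative): after a first call with TyIds = {2, 1},
+// FilterIds is [2, 1, 0] (0 is the terminator), FilterEnds is [2], and the
+// returned ID is -1.  A second call with TyIds = {1} matches the tail of that
+// filter starting at index 1, so the loop above returns -(1 + 1) = -2 without
+// adding a new entry.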
+
+/// getPersonality - Return the personality function for the current function.
+Function *MachineModuleInfo::getPersonality() const {
+  // FIXME: Until PR1414 is fixed, we're using one personality function per
+  // function.
+ return !LandingPads.empty() ? LandingPads[0].Personality : NULL;
+}
+
+/// getPersonalityIndex - Return unique index for current personality
+/// function. NULL personality function should always get zero index.
+unsigned MachineModuleInfo::getPersonalityIndex() const {
+ const Function* Personality = NULL;
+
+ // Scan landing pads. If there is at least one non-NULL personality - use it.
+ for (unsigned i = 0; i != LandingPads.size(); ++i)
+ if (LandingPads[i].Personality) {
+ Personality = LandingPads[i].Personality;
+ break;
+ }
+
+ for (unsigned i = 0; i < Personalities.size(); ++i) {
+ if (Personalities[i] == Personality)
+ return i;
+ }
+
+ // This should never happen
+ assert(0 && "Personality function should be set!");
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+/// DebugLabelFolding pass - This pass prunes out redundant labels. This allows
+/// an info consumer to determine if the range of two labels is empty, by seeing
+/// if the labels map to the same reduced label.
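+/// For example (illustrative): two back-to-back LABEL instructions with IDs 5
+/// and 6 are reduced to a single label; the second is erased and
+/// RemapLabel(6, 5) records that any range bounded by labels 5 and 6 is empty.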
+
+namespace llvm {
+
+struct DebugLabelFolder : public MachineFunctionPass {
+ static char ID;
+ DebugLabelFolder() : MachineFunctionPass((intptr_t)&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "Label Folder"; }
+};
+
+char DebugLabelFolder::ID = 0;
+
+bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) {
+ // Get machine module info.
+ MachineModuleInfo *MMI = getAnalysisToUpdate<MachineModuleInfo>();
+ if (!MMI) return false;
+ // Get target instruction info.
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ if (!TII) return false;
+
+ // Track if change is made.
+ bool MadeChange = false;
+ // No prior label to begin.
+ unsigned PriorLabel = 0;
+
+ // Iterate through basic blocks.
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
+ BB != E; ++BB) {
+ // Iterate through instructions.
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ // Is it a label.
+ if ((unsigned)I->getOpcode() == TargetInstrInfo::LABEL) {
+ // The label ID # is always operand #0, an immediate.
+ unsigned NextLabel = I->getOperand(0).getImm();
+
+ // If there was an immediate prior label.
+ if (PriorLabel) {
+ // Remap the current label to prior label.
+ MMI->RemapLabel(NextLabel, PriorLabel);
+ // Delete the current label.
+ I = BB->erase(I);
+ // Indicate a change has been made.
+ MadeChange = true;
+ continue;
+ } else {
+ // Start a new round.
+ PriorLabel = NextLabel;
+ }
+ } else {
+ // No consecutive labels.
+ PriorLabel = 0;
+ }
+
+ ++I;
+ }
+ }
+
+ return MadeChange;
+}
+
+FunctionPass *createDebugLabelFoldingPass() { return new DebugLabelFolder(); }
+
+}
+
diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp
new file mode 100644
index 0000000..a7ba5bb
--- /dev/null
+++ b/lib/CodeGen/MachinePassRegistry.cpp
@@ -0,0 +1,41 @@
+//===-- CodeGen/MachinePassRegistry.cpp -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the machine function pass registry for register allocators
+// and instruction schedulers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePassRegistry.h"
+
+using namespace llvm;
+
+
+/// Add - Adds a function pass to the registration list.
+///
+void MachinePassRegistry::Add(MachinePassRegistryNode *Node) {
+ Node->setNext(List);
+ List = Node;
+ if (Listener) Listener->NotifyAdd(Node->getName(),
+ Node->getCtor(),
+ Node->getDescription());
+}
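+
+// Usage sketch (assumption: this mirrors how the register-allocator registry
+// elsewhere in lib/CodeGen uses this class; the allocator name and factory
+// below are purely illustrative):
+//
+//   static RegisterRegAlloc
+//     myRegAlloc("myalloc", "  my experimental register allocator",
+//                createMyRegisterAllocator);
+//
+// Constructing such a static node calls Add() above from its constructor,
+// which pushes the node onto the head of List and notifies any listener
+// (typically the cl::opt parser that populates the -regalloc=... choices).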
+
+
+/// Remove - Removes a function pass from the registration list.
+///
+void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) {
+ for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) {
+ if (*I == Node) {
+ if (Listener) Listener->NotifyRemove(Node->getName());
+ *I = (*I)->getNext();
+ break;
+ }
+ }
+}
diff --git a/lib/CodeGen/Makefile b/lib/CodeGen/Makefile
new file mode 100644
index 0000000..7e5d8ee
--- /dev/null
+++ b/lib/CodeGen/Makefile
@@ -0,0 +1,22 @@
+##===- lib/CodeGen/Makefile --------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file was developed by the LLVM research group and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMCodeGen
+PARALLEL_DIRS = SelectionDAG
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
+# Xcode prior to 2.4 generates an error in -pedantic mode with use of HUGE_VAL
+# in this directory. Disable -pedantic for this broken compiler.
+ifneq ($(HUGE_VAL_SANITY),yes)
+CompileCommonOpts := $(filter-out -pedantic, $(CompileCommonOpts))
+endif
+
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
new file mode 100644
index 0000000..fec9e2e
--- /dev/null
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -0,0 +1,342 @@
+//===-- PhiElimination.cpp - Eliminate PHI nodes by inserting copies ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions. This destroys SSA information, but is the desired input for
+// some register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "phielim"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include <set>
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumAtomic, "Number of atomic phis lowered");
+//STATISTIC(NumSimple, "Number of simple phis lowered");
+
+namespace {
+ struct VISIBILITY_HIDDEN PNE : public MachineFunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ PNE() : MachineFunctionPass((intptr_t)&ID) {}
+
+ bool runOnMachineFunction(MachineFunction &Fn) {
+ analyzePHINodes(Fn);
+
+ bool Changed = false;
+
+ // Eliminate PHI instructions by inserting copies into predecessor blocks.
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ Changed |= EliminatePHINodes(Fn, *I);
+
+ VRegPHIUseCount.clear();
+ return Changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<LiveVariables>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+ /// in predecessor basic blocks.
+ ///
+ bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+ void LowerAtomicPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt);
+
+ /// analyzePHINodes - Gather information about the PHI nodes in
+ /// here. In particular, we want to map the number of uses of a virtual
+ /// register which is used in a PHI node. We map that to the BB the
+ /// vreg is coming from. This is used later to determine when the vreg
+ /// is killed in the BB.
+ ///
+ void analyzePHINodes(const MachineFunction& Fn);
+
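+    /// BBVRegPair / VRegPHIUse - Map a (predecessor block, virtual register)
+    /// pair to the number of times that register is used by PHI nodes with an
+    /// incoming value from that block; entries are decremented as the PHIs
+    /// are lowered.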
+ typedef std::pair<const MachineBasicBlock*, unsigned> BBVRegPair;
+ typedef std::map<BBVRegPair, unsigned> VRegPHIUse;
+
+ VRegPHIUse VRegPHIUseCount;
+ };
+
+ char PNE::ID = 0;
+ RegisterPass<PNE> X("phi-node-elimination",
+ "Eliminate PHI nodes for register allocation");
+}
+
+const PassInfo *llvm::PHIEliminationID = X.getPassInfo();
+
+/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
+/// predecessor basic blocks.
+///
+bool PNE::EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB) {
+ if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI)
+ return false; // Quick exit for basic blocks without PHIs.
+
+ // Get an iterator to the first instruction after the last PHI node (this may
+ // also be the end of the basic block).
+ MachineBasicBlock::iterator AfterPHIsIt = MBB.begin();
+ while (AfterPHIsIt != MBB.end() &&
+ AfterPHIsIt->getOpcode() == TargetInstrInfo::PHI)
+ ++AfterPHIsIt; // Skip over all of the PHI nodes...
+
+ while (MBB.front().getOpcode() == TargetInstrInfo::PHI)
+ LowerAtomicPHINode(MBB, AfterPHIsIt);
+
+ return true;
+}
+
+/// InstructionUsesRegister - Return true if the specified machine instr has a
+/// use of the specified register.
+static bool InstructionUsesRegister(MachineInstr *MI, unsigned SrcReg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+ if (MI->getOperand(i).isRegister() &&
+ MI->getOperand(i).getReg() == SrcReg &&
+ MI->getOperand(i).isUse())
+ return true;
+ return false;
+}
+
+/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
+/// under the assumption that it needs to be lowered in a way that supports
+/// atomic execution of PHIs.  This lowering method is always correct.
+void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt) {
+ // Unlink the PHI node from the basic block, but don't delete the PHI yet.
+ MachineInstr *MPhi = MBB.remove(MBB.begin());
+
+ unsigned DestReg = MPhi->getOperand(0).getReg();
+
+ // Create a new register for the incoming PHI arguments.
+ MachineFunction &MF = *MBB.getParent();
+ const TargetRegisterClass *RC = MF.getSSARegMap()->getRegClass(DestReg);
+ unsigned IncomingReg = MF.getSSARegMap()->createVirtualRegister(RC);
+
+ // Insert a register to register copy in the top of the current block (but
+ // after any remaining phi nodes) which copies the new incoming register
+ // into the phi node destination.
+ //
+ const MRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ RegInfo->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC);
+
+ // Update live variable information if there is any...
+ LiveVariables *LV = getAnalysisToUpdate<LiveVariables>();
+ if (LV) {
+ MachineInstr *PHICopy = prior(AfterPHIsIt);
+
+ // Increment use count of the newly created virtual register.
+ LV->getVarInfo(IncomingReg).NumUses++;
+
+ // Add information to LiveVariables to know that the incoming value is
+ // killed. Note that because the value is defined in several places (once
+ // each for each incoming block), the "def" block and instruction fields
+ // for the VarInfo is not filled in.
+ //
+ LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
+
+ // Since we are going to be deleting the PHI node, if it is the last use
+ // of any registers, or if the value itself is dead, we need to move this
+ // information over to the new copy we just inserted.
+ //
+ LV->removeVirtualRegistersKilled(MPhi);
+
+ // If the result is dead, update LV.
+ if (LV->RegisterDefIsDead(MPhi, DestReg)) {
+ LV->addVirtualRegisterDead(DestReg, PHICopy);
+ LV->removeVirtualRegistersDead(MPhi);
+ }
+
+ // Realize that the destination register is defined by the PHI copy now, not
+ // the PHI itself.
+ LV->getVarInfo(DestReg).DefInst = PHICopy;
+ }
+
+ // Adjust the VRegPHIUseCount map to account for the removal of this PHI
+ // node.
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+ --VRegPHIUseCount[BBVRegPair(
+ MPhi->getOperand(i + 1).getMachineBasicBlock(),
+ MPhi->getOperand(i).getReg())];
+
+ // Now loop over all of the incoming arguments, changing them to copy into
+ // the IncomingReg register in the corresponding predecessor basic block.
+ //
+ std::set<MachineBasicBlock*> MBBsInsertedInto;
+ for (int i = MPhi->getNumOperands() - 1; i >= 2; i-=2) {
+ unsigned SrcReg = MPhi->getOperand(i-1).getReg();
+ assert(MRegisterInfo::isVirtualRegister(SrcReg) &&
+ "Machine PHI Operands must all be virtual registers!");
+
+ // Get the MachineBasicBlock equivalent of the BasicBlock that is the
+    // source path of the PHI.
+ MachineBasicBlock &opBlock = *MPhi->getOperand(i).getMachineBasicBlock();
+
+ // Check to make sure we haven't already emitted the copy for this block.
+ // This can happen because PHI nodes may have multiple entries for the
+ // same basic block.
+ if (!MBBsInsertedInto.insert(&opBlock).second)
+ continue; // If the copy has already been emitted, we're done.
+
+ // Get an iterator pointing to the first terminator in the block (or end()).
+ // This is the point where we can insert a copy if we'd like to.
+ MachineBasicBlock::iterator I = opBlock.getFirstTerminator();
+
+ // Insert the copy.
+ RegInfo->copyRegToReg(opBlock, I, IncomingReg, SrcReg, RC);
+
+ // Now update live variable information if we have it. Otherwise we're done
+ if (!LV) continue;
+
+ // We want to be able to insert a kill of the register if this PHI
+ // (aka, the copy we just inserted) is the last use of the source
+ // value. Live variable analysis conservatively handles this by
+ // saying that the value is live until the end of the block the PHI
+ // entry lives in. If the value really is dead at the PHI copy, there
+ // will be no successor blocks which have the value live-in.
+ //
+ // Check to see if the copy is the last use, and if so, update the
+ // live variables information so that it knows the copy source
+ // instruction kills the incoming value.
+ //
+ LiveVariables::VarInfo &InRegVI = LV->getVarInfo(SrcReg);
+
+ // Loop over all of the successors of the basic block, checking to see
+ // if the value is either live in the block, or if it is killed in the
+ // block. Also check to see if this register is in use by another PHI
+ // node which has not yet been eliminated. If so, it will be killed
+ // at an appropriate point later.
+ //
+
+ // Is it used by any PHI instructions in this block?
+ bool ValueIsLive = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0;
+
+ std::vector<MachineBasicBlock*> OpSuccBlocks;
+
+ // Otherwise, scan successors, including the BB the PHI node lives in.
+ for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(),
+ E = opBlock.succ_end(); SI != E && !ValueIsLive; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+
+ // Is it alive in this successor?
+ unsigned SuccIdx = SuccMBB->getNumber();
+ if (SuccIdx < InRegVI.AliveBlocks.size() &&
+ InRegVI.AliveBlocks[SuccIdx]) {
+ ValueIsLive = true;
+ break;
+ }
+
+ OpSuccBlocks.push_back(SuccMBB);
+ }
+
+ // Check to see if this value is live because there is a use in a successor
+ // that kills it.
+ if (!ValueIsLive) {
+ switch (OpSuccBlocks.size()) {
+ case 1: {
+ MachineBasicBlock *MBB = OpSuccBlocks[0];
+ for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
+ if (InRegVI.Kills[i]->getParent() == MBB) {
+ ValueIsLive = true;
+ break;
+ }
+ break;
+ }
+ case 2: {
+ MachineBasicBlock *MBB1 = OpSuccBlocks[0], *MBB2 = OpSuccBlocks[1];
+ for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
+ if (InRegVI.Kills[i]->getParent() == MBB1 ||
+ InRegVI.Kills[i]->getParent() == MBB2) {
+ ValueIsLive = true;
+ break;
+ }
+ break;
+ }
+ default:
+ std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
+ for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
+ if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
+ InRegVI.Kills[i]->getParent())) {
+ ValueIsLive = true;
+ break;
+ }
+ }
+ }
+
+ // Okay, if we now know that the value is not live out of the block,
+ // we can add a kill marker in this block saying that it kills the incoming
+ // value!
+ if (!ValueIsLive) {
+      // In our final twist, we have to decide which instruction kills the
+      // register.  In most cases this is the copy; however, the first
+      // terminator instruction at the end of the block may also use the value.
+      // In this case, we should mark *it* as the killing instruction, not the
+      // copy.
+ bool FirstTerminatorUsesValue = false;
+ if (I != opBlock.end()) {
+ FirstTerminatorUsesValue = InstructionUsesRegister(I, SrcReg);
+
+ // Check that no other terminators use values.
+#ifndef NDEBUG
+ for (MachineBasicBlock::iterator TI = next(I); TI != opBlock.end();
+ ++TI) {
+ assert(!InstructionUsesRegister(TI, SrcReg) &&
+                 "Terminator instructions cannot use virtual registers unless "
+ "they are the first terminator in a block!");
+ }
+#endif
+ }
+
+ MachineBasicBlock::iterator KillInst;
+ if (!FirstTerminatorUsesValue)
+ KillInst = prior(I);
+ else
+ KillInst = I;
+
+ // Finally, mark it killed.
+ LV->addVirtualRegisterKilled(SrcReg, KillInst);
+
+ // This vreg no longer lives all of the way through opBlock.
+ unsigned opBlockNum = opBlock.getNumber();
+ if (opBlockNum < InRegVI.AliveBlocks.size())
+ InRegVI.AliveBlocks[opBlockNum] = false;
+ }
+ }
+
+ // Really delete the PHI instruction now!
+ delete MPhi;
+ ++NumAtomic;
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in here. In
+/// particular, we want to map the number of uses of a virtual register which is
+/// used in a PHI node. We map that to the BB the vreg is coming from. This is
+/// used later to determine when the vreg is killed in the BB.
+///
+void PNE::analyzePHINodes(const MachineFunction& Fn) {
+ for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI)
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ ++VRegPHIUseCount[BBVRegPair(
+ BBI->getOperand(i + 1).getMachineBasicBlock(),
+ BBI->getOperand(i).getReg())];
+}
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
new file mode 100644
index 0000000..87510e4
--- /dev/null
+++ b/lib/CodeGen/Passes.cpp
@@ -0,0 +1,56 @@
+//===-- Passes.cpp - Target independent code generation passes ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces to access the target independent code
+// generation passes provided by the LLVM backend.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterRegAlloc class - Track the registration of register allocators.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterRegAlloc::Registry;
+
+
+//===---------------------------------------------------------------------===//
+///
+/// RegAlloc command line options.
+///
+//===---------------------------------------------------------------------===//
+namespace {
+ cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
+ RegisterPassParser<RegisterRegAlloc> >
+ RegAlloc("regalloc",
+ cl::init(&createLinearScanRegisterAllocator),
+ cl::desc("Register allocator to use: (default = linearscan)"));
+}
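+
+// Illustrative note: an allocator makes itself selectable through the
+// "-regalloc" option above by defining a static RegisterRegAlloc object, as
+// the allocators elsewhere in this patch do.  For a hypothetical allocator
+// the registration would look like:
+//
+//   static RegisterRegAlloc
+//     myRegAlloc("myalloc", "  my register allocator",
+//                createMyRegisterAllocator);
+//
+// where "myalloc" and createMyRegisterAllocator are placeholder names.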
+
+
+//===---------------------------------------------------------------------===//
+///
+/// createRegisterAllocator - choose the appropriate register allocator.
+///
+//===---------------------------------------------------------------------===//
+FunctionPass *llvm::createRegisterAllocator() {
+ RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
+
+ if (!Ctor) {
+ Ctor = RegAlloc;
+ RegisterRegAlloc::setDefault(RegAlloc);
+ }
+
+ return Ctor();
+}
diff --git a/lib/CodeGen/PhysRegTracker.h b/lib/CodeGen/PhysRegTracker.h
new file mode 100644
index 0000000..f5a2402
--- /dev/null
+++ b/lib/CodeGen/PhysRegTracker.h
@@ -0,0 +1,73 @@
+//===-- llvm/CodeGen/PhysRegTracker.h - Physical Register Tracker -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a physical register tracker.  The tracker tracks
+// physical register usage through addRegUse and delRegUse.  isRegAvail
+// checks whether a physical register is available, taking register aliases
+// into consideration.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PHYSREGTRACKER_H
+#define LLVM_CODEGEN_PHYSREGTRACKER_H
+
+#include "llvm/Target/MRegisterInfo.h"
+
+namespace llvm {
+
+ class PhysRegTracker {
+ const MRegisterInfo* mri_;
+ std::vector<unsigned> regUse_;
+
+ public:
+ PhysRegTracker(const MRegisterInfo& mri)
+ : mri_(&mri),
+ regUse_(mri_->getNumRegs(), 0) {
+ }
+
+ PhysRegTracker(const PhysRegTracker& rhs)
+ : mri_(rhs.mri_),
+ regUse_(rhs.regUse_) {
+ }
+
+ const PhysRegTracker& operator=(const PhysRegTracker& rhs) {
+ mri_ = rhs.mri_;
+ regUse_ = rhs.regUse_;
+ return *this;
+ }
+
+ void addRegUse(unsigned physReg) {
+ assert(MRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ ++regUse_[physReg];
+ for (const unsigned* as = mri_->getAliasSet(physReg); *as; ++as)
+ ++regUse_[*as];
+ }
+
+ void delRegUse(unsigned physReg) {
+ assert(MRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ assert(regUse_[physReg] != 0);
+ --regUse_[physReg];
+ for (const unsigned* as = mri_->getAliasSet(physReg); *as; ++as) {
+ assert(regUse_[*as] != 0);
+ --regUse_[*as];
+ }
+ }
+
+ bool isRegAvail(unsigned physReg) const {
+ assert(MRegisterInfo::isPhysicalRegister(physReg) &&
+ "should be physical register!");
+ return regUse_[physReg] == 0;
+ }
+ };
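+
+  // Example usage (illustrative only; TRI stands for whatever MRegisterInfo
+  // reference is at hand and Reg for some physical register number):
+  //
+  //   PhysRegTracker PRT(TRI);
+  //   if (PRT.isRegAvail(Reg)) {
+  //     PRT.addRegUse(Reg);    // Reg and all of its aliases become busy.
+  //     ...
+  //     PRT.delRegUse(Reg);    // Release Reg and its aliases again.
+  //   }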
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
new file mode 100644
index 0000000..06f47c7
--- /dev/null
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -0,0 +1,81 @@
+//===----- SchedulePostRAList.cpp - list scheduler ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Dale Johannesen and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Debug.h"
+//#include "llvm/ADT/Statistic.h"
+//#include <climits>
+//#include <queue>
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+namespace {
+ bool NoPostRAScheduling;
+
+ // When this works it will be on by default.
+ cl::opt<bool, true>
+ DisablePostRAScheduler("disable-post-RA-scheduler",
+ cl::desc("Disable scheduling after register allocation"),
+ cl::location(NoPostRAScheduling),
+ cl::init(true));
+
+ class VISIBILITY_HIDDEN SchedulePostRATDList : public MachineFunctionPass {
+ public:
+ static char ID;
+ SchedulePostRATDList() : MachineFunctionPass((intptr_t)&ID) {}
+ private:
+ MachineFunction *MF;
+ const TargetMachine *TM;
+ public:
+ const char *getPassName() const {
+ return "Post RA top-down list latency scheduler (STUB)";
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn);
+ };
+ char SchedulePostRATDList::ID = 0;
+}
+
+bool SchedulePostRATDList::runOnMachineFunction(MachineFunction &Fn) {
+ if (NoPostRAScheduling)
+ return true;
+
+ DOUT << "SchedulePostRATDList\n";
+ MF = &Fn;
+ TM = &MF->getTarget();
+
+ // Loop over all of the basic blocks
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB)
+ ;
+
+ return true;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createPostRAScheduler() {
+ return new SchedulePostRATDList();
+}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
new file mode 100644
index 0000000..882c6a7
--- /dev/null
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -0,0 +1,549 @@
+//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/STLExtras.h"
+#include <climits>
+using namespace llvm;
+
+namespace {
+ struct VISIBILITY_HIDDEN PEI : public MachineFunctionPass {
+ static char ID;
+ PEI() : MachineFunctionPass((intptr_t)&ID) {}
+
+ const char *getPassName() const {
+ return "Prolog/Epilog Insertion & Frame Finalization";
+ }
+
+ /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+ /// frame indexes with appropriate references.
+ ///
+ bool runOnMachineFunction(MachineFunction &Fn) {
+ const MRegisterInfo *MRI = Fn.getTarget().getRegisterInfo();
+ RS = MRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
+
+ // Get MachineModuleInfo so that we can track the construction of the
+ // frame.
+ if (MachineModuleInfo *MMI = getAnalysisToUpdate<MachineModuleInfo>()) {
+ Fn.getFrameInfo()->setMachineModuleInfo(MMI);
+ }
+
+ // Allow the target machine to make some adjustments to the function
+ // e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
+ MRI->processFunctionBeforeCalleeSavedScan(Fn, RS);
+
+ // Scan the function for modified callee saved registers and insert spill
+ // code for any callee saved registers that are modified. Also calculate
+ // the MaxCallFrameSize and HasCalls variables for the function's frame
+      // information, and eliminate call frame pseudo instructions.
+ calculateCalleeSavedRegisters(Fn);
+
+ // Add the code to save and restore the callee saved registers
+ saveCalleeSavedRegisters(Fn);
+
+ // Allow the target machine to make final modifications to the function
+ // before the frame layout is finalized.
+ Fn.getTarget().getRegisterInfo()->processFunctionBeforeFrameFinalized(Fn);
+
+ // Calculate actual frame offsets for all of the abstract stack objects...
+ calculateFrameObjectOffsets(Fn);
+
+ // Add prolog and epilog code to the function. This function is required
+ // to align the stack frame as necessary for any stack variables or
+ // called functions. Because of this, calculateCalleeSavedRegisters
+ // must be called before this function in order to set the HasCalls
+ // and MaxCallFrameSize variables.
+ insertPrologEpilogCode(Fn);
+
+ // Replace all MO_FrameIndex operands with physical register references
+ // and actual offsets.
+ //
+ replaceFrameIndices(Fn);
+
+ delete RS;
+ return true;
+ }
+
+ private:
+ RegScavenger *RS;
+
+ // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
+ // stack frame indexes.
+ unsigned MinCSFrameIndex, MaxCSFrameIndex;
+
+ void calculateCalleeSavedRegisters(MachineFunction &Fn);
+ void saveCalleeSavedRegisters(MachineFunction &Fn);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ void replaceFrameIndices(MachineFunction &Fn);
+ void insertPrologEpilogCode(MachineFunction &Fn);
+ };
+ char PEI::ID = 0;
+}
+
+
+/// createPrologEpilogCodeInserter - This function returns a pass that inserts
+/// prolog and epilog code, and eliminates abstract frame references.
+///
+FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
+
+
+/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
+/// registers. Also calculate the MaxCallFrameSize and HasCalls variables for
+/// the function's frame information, and eliminate call frame pseudo
+/// instructions.
+///
+void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
+ const MRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
+
+ // Get the callee saved register list...
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
+
+ // Get the function call frame set-up and tear-down instruction opcode
+ int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode();
+
+ // These are used to keep track the callee-save area. Initialize them.
+ MinCSFrameIndex = INT_MAX;
+ MaxCSFrameIndex = 0;
+
+ // Early exit for targets which have no callee saved registers and no call
+ // frame setup/destroy pseudo instructions.
+ if ((CSRegs == 0 || CSRegs[0] == 0) &&
+ FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
+ return;
+
+ unsigned MaxCallFrameSize = 0;
+ bool HasCalls = false;
+
+ std::vector<MachineBasicBlock::iterator> FrameSDOps;
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+ assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
+ " instructions should have a single immediate argument!");
+ unsigned Size = I->getOperand(0).getImmedValue();
+ if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
+ HasCalls = true;
+ FrameSDOps.push_back(I);
+ }
+
+ MachineFrameInfo *FFI = Fn.getFrameInfo();
+ FFI->setHasCalls(HasCalls);
+ FFI->setMaxCallFrameSize(MaxCallFrameSize);
+
+ for (unsigned i = 0, e = FrameSDOps.size(); i != e; ++i) {
+ MachineBasicBlock::iterator I = FrameSDOps[i];
+ // If call frames are not being included as part of the stack frame,
+ // and there is no dynamic allocation (therefore referencing frame slots
+ // off sp), leave the pseudo ops alone. We'll eliminate them later.
+ if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn))
+ RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+ }
+
+ // Now figure out which *callee saved* registers are modified by the current
+ // function, thus needing to be saved and restored in the prolog/epilog.
+ //
+ const TargetRegisterClass* const *CSRegClasses =
+ RegInfo->getCalleeSavedRegClasses(&Fn);
+ std::vector<CalleeSavedInfo> CSI;
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ if (Fn.isPhysRegUsed(Reg)) {
+ // If the reg is modified, save it!
+ CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
+ } else {
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
+ *AliasSet; ++AliasSet) { // Check alias registers too.
+ if (Fn.isPhysRegUsed(*AliasSet)) {
+ CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
+ break;
+ }
+ }
+ }
+ }
+
+ if (CSI.empty())
+ return; // Early exit if no callee saved registers are modified!
+
+ unsigned NumFixedSpillSlots;
+ const std::pair<unsigned,int> *FixedSpillSlots =
+ TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+ // Now that we know which registers need to be saved and restored, allocate
+ // stack slots for them.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = CSI[i].getRegClass();
+
+ // Check to see if this physreg must be spilled to a particular stack slot
+ // on this target.
+ const std::pair<unsigned,int> *FixedSlot = FixedSpillSlots;
+ while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
+ FixedSlot->first != Reg)
+ ++FixedSlot;
+
+ int FrameIdx;
+ if (FixedSlot == FixedSpillSlots+NumFixedSpillSlots) {
+ // Nope, just spill it anywhere convenient.
+ unsigned Align = RC->getAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
+      // We may not be able to satisfy the desired alignment specification of
+ // the TargetRegisterClass if the stack alignment is smaller. Use the min.
+ Align = std::min(Align, StackAlign);
+ FrameIdx = FFI->CreateStackObject(RC->getSize(), Align);
+ if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+ if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+ } else {
+ // Spill it to the stack where we must.
+ FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->second);
+ }
+ CSI[i].setFrameIdx(FrameIdx);
+ }
+
+ FFI->setCalleeSavedInfo(CSI);
+}
+
+/// saveCalleeSavedRegisters - Insert spill code for any callee saved registers
+/// that are modified in the function.
+///
+void PEI::saveCalleeSavedRegisters(MachineFunction &Fn) {
+ // Get callee saved register information.
+ MachineFrameInfo *FFI = Fn.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+
+ // Early exit if no callee saved registers are modified!
+ if (CSI.empty())
+ return;
+
+ const MRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+
+ // Now that we have a stack slot for each register to be saved, insert spill
+ // code into the entry block.
+ MachineBasicBlock *MBB = Fn.begin();
+ MachineBasicBlock::iterator I = MBB->begin();
+ if (!RegInfo->spillCalleeSavedRegisters(*MBB, I, CSI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB->addLiveIn(CSI[i].getReg());
+
+ // Insert the spill to the stack frame.
+ RegInfo->storeRegToStackSlot(*MBB, I, CSI[i].getReg(),
+ CSI[i].getFrameIdx(), CSI[i].getRegClass());
+ }
+ }
+
+ // Add code to restore the callee-save registers in each exiting block.
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ for (MachineFunction::iterator FI = Fn.begin(), E = Fn.end(); FI != E; ++FI)
+    // If the last instruction is a return, restore the callee saved registers
+    // before the return sequence.
+ if (!FI->empty() && TII.isReturn(FI->back().getOpcode())) {
+ MBB = FI;
+ I = MBB->end(); --I;
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && TII.isTerminatorInstr((--I2)->getOpcode()))
+ I = I2;
+
+ bool AtStart = I == MBB->begin();
+ MachineBasicBlock::iterator BeforeI = I;
+ if (!AtStart)
+ --BeforeI;
+
+ // Restore all registers immediately before the return and any terminators
+      // that precede it.
+ if (!RegInfo->restoreCalleeSavedRegisters(*MBB, I, CSI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ RegInfo->loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
+ CSI[i].getFrameIdx(),
+ CSI[i].getRegClass());
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert multiple
+ // instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+ }
+ }
+}
+
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *FFI = Fn.getFrameInfo();
+
+ unsigned MaxAlign = 0;
+
+ // Start at the beginning of the local area.
+ // The Offset is the distance from the stack top in the direction
+ // of stack growth -- so it's always positive.
+ int64_t Offset = TFI.getOffsetOfLocalArea();
+ if (StackGrowsDown)
+ Offset = -Offset;
+ assert(Offset >= 0
+ && "Local area offset should be in direction of stack growth");
+
+ // If there are fixed sized objects that are preallocated in the local area,
+ // non-fixed objects can't be allocated right at the start of local area.
+ // We currently don't support filling in holes in between fixed sized objects,
+ // so we adjust 'Offset' to point to the end of last fixed sized
+ // preallocated object.
+ for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
+ int64_t FixedOff;
+ if (StackGrowsDown) {
+ // The maximum distance from the stack pointer is at lower address of
+ // the object -- which is given by offset. For down growing stack
+ // the offset is negative, so we negate the offset to get the distance.
+ FixedOff = -FFI->getObjectOffset(i);
+ } else {
+      // The maximum distance from the stack pointer is at the upper
+ // address of the object.
+ FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i);
+ }
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+
+ // First assign frame offsets to stack objects that are used to spill
+ // callee saved registers.
+ if (StackGrowsDown) {
+ for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+      // If the stack grows down, we need to add the size to find the lowest
+      // address of the object.
+ Offset += FFI->getObjectSize(i);
+
+ unsigned Align = FFI->getObjectAlignment(i);
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+ // Adjust to alignment boundary
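+      // (e.g. an Offset of 13 with an Align of 8 rounds up to 16).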
+ Offset = (Offset+Align-1)/Align*Align;
+
+ FFI->setObjectOffset(i, -Offset); // Set the computed offset
+ }
+ } else {
+ for (unsigned i = MaxCSFrameIndex; i >= MinCSFrameIndex; --i) {
+ unsigned Align = FFI->getObjectAlignment(i);
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ FFI->setObjectOffset(i, Offset);
+ Offset += FFI->getObjectSize(i);
+ }
+ }
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // frame pointer if a frame pointer is required.
+ const MRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ if (RS && RegInfo->hasFP(Fn)) {
+ int SFI = RS->getScavengingFrameIndex();
+ if (SFI >= 0) {
+      // If the stack grows down, we need to add the size to find the lowest
+      // address of the object.
+ if (StackGrowsDown)
+ Offset += FFI->getObjectSize(SFI);
+
+ unsigned Align = FFI->getObjectAlignment(SFI);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ if (StackGrowsDown) {
+ FFI->setObjectOffset(SFI, -Offset); // Set the computed offset
+ } else {
+ FFI->setObjectOffset(SFI, Offset);
+ Offset += FFI->getObjectSize(SFI);
+ }
+ }
+ }
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && (int)i == RS->getScavengingFrameIndex())
+ continue;
+
+    // If the stack grows down, we need to add the size to find the lowest
+    // address of the object.
+ if (StackGrowsDown)
+ Offset += FFI->getObjectSize(i);
+
+ unsigned Align = FFI->getObjectAlignment(i);
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ if (StackGrowsDown) {
+ FFI->setObjectOffset(i, -Offset); // Set the computed offset
+ } else {
+ FFI->setObjectOffset(i, Offset);
+ Offset += FFI->getObjectSize(i);
+ }
+ }
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // stack pointer.
+ if (RS && !RegInfo->hasFP(Fn)) {
+ int SFI = RS->getScavengingFrameIndex();
+ if (SFI >= 0) {
+      // If the stack grows down, we need to add the size to find the lowest
+      // address of the object.
+ if (StackGrowsDown)
+ Offset += FFI->getObjectSize(SFI);
+
+ unsigned Align = FFI->getObjectAlignment(SFI);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ if (StackGrowsDown) {
+ FFI->setObjectOffset(SFI, -Offset); // Set the computed offset
+ } else {
+ FFI->setObjectOffset(SFI, Offset);
+ Offset += FFI->getObjectSize(SFI);
+ }
+ }
+ }
+
+ // Round up the size to a multiple of the alignment, but only if there are
+ // calls or alloca's in the function. This ensures that any calls to
+  // subroutines have their stack frames suitably aligned.
+ if (!RegInfo->targetHandlesStackFrameRounding() &&
+ (FFI->hasCalls() || FFI->hasVarSizedObjects())) {
+ // If we have reserved argument space for call sites in the function
+ // immediately on entry to the current function, count it as part of the
+ // overall stack size.
+ if (RegInfo->hasReservedCallFrame(Fn))
+ Offset += FFI->getMaxCallFrameSize();
+
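+    // Round Offset up to the stack alignment, e.g. an Offset of 52 with a
+    // 16-byte stack alignment becomes 64.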
+ unsigned AlignMask = TFI.getStackAlignment() - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+ }
+
+ // Update frame info to pretend that this is part of the stack...
+ FFI->setStackSize(Offset+TFI.getOffsetOfLocalArea());
+
+ // Remember the required stack alignment in case targets need it to perform
+ // dynamic stack alignment.
+ assert(FFI->getMaxAlignment() == MaxAlign &&
+ "Stack alignment calculation broken!");
+}
+
+
+/// insertPrologEpilogCode - Scan the function for modified callee saved
+/// registers, insert spill code for these callee saved registers, then add
+/// prolog and epilog code to the function.
+///
+void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
+ // Add prologue to the function...
+ Fn.getTarget().getRegisterInfo()->emitPrologue(Fn);
+
+ // Add epilogue to restore the callee-save registers in each exiting block
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+ // If last instruction is a return instruction, add an epilogue
+ if (!I->empty() && TII.isReturn(I->back().getOpcode()))
+ Fn.getTarget().getRegisterInfo()->emitEpilogue(Fn, *I);
+ }
+}
+
+
+/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
+/// register references and actual offsets.
+///
+void PEI::replaceFrameIndices(MachineFunction &Fn) {
+ if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
+
+ const TargetMachine &TM = Fn.getTarget();
+ assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
+ const MRegisterInfo &MRI = *TM.getRegisterInfo();
+ const TargetFrameInfo *TFI = TM.getFrameInfo();
+ bool StackGrowsDown =
+ TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+ int FrameSetupOpcode = MRI.getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = MRI.getCallFrameDestroyOpcode();
+
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ int SPAdj = 0; // SP offset due to call frame setup / destroy.
+ if (RS) RS->enterBasicBlock(BB);
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+ MachineInstr *MI = I;
+
+      // Remember how much SP has been adjusted to create the call frame.
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+ int Size = I->getOperand(0).getImmedValue();
+ if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
+ (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
+ Size = -Size;
+ SPAdj += Size;
+ MachineBasicBlock::iterator PrevI = prior(I);
+ MRI.eliminateCallFramePseudoInstr(Fn, *BB, I);
+ // Visit the instructions created by eliminateCallFramePseudoInstr().
+ I = next(PrevI);
+ MI = NULL;
+ } else {
+ I++;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+ if (MI->getOperand(i).isFrameIndex()) {
+ // If this instruction has a FrameIndex operand, we need to use that
+ // target machine register info object to eliminate it.
+ MRI.eliminateFrameIndex(MI, SPAdj, RS);
+
+ // Revisit the instruction in full. Some instructions (e.g. inline
+ // asm instructions) can have multiple frame indices.
+ --I;
+ MI = 0;
+ break;
+ }
+ }
+ // Update register states.
+ if (RS && MI) RS->forward(MI);
+ }
+ assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?");
+ }
+}
diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt
new file mode 100644
index 0000000..4fff15c
--- /dev/null
+++ b/lib/CodeGen/README.txt
@@ -0,0 +1,145 @@
+//===---------------------------------------------------------------------===//
+
+Common register allocation / spilling problem:
+
+ mul lr, r4, lr
+ str lr, [sp, #+52]
+ ldr lr, [r1, #+32]
+ sxth r3, r3
+ ldr r4, [sp, #+52]
+ mla r4, r3, lr, r4
+
+can be:
+
+ mul lr, r4, lr
+ mov r4, lr
+ str lr, [sp, #+52]
+ ldr lr, [r1, #+32]
+ sxth r3, r3
+ mla r4, r3, lr, r4
+
+and then "merge" mul and mov:
+
+ mul r4, r4, lr
+ str lr, [sp, #+52]
+ ldr lr, [r1, #+32]
+ sxth r3, r3
+ mla r4, r3, lr, r4
+
+It also increases the likelihood that the store may become dead.
+
+//===---------------------------------------------------------------------===//
+
+I think we should have a "hasSideEffects" flag (which is automatically set for
+stuff that "isLoad" "isCall" etc), and the remat pass should eventually be able
+to remat any instruction that has no side effects, if it can handle it and if
+profitable.
+
+For now, I'd suggest having the remat stuff work like this:
+
+1. I need to spill/reload this thing.
+2. Check to see if it has side effects.
+3. Check to see if it is simple enough: e.g. it only has one register
+destination and no register input.
+4. If so, clone the instruction, do the xform, etc.
+
+Advantages of this are:
+
+1. the .td file describes the behavior of the instructions, not the way the
+ algorithm should work.
+2. as remat gets smarter in the future, we shouldn't have to be changing the .td
+ files.
+3. it is easier to explain what the flag means in the .td file, because you
+ don't have to pull in the explanation of how the current remat algo works.
+
+Some potential added complexities:
+
+1. Some instructions have to be glued to their predecessor or successor. All of
+   the PC relative instructions and condition code setting instructions. We
+   could mark them as hasSideEffects, but that's not quite right. PC relative
+   loads from constantpools can be remat'ed, for example. But it requires more
+   than just cloning the instruction. Some instructions can be remat'ed but
+   expand to more than one instruction. The allocator will have to make a
+   decision.
+
+2. As noted above, remat is not as simple as cloning in some cases. The target
+   will have to decide how to remat it. For example, an ARM 2-piece constant
+   generation instruction is remat'ed as a load from a constantpool.
+
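+A rough sketch of steps 2 and 3 above (purely illustrative; hasSideEffects()
+is the proposed flag, not an existing query, and the operand checks simply
+mirror the ones used elsewhere in this patch):
+
+  // Return true if MI is simple enough to remat under the proposed scheme.
+  static bool isSimpleRematCandidate(MachineInstr *MI,
+                                     const TargetInstrInfo &TII) {
+    if (TII.hasSideEffects(MI->getOpcode()))    // step 2: no side effects.
+      return false;
+    unsigned NumRegDefs = 0;                    // step 3: one reg destination,
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+      if (MI->getOperand(i).isRegister()) {
+        if (MI->getOperand(i).isUse())          //         and no reg inputs.
+          return false;
+        ++NumRegDefs;
+      }
+    return NumRegDefs == 1;
+  }
+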
+//===---------------------------------------------------------------------===//
+
+bb27 ...
+ ...
+ %reg1037 = ADDri %reg1039, 1
+ %reg1038 = ADDrs %reg1032, %reg1039, %NOREG, 10
+ Successors according to CFG: 0x8b03bf0 (#5)
+
+bb76 (0x8b03bf0, LLVM BB @0x8b032d0, ID#5):
+ Predecessors according to CFG: 0x8b0c5f0 (#3) 0x8b0a7c0 (#4)
+ %reg1039 = PHI %reg1070, mbb<bb76.outer,0x8b0c5f0>, %reg1037, mbb<bb27,0x8b0a7c0>
+
+Note ADDri is not a two-address instruction. However, its result %reg1037 is an
+operand of the PHI node in bb76 and its operand %reg1039 is the result of the
+PHI node. We should treat it as two-address code and make sure the ADDri is
+scheduled after any node that reads %reg1039.
+
+//===---------------------------------------------------------------------===//
+
+Use local info (i.e. register scavenger) to assign it a free register to allow
+reuse:
+ ldr r3, [sp, #+4]
+ add r3, r3, #3
+ ldr r2, [sp, #+8]
+ add r2, r2, #2
+ ldr r1, [sp, #+4] <==
+ add r1, r1, #1
+ ldr r0, [sp, #+4]
+ add r0, r0, #2
+
+//===---------------------------------------------------------------------===//
+
+LLVM aggressively lifts common subexpressions out of loops. Sometimes this can
+have negative side effects:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+load [i + R1]
+...
+load [i + R2]
+...
+load [i + R3]
+
+Suppose there is high register pressure, R1, R2, R3, can be spilled. We need
+to implement proper re-materialization to handle this:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+R1 = X + 4 @ re-materialized
+load [i + R1]
+...
+R2 = X + 7 @ re-materialized
+load [i + R2]
+...
+R3 = X + 15 @ re-materialized
+load [i + R3]
+
+Furthermore, with re-association, we can enable sharing:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+T = i + X
+load [T + 4]
+...
+load [T + 7]
+...
+load [T + 15]
+//===---------------------------------------------------------------------===//
diff --git a/lib/CodeGen/RegAllocBigBlock.cpp b/lib/CodeGen/RegAllocBigBlock.cpp
new file mode 100644
index 0000000..c7f23f5
--- /dev/null
+++ b/lib/CodeGen/RegAllocBigBlock.cpp
@@ -0,0 +1,893 @@
+//===- RegAllocBigBlock.cpp - A register allocator for large basic blocks -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Duraid Madina and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RABigBlock class
+//
+//===----------------------------------------------------------------------===//
+
+// This register allocator is derived from RegAllocLocal.cpp. Like it, this
+// allocator works on one basic block at a time, oblivious to others.
+// However, the algorithm used here is suited for long blocks of
+// instructions - registers are spilled by greedily choosing those holding
+// values that will not be needed for the longest amount of time. This works
+// particularly well for blocks with 10 or more times as many instructions
+// as machine registers, but can be used for general code.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: - automagically invoke linearscan for (groups of) small BBs?
+// - break ties when picking regs? (probably not worth it in a
+// JIT context)
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumFolded, "Number of loads/stores folded into instructions");
+
+namespace {
+ static RegisterRegAlloc
+ bigBlockRegAlloc("bigblock", " Big-block register allocator",
+ createBigBlockRegisterAllocator);
+
+/// VRegKeyInfo - Defines magic values required to use VirtRegs as DenseMap
+/// keys.
+ struct VRegKeyInfo {
+ static inline unsigned getEmptyKey() { return -1U; }
+ static inline unsigned getTombstoneKey() { return -2U; }
+ static unsigned getHashValue(const unsigned &Key) { return Key; }
+ };
+
+
+/// This register allocator is derived from RegAllocLocal.cpp. Like it, this
+/// allocator works on one basic block at a time, oblivious to others.
+/// However, the algorithm used here is suited for long blocks of
+/// instructions - registers are spilled by greedily choosing those holding
+/// values that will not be needed for the longest amount of time. This works
+/// particularly well for blocks with 10 or more times as many instructions
+/// as machine registers, but can be used for general code.
+///
+/// TODO: - automagically invoke linearscan for (groups of) small BBs?
+/// - break ties when picking regs? (probably not worth it in a
+/// JIT context)
+///
+ class VISIBILITY_HIDDEN RABigBlock : public MachineFunctionPass {
+ public:
+ static char ID;
+ RABigBlock() : MachineFunctionPass((intptr_t)&ID) {}
+ private:
+ /// TM - For getting at TargetMachine info
+ ///
+ const TargetMachine *TM;
+
+ /// MF - Our generic MachineFunction pointer
+ ///
+ MachineFunction *MF;
+
+ /// RegInfo - For dealing with machine register info (aliases, folds
+ /// etc)
+ const MRegisterInfo *RegInfo;
+
+ /// LV - Our generic LiveVariables pointer
+ ///
+ LiveVariables *LV;
+
+ typedef SmallVector<unsigned, 2> VRegTimes;
+
+ /// VRegReadTable - maps VRegs in a BB to the set of times they are read
+ ///
+ DenseMap<unsigned, VRegTimes*, VRegKeyInfo> VRegReadTable;
+
+ /// VRegReadIdx - keeps track of the "current time" in terms of
+ /// positions in VRegReadTable
+ DenseMap<unsigned, unsigned , VRegKeyInfo> VRegReadIdx;
+
+ /// StackSlotForVirtReg - Maps virtual regs to the frame index where these
+ /// values are spilled.
+ IndexedMap<unsigned, VirtReg2IndexFunctor> StackSlotForVirtReg;
+
+ /// Virt2PhysRegMap - This map contains entries for each virtual register
+ /// that is currently available in a physical register.
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap;
+
+ /// PhysRegsUsed - This array is effectively a map, containing entries for
+ /// each physical register that currently has a value (ie, it is in
+ /// Virt2PhysRegMap). The value mapped to is the virtual register
+ /// corresponding to the physical register (the inverse of the
+ /// Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned
+ /// because it is used by a future instruction, and to -2 if it is not
+ /// allocatable. If the entry for a physical register is -1, then the
+ /// physical register is "not in the map".
+ ///
+ std::vector<int> PhysRegsUsed;
+
+ /// VirtRegModified - This bitset contains information about which virtual
+ /// registers need to be spilled back to memory when their registers are
+ /// scavenged. If a virtual register has simply been rematerialized, there
+ /// is no reason to spill it to memory when we need the register back.
+ ///
+ std::vector<int> VirtRegModified;
+
+    /// MBBLastInsnTime - the number of the last instruction in the MBB
+ ///
+ int MBBLastInsnTime;
+
+    /// MBBCurTime - the number of the instruction currently being processed
+ ///
+ int MBBCurTime;
+
+ unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) {
+ return Virt2PhysRegMap[VirtReg];
+ }
+
+ unsigned &getVirt2StackSlot(unsigned VirtReg) {
+ return StackSlotForVirtReg[VirtReg];
+ }
+
+ /// markVirtRegModified - Lets us flip bits in the VirtRegModified bitset
+ ///
+ void markVirtRegModified(unsigned Reg, bool Val = true) {
+ assert(MRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ Reg -= MRegisterInfo::FirstVirtualRegister;
+ if (VirtRegModified.size() <= Reg)
+ VirtRegModified.resize(Reg+1);
+ VirtRegModified[Reg] = Val;
+ }
+
+ /// isVirtRegModified - Lets us query the VirtRegModified bitset
+ ///
+ bool isVirtRegModified(unsigned Reg) const {
+ assert(MRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ assert(Reg - MRegisterInfo::FirstVirtualRegister < VirtRegModified.size()
+ && "Illegal virtual register!");
+ return VirtRegModified[Reg - MRegisterInfo::FirstVirtualRegister];
+ }
+
+ public:
+ /// getPassName - returns the BigBlock allocator's name
+ ///
+ virtual const char *getPassName() const {
+ return "BigBlock Register Allocator";
+ }
+
+    /// getAnalysisUsage - declares the required analyses
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveVariables>();
+ AU.addRequiredID(PHIEliminationID);
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// runOnMachineFunction - Register allocate the whole function
+ ///
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ /// AllocateBasicBlock - Register allocate the specified basic block.
+ ///
+ void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+ /// FillVRegReadTable - Fill out the table of vreg read times given a BB
+ ///
+ void FillVRegReadTable(MachineBasicBlock &MBB);
+
+ /// areRegsEqual - This method returns true if the specified registers are
+ /// related to each other. To do this, it checks to see if they are equal
+ /// or if the first register is in the alias set of the second register.
+ ///
+ bool areRegsEqual(unsigned R1, unsigned R2) const {
+ if (R1 == R2) return true;
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(R2);
+ *AliasSet; ++AliasSet) {
+ if (*AliasSet == R1) return true;
+ }
+ return false;
+ }
+
+ /// getStackSpaceFor - This returns the frame index of the specified virtual
+ /// register on the stack, allocating space if necessary.
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+ /// removePhysReg - This method marks the specified physical register as no
+ /// longer being in use.
+ ///
+ void removePhysReg(unsigned PhysReg);
+
+ /// spillVirtReg - This method spills the value specified by PhysReg into
+ /// the virtual register slot specified by VirtReg. It then updates the RA
+ /// data structures to indicate the fact that PhysReg is now available.
+ ///
+ void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned VirtReg, unsigned PhysReg);
+
+ /// spillPhysReg - This method spills the specified physical register into
+ /// the virtual register slot associated with it. If OnlyVirtRegs is set to
+ /// true, then the request is ignored if the physical register does not
+ /// contain a virtual register.
+ ///
+ void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned PhysReg, bool OnlyVirtRegs = false);
+
+ /// assignVirtToPhysReg - This method updates local state so that we know
+ /// that PhysReg is the proper container for VirtReg now. The physical
+ /// register must not be used for anything else when this is called.
+ ///
+ void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg);
+
+ /// isPhysRegAvailable - Return true if the specified physical register is
+ /// free and available for use. This also includes checking to see if
+ /// aliased registers are all free...
+ ///
+ bool isPhysRegAvailable(unsigned PhysReg) const;
+
+ /// getFreeReg - Look to see if there is a free register available in the
+ /// specified register class. If not, return 0.
+ ///
+ unsigned getFreeReg(const TargetRegisterClass *RC);
+
+ /// chooseReg - Pick a physical register to hold the specified
+ /// virtual register by choosing the one which will be read furthest
+ /// in the future.
+ ///
+ unsigned chooseReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned VirtReg);
+
+    /// reloadVirtReg - This method transforms the specified virtual
+ /// register use to refer to a physical register. This method may do this
+ /// in one of several ways: if the register is available in a physical
+ /// register already, it uses that physical register. If the value is not
+ /// in a physical register, and if there are physical registers available,
+ /// it loads it into a register. If register pressure is high, and it is
+ /// possible, it tries to fold the load of the virtual register into the
+ /// instruction itself. It avoids doing this if register pressure is low to
+ /// improve the chance that subsequent instructions can use the reloaded
+ /// value. This method returns the modified instruction.
+ ///
+ MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned OpNum);
+
+ };
+ char RABigBlock::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RABigBlock::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+ // Find the location Reg would belong...
+ int FrameIdx = getVirt2StackSlot(VirtReg);
+
+ if (FrameIdx)
+ return FrameIdx - 1; // Already has space allocated?
+
+ // Allocate a new stack object for this spill location...
+ FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment());
+
+ // Assign the slot...
+ getVirt2StackSlot(VirtReg) = FrameIdx + 1;
+ return FrameIdx;
+}
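+
+// A minimal standalone sketch (plain C++, hypothetical names) of the
+// "FrameIdx + 1" encoding used above: a stored value of 0 means "no stack
+// slot assigned yet" and a stored value of N + 1 means frame index N, so a
+// default-initialized map entry doubles as the "unassigned" state.
+static int exampleGetOrCreateSlot(int &Entry, int &NextFrameIdx) {
+  if (Entry) return Entry - 1;     // already allocated: decode the frame index
+  int FrameIdx = NextFrameIdx++;   // hand out a fresh frame index
+  Entry = FrameIdx + 1;            // store it shifted by one; 0 stays "unassigned"
+  return FrameIdx;
+}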
+
+
+/// removePhysReg - This method marks the specified physical register as no
+/// longer being in use.
+///
+void RABigBlock::removePhysReg(unsigned PhysReg) {
+ PhysRegsUsed[PhysReg] = -1; // PhysReg no longer used
+}
+
+
+/// spillVirtReg - This method spills the value specified by PhysReg into the
+/// virtual register slot specified by VirtReg. It then updates the RA data
+/// structures to indicate the fact that PhysReg is now available.
+///
+void RABigBlock::spillVirtReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned VirtReg, unsigned PhysReg) {
+ assert(VirtReg && "Spilling a physical register is illegal!"
+ " Must not have appropriate kill for the register or use exists beyond"
+ " the intended one.");
+ DOUT << " Spilling register " << RegInfo->getName(PhysReg)
+ << " containing %reg" << VirtReg;
+ if (!isVirtRegModified(VirtReg))
+ DOUT << " which has not been modified, so no store necessary!";
+
+ // Otherwise, there is a virtual register corresponding to this physical
+ // register. We only need to spill it into its stack slot if it has been
+ // modified.
+ if (isVirtRegModified(VirtReg)) {
+ const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+ DOUT << " to stack slot #" << FrameIndex;
+ RegInfo->storeRegToStackSlot(MBB, I, PhysReg, FrameIndex, RC);
+ ++NumStores; // Update statistics
+ }
+
+ getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available
+
+ DOUT << "\n";
+ removePhysReg(PhysReg);
+}
+
+
+/// spillPhysReg - This method spills the specified physical register into the
+/// virtual register slot associated with it. If OnlyVirtRegs is set to true,
+/// then the request is ignored if the physical register does not contain a
+/// virtual register.
+///
+void RABigBlock::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned PhysReg, bool OnlyVirtRegs) {
+ if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used!
+ assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!");
+ if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
+ spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
+ } else {
+ // If the selected register aliases any other registers, we must make
+ // sure that one of the aliases isn't alive.
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet)
+ if (PhysRegsUsed[*AliasSet] != -1 && // Spill aliased register.
+ PhysRegsUsed[*AliasSet] != -2) // If allocatable.
+ if (PhysRegsUsed[*AliasSet])
+ spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
+ }
+}
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now. The physical
+/// register must not be used for anything else when this is called.
+///
+void RABigBlock::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+ assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!");
+ // Update information to note the fact that this register was just used, and
+ // it holds VirtReg.
+ PhysRegsUsed[PhysReg] = VirtReg;
+ getVirt2PhysRegMapSlot(VirtReg) = PhysReg;
+}
+
+
+/// isPhysRegAvailable - Return true if the specified physical register is free
+/// and available for use. This also includes checking to see if aliased
+/// registers are all free...
+///
+bool RABigBlock::isPhysRegAvailable(unsigned PhysReg) const {
+ if (PhysRegsUsed[PhysReg] != -1) return false;
+
+ // If the selected register aliases any other allocated registers, it is
+ // not free!
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet)
+ if (PhysRegsUsed[*AliasSet] != -1) // Aliased register in use?
+ return false; // Can't use this reg then.
+ return true;
+}
+
+
+/// getFreeReg - Look to see if there is a free register available in the
+/// specified register class. If not, return 0.
+///
+unsigned RABigBlock::getFreeReg(const TargetRegisterClass *RC) {
+ // Get iterators defining the range of registers that are valid to allocate in
+ // this class, which also specifies the preferred allocation order.
+ TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+ TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+
+ for (; RI != RE; ++RI)
+ if (isPhysRegAvailable(*RI)) { // Is reg unused?
+ assert(*RI != 0 && "Cannot use register!");
+ return *RI; // Found an unused register!
+ }
+ return 0;
+}
+
+
+/// chooseReg - Pick a physical register to hold the specified
+/// virtual register by choosing the one whose value will be read
+/// furthest in the future.
+///
+unsigned RABigBlock::chooseReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned VirtReg) {
+ const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg);
+ // First check to see if we have a free register of the requested type...
+ unsigned PhysReg = getFreeReg(RC);
+
+ // If we didn't find an unused register, find the one which will be
+ // read at the most distant point in time.
+ if (PhysReg == 0) {
+ unsigned delay=0, longest_delay=0;
+ VRegTimes* ReadTimes;
+
+ unsigned curTime = MBBCurTime;
+
+ // for all physical regs in the RC,
+ for(TargetRegisterClass::iterator pReg = RC->begin();
+ pReg != RC->end(); ++pReg) {
+ // how long until they're read?
+ if(PhysRegsUsed[*pReg]>0) { // only regs currently holding a vreg
+ ReadTimes = VRegReadTable[PhysRegsUsed[*pReg]];
+ if(ReadTimes && !ReadTimes->empty()) {
+ unsigned& pt = VRegReadIdx[PhysRegsUsed[*pReg]];
+ while(pt < ReadTimes->size() && (*ReadTimes)[pt] < curTime) {
+ ++pt;
+ }
+
+ if(pt < ReadTimes->size())
+ delay = (*ReadTimes)[pt] - curTime;
+ else
+ delay = MBBLastInsnTime + 1 - curTime;
+ } else {
+ // This register is only defined, but never
+ // read in this MBB. Therefore the next read
+ // happens after the end of this MBB
+ delay = MBBLastInsnTime + 1 - curTime;
+ }
+
+
+ if(delay > longest_delay) {
+ longest_delay = delay;
+ PhysReg = *pReg;
+ }
+ }
+ }
+
+ if(PhysReg == 0) { // ok, now we're desperate. We couldn't choose
+ // a register to spill from the read timetable,
+ // so now we just spill any allocatable
+ // register we can find.
+
+ // for all physical regs in the RC,
+ for(TargetRegisterClass::iterator pReg = RC->begin();
+ pReg != RC->end(); ++pReg) {
+ // if we find a register we can spill
+ if(PhysRegsUsed[*pReg]>=-1)
+ PhysReg = *pReg; // choose it to be spilled
+ }
+ }
+
+ assert(PhysReg && "couldn't choose a register to spill :( ");
+ // TODO: assert that RC->contains(PhysReg) / handle aliased registers?
+
+ // since we needed to look in the table we need to spill this register.
+ spillPhysReg(MBB, I, PhysReg);
+ }
+
+ // assign the vreg to our chosen physical register
+ assignVirtToPhysReg(VirtReg, PhysReg);
+ return PhysReg; // and return it
+}
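+
+// A condensed, self-contained sketch of the heuristic above (hypothetical
+// data): NextRead[i] is the time at which the value held by candidate
+// physical register i is next read, or MBBLastInsnTime + 1 if it is never
+// read again in this block; we evict the candidate whose value is needed
+// latest.
+static unsigned examplePickFurthestRead(const unsigned *NextRead,
+                                        unsigned NumCandidates,
+                                        unsigned CurTime) {
+  unsigned Best = 0, BestDelay = 0;
+  for (unsigned i = 0; i != NumCandidates; ++i) {
+    unsigned Delay = NextRead[i] - CurTime;  // time until this value is needed
+    if (Delay > BestDelay) {
+      BestDelay = Delay;
+      Best = i;
+    }
+  }
+  return Best;                               // index of the candidate to evict
+}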
+
+
+/// reloadVirtReg - This method transforms an instruction with a virtual
+/// register use to one that references a physical register. It does this as
+/// follows:
+///
+/// 1) If the register is already in a physical register, it uses it.
+/// 2) Otherwise, if there is a free physical register, it uses that.
+/// 3) Otherwise, it calls chooseReg() to get the physical register
+/// holding the most distantly needed value, generating a spill in
+/// the process.
+///
+/// This method returns the modified instruction.
+MachineInstr *RABigBlock::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned OpNum) {
+ unsigned VirtReg = MI->getOperand(OpNum).getReg();
+
+ // If the virtual register is already available in a physical register,
+ // just update the instruction and return.
+ if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
+ MI->getOperand(OpNum).setReg(PR);
+ return MI;
+ }
+
+ // Otherwise, if we have free physical registers available to hold the
+ // value, use them.
+ const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg);
+ unsigned PhysReg = getFreeReg(RC);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+
+ if (PhysReg) { // we have a free register, so use it.
+ assignVirtToPhysReg(VirtReg, PhysReg);
+ } else { // no free registers available.
+ // try to fold the spill into the instruction
+ if(MachineInstr* FMI = RegInfo->foldMemoryOperand(MI, OpNum, FrameIndex)) {
+ ++NumFolded;
+ // Since we changed the address of MI, make sure to update live variables
+ // to know that the new instruction has the properties of the old one.
+ LV->instructionChanged(MI, FMI);
+ return MBB.insert(MBB.erase(MI), FMI);
+ }
+
+ // determine which of the physical registers we'll kill off, since we
+ // couldn't fold.
+ PhysReg = chooseReg(MBB, MI, VirtReg);
+ }
+
+ // this virtual register is now unmodified (since we just reloaded it)
+ markVirtRegModified(VirtReg, false);
+
+ DOUT << " Reloading %reg" << VirtReg << " into "
+ << RegInfo->getName(PhysReg) << "\n";
+
+ // Add move instruction(s)
+ RegInfo->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC);
+ ++NumLoads; // Update statistics
+
+ MF->setPhysRegUsed(PhysReg);
+ MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register
+ return MI;
+}
+
+/// Fill out the vreg read timetable. Since ReadTime increases
+/// monotonically, the individual readtime sets will be sorted
+/// in ascending order.
+void RABigBlock::FillVRegReadTable(MachineBasicBlock &MBB) {
+ // loop over each instruction
+ MachineBasicBlock::iterator MII;
+ unsigned ReadTime;
+
+ for(ReadTime=0, MII = MBB.begin(); MII != MBB.end(); ++ReadTime, ++MII) {
+ MachineInstr *MI = MII;
+
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ // look for vreg reads..
+ if (MO.isRegister() && !MO.isDef() && MO.getReg() &&
+ MRegisterInfo::isVirtualRegister(MO.getReg())) {
+ // ..and add them to the read table.
+ VRegTimes* &Times = VRegReadTable[MO.getReg()];
+ if(!Times) {
+ Times = new VRegTimes;
+ VRegReadIdx[MO.getReg()] = 0;
+ }
+ Times->push_back(ReadTime);
+ }
+ }
+
+ }
+
+ MBBLastInsnTime = ReadTime;
+
+ for(DenseMap<unsigned, VRegTimes*, VRegKeyInfo>::iterator Reads = VRegReadTable.begin();
+ Reads != VRegReadTable.end(); ++Reads) {
+ if(Reads->second) {
+ DOUT << "Reads[" << Reads->first << "]=" << Reads->second->size() << "\n";
+ }
+ }
+}
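+
+// Because the table is filled by a single forward scan, each vreg's read
+// times come out sorted ascending. A sketch (hypothetical data) of how
+// chooseReg consumes such a list: skip reads that are already behind the
+// current time and report the next one, or "just past the end of the block"
+// if the value is never read again.
+static unsigned exampleNextReadAfter(const unsigned *ReadTimes, unsigned NumReads,
+                                     unsigned CurTime, unsigned LastInsnTime) {
+  for (unsigned i = 0; i != NumReads; ++i)
+    if (ReadTimes[i] >= CurTime)
+      return ReadTimes[i];        // the next time this vreg's value is read
+  return LastInsnTime + 1;        // never read again in this block
+}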
+
+/// isReadModWriteImplicitKill - True if this is an implicit kill for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isRegister() && MO.getReg() == Reg && MO.isImplicit() &&
+ MO.isDef() && !MO.isDead())
+ return true;
+ }
+ return false;
+}
+
+/// isReadModWriteImplicitDef - True if this is an implicit def for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isRegister() && MO.getReg() == Reg && MO.isImplicit() &&
+ !MO.isDef() && MO.isKill())
+ return true;
+ }
+ return false;
+}
+
+
+void RABigBlock::AllocateBasicBlock(MachineBasicBlock &MBB) {
+ // loop over each instruction
+ MachineBasicBlock::iterator MII = MBB.begin();
+ const TargetInstrInfo &TII = *TM->getInstrInfo();
+
+ DEBUG(const BasicBlock *LBB = MBB.getBasicBlock();
+ if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName());
+
+ // If this is the first basic block in the machine function, add live-in
+ // registers as active.
+ if (&MBB == &*MF->begin()) {
+ for (MachineFunction::livein_iterator I = MF->livein_begin(),
+ E = MF->livein_end(); I != E; ++I) {
+ unsigned Reg = I->first;
+ MF->setPhysRegUsed(Reg);
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now
+ MF->setPhysRegUsed(*AliasSet);
+ }
+ }
+ }
+ }
+
+ // Otherwise, sequentially allocate each instruction in the MBB.
+ MBBCurTime = -1;
+ while (MII != MBB.end()) {
+ MachineInstr *MI = MII++;
+ MBBCurTime++;
+ const TargetInstrDescriptor &TID = TII.get(MI->getOpcode());
+ DEBUG(DOUT << "\nTime=" << MBBCurTime << " Starting RegAlloc of: " << *MI;
+ DOUT << " Regs have values: ";
+ for (unsigned i = 0; i != RegInfo->getNumRegs(); ++i)
+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+ DOUT << "[" << RegInfo->getName(i)
+ << ",%reg" << PhysRegsUsed[i] << "] ";
+ DOUT << "\n");
+
+ SmallVector<unsigned, 8> Kills;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isRegister() && MO.isKill()) {
+ if (!MO.isImplicit())
+ Kills.push_back(MO.getReg());
+ else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
+ // These are extra physical register kills when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ Kills.push_back(MO.getReg());
+ }
+ }
+
+ // Get the used operands into registers. This has the potential to spill
+ // incoming values if we are out of registers. Note that we completely
+ // ignore physical register uses here. We assume that if an explicit
+ // physical register is referenced by the instruction, that it is guaranteed
+ // to be live-in, or the input is badly hosed.
+ //
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ // here we are looking for only used operands (never def&use)
+ if (MO.isRegister() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
+ MRegisterInfo::isVirtualRegister(MO.getReg()))
+ MI = reloadVirtReg(MBB, MI, i);
+ }
+
+ // If this instruction is the last user of this register, kill the
+ // value, freeing the register being used, so it doesn't need to be
+ // spilled to memory.
+ //
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+ unsigned VirtReg = Kills[i];
+ unsigned PhysReg = VirtReg;
+ if (MRegisterInfo::isVirtualRegister(VirtReg)) {
+ // If the virtual register was never materialized into a register, it
+ // might not be in the map, but it won't hurt to zero it out anyway.
+ unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+ PhysReg = PhysRegSlot;
+ PhysRegSlot = 0;
+ } else if (PhysRegsUsed[PhysReg] == -2) {
+ // Unallocatable register dead, ignore.
+ continue;
+ } else {
+ assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) &&
+ "Silently clearing a virtual register?");
+ }
+
+ if (PhysReg) {
+ DOUT << " Last use of " << RegInfo->getName(PhysReg)
+ << "[%reg" << VirtReg <<"], removing it from live set\n";
+ removePhysReg(PhysReg);
+ for (const unsigned *AliasSet = RegInfo->getSubRegisters(PhysReg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ DOUT << " Last use of "
+ << RegInfo->getName(*AliasSet)
+ << "[%reg" << VirtReg <<"], removing it from live set\n";
+ removePhysReg(*AliasSet);
+ }
+ }
+ }
+ }
+
+ // Loop over all of the operands of the instruction, spilling registers that
+ // are defined, and marking explicit destinations in the PhysRegsUsed map.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isRegister() && MO.isDef() && !MO.isImplicit() && MO.getReg() &&
+ MRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+ if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
+ // These are extra physical register defs when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+ MF->setPhysRegUsed(Reg);
+ spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now
+ MF->setPhysRegUsed(*AliasSet);
+ }
+ }
+ }
+ }
+
+ // Loop over the implicit defs, spilling them as well.
+ if (TID.ImplicitDefs) {
+ for (const unsigned *ImplicitDefs = TID.ImplicitDefs;
+ *ImplicitDefs; ++ImplicitDefs) {
+ unsigned Reg = *ImplicitDefs;
+ if (PhysRegsUsed[Reg] != -2) {
+ spillPhysReg(MBB, MI, Reg, true);
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ }
+ MF->setPhysRegUsed(Reg);
+ for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now
+ MF->setPhysRegUsed(*AliasSet);
+ }
+ }
+ }
+ }
+
+ SmallVector<unsigned, 8> DeadDefs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isRegister() && MO.isDead())
+ DeadDefs.push_back(MO.getReg());
+ }
+
+ // Okay, we have allocated all of the source operands and spilled any values
+ // that would be destroyed by defs of this instruction. Loop over the
+ // explicit defs and assign them to a register, spilling incoming values if
+ // we need to scavenge a register.
+ //
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isRegister() && MO.isDef() && MO.getReg() &&
+ MRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned DestVirtReg = MO.getReg();
+ unsigned DestPhysReg;
+
+ // If DestVirtReg already has a value, use it.
+ if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
+ DestPhysReg = chooseReg(MBB, MI, DestVirtReg);
+ MF->setPhysRegUsed(DestPhysReg);
+ markVirtRegModified(DestVirtReg);
+ MI->getOperand(i).setReg(DestPhysReg); // Assign the output register
+ }
+ }
+
+ // If this instruction defines any registers that are immediately dead,
+ // kill them now.
+ //
+ for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) {
+ unsigned VirtReg = DeadDefs[i];
+ unsigned PhysReg = VirtReg;
+ if (MRegisterInfo::isVirtualRegister(VirtReg)) {
+ unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+ PhysReg = PhysRegSlot;
+ assert(PhysReg != 0);
+ PhysRegSlot = 0;
+ } else if (PhysRegsUsed[PhysReg] == -2) {
+ // Unallocatable register dead, ignore.
+ continue;
+ }
+
+ if (PhysReg) {
+ DOUT << " Register " << RegInfo->getName(PhysReg)
+ << " [%reg" << VirtReg
+ << "] is never used, removing it frame live list\n";
+ removePhysReg(PhysReg);
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ DOUT << " Register " << RegInfo->getName(*AliasSet)
+ << " [%reg" << *AliasSet
+ << "] is never used, removing it frame live list\n";
+ removePhysReg(*AliasSet);
+ }
+ }
+ }
+ }
+
+ // Finally, if this is a noop copy instruction, zap it.
+ unsigned SrcReg, DstReg;
+ if (TII.isMoveInstr(*MI, SrcReg, DstReg) && SrcReg == DstReg) {
+ LV->removeVirtualRegistersKilled(MI);
+ LV->removeVirtualRegistersDead(MI);
+ MBB.erase(MI);
+ }
+ }
+
+ MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
+
+ // Spill all physical registers holding virtual registers now.
+ for (unsigned i = 0, e = RegInfo->getNumRegs(); i != e; ++i)
+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+ if (unsigned VirtReg = PhysRegsUsed[i])
+ spillVirtReg(MBB, MI, VirtReg, i);
+ else
+ removePhysReg(i);
+}
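+
+// For reference, the PhysRegsUsed encoding that the checks above rely on,
+// written out as a small sketch (the names are hypothetical): -2 marks a
+// register the allocator must never touch, -1 marks a register holding no
+// value, 0 marks a register pinned by a physreg def or live-in, and any
+// positive entry is the virtual register currently living there.
+enum ExamplePhysRegState { ExampleUnallocatable = -2, ExampleFree = -1,
+                           ExampleReservedPhys = 0 };
+static bool examplePhysRegHoldsVirtReg(int Entry) { return Entry > 0; }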
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RABigBlock::runOnMachineFunction(MachineFunction &Fn) {
+ DOUT << "Machine Function " << "\n";
+ MF = &Fn;
+ TM = &Fn.getTarget();
+ RegInfo = TM->getRegisterInfo();
+ LV = &getAnalysis<LiveVariables>();
+
+ PhysRegsUsed.assign(RegInfo->getNumRegs(), -1);
+
+ // At various places we want to efficiently check to see whether a register
+ // is allocatable. To handle this, we mark all unallocatable registers as
+ // being pinned down, permanently.
+ {
+ BitVector Allocable = RegInfo->getAllocatableSet(Fn);
+ for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
+ if (!Allocable[i])
+ PhysRegsUsed[i] = -2; // Mark the reg unallocable.
+ }
+
+ // initialize the virtual->physical register map to have a 'null'
+ // mapping for all virtual registers
+ Virt2PhysRegMap.grow(MF->getSSARegMap()->getLastVirtReg());
+ StackSlotForVirtReg.grow(MF->getSSARegMap()->getLastVirtReg());
+ VirtRegModified.resize(MF->getSSARegMap()->getLastVirtReg() - MRegisterInfo::FirstVirtualRegister + 1,0);
+
+ // Loop over all of the basic blocks, eliminating virtual register references
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+ // fill out the read timetable
+ FillVRegReadTable(*MBB);
+ // use it to allocate the BB
+ AllocateBasicBlock(*MBB);
+ // clear it
+ VRegReadTable.clear();
+ }
+
+ StackSlotForVirtReg.clear();
+ PhysRegsUsed.clear();
+ VirtRegModified.clear();
+ Virt2PhysRegMap.clear();
+ return true;
+}
+
+FunctionPass *llvm::createBigBlockRegisterAllocator() {
+ return new RABigBlock();
+}
+
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
new file mode 100644
index 0000000..01d43fd
--- /dev/null
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -0,0 +1,828 @@
+//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a linear scan register allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "PhysRegTracker.h"
+#include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+#include <set>
+#include <queue>
+#include <memory>
+#include <cmath>
+using namespace llvm;
+
+STATISTIC(NumIters , "Number of iterations performed");
+STATISTIC(NumBacktracks, "Number of times we had to backtrack");
+
+static RegisterRegAlloc
+linearscanRegAlloc("linearscan", " linear scan register allocator",
+ createLinearScanRegisterAllocator);
+
+namespace {
+ static unsigned numIterations = 0;
+ static unsigned numIntervals = 0;
+
+ struct VISIBILITY_HIDDEN RALinScan : public MachineFunctionPass {
+ static char ID;
+ RALinScan() : MachineFunctionPass((intptr_t)&ID) {}
+
+ typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr;
+ typedef std::vector<IntervalPtr> IntervalPtrs;
+ private:
+ /// RelatedRegClasses - This structure is built the first time a function is
+ /// compiled, and keeps track of which register classes have registers that
+ /// belong to multiple classes or have aliases that are in other classes.
+ EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses;
+ std::map<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg;
+
+ MachineFunction* mf_;
+ const TargetMachine* tm_;
+ const MRegisterInfo* mri_;
+ LiveIntervals* li_;
+
+ /// handled_ - Intervals are added to the handled_ set in the order of their
+ /// start value. This is used for backtracking.
+ std::vector<LiveInterval*> handled_;
+
+ /// fixed_ - Intervals that correspond to machine registers.
+ ///
+ IntervalPtrs fixed_;
+
+ /// active_ - Intervals that are currently being processed, and which have a
+ /// live range active for the current point.
+ IntervalPtrs active_;
+
+ /// inactive_ - Intervals that are currently being processed, but which have
+ /// a hole at the current point.
+ IntervalPtrs inactive_;
+
+ typedef std::priority_queue<LiveInterval*,
+ std::vector<LiveInterval*>,
+ greater_ptr<LiveInterval> > IntervalHeap;
+ IntervalHeap unhandled_;
+ std::auto_ptr<PhysRegTracker> prt_;
+ std::auto_ptr<VirtRegMap> vrm_;
+ std::auto_ptr<Spiller> spiller_;
+
+ public:
+ virtual const char* getPassName() const {
+ return "Linear Scan Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveIntervals>();
+ AU.addRequiredID(SimpleRegisterCoalescingID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - register allocate the whole function
+ bool runOnMachineFunction(MachineFunction&);
+
+ private:
+ /// linearScan - the linear scan algorithm
+ void linearScan();
+
+ /// initIntervalSets - initialize the interval sets.
+ ///
+ void initIntervalSets();
+
+ /// processActiveIntervals - expire old intervals and move non-overlapping
+ /// ones to the inactive list.
+ void processActiveIntervals(unsigned CurPoint);
+
+ /// processInactiveIntervals - expire old intervals and move overlapping
+ /// ones to the active list.
+ void processInactiveIntervals(unsigned CurPoint);
+
+ /// assignRegOrStackSlotAtInterval - assign a register if one
+ /// is available, or spill.
+ void assignRegOrStackSlotAtInterval(LiveInterval* cur);
+
+ ///
+ /// register handling helpers
+ ///
+
+ /// getFreePhysReg - return a free physical register for this virtual
+ /// register interval if we have one, otherwise return 0.
+ unsigned getFreePhysReg(LiveInterval* cur);
+
+ /// assignVirt2StackSlot - assigns this virtual register to a
+ /// stack slot. returns the stack slot
+ int assignVirt2StackSlot(unsigned virtReg);
+
+ void ComputeRelatedRegClasses();
+
+ template <typename ItTy>
+ void printIntervals(const char* const str, ItTy i, ItTy e) const {
+ if (str) DOUT << str << " intervals:\n";
+ for (; i != e; ++i) {
+ DOUT << "\t" << *i->first << " -> ";
+ unsigned reg = i->first->reg;
+ if (MRegisterInfo::isVirtualRegister(reg)) {
+ reg = vrm_->getPhys(reg);
+ }
+ DOUT << mri_->getName(reg) << '\n';
+ }
+ }
+ };
+ char RALinScan::ID = 0;
+}
+
+void RALinScan::ComputeRelatedRegClasses() {
+ const MRegisterInfo &MRI = *mri_;
+
+ // First pass, add all reg classes to the union, and determine at least one
+ // reg class that each register is in.
+ bool HasAliases = false;
+ for (MRegisterInfo::regclass_iterator RCI = MRI.regclass_begin(),
+ E = MRI.regclass_end(); RCI != E; ++RCI) {
+ RelatedRegClasses.insert(*RCI);
+ for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end();
+ I != E; ++I) {
+ HasAliases = HasAliases || *MRI.getAliasSet(*I) != 0;
+
+ const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I];
+ if (PRC) {
+ // Already processed this register. Just make sure we know that
+ // multiple register classes share a register.
+ RelatedRegClasses.unionSets(PRC, *RCI);
+ } else {
+ PRC = *RCI;
+ }
+ }
+ }
+
+ // Second pass, now that we know conservatively what register classes each reg
+ // belongs to, add info about aliases. We don't need to do this for targets
+ // without register aliases.
+ if (HasAliases)
+ for (std::map<unsigned, const TargetRegisterClass*>::iterator
+ I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end();
+ I != E; ++I)
+ for (const unsigned *AS = MRI.getAliasSet(I->first); *AS; ++AS)
+ RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]);
+}
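+
+// A bare-bones union-find sketch (hypothetical class ids) of what
+// EquivalenceClasses provides here: register classes that share a physical
+// register, or whose registers alias one another, end up under a single
+// leader, so later code only compares leaders to decide whether two classes
+// actually compete for registers.
+static unsigned exampleFind(unsigned *Parent, unsigned X) {
+  while (Parent[X] != X) X = Parent[X];   // follow parents up to the leader
+  return X;
+}
+static void exampleUnion(unsigned *Parent, unsigned A, unsigned B) {
+  Parent[exampleFind(Parent, A)] = exampleFind(Parent, B);
+}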
+
+bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ tm_ = &fn.getTarget();
+ mri_ = tm_->getRegisterInfo();
+ li_ = &getAnalysis<LiveIntervals>();
+
+ // If this is the first function compiled, compute the related reg classes.
+ if (RelatedRegClasses.empty())
+ ComputeRelatedRegClasses();
+
+ if (!prt_.get()) prt_.reset(new PhysRegTracker(*mri_));
+ vrm_.reset(new VirtRegMap(*mf_));
+ if (!spiller_.get()) spiller_.reset(createSpiller());
+
+ initIntervalSets();
+
+ linearScan();
+
+ // Rewrite spill code and update the PhysRegsUsed set.
+ spiller_->runOnMachineFunction(*mf_, *vrm_);
+
+ vrm_.reset(); // Free the VirtRegMap
+
+
+ while (!unhandled_.empty()) unhandled_.pop();
+ fixed_.clear();
+ active_.clear();
+ inactive_.clear();
+ handled_.clear();
+
+ return true;
+}
+
+/// initIntervalSets - initialize the interval sets.
+///
+void RALinScan::initIntervalSets()
+{
+ assert(unhandled_.empty() && fixed_.empty() &&
+ active_.empty() && inactive_.empty() &&
+ "interval sets should be empty on initialization");
+
+ for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
+ if (MRegisterInfo::isPhysicalRegister(i->second.reg)) {
+ mf_->setPhysRegUsed(i->second.reg);
+ fixed_.push_back(std::make_pair(&i->second, i->second.begin()));
+ } else
+ unhandled_.push(&i->second);
+ }
+}
+
+void RALinScan::linearScan()
+{
+ // linear scan algorithm
+ DOUT << "********** LINEAR SCAN **********\n";
+ DOUT << "********** Function: " << mf_->getFunction()->getName() << '\n';
+
+ // DEBUG(printIntervals("unhandled", unhandled_.begin(), unhandled_.end()));
+ DEBUG(printIntervals("fixed", fixed_.begin(), fixed_.end()));
+ DEBUG(printIntervals("active", active_.begin(), active_.end()));
+ DEBUG(printIntervals("inactive", inactive_.begin(), inactive_.end()));
+
+ while (!unhandled_.empty()) {
+ // pick the interval with the earliest start point
+ LiveInterval* cur = unhandled_.top();
+ unhandled_.pop();
+ ++numIterations;
+ DOUT << "\n*** CURRENT ***: " << *cur << '\n';
+
+ processActiveIntervals(cur->beginNumber());
+ processInactiveIntervals(cur->beginNumber());
+
+ assert(MRegisterInfo::isVirtualRegister(cur->reg) &&
+ "Can only allocate virtual registers!");
+
+ // Allocating a virtual register. try to find a free
+ // physical register or spill an interval (possibly this one) in order to
+ // assign it one.
+ assignRegOrStackSlotAtInterval(cur);
+
+ DEBUG(printIntervals("active", active_.begin(), active_.end()));
+ DEBUG(printIntervals("inactive", inactive_.begin(), inactive_.end()));
+ }
+ numIntervals += li_->getNumIntervals();
+ NumIters += numIterations;
+
+ // expire any remaining active intervals
+ for (IntervalPtrs::reverse_iterator
+ i = active_.rbegin(); i != active_.rend(); ) {
+ unsigned reg = i->first->reg;
+ DOUT << "\tinterval " << *i->first << " expired\n";
+ assert(MRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ prt_->delRegUse(reg);
+ i = IntervalPtrs::reverse_iterator(active_.erase(i.base()-1));
+ }
+
+ // expire any remaining inactive intervals
+ for (IntervalPtrs::reverse_iterator
+ i = inactive_.rbegin(); i != inactive_.rend(); ) {
+ DOUT << "\tinterval " << *i->first << " expired\n";
+ i = IntervalPtrs::reverse_iterator(inactive_.erase(i.base()-1));
+ }
+
+ // A brute force way of adding live-ins to every BB.
+ MachineFunction::iterator MBB = mf_->begin();
+ ++MBB; // Skip entry MBB.
+ for (MachineFunction::iterator E = mf_->end(); MBB != E; ++MBB) {
+ unsigned StartIdx = li_->getMBBStartIdx(MBB->getNumber());
+ for (IntervalPtrs::iterator i = fixed_.begin(), e = fixed_.end();
+ i != e; ++i)
+ if (i->first->liveAt(StartIdx))
+ MBB->addLiveIn(i->first->reg);
+
+ for (unsigned i = 0, e = handled_.size(); i != e; ++i) {
+ LiveInterval *HI = handled_[i];
+ unsigned Reg = HI->reg;
+ if (!vrm_->hasStackSlot(Reg) && HI->liveAt(StartIdx)) {
+ assert(MRegisterInfo::isVirtualRegister(Reg));
+ Reg = vrm_->getPhys(Reg);
+ MBB->addLiveIn(Reg);
+ }
+ }
+ }
+
+ DOUT << *vrm_;
+}
+
+/// processActiveIntervals - expire old intervals and move non-overlapping ones
+/// to the inactive list.
+void RALinScan::processActiveIntervals(unsigned CurPoint)
+{
+ DOUT << "\tprocessing active intervals:\n";
+
+ for (unsigned i = 0, e = active_.size(); i != e; ++i) {
+ LiveInterval *Interval = active_[i].first;
+ LiveInterval::iterator IntervalPos = active_[i].second;
+ unsigned reg = Interval->reg;
+
+ IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+ if (IntervalPos == Interval->end()) { // Remove expired intervals.
+ DOUT << "\t\tinterval " << *Interval << " expired\n";
+ assert(MRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ prt_->delRegUse(reg);
+
+ // Pop off the end of the list.
+ active_[i] = active_.back();
+ active_.pop_back();
+ --i; --e;
+
+ } else if (IntervalPos->start > CurPoint) {
+ // Move inactive intervals to inactive list.
+ DOUT << "\t\tinterval " << *Interval << " inactive\n";
+ assert(MRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ prt_->delRegUse(reg);
+ // add to inactive.
+ inactive_.push_back(std::make_pair(Interval, IntervalPos));
+
+ // Pop off the end of the list.
+ active_[i] = active_.back();
+ active_.pop_back();
+ --i; --e;
+ } else {
+ // Otherwise, just update the iterator position.
+ active_[i].second = IntervalPos;
+ }
+ }
+}
+
+/// processInactiveIntervals - expire old intervals and move overlapping
+/// ones to the active list.
+void RALinScan::processInactiveIntervals(unsigned CurPoint)
+{
+ DOUT << "\tprocessing inactive intervals:\n";
+
+ for (unsigned i = 0, e = inactive_.size(); i != e; ++i) {
+ LiveInterval *Interval = inactive_[i].first;
+ LiveInterval::iterator IntervalPos = inactive_[i].second;
+ unsigned reg = Interval->reg;
+
+ IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+ if (IntervalPos == Interval->end()) { // remove expired intervals.
+ DOUT << "\t\tinterval " << *Interval << " expired\n";
+
+ // Pop off the end of the list.
+ inactive_[i] = inactive_.back();
+ inactive_.pop_back();
+ --i; --e;
+ } else if (IntervalPos->start <= CurPoint) {
+ // move re-activated intervals in active list
+ DOUT << "\t\tinterval " << *Interval << " active\n";
+ assert(MRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ prt_->addRegUse(reg);
+ // add to active
+ active_.push_back(std::make_pair(Interval, IntervalPos));
+
+ // Pop off the end of the list.
+ inactive_[i] = inactive_.back();
+ inactive_.pop_back();
+ --i; --e;
+ } else {
+ // Otherwise, just update the iterator position.
+ inactive_[i].second = IntervalPos;
+ }
+ }
+}
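+
+// The two loops above boil down to a three-way classification of an interval
+// at the start point of the interval being allocated. A compact sketch
+// (hypothetical inputs): no live range left at or after CurPoint means the
+// interval has expired; a next range starting after CurPoint means it is
+// sitting in a hole and is inactive; otherwise CurPoint falls inside a range
+// and the interval is active.
+enum ExampleIntervalState { ExampleExpired, ExampleInactive, ExampleActive };
+static ExampleIntervalState exampleClassify(bool AnyRangeLeft,
+                                            unsigned NextRangeStart,
+                                            unsigned CurPoint) {
+  if (!AnyRangeLeft) return ExampleExpired;
+  if (NextRangeStart > CurPoint) return ExampleInactive;
+  return ExampleActive;
+}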
+
+/// updateSpillWeights - updates the spill weights of the specified physical
+/// register and its aliases by the given amount.
+static void updateSpillWeights(std::vector<float> &Weights,
+ unsigned reg, float weight,
+ const MRegisterInfo *MRI) {
+ Weights[reg] += weight;
+ for (const unsigned* as = MRI->getAliasSet(reg); *as; ++as)
+ Weights[*as] += weight;
+}
+
+static
+RALinScan::IntervalPtrs::iterator
+FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) {
+ for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end();
+ I != E; ++I)
+ if (I->first == LI) return I;
+ return IP.end();
+}
+
+static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, unsigned Point){
+ for (unsigned i = 0, e = V.size(); i != e; ++i) {
+ RALinScan::IntervalPtr &IP = V[i];
+ LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
+ IP.second, Point);
+ if (I != IP.first->begin()) --I;
+ IP.second = I;
+ }
+}
+
+/// assignRegOrStackSlotAtInterval - assign a register if one is available, or
+/// spill.
+void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
+{
+ DOUT << "\tallocating current interval: ";
+
+ PhysRegTracker backupPrt = *prt_;
+
+ std::vector<std::pair<unsigned, float> > SpillWeightsToAdd;
+ unsigned StartPosition = cur->beginNumber();
+ const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(cur->reg);
+ const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
+
+ // for every interval in inactive we overlap with, mark the
+ // register as not free and update spill weights.
+ for (IntervalPtrs::const_iterator i = inactive_.begin(),
+ e = inactive_.end(); i != e; ++i) {
+ unsigned Reg = i->first->reg;
+ assert(MRegisterInfo::isVirtualRegister(Reg) &&
+ "Can only allocate virtual registers!");
+ const TargetRegisterClass *RegRC = mf_->getSSARegMap()->getRegClass(Reg);
+ // If this is not in a related reg class to the register we're allocating,
+ // don't check it.
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+ cur->overlapsFrom(*i->first, i->second-1)) {
+ Reg = vrm_->getPhys(Reg);
+ prt_->addRegUse(Reg);
+ SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight));
+ }
+ }
+
+ // Speculatively check to see if we can get a register right now. If not,
+ // we know we won't be able to by adding more constraints. If so, we can
+ // check to see if it is valid. Doing an exhaustive search of the fixed_ list
+ // is very bad (it contains all callee clobbered registers for any functions
+ // with a call), so we want to avoid doing that if possible.
+ unsigned physReg = getFreePhysReg(cur);
+ if (physReg) {
+ // We got a register. However, if it's in the fixed_ list, we might
+ // conflict with it. Check to see if we conflict with it or any of its
+ // aliases.
+ std::set<unsigned> RegAliases;
+ for (const unsigned *AS = mri_->getAliasSet(physReg); *AS; ++AS)
+ RegAliases.insert(*AS);
+
+ bool ConflictsWithFixed = false;
+ for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+ IntervalPtr &IP = fixed_[i];
+ if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) {
+ // Okay, this reg is on the fixed list. Check to see if we actually
+ // conflict.
+ LiveInterval *I = IP.first;
+ if (I->endNumber() > StartPosition) {
+ LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
+ IP.second = II;
+ if (II != I->begin() && II->start > StartPosition)
+ --II;
+ if (cur->overlapsFrom(*I, II)) {
+ ConflictsWithFixed = true;
+ break;
+ }
+ }
+ }
+ }
+
+ // Okay, the register picked by our speculative getFreePhysReg call turned
+ // out to be in use. Actually add all of the conflicting fixed registers to
+ // prt so we can do an accurate query.
+ if (ConflictsWithFixed) {
+ // For every interval in fixed we overlap with, mark the register as not
+ // free and update spill weights.
+ for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+ IntervalPtr &IP = fixed_[i];
+ LiveInterval *I = IP.first;
+
+ const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg];
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+ I->endNumber() > StartPosition) {
+ LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
+ IP.second = II;
+ if (II != I->begin() && II->start > StartPosition)
+ --II;
+ if (cur->overlapsFrom(*I, II)) {
+ unsigned reg = I->reg;
+ prt_->addRegUse(reg);
+ SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight));
+ }
+ }
+ }
+
+ // Using the newly updated prt_ object, which includes conflicts in the
+ // future, see if there are any registers available.
+ physReg = getFreePhysReg(cur);
+ }
+ }
+
+ // Restore the physical register tracker, removing information about the
+ // future.
+ *prt_ = backupPrt;
+
+ // if we find a free register, we are done: assign this virtual to
+ // the free physical register and add this interval to the active
+ // list.
+ if (physReg) {
+ DOUT << mri_->getName(physReg) << '\n';
+ vrm_->assignVirt2Phys(cur->reg, physReg);
+ prt_->addRegUse(physReg);
+ active_.push_back(std::make_pair(cur, cur->begin()));
+ handled_.push_back(cur);
+ return;
+ }
+ DOUT << "no free registers\n";
+
+ // Compile the spill weights into an array that is better for scanning.
+ std::vector<float> SpillWeights(mri_->getNumRegs(), 0.0);
+ for (std::vector<std::pair<unsigned, float> >::iterator
+ I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I)
+ updateSpillWeights(SpillWeights, I->first, I->second, mri_);
+
+ // for each interval in active, update spill weights.
+ for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end();
+ i != e; ++i) {
+ unsigned reg = i->first->reg;
+ assert(MRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+ reg = vrm_->getPhys(reg);
+ updateSpillWeights(SpillWeights, reg, i->first->weight, mri_);
+ }
+
+ DOUT << "\tassigning stack slot at interval "<< *cur << ":\n";
+
+ // Find a register to spill.
+ float minWeight = HUGE_VALF;
+ unsigned minReg = cur->preference; // Try the preferred register first.
+
+ if (!minReg || SpillWeights[minReg] == HUGE_VALF)
+ for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
+ e = RC->allocation_order_end(*mf_); i != e; ++i) {
+ unsigned reg = *i;
+ if (minWeight > SpillWeights[reg]) {
+ minWeight = SpillWeights[reg];
+ minReg = reg;
+ }
+ }
+
+ // If we didn't find a register that is spillable, try aliases?
+ if (!minReg) {
+ for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
+ e = RC->allocation_order_end(*mf_); i != e; ++i) {
+ unsigned reg = *i;
+ // No need to worry about if the alias register size < regsize of RC.
+ // We are going to spill all registers that alias it anyway.
+ for (const unsigned* as = mri_->getAliasSet(reg); *as; ++as) {
+ if (minWeight > SpillWeights[*as]) {
+ minWeight = SpillWeights[*as];
+ minReg = *as;
+ }
+ }
+ }
+
+ // All registers must have inf weight. Just grab one!
+ if (!minReg)
+ minReg = *RC->allocation_order_begin(*mf_);
+ }
+
+ DOUT << "\t\tregister with min weight: "
+ << mri_->getName(minReg) << " (" << minWeight << ")\n";
+
+ // if the current has the minimum weight, we need to spill it and
+ // add any added intervals back to unhandled, and restart
+ // linearscan.
+ if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
+ DOUT << "\t\t\tspilling(c): " << *cur << '\n';
+ // if the current interval is re-materializable, remember so and don't
+ // assign it a spill slot.
+ if (cur->remat)
+ vrm_->setVirtIsReMaterialized(cur->reg, cur->remat);
+ int slot = cur->remat ? vrm_->assignVirtReMatId(cur->reg)
+ : vrm_->assignVirt2StackSlot(cur->reg);
+ std::vector<LiveInterval*> added =
+ li_->addIntervalsForSpills(*cur, *vrm_, slot);
+ if (added.empty())
+ return; // Early exit if all spills were folded.
+
+ // Merge added with unhandled. Note that we know that
+ // addIntervalsForSpills returns intervals sorted by their starting
+ // point.
+ for (unsigned i = 0, e = added.size(); i != e; ++i)
+ unhandled_.push(added[i]);
+ return;
+ }
+
+ ++NumBacktracks;
+
+ // push the current interval back to unhandled since we are going
+ // to re-run at least this iteration. Since we didn't modify it, it
+ // should go right back to the front of the list.
+ unhandled_.push(cur);
+
+ // otherwise we spill all intervals aliasing the register with
+ // minimum weight, rollback to the interval with the earliest
+ // start point and let the linear scan algorithm run again
+ std::vector<LiveInterval*> added;
+ assert(MRegisterInfo::isPhysicalRegister(minReg) &&
+ "did not choose a register to spill?");
+ BitVector toSpill(mri_->getNumRegs());
+
+ // We are going to spill minReg and all its aliases.
+ toSpill[minReg] = true;
+ for (const unsigned* as = mri_->getAliasSet(minReg); *as; ++as)
+ toSpill[*as] = true;
+
+ // the earliest start of a spilled interval indicates up to where
+ // in handled we need to roll back
+ unsigned earliestStart = cur->beginNumber();
+
+ // set of spilled vregs (used later to rollback properly)
+ std::set<unsigned> spilled;
+
+ // spill live intervals of virtual regs mapped to the physical register we
+ // want to clear (and its aliases). We only spill those that overlap with the
+ // current interval as the rest do not affect its allocation. we also keep
+ // track of the earliest start of all spilled live intervals since this will
+ // mark our rollback point.
+ for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) {
+ unsigned reg = i->first->reg;
+ if (//MRegisterInfo::isVirtualRegister(reg) &&
+ toSpill[vrm_->getPhys(reg)] &&
+ cur->overlapsFrom(*i->first, i->second)) {
+ DOUT << "\t\t\tspilling(a): " << *i->first << '\n';
+ earliestStart = std::min(earliestStart, i->first->beginNumber());
+ if (i->first->remat)
+ vrm_->setVirtIsReMaterialized(reg, i->first->remat);
+ int slot = i->first->remat ? vrm_->assignVirtReMatId(reg)
+ : vrm_->assignVirt2StackSlot(reg);
+ std::vector<LiveInterval*> newIs =
+ li_->addIntervalsForSpills(*i->first, *vrm_, slot);
+ std::copy(newIs.begin(), newIs.end(), std::back_inserter(added));
+ spilled.insert(reg);
+ }
+ }
+ for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){
+ unsigned reg = i->first->reg;
+ if (//MRegisterInfo::isVirtualRegister(reg) &&
+ toSpill[vrm_->getPhys(reg)] &&
+ cur->overlapsFrom(*i->first, i->second-1)) {
+ DOUT << "\t\t\tspilling(i): " << *i->first << '\n';
+ earliestStart = std::min(earliestStart, i->first->beginNumber());
+ if (i->first->remat)
+ vrm_->setVirtIsReMaterialized(reg, i->first->remat);
+ int slot = i->first->remat ? vrm_->assignVirtReMatId(reg)
+ : vrm_->assignVirt2StackSlot(reg);
+ std::vector<LiveInterval*> newIs =
+ li_->addIntervalsForSpills(*i->first, *vrm_, slot);
+ std::copy(newIs.begin(), newIs.end(), std::back_inserter(added));
+ spilled.insert(reg);
+ }
+ }
+
+ DOUT << "\t\trolling back to: " << earliestStart << '\n';
+
+ // Scan handled in reverse order up to the earliest start of a
+ // spilled live interval and undo each one, restoring the state of
+ // unhandled.
+ while (!handled_.empty()) {
+ LiveInterval* i = handled_.back();
+ // If this interval starts before t we are done.
+ if (i->beginNumber() < earliestStart)
+ break;
+ DOUT << "\t\t\tundo changes for: " << *i << '\n';
+ handled_.pop_back();
+
+ // When undoing a live interval allocation we must know if it is active or
+ // inactive to properly update the PhysRegTracker and the VirtRegMap.
+ IntervalPtrs::iterator it;
+ if ((it = FindIntervalInVector(active_, i)) != active_.end()) {
+ active_.erase(it);
+ assert(!MRegisterInfo::isPhysicalRegister(i->reg));
+ if (!spilled.count(i->reg))
+ unhandled_.push(i);
+ prt_->delRegUse(vrm_->getPhys(i->reg));
+ vrm_->clearVirt(i->reg);
+ } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) {
+ inactive_.erase(it);
+ assert(!MRegisterInfo::isPhysicalRegister(i->reg));
+ if (!spilled.count(i->reg))
+ unhandled_.push(i);
+ vrm_->clearVirt(i->reg);
+ } else {
+ assert(MRegisterInfo::isVirtualRegister(i->reg) &&
+ "Can only allocate virtual registers!");
+ vrm_->clearVirt(i->reg);
+ unhandled_.push(i);
+ }
+ }
+
+ // Rewind the iterators in the active, inactive, and fixed lists back to the
+ // point we reverted to.
+ RevertVectorIteratorsTo(active_, earliestStart);
+ RevertVectorIteratorsTo(inactive_, earliestStart);
+ RevertVectorIteratorsTo(fixed_, earliestStart);
+
+ // scan the rest and undo each interval that expired after t and
+ // insert it in active (the next iteration of the algorithm will
+ // put it in inactive if required)
+ for (unsigned i = 0, e = handled_.size(); i != e; ++i) {
+ LiveInterval *HI = handled_[i];
+ if (!HI->expiredAt(earliestStart) &&
+ HI->expiredAt(cur->beginNumber())) {
+ DOUT << "\t\t\tundo changes for: " << *HI << '\n';
+ active_.push_back(std::make_pair(HI, HI->begin()));
+ assert(!MRegisterInfo::isPhysicalRegister(HI->reg));
+ prt_->addRegUse(vrm_->getPhys(HI->reg));
+ }
+ }
+
+ // merge added with unhandled
+ for (unsigned i = 0, e = added.size(); i != e; ++i)
+ unhandled_.push(added[i]);
+}
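+
+// The central spill decision of the function above, in isolation (a sketch;
+// the weights are hypothetical): spill the current interval itself when it is
+// no more valuable than the cheapest register it could displace, otherwise
+// displace that register's intervals and backtrack to their earliest start.
+// HUGE_VALF marks intervals that must never be spilled; <cmath> is already
+// included at the top of this file.
+static bool exampleShouldSpillCurrent(float CurWeight, float MinRegWeight) {
+  return CurWeight != HUGE_VALF && CurWeight <= MinRegWeight;
+}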
+
+/// getFreePhysReg - return a free physical register for this virtual register
+/// interval if we have one, otherwise return 0.
+unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
+ std::vector<unsigned> inactiveCounts(mri_->getNumRegs(), 0);
+ unsigned MaxInactiveCount = 0;
+
+ const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(cur->reg);
+ const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
+
+ for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end();
+ i != e; ++i) {
+ unsigned reg = i->first->reg;
+ assert(MRegisterInfo::isVirtualRegister(reg) &&
+ "Can only allocate virtual registers!");
+
+ // If this is not in a related reg class to the register we're allocating,
+ // don't check it.
+ const TargetRegisterClass *RegRC = mf_->getSSARegMap()->getRegClass(reg);
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) {
+ reg = vrm_->getPhys(reg);
+ ++inactiveCounts[reg];
+ MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]);
+ }
+ }
+
+ unsigned FreeReg = 0;
+ unsigned FreeRegInactiveCount = 0;
+
+ // If copy coalescer has assigned a "preferred" register, check if it's
+ // available first.
+ if (cur->preference)
+ if (prt_->isRegAvail(cur->preference)) {
+ DOUT << "\t\tassigned the preferred register: "
+ << mri_->getName(cur->preference) << "\n";
+ return cur->preference;
+ } else
+ DOUT << "\t\tunable to assign the preferred register: "
+ << mri_->getName(cur->preference) << "\n";
+
+ // Scan for the first available register.
+ TargetRegisterClass::iterator I = RC->allocation_order_begin(*mf_);
+ TargetRegisterClass::iterator E = RC->allocation_order_end(*mf_);
+ for (; I != E; ++I)
+ if (prt_->isRegAvail(*I)) {
+ FreeReg = *I;
+ FreeRegInactiveCount = inactiveCounts[FreeReg];
+ break;
+ }
+
+ // If there are no free regs, or if this reg has the max inactive count,
+ // return this register.
+ if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) return FreeReg;
+
+ // Continue scanning the registers, looking for the one with the highest
+ // inactive count. Alkis found that this reduced register pressure very
+ // slightly on X86 (in rev 1.94 of this file), though this should probably be
+ // reevaluated now.
+ for (; I != E; ++I) {
+ unsigned Reg = *I;
+ if (prt_->isRegAvail(Reg) && FreeRegInactiveCount < inactiveCounts[Reg]) {
+ FreeReg = Reg;
+ FreeRegInactiveCount = inactiveCounts[Reg];
+ if (FreeRegInactiveCount == MaxInactiveCount)
+ break; // We found the one with the max inactive count.
+ }
+ }
+
+ return FreeReg;
+}
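+
+// A condensed version of the scan above (hypothetical data): walk the
+// allocation order, take the first free register, then keep looking for a
+// free register with a strictly higher inactive-interval count, stopping as
+// soon as the maximum possible count is reached. Register number 0 stands
+// for "no register found".
+static unsigned examplePickFreeReg(const bool *IsAvail,
+                                   const unsigned *InactiveCnt,
+                                   const unsigned *AllocOrder, unsigned NumRegs,
+                                   unsigned MaxInactiveCount) {
+  unsigned FreeReg = 0, FreeCnt = 0;
+  for (unsigned i = 0; i != NumRegs; ++i) {
+    unsigned Reg = AllocOrder[i];
+    if (!IsAvail[Reg]) continue;
+    if (FreeReg == 0 || InactiveCnt[Reg] > FreeCnt) {
+      FreeReg = Reg;
+      FreeCnt = InactiveCnt[Reg];
+      if (FreeCnt == MaxInactiveCount) break;   // cannot do better
+    }
+  }
+  return FreeReg;
+}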
+
+FunctionPass* llvm::createLinearScanRegisterAllocator() {
+ return new RALinScan();
+}
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
new file mode 100644
index 0000000..6454900
--- /dev/null
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -0,0 +1,830 @@
+//===-- RegAllocLocal.cpp - A BasicBlock generic register allocator -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This register allocator allocates registers to a basic block at a time,
+// attempting to keep values in registers and reusing registers as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumFolded, "Number of loads/stores folded into instructions");
+
+namespace {
+ static RegisterRegAlloc
+ localRegAlloc("local", " local register allocator",
+ createLocalRegisterAllocator);
+
+
+ class VISIBILITY_HIDDEN RALocal : public MachineFunctionPass {
+ public:
+ static char ID;
+ RALocal() : MachineFunctionPass((intptr_t)&ID) {}
+ private:
+ const TargetMachine *TM;
+ MachineFunction *MF;
+ const MRegisterInfo *RegInfo;
+ LiveVariables *LV;
+
+ // StackSlotForVirtReg - Maps virtual regs to the frame index where these
+ // values are spilled.
+ std::map<unsigned, int> StackSlotForVirtReg;
+
+ // Virt2PhysRegMap - This map contains entries for each virtual register
+ // that is currently available in a physical register.
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap;
+
+ unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) {
+ return Virt2PhysRegMap[VirtReg];
+ }
+
+ // PhysRegsUsed - This array is effectively a map, containing entries for
+ // each physical register that currently has a value (ie, it is in
+ // Virt2PhysRegMap). The value mapped to is the virtual register
+ // corresponding to the physical register (the inverse of the
+ // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned
+ // because it is used by a future instruction, and to -2 if it is not
+ // allocatable. If the entry for a physical register is -1, then the
+ // physical register is "not in the map".
+ //
+ std::vector<int> PhysRegsUsed;
+
+ // PhysRegsUseOrder - This contains a list of the physical registers that
+ // currently have a virtual register value in them. This list provides an
+ // ordering of registers, imposing a reallocation order. This list is only
+ // used if all registers are allocated and we have to spill one, in which
+ // case we spill the least recently used register. Entries at the front of
+ // the list are the least recently used registers, entries at the back are
+ // the most recently used.
+ //
+ std::vector<unsigned> PhysRegsUseOrder;
+
+ // VirtRegModified - This bitset contains information about which virtual
+ // registers need to be spilled back to memory when their registers are
+ // scavenged. If a virtual register has simply been rematerialized, there
+ // is no reason to spill it to memory when we need the register back.
+ //
+ std::vector<bool> VirtRegModified;
+
+ void markVirtRegModified(unsigned Reg, bool Val = true) {
+ assert(MRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ Reg -= MRegisterInfo::FirstVirtualRegister;
+ if (VirtRegModified.size() <= Reg) VirtRegModified.resize(Reg+1);
+ VirtRegModified[Reg] = Val;
+ }
+
+ bool isVirtRegModified(unsigned Reg) const {
+ assert(MRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+ assert(Reg - MRegisterInfo::FirstVirtualRegister < VirtRegModified.size()
+ && "Illegal virtual register!");
+ return VirtRegModified[Reg - MRegisterInfo::FirstVirtualRegister];
+ }
+
+ void AddToPhysRegsUseOrder(unsigned Reg) {
+ std::vector<unsigned>::iterator It =
+ std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), Reg);
+ if (It != PhysRegsUseOrder.end())
+ PhysRegsUseOrder.erase(It);
+ PhysRegsUseOrder.push_back(Reg);
+ }
+
+ void MarkPhysRegRecentlyUsed(unsigned Reg) {
+ if (PhysRegsUseOrder.empty() ||
+ PhysRegsUseOrder.back() == Reg) return; // Already most recently used
+
+ for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i)
+ if (areRegsEqual(Reg, PhysRegsUseOrder[i-1])) {
+ unsigned RegMatch = PhysRegsUseOrder[i-1]; // remove from middle
+ PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1);
+ // Add it to the end of the list
+ PhysRegsUseOrder.push_back(RegMatch);
+ if (RegMatch == Reg)
+ return; // Found an exact match, exit early
+ }
+ }
+
+ public:
+ virtual const char *getPassName() const {
+ return "Local Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveVariables>();
+ AU.addRequiredID(PHIEliminationID);
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// runOnMachineFunction - Register allocate the whole function
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ /// AllocateBasicBlock - Register allocate the specified basic block.
+ void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+
+ /// areRegsEqual - This method returns true if the specified registers are
+ /// related to each other. To do this, it checks to see if they are equal
+ /// or if the first register is in the alias set of the second register.
+ ///
+ bool areRegsEqual(unsigned R1, unsigned R2) const {
+ if (R1 == R2) return true;
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(R2);
+ *AliasSet; ++AliasSet) {
+ if (*AliasSet == R1) return true;
+ }
+ return false;
+ }
+
+ /// getStackSpaceFor - This returns the frame index of the specified virtual
+ /// register on the stack, allocating space if necessary.
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+ /// removePhysReg - This method marks the specified physical register as no
+ /// longer being in use.
+ ///
+ void removePhysReg(unsigned PhysReg);
+
+ /// spillVirtReg - This method spills the value specified by PhysReg into
+ /// the virtual register slot specified by VirtReg. It then updates the RA
+ /// data structures to indicate the fact that PhysReg is now available.
+ ///
+ void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned VirtReg, unsigned PhysReg);
+
+ /// spillPhysReg - This method spills the specified physical register into
+ /// the virtual register slot associated with it. If OnlyVirtRegs is set to
+ /// true, then the request is ignored if the physical register does not
+ /// contain a virtual register.
+ ///
+ void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned PhysReg, bool OnlyVirtRegs = false);
+
+ /// assignVirtToPhysReg - This method updates local state so that we know
+ /// that PhysReg is the proper container for VirtReg now. The physical
+ /// register must not be used for anything else when this is called.
+ ///
+ void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg);
+
+ /// isPhysRegAvailable - Return true if the specified physical register is
+ /// free and available for use. This also includes checking to see if
+ /// aliased registers are all free...
+ ///
+ bool isPhysRegAvailable(unsigned PhysReg) const;
+
+ /// getFreeReg - Look to see if there is a free register available in the
+ /// specified register class. If not, return 0.
+ ///
+ unsigned getFreeReg(const TargetRegisterClass *RC);
+
+ /// getReg - Find a physical register to hold the specified virtual
+ /// register. If all compatible physical registers are used, this method
+ /// spills the last used virtual register to the stack, and uses that
+ /// register.
+ ///
+ unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned VirtReg);
+
+    /// reloadVirtReg - This method transforms the specified virtual
+ /// register use to refer to a physical register. This method may do this
+ /// in one of several ways: if the register is available in a physical
+ /// register already, it uses that physical register. If the value is not
+ /// in a physical register, and if there are physical registers available,
+ /// it loads it into a register. If register pressure is high, and it is
+ /// possible, it tries to fold the load of the virtual register into the
+ /// instruction itself. It avoids doing this if register pressure is low to
+ /// improve the chance that subsequent instructions can use the reloaded
+ /// value. This method returns the modified instruction.
+ ///
+ MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned OpNum);
+
+
+ void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I,
+ unsigned PhysReg);
+ };
+ char RALocal::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+ // Find the location Reg would belong...
+ std::map<unsigned, int>::iterator I =StackSlotForVirtReg.lower_bound(VirtReg);
+
+ if (I != StackSlotForVirtReg.end() && I->first == VirtReg)
+ return I->second; // Already has space allocated?
+
+ // Allocate a new stack object for this spill location...
+ int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment());
+
+ // Assign the slot...
+ StackSlotForVirtReg.insert(I, std::make_pair(VirtReg, FrameIdx));
+ return FrameIdx;
+}
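getStackSpaceFor uses the usual std::map lower_bound idiom: the same iterator answers "is a slot already assigned?" and then serves as the insertion hint, so the tree is searched only once per call. A self-contained sketch of the pattern, with the frame-index creation replaced by a plain counter (all names here are hypothetical):

    #include <map>
    #include <utility>

    // Lazily assign a slot id to each key, probing the map only once.
    int getOrCreateSlot(std::map<unsigned, int> &Slots, unsigned Key,
                        int &NextSlot) {
      std::map<unsigned, int>::iterator I = Slots.lower_bound(Key);
      if (I != Slots.end() && I->first == Key)
        return I->second;                           // already allocated
      int Slot = NextSlot++;                        // stands in for CreateStackObject
      Slots.insert(I, std::make_pair(Key, Slot));   // I is a valid insertion hint
      return Slot;
    }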
+
+
+/// removePhysReg - This method marks the specified physical register as no
+/// longer being in use.
+///
+void RALocal::removePhysReg(unsigned PhysReg) {
+ PhysRegsUsed[PhysReg] = -1; // PhyReg no longer used
+
+ std::vector<unsigned>::iterator It =
+ std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), PhysReg);
+ if (It != PhysRegsUseOrder.end())
+ PhysRegsUseOrder.erase(It);
+}
+
+
+/// spillVirtReg - This method spills the value specified by PhysReg into the
+/// virtual register slot specified by VirtReg. It then updates the RA data
+/// structures to indicate the fact that PhysReg is now available.
+///
+void RALocal::spillVirtReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned VirtReg, unsigned PhysReg) {
+  assert(VirtReg && "Spilling a physical register is illegal!"
+         " The register either has no appropriate kill or is used beyond"
+         " the intended point.");
+ DOUT << " Spilling register " << RegInfo->getName(PhysReg)
+ << " containing %reg" << VirtReg;
+ if (!isVirtRegModified(VirtReg))
+ DOUT << " which has not been modified, so no store necessary!";
+
+ // Otherwise, there is a virtual register corresponding to this physical
+ // register. We only need to spill it into its stack slot if it has been
+ // modified.
+ if (isVirtRegModified(VirtReg)) {
+ const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+ DOUT << " to stack slot #" << FrameIndex;
+ RegInfo->storeRegToStackSlot(MBB, I, PhysReg, FrameIndex, RC);
+ ++NumStores; // Update statistics
+ }
+
+ getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available
+
+ DOUT << "\n";
+ removePhysReg(PhysReg);
+}
+
+
+/// spillPhysReg - This method spills the specified physical register into the
+/// virtual register slot associated with it. If OnlyVirtRegs is set to true,
+/// then the request is ignored if the physical register does not contain a
+/// virtual register.
+///
+void RALocal::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned PhysReg, bool OnlyVirtRegs) {
+ if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used!
+ assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!");
+ if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
+ spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
+ } else {
+ // If the selected register aliases any other registers, we must make
+ // sure that one of the aliases isn't alive.
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet)
+ if (PhysRegsUsed[*AliasSet] != -1 && // Spill aliased register.
+ PhysRegsUsed[*AliasSet] != -2) // If allocatable.
+ if (PhysRegsUsed[*AliasSet])
+ spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
+ }
+}
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now. The physical
+/// register must not be used for anything else when this is called.
+///
+void RALocal::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+ assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!");
+ // Update information to note the fact that this register was just used, and
+ // it holds VirtReg.
+ PhysRegsUsed[PhysReg] = VirtReg;
+ getVirt2PhysRegMapSlot(VirtReg) = PhysReg;
+ AddToPhysRegsUseOrder(PhysReg); // New use of PhysReg
+}
+
+
+/// isPhysRegAvailable - Return true if the specified physical register is free
+/// and available for use. This also includes checking to see if aliased
+/// registers are all free...
+///
+bool RALocal::isPhysRegAvailable(unsigned PhysReg) const {
+ if (PhysRegsUsed[PhysReg] != -1) return false;
+
+ // If the selected register aliases any other allocated registers, it is
+ // not free!
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet)
+ if (PhysRegsUsed[*AliasSet] != -1) // Aliased register in use?
+ return false; // Can't use this reg then.
+ return true;
+}
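The availability checks above lean on the zero-terminated alias arrays returned by getAliasSet: a register is only genuinely free when it and every register that overlaps it are free. The same check in isolation, against a plain state array where -1 means "not in the map" as in PhysRegsUsed (hypothetical names, not the allocator's own):

    // Reg is free only if neither it nor any overlapping register holds a value.
    // AliasSet mirrors getAliasSet(): a 0-terminated array of overlapping regs.
    static bool regIsFree(const int *RegState, unsigned Reg,
                          const unsigned *AliasSet) {
      if (RegState[Reg] != -1)
        return false;                  // the register itself is live
      for (const unsigned *A = AliasSet; A && *A; ++A)
        if (RegState[*A] != -1)
          return false;                // an aliased register is live
      return true;
    }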
+
+
+/// getFreeReg - Look to see if there is a free register available in the
+/// specified register class. If not, return 0.
+///
+unsigned RALocal::getFreeReg(const TargetRegisterClass *RC) {
+ // Get iterators defining the range of registers that are valid to allocate in
+ // this class, which also specifies the preferred allocation order.
+ TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+ TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+
+ for (; RI != RE; ++RI)
+ if (isPhysRegAvailable(*RI)) { // Is reg unused?
+ assert(*RI != 0 && "Cannot use register!");
+ return *RI; // Found an unused register!
+ }
+ return 0;
+}
+
+
+/// getReg - Find a physical register to hold the specified virtual
+/// register. If all compatible physical registers are used, this method spills
+/// the last used virtual register to the stack, and uses that register.
+///
+unsigned RALocal::getReg(MachineBasicBlock &MBB, MachineInstr *I,
+ unsigned VirtReg) {
+ const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg);
+
+ // First check to see if we have a free register of the requested type...
+ unsigned PhysReg = getFreeReg(RC);
+
+ // If we didn't find an unused register, scavenge one now!
+ if (PhysReg == 0) {
+ assert(!PhysRegsUseOrder.empty() && "No allocated registers??");
+
+ // Loop over all of the preallocated registers from the least recently used
+ // to the most recently used. When we find one that is capable of holding
+ // our register, use it.
+ for (unsigned i = 0; PhysReg == 0; ++i) {
+ assert(i != PhysRegsUseOrder.size() &&
+ "Couldn't find a register of the appropriate class!");
+
+ unsigned R = PhysRegsUseOrder[i];
+
+ // We can only use this register if it holds a virtual register (ie, it
+ // can be spilled). Do not use it if it is an explicitly allocated
+ // physical register!
+ assert(PhysRegsUsed[R] != -1 &&
+ "PhysReg in PhysRegsUseOrder, but is not allocated?");
+ if (PhysRegsUsed[R] && PhysRegsUsed[R] != -2) {
+ // If the current register is compatible, use it.
+ if (RC->contains(R)) {
+ PhysReg = R;
+ break;
+ } else {
+ // If one of the registers aliased to the current register is
+ // compatible, use it.
+ for (const unsigned *AliasIt = RegInfo->getAliasSet(R);
+ *AliasIt; ++AliasIt) {
+ if (RC->contains(*AliasIt) &&
+                // If this is pinned down for some reason, don't use it. For
+                // example, if CL is pinned and we run across CH, don't use
+                // CH as justification for scavenging ECX (which will
+                // fail).
+ PhysRegsUsed[*AliasIt] != 0 &&
+
+ // Make sure the register is allocatable. Don't allocate SIL on
+ // x86-32.
+ PhysRegsUsed[*AliasIt] != -2) {
+ PhysReg = *AliasIt; // Take an aliased register
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ assert(PhysReg && "Physical register not assigned!?!?");
+
+ // At this point PhysRegsUseOrder[i] is the least recently used register of
+ // compatible register class. Spill it to memory and reap its remains.
+ spillPhysReg(MBB, I, PhysReg);
+ }
+
+ // Now that we know which register we need to assign this to, do it now!
+ assignVirtToPhysReg(VirtReg, PhysReg);
+ return PhysReg;
+}
+
+
+/// reloadVirtReg - This method transforms the specified virtual
+/// register use to refer to a physical register. This method may do this in
+/// one of several ways: if the register is available in a physical register
+/// already, it uses that physical register. If the value is not in a physical
+/// register, and if there are physical registers available, it loads it into a
+/// register. If register pressure is high, and it is possible, it tries to
+/// fold the load of the virtual register into the instruction itself. It
+/// avoids doing this if register pressure is low to improve the chance that
+/// subsequent instructions can use the reloaded value. This method returns the
+/// modified instruction.
+///
+MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned OpNum) {
+ unsigned VirtReg = MI->getOperand(OpNum).getReg();
+
+ // If the virtual register is already available, just update the instruction
+ // and return.
+ if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
+ MarkPhysRegRecentlyUsed(PR); // Already have this value available!
+ MI->getOperand(OpNum).setReg(PR); // Assign the input register
+ return MI;
+ }
+
+ // Otherwise, we need to fold it into the current instruction, or reload it.
+ // If we have registers available to hold the value, use them.
+ const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg);
+ unsigned PhysReg = getFreeReg(RC);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+
+ if (PhysReg) { // Register is available, allocate it!
+ assignVirtToPhysReg(VirtReg, PhysReg);
+ } else { // No registers available.
+ // If we can fold this spill into this instruction, do so now.
+ if (MachineInstr* FMI = RegInfo->foldMemoryOperand(MI, OpNum, FrameIndex)){
+ ++NumFolded;
+ // Since we changed the address of MI, make sure to update live variables
+ // to know that the new instruction has the properties of the old one.
+ LV->instructionChanged(MI, FMI);
+ return MBB.insert(MBB.erase(MI), FMI);
+ }
+
+ // It looks like we can't fold this virtual register load into this
+ // instruction. Force some poor hapless value out of the register file to
+ // make room for the new register, and reload it.
+ PhysReg = getReg(MBB, MI, VirtReg);
+ }
+
+ markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded
+
+ DOUT << " Reloading %reg" << VirtReg << " into "
+ << RegInfo->getName(PhysReg) << "\n";
+
+ // Add move instruction(s)
+ RegInfo->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC);
+ ++NumLoads; // Update statistics
+
+ MF->setPhysRegUsed(PhysReg);
+ MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register
+ return MI;
+}
+
+/// isReadModWriteImplicitKill - True if this is an implicit kill for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isRegister() && MO.getReg() == Reg && MO.isImplicit() &&
+ MO.isDef() && !MO.isDead())
+ return true;
+ }
+ return false;
+}
+
+/// isReadModWriteImplicitDef - True if this is an implicit def for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isRegister() && MO.getReg() == Reg && MO.isImplicit() &&
+ !MO.isDef() && MO.isKill())
+ return true;
+ }
+ return false;
+}
+
+void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
+ // loop over each instruction
+ MachineBasicBlock::iterator MII = MBB.begin();
+ const TargetInstrInfo &TII = *TM->getInstrInfo();
+
+ DEBUG(const BasicBlock *LBB = MBB.getBasicBlock();
+ if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName());
+
+ // If this is the first basic block in the machine function, add live-in
+ // registers as active.
+ if (&MBB == &*MF->begin()) {
+ for (MachineFunction::livein_iterator I = MF->livein_begin(),
+ E = MF->livein_end(); I != E; ++I) {
+ unsigned Reg = I->first;
+ MF->setPhysRegUsed(Reg);
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(Reg);
+ for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ AddToPhysRegsUseOrder(*AliasSet);
+ PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now
+ MF->setPhysRegUsed(*AliasSet);
+ }
+ }
+ }
+ }
+
+ // Otherwise, sequentially allocate each instruction in the MBB.
+ while (MII != MBB.end()) {
+ MachineInstr *MI = MII++;
+ const TargetInstrDescriptor &TID = TII.get(MI->getOpcode());
+ DEBUG(DOUT << "\nStarting RegAlloc of: " << *MI;
+ DOUT << " Regs have values: ";
+ for (unsigned i = 0; i != RegInfo->getNumRegs(); ++i)
+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+ DOUT << "[" << RegInfo->getName(i)
+ << ",%reg" << PhysRegsUsed[i] << "] ";
+ DOUT << "\n");
+
+    // Loop over the implicit uses, marking them as recently used so they
+    // don't get reallocated.
+ if (TID.ImplicitUses) {
+ for (const unsigned *ImplicitUses = TID.ImplicitUses;
+ *ImplicitUses; ++ImplicitUses)
+ MarkPhysRegRecentlyUsed(*ImplicitUses);
+ }
+
+ SmallVector<unsigned, 8> Kills;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isRegister() && MO.isKill()) {
+ if (!MO.isImplicit())
+ Kills.push_back(MO.getReg());
+ else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
+ // These are extra physical register kills when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ Kills.push_back(MO.getReg());
+ }
+ }
+
+ // Get the used operands into registers. This has the potential to spill
+ // incoming values if we are out of registers. Note that we completely
+ // ignore physical register uses here. We assume that if an explicit
+ // physical register is referenced by the instruction, that it is guaranteed
+ // to be live-in, or the input is badly hosed.
+ //
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ // here we are looking for only used operands (never def&use)
+ if (MO.isRegister() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
+ MRegisterInfo::isVirtualRegister(MO.getReg()))
+ MI = reloadVirtReg(MBB, MI, i);
+ }
+
+ // If this instruction is the last user of this register, kill the
+ // value, freeing the register being used, so it doesn't need to be
+ // spilled to memory.
+ //
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+ unsigned VirtReg = Kills[i];
+ unsigned PhysReg = VirtReg;
+ if (MRegisterInfo::isVirtualRegister(VirtReg)) {
+ // If the virtual register was never materialized into a register, it
+ // might not be in the map, but it won't hurt to zero it out anyway.
+ unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+ PhysReg = PhysRegSlot;
+ PhysRegSlot = 0;
+ } else if (PhysRegsUsed[PhysReg] == -2) {
+ // Unallocatable register dead, ignore.
+ continue;
+ } else {
+        assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) &&
+               "Silently clearing a virtual register?");
+ }
+
+ if (PhysReg) {
+ DOUT << " Last use of " << RegInfo->getName(PhysReg)
+ << "[%reg" << VirtReg <<"], removing it from live set\n";
+ removePhysReg(PhysReg);
+ for (const unsigned *AliasSet = RegInfo->getSubRegisters(PhysReg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ DOUT << " Last use of "
+ << RegInfo->getName(*AliasSet)
+ << "[%reg" << VirtReg <<"], removing it from live set\n";
+ removePhysReg(*AliasSet);
+ }
+ }
+ }
+ }
+
+ // Loop over all of the operands of the instruction, spilling registers that
+ // are defined, and marking explicit destinations in the PhysRegsUsed map.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isRegister() && MO.isDef() && !MO.isImplicit() && MO.getReg() &&
+ MRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+ if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
+ // These are extra physical register defs when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+ MF->setPhysRegUsed(Reg);
+ spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(Reg);
+
+ for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ MF->setPhysRegUsed(*AliasSet);
+ PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(*AliasSet);
+ }
+ }
+ }
+ }
+
+ // Loop over the implicit defs, spilling them as well.
+ if (TID.ImplicitDefs) {
+ for (const unsigned *ImplicitDefs = TID.ImplicitDefs;
+ *ImplicitDefs; ++ImplicitDefs) {
+ unsigned Reg = *ImplicitDefs;
+ if (PhysRegsUsed[Reg] != -2) {
+ spillPhysReg(MBB, MI, Reg, true);
+ AddToPhysRegsUseOrder(Reg);
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ }
+ MF->setPhysRegUsed(Reg);
+ for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ AddToPhysRegsUseOrder(*AliasSet);
+ PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now
+ MF->setPhysRegUsed(*AliasSet);
+ }
+ }
+ }
+ }
+
+ SmallVector<unsigned, 8> DeadDefs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isRegister() && MO.isDead())
+ DeadDefs.push_back(MO.getReg());
+ }
+
+ // Okay, we have allocated all of the source operands and spilled any values
+ // that would be destroyed by defs of this instruction. Loop over the
+ // explicit defs and assign them to a register, spilling incoming values if
+ // we need to scavenge a register.
+ //
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand& MO = MI->getOperand(i);
+ if (MO.isRegister() && MO.isDef() && MO.getReg() &&
+ MRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned DestVirtReg = MO.getReg();
+ unsigned DestPhysReg;
+
+ // If DestVirtReg already has a value, use it.
+ if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
+ DestPhysReg = getReg(MBB, MI, DestVirtReg);
+ MF->setPhysRegUsed(DestPhysReg);
+ markVirtRegModified(DestVirtReg);
+ MI->getOperand(i).setReg(DestPhysReg); // Assign the output register
+ }
+ }
+
+ // If this instruction defines any registers that are immediately dead,
+ // kill them now.
+ //
+ for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) {
+ unsigned VirtReg = DeadDefs[i];
+ unsigned PhysReg = VirtReg;
+ if (MRegisterInfo::isVirtualRegister(VirtReg)) {
+ unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+ PhysReg = PhysRegSlot;
+ assert(PhysReg != 0);
+ PhysRegSlot = 0;
+ } else if (PhysRegsUsed[PhysReg] == -2) {
+ // Unallocatable register dead, ignore.
+ continue;
+ }
+
+ if (PhysReg) {
+ DOUT << " Register " << RegInfo->getName(PhysReg)
+ << " [%reg" << VirtReg
+ << "] is never used, removing it frame live list\n";
+ removePhysReg(PhysReg);
+ for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ DOUT << " Register " << RegInfo->getName(*AliasSet)
+ << " [%reg" << *AliasSet
+ << "] is never used, removing it frame live list\n";
+ removePhysReg(*AliasSet);
+ }
+ }
+ }
+ }
+
+ // Finally, if this is a noop copy instruction, zap it.
+ unsigned SrcReg, DstReg;
+ if (TII.isMoveInstr(*MI, SrcReg, DstReg) && SrcReg == DstReg) {
+ LV->removeVirtualRegistersKilled(MI);
+ LV->removeVirtualRegistersDead(MI);
+ MBB.erase(MI);
+ }
+ }
+
+ MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
+
+ // Spill all physical registers holding virtual registers now.
+ for (unsigned i = 0, e = RegInfo->getNumRegs(); i != e; ++i)
+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+ if (unsigned VirtReg = PhysRegsUsed[i])
+ spillVirtReg(MBB, MI, VirtReg, i);
+ else
+ removePhysReg(i);
+
+#if 0
+ // This checking code is very expensive.
+ bool AllOk = true;
+ for (unsigned i = MRegisterInfo::FirstVirtualRegister,
+ e = MF->getSSARegMap()->getLastVirtReg(); i <= e; ++i)
+ if (unsigned PR = Virt2PhysRegMap[i]) {
+ cerr << "Register still mapped: " << i << " -> " << PR << "\n";
+ AllOk = false;
+ }
+ assert(AllOk && "Virtual registers still in phys regs?");
+#endif
+
+  // Clear any physical registers which appear live at the end of the basic
+  // block but which do not hold any virtual registers, e.g., the stack
+  // pointer.
+ PhysRegsUseOrder.clear();
+}
+
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RALocal::runOnMachineFunction(MachineFunction &Fn) {
+ DOUT << "Machine Function " << "\n";
+ MF = &Fn;
+ TM = &Fn.getTarget();
+ RegInfo = TM->getRegisterInfo();
+ LV = &getAnalysis<LiveVariables>();
+
+ PhysRegsUsed.assign(RegInfo->getNumRegs(), -1);
+
+ // At various places we want to efficiently check to see whether a register
+ // is allocatable. To handle this, we mark all unallocatable registers as
+ // being pinned down, permanently.
+ {
+ BitVector Allocable = RegInfo->getAllocatableSet(Fn);
+ for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
+ if (!Allocable[i])
+ PhysRegsUsed[i] = -2; // Mark the reg unallocable.
+ }
+
+ // initialize the virtual->physical register map to have a 'null'
+ // mapping for all virtual registers
+ Virt2PhysRegMap.grow(MF->getSSARegMap()->getLastVirtReg());
+
+ // Loop over all of the basic blocks, eliminating virtual register references
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB)
+ AllocateBasicBlock(*MBB);
+
+ StackSlotForVirtReg.clear();
+ PhysRegsUsed.clear();
+ VirtRegModified.clear();
+ Virt2PhysRegMap.clear();
+ return true;
+}
+
+FunctionPass *llvm::createLocalRegisterAllocator() {
+ return new RALocal();
+}
diff --git a/lib/CodeGen/RegAllocSimple.cpp b/lib/CodeGen/RegAllocSimple.cpp
new file mode 100644
index 0000000..f49dd4c
--- /dev/null
+++ b/lib/CodeGen/RegAllocSimple.cpp
@@ -0,0 +1,253 @@
+//===-- RegAllocSimple.cpp - A simple generic register allocator ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple register allocator. *Very* simple: it immediately
+// spills every value right after it is computed, and it reloads all used
+// operands from the spill area to temporary registers before each instruction.
+// It does not keep values in registers across instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+
+namespace {
+ static RegisterRegAlloc
+ simpleRegAlloc("simple", " simple register allocator",
+ createSimpleRegisterAllocator);
+
+ class VISIBILITY_HIDDEN RegAllocSimple : public MachineFunctionPass {
+ public:
+ static char ID;
+ RegAllocSimple() : MachineFunctionPass((intptr_t)&ID) {}
+ private:
+ MachineFunction *MF;
+ const TargetMachine *TM;
+ const MRegisterInfo *RegInfo;
+
+ // StackSlotForVirtReg - Maps SSA Regs => frame index on the stack where
+ // these values are spilled
+ std::map<unsigned, int> StackSlotForVirtReg;
+
+ // RegsUsed - Keep track of what registers are currently in use. This is a
+ // bitset.
+ std::vector<bool> RegsUsed;
+
+ // RegClassIdx - Maps RegClass => which index we can take a register
+ // from. Since this is a simple register allocator, when we need a register
+ // of a certain class, we just take the next available one.
+ std::map<const TargetRegisterClass*, unsigned> RegClassIdx;
+
+ public:
+ virtual const char *getPassName() const {
+ return "Simple Register Allocator";
+ }
+
+ /// runOnMachineFunction - Register allocate the whole function
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(PHIEliminationID); // Eliminate PHI nodes
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ private:
+ /// AllocateBasicBlock - Register allocate the specified basic block.
+ void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+    /// getStackSpaceFor - This returns the frame index of the specified
+    /// virtual register on the stack, allocating space if necessary.
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+ /// Given a virtual register, return a compatible physical register that is
+ /// currently unused.
+ ///
+ /// Side effect: marks that register as being used until manually cleared
+ ///
+ unsigned getFreeReg(unsigned virtualReg);
+
+ /// Moves value from memory into that register
+ unsigned reloadVirtReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, unsigned VirtReg);
+
+ /// Saves reg value on the stack (maps virtual register to stack value)
+ void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned VirtReg, unsigned PhysReg);
+ };
+ char RegAllocSimple::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual
+/// register to be held on the stack.
+int RegAllocSimple::getStackSpaceFor(unsigned VirtReg,
+ const TargetRegisterClass *RC) {
+ // Find the location VirtReg would belong...
+ std::map<unsigned, int>::iterator I =
+ StackSlotForVirtReg.lower_bound(VirtReg);
+
+ if (I != StackSlotForVirtReg.end() && I->first == VirtReg)
+ return I->second; // Already has space allocated?
+
+ // Allocate a new stack object for this spill location...
+ int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment());
+
+ // Assign the slot...
+ StackSlotForVirtReg.insert(I, std::make_pair(VirtReg, FrameIdx));
+
+ return FrameIdx;
+}
+
+unsigned RegAllocSimple::getFreeReg(unsigned virtualReg) {
+ const TargetRegisterClass* RC = MF->getSSARegMap()->getRegClass(virtualReg);
+ TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
+ TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
+
+ while (1) {
+ unsigned regIdx = RegClassIdx[RC]++;
+ assert(RI+regIdx != RE && "Not enough registers!");
+ unsigned PhysReg = *(RI+regIdx);
+
+ if (!RegsUsed[PhysReg]) {
+ MF->setPhysRegUsed(PhysReg);
+ return PhysReg;
+ }
+ }
+}
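getFreeReg here is deliberately naive: each register class carries a rotating index into its allocation order, and the allocator simply keeps advancing it until it lands on an unused register. A standalone sketch of that rotation follows; unlike the code above it wraps around and only gives up after a full lap, and the names are placeholders rather than the allocator's own.

    #include <cassert>
    #include <vector>

    // Advance a per-class cursor through the allocation order until an unused
    // register turns up; give up after trying every entry once.
    unsigned pickNextFree(const std::vector<unsigned> &AllocOrder,
                          const std::vector<bool> &Used, unsigned &Cursor) {
      for (unsigned Tried = 0; Tried != AllocOrder.size(); ++Tried) {
        unsigned Reg = AllocOrder[Cursor % AllocOrder.size()];
        ++Cursor;
        if (!Used[Reg])
          return Reg;
      }
      assert(false && "Not enough registers in this class!");
      return 0;
    }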
+
+unsigned RegAllocSimple::reloadVirtReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned VirtReg) {
+ const TargetRegisterClass* RC = MF->getSSARegMap()->getRegClass(VirtReg);
+ int FrameIdx = getStackSpaceFor(VirtReg, RC);
+ unsigned PhysReg = getFreeReg(VirtReg);
+
+ // Add move instruction(s)
+ ++NumLoads;
+ RegInfo->loadRegFromStackSlot(MBB, I, PhysReg, FrameIdx, RC);
+ return PhysReg;
+}
+
+void RegAllocSimple::spillVirtReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned VirtReg, unsigned PhysReg) {
+ const TargetRegisterClass* RC = MF->getSSARegMap()->getRegClass(VirtReg);
+ int FrameIdx = getStackSpaceFor(VirtReg, RC);
+
+ // Add move instruction(s)
+ ++NumStores;
+ RegInfo->storeRegToStackSlot(MBB, I, PhysReg, FrameIdx, RC);
+}
+
+
+void RegAllocSimple::AllocateBasicBlock(MachineBasicBlock &MBB) {
+ // loop over each instruction
+ for (MachineBasicBlock::iterator MI = MBB.begin(); MI != MBB.end(); ++MI) {
+ // Made to combat the incorrect allocation of r2 = add r1, r1
+ std::map<unsigned, unsigned> Virt2PhysRegMap;
+
+ RegsUsed.resize(RegInfo->getNumRegs());
+
+ // This is a preliminary pass that will invalidate any registers that are
+ // used by the instruction (including implicit uses).
+ unsigned Opcode = MI->getOpcode();
+ const TargetInstrDescriptor &Desc = TM->getInstrInfo()->get(Opcode);
+ const unsigned *Regs;
+ if (Desc.ImplicitUses) {
+ for (Regs = Desc.ImplicitUses; *Regs; ++Regs)
+ RegsUsed[*Regs] = true;
+ }
+
+ if (Desc.ImplicitDefs) {
+ for (Regs = Desc.ImplicitDefs; *Regs; ++Regs) {
+ RegsUsed[*Regs] = true;
+ MF->setPhysRegUsed(*Regs);
+ }
+ }
+
+ // Loop over uses, move from memory into registers.
+ for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
+ MachineOperand &op = MI->getOperand(i);
+
+ if (op.isRegister() && op.getReg() &&
+ MRegisterInfo::isVirtualRegister(op.getReg())) {
+ unsigned virtualReg = (unsigned) op.getReg();
+ DOUT << "op: " << op << "\n";
+ DOUT << "\t inst[" << i << "]: ";
+ DEBUG(MI->print(*cerr.stream(), TM));
+
+ // make sure the same virtual register maps to the same physical
+ // register in any given instruction
+ unsigned physReg = Virt2PhysRegMap[virtualReg];
+ if (physReg == 0) {
+ if (op.isDef()) {
+ int TiedOp = MI->getInstrDescriptor()->findTiedToSrcOperand(i);
+ if (TiedOp == -1) {
+ physReg = getFreeReg(virtualReg);
+ } else {
+              // must be the same register number as the source operand it is
+              // tied to. This maps a = b + c into b = b + c, and saves b into
+              // a's spot.
+ assert(MI->getOperand(TiedOp).isRegister() &&
+ MI->getOperand(TiedOp).getReg() &&
+ MI->getOperand(TiedOp).isUse() &&
+ "Two address instruction invalid!");
+
+ physReg = MI->getOperand(TiedOp).getReg();
+ }
+ spillVirtReg(MBB, next(MI), virtualReg, physReg);
+ } else {
+ physReg = reloadVirtReg(MBB, MI, virtualReg);
+ Virt2PhysRegMap[virtualReg] = physReg;
+ }
+ }
+ MI->getOperand(i).setReg(physReg);
+ DOUT << "virt: " << virtualReg << ", phys: " << op.getReg() << "\n";
+ }
+ }
+ RegClassIdx.clear();
+ RegsUsed.clear();
+ }
+}
+
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RegAllocSimple::runOnMachineFunction(MachineFunction &Fn) {
+ DOUT << "Machine Function\n";
+ MF = &Fn;
+ TM = &MF->getTarget();
+ RegInfo = TM->getRegisterInfo();
+
+ // Loop over all of the basic blocks, eliminating virtual register references
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB)
+ AllocateBasicBlock(*MBB);
+
+ StackSlotForVirtReg.clear();
+ return true;
+}
+
+FunctionPass *llvm::createSimpleRegisterAllocator() {
+ return new RegAllocSimple();
+}
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
new file mode 100644
index 0000000..ae40e58
--- /dev/null
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -0,0 +1,292 @@
+//===-- RegisterScavenging.cpp - Machine register scavenging --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine register scavenger. It can provide
+// information such as which registers are unused at any point in a machine
+// basic block. It also provides a mechanism to make registers available by
+// evicting them to spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reg-scavenging"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
+ const MachineFunction &MF = *mbb->getParent();
+ const TargetMachine &TM = MF.getTarget();
+ TII = TM.getInstrInfo();
+ RegInfo = TM.getRegisterInfo();
+
+ assert((NumPhysRegs == 0 || NumPhysRegs == RegInfo->getNumRegs()) &&
+ "Target changed?");
+
+ if (!MBB) {
+ NumPhysRegs = RegInfo->getNumRegs();
+ RegsAvailable.resize(NumPhysRegs);
+
+ // Create reserved registers bitvector.
+ ReservedRegs = RegInfo->getReservedRegs(MF);
+
+ // Create callee-saved registers bitvector.
+ CalleeSavedRegs.resize(NumPhysRegs);
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ if (CSRegs != NULL)
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ CalleeSavedRegs.set(CSRegs[i]);
+ }
+
+ MBB = mbb;
+ ScavengedReg = 0;
+ ScavengedRC = NULL;
+
+  // All registers start out unused.
+ RegsAvailable.set();
+
+ // Reserved registers are always used.
+ RegsAvailable ^= ReservedRegs;
+
+ // Live-in registers are in use.
+ if (!MBB->livein_empty())
+ for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I)
+ setUsed(*I);
+
+ Tracking = false;
+}
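enterBasicBlock rebuilds the availability state from scratch for every block: all registers start free, reserved registers are knocked out, and the block's live-in registers are marked in use. The same sequence with std::vector<bool> standing in for BitVector (all names here are hypothetical, not the scavenger's real members):

    #include <vector>

    void resetBlockState(unsigned NumRegs,
                         const std::vector<bool> &Reserved,
                         const std::vector<unsigned> &LiveIns,
                         std::vector<bool> &Available) {
      Available.assign(NumRegs, true);        // every register starts out free
      for (unsigned R = 0; R != NumRegs; ++R)
        if (Reserved[R])
          Available[R] = false;               // reserved registers are always used
      for (unsigned i = 0, e = (unsigned)LiveIns.size(); i != e; ++i)
        Available[LiveIns[i]] = false;        // live-in values occupy registers
    }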
+
+void RegScavenger::restoreScavengedReg() {
+ if (!ScavengedReg)
+ return;
+
+ RegInfo->loadRegFromStackSlot(*MBB, MBBI, ScavengedReg,
+ ScavengingFrameIndex, ScavengedRC);
+ MachineBasicBlock::iterator II = prior(MBBI);
+ RegInfo->eliminateFrameIndex(II, 0, this);
+ setUsed(ScavengedReg);
+ ScavengedReg = 0;
+ ScavengedRC = NULL;
+}
+
+void RegScavenger::forward() {
+ // Move ptr forward.
+ if (!Tracking) {
+ MBBI = MBB->begin();
+ Tracking = true;
+ } else {
+ assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+ MBBI = next(MBBI);
+ }
+
+ MachineInstr *MI = MBBI;
+
+  // If we have reached a terminator instruction, restore the scavenged
+  // register (which must be live out).
+ if (TII->isTerminatorInstr(MI->getOpcode()))
+ restoreScavengedReg();
+
+ // Process uses first.
+ BitVector ChangedRegs(NumPhysRegs);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (!isUsed(Reg)) {
+ // Register has been scavenged. Restore it!
+ if (Reg != ScavengedReg)
+ assert(false && "Using an undefined register!");
+ else
+ restoreScavengedReg();
+ }
+ if (MO.isKill() && !isReserved(Reg))
+ ChangedRegs.set(Reg);
+ }
+ // Change states of all registers after all the uses are processed to guard
+ // against multiple uses.
+ setUnused(ChangedRegs);
+
+ // Process defs.
+ const TargetInstrDescriptor *TID = MI->getInstrDescriptor();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ // If it's dead upon def, then it is now free.
+ if (MO.isDead()) {
+ setUnused(Reg);
+ continue;
+ }
+ // Skip two-address destination operand.
+ if (TID->findTiedToSrcOperand(i) != -1) {
+ assert(isUsed(Reg) && "Using an undefined register!");
+ continue;
+ }
+ assert((isUnused(Reg) || isReserved(Reg)) &&
+ "Re-defining a live register!");
+ setUsed(Reg);
+ }
+}
+
+void RegScavenger::backward() {
+ assert(Tracking && "Not tracking states!");
+ assert(MBBI != MBB->begin() && "Already at start of basic block!");
+ // Move ptr backward.
+ MBBI = prior(MBBI);
+
+ MachineInstr *MI = MBBI;
+ // Process defs first.
+ const TargetInstrDescriptor *TID = MI->getInstrDescriptor();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ // Skip two-address destination operand.
+ if (TID->findTiedToSrcOperand(i) != -1)
+ continue;
+ unsigned Reg = MO.getReg();
+ assert(isUsed(Reg));
+ if (!isReserved(Reg))
+ setUnused(Reg);
+ }
+
+ // Process uses.
+ BitVector ChangedRegs(NumPhysRegs);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ assert(isUnused(Reg) || isReserved(Reg));
+ ChangedRegs.set(Reg);
+ }
+ setUsed(ChangedRegs);
+}
+
+void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
+ if (includeReserved)
+ used = ~RegsAvailable;
+ else
+ used = ~RegsAvailable & ~ReservedRegs;
+}
+
+/// CreateRegClassMask - Set the bits that represent the registers in the
+/// TargetRegisterClass.
+static void CreateRegClassMask(const TargetRegisterClass *RC, BitVector &Mask) {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E;
+ ++I)
+ Mask.set(*I);
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass,
+ const BitVector &Candidates) const {
+ // Mask off the registers which are not in the TargetRegisterClass.
+ BitVector RegsAvailableCopy(NumPhysRegs, false);
+ CreateRegClassMask(RegClass, RegsAvailableCopy);
+ RegsAvailableCopy &= RegsAvailable;
+
+ // Restrict the search to candidates.
+ RegsAvailableCopy &= Candidates;
+
+  // Returns the first unused (bit is set) register, or 0 if none is found.
+ int Reg = RegsAvailableCopy.find_first();
+ return (Reg == -1) ? 0 : Reg;
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass,
+ bool ExCalleeSaved) const {
+ // Mask off the registers which are not in the TargetRegisterClass.
+ BitVector RegsAvailableCopy(NumPhysRegs, false);
+ CreateRegClassMask(RegClass, RegsAvailableCopy);
+ RegsAvailableCopy &= RegsAvailable;
+
+ // If looking for a non-callee-saved register, mask off all the callee-saved
+ // registers.
+ if (ExCalleeSaved)
+ RegsAvailableCopy &= ~CalleeSavedRegs;
+
+  // Returns the first unused (bit is set) register, or 0 if none is found.
+ int Reg = RegsAvailableCopy.find_first();
+ return (Reg == -1) ? 0 : Reg;
+}
+
+/// calcDistanceToUse - Calculate the distance to the first use of the
+/// specified register.
+static unsigned calcDistanceToUse(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I, unsigned Reg) {
+ unsigned Dist = 0;
+ I = next(I);
+ while (I != MBB->end()) {
+ Dist++;
+ if (I->findRegisterUseOperandIdx(Reg) != -1)
+ return Dist;
+ I = next(I);
+ }
+ return Dist + 1;
+}
+
+unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
+ MachineBasicBlock::iterator I,
+ int SPAdj) {
+ assert(ScavengingFrameIndex >= 0 &&
+ "Cannot scavenge a register without an emergency spill slot!");
+
+ // Mask off the registers which are not in the TargetRegisterClass.
+ BitVector Candidates(NumPhysRegs, false);
+ CreateRegClassMask(RC, Candidates);
+ Candidates ^= ReservedRegs; // Do not include reserved registers.
+
+ // Exclude all the registers being used by the instruction.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = I->getOperand(i);
+ if (MO.isReg())
+ Candidates.reset(MO.getReg());
+ }
+
+  // Find the register whose first use is furthest away.
+ unsigned SReg = 0;
+ unsigned MaxDist = 0;
+ int Reg = Candidates.find_first();
+ while (Reg != -1) {
+ unsigned Dist = calcDistanceToUse(MBB, I, Reg);
+ if (Dist >= MaxDist) {
+ MaxDist = Dist;
+ SReg = Reg;
+ }
+ Reg = Candidates.find_next(Reg);
+ }
+
+ if (ScavengedReg != 0) {
+ // First restore previously scavenged register.
+ RegInfo->loadRegFromStackSlot(*MBB, I, ScavengedReg,
+ ScavengingFrameIndex, ScavengedRC);
+ MachineBasicBlock::iterator II = prior(I);
+ RegInfo->eliminateFrameIndex(II, SPAdj, this);
+ }
+
+ RegInfo->storeRegToStackSlot(*MBB, I, SReg, ScavengingFrameIndex, RC);
+ MachineBasicBlock::iterator II = prior(I);
+ RegInfo->eliminateFrameIndex(II, SPAdj, this);
+ ScavengedReg = SReg;
+ ScavengedRC = RC;
+
+ return SReg;
+}
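scavengeRegister pairs every emergency spill with a later reload, and eliminateFrameIndex rewrites both against the scavenging frame index; at most one register is outstanding at a time. A rough sketch of that lifecycle with the frame-index stores and loads reduced to stubs (this is illustrative only, not the scavenger's real interface):

    #include <cassert>

    struct ScavengeSketch {
      unsigned ScavengedReg;                     // 0 means nothing outstanding

      ScavengeSketch() : ScavengedReg(0) {}

      unsigned scavenge(unsigned Victim) {
        if (ScavengedReg)
          restore();                             // only one register at a time
        spillToEmergencySlot(Victim);            // stub for storeRegToStackSlot
        ScavengedReg = Victim;
        return Victim;
      }

      void restore() {
        assert(ScavengedReg && "Nothing scavenged");
        reloadFromEmergencySlot(ScavengedReg);   // stub for loadRegFromStackSlot
        ScavengedReg = 0;
      }

      void spillToEmergencySlot(unsigned) {}     // placeholders for the real
      void reloadFromEmergencySlot(unsigned) {}  // spill-slot code generation
    };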
diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
new file mode 100644
index 0000000..defbe34
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
@@ -0,0 +1,102 @@
+//===-- llvm/CallingConvLower.cpp - Calling Conventions -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CCState class, used for lowering and implementing
+// calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+CCState::CCState(unsigned CC, bool isVarArg, const TargetMachine &tm,
+ SmallVector<CCValAssign, 16> &locs)
+ : CallingConv(CC), IsVarArg(isVarArg), TM(tm),
+ MRI(*TM.getRegisterInfo()), Locs(locs) {
+ // No stack is used.
+ StackOffset = 0;
+
+ UsedRegs.resize(MRI.getNumRegs());
+}
+
+
+/// MarkAllocated - Mark a register and all of its aliases as allocated.
+void CCState::MarkAllocated(unsigned Reg) {
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
+
+ if (const unsigned *RegAliases = MRI.getAliasSet(Reg))
+ for (; (Reg = *RegAliases); ++RegAliases)
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
+}
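MarkAllocated packs register usage into 32-bit words: register N occupies bit N%32 of word N/32, and marking a register also marks each of its aliases. The word/bit arithmetic on its own, over a plain vector of words (register numbers are arbitrary; this is a sketch, not the CCState implementation):

    #include <stdint.h>
    #include <vector>

    // Set and test bit N of a manually packed bitset: word N/32, bit N%32.
    void markUsed(std::vector<uint32_t> &Words, unsigned Reg) {
      Words[Reg / 32] |= 1u << (Reg & 31);
    }

    bool isMarked(const std::vector<uint32_t> &Words, unsigned Reg) {
      return (Words[Reg / 32] >> (Reg & 31)) & 1u;
    }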
+
+/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+/// incorporating info about the formals into this state.
+void CCState::AnalyzeFormalArguments(SDNode *TheArgs, CCAssignFn Fn) {
+ unsigned NumArgs = TheArgs->getNumValues()-1;
+
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT::ValueType ArgVT = TheArgs->getValueType(i);
+ SDOperand FlagOp = TheArgs->getOperand(3+i);
+ unsigned ArgFlags = cast<ConstantSDNode>(FlagOp)->getValue();
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+ cerr << "Formal argument #" << i << " has unhandled type "
+ << MVT::getValueTypeString(ArgVT) << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
+/// incorporating info about the result values into this state.
+void CCState::AnalyzeReturn(SDNode *TheRet, CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = TheRet->getNumOperands() / 2; i != e; ++i) {
+ MVT::ValueType VT = TheRet->getOperand(i*2+1).getValueType();
+ if (Fn(i, VT, VT, CCValAssign::Full,
+ cast<ConstantSDNode>(TheRet->getOperand(i*2+2))->getValue(), *this)){
+ cerr << "Return operand #" << i << " has unhandled type "
+ << MVT::getValueTypeString(VT) << "\n";
+ abort();
+ }
+ }
+}
+
+
+/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
+/// about the passed values into this state.
+void CCState::AnalyzeCallOperands(SDNode *TheCall, CCAssignFn Fn) {
+ unsigned NumOps = (TheCall->getNumOperands() - 5) / 2;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MVT::ValueType ArgVT = TheCall->getOperand(5+2*i).getValueType();
+ SDOperand FlagOp = TheCall->getOperand(5+2*i+1);
+      unsigned ArgFlags = cast<ConstantSDNode>(FlagOp)->getValue();
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+ cerr << "Call operand #" << i << " has unhandled type "
+ << MVT::getValueTypeString(ArgVT) << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallResult(SDNode *TheCall, CCAssignFn Fn) {
+ for (unsigned i = 0, e = TheCall->getNumValues() - 1; i != e; ++i) {
+ MVT::ValueType VT = TheCall->getValueType(i);
+ if (Fn(i, VT, VT, CCValAssign::Full, 0, *this)) {
+ cerr << "Call result #" << i << " has unhandled type "
+ << MVT::getValueTypeString(VT) << "\n";
+ abort();
+ }
+ }
+}
+
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
new file mode 100644
index 0000000..22c6e6b
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -0,0 +1,4749 @@
+//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Nate Begeman and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
+// both before and after the DAG is legalized.
+//
+// FIXME: Missing folds
+// sdiv, udiv, srem, urem (X, const) where X is an integer can be expanded into
+// a sequence of multiplies, shifts, and adds. This should be controlled by
+// some kind of hint from the target that int div is expensive.
+// various folds of mulh[s,u] by constants such as -1, powers of 2, etc.
+//
+// FIXME: select C, pow2, pow2 -> something smart
+// FIXME: trunc(select X, Y, Z) -> select X, trunc(Y), trunc(Z)
+// FIXME: Dead stores -> nuke
+// FIXME: shr X, (and Y,31) -> shr X, Y (TRICKY!)
+// FIXME: mul (x, const) -> shifts + adds
+// FIXME: undef values
+// FIXME: divide by zero is currently left unfolded. do we want to turn this
+// into an undef?
+// FIXME: select ne (select cc, 1, 0), 0, true, false -> select cc, true, false
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dagcombine"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NodesCombined , "Number of dag nodes combined");
+STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
+STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+
+namespace {
+#ifndef NDEBUG
+ static cl::opt<bool>
+ ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the first "
+ "dag combine pass"));
+ static cl::opt<bool>
+ ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the second "
+ "dag combine pass"));
+#else
+ static const bool ViewDAGCombine1 = false;
+ static const bool ViewDAGCombine2 = false;
+#endif
+
+ static cl::opt<bool>
+ CombinerAA("combiner-alias-analysis", cl::Hidden,
+ cl::desc("Turn on alias analysis during testing"));
+
+ static cl::opt<bool>
+ CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+ cl::desc("Include global information in alias analysis"));
+
+//------------------------------ DAGCombiner ---------------------------------//
+
+ class VISIBILITY_HIDDEN DAGCombiner {
+ SelectionDAG &DAG;
+ TargetLowering &TLI;
+ bool AfterLegalize;
+
+ // Worklist of all of the nodes that need to be simplified.
+ std::vector<SDNode*> WorkList;
+
+ // AA - Used for DAG load/store alias analysis.
+ AliasAnalysis &AA;
+
+    /// AddUsersToWorkList - When an instruction is simplified, add all users
+    /// of the instruction to the work list, because they might now be
+    /// simplified further.
+ ///
+ void AddUsersToWorkList(SDNode *N) {
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI)
+ AddToWorkList(*UI);
+ }
+
+ /// removeFromWorkList - remove all instances of N from the worklist.
+ ///
+ void removeFromWorkList(SDNode *N) {
+ WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
+ WorkList.end());
+ }
+
+ public:
+    /// AddToWorkList - Add to the work list, making sure its instance is at
+    /// the back (next to be processed).
+ void AddToWorkList(SDNode *N) {
+ removeFromWorkList(N);
+ WorkList.push_back(N);
+ }
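The combiner's worklist is a plain vector with a remove-then-append insertion, so a node is queued at most once and re-adding it moves it to the back, where it will be visited next. A minimal sketch of that discipline, with an opaque pointer standing in for SDNode*:

    #include <algorithm>
    #include <vector>

    typedef void *Node;   // stand-in for SDNode*

    // Keep a single instance of N, positioned at the back of the list.
    void addToWorkList(std::vector<Node> &WL, Node N) {
      WL.erase(std::remove(WL.begin(), WL.end(), N), WL.end());
      WL.push_back(N);
    }

    // Drop every instance of N, e.g. once N has been deleted from the DAG.
    void removeFromWorkList(std::vector<Node> &WL, Node N) {
      WL.erase(std::remove(WL.begin(), WL.end(), N), WL.end());
    }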
+
+ SDOperand CombineTo(SDNode *N, const SDOperand *To, unsigned NumTo,
+ bool AddTo = true) {
+ assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
+ ++NodesCombined;
+ DOUT << "\nReplacing.1 "; DEBUG(N->dump(&DAG));
+ DOUT << "\nWith: "; DEBUG(To[0].Val->dump(&DAG));
+ DOUT << " and " << NumTo-1 << " other values\n";
+ std::vector<SDNode*> NowDead;
+ DAG.ReplaceAllUsesWith(N, To, &NowDead);
+
+ if (AddTo) {
+ // Push the new nodes and any users onto the worklist
+ for (unsigned i = 0, e = NumTo; i != e; ++i) {
+ AddToWorkList(To[i].Val);
+ AddUsersToWorkList(To[i].Val);
+ }
+ }
+
+ // Nodes can be reintroduced into the worklist. Make sure we do not
+ // process a node that has been replaced.
+ removeFromWorkList(N);
+ for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+ removeFromWorkList(NowDead[i]);
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+ return SDOperand(N, 0);
+ }
+
+ SDOperand CombineTo(SDNode *N, SDOperand Res, bool AddTo = true) {
+ return CombineTo(N, &Res, 1, AddTo);
+ }
+
+ SDOperand CombineTo(SDNode *N, SDOperand Res0, SDOperand Res1,
+ bool AddTo = true) {
+ SDOperand To[] = { Res0, Res1 };
+ return CombineTo(N, To, 2, AddTo);
+ }
+ private:
+
+ /// SimplifyDemandedBits - Check the specified integer node value to see if
+ /// it can be simplified or if things it uses can be simplified by bit
+ /// propagation. If so, return true.
+ bool SimplifyDemandedBits(SDOperand Op) {
+ TargetLowering::TargetLoweringOpt TLO(DAG);
+ uint64_t KnownZero, KnownOne;
+ uint64_t Demanded = MVT::getIntVTBitMask(Op.getValueType());
+ if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
+ return false;
+
+ // Revisit the node.
+ AddToWorkList(Op.Val);
+
+ // Replace the old value with the new one.
+ ++NodesCombined;
+ DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.Val->dump(&DAG));
+ DOUT << "\nWith: "; DEBUG(TLO.New.Val->dump(&DAG));
+ DOUT << '\n';
+
+ std::vector<SDNode*> NowDead;
+ DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, NowDead);
+
+ // Push the new node and any (possibly new) users onto the worklist.
+ AddToWorkList(TLO.New.Val);
+ AddUsersToWorkList(TLO.New.Val);
+
+ // Nodes can end up on the worklist more than once. Make sure we do
+ // not process a node that has been replaced.
+ for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+ removeFromWorkList(NowDead[i]);
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (TLO.Old.Val->use_empty()) {
+ removeFromWorkList(TLO.Old.Val);
+
+ // If the operands of this node are only used by the node, they will now
+ // be dead. Make sure to visit them first to delete dead nodes early.
+ for (unsigned i = 0, e = TLO.Old.Val->getNumOperands(); i != e; ++i)
+ if (TLO.Old.Val->getOperand(i).Val->hasOneUse())
+ AddToWorkList(TLO.Old.Val->getOperand(i).Val);
+
+ DAG.DeleteNode(TLO.Old.Val);
+ }
+ return true;
+ }
+
+ bool CombineToPreIndexedLoadStore(SDNode *N);
+ bool CombineToPostIndexedLoadStore(SDNode *N);
+
+
+ /// visit - call the node-specific routine that knows how to fold each
+ /// particular type of node.
+ SDOperand visit(SDNode *N);
+
+ // Visitation implementation - Implement dag node combining for different
+ // node types. The semantics are as follows:
+ // Return Value:
+ // SDOperand.Val == 0 - No change was made
+ // SDOperand.Val == N - N was replaced, is dead, and is already handled.
+ // otherwise - N should be replaced by the returned Operand.
+ //
+ SDOperand visitTokenFactor(SDNode *N);
+ SDOperand visitADD(SDNode *N);
+ SDOperand visitSUB(SDNode *N);
+ SDOperand visitADDC(SDNode *N);
+ SDOperand visitADDE(SDNode *N);
+ SDOperand visitMUL(SDNode *N);
+ SDOperand visitSDIV(SDNode *N);
+ SDOperand visitUDIV(SDNode *N);
+ SDOperand visitSREM(SDNode *N);
+ SDOperand visitUREM(SDNode *N);
+ SDOperand visitMULHU(SDNode *N);
+ SDOperand visitMULHS(SDNode *N);
+ SDOperand visitAND(SDNode *N);
+ SDOperand visitOR(SDNode *N);
+ SDOperand visitXOR(SDNode *N);
+ SDOperand SimplifyVBinOp(SDNode *N);
+ SDOperand visitSHL(SDNode *N);
+ SDOperand visitSRA(SDNode *N);
+ SDOperand visitSRL(SDNode *N);
+ SDOperand visitCTLZ(SDNode *N);
+ SDOperand visitCTTZ(SDNode *N);
+ SDOperand visitCTPOP(SDNode *N);
+ SDOperand visitSELECT(SDNode *N);
+ SDOperand visitSELECT_CC(SDNode *N);
+ SDOperand visitSETCC(SDNode *N);
+ SDOperand visitSIGN_EXTEND(SDNode *N);
+ SDOperand visitZERO_EXTEND(SDNode *N);
+ SDOperand visitANY_EXTEND(SDNode *N);
+ SDOperand visitSIGN_EXTEND_INREG(SDNode *N);
+ SDOperand visitTRUNCATE(SDNode *N);
+ SDOperand visitBIT_CONVERT(SDNode *N);
+ SDOperand visitFADD(SDNode *N);
+ SDOperand visitFSUB(SDNode *N);
+ SDOperand visitFMUL(SDNode *N);
+ SDOperand visitFDIV(SDNode *N);
+ SDOperand visitFREM(SDNode *N);
+ SDOperand visitFCOPYSIGN(SDNode *N);
+ SDOperand visitSINT_TO_FP(SDNode *N);
+ SDOperand visitUINT_TO_FP(SDNode *N);
+ SDOperand visitFP_TO_SINT(SDNode *N);
+ SDOperand visitFP_TO_UINT(SDNode *N);
+ SDOperand visitFP_ROUND(SDNode *N);
+ SDOperand visitFP_ROUND_INREG(SDNode *N);
+ SDOperand visitFP_EXTEND(SDNode *N);
+ SDOperand visitFNEG(SDNode *N);
+ SDOperand visitFABS(SDNode *N);
+ SDOperand visitBRCOND(SDNode *N);
+ SDOperand visitBR_CC(SDNode *N);
+ SDOperand visitLOAD(SDNode *N);
+ SDOperand visitSTORE(SDNode *N);
+ SDOperand visitINSERT_VECTOR_ELT(SDNode *N);
+ SDOperand visitBUILD_VECTOR(SDNode *N);
+ SDOperand visitCONCAT_VECTORS(SDNode *N);
+ SDOperand visitVECTOR_SHUFFLE(SDNode *N);
+
+ SDOperand XformToShuffleWithZero(SDNode *N);
+ SDOperand ReassociateOps(unsigned Opc, SDOperand LHS, SDOperand RHS);
+
+ bool SimplifySelectOps(SDNode *SELECT, SDOperand LHS, SDOperand RHS);
+ SDOperand SimplifyBinOpWithSameOpcodeHands(SDNode *N);
+ SDOperand SimplifySelect(SDOperand N0, SDOperand N1, SDOperand N2);
+ SDOperand SimplifySelectCC(SDOperand N0, SDOperand N1, SDOperand N2,
+ SDOperand N3, ISD::CondCode CC,
+ bool NotExtCompare = false);
+ SDOperand SimplifySetCC(MVT::ValueType VT, SDOperand N0, SDOperand N1,
+ ISD::CondCode Cond, bool foldBooleans = true);
+ SDOperand ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT::ValueType);
+ SDOperand BuildSDIV(SDNode *N);
+ SDOperand BuildUDIV(SDNode *N);
+ SDNode *MatchRotate(SDOperand LHS, SDOperand RHS);
+ SDOperand ReduceLoadWidth(SDNode *N);
+
+ /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+ /// looking for aliasing nodes and adding them to the Aliases vector.
+ void GatherAllAliases(SDNode *N, SDOperand OriginalChain,
+ SmallVector<SDOperand, 8> &Aliases);
+
+ /// isAlias - Return true if there is any possibility that the two addresses
+ /// overlap.
+ bool isAlias(SDOperand Ptr1, int64_t Size1,
+ const Value *SrcValue1, int SrcValueOffset1,
+ SDOperand Ptr2, int64_t Size2,
+ const Value *SrcValue2, int SrcValueOffset2);
+
+ /// FindAliasInfo - Extracts the relevant alias information from the memory
+ /// node. Returns true if the operand was a load.
+ bool FindAliasInfo(SDNode *N,
+ SDOperand &Ptr, int64_t &Size,
+ const Value *&SrcValue, int &SrcValueOffset);
+
+ /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
+ /// looking for a better chain (aliasing node).
+ SDOperand FindBetterChain(SDNode *N, SDOperand Chain);
+
+public:
+ DAGCombiner(SelectionDAG &D, AliasAnalysis &A)
+ : DAG(D),
+ TLI(D.getTargetLoweringInfo()),
+ AfterLegalize(false),
+ AA(A) {}
+
+ /// Run - runs the dag combiner on all nodes in the work list
+ void Run(bool RunningAfterLegalize);
+ };
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering::DAGCombinerInfo implementation
+//===----------------------------------------------------------------------===//
+
+void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
+ ((DAGCombiner*)DC)->AddToWorkList(N);
+}
+
+SDOperand TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, const std::vector<SDOperand> &To) {
+ return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size());
+}
+
+SDOperand TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDOperand Res) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res);
+}
+
+
+SDOperand TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDOperand Res0, SDOperand Res1) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// isNegatibleForFree - Return 1 if we can compute the negated form of the
+/// specified expression for the same cost as the expression itself, or 2 if we
+/// can compute the negated form more cheaply than the expression itself.
+/// Returns 0 if the expression cannot be negated for free.
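+/// For example, an FNEG is always free to negate (returns 2), and under
+/// UnsafeFPMath an FSUB can be negated simply by swapping its operands
+/// (returns 1).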
+static char isNegatibleForFree(SDOperand Op, unsigned Depth = 0) {
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG) return 2;
+
+ // Don't allow anything with multiple uses.
+ if (!Op.hasOneUse()) return 0;
+
+ // Don't recurse exponentially.
+ if (Depth > 6) return 0;
+
+ switch (Op.getOpcode()) {
+ default: return 0;
+ case ISD::ConstantFP:
+ return 1;
+ case ISD::FADD:
+ // FIXME: determine better conditions for this xform.
+ if (!UnsafeFPMath) return 0;
+
+ // -(A+B) -> -A - B
+ if (char V = isNegatibleForFree(Op.getOperand(0), Depth+1))
+ return V;
+ // -(A+B) -> -B - A
+ return isNegatibleForFree(Op.getOperand(1), Depth+1);
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ if (!UnsafeFPMath) return 0;
+
+ // -(A-B) -> B-A
+ return 1;
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ if (HonorSignDependentRoundingFPMath()) return 0;
+
+ // -(X*Y) -> (-X * Y) or (X*-Y)
+ if (char V = isNegatibleForFree(Op.getOperand(0), Depth+1))
+ return V;
+
+ return isNegatibleForFree(Op.getOperand(1), Depth+1);
+
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FSIN:
+ return isNegatibleForFree(Op.getOperand(0), Depth+1);
+ }
+}
+
+/// GetNegatedExpression - If isNegatibleForFree returned a non-zero value, this
+/// function returns the newly negated expression.
+static SDOperand GetNegatedExpression(SDOperand Op, SelectionDAG &DAG,
+ unsigned Depth = 0) {
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
+
+ // Don't allow anything with multiple uses.
+ assert(Op.hasOneUse() && "Unknown reuse!");
+
+ assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Unknown code");
+ case ISD::ConstantFP:
+ return DAG.getConstantFP(-cast<ConstantFPSDNode>(Op)->getValue(),
+ Op.getValueType());
+ case ISD::FADD:
+ // FIXME: determine better conditions for this xform.
+ assert(UnsafeFPMath);
+
+ // -(A+B) -> -A - B
+ if (isNegatibleForFree(Op.getOperand(0), Depth+1))
+ return DAG.getNode(ISD::FSUB, Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG, Depth+1),
+ Op.getOperand(1));
+ // -(A+B) -> -B - A
+ return DAG.getNode(ISD::FSUB, Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(1), DAG, Depth+1),
+ Op.getOperand(0));
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ assert(UnsafeFPMath);
+
+ // -(0-B) -> B
+ if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
+ if (N0CFP->getValue() == 0.0)
+ return Op.getOperand(1);
+
+ // -(A-B) -> B-A
+ return DAG.getNode(ISD::FSUB, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(0));
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ assert(!HonorSignDependentRoundingFPMath());
+
+ // -(X*Y) -> -X * Y
+ if (isNegatibleForFree(Op.getOperand(0), Depth+1))
+ return DAG.getNode(Op.getOpcode(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG, Depth+1),
+ Op.getOperand(1));
+
+ // -(X*Y) -> X * -Y
+ return DAG.getNode(Op.getOpcode(), Op.getValueType(),
+ Op.getOperand(0),
+ GetNegatedExpression(Op.getOperand(1), DAG, Depth+1));
+
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FSIN:
+ return DAG.getNode(Op.getOpcode(), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG, Depth+1));
+ }
+}
+
+
+// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
+// that selects between the values 1 and 0, making it equivalent to a setcc.
+// Also, set the incoming LHS, RHS, and CC references to the appropriate
+// nodes based on the type of node we are checking. This simplifies life a
+// bit for the callers.
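+// For example, (select_cc X, Y, 1, 0, cc) is handled exactly like
+// (setcc X, Y, cc).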
+static bool isSetCCEquivalent(SDOperand N, SDOperand &LHS, SDOperand &RHS,
+ SDOperand &CC) {
+ if (N.getOpcode() == ISD::SETCC) {
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(2);
+ return true;
+ }
+ if (N.getOpcode() == ISD::SELECT_CC &&
+ N.getOperand(2).getOpcode() == ISD::Constant &&
+ N.getOperand(3).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(N.getOperand(2))->getValue() == 1 &&
+ cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(4);
+ return true;
+ }
+ return false;
+}
+
+// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
+// one use. If this is true, it allows the users to invert the operation for
+// free when it is profitable to do so.
+static bool isOneUseSetCC(SDOperand N) {
+ SDOperand N0, N1, N2;
+ if (isSetCCEquivalent(N, N0, N1, N2) && N.Val->hasOneUse())
+ return true;
+ return false;
+}
+
+SDOperand DAGCombiner::ReassociateOps(unsigned Opc, SDOperand N0, SDOperand N1){
+ MVT::ValueType VT = N0.getValueType();
+ // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
+ // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
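+ // For example, (add (add x, 3), 5) reassociates so the two constants fold
+ // into one, leaving a single add of x and 8.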
+ if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
+ if (isa<ConstantSDNode>(N1)) {
+ SDOperand OpNode = DAG.getNode(Opc, VT, N0.getOperand(1), N1);
+ AddToWorkList(OpNode.Val);
+ return DAG.getNode(Opc, VT, OpNode, N0.getOperand(0));
+ } else if (N0.hasOneUse()) {
+ SDOperand OpNode = DAG.getNode(Opc, VT, N0.getOperand(0), N1);
+ AddToWorkList(OpNode.Val);
+ return DAG.getNode(Opc, VT, OpNode, N0.getOperand(1));
+ }
+ }
+ // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
+ // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
+ if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
+ if (isa<ConstantSDNode>(N0)) {
+ SDOperand OpNode = DAG.getNode(Opc, VT, N1.getOperand(1), N0);
+ AddToWorkList(OpNode.Val);
+ return DAG.getNode(Opc, VT, OpNode, N1.getOperand(0));
+ } else if (N1.hasOneUse()) {
+ SDOperand OpNode = DAG.getNode(Opc, VT, N1.getOperand(0), N0);
+ AddToWorkList(OpNode.Val);
+ return DAG.getNode(Opc, VT, OpNode, N1.getOperand(1));
+ }
+ }
+ return SDOperand();
+}
+
+//===----------------------------------------------------------------------===//
+// Main DAG Combiner implementation
+//===----------------------------------------------------------------------===//
+
+void DAGCombiner::Run(bool RunningAfterLegalize) {
+ // set the instance variable, so that the various visit routines may use it.
+ AfterLegalize = RunningAfterLegalize;
+
+ // Add all the dag nodes to the worklist.
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I)
+ WorkList.push_back(I);
+
+ // Create a dummy node (which is not added to allnodes) that adds a reference
+ // to the root node, preventing it from being deleted, and tracking any
+ // changes of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+
+ // The root of the dag may dangle to deleted nodes until the dag combiner is
+ // done. Set it to null to avoid confusion.
+ DAG.setRoot(SDOperand());
+
+ /// DagCombineInfo - Expose the DAG combiner to the target combiner impls.
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, !RunningAfterLegalize, false, this);
+
+ // While the worklist isn't empty, inspect the node at the end of it and
+ // try to combine it.
+ while (!WorkList.empty()) {
+ SDNode *N = WorkList.back();
+ WorkList.pop_back();
+
+ // If N has no uses, it is dead. Make sure to revisit all N's operands once
+ // N is deleted from the DAG, since they too may now be dead or may have a
+ // reduced number of uses, allowing other xforms.
+ if (N->use_empty() && N != &Dummy) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ AddToWorkList(N->getOperand(i).Val);
+
+ DAG.DeleteNode(N);
+ continue;
+ }
+
+ SDOperand RV = visit(N);
+
+ // If nothing happened, try a target-specific DAG combine.
+ if (RV.Val == 0) {
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned NULL!");
+ if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
+ TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode()))
+ RV = TLI.PerformDAGCombine(N, DagCombineInfo);
+ }
+
+ if (RV.Val) {
+ ++NodesCombined;
+ // If we get back the same node we passed in, rather than a new node or
+ // zero, we know that the node must have defined multiple values and
+ // CombineTo was used. Since CombineTo takes care of the worklist
+ // mechanics for us, we have no work to do in this case.
+ if (RV.Val != N) {
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ RV.Val->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned new node!");
+
+ DOUT << "\nReplacing.3 "; DEBUG(N->dump(&DAG));
+ DOUT << "\nWith: "; DEBUG(RV.Val->dump(&DAG));
+ DOUT << '\n';
+ std::vector<SDNode*> NowDead;
+ if (N->getNumValues() == RV.Val->getNumValues())
+ DAG.ReplaceAllUsesWith(N, RV.Val, &NowDead);
+ else {
+ assert(N->getValueType(0) == RV.getValueType() && "Type mismatch");
+ SDOperand OpV = RV;
+ DAG.ReplaceAllUsesWith(N, &OpV, &NowDead);
+ }
+
+ // Push the new node and any users onto the worklist
+ AddToWorkList(RV.Val);
+ AddUsersToWorkList(RV.Val);
+
+ // Nodes can be reintroduced into the worklist. Make sure we do not
+ // process a node that has been replaced.
+ removeFromWorkList(N);
+ for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+ removeFromWorkList(NowDead[i]);
+
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+ }
+ }
+ }
+
+ // If the root changed (e.g. it was a dead load), update the root.
+ DAG.setRoot(Dummy.getValue());
+}
+
+SDOperand DAGCombiner::visit(SDNode *N) {
+ switch(N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: return visitTokenFactor(N);
+ case ISD::ADD: return visitADD(N);
+ case ISD::SUB: return visitSUB(N);
+ case ISD::ADDC: return visitADDC(N);
+ case ISD::ADDE: return visitADDE(N);
+ case ISD::MUL: return visitMUL(N);
+ case ISD::SDIV: return visitSDIV(N);
+ case ISD::UDIV: return visitUDIV(N);
+ case ISD::SREM: return visitSREM(N);
+ case ISD::UREM: return visitUREM(N);
+ case ISD::MULHU: return visitMULHU(N);
+ case ISD::MULHS: return visitMULHS(N);
+ case ISD::AND: return visitAND(N);
+ case ISD::OR: return visitOR(N);
+ case ISD::XOR: return visitXOR(N);
+ case ISD::SHL: return visitSHL(N);
+ case ISD::SRA: return visitSRA(N);
+ case ISD::SRL: return visitSRL(N);
+ case ISD::CTLZ: return visitCTLZ(N);
+ case ISD::CTTZ: return visitCTTZ(N);
+ case ISD::CTPOP: return visitCTPOP(N);
+ case ISD::SELECT: return visitSELECT(N);
+ case ISD::SELECT_CC: return visitSELECT_CC(N);
+ case ISD::SETCC: return visitSETCC(N);
+ case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
+ case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
+ case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
+ case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
+ case ISD::TRUNCATE: return visitTRUNCATE(N);
+ case ISD::BIT_CONVERT: return visitBIT_CONVERT(N);
+ case ISD::FADD: return visitFADD(N);
+ case ISD::FSUB: return visitFSUB(N);
+ case ISD::FMUL: return visitFMUL(N);
+ case ISD::FDIV: return visitFDIV(N);
+ case ISD::FREM: return visitFREM(N);
+ case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
+ case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
+ case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
+ case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
+ case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
+ case ISD::FP_ROUND: return visitFP_ROUND(N);
+ case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
+ case ISD::FP_EXTEND: return visitFP_EXTEND(N);
+ case ISD::FNEG: return visitFNEG(N);
+ case ISD::FABS: return visitFABS(N);
+ case ISD::BRCOND: return visitBRCOND(N);
+ case ISD::BR_CC: return visitBR_CC(N);
+ case ISD::LOAD: return visitLOAD(N);
+ case ISD::STORE: return visitSTORE(N);
+ case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
+ case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
+ case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
+ case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
+ }
+ return SDOperand();
+}
+
+/// getInputChainForNode - Given a node, return its input chain if it has one,
+/// otherwise return a null SDOperand.
+static SDOperand getInputChainForNode(SDNode *N) {
+ if (unsigned NumOps = N->getNumOperands()) {
+ if (N->getOperand(0).getValueType() == MVT::Other)
+ return N->getOperand(0);
+ else if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
+ return N->getOperand(NumOps-1);
+ for (unsigned i = 1; i < NumOps-1; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ return N->getOperand(i);
+ }
+ return SDOperand(0, 0);
+}
+
+SDOperand DAGCombiner::visitTokenFactor(SDNode *N) {
+ // If N has two operands, where one has an input chain equal to the other,
+ // the 'other' chain is redundant.
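+ // For example, in TokenFactor(Ch, (load Ch, Ptr)) the load already depends
+ // on Ch, so the token factor can be replaced by the load's output chain.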
+ if (N->getNumOperands() == 2) {
+ if (getInputChainForNode(N->getOperand(0).Val) == N->getOperand(1))
+ return N->getOperand(0);
+ if (getInputChainForNode(N->getOperand(1).Val) == N->getOperand(0))
+ return N->getOperand(1);
+ }
+
+ SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
+ SmallVector<SDOperand, 8> Ops; // Ops for replacing token factor.
+ SmallPtrSet<SDNode*, 16> SeenOps;
+ bool Changed = false; // If we should replace this token factor.
+
+ // Start out with this token factor.
+ TFs.push_back(N);
+
+ // Iterate through token factors. The TFs list grows when new token factors
+ // are encountered.
+ for (unsigned i = 0; i < TFs.size(); ++i) {
+ SDNode *TF = TFs[i];
+
+ // Check each of the operands.
+ for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
+ SDOperand Op = TF->getOperand(i);
+
+ switch (Op.getOpcode()) {
+ case ISD::EntryToken:
+ // Entry tokens don't need to be added to the list. They are
+ // redundant.
+ Changed = true;
+ break;
+
+ case ISD::TokenFactor:
+ if ((CombinerAA || Op.hasOneUse()) &&
+ std::find(TFs.begin(), TFs.end(), Op.Val) == TFs.end()) {
+ // Queue up for processing.
+ TFs.push_back(Op.Val);
+ // Clean up in case the token factor is removed.
+ AddToWorkList(Op.Val);
+ Changed = true;
+ break;
+ }
+ // Fall thru
+
+ default:
+ // Only add if it isn't already in the list.
+ if (SeenOps.insert(Op.Val))
+ Ops.push_back(Op);
+ else
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ SDOperand Result;
+
+ // If we've changed things around, replace the token factor.
+ if (Changed) {
+ if (Ops.size() == 0) {
+ // The entry token is the only possible outcome.
+ Result = DAG.getEntryNode();
+ } else {
+ // New and improved token factor.
+ Result = DAG.getNode(ISD::TokenFactor, MVT::Other, &Ops[0], Ops.size());
+ }
+
+ // Don't add users to work list.
+ return CombineTo(N, Result, false);
+ }
+
+ return Result;
+}
+
+static
+SDOperand combineShlAddConstant(SDOperand N0, SDOperand N1, SelectionDAG &DAG) {
+ MVT::ValueType VT = N0.getValueType();
+ SDOperand N00 = N0.getOperand(0);
+ SDOperand N01 = N0.getOperand(1);
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
+ if (N01C && N00.getOpcode() == ISD::ADD && N00.Val->hasOneUse() &&
+ isa<ConstantSDNode>(N00.getOperand(1))) {
+ N0 = DAG.getNode(ISD::ADD, VT,
+ DAG.getNode(ISD::SHL, VT, N00.getOperand(0), N01),
+ DAG.getNode(ISD::SHL, VT, N00.getOperand(1), N01));
+ return DAG.getNode(ISD::ADD, VT, N0, N1);
+ }
+ return SDOperand();
+}
+
+static
+SDOperand combineSelectAndUse(SDNode *N, SDOperand Slct, SDOperand OtherOp,
+ SelectionDAG &DAG) {
+ MVT::ValueType VT = N->getValueType(0);
+ unsigned Opc = N->getOpcode();
+ bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
+ SDOperand LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
+ SDOperand RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
+ ISD::CondCode CC = ISD::SETCC_INVALID;
+ if (isSlctCC)
+ CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
+ else {
+ SDOperand CCOp = Slct.getOperand(0);
+ if (CCOp.getOpcode() == ISD::SETCC)
+ CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
+ }
+
+ bool DoXform = false;
+ bool InvCC = false;
+ assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
+ "Bad input!");
+ if (LHS.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(LHS)->isNullValue())
+ DoXform = true;
+ else if (CC != ISD::SETCC_INVALID &&
+ RHS.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(RHS)->isNullValue()) {
+ std::swap(LHS, RHS);
+ bool isInt = MVT::isInteger(isSlctCC ? Slct.getOperand(0).getValueType()
+ : Slct.getOperand(0).getOperand(0).getValueType());
+ CC = ISD::getSetCCInverse(CC, isInt);
+ DoXform = true;
+ InvCC = true;
+ }
+
+ if (DoXform) {
+ SDOperand Result = DAG.getNode(Opc, VT, OtherOp, RHS);
+ if (isSlctCC)
+ return DAG.getSelectCC(OtherOp, Result,
+ Slct.getOperand(0), Slct.getOperand(1), CC);
+ SDOperand CCOp = Slct.getOperand(0);
+ if (InvCC)
+ CCOp = DAG.getSetCC(CCOp.getValueType(), CCOp.getOperand(0),
+ CCOp.getOperand(1), CC);
+ return DAG.getNode(ISD::SELECT, VT, CCOp, OtherOp, Result);
+ }
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitADD(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT::ValueType VT = N0.getValueType();
+
+ // fold vector ops
+ if (MVT::isVector(VT)) {
+ SDOperand FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.Val) return FoldedVOp;
+ }
+
+ // fold (add x, undef) -> undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+ // fold (add c1, c2) -> c1+c2
+ if (N0C && N1C)
+ return DAG.getNode(ISD::ADD, VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADD, VT, N1, N0);
+ // fold (add x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold ((c1-A)+c2) -> (c1+c2)-A
+ if (N1C && N0.getOpcode() == ISD::SUB)
+ if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
+ return DAG.getNode(ISD::SUB, VT,
+ DAG.getConstant(N1C->getValue()+N0C->getValue(), VT),
+ N0.getOperand(1));
+ // reassociate add
+ SDOperand RADD = ReassociateOps(ISD::ADD, N0, N1);
+ if (RADD.Val != 0)
+ return RADD;
+ // fold ((0-A) + B) -> B-A
+ if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
+ cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
+ return DAG.getNode(ISD::SUB, VT, N1, N0.getOperand(1));
+ // fold (A + (0-B)) -> A-B
+ if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
+ cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
+ return DAG.getNode(ISD::SUB, VT, N0, N1.getOperand(1));
+ // fold (A+(B-A)) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
+ return N1.getOperand(0);
+
+ if (!MVT::isVector(VT) && SimplifyDemandedBits(SDOperand(N, 0)))
+ return SDOperand(N, 0);
+
+ // fold (a+b) -> (a|b) iff a and b share no bits.
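+ // For example, (add (and x, 0xFF00), (and y, 0x00FF)) can be rewritten as
+ // an OR because the two operands have no possibly-set bits in common.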
+ if (MVT::isInteger(VT) && !MVT::isVector(VT)) {
+ uint64_t LHSZero, LHSOne;
+ uint64_t RHSZero, RHSOne;
+ uint64_t Mask = MVT::getIntVTBitMask(VT);
+ DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+ if (LHSZero) {
+ DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+
+ // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+ if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+ (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ return DAG.getNode(ISD::OR, VT, N0, N1);
+ }
+ }
+
+ // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
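+ // For example, (add (shl (add x, 3), 2), y) becomes
+ // (add (add (shl x, 2), 12), y), exposing the shifted constant 3<<2 = 12.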
+ if (N0.getOpcode() == ISD::SHL && N0.Val->hasOneUse()) {
+ SDOperand Result = combineShlAddConstant(N0, N1, DAG);
+ if (Result.Val) return Result;
+ }
+ if (N1.getOpcode() == ISD::SHL && N1.Val->hasOneUse()) {
+ SDOperand Result = combineShlAddConstant(N1, N0, DAG);
+ if (Result.Val) return Result;
+ }
+
+ // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
+ if (N0.getOpcode() == ISD::SELECT && N0.Val->hasOneUse()) {
+ SDOperand Result = combineSelectAndUse(N, N0, N1, DAG);
+ if (Result.Val) return Result;
+ }
+ if (N1.getOpcode() == ISD::SELECT && N1.Val->hasOneUse()) {
+ SDOperand Result = combineSelectAndUse(N, N1, N0, DAG);
+ if (Result.Val) return Result;
+ }
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitADDC(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT::ValueType VT = N0.getValueType();
+
+ // If the flag result is dead, turn this into an ADD.
+ if (N->hasNUsesOfValue(0, 1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE, MVT::Flag));
+
+ // canonicalize constant to RHS.
+ if (N0C && !N1C) {
+ SDOperand Ops[] = { N1, N0 };
+ return DAG.getNode(ISD::ADDC, N->getVTList(), Ops, 2);
+ }
+
+ // fold (addc x, 0) -> x + no carry out
+ if (N1C && N1C->isNullValue())
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, MVT::Flag));
+
+ // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
+ uint64_t LHSZero, LHSOne;
+ uint64_t RHSZero, RHSOne;
+ uint64_t Mask = MVT::getIntVTBitMask(VT);
+ DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+ if (LHSZero) {
+ DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+
+ // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+ if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+ (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ return CombineTo(N, DAG.getNode(ISD::OR, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, MVT::Flag));
+ }
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitADDE(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ SDOperand CarryIn = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ //MVT::ValueType VT = N0.getValueType();
+
+ // canonicalize constant to RHS
+ if (N0C && !N1C) {
+ SDOperand Ops[] = { N1, N0, CarryIn };
+ return DAG.getNode(ISD::ADDE, N->getVTList(), Ops, 3);
+ }
+
+ // fold (adde x, y, false) -> (addc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE) {
+ SDOperand Ops[] = { N1, N0 };
+ return DAG.getNode(ISD::ADDC, N->getVTList(), Ops, 2);
+ }
+
+ return SDOperand();
+}
+
+
+
+SDOperand DAGCombiner::visitSUB(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.Val);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
+ MVT::ValueType VT = N0.getValueType();
+
+ // fold vector ops
+ if (MVT::isVector(VT)) {
+ SDOperand FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.Val) return FoldedVOp;
+ }
+
+ // fold (sub x, x) -> 0
+ if (N0 == N1)
+ return DAG.getConstant(0, N->getValueType(0));
+ // fold (sub c1, c2) -> c1-c2
+ if (N0C && N1C)
+ return DAG.getNode(ISD::SUB, VT, N0, N1);
+ // fold (sub x, c) -> (add x, -c)
+ if (N1C)
+ return DAG.getNode(ISD::ADD, VT, N0, DAG.getConstant(-N1C->getValue(), VT));
+ // fold (A+B)-A -> B
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
+ return N0.getOperand(1);
+ // fold (A+B)-B -> A
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
+ return N0.getOperand(0);
+ // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
+ if (N1.getOpcode() == ISD::SELECT && N1.Val->hasOneUse()) {
+ SDOperand Result = combineSelectAndUse(N, N1, N0, DAG);
+ if (Result.Val) return Result;
+ }
+ // If either operand of a sub is undef, the result is undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitMUL(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT::ValueType VT = N0.getValueType();
+
+ // fold vector ops
+ if (MVT::isVector(VT)) {
+ SDOperand FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.Val) return FoldedVOp;
+ }
+
+ // fold (mul x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (mul c1, c2) -> c1*c2
+ if (N0C && N1C)
+ return DAG.getNode(ISD::MUL, VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::MUL, VT, N1, N0);
+ // fold (mul x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mul x, -1) -> 0-x
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0);
+ // fold (mul x, (1 << c)) -> x << c
+ if (N1C && isPowerOf2_64(N1C->getValue()))
+ return DAG.getNode(ISD::SHL, VT, N0,
+ DAG.getConstant(Log2_64(N1C->getValue()),
+ TLI.getShiftAmountTy()));
+ // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
+ if (N1C && isPowerOf2_64(-N1C->getSignExtended())) {
+ // FIXME: If the input is something that is easily negated (e.g. a
+ // single-use add), we should put the negate there.
+ return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT),
+ DAG.getNode(ISD::SHL, VT, N0,
+ DAG.getConstant(Log2_64(-N1C->getSignExtended()),
+ TLI.getShiftAmountTy())));
+ }
+
+ // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
+ if (N1C && N0.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ SDOperand C3 = DAG.getNode(ISD::SHL, VT, N1, N0.getOperand(1));
+ AddToWorkList(C3.Val);
+ return DAG.getNode(ISD::MUL, VT, N0.getOperand(0), C3);
+ }
+
+ // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
+ // use.
+ {
+ SDOperand Sh(0,0), Y(0,0);
+ // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
+ if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.Val->hasOneUse()) {
+ Sh = N0; Y = N1;
+ } else if (N1.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N1.getOperand(1)) && N1.Val->hasOneUse()) {
+ Sh = N1; Y = N0;
+ }
+ if (Sh.Val) {
+ SDOperand Mul = DAG.getNode(ISD::MUL, VT, Sh.getOperand(0), Y);
+ return DAG.getNode(ISD::SHL, VT, Mul, Sh.getOperand(1));
+ }
+ }
+ // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
+ if (N1C && N0.getOpcode() == ISD::ADD && N0.Val->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ return DAG.getNode(ISD::ADD, VT,
+ DAG.getNode(ISD::MUL, VT, N0.getOperand(0), N1),
+ DAG.getNode(ISD::MUL, VT, N0.getOperand(1), N1));
+ }
+
+ // reassociate mul
+ SDOperand RMUL = ReassociateOps(ISD::MUL, N0, N1);
+ if (RMUL.Val != 0)
+ return RMUL;
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSDIV(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.Val);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold vector ops
+ if (MVT::isVector(VT)) {
+ SDOperand FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.Val) return FoldedVOp;
+ }
+
+ // fold (sdiv c1, c2) -> c1/c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.getNode(ISD::SDIV, VT, N0, N1);
+ // fold (sdiv X, 1) -> X
+ if (N1C && N1C->getSignExtended() == 1LL)
+ return N0;
+ // fold (sdiv X, -1) -> 0-X
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0);
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // udiv instead. Handles (X&15) /s 4 -> (X&15) >> 2
+ uint64_t SignBit = 1ULL << (MVT::getSizeInBits(VT)-1);
+ if (DAG.MaskedValueIsZero(N1, SignBit) &&
+ DAG.MaskedValueIsZero(N0, SignBit))
+ return DAG.getNode(ISD::UDIV, N1.getValueType(), N0, N1);
+ // fold (sdiv X, pow2) -> simple ops after legalize
+ if (N1C && N1C->getValue() && !TLI.isIntDivCheap() &&
+ (isPowerOf2_64(N1C->getSignExtended()) ||
+ isPowerOf2_64(-N1C->getSignExtended()))) {
+ // If dividing by powers of two is cheap, then don't perform the following
+ // fold.
+ if (TLI.isPow2DivCheap())
+ return SDOperand();
+ int64_t pow2 = N1C->getSignExtended();
+ int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
+ unsigned lg2 = Log2_64(abs2);
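+ // For example, an i32 sdiv by 8 (lg2 == 3) expands to:
+ //   sgn = sra x, 31; add = add x, (srl sgn, 29); res = sra add, 3
+ // so that negative dividends still round toward zero.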
+ // Splat the sign bit into the register
+ SDOperand SGN = DAG.getNode(ISD::SRA, VT, N0,
+ DAG.getConstant(MVT::getSizeInBits(VT)-1,
+ TLI.getShiftAmountTy()));
+ AddToWorkList(SGN.Val);
+ // Add (N0 < 0) ? abs2 - 1 : 0;
+ SDOperand SRL = DAG.getNode(ISD::SRL, VT, SGN,
+ DAG.getConstant(MVT::getSizeInBits(VT)-lg2,
+ TLI.getShiftAmountTy()));
+ SDOperand ADD = DAG.getNode(ISD::ADD, VT, N0, SRL);
+ AddToWorkList(SRL.Val);
+ AddToWorkList(ADD.Val); // Divide by pow2
+ SDOperand SRA = DAG.getNode(ISD::SRA, VT, ADD,
+ DAG.getConstant(lg2, TLI.getShiftAmountTy()));
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (pow2 > 0)
+ return SRA;
+ AddToWorkList(SRA.Val);
+ return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), SRA);
+ }
+ // if integer divide is expensive and we satisfy the requirements, emit an
+ // alternate sequence.
+ if (N1C && (N1C->getSignExtended() < -1 || N1C->getSignExtended() > 1) &&
+ !TLI.isIntDivCheap()) {
+ SDOperand Op = BuildSDIV(N);
+ if (Op.Val) return Op;
+ }
+
+ // undef / X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X / undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitUDIV(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.Val);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold vector ops
+ if (MVT::isVector(VT)) {
+ SDOperand FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.Val) return FoldedVOp;
+ }
+
+ // fold (udiv c1, c2) -> c1/c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.getNode(ISD::UDIV, VT, N0, N1);
+ // fold (udiv x, (1 << c)) -> x >>u c
+ if (N1C && isPowerOf2_64(N1C->getValue()))
+ return DAG.getNode(ISD::SRL, VT, N0,
+ DAG.getConstant(Log2_64(N1C->getValue()),
+ TLI.getShiftAmountTy()));
+ // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (isPowerOf2_64(SHC->getValue())) {
+ MVT::ValueType ADDVT = N1.getOperand(1).getValueType();
+ SDOperand Add = DAG.getNode(ISD::ADD, ADDVT, N1.getOperand(1),
+ DAG.getConstant(Log2_64(SHC->getValue()),
+ ADDVT));
+ AddToWorkList(Add.Val);
+ return DAG.getNode(ISD::SRL, VT, N0, Add);
+ }
+ }
+ }
+ // fold (udiv x, c) -> alternate
+ if (N1C && N1C->getValue() && !TLI.isIntDivCheap()) {
+ SDOperand Op = BuildUDIV(N);
+ if (Op.Val) return Op;
+ }
+
+ // undef / X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X / undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSREM(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold (srem c1, c2) -> c1%c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.getNode(ISD::SREM, VT, N0, N1);
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
+ uint64_t SignBit = 1ULL << (MVT::getSizeInBits(VT)-1);
+ if (DAG.MaskedValueIsZero(N1, SignBit) &&
+ DAG.MaskedValueIsZero(N0, SignBit))
+ return DAG.getNode(ISD::UREM, VT, N0, N1);
+
+ // Unconditionally lower X%C -> X-X/C*C. This allows the X/C logic to hack on
+ // the remainder operation.
+ if (N1C && !N1C->isNullValue()) {
+ SDOperand Div = DAG.getNode(ISD::SDIV, VT, N0, N1);
+ SDOperand Mul = DAG.getNode(ISD::MUL, VT, Div, N1);
+ SDOperand Sub = DAG.getNode(ISD::SUB, VT, N0, Mul);
+ AddToWorkList(Div.Val);
+ AddToWorkList(Mul.Val);
+ return Sub;
+ }
+
+ // undef % X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X % undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitUREM(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold (urem c1, c2) -> c1%c2
+ if (N0C && N1C && !N1C->isNullValue())
+ return DAG.getNode(ISD::UREM, VT, N0, N1);
+ // fold (urem x, pow2) -> (and x, pow2-1)
+ if (N1C && !N1C->isNullValue() && isPowerOf2_64(N1C->getValue()))
+ return DAG.getNode(ISD::AND, VT, N0, DAG.getConstant(N1C->getValue()-1,VT));
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (isPowerOf2_64(SHC->getValue())) {
+ SDOperand Add = DAG.getNode(ISD::ADD, VT, N1,DAG.getConstant(~0ULL,VT));
+ AddToWorkList(Add.Val);
+ return DAG.getNode(ISD::AND, VT, N0, Add);
+ }
+ }
+ }
+
+ // Unconditionally lower X%C -> X-X/C*C. This allows the X/C logic to hack on
+ // the remainder operation.
+ if (N1C && !N1C->isNullValue()) {
+ SDOperand Div = DAG.getNode(ISD::UDIV, VT, N0, N1);
+ SDOperand Mul = DAG.getNode(ISD::MUL, VT, Div, N1);
+ SDOperand Sub = DAG.getNode(ISD::SUB, VT, N0, Mul);
+ AddToWorkList(Div.Val);
+ AddToWorkList(Mul.Val);
+ return Sub;
+ }
+
+ // undef % X -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // X % undef -> undef
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitMULHS(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold (mulhs x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mulhs x, 1) -> (sra x, size(x)-1)
+ if (N1C && N1C->getValue() == 1)
+ return DAG.getNode(ISD::SRA, N0.getValueType(), N0,
+ DAG.getConstant(MVT::getSizeInBits(N0.getValueType())-1,
+ TLI.getShiftAmountTy()));
+ // fold (mulhs x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitMULHU(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold (mulhu x, 0) -> 0
+ if (N1C && N1C->isNullValue())
+ return N1;
+ // fold (mulhu x, 1) -> 0
+ if (N1C && N1C->getValue() == 1)
+ return DAG.getConstant(0, N0.getValueType());
+ // fold (mulhu x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+
+ return SDOperand();
+}
+
+/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
+/// two operands of the same opcode, try to simplify it.
+SDOperand DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
+ SDOperand N0 = N->getOperand(0), N1 = N->getOperand(1);
+ MVT::ValueType VT = N0.getValueType();
+ assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
+
+ // For each of OP in AND/OR/XOR:
+ // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
+ // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
+ // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
+ // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
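+ // For example, (and (zext x), (zext y)) becomes (zext (and x, y)), so the
+ // AND is performed in the narrower source type.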
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
+ N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::TRUNCATE) &&
+ N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) {
+ SDOperand ORNode = DAG.getNode(N->getOpcode(),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(ORNode.Val);
+ return DAG.getNode(N0.getOpcode(), VT, ORNode);
+ }
+
+ // For each of OP in SHL/SRL/SRA/AND...
+ // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
+ // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
+ // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
+ N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
+ N0.getOperand(1) == N1.getOperand(1)) {
+ SDOperand ORNode = DAG.getNode(N->getOpcode(),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(ORNode.Val);
+ return DAG.getNode(N0.getOpcode(), VT, ORNode, N0.getOperand(1));
+ }
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitAND(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ SDOperand LL, LR, RL, RR, CC0, CC1;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT::ValueType VT = N1.getValueType();
+
+ // fold vector ops
+ if (MVT::isVector(VT)) {
+ SDOperand FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.Val) return FoldedVOp;
+ }
+
+ // fold (and x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (and c1, c2) -> c1&c2
+ if (N0C && N1C)
+ return DAG.getNode(ISD::AND, VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::AND, VT, N1, N0);
+ // fold (and x, -1) -> x
+ if (N1C && N1C->isAllOnesValue())
+ return N0;
+ // if (and x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDOperand(N, 0), MVT::getIntVTBitMask(VT)))
+ return DAG.getConstant(0, VT);
+ // reassociate and
+ SDOperand RAND = ReassociateOps(ISD::AND, N0, N1);
+ if (RAND.Val != 0)
+ return RAND;
+ // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF
+ if (N1C && N0.getOpcode() == ISD::OR)
+ if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if ((ORI->getValue() & N1C->getValue()) == N1C->getValue())
+ return N1;
+ // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ unsigned InMask = MVT::getIntVTBitMask(N0.getOperand(0).getValueType());
+ if (DAG.MaskedValueIsZero(N0.getOperand(0),
+ ~N1C->getValue() & InMask)) {
+ SDOperand Zext = DAG.getNode(ISD::ZERO_EXTEND, N0.getValueType(),
+ N0.getOperand(0));
+
+ // Replace uses of the AND with uses of the Zero extend node.
+ CombineTo(N, Zext);
+
+ // We actually want to replace all uses of the any_extend with the
+ // zero_extend, to avoid duplicating things. This will later cause this
+ // AND to be folded.
+ CombineTo(N0.Val, Zext);
+ return SDOperand(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ MVT::isInteger(LL.getValueType())) {
+ // fold (X == 0) & (Y == 0) -> (X|Y == 0)
+ if (cast<ConstantSDNode>(LR)->getValue() == 0 && Op1 == ISD::SETEQ) {
+ SDOperand ORNode = DAG.getNode(ISD::OR, LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.Val);
+ return DAG.getSetCC(VT, ORNode, LR, Op1);
+ }
+ // fold (X == -1) & (Y == -1) -> (X&Y == -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
+ SDOperand ANDNode = DAG.getNode(ISD::AND, LR.getValueType(), LL, RL);
+ AddToWorkList(ANDNode.Val);
+ return DAG.getSetCC(VT, ANDNode, LR, Op1);
+ }
+ // fold (X > -1) & (Y > -1) -> (X|Y > -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
+ SDOperand ORNode = DAG.getNode(ISD::OR, LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.Val);
+ return DAG.getSetCC(VT, ORNode, LR, Op1);
+ }
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = MVT::isInteger(LL.getValueType());
+ ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID)
+ return DAG.getSetCC(N0.getValueType(), LL, LR, Result);
+ }
+ }
+
+ // Simplify: and (op x...), (op y...) -> (op (and x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDOperand Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.Val) return Tmp;
+ }
+
+ // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+ // fold (and (sra)) -> (and (srl)) when possible.
+ if (!MVT::isVector(VT) &&
+ SimplifyDemandedBits(SDOperand(N, 0)))
+ return SDOperand(N, 0);
+ // fold (zext_inreg (extload x)) -> (zextload x)
+ if (ISD::isEXTLoad(N0.Val) && ISD::isUNINDEXEDLoad(N0.Val)) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ MVT::ValueType EVT = LN0->getLoadedVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ if (DAG.MaskedValueIsZero(N1, ~0ULL << MVT::getSizeInBits(EVT)) &&
+ (!AfterLegalize || TLI.isLoadXLegal(ISD::ZEXTLOAD, EVT))) {
+ SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(),
+ LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(N0.Val, ExtLoad, ExtLoad.getValue(1));
+ return SDOperand(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
+ if (ISD::isSEXTLoad(N0.Val) && ISD::isUNINDEXEDLoad(N0.Val) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ MVT::ValueType EVT = LN0->getLoadedVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ if (DAG.MaskedValueIsZero(N1, ~0ULL << MVT::getSizeInBits(EVT)) &&
+ (!AfterLegalize || TLI.isLoadXLegal(ISD::ZEXTLOAD, EVT))) {
+ SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(),
+ LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(N0.Val, ExtLoad, ExtLoad.getValue(1));
+ return SDOperand(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (and (load x), 255) -> (zextload x, i8)
+ // fold (and (extload x, i16), 255) -> (zextload x, i8)
+ if (N1C && N0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ if (LN0->getExtensionType() != ISD::SEXTLOAD &&
+ LN0->getAddressingMode() == ISD::UNINDEXED &&
+ N0.hasOneUse()) {
+ MVT::ValueType EVT, LoadedVT;
+ if (N1C->getValue() == 255)
+ EVT = MVT::i8;
+ else if (N1C->getValue() == 65535)
+ EVT = MVT::i16;
+ else if (N1C->getValue() == ~0U)
+ EVT = MVT::i32;
+ else
+ EVT = MVT::Other;
+
+ LoadedVT = LN0->getLoadedVT();
+ if (EVT != MVT::Other && LoadedVT > EVT &&
+ (!AfterLegalize || TLI.isLoadXLegal(ISD::ZEXTLOAD, EVT))) {
+ MVT::ValueType PtrType = N0.getOperand(1).getValueType();
+ // For big endian targets, we need to add an offset to the pointer to
+ // load the correct bytes. For little endian systems, we merely need to
+ // read fewer bytes from the same pointer.
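+ // For example, narrowing an i32 load to an i8 zextload on a big endian
+ // target uses PtrOff = (32 - 8) / 8 = 3, the offset of the low byte.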
+ unsigned PtrOff =
+ (MVT::getSizeInBits(LoadedVT) - MVT::getSizeInBits(EVT)) / 8;
+ SDOperand NewPtr = LN0->getBasePtr();
+ if (!TLI.isLittleEndian())
+ NewPtr = DAG.getNode(ISD::ADD, PtrType, NewPtr,
+ DAG.getConstant(PtrOff, PtrType));
+ AddToWorkList(NewPtr.Val);
+ SDOperand Load =
+ DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(), NewPtr,
+ LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(N0.Val, Load, Load.getValue(1));
+ return SDOperand(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitOR(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ SDOperand LL, LR, RL, RR, CC0, CC1;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT::ValueType VT = N1.getValueType();
+ unsigned OpSizeInBits = MVT::getSizeInBits(VT);
+
+ // fold vector ops
+ if (MVT::isVector(VT)) {
+ SDOperand FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.Val) return FoldedVOp;
+ }
+
+ // fold (or x, undef) -> -1
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(~0ULL, VT);
+ // fold (or c1, c2) -> c1|c2
+ if (N0C && N1C)
+ return DAG.getNode(ISD::OR, VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::OR, VT, N1, N0);
+ // fold (or x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (or x, -1) -> -1
+ if (N1C && N1C->isAllOnesValue())
+ return N1;
+ // fold (or x, c) -> c iff (x & ~c) == 0
+ if (N1C &&
+ DAG.MaskedValueIsZero(N0,~N1C->getValue() & (~0ULL>>(64-OpSizeInBits))))
+ return N1;
+ // reassociate or
+ SDOperand ROR = ReassociateOps(ISD::OR, N0, N1);
+ if (ROR.Val != 0)
+ return ROR;
+ // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+ if (N1C && N0.getOpcode() == ISD::AND && N0.Val->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
+ return DAG.getNode(ISD::AND, VT, DAG.getNode(ISD::OR, VT, N0.getOperand(0),
+ N1),
+ DAG.getConstant(N1C->getValue() | C1->getValue(), VT));
+ }
+ // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ MVT::isInteger(LL.getValueType())) {
+ // fold (X != 0) | (Y != 0) -> (X|Y != 0)
+ // fold (X < 0) | (Y < 0) -> (X|Y < 0)
+ if (cast<ConstantSDNode>(LR)->getValue() == 0 &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
+ SDOperand ORNode = DAG.getNode(ISD::OR, LR.getValueType(), LL, RL);
+ AddToWorkList(ORNode.Val);
+ return DAG.getSetCC(VT, ORNode, LR, Op1);
+ }
+ // fold (X != -1) | (Y != -1) -> (X&Y != -1)
+ // fold (X > -1) | (Y > -1) -> (X&Y > -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
+ SDOperand ANDNode = DAG.getNode(ISD::AND, LR.getValueType(), LL, RL);
+ AddToWorkList(ANDNode.Val);
+ return DAG.getSetCC(VT, ANDNode, LR, Op1);
+ }
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = MVT::isInteger(LL.getValueType());
+ ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID)
+ return DAG.getSetCC(N0.getValueType(), LL, LR, Result);
+ }
+ }
+
+ // Simplify: or (op x...), (op y...) -> (op (or x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDOperand Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.Val) return Tmp;
+ }
+
+ // (X & C1) | (Y & C2) -> (X|Y) & C3 if possible.
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ N1.getOperand(1).getOpcode() == ISD::Constant &&
+ // Don't increase # computations.
+ (N0.Val->hasOneUse() || N1.Val->hasOneUse())) {
+ // We can only do this xform if we know that bits from X that are set in C2
+ // but not in C1 are already zero. Likewise for Y.
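+ // For example, (or (and X, 0xFF00), (and Y, 0x00FF)) becomes
+ // (and (or X, Y), 0xFFFF) when X's low byte and Y's high byte are known
+ // to be zero.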
+ uint64_t LHSMask = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+ uint64_t RHSMask = cast<ConstantSDNode>(N1.getOperand(1))->getValue();
+
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+ DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+ SDOperand X =DAG.getNode(ISD::OR, VT, N0.getOperand(0), N1.getOperand(0));
+ return DAG.getNode(ISD::AND, VT, X, DAG.getConstant(LHSMask|RHSMask, VT));
+ }
+ }
+
+
+ // See if this is some rotate idiom.
+ if (SDNode *Rot = MatchRotate(N0, N1))
+ return SDOperand(Rot, 0);
+
+ return SDOperand();
+}
+
+
+/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
+static bool MatchRotateHalf(SDOperand Op, SDOperand &Shift, SDOperand &Mask) {
+ if (Op.getOpcode() == ISD::AND) {
+ if (isa<ConstantSDNode>(Op.getOperand(1))) {
+ Mask = Op.getOperand(1);
+ Op = Op.getOperand(0);
+ } else {
+ return false;
+ }
+ }
+
+ if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
+ Shift = Op;
+ return true;
+ }
+ return false;
+}
+
+
+// MatchRotate - Handle an 'or' of two operands. If this is one of the many
+// idioms for rotate, and if the target supports rotation instructions, generate
+// a rot[lr].
+SDNode *DAGCombiner::MatchRotate(SDOperand LHS, SDOperand RHS) {
+ // Must be a legal type. Expanded and promoted types won't work with rotates.
+ MVT::ValueType VT = LHS.getValueType();
+ if (!TLI.isTypeLegal(VT)) return 0;
+
+ // The target must have at least one rotate flavor.
+ bool HasROTL = TLI.isOperationLegal(ISD::ROTL, VT);
+ bool HasROTR = TLI.isOperationLegal(ISD::ROTR, VT);
+ if (!HasROTL && !HasROTR) return 0;
+
+ // Match "(X shl/srl V1) & V2" where V2 may not be present.
+ SDOperand LHSShift; // The shift.
+ SDOperand LHSMask; // AND value if any.
+ if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
+ return 0; // Not part of a rotate.
+
+ SDOperand RHSShift; // The shift.
+ SDOperand RHSMask; // AND value if any.
+ if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
+ return 0; // Not part of a rotate.
+
+ if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
+ return 0; // Not shifting the same value.
+
+ if (LHSShift.getOpcode() == RHSShift.getOpcode())
+ return 0; // Shifts must disagree.
+
+ // Canonicalize shl to left side in a shl/srl pair.
+ if (RHSShift.getOpcode() == ISD::SHL) {
+ std::swap(LHS, RHS);
+ std::swap(LHSShift, RHSShift);
+ std::swap(LHSMask , RHSMask );
+ }
+
+ unsigned OpSizeInBits = MVT::getSizeInBits(VT);
+ SDOperand LHSShiftArg = LHSShift.getOperand(0);
+ SDOperand LHSShiftAmt = LHSShift.getOperand(1);
+ SDOperand RHSShiftAmt = RHSShift.getOperand(1);
+
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
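+ // For example, with a 32-bit type, (or (shl x, 8), (srl x, 24)) becomes
+ // (rotl x, 8), or equivalently (rotr x, 24).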
+ if (LHSShiftAmt.getOpcode() == ISD::Constant &&
+ RHSShiftAmt.getOpcode() == ISD::Constant) {
+ uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getValue();
+ uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getValue();
+ if ((LShVal + RShVal) != OpSizeInBits)
+ return 0;
+
+ SDOperand Rot;
+ if (HasROTL)
+ Rot = DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt);
+ else
+ Rot = DAG.getNode(ISD::ROTR, VT, LHSShiftArg, RHSShiftAmt);
+
+ // If there is an AND of either shifted operand, apply it to the result.
+ if (LHSMask.Val || RHSMask.Val) {
+ uint64_t Mask = MVT::getIntVTBitMask(VT);
+
+ if (LHSMask.Val) {
+ uint64_t RHSBits = (1ULL << LShVal)-1;
+ Mask &= cast<ConstantSDNode>(LHSMask)->getValue() | RHSBits;
+ }
+ if (RHSMask.Val) {
+ uint64_t LHSBits = ~((1ULL << (OpSizeInBits-RShVal))-1);
+ Mask &= cast<ConstantSDNode>(RHSMask)->getValue() | LHSBits;
+ }
+
+ Rot = DAG.getNode(ISD::AND, VT, Rot, DAG.getConstant(Mask, VT));
+ }
+
+ return Rot.Val;
+ }
+
+ // If there is a mask here, and we have a variable shift, we can't be sure
+ // that we're masking out the right stuff.
+ if (LHSMask.Val || RHSMask.Val)
+ return 0;
+
+ // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
+ // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
+ if (RHSShiftAmt.getOpcode() == ISD::SUB &&
+ LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
+ if (SUBC->getValue() == OpSizeInBits)
+ if (HasROTL)
+ return DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt).Val;
+ else
+ return DAG.getNode(ISD::ROTR, VT, LHSShiftArg, RHSShiftAmt).Val;
+ }
+ }
+
+ // fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotr x, y)
+ // fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotl x, (sub 32, y))
+ if (LHSShiftAmt.getOpcode() == ISD::SUB &&
+ RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
+ if (ConstantSDNode *SUBC =
+ dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
+ if (SUBC->getValue() == OpSizeInBits)
+ if (HasROTL)
+ return DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt).Val;
+ else
+ return DAG.getNode(ISD::ROTR, VT, LHSShiftArg, RHSShiftAmt).Val;
+ }
+ }
+
+ // Look for sign/zext/any-extended cases:
+ if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+ || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND) &&
+ (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+ || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND)) {
+ SDOperand LExtOp0 = LHSShiftAmt.getOperand(0);
+ SDOperand RExtOp0 = RHSShiftAmt.getOperand(0);
+ if (RExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0.getOperand(1) == LExtOp0) {
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotl x, y)
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotr x, (sub 32, y))
+ if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
+ if (SUBC->getValue() == OpSizeInBits) {
+ if (HasROTL)
+ return DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt).Val;
+ else
+ return DAG.getNode(ISD::ROTR, VT, LHSShiftArg, RHSShiftAmt).Val;
+ }
+ }
+ } else if (LExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0 == LExtOp0.getOperand(1)) {
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotr x, y)
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotl x, (sub 32, y))
+      if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
+ if (SUBC->getValue() == OpSizeInBits) {
+ if (HasROTL)
+            return DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt).Val;
+          else
+            return DAG.getNode(ISD::ROTR, VT, LHSShiftArg, RHSShiftAmt).Val;
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+SDOperand DAGCombiner::visitXOR(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ SDOperand LHS, RHS, CC;
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT::ValueType VT = N0.getValueType();
+
+ // fold vector ops
+ if (MVT::isVector(VT)) {
+ SDOperand FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.Val) return FoldedVOp;
+ }
+
+ // fold (xor x, undef) -> undef
+ if (N0.getOpcode() == ISD::UNDEF)
+ return N0;
+ if (N1.getOpcode() == ISD::UNDEF)
+ return N1;
+ // fold (xor c1, c2) -> c1^c2
+ if (N0C && N1C)
+ return DAG.getNode(ISD::XOR, VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::XOR, VT, N1, N0);
+ // fold (xor x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // reassociate xor
+ SDOperand RXOR = ReassociateOps(ISD::XOR, N0, N1);
+ if (RXOR.Val != 0)
+ return RXOR;
+ // fold !(x cc y) -> (x !cc y)
+ if (N1C && N1C->getValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+ bool isInt = MVT::isInteger(LHS.getValueType());
+ ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+ isInt);
+ if (N0.getOpcode() == ISD::SETCC)
+ return DAG.getSetCC(VT, LHS, RHS, NotCC);
+ if (N0.getOpcode() == ISD::SELECT_CC)
+ return DAG.getSelectCC(LHS, RHS, N0.getOperand(2),N0.getOperand(3),NotCC);
+ assert(0 && "Unhandled SetCC Equivalent!");
+ abort();
+ }
+  // fold !(x or y) -> (!x and !y), !(x and y) -> (!x or !y) iff x or y is a setcc
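+  // e.g. (xor (or setcc1, setcc2), 1) -> (and (xor setcc1, 1), (xor setcc2, 1))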
+ if (N1C && N1C->getValue() == 1 && VT == MVT::i1 &&
+ (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ SDOperand LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
+ unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+      LHS = DAG.getNode(ISD::XOR, VT, LHS, N1);  // LHS = ~LHS
+ RHS = DAG.getNode(ISD::XOR, VT, RHS, N1); // RHS = ~RHS
+ AddToWorkList(LHS.Val); AddToWorkList(RHS.Val);
+ return DAG.getNode(NewOpcode, VT, LHS, RHS);
+ }
+ }
+  // fold ~(x or y) -> (~x and ~y), ~(x and y) -> (~x or ~y) iff x or y is a constant
+ if (N1C && N1C->isAllOnesValue() &&
+ (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ SDOperand LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
+ unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+      LHS = DAG.getNode(ISD::XOR, VT, LHS, N1);  // LHS = ~LHS
+ RHS = DAG.getNode(ISD::XOR, VT, RHS, N1); // RHS = ~RHS
+ AddToWorkList(LHS.Val); AddToWorkList(RHS.Val);
+ return DAG.getNode(NewOpcode, VT, LHS, RHS);
+ }
+ }
+ // fold (xor (xor x, c1), c2) -> (xor x, c1^c2)
+ if (N1C && N0.getOpcode() == ISD::XOR) {
+ ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (N00C)
+ return DAG.getNode(ISD::XOR, VT, N0.getOperand(1),
+ DAG.getConstant(N1C->getValue()^N00C->getValue(), VT));
+ if (N01C)
+ return DAG.getNode(ISD::XOR, VT, N0.getOperand(0),
+ DAG.getConstant(N1C->getValue()^N01C->getValue(), VT));
+ }
+ // fold (xor x, x) -> 0
+ if (N0 == N1) {
+ if (!MVT::isVector(VT)) {
+ return DAG.getConstant(0, VT);
+ } else if (!AfterLegalize || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
+ // Produce a vector of zeros.
+ SDOperand El = DAG.getConstant(0, MVT::getVectorElementType(VT));
+ std::vector<SDOperand> Ops(MVT::getVectorNumElements(VT), El);
+ return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+ }
+ }
+
+ // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDOperand Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.Val) return Tmp;
+ }
+
+ // Simplify the expression using non-local knowledge.
+ if (!MVT::isVector(VT) &&
+ SimplifyDemandedBits(SDOperand(N, 0)))
+ return SDOperand(N, 0);
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSHL(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT::ValueType VT = N0.getValueType();
+ unsigned OpSizeInBits = MVT::getSizeInBits(VT);
+
+ // fold (shl c1, c2) -> c1<<c2
+ if (N0C && N1C)
+ return DAG.getNode(ISD::SHL, VT, N0, N1);
+ // fold (shl 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (shl x, c >= size(x)) -> undef
+ if (N1C && N1C->getValue() >= OpSizeInBits)
+ return DAG.getNode(ISD::UNDEF, VT);
+ // fold (shl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // if (shl x, c) is known to be zero, return 0
+ if (DAG.MaskedValueIsZero(SDOperand(N, 0), MVT::getIntVTBitMask(VT)))
+ return DAG.getConstant(0, VT);
+ if (N1C && SimplifyDemandedBits(SDOperand(N, 0)))
+ return SDOperand(N, 0);
+ // fold (shl (shl x, c1), c2) -> 0 or (shl x, c1+c2)
+ if (N1C && N0.getOpcode() == ISD::SHL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+ uint64_t c2 = N1C->getValue();
+    if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+ // fold (shl (srl x, c1), c2) -> (shl (and x, -1 << c1), c2-c1) or
+ // (srl (and x, -1 << c1), c1-c2)
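+  // e.g. for i32: (shl (srl x, 4), 8) -> (shl (and x, 0xFFFFFFF0), 4)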
+ if (N1C && N0.getOpcode() == ISD::SRL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+ uint64_t c2 = N1C->getValue();
+ SDOperand Mask = DAG.getNode(ISD::AND, VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL << c1, VT));
+ if (c2 > c1)
+ return DAG.getNode(ISD::SHL, VT, Mask,
+ DAG.getConstant(c2-c1, N1.getValueType()));
+ else
+ return DAG.getNode(ISD::SRL, VT, Mask,
+ DAG.getConstant(c1-c2, N1.getValueType()));
+ }
+ // fold (shl (sra x, c1), c1) -> (and x, -1 << c1)
+ if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1))
+ return DAG.getNode(ISD::AND, VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL << N1C->getValue(), VT));
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSRA(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT::ValueType VT = N0.getValueType();
+
+ // fold (sra c1, c2) -> c1>>c2
+ if (N0C && N1C)
+ return DAG.getNode(ISD::SRA, VT, N0, N1);
+ // fold (sra 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (sra -1, x) -> -1
+ if (N0C && N0C->isAllOnesValue())
+ return N0;
+ // fold (sra x, c >= size(x)) -> undef
+ if (N1C && N1C->getValue() >= MVT::getSizeInBits(VT))
+ return DAG.getNode(ISD::UNDEF, VT);
+ // fold (sra x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
+ // sext_inreg.
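+  // e.g. for i32: (sra (shl x, 24), 24) -> (sign_extend_inreg x, i8)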
+ if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+ unsigned LowBits = MVT::getSizeInBits(VT) - (unsigned)N1C->getValue();
+ MVT::ValueType EVT;
+ switch (LowBits) {
+ default: EVT = MVT::Other; break;
+ case 1: EVT = MVT::i1; break;
+ case 8: EVT = MVT::i8; break;
+ case 16: EVT = MVT::i16; break;
+ case 32: EVT = MVT::i32; break;
+ }
+ if (EVT > MVT::Other && TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, VT, N0.getOperand(0),
+ DAG.getValueType(EVT));
+ }
+
+ // fold (sra (sra x, c1), c2) -> (sra x, c1+c2)
+ if (N1C && N0.getOpcode() == ISD::SRA) {
+ if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ unsigned Sum = N1C->getValue() + C1->getValue();
+ if (Sum >= MVT::getSizeInBits(VT)) Sum = MVT::getSizeInBits(VT)-1;
+ return DAG.getNode(ISD::SRA, VT, N0.getOperand(0),
+ DAG.getConstant(Sum, N1C->getValueType(0)));
+ }
+ }
+
+ // Simplify, based on bits shifted out of the LHS.
+ if (N1C && SimplifyDemandedBits(SDOperand(N, 0)))
+ return SDOperand(N, 0);
+
+
+ // If the sign bit is known to be zero, switch this to a SRL.
+ if (DAG.MaskedValueIsZero(N0, MVT::getIntVTSignBit(VT)))
+ return DAG.getNode(ISD::SRL, VT, N0, N1);
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSRL(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ MVT::ValueType VT = N0.getValueType();
+ unsigned OpSizeInBits = MVT::getSizeInBits(VT);
+
+ // fold (srl c1, c2) -> c1 >>u c2
+ if (N0C && N1C)
+ return DAG.getNode(ISD::SRL, VT, N0, N1);
+ // fold (srl 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (srl x, c >= size(x)) -> undef
+ if (N1C && N1C->getValue() >= OpSizeInBits)
+ return DAG.getNode(ISD::UNDEF, VT);
+ // fold (srl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // if (srl x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDOperand(N, 0), ~0ULL >> (64-OpSizeInBits)))
+ return DAG.getConstant(0, VT);
+
+ // fold (srl (srl x, c1), c2) -> 0 or (srl x, c1+c2)
+ if (N1C && N0.getOpcode() == ISD::SRL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+ uint64_t c2 = N1C->getValue();
+    if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SRL, VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+
+ // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ // Shifting in all undef bits?
+ MVT::ValueType SmallVT = N0.getOperand(0).getValueType();
+ if (N1C->getValue() >= MVT::getSizeInBits(SmallVT))
+ return DAG.getNode(ISD::UNDEF, VT);
+
+ SDOperand SmallShift = DAG.getNode(ISD::SRL, SmallVT, N0.getOperand(0), N1);
+ AddToWorkList(SmallShift.Val);
+ return DAG.getNode(ISD::ANY_EXTEND, VT, SmallShift);
+ }
+
+ // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
+ // bit, which is unmodified by sra.
+ if (N1C && N1C->getValue()+1 == MVT::getSizeInBits(VT)) {
+ if (N0.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::SRL, VT, N0.getOperand(0), N1);
+ }
+
+ // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
+ if (N1C && N0.getOpcode() == ISD::CTLZ &&
+ N1C->getValue() == Log2_32(MVT::getSizeInBits(VT))) {
+ uint64_t KnownZero, KnownOne, Mask = MVT::getIntVTBitMask(VT);
+ DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
+
+ // If any of the input bits are KnownOne, then the input couldn't be all
+ // zeros, thus the result of the srl will always be zero.
+ if (KnownOne) return DAG.getConstant(0, VT);
+
+    // If all of the bits input to the ctlz node are known to be zero, then
+ // the result of the ctlz is "32" and the result of the shift is one.
+ uint64_t UnknownBits = ~KnownZero & Mask;
+ if (UnknownBits == 0) return DAG.getConstant(1, VT);
+
+ // Otherwise, check to see if there is exactly one bit input to the ctlz.
+ if ((UnknownBits & (UnknownBits-1)) == 0) {
+      // Okay, we know that only the single bit specified by UnknownBits
+ // could be set on input to the CTLZ node. If this bit is set, the SRL
+ // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
+ // to an SRL,XOR pair, which is likely to simplify more.
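+      // For example (i32, only bit 5 possibly set): (srl (ctlz x), 5) becomes
+      // (xor (srl x, 5), 1), which is 0 when bit 5 is set and 1 when it is clear.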
+ unsigned ShAmt = CountTrailingZeros_64(UnknownBits);
+ SDOperand Op = N0.getOperand(0);
+ if (ShAmt) {
+ Op = DAG.getNode(ISD::SRL, VT, Op,
+ DAG.getConstant(ShAmt, TLI.getShiftAmountTy()));
+ AddToWorkList(Op.Val);
+ }
+ return DAG.getNode(ISD::XOR, VT, Op, DAG.getConstant(1, VT));
+ }
+ }
+
+ // fold operands of srl based on knowledge that the low bits are not
+ // demanded.
+ if (N1C && SimplifyDemandedBits(SDOperand(N, 0)))
+ return SDOperand(N, 0);
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitCTLZ(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold (ctlz c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTLZ, VT, N0);
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitCTTZ(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold (cttz c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTTZ, VT, N0);
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitCTPOP(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold (ctpop c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTPOP, VT, N0);
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSELECT(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ SDOperand N2 = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold select C, X, X -> X
+ if (N1 == N2)
+ return N1;
+ // fold select true, X, Y -> X
+ if (N0C && !N0C->isNullValue())
+ return N1;
+ // fold select false, X, Y -> Y
+ if (N0C && N0C->isNullValue())
+ return N2;
+ // fold select C, 1, X -> C | X
+ if (MVT::i1 == VT && N1C && N1C->getValue() == 1)
+ return DAG.getNode(ISD::OR, VT, N0, N2);
+ // fold select C, 0, X -> ~C & X
+ // FIXME: this should check for C type == X type, not i1?
+ if (MVT::i1 == VT && N1C && N1C->isNullValue()) {
+ SDOperand XORNode = DAG.getNode(ISD::XOR, VT, N0, DAG.getConstant(1, VT));
+ AddToWorkList(XORNode.Val);
+ return DAG.getNode(ISD::AND, VT, XORNode, N2);
+ }
+ // fold select C, X, 1 -> ~C | X
+ if (MVT::i1 == VT && N2C && N2C->getValue() == 1) {
+ SDOperand XORNode = DAG.getNode(ISD::XOR, VT, N0, DAG.getConstant(1, VT));
+ AddToWorkList(XORNode.Val);
+ return DAG.getNode(ISD::OR, VT, XORNode, N1);
+ }
+ // fold select C, X, 0 -> C & X
+ // FIXME: this should check for C type == X type, not i1?
+ if (MVT::i1 == VT && N2C && N2C->isNullValue())
+ return DAG.getNode(ISD::AND, VT, N0, N1);
+ // fold X ? X : Y --> X ? 1 : Y --> X | Y
+ if (MVT::i1 == VT && N0 == N1)
+ return DAG.getNode(ISD::OR, VT, N0, N2);
+ // fold X ? Y : X --> X ? Y : 0 --> X & Y
+ if (MVT::i1 == VT && N0 == N2)
+ return DAG.getNode(ISD::AND, VT, N0, N1);
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N1, N2))
+ return SDOperand(N, 0); // Don't revisit N.
+
+ // fold selects based on a setcc into other things, such as min/max/abs
+ if (N0.getOpcode() == ISD::SETCC)
+ // FIXME:
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types
+ if (TLI.isOperationLegal(ISD::SELECT_CC, MVT::Other))
+ return DAG.getNode(ISD::SELECT_CC, VT, N0.getOperand(0), N0.getOperand(1),
+ N1, N2, N0.getOperand(2));
+ else
+ return SimplifySelect(N0, N1, N2);
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSELECT_CC(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ SDOperand N2 = N->getOperand(2);
+ SDOperand N3 = N->getOperand(3);
+ SDOperand N4 = N->getOperand(4);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+
+ // fold select_cc lhs, rhs, x, x, cc -> x
+ if (N2 == N3)
+ return N2;
+
+ // Determine if the condition we're dealing with is constant
+ SDOperand SCC = SimplifySetCC(TLI.getSetCCResultTy(), N0, N1, CC, false);
+ if (SCC.Val) AddToWorkList(SCC.Val);
+
+ if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.Val)) {
+ if (SCCC->getValue())
+ return N2; // cond always true -> true val
+ else
+ return N3; // cond always false -> false val
+ }
+
+ // Fold to a simpler select_cc
+ if (SCC.Val && SCC.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::SELECT_CC, N2.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1), N2, N3,
+ SCC.getOperand(2));
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N2, N3))
+ return SDOperand(N, 0); // Don't revisit N.
+
+ // fold select_cc into other things, such as min/max/abs
+ return SimplifySelectCC(N0, N1, N2, N3, CC);
+}
+
+SDOperand DAGCombiner::visitSETCC(SDNode *N) {
+ return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
+ cast<CondCodeSDNode>(N->getOperand(2))->get());
+}
+
+SDOperand DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold (sext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::SIGN_EXTEND, VT, N0);
+
+ // fold (sext (sext x)) -> (sext x)
+ // fold (sext (aext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND, VT, N0.getOperand(0));
+
+ // fold (sext (truncate (load x))) -> (sext (smaller load x))
+ // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDOperand NarrowLoad = ReduceLoadWidth(N0.Val);
+ if (NarrowLoad.Val) {
+ if (NarrowLoad.Val != N0.Val)
+ CombineTo(N0.Val, NarrowLoad);
+ return DAG.getNode(ISD::SIGN_EXTEND, VT, NarrowLoad);
+ }
+ }
+
+ // See if the value being truncated is already sign extended. If so, just
+ // eliminate the trunc/sext pair.
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDOperand Op = N0.getOperand(0);
+ unsigned OpBits = MVT::getSizeInBits(Op.getValueType());
+ unsigned MidBits = MVT::getSizeInBits(N0.getValueType());
+ unsigned DestBits = MVT::getSizeInBits(VT);
+ unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+      // bits, it is already sufficiently sign extended; just return Op.
+ if (NumSignBits > DestBits-MidBits)
+ return Op;
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::SIGN_EXTEND, VT, Op);
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::TRUNCATE, VT, Op);
+ }
+
+ // fold (sext (truncate x)) -> (sextinreg x).
+ if (!AfterLegalize || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
+ N0.getValueType())) {
+ if (Op.getValueType() < VT)
+ Op = DAG.getNode(ISD::ANY_EXTEND, VT, Op);
+ else if (Op.getValueType() > VT)
+ Op = DAG.getNode(ISD::TRUNCATE, VT, Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, VT, Op,
+ DAG.getValueType(N0.getValueType()));
+ }
+ }
+
+ // fold (sext (load x)) -> (sext (truncate (sextload x)))
+ if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() &&
+ (!AfterLegalize||TLI.isLoadXLegal(ISD::SEXTLOAD, N0.getValueType()))){
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(),
+ N0.getValueType(),
+ LN0->isVolatile(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDOperand(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
+ // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
+ if ((ISD::isSEXTLoad(N0.Val) || ISD::isEXTLoad(N0.Val)) &&
+ ISD::isUNINDEXEDLoad(N0.Val) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ MVT::ValueType EVT = LN0->getLoadedVT();
+ if (!AfterLegalize || TLI.isLoadXLegal(ISD::SEXTLOAD, EVT)) {
+ SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDOperand(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // sext(setcc x,y,cc) -> select_cc x, y, -1, 0, cc
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDOperand SCC =
+ SimplifySelectCC(N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(~0ULL, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.Val) return SCC;
+ }
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitZERO_EXTEND(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold (zext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, VT, N0);
+ // fold (zext (zext x)) -> (zext x)
+ // fold (zext (aext x)) -> (zext x)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::ZERO_EXTEND, VT, N0.getOperand(0));
+
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDOperand NarrowLoad = ReduceLoadWidth(N0.Val);
+ if (NarrowLoad.Val) {
+ if (NarrowLoad.Val != N0.Val)
+ CombineTo(N0.Val, NarrowLoad);
+ return DAG.getNode(ISD::ZERO_EXTEND, VT, NarrowLoad);
+ }
+ }
+
+ // fold (zext (truncate x)) -> (and x, mask)
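+  // e.g. (zext (truncate i32 x to i8) to i32) -> (and x, 255)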
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (!AfterLegalize || TLI.isOperationLegal(ISD::AND, VT))) {
+ SDOperand Op = N0.getOperand(0);
+ if (Op.getValueType() < VT) {
+ Op = DAG.getNode(ISD::ANY_EXTEND, VT, Op);
+ } else if (Op.getValueType() > VT) {
+ Op = DAG.getNode(ISD::TRUNCATE, VT, Op);
+ }
+ return DAG.getZeroExtendInReg(Op, N0.getValueType());
+ }
+
+ // fold (zext (and (trunc x), cst)) -> (and x, cst).
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ SDOperand X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType() < VT) {
+ X = DAG.getNode(ISD::ANY_EXTEND, VT, X);
+ } else if (X.getValueType() > VT) {
+ X = DAG.getNode(ISD::TRUNCATE, VT, X);
+ }
+ uint64_t Mask = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+ return DAG.getNode(ISD::AND, VT, X, DAG.getConstant(Mask, VT));
+ }
+
+ // fold (zext (load x)) -> (zext (truncate (zextload x)))
+ if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() &&
+ (!AfterLegalize||TLI.isLoadXLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(),
+ N0.getValueType(),
+ LN0->isVolatile(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDOperand(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
+ // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
+ if ((ISD::isZEXTLoad(N0.Val) || ISD::isEXTLoad(N0.Val)) &&
+ ISD::isUNINDEXEDLoad(N0.Val) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ MVT::ValueType EVT = LN0->getLoadedVT();
+ SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDOperand(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDOperand SCC =
+ SimplifySelectCC(N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.Val) return SCC;
+ }
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitANY_EXTEND(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold (aext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::ANY_EXTEND, VT, N0);
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(N0.getOpcode(), VT, N0.getOperand(0));
+
+ // fold (aext (truncate (load x))) -> (aext (smaller load x))
+ // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDOperand NarrowLoad = ReduceLoadWidth(N0.Val);
+ if (NarrowLoad.Val) {
+ if (NarrowLoad.Val != N0.Val)
+ CombineTo(N0.Val, NarrowLoad);
+ return DAG.getNode(ISD::ANY_EXTEND, VT, NarrowLoad);
+ }
+ }
+
+ // fold (aext (truncate x))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDOperand TruncOp = N0.getOperand(0);
+ if (TruncOp.getValueType() == VT)
+ return TruncOp; // x iff x size == zext size.
+ if (TruncOp.getValueType() > VT)
+ return DAG.getNode(ISD::TRUNCATE, VT, TruncOp);
+ return DAG.getNode(ISD::ANY_EXTEND, VT, TruncOp);
+ }
+
+ // fold (aext (and (trunc x), cst)) -> (and x, cst).
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ SDOperand X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType() < VT) {
+ X = DAG.getNode(ISD::ANY_EXTEND, VT, X);
+ } else if (X.getValueType() > VT) {
+ X = DAG.getNode(ISD::TRUNCATE, VT, X);
+ }
+ uint64_t Mask = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+ return DAG.getNode(ISD::AND, VT, X, DAG.getConstant(Mask, VT));
+ }
+
+ // fold (aext (load x)) -> (aext (truncate (extload x)))
+ if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() &&
+ (!AfterLegalize||TLI.isLoadXLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDOperand ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(),
+ N0.getValueType(),
+ LN0->isVolatile(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDOperand(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
+ // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
+ // fold (aext ( extload x)) -> (aext (truncate (extload x)))
+ if (N0.getOpcode() == ISD::LOAD &&
+ !ISD::isNON_EXTLoad(N0.Val) && ISD::isUNINDEXEDLoad(N0.Val) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ MVT::ValueType EVT = LN0->getLoadedVT();
+ SDOperand ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDOperand(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDOperand SCC =
+ SimplifySelectCC(N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.Val)
+ return SCC;
+ }
+
+ return SDOperand();
+}
+
+/// ReduceLoadWidth - If the result of a wider load is shifted right by N
+/// bits and then truncated to a narrower type, where N is a multiple of
+/// the number of bits in the narrower type, transform it into a narrower
+/// load from address + N / (bits in the new type). If the result is to be
+/// extended, also fold the extension to form an extending load.
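+/// For example, on a little-endian target, truncating (srl (i32 load [p]), 16)
+/// to i16 can become an i16 load from address p+2.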
+SDOperand DAGCombiner::ReduceLoadWidth(SDNode *N) {
+ unsigned Opc = N->getOpcode();
+ ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+ SDOperand N0 = N->getOperand(0);
+ MVT::ValueType VT = N->getValueType(0);
+ MVT::ValueType EVT = N->getValueType(0);
+
+ // Special case: SIGN_EXTEND_INREG is basically truncating to EVT then
+  // extending back to VT.
+ if (Opc == ISD::SIGN_EXTEND_INREG) {
+ ExtType = ISD::SEXTLOAD;
+ EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ if (AfterLegalize && !TLI.isLoadXLegal(ISD::SEXTLOAD, EVT))
+ return SDOperand();
+ }
+
+ unsigned EVTBits = MVT::getSizeInBits(EVT);
+ unsigned ShAmt = 0;
+ bool CombineSRL = false;
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShAmt = N01->getValue();
+ // Is the shift amount a multiple of size of VT?
+ if ((ShAmt & (EVTBits-1)) == 0) {
+ N0 = N0.getOperand(0);
+ if (MVT::getSizeInBits(N0.getValueType()) <= EVTBits)
+ return SDOperand();
+ CombineSRL = true;
+ }
+ }
+ }
+
+ if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() &&
+ // Do not allow folding to i1 here. i1 is implicitly stored in memory in
+ // zero extended form: by shrinking the load, we lose track of the fact
+ // that it is already zero extended.
+ // FIXME: This should be reevaluated.
+ VT != MVT::i1) {
+ assert(MVT::getSizeInBits(N0.getValueType()) > EVTBits &&
+ "Cannot truncate to larger type!");
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ MVT::ValueType PtrType = N0.getOperand(1).getValueType();
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (!TLI.isLittleEndian())
+ ShAmt = MVT::getSizeInBits(N0.getValueType()) - ShAmt - EVTBits;
+ uint64_t PtrOff = ShAmt / 8;
+ SDOperand NewPtr = DAG.getNode(ISD::ADD, PtrType, LN0->getBasePtr(),
+ DAG.getConstant(PtrOff, PtrType));
+ AddToWorkList(NewPtr.Val);
+ SDOperand Load = (ExtType == ISD::NON_EXTLOAD)
+ ? DAG.getLoad(VT, LN0->getChain(), NewPtr,
+ LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->isVolatile(), LN0->getAlignment())
+ : DAG.getExtLoad(ExtType, VT, LN0->getChain(), NewPtr,
+ LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(), LN0->getAlignment());
+ AddToWorkList(N);
+ if (CombineSRL) {
+ std::vector<SDNode*> NowDead;
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), NowDead);
+ CombineTo(N->getOperand(0).Val, Load);
+ } else
+ CombineTo(N0.Val, Load, Load.getValue(1));
+ if (ShAmt) {
+ if (Opc == ISD::SIGN_EXTEND_INREG)
+ return DAG.getNode(Opc, VT, Load, N->getOperand(1));
+ else
+ return DAG.getNode(Opc, VT, Load);
+ }
+ return SDOperand(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ return SDOperand();
+}
+
+
+SDOperand DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ MVT::ValueType VT = N->getValueType(0);
+ MVT::ValueType EVT = cast<VTSDNode>(N1)->getVT();
+ unsigned EVTBits = MVT::getSizeInBits(EVT);
+
+ // fold (sext_in_reg c1) -> c1
+ if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, VT, N0, N1);
+
+ // If the input is already sign extended, just drop the extension.
+ if (DAG.ComputeNumSignBits(N0) >= MVT::getSizeInBits(VT)-EVTBits+1)
+ return N0;
+
+ // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ EVT < cast<VTSDNode>(N0.getOperand(1))->getVT()) {
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, VT, N0.getOperand(0), N1);
+ }
+
+ // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
+ if (DAG.MaskedValueIsZero(N0, 1ULL << (EVTBits-1)))
+ return DAG.getZeroExtendInReg(N0, EVT);
+
+ // fold operands of sext_in_reg based on knowledge that the top bits are not
+ // demanded.
+ if (SimplifyDemandedBits(SDOperand(N, 0)))
+ return SDOperand(N, 0);
+
+ // fold (sext_in_reg (load x)) -> (smaller sextload x)
+ // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
+ SDOperand NarrowLoad = ReduceLoadWidth(N);
+ if (NarrowLoad.Val)
+ return NarrowLoad;
+
+ // fold (sext_in_reg (srl X, 24), i8) -> sra X, 24
+ // fold (sext_in_reg (srl X, 23), i8) -> sra X, 23 iff possible.
+ // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
+ if (N0.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if (ShAmt->getValue()+EVTBits <= MVT::getSizeInBits(VT)) {
+ // We can turn this into an SRA iff the input to the SRL is already sign
+ // extended enough.
+ unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
+ if (MVT::getSizeInBits(VT)-(ShAmt->getValue()+EVTBits) < InSignBits)
+ return DAG.getNode(ISD::SRA, VT, N0.getOperand(0), N0.getOperand(1));
+ }
+ }
+
+ // fold (sext_inreg (extload x)) -> (sextload x)
+ if (ISD::isEXTLoad(N0.Val) &&
+ ISD::isUNINDEXEDLoad(N0.Val) &&
+ EVT == cast<LoadSDNode>(N0)->getLoadedVT() &&
+ (!AfterLegalize || TLI.isLoadXLegal(ISD::SEXTLOAD, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.Val, ExtLoad, ExtLoad.getValue(1));
+ return SDOperand(N, 0); // Return N so it doesn't get rechecked!
+ }
+ // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
+ if (ISD::isZEXTLoad(N0.Val) && ISD::isUNINDEXEDLoad(N0.Val) &&
+ N0.hasOneUse() &&
+ EVT == cast<LoadSDNode>(N0)->getLoadedVT() &&
+ (!AfterLegalize || TLI.isLoadXLegal(ISD::SEXTLOAD, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(), EVT,
+ LN0->isVolatile(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.Val, ExtLoad, ExtLoad.getValue(1));
+ return SDOperand(N, 0); // Return N so it doesn't get rechecked!
+ }
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitTRUNCATE(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // noop truncate
+ if (N0.getValueType() == N->getValueType(0))
+ return N0;
+ // fold (truncate c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::TRUNCATE, VT, N0);
+ // fold (truncate (truncate x)) -> (truncate x)
+ if (N0.getOpcode() == ISD::TRUNCATE)
+ return DAG.getNode(ISD::TRUNCATE, VT, N0.getOperand(0));
+ // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
+ if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND||
+ N0.getOpcode() == ISD::ANY_EXTEND) {
+ if (N0.getOperand(0).getValueType() < VT)
+ // if the source is smaller than the dest, we still need an extend
+ return DAG.getNode(N0.getOpcode(), VT, N0.getOperand(0));
+ else if (N0.getOperand(0).getValueType() > VT)
+      // if the source is larger than the dest, then we just need the truncate
+ return DAG.getNode(ISD::TRUNCATE, VT, N0.getOperand(0));
+ else
+ // if the source and dest are the same type, we can drop both the extend
+ // and the truncate
+ return N0.getOperand(0);
+ }
+
+ // fold (truncate (load x)) -> (smaller load x)
+ // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
+ return ReduceLoadWidth(N);
+}
+
+SDOperand DAGCombiner::visitBIT_CONVERT(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // If the input is a BUILD_VECTOR with all constant elements, fold this now.
+ // Only do this before legalize, since afterward the target may be depending
+ // on the bitconvert.
+ // First check to see if this is all constant.
+ if (!AfterLegalize &&
+ N0.getOpcode() == ISD::BUILD_VECTOR && N0.Val->hasOneUse() &&
+ MVT::isVector(VT)) {
+ bool isSimple = true;
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
+ if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
+ N0.getOperand(i).getOpcode() != ISD::Constant &&
+ N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
+ isSimple = false;
+ break;
+ }
+
+ MVT::ValueType DestEltVT = MVT::getVectorElementType(N->getValueType(0));
+ assert(!MVT::isVector(DestEltVT) &&
+ "Element type of vector ValueType must not be vector!");
+ if (isSimple) {
+ return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.Val, DestEltVT);
+ }
+ }
+
+ // If the input is a constant, let getNode() fold it.
+ if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+ SDOperand Res = DAG.getNode(ISD::BIT_CONVERT, VT, N0);
+ if (Res.Val != N) return Res;
+ }
+
+ if (N0.getOpcode() == ISD::BIT_CONVERT) // conv(conv(x,t1),t2) -> conv(x,t2)
+ return DAG.getNode(ISD::BIT_CONVERT, VT, N0.getOperand(0));
+
+ // fold (conv (load x)) -> (load (conv*)x)
+ // If the resultant load doesn't need a higher alignment than the original!
+ if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() &&
+ ISD::isUNINDEXEDLoad(N0.Val) &&
+ TLI.isOperationLegal(ISD::LOAD, VT)) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ unsigned Align = TLI.getTargetMachine().getTargetData()->
+ getABITypeAlignment(MVT::getTypeForValueType(VT));
+ unsigned OrigAlign = LN0->getAlignment();
+ if (Align <= OrigAlign) {
+ SDOperand Load = DAG.getLoad(VT, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->isVolatile(), Align);
+ AddToWorkList(N);
+ CombineTo(N0.Val, DAG.getNode(ISD::BIT_CONVERT, N0.getValueType(), Load),
+ Load.getValue(1));
+ return Load;
+ }
+ }
+
+ return SDOperand();
+}
+
+/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector
+/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
+/// destination element value type.
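+/// For example, bit_convert of a constant v2i32 BUILD_VECTOR to v8i8
+/// re-expresses each 32-bit constant as four i8 constants.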
+SDOperand DAGCombiner::
+ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT::ValueType DstEltVT) {
+ MVT::ValueType SrcEltVT = BV->getOperand(0).getValueType();
+
+ // If this is already the right type, we're done.
+ if (SrcEltVT == DstEltVT) return SDOperand(BV, 0);
+
+ unsigned SrcBitSize = MVT::getSizeInBits(SrcEltVT);
+ unsigned DstBitSize = MVT::getSizeInBits(DstEltVT);
+
+ // If this is a conversion of N elements of one type to N elements of another
+ // type, convert each element. This handles FP<->INT cases.
+ if (SrcBitSize == DstBitSize) {
+ SmallVector<SDOperand, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, DstEltVT, BV->getOperand(i)));
+ AddToWorkList(Ops.back().Val);
+ }
+ MVT::ValueType VT =
+ MVT::getVectorType(DstEltVT,
+ MVT::getVectorNumElements(BV->getValueType(0)));
+ return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+ }
+
+ // Otherwise, we're growing or shrinking the elements. To avoid having to
+ // handle annoying details of growing/shrinking FP values, we convert them to
+ // int first.
+ if (MVT::isFloatingPoint(SrcEltVT)) {
+    // Convert the input float vector to an int vector where the elements are
+    // the same size.
+ assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
+ MVT::ValueType IntVT = SrcEltVT == MVT::f32 ? MVT::i32 : MVT::i64;
+ BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).Val;
+ SrcEltVT = IntVT;
+ }
+
+ // Now we know the input is an integer vector. If the output is a FP type,
+ // convert to integer first, then to FP of the right size.
+ if (MVT::isFloatingPoint(DstEltVT)) {
+ assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
+ MVT::ValueType TmpVT = DstEltVT == MVT::f32 ? MVT::i32 : MVT::i64;
+ SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).Val;
+
+ // Next, convert to FP elements of the same size.
+ return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT);
+ }
+
+ // Okay, we know the src/dst types are both integers of differing types.
+ // Handling growing first.
+ assert(MVT::isInteger(SrcEltVT) && MVT::isInteger(DstEltVT));
+ if (SrcBitSize < DstBitSize) {
+ unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
+
+ SmallVector<SDOperand, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e;
+ i += NumInputsPerOutput) {
+ bool isLE = TLI.isLittleEndian();
+ uint64_t NewBits = 0;
+ bool EltIsUndef = true;
+ for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
+ // Shift the previously computed bits over.
+ NewBits <<= SrcBitSize;
+ SDOperand Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
+ if (Op.getOpcode() == ISD::UNDEF) continue;
+ EltIsUndef = false;
+
+ NewBits |= cast<ConstantSDNode>(Op)->getValue();
+ }
+
+ if (EltIsUndef)
+ Ops.push_back(DAG.getNode(ISD::UNDEF, DstEltVT));
+ else
+ Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
+ }
+
+ MVT::ValueType VT = MVT::getVectorType(DstEltVT,
+ Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+ }
+
+ // Finally, this must be the case where we are shrinking elements: each input
+ // turns into multiple outputs.
+ unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
+ SmallVector<SDOperand, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
+ for (unsigned j = 0; j != NumOutputsPerInput; ++j)
+ Ops.push_back(DAG.getNode(ISD::UNDEF, DstEltVT));
+ continue;
+ }
+ uint64_t OpVal = cast<ConstantSDNode>(BV->getOperand(i))->getValue();
+
+ for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
+ unsigned ThisVal = OpVal & ((1ULL << DstBitSize)-1);
+ OpVal >>= DstBitSize;
+ Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
+ }
+
+ // For big endian targets, swap the order of the pieces of each element.
+ if (!TLI.isLittleEndian())
+ std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
+ }
+ MVT::ValueType VT = MVT::getVectorType(DstEltVT, Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+}
+
+
+
+SDOperand DAGCombiner::visitFADD(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold vector ops
+ if (MVT::isVector(VT)) {
+ SDOperand FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.Val) return FoldedVOp;
+ }
+
+ // fold (fadd c1, c2) -> c1+c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FADD, VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FADD, VT, N1, N0);
+ // fold (A + (-B)) -> A-B
+ if (isNegatibleForFree(N1) == 2)
+ return DAG.getNode(ISD::FSUB, VT, N0, GetNegatedExpression(N1, DAG));
+ // fold ((-A) + B) -> B-A
+ if (isNegatibleForFree(N0) == 2)
+ return DAG.getNode(ISD::FSUB, VT, N1, GetNegatedExpression(N0, DAG));
+
+ // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+ if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
+ N0.Val->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::FADD, VT, N0.getOperand(0),
+ DAG.getNode(ISD::FADD, VT, N0.getOperand(1), N1));
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFSUB(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold vector ops
+ if (MVT::isVector(VT)) {
+ SDOperand FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.Val) return FoldedVOp;
+ }
+
+ // fold (fsub c1, c2) -> c1-c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FSUB, VT, N0, N1);
+ // fold (0-B) -> -B
+ if (UnsafeFPMath && N0CFP && N0CFP->getValue() == 0.0) {
+ if (isNegatibleForFree(N1))
+ return GetNegatedExpression(N1, DAG);
+ return DAG.getNode(ISD::FNEG, VT, N1);
+ }
+ // fold (A-(-B)) -> A+B
+ if (isNegatibleForFree(N1))
+ return DAG.getNode(ISD::FADD, VT, N0, GetNegatedExpression(N1, DAG));
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFMUL(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold vector ops
+ if (MVT::isVector(VT)) {
+ SDOperand FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.Val) return FoldedVOp;
+ }
+
+ // fold (fmul c1, c2) -> c1*c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FMUL, VT, N0, N1);
+ // canonicalize constant to RHS
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FMUL, VT, N1, N0);
+ // fold (fmul X, 2.0) -> (fadd X, X)
+ if (N1CFP && N1CFP->isExactlyValue(+2.0))
+ return DAG.getNode(ISD::FADD, VT, N0, N0);
+ // fold (fmul X, -1.0) -> (fneg X)
+ if (N1CFP && N1CFP->isExactlyValue(-1.0))
+ return DAG.getNode(ISD::FNEG, VT, N0);
+
+ // -X * -Y -> X*Y
+ if (char LHSNeg = isNegatibleForFree(N0)) {
+ if (char RHSNeg = isNegatibleForFree(N1)) {
+ // Both can be negated for free, check to see if at least one is cheaper
+ // negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return DAG.getNode(ISD::FMUL, VT, GetNegatedExpression(N0, DAG),
+ GetNegatedExpression(N1, DAG));
+ }
+ }
+
+ // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+ if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
+ N0.Val->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::FMUL, VT, N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, VT, N0.getOperand(1), N1));
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFDIV(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold vector ops
+ if (MVT::isVector(VT)) {
+ SDOperand FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.Val) return FoldedVOp;
+ }
+
+ // fold (fdiv c1, c2) -> c1/c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FDIV, VT, N0, N1);
+
+
+  // -X / -Y -> X/Y
+ if (char LHSNeg = isNegatibleForFree(N0)) {
+ if (char RHSNeg = isNegatibleForFree(N1)) {
+ // Both can be negated for free, check to see if at least one is cheaper
+ // negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return DAG.getNode(ISD::FDIV, VT, GetNegatedExpression(N0, DAG),
+ GetNegatedExpression(N1, DAG));
+ }
+ }
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFREM(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold (frem c1, c2) -> fmod(c1,c2)
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FREM, VT, N0, N1);
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFCOPYSIGN(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ MVT::ValueType VT = N->getValueType(0);
+
+ if (N0CFP && N1CFP) // Constant fold
+ return DAG.getNode(ISD::FCOPYSIGN, VT, N0, N1);
+
+ if (N1CFP) {
+ // copysign(x, c1) -> fabs(x) iff ispos(c1)
+ // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
+ union {
+ double d;
+ int64_t i;
+ } u;
+ u.d = N1CFP->getValue();
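+    // u.i is the IEEE-754 bit pattern of c1, so u.i >= 0 means the sign bit
+    // is clear, i.e. c1 is non-negative.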
+ if (u.i >= 0)
+ return DAG.getNode(ISD::FABS, VT, N0);
+ else
+ return DAG.getNode(ISD::FNEG, VT, DAG.getNode(ISD::FABS, VT, N0));
+ }
+
+ // copysign(fabs(x), y) -> copysign(x, y)
+ // copysign(fneg(x), y) -> copysign(x, y)
+ // copysign(copysign(x,z), y) -> copysign(x, y)
+ if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
+ N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, VT, N0.getOperand(0), N1);
+
+ // copysign(x, abs(y)) -> abs(x)
+ if (N1.getOpcode() == ISD::FABS)
+ return DAG.getNode(ISD::FABS, VT, N0);
+
+ // copysign(x, copysign(y,z)) -> copysign(x, z)
+ if (N1.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, VT, N0, N1.getOperand(1));
+
+ // copysign(x, fp_extend(y)) -> copysign(x, y)
+ // copysign(x, fp_round(y)) -> copysign(x, y)
+ if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+ return DAG.getNode(ISD::FCOPYSIGN, VT, N0, N1.getOperand(0));
+
+ return SDOperand();
+}
+
+
+
+SDOperand DAGCombiner::visitSINT_TO_FP(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold (sint_to_fp c1) -> c1fp
+ if (N0C)
+ return DAG.getNode(ISD::SINT_TO_FP, VT, N0);
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitUINT_TO_FP(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold (uint_to_fp c1) -> c1fp
+ if (N0C)
+ return DAG.getNode(ISD::UINT_TO_FP, VT, N0);
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFP_TO_SINT(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold (fp_to_sint c1fp) -> c1
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_TO_SINT, VT, N0);
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFP_TO_UINT(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold (fp_to_uint c1fp) -> c1
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_TO_UINT, VT, N0);
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFP_ROUND(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold (fp_round c1fp) -> c1fp
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_ROUND, VT, N0);
+
+ // fold (fp_round (fp_extend x)) -> x
+ if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
+ return N0.getOperand(0);
+
+ // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.Val->hasOneUse()) {
+ SDOperand Tmp = DAG.getNode(ISD::FP_ROUND, VT, N0.getOperand(0));
+ AddToWorkList(Tmp.Val);
+ return DAG.getNode(ISD::FCOPYSIGN, VT, Tmp, N0.getOperand(1));
+ }
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ MVT::ValueType VT = N->getValueType(0);
+ MVT::ValueType EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+
+ // fold (fp_round_inreg c1fp) -> c1fp
+ if (N0CFP) {
+ SDOperand Round = DAG.getConstantFP(N0CFP->getValue(), EVT);
+ return DAG.getNode(ISD::FP_EXTEND, VT, Round);
+ }
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFP_EXTEND(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold (fp_extend c1fp) -> c1fp
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_EXTEND, VT, N0);
+
+ // fold (fpext (load x)) -> (fpext (fpround (extload x)))
+ if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() &&
+ (!AfterLegalize||TLI.isLoadXLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDOperand ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, LN0->getChain(),
+ LN0->getBasePtr(), LN0->getSrcValue(),
+ LN0->getSrcValueOffset(),
+ N0.getValueType(),
+ LN0->isVolatile(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.Val, DAG.getNode(ISD::FP_ROUND, N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDOperand(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFNEG(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+
+ if (isNegatibleForFree(N0))
+ return GetNegatedExpression(N0, DAG);
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitFABS(SDNode *N) {
+ SDOperand N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ MVT::ValueType VT = N->getValueType(0);
+
+ // fold (fabs c1) -> fabs(c1)
+ if (N0CFP)
+ return DAG.getNode(ISD::FABS, VT, N0);
+ // fold (fabs (fabs x)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FABS)
+ return N->getOperand(0);
+ // fold (fabs (fneg x)) -> (fabs x)
+ // fold (fabs (fcopysign x, y)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FABS, VT, N0.getOperand(0));
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitBRCOND(SDNode *N) {
+ SDOperand Chain = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ SDOperand N2 = N->getOperand(2);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+
+ // never taken branch, fold to chain
+ if (N1C && N1C->isNullValue())
+ return Chain;
+ // unconditional branch
+ if (N1C && N1C->getValue() == 1)
+ return DAG.getNode(ISD::BR, MVT::Other, Chain, N2);
+ // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
+ // on the target.
+ if (N1.getOpcode() == ISD::SETCC &&
+ TLI.isOperationLegal(ISD::BR_CC, MVT::Other)) {
+ return DAG.getNode(ISD::BR_CC, MVT::Other, Chain, N1.getOperand(2),
+ N1.getOperand(0), N1.getOperand(1), N2);
+ }
+ return SDOperand();
+}
+
+// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
+//
+SDOperand DAGCombiner::visitBR_CC(SDNode *N) {
+ CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
+ SDOperand CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+
+ // Use SimplifySetCC to simplify SETCC's.
+ SDOperand Simp = SimplifySetCC(MVT::i1, CondLHS, CondRHS, CC->get(), false);
+ if (Simp.Val) AddToWorkList(Simp.Val);
+
+ ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(Simp.Val);
+
+ // fold br_cc true, dest -> br dest (unconditional branch)
+ if (SCCC && SCCC->getValue())
+ return DAG.getNode(ISD::BR, MVT::Other, N->getOperand(0),
+ N->getOperand(4));
+ // fold br_cc false, dest -> unconditional fall through
+ if (SCCC && SCCC->isNullValue())
+ return N->getOperand(0);
+
+ // fold to a simpler setcc
+ if (Simp.Val && Simp.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::BR_CC, MVT::Other, N->getOperand(0),
+ Simp.getOperand(2), Simp.getOperand(0),
+ Simp.getOperand(1), N->getOperand(4));
+ return SDOperand();
+}
+
+
+/// CombineToPreIndexedLoadStore - Try turning a load / store into a
+/// pre-indexed load / store when the base pointer is an add or subtract
+/// and it has other uses besides the load / store. After the
+/// transformation, the new indexed load / store has effectively folded
+/// the add / subtract in and all of its other uses are redirected to the
+/// new load / store.
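+/// For example, on a target with pre-increment addressing, a load whose
+/// address is (add base, 4), where the add also has other users, can become
+/// a pre-indexed load that produces both the loaded value and base+4.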
+bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
+ if (!AfterLegalize)
+ return false;
+
+ bool isLoad = true;
+ SDOperand Ptr;
+ MVT::ValueType VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->getAddressingMode() != ISD::UNINDEXED)
+ return false;
+ VT = LD->getLoadedVT();
+ if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->getAddressingMode() != ISD::UNINDEXED)
+ return false;
+ VT = ST->getStoredVT();
+ if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else
+ return false;
+
+ // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
+ // out. There is no reason to make this a preinc/predec.
+ if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
+ Ptr.Val->hasOneUse())
+ return false;
+
+ // Ask the target to do addressing mode selection.
+ SDOperand BasePtr;
+ SDOperand Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
+ return false;
+  // Don't create an indexed load / store with zero offset.
+ if (isa<ConstantSDNode>(Offset) &&
+ cast<ConstantSDNode>(Offset)->getValue() == 0)
+ return false;
+
+ // Try turning it into a pre-indexed load / store except when:
+ // 1) The new base ptr is a frame index.
+ // 2) If N is a store and the new base ptr is either the same as or is a
+ // predecessor of the value being stored.
+ // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
+ // that would create a cycle.
+ // 4) All uses are load / store ops that use it as old base ptr.
+
+ // Check #1. Preinc'ing a frame index would require copying the stack pointer
+ // (plus the implicit offset) to a register to preinc anyway.
+ if (isa<FrameIndexSDNode>(BasePtr))
+ return false;
+
+ // Check #2.
+ if (!isLoad) {
+ SDOperand Val = cast<StoreSDNode>(N)->getValue();
+ if (Val == BasePtr || BasePtr.Val->isPredecessor(Val.Val))
+ return false;
+ }
+
+ // Now check for #3 and #4.
+ bool RealUse = false;
+ for (SDNode::use_iterator I = Ptr.Val->use_begin(),
+ E = Ptr.Val->use_end(); I != E; ++I) {
+ SDNode *Use = *I;
+ if (Use == N)
+ continue;
+ if (Use->isPredecessor(N))
+ return false;
+
+ if (!((Use->getOpcode() == ISD::LOAD &&
+ cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
+ (Use->getOpcode() == ISD::STORE) &&
+ cast<StoreSDNode>(Use)->getBasePtr() == Ptr))
+ RealUse = true;
+ }
+ if (!RealUse)
+ return false;
+
+ SDOperand Result;
+ if (isLoad)
+ Result = DAG.getIndexedLoad(SDOperand(N,0), BasePtr, Offset, AM);
+ else
+ Result = DAG.getIndexedStore(SDOperand(N,0), BasePtr, Offset, AM);
+ ++PreIndexedNodes;
+ ++NodesCombined;
+ DOUT << "\nReplacing.4 "; DEBUG(N->dump(&DAG));
+ DOUT << "\nWith: "; DEBUG(Result.Val->dump(&DAG));
+ DOUT << '\n';
+ std::vector<SDNode*> NowDead;
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(0),
+ NowDead);
+ DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 1), Result.getValue(2),
+ NowDead);
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(1),
+ NowDead);
+ }
+
+ // Nodes can end up on the worklist more than once. Make sure we do
+ // not process a node that has been replaced.
+ for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+ removeFromWorkList(NowDead[i]);
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+
+ // Replace the uses of Ptr with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0),
+ NowDead);
+ removeFromWorkList(Ptr.Val);
+ for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+ removeFromWorkList(NowDead[i]);
+ DAG.DeleteNode(Ptr.Val);
+
+ return true;
+}
+
+/// CombineToPostIndexedLoadStore - Try to combine a load / store with an
+/// add / sub of the base pointer node into a post-indexed load / store.
+/// The transformation effectively folds the add / subtract into the new
+/// indexed load / store, and all of its uses are redirected to the
+/// new load / store.
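+///
+/// For example, a (load ptr) together with a separate (add ptr, 4) node can
+/// become a post-indexed load that yields both the loaded value and ptr+4;
+/// the uses of the add are then redirected to the updated pointer result.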
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+ if (!AfterLegalize)
+ return false;
+
+ bool isLoad = true;
+ SDOperand Ptr;
+ MVT::ValueType VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->getAddressingMode() != ISD::UNINDEXED)
+ return false;
+ VT = LD->getLoadedVT();
+ if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->getAddressingMode() != ISD::UNINDEXED)
+ return false;
+ VT = ST->getStoredVT();
+ if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else
+ return false;
+
+ if (Ptr.Val->hasOneUse())
+ return false;
+
+ for (SDNode::use_iterator I = Ptr.Val->use_begin(),
+ E = Ptr.Val->use_end(); I != E; ++I) {
+ SDNode *Op = *I;
+ if (Op == N ||
+ (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
+ continue;
+
+ SDOperand BasePtr;
+ SDOperand Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
+ if (Ptr == Offset)
+ std::swap(BasePtr, Offset);
+ if (Ptr != BasePtr)
+ continue;
+      // Don't create an indexed load / store with zero offset.
+ if (isa<ConstantSDNode>(Offset) &&
+ cast<ConstantSDNode>(Offset)->getValue() == 0)
+ continue;
+
+ // Try turning it into a post-indexed load / store except when
+ // 1) All uses are load / store ops that use it as base ptr.
+ // 2) Op must be independent of N, i.e. Op is neither a predecessor
+ // nor a successor of N. Otherwise, if Op is folded that would
+ // create a cycle.
+
+ // Check for #1.
+ bool TryNext = false;
+ for (SDNode::use_iterator II = BasePtr.Val->use_begin(),
+ EE = BasePtr.Val->use_end(); II != EE; ++II) {
+ SDNode *Use = *II;
+ if (Use == Ptr.Val)
+ continue;
+
+ // If all the uses are load / store addresses, then don't do the
+ // transformation.
+ if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
+ bool RealUse = false;
+ for (SDNode::use_iterator III = Use->use_begin(),
+ EEE = Use->use_end(); III != EEE; ++III) {
+ SDNode *UseUse = *III;
+ if (!((UseUse->getOpcode() == ISD::LOAD &&
+ cast<LoadSDNode>(UseUse)->getBasePtr().Val == Use) ||
+ (UseUse->getOpcode() == ISD::STORE) &&
+ cast<StoreSDNode>(UseUse)->getBasePtr().Val == Use))
+ RealUse = true;
+ }
+
+ if (!RealUse) {
+ TryNext = true;
+ break;
+ }
+ }
+ }
+ if (TryNext)
+ continue;
+
+ // Check for #2
+ if (!Op->isPredecessor(N) && !N->isPredecessor(Op)) {
+ SDOperand Result = isLoad
+ ? DAG.getIndexedLoad(SDOperand(N,0), BasePtr, Offset, AM)
+ : DAG.getIndexedStore(SDOperand(N,0), BasePtr, Offset, AM);
+ ++PostIndexedNodes;
+ ++NodesCombined;
+ DOUT << "\nReplacing.5 "; DEBUG(N->dump(&DAG));
+ DOUT << "\nWith: "; DEBUG(Result.Val->dump(&DAG));
+ DOUT << '\n';
+ std::vector<SDNode*> NowDead;
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(0),
+ NowDead);
+ DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 1), Result.getValue(2),
+ NowDead);
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(1),
+ NowDead);
+ }
+
+ // Nodes can end up on the worklist more than once. Make sure we do
+ // not process a node that has been replaced.
+ for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+ removeFromWorkList(NowDead[i]);
+ // Finally, since the node is now dead, remove it from the graph.
+ DAG.DeleteNode(N);
+
+ // Replace the uses of Use with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(SDOperand(Op, 0),
+ Result.getValue(isLoad ? 1 : 0),
+ NowDead);
+ removeFromWorkList(Op);
+ for (unsigned i = 0, e = NowDead.size(); i != e; ++i)
+ removeFromWorkList(NowDead[i]);
+ DAG.DeleteNode(Op);
+
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
+SDOperand DAGCombiner::visitLOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDOperand Chain = LD->getChain();
+ SDOperand Ptr = LD->getBasePtr();
+
+ // If load is not volatile and there are no uses of the loaded value (and
+ // the updated indexed value in case of indexed loads), change uses of the
+ // chain value into uses of the chain input (i.e. delete the dead load).
+ if (!LD->isVolatile()) {
+ if (N->getValueType(1) == MVT::Other) {
+ // Unindexed loads.
+ if (N->hasNUsesOfValue(0, 0))
+ return CombineTo(N, DAG.getNode(ISD::UNDEF, N->getValueType(0)), Chain);
+ } else {
+ // Indexed loads.
+ assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
+ if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
+ SDOperand Undef0 = DAG.getNode(ISD::UNDEF, N->getValueType(0));
+ SDOperand Undef1 = DAG.getNode(ISD::UNDEF, N->getValueType(1));
+ SDOperand To[] = { Undef0, Undef1, Chain };
+ return CombineTo(N, To, 3);
+ }
+ }
+ }
+
+ // If this load is directly stored, replace the load value with the stored
+ // value.
+ // TODO: Handle store large -> read small portion.
+ // TODO: Handle TRUNCSTORE/LOADEXT
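+  // For example, a (load ptr) whose chain is a (store x, ptr) of the same
+  // value type simply yields x.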
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+ if (ISD::isNON_TRUNCStore(Chain.Val)) {
+ StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
+ if (PrevST->getBasePtr() == Ptr &&
+ PrevST->getValue().getValueType() == N->getValueType(0))
+ return CombineTo(N, Chain.getOperand(1), Chain);
+ }
+ }
+
+ if (CombinerAA) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDOperand BetterChain = FindBetterChain(N, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ SDOperand ReplLoad;
+
+      // Replace the chain to avoid dependency.
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+ ReplLoad = DAG.getLoad(N->getValueType(0), BetterChain, Ptr,
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->isVolatile(), LD->getAlignment());
+ } else {
+ ReplLoad = DAG.getExtLoad(LD->getExtensionType(),
+ LD->getValueType(0),
+ BetterChain, Ptr, LD->getSrcValue(),
+ LD->getSrcValueOffset(),
+ LD->getLoadedVT(),
+ LD->isVolatile(),
+ LD->getAlignment());
+ }
+
+ // Create token factor to keep old chain connected.
+ SDOperand Token = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ Chain, ReplLoad.getValue(1));
+
+ // Replace uses with load result and token factor. Don't add users
+ // to work list.
+ return CombineTo(N, ReplLoad.getValue(0), Token, false);
+ }
+ }
+
+ // Try transforming N to an indexed load.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDOperand(N, 0);
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitSTORE(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDOperand Chain = ST->getChain();
+ SDOperand Value = ST->getValue();
+ SDOperand Ptr = ST->getBasePtr();
+
+ // If this is a store of a bit convert, store the input value if the
+ // resultant store does not need a higher alignment than the original.
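+  // For example, (store (bit_convert x), ptr) can become (store x, ptr) when
+  // the target supports storing x's type and the alignment check below holds.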
+ if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
+ ST->getAddressingMode() == ISD::UNINDEXED) {
+ unsigned Align = ST->getAlignment();
+ MVT::ValueType SVT = Value.getOperand(0).getValueType();
+ unsigned OrigAlign = TLI.getTargetMachine().getTargetData()->
+ getABITypeAlignment(MVT::getTypeForValueType(SVT));
+ if (Align <= OrigAlign && TLI.isOperationLegal(ISD::STORE, SVT))
+ return DAG.getStore(Chain, Value.getOperand(0), Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->isVolatile(), Align);
+ }
+
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
+ if (Value.getOpcode() != ISD::TargetConstantFP) {
+ SDOperand Tmp;
+ switch (CFP->getValueType(0)) {
+ default: assert(0 && "Unknown FP type");
+ case MVT::f32:
+ if (!AfterLegalize || TLI.isTypeLegal(MVT::i32)) {
+ Tmp = DAG.getConstant(FloatToBits(CFP->getValue()), MVT::i32);
+ return DAG.getStore(Chain, Tmp, Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->isVolatile(),
+ ST->getAlignment());
+ }
+ break;
+ case MVT::f64:
+ if (!AfterLegalize || TLI.isTypeLegal(MVT::i64)) {
+ Tmp = DAG.getConstant(DoubleToBits(CFP->getValue()), MVT::i64);
+ return DAG.getStore(Chain, Tmp, Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->isVolatile(),
+ ST->getAlignment());
+ } else if (TLI.isTypeLegal(MVT::i32)) {
+        // Many FP stores are not made apparent until after legalize, e.g. for
+ // argument passing. Since this is so common, custom legalize the
+ // 64-bit integer store into two 32-bit stores.
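+        // For example, storing the f64 constant 1.0 (0x3FF0000000000000)
+        // becomes an i32 store of 0x00000000 and an i32 store of 0x3FF00000,
+        // ordered according to the target's endianness.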
+ uint64_t Val = DoubleToBits(CFP->getValue());
+ SDOperand Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
+ SDOperand Hi = DAG.getConstant(Val >> 32, MVT::i32);
+ if (!TLI.isLittleEndian()) std::swap(Lo, Hi);
+
+ int SVOffset = ST->getSrcValueOffset();
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+
+ SDOperand St0 = DAG.getStore(Chain, Lo, Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(),
+ isVolatile, ST->getAlignment());
+ Ptr = DAG.getNode(ISD::ADD, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, Ptr.getValueType()));
+ SVOffset += 4;
+ if (Alignment > 4)
+ Alignment = 4;
+ SDOperand St1 = DAG.getStore(Chain, Hi, Ptr, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ return DAG.getNode(ISD::TokenFactor, MVT::Other, St0, St1);
+ }
+ break;
+ }
+ }
+ }
+
+ if (CombinerAA) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDOperand BetterChain = FindBetterChain(N, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ // Replace the chain to avoid dependency.
+ SDOperand ReplStore;
+ if (ST->isTruncatingStore()) {
+ ReplStore = DAG.getTruncStore(BetterChain, Value, Ptr,
+ ST->getSrcValue(), ST->getSrcValueOffset(), ST->getStoredVT(),
+ ST->isVolatile(), ST->getAlignment());
+ } else {
+ ReplStore = DAG.getStore(BetterChain, Value, Ptr,
+ ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->isVolatile(), ST->getAlignment());
+ }
+
+ // Create token to keep both nodes around.
+ SDOperand Token =
+ DAG.getNode(ISD::TokenFactor, MVT::Other, Chain, ReplStore);
+
+ // Don't add users to work list.
+ return CombineTo(N, Token, false);
+ }
+ }
+
+ // Try transforming N to an indexed store.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDOperand(N, 0);
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
+ SDOperand InVec = N->getOperand(0);
+ SDOperand InVal = N->getOperand(1);
+ SDOperand EltNo = N->getOperand(2);
+
+ // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
+ // vector with the inserted element.
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getValue();
+ SmallVector<SDOperand, 8> Ops(InVec.Val->op_begin(), InVec.Val->op_end());
+ if (Elt < Ops.size())
+ Ops[Elt] = InVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, InVec.getValueType(),
+ &Ops[0], Ops.size());
+ }
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+ unsigned NumInScalars = N->getNumOperands();
+ MVT::ValueType VT = N->getValueType(0);
+ unsigned NumElts = MVT::getVectorNumElements(VT);
+ MVT::ValueType EltType = MVT::getVectorElementType(VT);
+
+ // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+ // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
+ // at most two distinct vectors, turn this into a shuffle node.
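+  // For example, (build_vector (extract_elt A, 0), (extract_elt B, 1),
+  // (extract_elt A, 2), (extract_elt B, 3)) can become
+  // vector_shuffle A, B, <0, 5, 2, 7>, since mask indices that refer to the
+  // second vector are offset by the number of elements.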
+ SDOperand VecIn1, VecIn2;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ // Ignore undef inputs.
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ // If this input is something other than a EXTRACT_VECTOR_ELT with a
+ // constant index, bail out.
+ if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
+ VecIn1 = VecIn2 = SDOperand(0, 0);
+ break;
+ }
+
+ // If the input vector type disagrees with the result of the build_vector,
+ // we can't make a shuffle.
+ SDOperand ExtractedFromVec = N->getOperand(i).getOperand(0);
+ if (ExtractedFromVec.getValueType() != VT) {
+ VecIn1 = VecIn2 = SDOperand(0, 0);
+ break;
+ }
+
+ // Otherwise, remember this. We allow up to two distinct input vectors.
+ if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
+ continue;
+
+ if (VecIn1.Val == 0) {
+ VecIn1 = ExtractedFromVec;
+ } else if (VecIn2.Val == 0) {
+ VecIn2 = ExtractedFromVec;
+ } else {
+ // Too many inputs.
+ VecIn1 = VecIn2 = SDOperand(0, 0);
+ break;
+ }
+ }
+
+ // If everything is good, we can make a shuffle operation.
+ if (VecIn1.Val) {
+ SmallVector<SDOperand, 8> BuildVecIndices;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
+ BuildVecIndices.push_back(DAG.getNode(ISD::UNDEF, TLI.getPointerTy()));
+ continue;
+ }
+
+ SDOperand Extract = N->getOperand(i);
+
+ // If extracting from the first vector, just use the index directly.
+ if (Extract.getOperand(0) == VecIn1) {
+ BuildVecIndices.push_back(Extract.getOperand(1));
+ continue;
+ }
+
+ // Otherwise, use InIdx + VecSize
+ unsigned Idx = cast<ConstantSDNode>(Extract.getOperand(1))->getValue();
+ BuildVecIndices.push_back(DAG.getConstant(Idx+NumInScalars,
+ TLI.getPointerTy()));
+ }
+
+ // Add count and size info.
+ MVT::ValueType BuildVecVT =
+ MVT::getVectorType(TLI.getPointerTy(), NumElts);
+
+ // Return the new VECTOR_SHUFFLE node.
+ SDOperand Ops[5];
+ Ops[0] = VecIn1;
+ if (VecIn2.Val) {
+ Ops[1] = VecIn2;
+ } else {
+ // Use an undef build_vector as input for the second operand.
+ std::vector<SDOperand> UnOps(NumInScalars,
+ DAG.getNode(ISD::UNDEF,
+ EltType));
+ Ops[1] = DAG.getNode(ISD::BUILD_VECTOR, VT,
+ &UnOps[0], UnOps.size());
+ AddToWorkList(Ops[1].Val);
+ }
+ Ops[2] = DAG.getNode(ISD::BUILD_VECTOR, BuildVecVT,
+ &BuildVecIndices[0], BuildVecIndices.size());
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Ops, 3);
+ }
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
+ // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
+ // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
+ // inputs come from at most two distinct vectors, turn this into a shuffle
+ // node.
+
+ // If we only have one input vector, we don't need to do any concatenation.
+ if (N->getNumOperands() == 1) {
+ return N->getOperand(0);
+ }
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
+ SDOperand ShufMask = N->getOperand(2);
+ unsigned NumElts = ShufMask.getNumOperands();
+
+ // If the shuffle mask is an identity operation on the LHS, return the LHS.
+ bool isIdentity = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF &&
+ cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue() != i) {
+ isIdentity = false;
+ break;
+ }
+ }
+ if (isIdentity) return N->getOperand(0);
+
+ // If the shuffle mask is an identity operation on the RHS, return the RHS.
+ isIdentity = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF &&
+ cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue() != i+NumElts) {
+ isIdentity = false;
+ break;
+ }
+ }
+ if (isIdentity) return N->getOperand(1);
+
+ // Check if the shuffle is a unary shuffle, i.e. one of the vectors is not
+ // needed at all.
+ bool isUnary = true;
+ bool isSplat = true;
+ int VecNum = -1;
+ unsigned BaseIdx = 0;
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF) {
+ unsigned Idx = cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue();
+ int V = (Idx < NumElts) ? 0 : 1;
+ if (VecNum == -1) {
+ VecNum = V;
+ BaseIdx = Idx;
+ } else {
+ if (BaseIdx != Idx)
+ isSplat = false;
+ if (VecNum != V) {
+ isUnary = false;
+ break;
+ }
+ }
+ }
+
+ SDOperand N0 = N->getOperand(0);
+ SDOperand N1 = N->getOperand(1);
+ // Normalize unary shuffle so the RHS is undef.
+ if (isUnary && VecNum == 1)
+ std::swap(N0, N1);
+
+ // If it is a splat, check if the argument vector is a build_vector with
+ // all scalar elements the same.
+ if (isSplat) {
+ SDNode *V = N0.Val;
+
+ // If this is a bit convert that changes the element type of the vector but
+ // not the number of vector elements, look through it. Be careful not to
+    // look through conversions that change things like v4f32 to v2f64.
+ if (V->getOpcode() == ISD::BIT_CONVERT) {
+ SDOperand ConvInput = V->getOperand(0);
+ if (MVT::getVectorNumElements(ConvInput.getValueType()) == NumElts)
+ V = ConvInput.Val;
+ }
+
+ if (V->getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElems = V->getNumOperands();
+ if (NumElems > BaseIdx) {
+ SDOperand Base;
+ bool AllSame = true;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+ Base = V->getOperand(i);
+ break;
+ }
+ }
+ // Splat of <u, u, u, u>, return <u, u, u, u>
+ if (!Base.Val)
+ return N0;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (V->getOperand(i).getOpcode() != ISD::UNDEF &&
+ V->getOperand(i) != Base) {
+ AllSame = false;
+ break;
+ }
+ }
+ // Splat of <x, x, x, x>, return <x, x, x, x>
+ if (AllSame)
+ return N0;
+ }
+ }
+ }
+
+ // If it is a unary or the LHS and the RHS are the same node, turn the RHS
+ // into an undef.
+ if (isUnary || N0 == N1) {
+ // Check the SHUFFLE mask, mapping any inputs from the 2nd operand into the
+ // first operand.
+ SmallVector<SDOperand, 8> MappedOps;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF ||
+ cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue() < NumElts) {
+ MappedOps.push_back(ShufMask.getOperand(i));
+ } else {
+ unsigned NewIdx =
+ cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue() - NumElts;
+ MappedOps.push_back(DAG.getConstant(NewIdx, MVT::i32));
+ }
+ }
+ ShufMask = DAG.getNode(ISD::BUILD_VECTOR, ShufMask.getValueType(),
+ &MappedOps[0], MappedOps.size());
+ AddToWorkList(ShufMask.Val);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, N->getValueType(0),
+ N0,
+ DAG.getNode(ISD::UNDEF, N->getValueType(0)),
+ ShufMask);
+ }
+
+ return SDOperand();
+}
+
+/// XformToShuffleWithZero - Returns a vector_shuffle if it is able to transform
+/// an AND to a vector_shuffle with the destination vector and a zero vector.
+/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
+/// vector_shuffle V, Zero, <0, 4, 2, 4>
+SDOperand DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+ SDOperand LHS = N->getOperand(0);
+ SDOperand RHS = N->getOperand(1);
+ if (N->getOpcode() == ISD::AND) {
+ if (RHS.getOpcode() == ISD::BIT_CONVERT)
+ RHS = RHS.getOperand(0);
+ if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ std::vector<SDOperand> IdxOps;
+ unsigned NumOps = RHS.getNumOperands();
+ unsigned NumElts = NumOps;
+ MVT::ValueType EVT = MVT::getVectorElementType(RHS.getValueType());
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDOperand Elt = RHS.getOperand(i);
+ if (!isa<ConstantSDNode>(Elt))
+ return SDOperand();
+ else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+ IdxOps.push_back(DAG.getConstant(i, EVT));
+ else if (cast<ConstantSDNode>(Elt)->isNullValue())
+ IdxOps.push_back(DAG.getConstant(NumElts, EVT));
+ else
+ return SDOperand();
+ }
+
+ // Let's see if the target supports this vector_shuffle.
+ if (!TLI.isVectorClearMaskLegal(IdxOps, EVT, DAG))
+ return SDOperand();
+
+ // Return the new VECTOR_SHUFFLE node.
+ MVT::ValueType VT = MVT::getVectorType(EVT, NumElts);
+ std::vector<SDOperand> Ops;
+ LHS = DAG.getNode(ISD::BIT_CONVERT, VT, LHS);
+ Ops.push_back(LHS);
+ AddToWorkList(LHS.Val);
+ std::vector<SDOperand> ZeroOps(NumElts, DAG.getConstant(0, EVT));
+ Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, VT,
+ &ZeroOps[0], ZeroOps.size()));
+ Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, VT,
+ &IdxOps[0], IdxOps.size()));
+ SDOperand Result = DAG.getNode(ISD::VECTOR_SHUFFLE, VT,
+ &Ops[0], Ops.size());
+ if (VT != LHS.getValueType()) {
+ Result = DAG.getNode(ISD::BIT_CONVERT, LHS.getValueType(), Result);
+ }
+ return Result;
+ }
+ }
+ return SDOperand();
+}
+
+/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
+SDOperand DAGCombiner::SimplifyVBinOp(SDNode *N) {
+ // After legalize, the target may be depending on adds and other
+ // binary ops to provide legal ways to construct constants or other
+ // things. Simplifying them may result in a loss of legality.
+ if (AfterLegalize) return SDOperand();
+
+ MVT::ValueType VT = N->getValueType(0);
+ assert(MVT::isVector(VT) && "SimplifyVBinOp only works on vectors!");
+
+ MVT::ValueType EltType = MVT::getVectorElementType(VT);
+ SDOperand LHS = N->getOperand(0);
+ SDOperand RHS = N->getOperand(1);
+ SDOperand Shuffle = XformToShuffleWithZero(N);
+ if (Shuffle.Val) return Shuffle;
+
+ // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
+ // this operation.
+ if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
+ RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<SDOperand, 8> Ops;
+ for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+ SDOperand LHSOp = LHS.getOperand(i);
+ SDOperand RHSOp = RHS.getOperand(i);
+ // If these two elements can't be folded, bail out.
+ if ((LHSOp.getOpcode() != ISD::UNDEF &&
+ LHSOp.getOpcode() != ISD::Constant &&
+ LHSOp.getOpcode() != ISD::ConstantFP) ||
+ (RHSOp.getOpcode() != ISD::UNDEF &&
+ RHSOp.getOpcode() != ISD::Constant &&
+ RHSOp.getOpcode() != ISD::ConstantFP))
+ break;
+ // Can't fold divide by zero.
+ if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
+ N->getOpcode() == ISD::FDIV) {
+ if ((RHSOp.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(RHSOp.Val)->isNullValue()) ||
+ (RHSOp.getOpcode() == ISD::ConstantFP &&
+ !cast<ConstantFPSDNode>(RHSOp.Val)->getValue()))
+ break;
+ }
+ Ops.push_back(DAG.getNode(N->getOpcode(), EltType, LHSOp, RHSOp));
+ AddToWorkList(Ops.back().Val);
+ assert((Ops.back().getOpcode() == ISD::UNDEF ||
+ Ops.back().getOpcode() == ISD::Constant ||
+ Ops.back().getOpcode() == ISD::ConstantFP) &&
+ "Scalar binop didn't fold!");
+ }
+
+ if (Ops.size() == LHS.getNumOperands()) {
+ MVT::ValueType VT = LHS.getValueType();
+ return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+ }
+ }
+
+ return SDOperand();
+}
+
+SDOperand DAGCombiner::SimplifySelect(SDOperand N0, SDOperand N1, SDOperand N2){
+  assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
+
+ SDOperand SCC = SimplifySelectCC(N0.getOperand(0), N0.getOperand(1), N1, N2,
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ // If we got a simplified select_cc node back from SimplifySelectCC, then
+ // break it down into a new SETCC node, and a new SELECT node, and then return
+ // the SELECT node, since we were called with a SELECT node.
+ if (SCC.Val) {
+ // Check to see if we got a select_cc back (to turn into setcc/select).
+ // Otherwise, just return whatever node we got back, like fabs.
+ if (SCC.getOpcode() == ISD::SELECT_CC) {
+ SDOperand SETCC = DAG.getNode(ISD::SETCC, N0.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1),
+ SCC.getOperand(4));
+ AddToWorkList(SETCC.Val);
+ return DAG.getNode(ISD::SELECT, SCC.getValueType(), SCC.getOperand(2),
+ SCC.getOperand(3), SETCC);
+ }
+ return SCC;
+ }
+ return SDOperand();
+}
+
+/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
+/// are the two values being selected between, see if we can simplify the
+/// select. Callers of this should assume that TheSelect is deleted if this
+/// returns true. As such, they should return the appropriate thing (e.g. the
+/// node) back to the top-level of the DAG combiner loop to avoid it being
+/// looked at.
+///
+bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDOperand LHS,
+ SDOperand RHS) {
+
+ // If this is a select from two identical things, try to pull the operation
+ // through the select.
+ if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){
+ // If this is a load and the token chain is identical, replace the select
+ // of two loads with a load through a select of the address to load from.
+ // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+ // constants have been dropped into the constant pool.
+ if (LHS.getOpcode() == ISD::LOAD &&
+ // Token chains must be identical.
+ LHS.getOperand(0) == RHS.getOperand(0)) {
+ LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+ LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+ // If this is an EXTLOAD, the VT's must match.
+ if (LLD->getLoadedVT() == RLD->getLoadedVT()) {
+ // FIXME: this conflates two src values, discarding one. This is not
+ // the right thing to do, but nothing uses srcvalues now. When they do,
+ // turn SrcValue into a list of locations.
+ SDOperand Addr;
+ if (TheSelect->getOpcode() == ISD::SELECT) {
+ // Check that the condition doesn't reach either load. If so, folding
+ // this will induce a cycle into the DAG.
+ if (!LLD->isPredecessor(TheSelect->getOperand(0).Val) &&
+ !RLD->isPredecessor(TheSelect->getOperand(0).Val)) {
+ Addr = DAG.getNode(ISD::SELECT, LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0), LLD->getBasePtr(),
+ RLD->getBasePtr());
+ }
+ } else {
+ // Check that the condition doesn't reach either load. If so, folding
+ // this will induce a cycle into the DAG.
+ if (!LLD->isPredecessor(TheSelect->getOperand(0).Val) &&
+ !RLD->isPredecessor(TheSelect->getOperand(0).Val) &&
+ !LLD->isPredecessor(TheSelect->getOperand(1).Val) &&
+ !RLD->isPredecessor(TheSelect->getOperand(1).Val)) {
+ Addr = DAG.getNode(ISD::SELECT_CC, LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0),
+ TheSelect->getOperand(1),
+ LLD->getBasePtr(), RLD->getBasePtr(),
+ TheSelect->getOperand(4));
+ }
+ }
+
+ if (Addr.Val) {
+ SDOperand Load;
+ if (LLD->getExtensionType() == ISD::NON_EXTLOAD)
+ Load = DAG.getLoad(TheSelect->getValueType(0), LLD->getChain(),
+ Addr,LLD->getSrcValue(),
+ LLD->getSrcValueOffset(),
+ LLD->isVolatile(),
+ LLD->getAlignment());
+ else {
+ Load = DAG.getExtLoad(LLD->getExtensionType(),
+ TheSelect->getValueType(0),
+ LLD->getChain(), Addr, LLD->getSrcValue(),
+ LLD->getSrcValueOffset(),
+ LLD->getLoadedVT(),
+ LLD->isVolatile(),
+ LLD->getAlignment());
+ }
+ // Users of the select now use the result of the load.
+ CombineTo(TheSelect, Load);
+
+ // Users of the old loads now use the new load's chain. We know the
+ // old-load value is dead now.
+ CombineTo(LHS.Val, Load.getValue(0), Load.getValue(1));
+ CombineTo(RHS.Val, Load.getValue(0), Load.getValue(1));
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+SDOperand DAGCombiner::SimplifySelectCC(SDOperand N0, SDOperand N1,
+ SDOperand N2, SDOperand N3,
+ ISD::CondCode CC, bool NotExtCompare) {
+
+ MVT::ValueType VT = N2.getValueType();
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.Val);
+ ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.Val);
+
+ // Determine if the condition we're dealing with is constant
+ SDOperand SCC = SimplifySetCC(TLI.getSetCCResultTy(), N0, N1, CC, false);
+ if (SCC.Val) AddToWorkList(SCC.Val);
+ ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.Val);
+
+ // fold select_cc true, x, y -> x
+ if (SCCC && SCCC->getValue())
+ return N2;
+ // fold select_cc false, x, y -> y
+ if (SCCC && SCCC->getValue() == 0)
+ return N3;
+
+ // Check to see if we can simplify the select into an fabs node
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
+ // Allow either -0.0 or 0.0
+ if (CFP->getValue() == 0.0) {
+ // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
+ if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
+ N0 == N2 && N3.getOpcode() == ISD::FNEG &&
+ N2 == N3.getOperand(0))
+ return DAG.getNode(ISD::FABS, VT, N0);
+
+ // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
+ if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
+ N0 == N3 && N2.getOpcode() == ISD::FNEG &&
+ N2.getOperand(0) == N3)
+ return DAG.getNode(ISD::FABS, VT, N3);
+ }
+ }
+
+ // Check to see if we can perform the "gzip trick", transforming
+ // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
+ if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
+ MVT::isInteger(N0.getValueType()) &&
+ MVT::isInteger(N2.getValueType()) &&
+ (N1C->isNullValue() || // (a < 0) ? b : 0
+ (N1C->getValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
+ MVT::ValueType XType = N0.getValueType();
+ MVT::ValueType AType = N2.getValueType();
+ if (XType >= AType) {
+      // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a
+ // single-bit constant.
+ if (N2C && ((N2C->getValue() & (N2C->getValue()-1)) == 0)) {
+ unsigned ShCtV = Log2_64(N2C->getValue());
+ ShCtV = MVT::getSizeInBits(XType)-ShCtV-1;
+ SDOperand ShCt = DAG.getConstant(ShCtV, TLI.getShiftAmountTy());
+ SDOperand Shift = DAG.getNode(ISD::SRL, XType, N0, ShCt);
+ AddToWorkList(Shift.Val);
+ if (XType > AType) {
+ Shift = DAG.getNode(ISD::TRUNCATE, AType, Shift);
+ AddToWorkList(Shift.Val);
+ }
+ return DAG.getNode(ISD::AND, AType, Shift, N2);
+ }
+ SDOperand Shift = DAG.getNode(ISD::SRA, XType, N0,
+ DAG.getConstant(MVT::getSizeInBits(XType)-1,
+ TLI.getShiftAmountTy()));
+ AddToWorkList(Shift.Val);
+ if (XType > AType) {
+ Shift = DAG.getNode(ISD::TRUNCATE, AType, Shift);
+ AddToWorkList(Shift.Val);
+ }
+ return DAG.getNode(ISD::AND, AType, Shift, N2);
+ }
+ }
+
+ // fold select C, 16, 0 -> shl C, 4
+ if (N2C && N3C && N3C->isNullValue() && isPowerOf2_64(N2C->getValue()) &&
+ TLI.getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult) {
+
+ // If the caller doesn't want us to simplify this into a zext of a compare,
+ // don't do it.
+ if (NotExtCompare && N2C->getValue() == 1)
+ return SDOperand();
+
+ // Get a SetCC of the condition
+ // FIXME: Should probably make sure that setcc is legal if we ever have a
+ // target where it isn't.
+ SDOperand Temp, SCC;
+ // cast from setcc result type to select result type
+ if (AfterLegalize) {
+ SCC = DAG.getSetCC(TLI.getSetCCResultTy(), N0, N1, CC);
+ if (N2.getValueType() < SCC.getValueType())
+ Temp = DAG.getZeroExtendInReg(SCC, N2.getValueType());
+ else
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getValueType(), SCC);
+ } else {
+ SCC = DAG.getSetCC(MVT::i1, N0, N1, CC);
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getValueType(), SCC);
+ }
+ AddToWorkList(SCC.Val);
+ AddToWorkList(Temp.Val);
+
+ if (N2C->getValue() == 1)
+ return Temp;
+ // shl setcc result by log2 n2c
+ return DAG.getNode(ISD::SHL, N2.getValueType(), Temp,
+ DAG.getConstant(Log2_64(N2C->getValue()),
+ TLI.getShiftAmountTy()));
+ }
+
+ // Check to see if this is the equivalent of setcc
+  // FIXME: Turn all of these into setcc if setcc is legal;
+  // otherwise, go ahead with the folds.
+ if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getValue() == 1ULL)) {
+ MVT::ValueType XType = N0.getValueType();
+ if (TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultTy())) {
+ SDOperand Res = DAG.getSetCC(TLI.getSetCCResultTy(), N0, N1, CC);
+ if (Res.getValueType() != VT)
+ Res = DAG.getNode(ISD::ZERO_EXTEND, VT, Res);
+ return Res;
+ }
+
+ // seteq X, 0 -> srl (ctlz X, log2(size(X)))
+ if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
+ TLI.isOperationLegal(ISD::CTLZ, XType)) {
+ SDOperand Ctlz = DAG.getNode(ISD::CTLZ, XType, N0);
+ return DAG.getNode(ISD::SRL, XType, Ctlz,
+ DAG.getConstant(Log2_32(MVT::getSizeInBits(XType)),
+ TLI.getShiftAmountTy()));
+ }
+ // setgt X, 0 -> srl (and (-X, ~X), size(X)-1)
+ if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
+ SDOperand NegN0 = DAG.getNode(ISD::SUB, XType, DAG.getConstant(0, XType),
+ N0);
+ SDOperand NotN0 = DAG.getNode(ISD::XOR, XType, N0,
+ DAG.getConstant(~0ULL, XType));
+ return DAG.getNode(ISD::SRL, XType,
+ DAG.getNode(ISD::AND, XType, NegN0, NotN0),
+ DAG.getConstant(MVT::getSizeInBits(XType)-1,
+ TLI.getShiftAmountTy()));
+ }
+ // setgt X, -1 -> xor (srl (X, size(X)-1), 1)
+ if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
+ SDOperand Sign = DAG.getNode(ISD::SRL, XType, N0,
+ DAG.getConstant(MVT::getSizeInBits(XType)-1,
+ TLI.getShiftAmountTy()));
+ return DAG.getNode(ISD::XOR, XType, Sign, DAG.getConstant(1, XType));
+ }
+ }
+
+ // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X ->
+ // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+ if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) &&
+ N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) &&
+ N2.getOperand(0) == N1 && MVT::isInteger(N0.getValueType())) {
+ MVT::ValueType XType = N0.getValueType();
+ SDOperand Shift = DAG.getNode(ISD::SRA, XType, N0,
+ DAG.getConstant(MVT::getSizeInBits(XType)-1,
+ TLI.getShiftAmountTy()));
+ SDOperand Add = DAG.getNode(ISD::ADD, XType, N0, Shift);
+ AddToWorkList(Shift.Val);
+ AddToWorkList(Add.Val);
+ return DAG.getNode(ISD::XOR, XType, Add, Shift);
+ }
+ // Check to see if this is an integer abs. select_cc setgt X, -1, X, -X ->
+ // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+ if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT &&
+ N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) {
+ if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) {
+ MVT::ValueType XType = N0.getValueType();
+ if (SubC->isNullValue() && MVT::isInteger(XType)) {
+ SDOperand Shift = DAG.getNode(ISD::SRA, XType, N0,
+ DAG.getConstant(MVT::getSizeInBits(XType)-1,
+ TLI.getShiftAmountTy()));
+ SDOperand Add = DAG.getNode(ISD::ADD, XType, N0, Shift);
+ AddToWorkList(Shift.Val);
+ AddToWorkList(Add.Val);
+ return DAG.getNode(ISD::XOR, XType, Add, Shift);
+ }
+ }
+ }
+
+ return SDOperand();
+}
+
+/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
+SDOperand DAGCombiner::SimplifySetCC(MVT::ValueType VT, SDOperand N0,
+ SDOperand N1, ISD::CondCode Cond,
+ bool foldBooleans) {
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, !AfterLegalize, false, this);
+ return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo);
+}
+
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDOperand DAGCombiner::BuildSDIV(SDNode *N) {
+ std::vector<SDNode*> Built;
+ SDOperand S = TLI.BuildSDIV(N, DAG, &Built);
+
+ for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+ ii != ee; ++ii)
+ AddToWorkList(*ii);
+ return S;
+}
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDOperand DAGCombiner::BuildUDIV(SDNode *N) {
+ std::vector<SDNode*> Built;
+ SDOperand S = TLI.BuildUDIV(N, DAG, &Built);
+
+ for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+ ii != ee; ++ii)
+ AddToWorkList(*ii);
+ return S;
+}
+
+/// FindBaseOffset - Return true if base is known not to alias with anything
+/// but itself. Provides base object and offset as results.
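+/// For example, for (add FrameIndex#1, 12) this sets Base to the frame index
+/// node, Offset to 12, and returns true: a frame index only aliases itself.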
+static bool FindBaseOffset(SDOperand Ptr, SDOperand &Base, int64_t &Offset) {
+ // Assume it is a primitive operation.
+ Base = Ptr; Offset = 0;
+
+  // If it's adding a simple constant, then integrate the offset.
+ if (Base.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
+ Base = Base.getOperand(0);
+ Offset += C->getValue();
+ }
+ }
+
+ // If it's any of the following then it can't alias with anything but itself.
+ return isa<FrameIndexSDNode>(Base) ||
+ isa<ConstantPoolSDNode>(Base) ||
+ isa<GlobalAddressSDNode>(Base);
+}
+
+/// isAlias - Return true if there is any possibility that the two addresses
+/// overlap.
+bool DAGCombiner::isAlias(SDOperand Ptr1, int64_t Size1,
+ const Value *SrcValue1, int SrcValueOffset1,
+ SDOperand Ptr2, int64_t Size2,
+ const Value *SrcValue2, int SrcValueOffset2)
+{
+ // If they are the same then they must be aliases.
+ if (Ptr1 == Ptr2) return true;
+
+ // Gather base node and offset information.
+ SDOperand Base1, Base2;
+ int64_t Offset1, Offset2;
+ bool KnownBase1 = FindBaseOffset(Ptr1, Base1, Offset1);
+ bool KnownBase2 = FindBaseOffset(Ptr2, Base2, Offset2);
+
+  // If they have the same base address then...
+ if (Base1 == Base2) {
+ // Check to see if the addresses overlap.
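+    // The ranges [Offset1, Offset1+Size1) and [Offset2, Offset2+Size2) only
+    // fail to overlap when one of them ends at or before the other begins.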
+    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+ }
+
+ // If we know both bases then they can't alias.
+ if (KnownBase1 && KnownBase2) return false;
+
+ if (CombinerGlobalAA) {
+ // Use alias analysis information.
+ int Overlap1 = Size1 + SrcValueOffset1 + Offset1;
+ int Overlap2 = Size2 + SrcValueOffset2 + Offset2;
+ AliasAnalysis::AliasResult AAResult =
+ AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2);
+ if (AAResult == AliasAnalysis::NoAlias)
+ return false;
+ }
+
+ // Otherwise we have to assume they alias.
+ return true;
+}
+
+/// FindAliasInfo - Extracts the relevant alias information from the memory
+/// node. Returns true if the operand was a load.
+bool DAGCombiner::FindAliasInfo(SDNode *N,
+ SDOperand &Ptr, int64_t &Size,
+ const Value *&SrcValue, int &SrcValueOffset) {
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ Size = MVT::getSizeInBits(LD->getLoadedVT()) >> 3;
+ SrcValue = LD->getSrcValue();
+ SrcValueOffset = LD->getSrcValueOffset();
+ return true;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ Size = MVT::getSizeInBits(ST->getStoredVT()) >> 3;
+ SrcValue = ST->getSrcValue();
+ SrcValueOffset = ST->getSrcValueOffset();
+ } else {
+ assert(0 && "FindAliasInfo expected a memory operand");
+ }
+
+ return false;
+}
+
+/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+/// looking for aliasing nodes and adding them to the Aliases vector.
+void DAGCombiner::GatherAllAliases(SDNode *N, SDOperand OriginalChain,
+ SmallVector<SDOperand, 8> &Aliases) {
+ SmallVector<SDOperand, 8> Chains; // List of chains to visit.
+ std::set<SDNode *> Visited; // Visited node set.
+
+ // Get alias information for node.
+ SDOperand Ptr;
+ int64_t Size;
+ const Value *SrcValue;
+ int SrcValueOffset;
+ bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset);
+
+ // Starting off.
+ Chains.push_back(OriginalChain);
+
+ // Look at each chain and determine if it is an alias. If so, add it to the
+ // aliases list. If not, then continue up the chain looking for the next
+ // candidate.
+ while (!Chains.empty()) {
+ SDOperand Chain = Chains.back();
+ Chains.pop_back();
+
+ // Don't bother if we've been before.
+ if (Visited.find(Chain.Val) != Visited.end()) continue;
+ Visited.insert(Chain.Val);
+
+ switch (Chain.getOpcode()) {
+ case ISD::EntryToken:
+ // Entry token is ideal chain operand, but handled in FindBetterChain.
+ break;
+
+ case ISD::LOAD:
+ case ISD::STORE: {
+ // Get alias information for Chain.
+ SDOperand OpPtr;
+ int64_t OpSize;
+ const Value *OpSrcValue;
+ int OpSrcValueOffset;
+ bool IsOpLoad = FindAliasInfo(Chain.Val, OpPtr, OpSize,
+ OpSrcValue, OpSrcValueOffset);
+
+ // If chain is alias then stop here.
+ if (!(IsLoad && IsOpLoad) &&
+ isAlias(Ptr, Size, SrcValue, SrcValueOffset,
+ OpPtr, OpSize, OpSrcValue, OpSrcValueOffset)) {
+ Aliases.push_back(Chain);
+ } else {
+ // Look further up the chain.
+ Chains.push_back(Chain.getOperand(0));
+ // Clean up old chain.
+ AddToWorkList(Chain.Val);
+ }
+ break;
+ }
+
+ case ISD::TokenFactor:
+ // We have to check each of the operands of the token factor, so we queue
+      // them up. Adding the operands to the queue (stack) in reverse order
+      // maintains the original order and increases the likelihood that getNode
+      // will find a matching token factor (CSE).
+ for (unsigned n = Chain.getNumOperands(); n;)
+ Chains.push_back(Chain.getOperand(--n));
+ // Eliminate the token factor if we can.
+ AddToWorkList(Chain.Val);
+ break;
+
+ default:
+ // For all other instructions we will just have to take what we can get.
+ Aliases.push_back(Chain);
+ break;
+ }
+ }
+}
+
+/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
+/// for a better chain (aliasing node.)
+SDOperand DAGCombiner::FindBetterChain(SDNode *N, SDOperand OldChain) {
+ SmallVector<SDOperand, 8> Aliases; // Ops for replacing token factor.
+
+ // Accumulate all the aliases to this node.
+ GatherAllAliases(N, OldChain, Aliases);
+
+ if (Aliases.size() == 0) {
+ // If no operands then chain to entry token.
+ return DAG.getEntryNode();
+ } else if (Aliases.size() == 1) {
+ // If a single operand then chain to it. We don't need to revisit it.
+ return Aliases[0];
+ }
+
+ // Construct a custom tailored token factor.
+ SDOperand NewChain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &Aliases[0], Aliases.size());
+
+ // Make sure the old chain gets cleaned up.
+ if (NewChain != OldChain) AddToWorkList(OldChain.Val);
+
+ return NewChain;
+}
+
+// SelectionDAG::Combine - This is the entry point for the file.
+//
+void SelectionDAG::Combine(bool RunningAfterLegalize, AliasAnalysis &AA) {
+ if (!RunningAfterLegalize && ViewDAGCombine1)
+ viewGraph();
+ if (RunningAfterLegalize && ViewDAGCombine2)
+ viewGraph();
+ /// run - This is the main entry point to this class.
+ ///
+ DAGCombiner(*this, AA).Run(RunningAfterLegalize);
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
new file mode 100644
index 0000000..5bcee35
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -0,0 +1,5755 @@
+//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::Legalize method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <map>
+using namespace llvm;
+
+#ifndef NDEBUG
+static cl::opt<bool>
+ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize"));
+#else
+static const bool ViewLegalizeDAGs = 0;
+#endif
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
+/// hacks on it until the target machine can handle it. This involves
+/// eliminating value sizes the machine cannot handle (promoting small sizes to
+/// large sizes or splitting up large values into small values) as well as
+/// eliminating operations the machine cannot handle.
+///
+/// This code also does a small amount of optimization and recognition of idioms
+/// as part of its processing. For example, if a target does not support a
+/// 'setcc' instruction efficiently, but does support the 'brcc' instruction,
+/// this will attempt to merge setcc and branch instructions into brcc's.
+///
+namespace {
+class VISIBILITY_HIDDEN SelectionDAGLegalize {
+ TargetLowering &TLI;
+ SelectionDAG &DAG;
+
+ // Libcall insertion helpers.
+
+ /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
+ /// legalized. We use this to ensure that calls are properly serialized
+ /// against each other, including inserted libcalls.
+ SDOperand LastCALLSEQ_END;
+
+ /// IsLegalizingCall - This member is used *only* for purposes of providing
+ /// helpful assertions that a libcall isn't created while another call is
+ /// being legalized (which could lead to non-serialized call sequences).
+ bool IsLegalizingCall;
+
+ enum LegalizeAction {
+ Legal, // The target natively supports this operation.
+ Promote, // This operation should be executed in a larger type.
+ Expand // Try to expand this to other ops, otherwise use a libcall.
+ };
+
+ /// ValueTypeActions - This is a bitvector that contains two bits for each
+ /// value type, where the two bits correspond to the LegalizeAction enum.
+ /// This can be queried with "getTypeAction(VT)".
+ TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+ /// LegalizedNodes - For nodes that are of legal width, and that have more
+ /// than one use, this map indicates what regularized operand to use. This
+ /// allows us to avoid legalizing the same thing more than once.
+ DenseMap<SDOperand, SDOperand> LegalizedNodes;
+
+ /// PromotedNodes - For nodes that are below legal width, and that have more
+ /// than one use, this map indicates what promoted value to use. This allows
+ /// us to avoid promoting the same thing more than once.
+ DenseMap<SDOperand, SDOperand> PromotedNodes;
+
+ /// ExpandedNodes - For nodes that need to be expanded this map indicates
+  /// which operands are the expanded version of the input. This allows
+ /// us to avoid expanding the same node more than once.
+ DenseMap<SDOperand, std::pair<SDOperand, SDOperand> > ExpandedNodes;
+
+ /// SplitNodes - For vector nodes that need to be split, this map indicates
+  /// which operands are the split version of the input. This allows us
+ /// to avoid splitting the same node more than once.
+ std::map<SDOperand, std::pair<SDOperand, SDOperand> > SplitNodes;
+
+ /// ScalarizedNodes - For nodes that need to be converted from vector types to
+ /// scalar types, this contains the mapping of ones we have already
+ /// processed to the result.
+ std::map<SDOperand, SDOperand> ScalarizedNodes;
+
+ void AddLegalizedOperand(SDOperand From, SDOperand To) {
+ LegalizedNodes.insert(std::make_pair(From, To));
+ // If someone requests legalization of the new node, return itself.
+ if (From != To)
+ LegalizedNodes.insert(std::make_pair(To, To));
+ }
+ void AddPromotedOperand(SDOperand From, SDOperand To) {
+ bool isNew = PromotedNodes.insert(std::make_pair(From, To));
+ assert(isNew && "Got into the map somehow?");
+ // If someone requests legalization of the new node, return itself.
+ LegalizedNodes.insert(std::make_pair(To, To));
+ }
+
+public:
+
+ SelectionDAGLegalize(SelectionDAG &DAG);
+
+ /// getTypeAction - Return how we should legalize values of this type, either
+ /// it is already legal or we need to expand it into multiple registers of
+ /// smaller integer type, or we need to promote it to a larger type.
+ LegalizeAction getTypeAction(MVT::ValueType VT) const {
+ return (LegalizeAction)ValueTypeActions.getTypeAction(VT);
+ }
+
+ /// isTypeLegal - Return true if this type is legal on this target.
+ ///
+ bool isTypeLegal(MVT::ValueType VT) const {
+ return getTypeAction(VT) == Legal;
+ }
+
+ void LegalizeDAG();
+
+private:
+ /// HandleOp - Legalize, Promote, or Expand the specified operand as
+ /// appropriate for its type.
+ void HandleOp(SDOperand Op);
+
+ /// LegalizeOp - We know that the specified value has a legal type.
+ /// Recursively ensure that the operands have legal types, then return the
+ /// result.
+ SDOperand LegalizeOp(SDOperand O);
+
+ /// PromoteOp - Given an operation that produces a value in an invalid type,
+ /// promote it to compute the value into a larger type. The produced value
+ /// will have the correct bits for the low portion of the register, but no
+ /// guarantee is made about the top bits: it may be zero, sign-extended, or
+ /// garbage.
+ SDOperand PromoteOp(SDOperand O);
+
+ /// ExpandOp - Expand the specified SDOperand into its two component pieces
+ /// Lo&Hi. Note that the Op MUST be an expanded type. As a result of this,
+ /// the LegalizeNodes map is filled in for any results that are not expanded,
+ /// the ExpandedNodes map is filled in for any results that are expanded, and
+ /// the Lo/Hi values are returned. This applies to integer types and Vector
+ /// types.
+ void ExpandOp(SDOperand O, SDOperand &Lo, SDOperand &Hi);
+
+ /// SplitVectorOp - Given an operand of vector type, break it down into
+ /// two smaller values.
+ void SplitVectorOp(SDOperand O, SDOperand &Lo, SDOperand &Hi);
+
+ /// ScalarizeVectorOp - Given an operand of single-element vector type
+ /// (e.g. v1f32), convert it into the equivalent operation that returns a
+ /// scalar (e.g. f32) value.
+ SDOperand ScalarizeVectorOp(SDOperand O);
+
+ /// isShuffleLegal - Return true if a vector shuffle is legal with the
+ /// specified mask and type. Targets can specify exactly which masks they
+ /// support and the code generator is tasked with not creating illegal masks.
+ ///
+ /// Note that this will also return true for shuffles that are promoted to a
+ /// different type.
+ ///
+ /// If this is a legal shuffle, this method returns the (possibly promoted)
+ /// build_vector Mask. If it's not a legal shuffle, it returns null.
+ SDNode *isShuffleLegal(MVT::ValueType VT, SDOperand Mask) const;
+
+ bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+ SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
+
+ void LegalizeSetCCOperands(SDOperand &LHS, SDOperand &RHS, SDOperand &CC);
+
+ SDOperand CreateStackTemporary(MVT::ValueType VT);
+
+ SDOperand ExpandLibCall(const char *Name, SDNode *Node, bool isSigned,
+ SDOperand &Hi);
+ SDOperand ExpandIntToFP(bool isSigned, MVT::ValueType DestTy,
+ SDOperand Source);
+
+ SDOperand ExpandBIT_CONVERT(MVT::ValueType DestVT, SDOperand SrcOp);
+ SDOperand ExpandBUILD_VECTOR(SDNode *Node);
+ SDOperand ExpandSCALAR_TO_VECTOR(SDNode *Node);
+ SDOperand ExpandLegalINT_TO_FP(bool isSigned,
+ SDOperand LegalOp,
+ MVT::ValueType DestVT);
+ SDOperand PromoteLegalINT_TO_FP(SDOperand LegalOp, MVT::ValueType DestVT,
+ bool isSigned);
+ SDOperand PromoteLegalFP_TO_INT(SDOperand LegalOp, MVT::ValueType DestVT,
+ bool isSigned);
+
+ SDOperand ExpandBSWAP(SDOperand Op);
+ SDOperand ExpandBitCount(unsigned Opc, SDOperand Op);
+ bool ExpandShift(unsigned Opc, SDOperand Op, SDOperand Amt,
+ SDOperand &Lo, SDOperand &Hi);
+ void ExpandShiftParts(unsigned NodeOp, SDOperand Op, SDOperand Amt,
+ SDOperand &Lo, SDOperand &Hi);
+
+ SDOperand ExpandEXTRACT_SUBVECTOR(SDOperand Op);
+ SDOperand ExpandEXTRACT_VECTOR_ELT(SDOperand Op);
+
+ SDOperand getIntPtrConstant(uint64_t Val) {
+ return DAG.getConstant(Val, TLI.getPointerTy());
+ }
+};
+}
+
+/// isShuffleLegal - Return true if a vector shuffle is legal with the
+/// specified mask and type. Targets can specify exactly which masks they
+/// support and the code generator is tasked with not creating illegal masks.
+///
+/// Note that this will also return true for shuffles that are promoted to a
+/// different type.
+SDNode *SelectionDAGLegalize::isShuffleLegal(MVT::ValueType VT,
+ SDOperand Mask) const {
+ switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE, VT)) {
+ default: return 0;
+ case TargetLowering::Legal:
+ case TargetLowering::Custom:
+ break;
+ case TargetLowering::Promote: {
+ // If this is promoted to a different type, convert the shuffle mask and
+ // ask if it is legal in the promoted type!
+ MVT::ValueType NVT = TLI.getTypeToPromoteTo(ISD::VECTOR_SHUFFLE, VT);
+
+ // If we changed # elements, change the shuffle mask.
+ unsigned NumEltsGrowth =
+ MVT::getVectorNumElements(NVT) / MVT::getVectorNumElements(VT);
+ assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
+ if (NumEltsGrowth > 1) {
+ // Renumber the elements.
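+      // For example, with NumEltsGrowth == 2, a mask entry selecting old
+      // element InEltNo expands to the two entries InEltNo*2 and InEltNo*2+1.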
+ SmallVector<SDOperand, 8> Ops;
+ for (unsigned i = 0, e = Mask.getNumOperands(); i != e; ++i) {
+ SDOperand InOp = Mask.getOperand(i);
+ for (unsigned j = 0; j != NumEltsGrowth; ++j) {
+ if (InOp.getOpcode() == ISD::UNDEF)
+ Ops.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
+ else {
+ unsigned InEltNo = cast<ConstantSDNode>(InOp)->getValue();
+ Ops.push_back(DAG.getConstant(InEltNo*NumEltsGrowth+j, MVT::i32));
+ }
+ }
+ }
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, NVT, &Ops[0], Ops.size());
+ }
+ VT = NVT;
+ break;
+ }
+ }
+ return TLI.isShuffleMaskLegal(Mask, VT) ? Mask.Val : 0;
+}
+
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
+ : TLI(dag.getTargetLoweringInfo()), DAG(dag),
+ ValueTypeActions(TLI.getValueTypeActions()) {
+ assert(MVT::LAST_VALUETYPE <= 32 &&
+ "Too many value types for ValueTypeActions to hold!");
+}
+
+/// ComputeTopDownOrdering - Compute a top-down ordering of the dag, where Order
+/// contains all of a node's operands before it contains the node.
+static void ComputeTopDownOrdering(SelectionDAG &DAG,
+ SmallVector<SDNode*, 64> &Order) {
+
+ DenseMap<SDNode*, unsigned> Visited;
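+  // Visited[N] counts how many of N's operands have been emitted so far; N is
+  // appended to Order once that count reaches N->getNumOperands(). Leaves are
+  // seeded with -1U so their first visit completes them immediately.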
+ std::vector<SDNode*> Worklist;
+ Worklist.reserve(128);
+
+  // Compute ordering from all of the leaves in the graph, those (like the
+ // entry node) that have no operands.
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I) {
+ if (I->getNumOperands() == 0) {
+ Visited[I] = 0 - 1U;
+ Worklist.push_back(I);
+ }
+ }
+
+ while (!Worklist.empty()) {
+ SDNode *N = Worklist.back();
+ Worklist.pop_back();
+
+ if (++Visited[N] != N->getNumOperands())
+ continue; // Haven't visited all operands yet
+
+ Order.push_back(N);
+
+    // Now that we have N in the ordering, push its users; each of them is
+    // emitted only once all of its own operands are done.
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI)
+ Worklist.push_back(*UI);
+ }
+
+ assert(Order.size() == Visited.size() &&
+ Order.size() ==
+ (unsigned)std::distance(DAG.allnodes_begin(), DAG.allnodes_end()) &&
+ "Error: DAG is cyclic!");
+}
+
+
+void SelectionDAGLegalize::LegalizeDAG() {
+ LastCALLSEQ_END = DAG.getEntryNode();
+ IsLegalizingCall = false;
+
+ // The legalize process is inherently a bottom-up recursive process (users
+ // legalize their uses before themselves). Given infinite stack space, we
+ // could just start legalizing on the root and traverse the whole graph. In
+ // practice however, this causes us to run out of stack space on large basic
+ // blocks. To avoid this problem, compute an ordering of the nodes where each
+ // node is only legalized after all of its operands are legalized.
+ SmallVector<SDNode*, 64> Order;
+ ComputeTopDownOrdering(DAG, Order);
+
+ for (unsigned i = 0, e = Order.size(); i != e; ++i)
+ HandleOp(SDOperand(Order[i], 0));
+
+ // Finally, it's possible the root changed. Get the new root.
+ SDOperand OldRoot = DAG.getRoot();
+ assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+ DAG.setRoot(LegalizedNodes[OldRoot]);
+
+ ExpandedNodes.clear();
+ LegalizedNodes.clear();
+ PromotedNodes.clear();
+ SplitNodes.clear();
+ ScalarizedNodes.clear();
+
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
+}
+
+
+/// FindCallEndFromCallStart - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_END node that terminates the call sequence.
+static SDNode *FindCallEndFromCallStart(SDNode *Node) {
+ if (Node->getOpcode() == ISD::CALLSEQ_END)
+ return Node;
+ if (Node->use_empty())
+ return 0; // No CallSeqEnd
+
+ // The chain is usually at the end.
+ SDOperand TheChain(Node, Node->getNumValues()-1);
+ if (TheChain.getValueType() != MVT::Other) {
+ // Sometimes it's at the beginning.
+ TheChain = SDOperand(Node, 0);
+ if (TheChain.getValueType() != MVT::Other) {
+ // Otherwise, hunt for it.
+ for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i)
+ if (Node->getValueType(i) == MVT::Other) {
+ TheChain = SDOperand(Node, i);
+ break;
+ }
+
+ // Otherwise, we walked into a node without a chain.
+ if (TheChain.getValueType() != MVT::Other)
+ return 0;
+ }
+ }
+
+ for (SDNode::use_iterator UI = Node->use_begin(),
+ E = Node->use_end(); UI != E; ++UI) {
+
+ // Make sure to only follow users of our token chain.
+ SDNode *User = *UI;
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
+ if (User->getOperand(i) == TheChain)
+ if (SDNode *Result = FindCallEndFromCallStart(User))
+ return Result;
+ }
+ return 0;
+}
+
+/// FindCallStartFromCallEnd - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_START node that initiates the call sequence.
+static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
+ assert(Node && "Didn't find callseq_start for a call??");
+ if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
+
+ assert(Node->getOperand(0).getValueType() == MVT::Other &&
+ "Node doesn't have a token chain argument!");
+ return FindCallStartFromCallEnd(Node->getOperand(0).Val);
+}
+
+/// LegalizeAllNodesNotLeadingTo - Recursively walk N and its operands, looking
+/// to see if any of them can reach Dest. If none of N's operands lead to Dest,
+/// legalize them, legalize N itself, and return false; otherwise, return true.
+///
+/// Keep track of the nodes we find that actually do lead to Dest in
+/// NodesLeadingTo. This avoids retraversing them an exponential number of
+/// times.
+///
+bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+ SmallPtrSet<SDNode*, 32> &NodesLeadingTo) {
+ if (N == Dest) return true; // N certainly leads to Dest :)
+
+ // If we've already processed this node and it does lead to Dest, there is no
+ // need to reprocess it.
+ if (NodesLeadingTo.count(N)) return true;
+
+  // If the first result of this node has already been legalized, then it
+  // cannot reach Dest.
+ switch (getTypeAction(N->getValueType(0))) {
+ case Legal:
+ if (LegalizedNodes.count(SDOperand(N, 0))) return false;
+ break;
+ case Promote:
+ if (PromotedNodes.count(SDOperand(N, 0))) return false;
+ break;
+ case Expand:
+ if (ExpandedNodes.count(SDOperand(N, 0))) return false;
+ break;
+ }
+
+ // Okay, this node has not already been legalized. Check and legalize all
+ // operands. If none lead to Dest, then we can legalize this node.
+ bool OperandsLeadToDest = false;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ OperandsLeadToDest |= // If an operand leads to Dest, so do we.
+ LegalizeAllNodesNotLeadingTo(N->getOperand(i).Val, Dest, NodesLeadingTo);
+
+ if (OperandsLeadToDest) {
+ NodesLeadingTo.insert(N);
+ return true;
+ }
+
+ // Okay, this node looks safe, legalize it and return false.
+ HandleOp(SDOperand(N, 0));
+ return false;
+}
+
+/// HandleOp - Legalize, Promote, or Expand the specified operand as
+/// appropriate for its type.
+void SelectionDAGLegalize::HandleOp(SDOperand Op) {
+ MVT::ValueType VT = Op.getValueType();
+ switch (getTypeAction(VT)) {
+ default: assert(0 && "Bad type action!");
+ case Legal: (void)LegalizeOp(Op); break;
+ case Promote: (void)PromoteOp(Op); break;
+ case Expand:
+ if (!MVT::isVector(VT)) {
+ // If this is an illegal scalar, expand it into its two component
+ // pieces.
+ SDOperand X, Y;
+ ExpandOp(Op, X, Y);
+ } else if (MVT::getVectorNumElements(VT) == 1) {
+ // If this is an illegal single element vector, convert it to a
+ // scalar operation.
+ (void)ScalarizeVectorOp(Op);
+ } else {
+ // Otherwise, this is an illegal multiple element vector.
+ // Split it in half and legalize both parts.
+ SDOperand X, Y;
+ SplitVectorOp(Op, X, Y);
+ }
+ break;
+ }
+}
+
+/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
+/// a load from the constant pool.
+static SDOperand ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
+ SelectionDAG &DAG, TargetLowering &TLI) {
+ bool Extend = false;
+
+  // If an FP immediate is precise when represented as a float and if the
+  // target can do an extending load from float to double, we put it into
+  // the constant pool as a float, even if it is statically typed as a
+  // double.
+ MVT::ValueType VT = CFP->getValueType(0);
+ bool isDouble = VT == MVT::f64;
+ ConstantFP *LLVMC = ConstantFP::get(isDouble ? Type::DoubleTy :
+ Type::FloatTy, CFP->getValue());
+ if (!UseCP) {
+ double Val = LLVMC->getValue();
+ return isDouble
+ ? DAG.getConstant(DoubleToBits(Val), MVT::i64)
+ : DAG.getConstant(FloatToBits(Val), MVT::i32);
+ }
+
+ if (isDouble && CFP->isExactlyValue((float)CFP->getValue()) &&
+ // Only do this if the target has a native EXTLOAD instruction from f32.
+ TLI.isLoadXLegal(ISD::EXTLOAD, MVT::f32)) {
+ LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC,Type::FloatTy));
+ VT = MVT::f32;
+ Extend = true;
+ }
+
+ SDOperand CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
+ if (Extend) {
+ return DAG.getExtLoad(ISD::EXTLOAD, MVT::f64, DAG.getEntryNode(),
+ CPIdx, NULL, 0, MVT::f32);
+ } else {
+ return DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0);
+ }
+}
+
+
+/// ExpandFCOPYSIGNToBitwiseOps - Expands fcopysign to a series of bitwise
+/// operations.
+static
+SDOperand ExpandFCOPYSIGNToBitwiseOps(SDNode *Node, MVT::ValueType NVT,
+ SelectionDAG &DAG, TargetLowering &TLI) {
+ MVT::ValueType VT = Node->getValueType(0);
+ MVT::ValueType SrcVT = Node->getOperand(1).getValueType();
+ assert((SrcVT == MVT::f32 || SrcVT == MVT::f64) &&
+ "fcopysign expansion only supported for f32 and f64");
+ MVT::ValueType SrcNVT = (SrcVT == MVT::f64) ? MVT::i64 : MVT::i32;
+
+ // First get the sign bit of second operand.
+ SDOperand Mask1 = (SrcVT == MVT::f64)
+ ? DAG.getConstantFP(BitsToDouble(1ULL << 63), SrcVT)
+ : DAG.getConstantFP(BitsToFloat(1U << 31), SrcVT);
+ Mask1 = DAG.getNode(ISD::BIT_CONVERT, SrcNVT, Mask1);
+ SDOperand SignBit= DAG.getNode(ISD::BIT_CONVERT, SrcNVT, Node->getOperand(1));
+ SignBit = DAG.getNode(ISD::AND, SrcNVT, SignBit, Mask1);
+ // Shift right or sign-extend it if the two operands have different types.
+ int SizeDiff = MVT::getSizeInBits(SrcNVT) - MVT::getSizeInBits(NVT);
+ if (SizeDiff > 0) {
+ SignBit = DAG.getNode(ISD::SRL, SrcNVT, SignBit,
+ DAG.getConstant(SizeDiff, TLI.getShiftAmountTy()));
+ SignBit = DAG.getNode(ISD::TRUNCATE, NVT, SignBit);
+ } else if (SizeDiff < 0)
+ SignBit = DAG.getNode(ISD::SIGN_EXTEND, NVT, SignBit);
+
+ // Clear the sign bit of first operand.
+ SDOperand Mask2 = (VT == MVT::f64)
+ ? DAG.getConstantFP(BitsToDouble(~(1ULL << 63)), VT)
+ : DAG.getConstantFP(BitsToFloat(~(1U << 31)), VT);
+ Mask2 = DAG.getNode(ISD::BIT_CONVERT, NVT, Mask2);
+ SDOperand Result = DAG.getNode(ISD::BIT_CONVERT, NVT, Node->getOperand(0));
+ Result = DAG.getNode(ISD::AND, NVT, Result, Mask2);
+
+ // Or the value with the sign bit.
+ Result = DAG.getNode(ISD::OR, NVT, Result, SignBit);
+ return Result;
+}
+
+
+/// LegalizeOp - We know that the specified value has a legal type, and
+/// that its operands are legal. Now ensure that the operation itself
+/// is legal, recursively ensuring that the operands' operations remain
+/// legal.
+SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
+ assert(isTypeLegal(Op.getValueType()) &&
+ "Caller should expand or promote operands that are not legal!");
+ SDNode *Node = Op.Val;
+
+ // If this operation defines any values that cannot be represented in a
+ // register on this target, make sure to expand or promote them.
+ if (Node->getNumValues() > 1) {
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ if (getTypeAction(Node->getValueType(i)) != Legal) {
+ HandleOp(Op.getValue(i));
+ assert(LegalizedNodes.count(Op) &&
+ "Handling didn't add legal operands!");
+ return LegalizedNodes[Op];
+ }
+ }
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ DenseMap<SDOperand, SDOperand>::iterator I = LegalizedNodes.find(Op);
+ if (I != LegalizedNodes.end()) return I->second;
+
+ SDOperand Tmp1, Tmp2, Tmp3, Tmp4;
+ SDOperand Result = Op;
+ bool isCustom = false;
+
+ switch (Node->getOpcode()) {
+ case ISD::FrameIndex:
+ case ISD::EntryToken:
+ case ISD::Register:
+ case ISD::BasicBlock:
+ case ISD::TargetFrameIndex:
+ case ISD::TargetJumpTable:
+ case ISD::TargetConstant:
+ case ISD::TargetConstantFP:
+ case ISD::TargetConstantPool:
+ case ISD::TargetGlobalAddress:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::TargetExternalSymbol:
+ case ISD::VALUETYPE:
+ case ISD::SRCVALUE:
+ case ISD::STRING:
+ case ISD::CONDCODE:
+ // Primitives must all be legal.
+    assert(TLI.isOperationLegal(Node->getOpcode(), Node->getValueType(0)) &&
+ "This must be legal!");
+ break;
+ default:
+ if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
+ // If this is a target node, legalize it by legalizing the operands then
+ // passing it through.
+ SmallVector<SDOperand, 8> Ops;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(LegalizeOp(Node->getOperand(i)));
+
+ Result = DAG.UpdateNodeOperands(Result.getValue(0), &Ops[0], Ops.size());
+
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
+ return Result.getValue(Op.ResNo);
+ }
+    // Otherwise this is an unhandled builtin node; dump it and abort.
+#ifndef NDEBUG
+ cerr << "NODE: "; Node->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to legalize this operator!");
+ abort();
+ case ISD::GLOBAL_OFFSET_TABLE:
+ case ISD::GlobalAddress:
+ case ISD::GlobalTLSAddress:
+ case ISD::ExternalSymbol:
+ case ISD::ConstantPool:
+ case ISD::JumpTable: // Nothing to do.
+ switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Op, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ // FALLTHROUGH if the target doesn't want to lower this op after all.
+ case TargetLowering::Legal:
+ break;
+ }
+ break;
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ // The only option for these nodes is to custom lower them. If the target
+ // does not custom lower them, then return zero.
+ Tmp1 = TLI.LowerOperation(Op, DAG);
+ if (Tmp1.Val)
+ Result = Tmp1;
+ else
+ Result = DAG.getConstant(0, TLI.getPointerTy());
+ break;
+ case ISD::EXCEPTIONADDR: {
+ Tmp1 = LegalizeOp(Node->getOperand(0));
+ MVT::ValueType VT = Node->getValueType(0);
+ switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Expand: {
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ Result = DAG.getCopyFromReg(Tmp1, Reg, VT).getValue(Op.ResNo);
+ }
+ break;
+ case TargetLowering::Custom:
+ Result = TLI.LowerOperation(Op, DAG);
+ if (Result.Val) break;
+ // Fall Thru
+ case TargetLowering::Legal: {
+ SDOperand Ops[] = { DAG.getConstant(0, VT), Tmp1 };
+ Result = DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, MVT::Other),
+ Ops, 2).getValue(Op.ResNo);
+ break;
+ }
+ }
+ }
+ break;
+ case ISD::EHSELECTION: {
+ Tmp1 = LegalizeOp(Node->getOperand(0));
+ Tmp2 = LegalizeOp(Node->getOperand(1));
+ MVT::ValueType VT = Node->getValueType(0);
+ switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Expand: {
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ Result = DAG.getCopyFromReg(Tmp2, Reg, VT).getValue(Op.ResNo);
+ }
+ break;
+ case TargetLowering::Custom:
+ Result = TLI.LowerOperation(Op, DAG);
+ if (Result.Val) break;
+ // Fall Thru
+ case TargetLowering::Legal: {
+ SDOperand Ops[] = { DAG.getConstant(0, VT), Tmp2 };
+ Result = DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, MVT::Other),
+ Ops, 2).getValue(Op.ResNo);
+ break;
+ }
+ }
+ }
+ break;
+ case ISD::EH_RETURN: {
+ MVT::ValueType VT = Node->getValueType(0);
+ // The only "good" option for this node is to custom lower it.
+ switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+ default: assert(0 && "This action is not supported at all!");
+ case TargetLowering::Custom:
+ Result = TLI.LowerOperation(Op, DAG);
+ if (Result.Val) break;
+ // Fall Thru
+ case TargetLowering::Legal:
+      // The target does not know how to lower this; lower it to a no-op.
+ Result = LegalizeOp(Node->getOperand(0));
+ break;
+ }
+ }
+ break;
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ Tmp1 = LegalizeOp(Node->getOperand(0));
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+ break;
+ case ISD::MERGE_VALUES:
+ // Legalize eliminates MERGE_VALUES nodes.
+ Result = Node->getOperand(Op.ResNo);
+ break;
+ case ISD::CopyFromReg:
+ Tmp1 = LegalizeOp(Node->getOperand(0));
+ Result = Op.getValue(0);
+ if (Node->getNumValues() == 2) {
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+ } else {
+ assert(Node->getNumValues() == 3 && "Invalid copyfromreg!");
+ if (Node->getNumOperands() == 3) {
+ Tmp2 = LegalizeOp(Node->getOperand(2));
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1),Tmp2);
+ } else {
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+ }
+ AddLegalizedOperand(Op.getValue(2), Result.getValue(2));
+ }
+ // Since CopyFromReg produces two values, make sure to remember that we
+ // legalized both of them.
+ AddLegalizedOperand(Op.getValue(0), Result);
+ AddLegalizedOperand(Op.getValue(1), Result.getValue(1));
+ return Result.getValue(Op.ResNo);
+ case ISD::UNDEF: {
+ MVT::ValueType VT = Op.getValueType();
+ switch (TLI.getOperationAction(ISD::UNDEF, VT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Expand:
+ if (MVT::isInteger(VT))
+ Result = DAG.getConstant(0, VT);
+ else if (MVT::isFloatingPoint(VT))
+ Result = DAG.getConstantFP(0, VT);
+ else
+ assert(0 && "Unknown value type!");
+ break;
+ case TargetLowering::Legal:
+ break;
+ }
+ break;
+ }
+
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID: {
+ SmallVector<SDOperand, 8> Ops;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(LegalizeOp(Node->getOperand(i)));
+ Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+
+ // Allow the target to custom lower its intrinsics if it wants to.
+ if (TLI.getOperationAction(Node->getOpcode(), MVT::Other) ==
+ TargetLowering::Custom) {
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.Val) Result = Tmp3;
+ }
+
+ if (Result.Val->getNumValues() == 1) break;
+
+ // Must have return value and chain result.
+ assert(Result.Val->getNumValues() == 2 &&
+ "Cannot return more than two values!");
+
+    // Since this intrinsic produces two values (a result and a chain), make
+    // sure to remember that we legalized both of them.
+ AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+ AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+ return Result.getValue(Op.ResNo);
+ }
+
+ case ISD::LOCATION:
+ assert(Node->getNumOperands() == 5 && "Invalid LOCATION node!");
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the input chain.
+
+ switch (TLI.getOperationAction(ISD::LOCATION, MVT::Other)) {
+ case TargetLowering::Promote:
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Expand: {
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+ bool useDEBUG_LOC = TLI.isOperationLegal(ISD::DEBUG_LOC, MVT::Other);
+ bool useLABEL = TLI.isOperationLegal(ISD::LABEL, MVT::Other);
+
+ if (MMI && (useDEBUG_LOC || useLABEL)) {
+ const std::string &FName =
+ cast<StringSDNode>(Node->getOperand(3))->getValue();
+ const std::string &DirName =
+ cast<StringSDNode>(Node->getOperand(4))->getValue();
+ unsigned SrcFile = MMI->RecordSource(DirName, FName);
+
+ SmallVector<SDOperand, 8> Ops;
+ Ops.push_back(Tmp1); // chain
+ SDOperand LineOp = Node->getOperand(1);
+ SDOperand ColOp = Node->getOperand(2);
+
+ if (useDEBUG_LOC) {
+ Ops.push_back(LineOp); // line #
+ Ops.push_back(ColOp); // col #
+ Ops.push_back(DAG.getConstant(SrcFile, MVT::i32)); // source file id
+ Result = DAG.getNode(ISD::DEBUG_LOC, MVT::Other, &Ops[0], Ops.size());
+ } else {
+ unsigned Line = cast<ConstantSDNode>(LineOp)->getValue();
+ unsigned Col = cast<ConstantSDNode>(ColOp)->getValue();
+ unsigned ID = MMI->RecordLabel(Line, Col, SrcFile);
+ Ops.push_back(DAG.getConstant(ID, MVT::i32));
+ Result = DAG.getNode(ISD::LABEL, MVT::Other,&Ops[0],Ops.size());
+ }
+ } else {
+ Result = Tmp1; // chain
+ }
+ break;
+ }
+ case TargetLowering::Legal:
+ if (Tmp1 != Node->getOperand(0) ||
+ getTypeAction(Node->getOperand(1).getValueType()) == Promote) {
+ SmallVector<SDOperand, 8> Ops;
+ Ops.push_back(Tmp1);
+ if (getTypeAction(Node->getOperand(1).getValueType()) == Legal) {
+ Ops.push_back(Node->getOperand(1)); // line # must be legal.
+ Ops.push_back(Node->getOperand(2)); // col # must be legal.
+ } else {
+ // Otherwise promote them.
+ Ops.push_back(PromoteOp(Node->getOperand(1)));
+ Ops.push_back(PromoteOp(Node->getOperand(2)));
+ }
+ Ops.push_back(Node->getOperand(3)); // filename must be legal.
+ Ops.push_back(Node->getOperand(4)); // working dir # must be legal.
+ Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ }
+ break;
+ }
+ break;
+
+ case ISD::DEBUG_LOC:
+ assert(Node->getNumOperands() == 4 && "Invalid DEBUG_LOC node!");
+ switch (TLI.getOperationAction(ISD::DEBUG_LOC, MVT::Other)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the line #.
+ Tmp3 = LegalizeOp(Node->getOperand(2)); // Legalize the col #.
+ Tmp4 = LegalizeOp(Node->getOperand(3)); // Legalize the source file id.
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3, Tmp4);
+ break;
+ }
+ break;
+
+ case ISD::LABEL:
+ assert(Node->getNumOperands() == 2 && "Invalid LABEL node!");
+ switch (TLI.getOperationAction(ISD::LABEL, MVT::Other)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the label id.
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+ break;
+ case TargetLowering::Expand:
+ Result = LegalizeOp(Node->getOperand(0));
+ break;
+ }
+ break;
+
+ case ISD::Constant:
+    // We know we don't need to expand constants here; constants have only one
+    // value, and its legality was checked above.
+
+ // FIXME: Maybe we should handle things like targets that don't support full
+ // 32-bit immediates?
+ break;
+ case ISD::ConstantFP: {
+ // Spill FP immediates to the constant pool if the target cannot directly
+ // codegen them. Targets often have some immediate values that can be
+ // efficiently generated into an FP register without a load. We explicitly
+ // leave these constants as ConstantFP nodes for the target to deal with.
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+
+ // Check to see if this FP immediate is already legal.
+ bool isLegal = false;
+ for (TargetLowering::legal_fpimm_iterator I = TLI.legal_fpimm_begin(),
+ E = TLI.legal_fpimm_end(); I != E; ++I)
+ if (CFP->isExactlyValue(*I)) {
+ isLegal = true;
+ break;
+ }
+
+ // If this is a legal constant, turn it into a TargetConstantFP node.
+ if (isLegal) {
+ Result = DAG.getTargetConstantFP(CFP->getValue(), CFP->getValueType(0));
+ break;
+ }
+
+ switch (TLI.getOperationAction(ISD::ConstantFP, CFP->getValueType(0))) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.Val) {
+ Result = Tmp3;
+ break;
+ }
+ // FALLTHROUGH
+ case TargetLowering::Expand:
+ Result = ExpandConstantFP(CFP, true, DAG, TLI);
+ }
+ break;
+ }
+ case ISD::TokenFactor:
+ if (Node->getNumOperands() == 2) {
+ Tmp1 = LegalizeOp(Node->getOperand(0));
+ Tmp2 = LegalizeOp(Node->getOperand(1));
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+ } else if (Node->getNumOperands() == 3) {
+ Tmp1 = LegalizeOp(Node->getOperand(0));
+ Tmp2 = LegalizeOp(Node->getOperand(1));
+ Tmp3 = LegalizeOp(Node->getOperand(2));
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+ } else {
+ SmallVector<SDOperand, 8> Ops;
+ // Legalize the operands.
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(LegalizeOp(Node->getOperand(i)));
+ Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ }
+ break;
+
+ case ISD::FORMAL_ARGUMENTS:
+ case ISD::CALL:
+ // The only option for this is to custom lower it.
+ Tmp3 = TLI.LowerOperation(Result.getValue(0), DAG);
+ assert(Tmp3.Val && "Target didn't custom lower this node!");
+ assert(Tmp3.Val->getNumValues() == Result.Val->getNumValues() &&
+ "Lowering call/formal_arguments produced unexpected # results!");
+
+ // Since CALL/FORMAL_ARGUMENTS nodes produce multiple values, make sure to
+ // remember that we legalized all of them, so it doesn't get relegalized.
+ for (unsigned i = 0, e = Tmp3.Val->getNumValues(); i != e; ++i) {
+ Tmp1 = LegalizeOp(Tmp3.getValue(i));
+ if (Op.ResNo == i)
+ Tmp2 = Tmp1;
+ AddLegalizedOperand(SDOperand(Node, i), Tmp1);
+ }
+ return Tmp2;
+
+ case ISD::BUILD_VECTOR:
+ switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.Val) {
+ Result = Tmp3;
+ break;
+ }
+ // FALLTHROUGH
+ case TargetLowering::Expand:
+ Result = ExpandBUILD_VECTOR(Result.Val);
+ break;
+ }
+ break;
+ case ISD::INSERT_VECTOR_ELT:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // InVec
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // InVal
+ Tmp3 = LegalizeOp(Node->getOperand(2)); // InEltNo
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+
+ switch (TLI.getOperationAction(ISD::INSERT_VECTOR_ELT,
+ Node->getValueType(0))) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal:
+ break;
+ case TargetLowering::Custom:
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.Val) {
+ Result = Tmp3;
+ break;
+ }
+ // FALLTHROUGH
+ case TargetLowering::Expand: {
+ // If the insert index is a constant, codegen this as a scalar_to_vector,
+ // then a shuffle that inserts it into the right position in the vector.
+ if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Tmp3)) {
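+        // SCALAR_TO_VECTOR places the scalar in element 0 of a new vector; the
+        // shuffle built below then moves it into the requested position.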
+ SDOperand ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR,
+ Tmp1.getValueType(), Tmp2);
+
+ unsigned NumElts = MVT::getVectorNumElements(Tmp1.getValueType());
+ MVT::ValueType ShufMaskVT = MVT::getIntVectorWithNumElements(NumElts);
+ MVT::ValueType ShufMaskEltVT = MVT::getVectorElementType(ShufMaskVT);
+
+ // We generate a shuffle of InVec and ScVec, so the shuffle mask should
+ // be 0,1,2,3,4,5... with the appropriate element replaced with elt 0 of
+ // the RHS.
+ SmallVector<SDOperand, 8> ShufOps;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (i != InsertPos->getValue())
+ ShufOps.push_back(DAG.getConstant(i, ShufMaskEltVT));
+ else
+ ShufOps.push_back(DAG.getConstant(NumElts, ShufMaskEltVT));
+ }
+ SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, ShufMaskVT,
+ &ShufOps[0], ShufOps.size());
+
+ Result = DAG.getNode(ISD::VECTOR_SHUFFLE, Tmp1.getValueType(),
+ Tmp1, ScVec, ShufMask);
+ Result = LegalizeOp(Result);
+ break;
+ }
+
+      // If the target doesn't support this, we have to spill the input vector
+      // to a temporary stack slot, update the element, then reload the whole
+      // vector. This is slow. We could instead load the value into a vector
+      // register (either with a "move to register" or "extload into register"
+      // instruction) and permute it into place, if the index is a constant
+      // and such a permute is supported by the target.
+ MVT::ValueType VT = Tmp1.getValueType();
+ MVT::ValueType EltVT = Tmp2.getValueType();
+ MVT::ValueType IdxVT = Tmp3.getValueType();
+ MVT::ValueType PtrVT = TLI.getPointerTy();
+ SDOperand StackPtr = CreateStackTemporary(VT);
+ // Store the vector.
+ SDOperand Ch = DAG.getStore(DAG.getEntryNode(), Tmp1, StackPtr, NULL, 0);
+
+ // Truncate or zero extend offset to target pointer type.
+ unsigned CastOpc = (IdxVT > PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
+ Tmp3 = DAG.getNode(CastOpc, PtrVT, Tmp3);
+      // Convert the index to a byte offset and add it to the stack slot
+      // address.
+      unsigned EltSize = MVT::getSizeInBits(EltVT)/8;
+      Tmp3 = DAG.getNode(ISD::MUL, IdxVT, Tmp3,
+                         DAG.getConstant(EltSize, IdxVT));
+ SDOperand StackPtr2 = DAG.getNode(ISD::ADD, IdxVT, Tmp3, StackPtr);
+ // Store the scalar value.
+ Ch = DAG.getStore(Ch, Tmp2, StackPtr2, NULL, 0);
+ // Load the updated vector.
+ Result = DAG.getLoad(VT, Ch, StackPtr, NULL, 0);
+ break;
+ }
+ }
+ break;
+ case ISD::SCALAR_TO_VECTOR:
+ if (!TLI.isTypeLegal(Node->getOperand(0).getValueType())) {
+ Result = LegalizeOp(ExpandSCALAR_TO_VECTOR(Node));
+ break;
+ }
+
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // InVal
+ Result = DAG.UpdateNodeOperands(Result, Tmp1);
+ switch (TLI.getOperationAction(ISD::SCALAR_TO_VECTOR,
+ Node->getValueType(0))) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal:
+ break;
+ case TargetLowering::Custom:
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.Val) {
+ Result = Tmp3;
+ break;
+ }
+ // FALLTHROUGH
+ case TargetLowering::Expand:
+ Result = LegalizeOp(ExpandSCALAR_TO_VECTOR(Node));
+ break;
+ }
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the input vectors,
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // but not the shuffle mask.
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
+
+ // Allow targets to custom lower the SHUFFLEs they support.
+ switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE,Result.getValueType())) {
+ default: assert(0 && "Unknown operation action!");
+ case TargetLowering::Legal:
+ assert(isShuffleLegal(Result.getValueType(), Node->getOperand(2)) &&
+ "vector shuffle should not be created if not legal!");
+ break;
+ case TargetLowering::Custom:
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.Val) {
+ Result = Tmp3;
+ break;
+ }
+ // FALLTHROUGH
+ case TargetLowering::Expand: {
+ MVT::ValueType VT = Node->getValueType(0);
+ MVT::ValueType EltVT = MVT::getVectorElementType(VT);
+ MVT::ValueType PtrVT = TLI.getPointerTy();
+ SDOperand Mask = Node->getOperand(2);
+ unsigned NumElems = Mask.getNumOperands();
+ SmallVector<SDOperand,8> Ops;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDOperand Arg = Mask.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) {
+ Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
+ } else {
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
+ if (Idx < NumElems)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Tmp1,
+ DAG.getConstant(Idx, PtrVT)));
+ else
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Tmp2,
+ DAG.getConstant(Idx - NumElems, PtrVT)));
+ }
+ }
+ Result = DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+ break;
+ }
+ case TargetLowering::Promote: {
+ // Change base type to a different vector type.
+ MVT::ValueType OVT = Node->getValueType(0);
+ MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+
+ // Cast the two input vectors.
+ Tmp1 = DAG.getNode(ISD::BIT_CONVERT, NVT, Tmp1);
+ Tmp2 = DAG.getNode(ISD::BIT_CONVERT, NVT, Tmp2);
+
+ // Convert the shuffle mask to the right # elements.
+ Tmp3 = SDOperand(isShuffleLegal(OVT, Node->getOperand(2)), 0);
+ assert(Tmp3.Val && "Shuffle not legal?");
+ Result = DAG.getNode(ISD::VECTOR_SHUFFLE, NVT, Tmp1, Tmp2, Tmp3);
+ Result = DAG.getNode(ISD::BIT_CONVERT, OVT, Result);
+ break;
+ }
+ }
+ break;
+
+ case ISD::EXTRACT_VECTOR_ELT:
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = LegalizeOp(Node->getOperand(1));
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+ Result = ExpandEXTRACT_VECTOR_ELT(Result);
+ break;
+
+ case ISD::EXTRACT_SUBVECTOR:
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = LegalizeOp(Node->getOperand(1));
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+ Result = ExpandEXTRACT_SUBVECTOR(Result);
+ break;
+
+ case ISD::CALLSEQ_START: {
+ SDNode *CallEnd = FindCallEndFromCallStart(Node);
+
+    // Recursively legalize all of the inputs of the call end that do not lead
+    // to this call start. This ensures that any libcalls that need to be
+    // inserted are inserted *before* the CALLSEQ_START.
+ {SmallPtrSet<SDNode*, 32> NodesLeadingTo;
+ for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i)
+ LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).Val, Node,
+ NodesLeadingTo);
+ }
+
+ // Now that we legalized all of the inputs (which may have inserted
+ // libcalls) create the new CALLSEQ_START node.
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+
+    // Merge in the last call to ensure that this call starts after the last
+    // call ended.
+ if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
+ Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+ Tmp1 = LegalizeOp(Tmp1);
+ }
+
+ // Do not try to legalize the target-specific arguments (#1+).
+ if (Tmp1 != Node->getOperand(0)) {
+ SmallVector<SDOperand, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[0] = Tmp1;
+ Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ }
+
+ // Remember that the CALLSEQ_START is legalized.
+ AddLegalizedOperand(Op.getValue(0), Result);
+ if (Node->getNumValues() == 2) // If this has a flag result, remember it.
+ AddLegalizedOperand(Op.getValue(1), Result.getValue(1));
+
+ // Now that the callseq_start and all of the non-call nodes above this call
+ // sequence have been legalized, legalize the call itself. During this
+ // process, no libcalls can/will be inserted, guaranteeing that no calls
+ // can overlap.
+ assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
+ SDOperand InCallSEQ = LastCALLSEQ_END;
+ // Note that we are selecting this call!
+ LastCALLSEQ_END = SDOperand(CallEnd, 0);
+ IsLegalizingCall = true;
+
+ // Legalize the call, starting from the CALLSEQ_END.
+ LegalizeOp(LastCALLSEQ_END);
+ assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
+ return Result;
+ }
+ case ISD::CALLSEQ_END:
+    // If the CALLSEQ_START node hasn't been legalized first, legalize it. This
+    // will cause this node to be legalized as well and handles libcalls
+    // correctly.
+ if (LastCALLSEQ_END.Val != Node) {
+ LegalizeOp(SDOperand(FindCallStartFromCallEnd(Node), 0));
+ DenseMap<SDOperand, SDOperand>::iterator I = LegalizedNodes.find(Op);
+ assert(I != LegalizedNodes.end() &&
+ "Legalizing the call start should have legalized this node!");
+ return I->second;
+ }
+
+ // Otherwise, the call start has been legalized and everything is going
+ // according to plan. Just legalize ourselves normally here.
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ // Do not try to legalize the target-specific arguments (#1+), except for
+ // an optional flag input.
+ if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag){
+ if (Tmp1 != Node->getOperand(0)) {
+ SmallVector<SDOperand, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[0] = Tmp1;
+ Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ }
+ } else {
+ Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1));
+ if (Tmp1 != Node->getOperand(0) ||
+ Tmp2 != Node->getOperand(Node->getNumOperands()-1)) {
+ SmallVector<SDOperand, 8> Ops(Node->op_begin(), Node->op_end());
+ Ops[0] = Tmp1;
+ Ops.back() = Tmp2;
+ Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ }
+ }
+ assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
+ // This finishes up call legalization.
+ IsLegalizingCall = false;
+
+ // If the CALLSEQ_END node has a flag, remember that we legalized it.
+ AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+ if (Node->getNumValues() == 2)
+ AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+ return Result.getValue(Op.ResNo);
+ case ISD::DYNAMIC_STACKALLOC: {
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the size.
+ Tmp3 = LegalizeOp(Node->getOperand(2)); // Legalize the alignment.
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+
+ Tmp1 = Result.getValue(0);
+ Tmp2 = Result.getValue(1);
+ switch (TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0))) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Expand: {
+ unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
+ " not tell us which reg is the stack pointer!");
+ SDOperand Chain = Tmp1.getOperand(0);
+ SDOperand Size = Tmp2.getOperand(1);
+ SDOperand SP = DAG.getCopyFromReg(Chain, SPReg, Node->getValueType(0));
+ Tmp1 = DAG.getNode(ISD::SUB, Node->getValueType(0), SP, Size); // Value
+ Tmp2 = DAG.getCopyToReg(SP.getValue(1), SPReg, Tmp1); // Output chain
+ Tmp1 = LegalizeOp(Tmp1);
+ Tmp2 = LegalizeOp(Tmp2);
+ break;
+ }
+ case TargetLowering::Custom:
+ Tmp3 = TLI.LowerOperation(Tmp1, DAG);
+ if (Tmp3.Val) {
+ Tmp1 = LegalizeOp(Tmp3);
+ Tmp2 = LegalizeOp(Tmp3.getValue(1));
+ }
+ break;
+ case TargetLowering::Legal:
+ break;
+ }
+    // Since this op produces two values, make sure to remember that we
+    // legalized both of them.
+ AddLegalizedOperand(SDOperand(Node, 0), Tmp1);
+ AddLegalizedOperand(SDOperand(Node, 1), Tmp2);
+ return Op.ResNo ? Tmp2 : Tmp1;
+ }
+ case ISD::INLINEASM: {
+ SmallVector<SDOperand, 8> Ops(Node->op_begin(), Node->op_end());
+ bool Changed = false;
+ // Legalize all of the operands of the inline asm, in case they are nodes
+ // that need to be expanded or something. Note we skip the asm string and
+ // all of the TargetConstant flags.
+ SDOperand Op = LegalizeOp(Ops[0]);
+ Changed = Op != Ops[0];
+ Ops[0] = Op;
+
+ bool HasInFlag = Ops.back().getValueType() == MVT::Flag;
+ for (unsigned i = 2, e = Ops.size()-HasInFlag; i < e; ) {
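+      // Each operand-group flag word encodes the number of following operands
+      // above its low three bits; the inner loop legalizes that many operands.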
+ unsigned NumVals = cast<ConstantSDNode>(Ops[i])->getValue() >> 3;
+ for (++i; NumVals; ++i, --NumVals) {
+ SDOperand Op = LegalizeOp(Ops[i]);
+ if (Op != Ops[i]) {
+ Changed = true;
+ Ops[i] = Op;
+ }
+ }
+ }
+
+ if (HasInFlag) {
+ Op = LegalizeOp(Ops.back());
+ Changed |= Op != Ops.back();
+ Ops.back() = Op;
+ }
+
+ if (Changed)
+ Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+
+ // INLINE asm returns a chain and flag, make sure to add both to the map.
+ AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+ AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+ return Result.getValue(Op.ResNo);
+ }
+ case ISD::BR:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ // Ensure that libcalls are emitted before a branch.
+ Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+ Tmp1 = LegalizeOp(Tmp1);
+ LastCALLSEQ_END = DAG.getEntryNode();
+
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+ break;
+ case ISD::BRIND:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ // Ensure that libcalls are emitted before a branch.
+ Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+ Tmp1 = LegalizeOp(Tmp1);
+ LastCALLSEQ_END = DAG.getEntryNode();
+
+ switch (getTypeAction(Node->getOperand(1).getValueType())) {
+ default: assert(0 && "Indirect target must be legal type (pointer)!");
+ case Legal:
+      Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the indirect target.
+ break;
+ }
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+ break;
+ case ISD::BR_JT:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ // Ensure that libcalls are emitted before a branch.
+ Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+ Tmp1 = LegalizeOp(Tmp1);
+ LastCALLSEQ_END = DAG.getEntryNode();
+
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the jumptable node.
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
+
+ switch (TLI.getOperationAction(ISD::BR_JT, MVT::Other)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ break;
+ case TargetLowering::Expand: {
+ SDOperand Chain = Result.getOperand(0);
+ SDOperand Table = Result.getOperand(1);
+ SDOperand Index = Result.getOperand(2);
+
+ MVT::ValueType PTy = TLI.getPointerTy();
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned EntrySize = MF.getJumpTableInfo()->getEntrySize();
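+    // Scale the index by the jump table entry size to get a byte offset, then
+    // add the table base address to form the address of the entry to load.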
+ Index= DAG.getNode(ISD::MUL, PTy, Index, DAG.getConstant(EntrySize, PTy));
+ SDOperand Addr = DAG.getNode(ISD::ADD, PTy, Index, Table);
+
+ SDOperand LD;
+ switch (EntrySize) {
+ default: assert(0 && "Size of jump table not supported yet."); break;
+ case 4: LD = DAG.getLoad(MVT::i32, Chain, Addr, NULL, 0); break;
+ case 8: LD = DAG.getLoad(MVT::i64, Chain, Addr, NULL, 0); break;
+ }
+
+ if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ // For PIC, the sequence is:
+ // BRIND(load(Jumptable + index) + RelocBase)
+ // RelocBase is the JumpTable on PPC and X86, GOT on Alpha
+ SDOperand Reloc;
+ if (TLI.usesGlobalOffsetTable())
+ Reloc = DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, PTy);
+ else
+ Reloc = Table;
+ Addr = (PTy != MVT::i32) ? DAG.getNode(ISD::SIGN_EXTEND, PTy, LD) : LD;
+ Addr = DAG.getNode(ISD::ADD, PTy, Addr, Reloc);
+ Result = DAG.getNode(ISD::BRIND, MVT::Other, LD.getValue(1), Addr);
+ } else {
+ Result = DAG.getNode(ISD::BRIND, MVT::Other, LD.getValue(1), LD);
+ }
+ }
+ }
+ break;
+ case ISD::BRCOND:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+    // Ensure that libcalls are emitted before a branch.
+ Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+ Tmp1 = LegalizeOp(Tmp1);
+ LastCALLSEQ_END = DAG.getEntryNode();
+
+ switch (getTypeAction(Node->getOperand(1).getValueType())) {
+ case Expand: assert(0 && "It's impossible to expand bools");
+ case Legal:
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the condition.
+ break;
+ case Promote:
+ Tmp2 = PromoteOp(Node->getOperand(1)); // Promote the condition.
+
+      // The top bits of the promoted condition are not necessarily zero;
+      // ensure that the value is properly zero-extended.
+ if (!DAG.MaskedValueIsZero(Tmp2,
+ MVT::getIntVTBitMask(Tmp2.getValueType())^1))
+ Tmp2 = DAG.getZeroExtendInReg(Tmp2, MVT::i1);
+ break;
+ }
+
+ // Basic block destination (Op#2) is always legal.
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
+
+ switch (TLI.getOperationAction(ISD::BRCOND, MVT::Other)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ break;
+ case TargetLowering::Expand:
+ // Expand brcond's setcc into its constituent parts and create a BR_CC
+ // Node.
+ if (Tmp2.getOpcode() == ISD::SETCC) {
+ Result = DAG.getNode(ISD::BR_CC, MVT::Other, Tmp1, Tmp2.getOperand(2),
+ Tmp2.getOperand(0), Tmp2.getOperand(1),
+ Node->getOperand(2));
+ } else {
+ Result = DAG.getNode(ISD::BR_CC, MVT::Other, Tmp1,
+ DAG.getCondCode(ISD::SETNE), Tmp2,
+ DAG.getConstant(0, Tmp2.getValueType()),
+ Node->getOperand(2));
+ }
+ break;
+ }
+ break;
+ case ISD::BR_CC:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ // Ensure that libcalls are emitted before a branch.
+ Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+ Tmp1 = LegalizeOp(Tmp1);
+ Tmp2 = Node->getOperand(2); // LHS
+ Tmp3 = Node->getOperand(3); // RHS
+ Tmp4 = Node->getOperand(1); // CC
+
+ LegalizeSetCCOperands(Tmp2, Tmp3, Tmp4);
+ LastCALLSEQ_END = DAG.getEntryNode();
+
+    // If we didn't get both an LHS and an RHS back from LegalizeSetCCOperands,
+    // the LHS is a legal SETCC itself. In this case, we need to compare
+    // its result against zero to select between the true and false values.
+ if (Tmp3.Val == 0) {
+ Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
+ Tmp4 = DAG.getCondCode(ISD::SETNE);
+ }
+
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp4, Tmp2, Tmp3,
+ Node->getOperand(4));
+
+ switch (TLI.getOperationAction(ISD::BR_CC, Tmp3.getValueType())) {
+ default: assert(0 && "Unexpected action for BR_CC!");
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom:
+ Tmp4 = TLI.LowerOperation(Result, DAG);
+ if (Tmp4.Val) Result = Tmp4;
+ break;
+ }
+ break;
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain.
+ Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer.
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD) {
+ MVT::ValueType VT = Node->getValueType(0);
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+ Tmp3 = Result.getValue(0);
+ Tmp4 = Result.getValue(1);
+
+ switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Tmp3, DAG);
+ if (Tmp1.Val) {
+ Tmp3 = LegalizeOp(Tmp1);
+ Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ }
+ break;
+ case TargetLowering::Promote: {
+ // Only promote a load of vector type to another.
+ assert(MVT::isVector(VT) && "Cannot promote this load!");
+ // Change base type to a different vector type.
+ MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+
+ Tmp1 = DAG.getLoad(NVT, Tmp1, Tmp2, LD->getSrcValue(),
+ LD->getSrcValueOffset(),
+ LD->isVolatile(), LD->getAlignment());
+ Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, VT, Tmp1));
+ Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ break;
+ }
+ }
+ // Since loads produce two values, make sure to remember that we
+ // legalized both of them.
+ AddLegalizedOperand(SDOperand(Node, 0), Tmp3);
+ AddLegalizedOperand(SDOperand(Node, 1), Tmp4);
+ return Op.ResNo ? Tmp4 : Tmp3;
+ } else {
+ MVT::ValueType SrcVT = LD->getLoadedVT();
+ switch (TLI.getLoadXAction(ExtType, SrcVT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Promote:
+ assert(SrcVT == MVT::i1 &&
+ "Can only promote extending LOAD from i1 -> i8!");
+ Result = DAG.getExtLoad(ExtType, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ MVT::i8, LD->isVolatile(), LD->getAlignment());
+ Tmp1 = Result.getValue(0);
+ Tmp2 = Result.getValue(1);
+ break;
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+ Tmp1 = Result.getValue(0);
+ Tmp2 = Result.getValue(1);
+
+ if (isCustom) {
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.Val) {
+ Tmp1 = LegalizeOp(Tmp3);
+ Tmp2 = LegalizeOp(Tmp3.getValue(1));
+ }
+ }
+ break;
+ case TargetLowering::Expand:
+ // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND
+ if (SrcVT == MVT::f32 && Node->getValueType(0) == MVT::f64) {
+ SDOperand Load = DAG.getLoad(SrcVT, Tmp1, Tmp2, LD->getSrcValue(),
+ LD->getSrcValueOffset(),
+ LD->isVolatile(), LD->getAlignment());
+ Result = DAG.getNode(ISD::FP_EXTEND, Node->getValueType(0), Load);
+ Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Load.getValue(1));
+ break;
+ }
+ assert(ExtType != ISD::EXTLOAD &&"EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an explicit
+ // zero/sign extend inreg.
+ Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getSrcValue(),
+ LD->getSrcValueOffset(), SrcVT,
+ LD->isVolatile(), LD->getAlignment());
+ SDOperand ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, SrcVT);
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes.
+ break;
+ }
+ // Since loads produce two values, make sure to remember that we legalized
+ // both of them.
+ AddLegalizedOperand(SDOperand(Node, 0), Tmp1);
+ AddLegalizedOperand(SDOperand(Node, 1), Tmp2);
+ return Op.ResNo ? Tmp2 : Tmp1;
+ }
+ }
+ case ISD::EXTRACT_ELEMENT: {
+ MVT::ValueType OpTy = Node->getOperand(0).getValueType();
+ switch (getTypeAction(OpTy)) {
+ default: assert(0 && "EXTRACT_ELEMENT action for type unimplemented!");
+ case Legal:
+ if (cast<ConstantSDNode>(Node->getOperand(1))->getValue()) {
+ // 1 -> Hi
+ Result = DAG.getNode(ISD::SRL, OpTy, Node->getOperand(0),
+ DAG.getConstant(MVT::getSizeInBits(OpTy)/2,
+ TLI.getShiftAmountTy()));
+ Result = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0), Result);
+ } else {
+ // 0 -> Lo
+ Result = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0),
+ Node->getOperand(0));
+ }
+ break;
+ case Expand:
+ // Get both the low and high parts.
+ ExpandOp(Node->getOperand(0), Tmp1, Tmp2);
+ if (cast<ConstantSDNode>(Node->getOperand(1))->getValue())
+ Result = Tmp2; // 1 -> Hi
+ else
+ Result = Tmp1; // 0 -> Lo
+ break;
+ }
+ break;
+ }
+
+ case ISD::CopyToReg:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+
+ assert(isTypeLegal(Node->getOperand(2).getValueType()) &&
+ "Register type must be legal!");
+ // Legalize the incoming value (must be a legal type).
+ Tmp2 = LegalizeOp(Node->getOperand(2));
+ if (Node->getNumValues() == 1) {
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1), Tmp2);
+ } else {
+ assert(Node->getNumValues() == 2 && "Unknown CopyToReg");
+ if (Node->getNumOperands() == 4) {
+ Tmp3 = LegalizeOp(Node->getOperand(3));
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1), Tmp2,
+ Tmp3);
+ } else {
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1),Tmp2);
+ }
+
+ // Since this produces two values, make sure to remember that we legalized
+ // both of them.
+ AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+ AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+ return Result;
+ }
+ break;
+
+ case ISD::RET:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+
+ // Ensure that libcalls are emitted before a return.
+ Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+ Tmp1 = LegalizeOp(Tmp1);
+ LastCALLSEQ_END = DAG.getEntryNode();
+
+ switch (Node->getNumOperands()) {
+ case 3: // ret val
+ Tmp2 = Node->getOperand(1);
+      Tmp3 = Node->getOperand(2); // Signedness
+ switch (getTypeAction(Tmp2.getValueType())) {
+ case Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, LegalizeOp(Tmp2), Tmp3);
+ break;
+ case Expand:
+ if (!MVT::isVector(Tmp2.getValueType())) {
+ SDOperand Lo, Hi;
+ ExpandOp(Tmp2, Lo, Hi);
+
+ // Big endian systems want the hi reg first.
+ if (!TLI.isLittleEndian())
+ std::swap(Lo, Hi);
+
+ if (Hi.Val)
+ Result = DAG.getNode(ISD::RET, MVT::Other, Tmp1, Lo, Tmp3, Hi,Tmp3);
+ else
+ Result = DAG.getNode(ISD::RET, MVT::Other, Tmp1, Lo, Tmp3);
+ Result = LegalizeOp(Result);
+ } else {
+ SDNode *InVal = Tmp2.Val;
+ unsigned NumElems = MVT::getVectorNumElements(InVal->getValueType(0));
+ MVT::ValueType EVT = MVT::getVectorElementType(InVal->getValueType(0));
+
+ // Figure out if there is a simple type corresponding to this Vector
+ // type. If so, convert to the vector type.
+ MVT::ValueType TVT = MVT::getVectorType(EVT, NumElems);
+ if (TLI.isTypeLegal(TVT)) {
+ // Turn this into a return of the vector type.
+ Tmp2 = LegalizeOp(Tmp2);
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+ } else if (NumElems == 1) {
+ // Turn this into a return of the scalar type.
+ Tmp2 = ScalarizeVectorOp(Tmp2);
+ Tmp2 = LegalizeOp(Tmp2);
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+
+ // FIXME: Returns of gcc generic vectors smaller than a legal type
+ // should be returned in integer registers!
+
+ // The scalarized value type may not be legal, e.g. it might require
+ // promotion or expansion. Relegalize the return.
+ Result = LegalizeOp(Result);
+ } else {
+ // FIXME: Returns of gcc generic vectors larger than a legal vector
+ // type should be returned by reference!
+ SDOperand Lo, Hi;
+ SplitVectorOp(Tmp2, Lo, Hi);
+ Result = DAG.getNode(ISD::RET, MVT::Other, Tmp1, Lo, Tmp3, Hi,Tmp3);
+ Result = LegalizeOp(Result);
+ }
+ }
+ break;
+ case Promote:
+ Tmp2 = PromoteOp(Node->getOperand(1));
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+ Result = LegalizeOp(Result);
+ break;
+ }
+ break;
+ case 1: // ret void
+ Result = DAG.UpdateNodeOperands(Result, Tmp1);
+ break;
+ default: { // ret <values>
+ SmallVector<SDOperand, 8> NewValues;
+ NewValues.push_back(Tmp1);
+ for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2)
+ switch (getTypeAction(Node->getOperand(i).getValueType())) {
+ case Legal:
+ NewValues.push_back(LegalizeOp(Node->getOperand(i)));
+ NewValues.push_back(Node->getOperand(i+1));
+ break;
+ case Expand: {
+ SDOperand Lo, Hi;
+ assert(!MVT::isExtendedVT(Node->getOperand(i).getValueType()) &&
+ "FIXME: TODO: implement returning non-legal vector types!");
+ ExpandOp(Node->getOperand(i), Lo, Hi);
+ NewValues.push_back(Lo);
+ NewValues.push_back(Node->getOperand(i+1));
+ if (Hi.Val) {
+ NewValues.push_back(Hi);
+ NewValues.push_back(Node->getOperand(i+1));
+ }
+ break;
+ }
+ case Promote:
+ assert(0 && "Can't promote multiple return value yet!");
+ }
+
+ if (NewValues.size() == Node->getNumOperands())
+ Result = DAG.UpdateNodeOperands(Result, &NewValues[0],NewValues.size());
+ else
+ Result = DAG.getNode(ISD::RET, MVT::Other,
+ &NewValues[0], NewValues.size());
+ break;
+ }
+ }
+
+ if (Result.getOpcode() == ISD::RET) {
+ switch (TLI.getOperationAction(Result.getOpcode(), MVT::Other)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ break;
+ }
+ }
+ break;
+ case ISD::STORE: {
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain.
+ Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer.
+ int SVOffset = ST->getSrcValueOffset();
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+
+ if (!ST->isTruncatingStore()) {
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+ // FIXME: We shouldn't do this for TargetConstantFP's.
+ // FIXME: move this to the DAG Combiner! Note that we can't regress due
+ // to phase ordering between legalized code and the dag combiner. This
+ // probably means that we need to integrate dag combiner and legalizer
+ // together.
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
+ if (CFP->getValueType(0) == MVT::f32) {
+ Tmp3 = DAG.getConstant(FloatToBits(CFP->getValue()), MVT::i32);
+ } else {
+ assert(CFP->getValueType(0) == MVT::f64 && "Unknown FP type!");
+ Tmp3 = DAG.getConstant(DoubleToBits(CFP->getValue()), MVT::i64);
+ }
+ Result = DAG.getStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ break;
+ }
+
+ switch (getTypeAction(ST->getStoredVT())) {
+ case Legal: {
+ Tmp3 = LegalizeOp(ST->getValue());
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
+ ST->getOffset());
+
+ MVT::ValueType VT = Tmp3.getValueType();
+ switch (TLI.getOperationAction(ISD::STORE, VT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ break;
+ case TargetLowering::Promote:
+ assert(MVT::isVector(VT) && "Unknown legal promote case!");
+ Tmp3 = DAG.getNode(ISD::BIT_CONVERT,
+ TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
+ Result = DAG.getStore(Tmp1, Tmp3, Tmp2,
+ ST->getSrcValue(), SVOffset, isVolatile,
+ Alignment);
+ break;
+ }
+ break;
+ }
+ case Promote:
+ // Truncate the value and store the result.
+ Tmp3 = PromoteOp(ST->getValue());
+ Result = DAG.getTruncStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, ST->getStoredVT(),
+ isVolatile, Alignment);
+ break;
+
+ case Expand:
+ unsigned IncrementSize = 0;
+ SDOperand Lo, Hi;
+
+ // If this is a vector type, then we have to calculate the increment as
+ // the product of the element size in bytes, and the number of elements
+ // in the high half of the vector.
+ if (MVT::isVector(ST->getValue().getValueType())) {
+ SDNode *InVal = ST->getValue().Val;
+ unsigned NumElems = MVT::getVectorNumElements(InVal->getValueType(0));
+ MVT::ValueType EVT = MVT::getVectorElementType(InVal->getValueType(0));
+
+ // Figure out if there is a simple type corresponding to this Vector
+ // type. If so, convert to the vector type.
+ MVT::ValueType TVT = MVT::getVectorType(EVT, NumElems);
+ if (TLI.isTypeLegal(TVT)) {
+ // Turn this into a normal store of the vector type.
+ Tmp3 = LegalizeOp(Node->getOperand(1));
+ Result = DAG.getStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ Result = LegalizeOp(Result);
+ break;
+ } else if (NumElems == 1) {
+ // Turn this into a normal store of the scalar type.
+ Tmp3 = ScalarizeVectorOp(Node->getOperand(1));
+ Result = DAG.getStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ // The scalarized value type may not be legal, e.g. it might require
+ // promotion or expansion. Relegalize the scalar store.
+ Result = LegalizeOp(Result);
+ break;
+ } else {
+ SplitVectorOp(Node->getOperand(1), Lo, Hi);
+ IncrementSize = NumElems/2 * MVT::getSizeInBits(EVT)/8;
+ }
+ } else {
+ ExpandOp(Node->getOperand(1), Lo, Hi);
+ IncrementSize = Hi.Val ? MVT::getSizeInBits(Hi.getValueType())/8 : 0;
+
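+          // On big-endian targets the high part is stored at the lower address.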
+ if (!TLI.isLittleEndian())
+ std::swap(Lo, Hi);
+ }
+
+ Lo = DAG.getStore(Tmp1, Lo, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+
+ if (Hi.Val == NULL) {
+ // Must be int <-> float one-to-one expansion.
+ Result = Lo;
+ break;
+ }
+
+ Tmp2 = DAG.getNode(ISD::ADD, Tmp2.getValueType(), Tmp2,
+ getIntPtrConstant(IncrementSize));
+ assert(isTypeLegal(Tmp2.getValueType()) &&
+ "Pointers must be legal!");
+ SVOffset += IncrementSize;
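+        // The pointer to the second half is only guaranteed to be
+        // IncrementSize-aligned.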
+ if (Alignment > IncrementSize)
+ Alignment = IncrementSize;
+ Hi = DAG.getStore(Tmp1, Hi, Tmp2, ST->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ Result = DAG.getNode(ISD::TokenFactor, MVT::Other, Lo, Hi);
+ break;
+ }
+ } else {
+ // Truncating store
+ assert(isTypeLegal(ST->getValue().getValueType()) &&
+ "Cannot handle illegal TRUNCSTORE yet!");
+ Tmp3 = LegalizeOp(ST->getValue());
+
+      // The only promote case we handle is turning TRUNCSTORE:i1 X into
+      // TRUNCSTORE:i8 (and X, 1).
+ if (ST->getStoredVT() == MVT::i1 &&
+ TLI.getStoreXAction(MVT::i1) == TargetLowering::Promote) {
+ // Promote the bool to a mask then store.
+ Tmp3 = DAG.getNode(ISD::AND, Tmp3.getValueType(), Tmp3,
+ DAG.getConstant(1, Tmp3.getValueType()));
+ Result = DAG.getTruncStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(),
+ SVOffset, MVT::i8,
+ isVolatile, Alignment);
+ } else if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() ||
+ Tmp2 != ST->getBasePtr()) {
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
+ ST->getOffset());
+ }
+
+ MVT::ValueType StVT = cast<StoreSDNode>(Result.Val)->getStoredVT();
+ switch (TLI.getStoreXAction(StVT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ break;
+ }
+ }
+ break;
+ }
+ case ISD::PCMARKER:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+ break;
+ case ISD::STACKSAVE:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ Result = DAG.UpdateNodeOperands(Result, Tmp1);
+ Tmp1 = Result.getValue(0);
+ Tmp2 = Result.getValue(1);
+
+ switch (TLI.getOperationAction(ISD::STACKSAVE, MVT::Other)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom:
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.Val) {
+ Tmp1 = LegalizeOp(Tmp3);
+ Tmp2 = LegalizeOp(Tmp3.getValue(1));
+ }
+ break;
+ case TargetLowering::Expand:
+ // Expand to CopyFromReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Tmp1 = DAG.getCopyFromReg(Result.getOperand(0), SP,
+ Node->getValueType(0));
+ Tmp2 = Tmp1.getValue(1);
+ } else {
+ Tmp1 = DAG.getNode(ISD::UNDEF, Node->getValueType(0));
+ Tmp2 = Node->getOperand(0);
+ }
+ break;
+ }
+
+ // Since stacksave produces two values, make sure to remember that we
+ // legalized both of them.
+ AddLegalizedOperand(SDOperand(Node, 0), Tmp1);
+ AddLegalizedOperand(SDOperand(Node, 1), Tmp2);
+ return Op.ResNo ? Tmp2 : Tmp1;
+
+ case ISD::STACKRESTORE:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the pointer.
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+
+ switch (TLI.getOperationAction(ISD::STACKRESTORE, MVT::Other)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ break;
+ case TargetLowering::Expand:
+ // Expand to CopyToReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Result = DAG.getCopyToReg(Tmp1, SP, Tmp2);
+ } else {
+ Result = Tmp1;
+ }
+ break;
+ }
+ break;
+
+ case ISD::READCYCLECOUNTER:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain
+ Result = DAG.UpdateNodeOperands(Result, Tmp1);
+ switch (TLI.getOperationAction(ISD::READCYCLECOUNTER,
+ Node->getValueType(0))) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal:
+ Tmp1 = Result.getValue(0);
+ Tmp2 = Result.getValue(1);
+ break;
+ case TargetLowering::Custom:
+ Result = TLI.LowerOperation(Result, DAG);
+ Tmp1 = LegalizeOp(Result.getValue(0));
+ Tmp2 = LegalizeOp(Result.getValue(1));
+ break;
+ }
+
+ // Since rdcc produces two values, make sure to remember that we legalized
+ // both of them.
+ AddLegalizedOperand(SDOperand(Node, 0), Tmp1);
+ AddLegalizedOperand(SDOperand(Node, 1), Tmp2);
+ return Result;
+
+ case ISD::SELECT:
+ switch (getTypeAction(Node->getOperand(0).getValueType())) {
+ case Expand: assert(0 && "It's impossible to expand bools");
+ case Legal:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the condition.
+ break;
+ case Promote:
+ Tmp1 = PromoteOp(Node->getOperand(0)); // Promote the condition.
+ // Make sure the condition is either zero or one.
+ if (!DAG.MaskedValueIsZero(Tmp1,
+ MVT::getIntVTBitMask(Tmp1.getValueType())^1))
+ Tmp1 = DAG.getZeroExtendInReg(Tmp1, MVT::i1);
+ break;
+ }
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // TrueVal
+ Tmp3 = LegalizeOp(Node->getOperand(2)); // FalseVal
+
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+
+ switch (TLI.getOperationAction(ISD::SELECT, Tmp2.getValueType())) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom: {
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ break;
+ }
+ case TargetLowering::Expand:
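+ // Expand SELECT into SELECT_CC. If the condition is itself a SETCC,
+ // fold it in directly, e.g. (select (setcc a, b, cc), T, F) becomes
+ // (select_cc a, b, T, F, cc); otherwise compare the condition to zero:
+ // (select_cc cond, 0, T, F, setne).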
+ if (Tmp1.getOpcode() == ISD::SETCC) {
+ Result = DAG.getSelectCC(Tmp1.getOperand(0), Tmp1.getOperand(1),
+ Tmp2, Tmp3,
+ cast<CondCodeSDNode>(Tmp1.getOperand(2))->get());
+ } else {
+ Result = DAG.getSelectCC(Tmp1,
+ DAG.getConstant(0, Tmp1.getValueType()),
+ Tmp2, Tmp3, ISD::SETNE);
+ }
+ break;
+ case TargetLowering::Promote: {
+ MVT::ValueType NVT =
+ TLI.getTypeToPromoteTo(ISD::SELECT, Tmp2.getValueType());
+ unsigned ExtOp, TruncOp;
+ if (MVT::isVector(Tmp2.getValueType())) {
+ ExtOp = ISD::BIT_CONVERT;
+ TruncOp = ISD::BIT_CONVERT;
+ } else if (MVT::isInteger(Tmp2.getValueType())) {
+ ExtOp = ISD::ANY_EXTEND;
+ TruncOp = ISD::TRUNCATE;
+ } else {
+ ExtOp = ISD::FP_EXTEND;
+ TruncOp = ISD::FP_ROUND;
+ }
+ // Promote each of the values to the new type.
+ Tmp2 = DAG.getNode(ExtOp, NVT, Tmp2);
+ Tmp3 = DAG.getNode(ExtOp, NVT, Tmp3);
+ // Perform the larger operation, then round down.
+ Result = DAG.getNode(ISD::SELECT, NVT, Tmp1, Tmp2,Tmp3);
+ Result = DAG.getNode(TruncOp, Node->getValueType(0), Result);
+ break;
+ }
+ }
+ break;
+ case ISD::SELECT_CC: {
+ Tmp1 = Node->getOperand(0); // LHS
+ Tmp2 = Node->getOperand(1); // RHS
+ Tmp3 = LegalizeOp(Node->getOperand(2)); // True
+ Tmp4 = LegalizeOp(Node->getOperand(3)); // False
+ SDOperand CC = Node->getOperand(4);
+
+ LegalizeSetCCOperands(Tmp1, Tmp2, CC);
+
+ // If we didn't get both an LHS and an RHS back from LegalizeSetCCOperands,
+ // the LHS is a legal SETCC itself. In this case, compare the result
+ // against zero to select between the true and false values.
+ if (Tmp2.Val == 0) {
+ Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+ CC = DAG.getCondCode(ISD::SETNE);
+ }
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3, Tmp4, CC);
+
+ // Everything is legal, see if we should expand this op or something.
+ switch (TLI.getOperationAction(ISD::SELECT_CC, Tmp3.getValueType())) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ break;
+ }
+ break;
+ }
+ case ISD::SETCC:
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
+ LegalizeSetCCOperands(Tmp1, Tmp2, Tmp3);
+
+ // If we had to Expand the SetCC operands into a SELECT node, then it may
+ // not always be possible to return a true LHS and RHS. In this case, just
+ // return the value we legalized, which is returned in the LHS.
+ if (Tmp2.Val == 0) {
+ Result = Tmp1;
+ break;
+ }
+
+ switch (TLI.getOperationAction(ISD::SETCC, Tmp1.getValueType())) {
+ default: assert(0 && "Cannot handle this action for SETCC yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH.
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+ if (isCustom) {
+ Tmp4 = TLI.LowerOperation(Result, DAG);
+ if (Tmp4.Val) Result = Tmp4;
+ }
+ break;
+ case TargetLowering::Promote: {
+ // First step: figure out the appropriate larger type to use. Targets are
+ // allowed to not support SETCC on every legal data type; this mostly
+ // affects floating point.
+ MVT::ValueType NewInTy = Node->getOperand(0).getValueType();
+ MVT::ValueType OldVT = NewInTy; OldVT = OldVT;
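+ // The self-assignment above only exists to silence an "unused variable"
+ // warning in release builds, where OldVT is referenced solely by asserts.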
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewInTy = (MVT::ValueType)(NewInTy+1);
+
+ assert(MVT::isInteger(NewInTy) == MVT::isInteger(OldVT) &&
+ "Fell off of the edge of the integer world");
+ assert(MVT::isFloatingPoint(NewInTy) == MVT::isFloatingPoint(OldVT) &&
+ "Fell off of the edge of the floating point world");
+
+ // If the target supports SETCC of this type, use it.
+ if (TLI.isOperationLegal(ISD::SETCC, NewInTy))
+ break;
+ }
+ if (MVT::isInteger(NewInTy))
+ assert(0 && "Cannot promote Legal Integer SETCC yet");
+ else {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, NewInTy, Tmp1);
+ Tmp2 = DAG.getNode(ISD::FP_EXTEND, NewInTy, Tmp2);
+ }
+ Tmp1 = LegalizeOp(Tmp1);
+ Tmp2 = LegalizeOp(Tmp2);
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+ Result = LegalizeOp(Result);
+ break;
+ }
+ case TargetLowering::Expand:
+ // Expand a setcc node into a select_cc of the same condition, lhs, and
+ // rhs that selects between const 1 (true) and const 0 (false).
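+ // e.g. (setcc a, b, setlt) becomes (select_cc a, b, 1, 0, setlt).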
+ MVT::ValueType VT = Node->getValueType(0);
+ Result = DAG.getNode(ISD::SELECT_CC, VT, Tmp1, Tmp2,
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ Tmp3);
+ break;
+ }
+ break;
+ case ISD::MEMSET:
+ case ISD::MEMCPY:
+ case ISD::MEMMOVE: {
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Chain
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // Pointer
+
+ if (Node->getOpcode() == ISD::MEMSET) { // memset = ubyte
+ switch (getTypeAction(Node->getOperand(2).getValueType())) {
+ case Expand: assert(0 && "Cannot expand a byte!");
+ case Legal:
+ Tmp3 = LegalizeOp(Node->getOperand(2));
+ break;
+ case Promote:
+ Tmp3 = PromoteOp(Node->getOperand(2));
+ break;
+ }
+ } else {
+ Tmp3 = LegalizeOp(Node->getOperand(2)); // memcpy/move = pointer,
+ }
+
+ SDOperand Tmp4;
+ switch (getTypeAction(Node->getOperand(3).getValueType())) {
+ case Expand: {
+ // The length type needs expansion; just take the low part of the length.
+ SDOperand HiPart;
+ ExpandOp(Node->getOperand(3), Tmp4, HiPart);
+ break;
+ }
+ case Legal:
+ Tmp4 = LegalizeOp(Node->getOperand(3));
+ break;
+ case Promote:
+ Tmp4 = PromoteOp(Node->getOperand(3));
+ break;
+ }
+
+ SDOperand Tmp5;
+ switch (getTypeAction(Node->getOperand(4).getValueType())) { // uint
+ case Expand: assert(0 && "Cannot expand this yet!");
+ case Legal:
+ Tmp5 = LegalizeOp(Node->getOperand(4));
+ break;
+ case Promote:
+ Tmp5 = PromoteOp(Node->getOperand(4));
+ break;
+ }
+
+ switch (TLI.getOperationAction(Node->getOpcode(), MVT::Other)) {
+ default: assert(0 && "This action not implemented for this operation!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3, Tmp4, Tmp5);
+ if (isCustom) {
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ }
+ break;
+ case TargetLowering::Expand: {
+ // Otherwise, the target does not support this operation. Lower the
+ // operation to an explicit libcall as appropriate.
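+ // Roughly: ISD::MEMSET(chain, dst, val, len, align) becomes a call to
+ // memset(dst, (int)val, len), and MEMCPY/MEMMOVE become calls to
+ // memcpy/memmove(dst, src, len); the alignment operand is dropped.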
+ MVT::ValueType IntPtr = TLI.getPointerTy();
+ const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ const char *FnName = 0;
+ if (Node->getOpcode() == ISD::MEMSET) {
+ Entry.Node = Tmp2; Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ // Extend (or truncate) the previously legalized ubyte argument to be an
+ // i32 value for the call.
+ if (Tmp3.getValueType() > MVT::i32)
+ Tmp3 = DAG.getNode(ISD::TRUNCATE, MVT::i32, Tmp3);
+ else
+ Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Tmp3);
+ Entry.Node = Tmp3; Entry.Ty = Type::Int32Ty; Entry.isSExt = true;
+ Args.push_back(Entry);
+ Entry.Node = Tmp4; Entry.Ty = IntPtrTy; Entry.isSExt = false;
+ Args.push_back(Entry);
+
+ FnName = "memset";
+ } else if (Node->getOpcode() == ISD::MEMCPY ||
+ Node->getOpcode() == ISD::MEMMOVE) {
+ Entry.Ty = IntPtrTy;
+ Entry.Node = Tmp2; Args.push_back(Entry);
+ Entry.Node = Tmp3; Args.push_back(Entry);
+ Entry.Node = Tmp4; Args.push_back(Entry);
+ FnName = Node->getOpcode() == ISD::MEMMOVE ? "memmove" : "memcpy";
+ } else {
+ assert(0 && "Unknown op!");
+ }
+
+ std::pair<SDOperand,SDOperand> CallResult =
+ TLI.LowerCallTo(Tmp1, Type::VoidTy, false, false, CallingConv::C, false,
+ DAG.getExternalSymbol(FnName, IntPtr), Args, DAG);
+ Result = CallResult.second;
+ break;
+ }
+ }
+ break;
+ }
+
+ case ISD::SHL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS: {
+ SmallVector<SDOperand, 8> Ops;
+ bool Changed = false;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ Ops.push_back(LegalizeOp(Node->getOperand(i)));
+ Changed |= Ops.back() != Node->getOperand(i);
+ }
+ if (Changed)
+ Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+
+ switch (TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0))) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) {
+ SDOperand Tmp2, RetVal(0, 0);
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
+ Tmp2 = LegalizeOp(Tmp1.getValue(i));
+ AddLegalizedOperand(SDOperand(Node, i), Tmp2);
+ if (i == Op.ResNo)
+ RetVal = Tmp2;
+ }
+ assert(RetVal.Val && "Illegal result number");
+ return RetVal;
+ }
+ break;
+ }
+
+ // Since these produce multiple values, make sure to remember that we
+ // legalized all of them.
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ AddLegalizedOperand(SDOperand(Node, i), Result.getValue(i));
+ return Result.getValue(Op.ResNo);
+ }
+
+ // Binary operators
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // LHS
+ switch (getTypeAction(Node->getOperand(1).getValueType())) {
+ case Expand: assert(0 && "Not possible");
+ case Legal:
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the RHS.
+ break;
+ case Promote:
+ Tmp2 = PromoteOp(Node->getOperand(1)); // Promote the RHS.
+ break;
+ }
+
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+
+ switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+ default: assert(0 && "BinOp legalize operation not supported");
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ break;
+ case TargetLowering::Expand: {
+ if (Node->getValueType(0) == MVT::i32) {
+ switch (Node->getOpcode()) {
+ default: assert(0 && "Do not know how to expand this integer BinOp!");
+ case ISD::UDIV:
+ case ISD::SDIV:
+ RTLIB::Libcall LC = Node->getOpcode() == ISD::UDIV
+ ? RTLIB::UDIV_I32 : RTLIB::SDIV_I32;
+ SDOperand Dummy;
+ bool isSigned = Node->getOpcode() == ISD::SDIV;
+ Result = ExpandLibCall(TLI.getLibcallName(LC), Node, isSigned, Dummy);
+ };
+ break;
+ }
+
+ assert(MVT::isVector(Node->getValueType(0)) &&
+ "Cannot expand this binary operator!");
+ // Expand the operation into a bunch of nasty scalar code.
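+ // e.g. an unsupported v4f32 FDIV becomes four scalar FDIVs on
+ // EXTRACT_VECTOR_ELT values, reassembled with BUILD_VECTOR.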
+ SmallVector<SDOperand, 8> Ops;
+ MVT::ValueType EltVT = MVT::getVectorElementType(Node->getValueType(0));
+ MVT::ValueType PtrVT = TLI.getPointerTy();
+ for (unsigned i = 0, e = MVT::getVectorNumElements(Node->getValueType(0));
+ i != e; ++i) {
+ SDOperand Idx = DAG.getConstant(i, PtrVT);
+ SDOperand LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Tmp1, Idx);
+ SDOperand RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Tmp2, Idx);
+ Ops.push_back(DAG.getNode(Node->getOpcode(), EltVT, LHS, RHS));
+ }
+ Result = DAG.getNode(ISD::BUILD_VECTOR, Node->getValueType(0),
+ &Ops[0], Ops.size());
+ break;
+ }
+ case TargetLowering::Promote: {
+ switch (Node->getOpcode()) {
+ default: assert(0 && "Do not know how to promote this BinOp!");
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ MVT::ValueType OVT = Node->getValueType(0);
+ MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+ assert(MVT::isVector(OVT) && "Cannot promote this BinOp!");
+ // Bit convert each of the values to the new type.
+ Tmp1 = DAG.getNode(ISD::BIT_CONVERT, NVT, Tmp1);
+ Tmp2 = DAG.getNode(ISD::BIT_CONVERT, NVT, Tmp2);
+ Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2);
+ // Bit convert the result back the original type.
+ Result = DAG.getNode(ISD::BIT_CONVERT, OVT, Result);
+ break;
+ }
+ }
+ }
+ }
+ break;
+
+ case ISD::FCOPYSIGN: // FCOPYSIGN does not require LHS/RHS to match type!
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // LHS
+ switch (getTypeAction(Node->getOperand(1).getValueType())) {
+ case Expand: assert(0 && "Not possible");
+ case Legal:
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the RHS.
+ break;
+ case Promote:
+ Tmp2 = PromoteOp(Node->getOperand(1)); // Promote the RHS.
+ break;
+ }
+
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+
+ switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+ default: assert(0 && "Operation not supported");
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ break;
+ case TargetLowering::Legal: break;
+ case TargetLowering::Expand: {
+ // If this target supports fabs/fneg natively and select is cheap,
+ // do this efficiently.
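+ // i.e. copysign(x, y) is computed as (y < 0) ? -fabs(x) : fabs(x),
+ // using an integer sign-bit test on y and a select.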
+ if (!TLI.isSelectExpensive() &&
+ TLI.getOperationAction(ISD::FABS, Tmp1.getValueType()) ==
+ TargetLowering::Legal &&
+ TLI.getOperationAction(ISD::FNEG, Tmp1.getValueType()) ==
+ TargetLowering::Legal) {
+ // Get the sign bit of the RHS.
+ MVT::ValueType IVT =
+ Tmp2.getValueType() == MVT::f32 ? MVT::i32 : MVT::i64;
+ SDOperand SignBit = DAG.getNode(ISD::BIT_CONVERT, IVT, Tmp2);
+ SignBit = DAG.getSetCC(TLI.getSetCCResultTy(),
+ SignBit, DAG.getConstant(0, IVT), ISD::SETLT);
+ // Get the absolute value of the result.
+ SDOperand AbsVal = DAG.getNode(ISD::FABS, Tmp1.getValueType(), Tmp1);
+ // Select between the nabs and abs value based on the sign bit of
+ // the input.
+ Result = DAG.getNode(ISD::SELECT, AbsVal.getValueType(), SignBit,
+ DAG.getNode(ISD::FNEG, AbsVal.getValueType(),
+ AbsVal),
+ AbsVal);
+ Result = LegalizeOp(Result);
+ break;
+ }
+
+ // Otherwise, do bitwise ops!
+ MVT::ValueType NVT =
+ Node->getValueType(0) == MVT::f32 ? MVT::i32 : MVT::i64;
+ Result = ExpandFCOPYSIGNToBitwiseOps(Node, NVT, DAG, TLI);
+ Result = DAG.getNode(ISD::BIT_CONVERT, Node->getValueType(0), Result);
+ Result = LegalizeOp(Result);
+ break;
+ }
+ }
+ break;
+
+ case ISD::ADDC:
+ case ISD::SUBC:
+ Tmp1 = LegalizeOp(Node->getOperand(0));
+ Tmp2 = LegalizeOp(Node->getOperand(1));
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+ // Since this produces two values, make sure to remember that we legalized
+ // both of them.
+ AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+ AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+ return Result;
+
+ case ISD::ADDE:
+ case ISD::SUBE:
+ Tmp1 = LegalizeOp(Node->getOperand(0));
+ Tmp2 = LegalizeOp(Node->getOperand(1));
+ Tmp3 = LegalizeOp(Node->getOperand(2));
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+ // Since this produces two values, make sure to remember that we legalized
+ // both of them.
+ AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+ AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+ return Result;
+
+ case ISD::BUILD_PAIR: {
+ MVT::ValueType PairTy = Node->getValueType(0);
+ // TODO: handle the case where the Lo and Hi operands are not of legal type
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Lo
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // Hi
+ switch (TLI.getOperationAction(ISD::BUILD_PAIR, PairTy)) {
+ case TargetLowering::Promote:
+ case TargetLowering::Custom:
+ assert(0 && "Cannot promote/custom this yet!");
+ case TargetLowering::Legal:
+ if (Tmp1 != Node->getOperand(0) || Tmp2 != Node->getOperand(1))
+ Result = DAG.getNode(ISD::BUILD_PAIR, PairTy, Tmp1, Tmp2);
+ break;
+ case TargetLowering::Expand:
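+ // e.g. (build_pair lo:i32, hi:i32):i64 is computed as
+ // (zext lo) | ((anyext hi) << 32).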
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, PairTy, Tmp1);
+ Tmp2 = DAG.getNode(ISD::ANY_EXTEND, PairTy, Tmp2);
+ Tmp2 = DAG.getNode(ISD::SHL, PairTy, Tmp2,
+ DAG.getConstant(MVT::getSizeInBits(PairTy)/2,
+ TLI.getShiftAmountTy()));
+ Result = DAG.getNode(ISD::OR, PairTy, Tmp1, Tmp2);
+ break;
+ }
+ break;
+ }
+
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::FREM:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // LHS
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // RHS
+
+ switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+ case TargetLowering::Promote: assert(0 && "Cannot promote this yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+ if (isCustom) {
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ }
+ break;
+ case TargetLowering::Expand:
+ unsigned DivOpc= (Node->getOpcode() == ISD::UREM) ? ISD::UDIV : ISD::SDIV;
+ bool isSigned = DivOpc == ISD::SDIV;
+ if (MVT::isInteger(Node->getValueType(0))) {
+ if (TLI.getOperationAction(DivOpc, Node->getValueType(0)) ==
+ TargetLowering::Legal) {
+ // X % Y -> X-X/Y*Y
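+ // e.g. 7 % 3: 7/3 = 2, 2*3 = 6, 7-6 = 1.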
+ MVT::ValueType VT = Node->getValueType(0);
+ Result = DAG.getNode(DivOpc, VT, Tmp1, Tmp2);
+ Result = DAG.getNode(ISD::MUL, VT, Result, Tmp2);
+ Result = DAG.getNode(ISD::SUB, VT, Tmp1, Result);
+ } else {
+ assert(Node->getValueType(0) == MVT::i32 &&
+ "Cannot expand this binary operator!");
+ RTLIB::Libcall LC = Node->getOpcode() == ISD::UREM
+ ? RTLIB::UREM_I32 : RTLIB::SREM_I32;
+ SDOperand Dummy;
+ Result = ExpandLibCall(TLI.getLibcallName(LC), Node, isSigned, Dummy);
+ }
+ } else {
+ // Floating point mod -> fmod libcall.
+ RTLIB::Libcall LC = Node->getValueType(0) == MVT::f32
+ ? RTLIB::REM_F32 : RTLIB::REM_F64;
+ SDOperand Dummy;
+ Result = ExpandLibCall(TLI.getLibcallName(LC), Node,
+ false/*sign irrelevant*/, Dummy);
+ }
+ break;
+ }
+ break;
+ case ISD::VAARG: {
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the pointer.
+
+ MVT::ValueType VT = Node->getValueType(0);
+ switch (TLI.getOperationAction(Node->getOpcode(), MVT::Other)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
+ Result = Result.getValue(0);
+ Tmp1 = Result.getValue(1);
+
+ if (isCustom) {
+ Tmp2 = TLI.LowerOperation(Result, DAG);
+ if (Tmp2.Val) {
+ Result = LegalizeOp(Tmp2);
+ Tmp1 = LegalizeOp(Tmp2.getValue(1));
+ }
+ }
+ break;
+ case TargetLowering::Expand: {
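+ // Generic va_arg lowering: load the current va_list pointer, advance it
+ // past one argument slot (sizeof(VT) bytes, e.g. 4 for i32), store the
+ // updated pointer back, then load the argument from the old pointer.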
+ SrcValueSDNode *SV = cast<SrcValueSDNode>(Node->getOperand(2));
+ SDOperand VAList = DAG.getLoad(TLI.getPointerTy(), Tmp1, Tmp2,
+ SV->getValue(), SV->getOffset());
+ // Increment the pointer, VAList, to the next vaarg
+ Tmp3 = DAG.getNode(ISD::ADD, TLI.getPointerTy(), VAList,
+ DAG.getConstant(MVT::getSizeInBits(VT)/8,
+ TLI.getPointerTy()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp3 = DAG.getStore(VAList.getValue(1), Tmp3, Tmp2, SV->getValue(),
+ SV->getOffset());
+ // Load the actual argument out of the pointer VAList
+ Result = DAG.getLoad(VT, Tmp3, VAList, NULL, 0);
+ Tmp1 = LegalizeOp(Result.getValue(1));
+ Result = LegalizeOp(Result);
+ break;
+ }
+ }
+ // Since VAARG produces two values, make sure to remember that we
+ // legalized both of them.
+ AddLegalizedOperand(SDOperand(Node, 0), Result);
+ AddLegalizedOperand(SDOperand(Node, 1), Tmp1);
+ return Op.ResNo ? Tmp1 : Result;
+ }
+
+ case ISD::VACOPY:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the dest pointer.
+ Tmp3 = LegalizeOp(Node->getOperand(2)); // Legalize the source pointer.
+
+ switch (TLI.getOperationAction(ISD::VACOPY, MVT::Other)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3,
+ Node->getOperand(3), Node->getOperand(4));
+ if (isCustom) {
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ }
+ break;
+ case TargetLowering::Expand:
+ // This defaults to loading a pointer from the input and storing it to the
+ // output, returning the chain.
+ SrcValueSDNode *SVD = cast<SrcValueSDNode>(Node->getOperand(3));
+ SrcValueSDNode *SVS = cast<SrcValueSDNode>(Node->getOperand(4));
+ Tmp4 = DAG.getLoad(TLI.getPointerTy(), Tmp1, Tmp3, SVD->getValue(),
+ SVD->getOffset());
+ Result = DAG.getStore(Tmp4.getValue(1), Tmp4, Tmp2, SVS->getValue(),
+ SVS->getOffset());
+ break;
+ }
+ break;
+
+ case ISD::VAEND:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the pointer.
+
+ switch (TLI.getOperationAction(ISD::VAEND, MVT::Other)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
+ if (isCustom) {
+ Tmp1 = TLI.LowerOperation(Tmp1, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ }
+ break;
+ case TargetLowering::Expand:
+ Result = Tmp1; // Default to a no-op, return the chain
+ break;
+ }
+ break;
+
+ case ISD::VASTART:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the pointer.
+
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
+
+ switch (TLI.getOperationAction(ISD::VASTART, MVT::Other)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ break;
+ }
+ break;
+
+ case ISD::ROTL:
+ case ISD::ROTR:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // LHS
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // RHS
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+ switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+ default:
+ assert(0 && "ROTL/ROTR legalize operation not supported");
+ break;
+ case TargetLowering::Legal:
+ break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ break;
+ case TargetLowering::Promote:
+ assert(0 && "Do not know how to promote ROTL/ROTR");
+ break;
+ case TargetLowering::Expand:
+ assert(0 && "Do not know how to expand ROTL/ROTR");
+ break;
+ }
+ break;
+
+ case ISD::BSWAP:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Op
+ switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+ case TargetLowering::Custom:
+ assert(0 && "Cannot custom legalize this yet!");
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1);
+ break;
+ case TargetLowering::Promote: {
+ MVT::ValueType OVT = Tmp1.getValueType();
+ MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+ unsigned DiffBits = MVT::getSizeInBits(NVT) - MVT::getSizeInBits(OVT);
+
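+ // e.g. a BSWAP of i16 promoted to i32: bswap32(zext x) >> 16.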
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, NVT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::BSWAP, NVT, Tmp1);
+ Result = DAG.getNode(ISD::SRL, NVT, Tmp1,
+ DAG.getConstant(DiffBits, TLI.getShiftAmountTy()));
+ break;
+ }
+ case TargetLowering::Expand:
+ Result = ExpandBSWAP(Tmp1);
+ break;
+ }
+ break;
+
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Op
+ switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+ case TargetLowering::Custom: assert(0 && "Cannot custom handle this yet!");
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1);
+ break;
+ case TargetLowering::Promote: {
+ MVT::ValueType OVT = Tmp1.getValueType();
+ MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+
+ // Zero extend the argument.
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, NVT, Tmp1);
+ // Perform the larger operation, then subtract if needed.
+ Tmp1 = DAG.getNode(Node->getOpcode(), Node->getValueType(0), Tmp1);
+ switch (Node->getOpcode()) {
+ case ISD::CTPOP:
+ Result = Tmp1;
+ break;
+ case ISD::CTTZ:
+ // if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT)
+ Tmp2 = DAG.getSetCC(TLI.getSetCCResultTy(), Tmp1,
+ DAG.getConstant(MVT::getSizeInBits(NVT), NVT),
+ ISD::SETEQ);
+ Result = DAG.getNode(ISD::SELECT, NVT, Tmp2,
+ DAG.getConstant(MVT::getSizeInBits(OVT),NVT), Tmp1);
+ break;
+ case ISD::CTLZ:
+ // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
+ Result = DAG.getNode(ISD::SUB, NVT, Tmp1,
+ DAG.getConstant(MVT::getSizeInBits(NVT) -
+ MVT::getSizeInBits(OVT), NVT));
+ break;
+ }
+ break;
+ }
+ case TargetLowering::Expand:
+ Result = ExpandBitCount(Node->getOpcode(), Tmp1);
+ break;
+ }
+ break;
+
+ // Unary operators
+ case ISD::FABS:
+ case ISD::FNEG:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ Tmp1 = LegalizeOp(Node->getOperand(0));
+ switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+ case TargetLowering::Promote:
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1);
+ if (isCustom) {
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ }
+ break;
+ case TargetLowering::Expand:
+ switch (Node->getOpcode()) {
+ default: assert(0 && "Unreachable!");
+ case ISD::FNEG:
+ // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+ Tmp2 = DAG.getConstantFP(-0.0, Node->getValueType(0));
+ Result = DAG.getNode(ISD::FSUB, Node->getValueType(0), Tmp2, Tmp1);
+ break;
+ case ISD::FABS: {
+ // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X).
+ MVT::ValueType VT = Node->getValueType(0);
+ Tmp2 = DAG.getConstantFP(0.0, VT);
+ Tmp2 = DAG.getSetCC(TLI.getSetCCResultTy(), Tmp1, Tmp2, ISD::SETUGT);
+ Tmp3 = DAG.getNode(ISD::FNEG, VT, Tmp1);
+ Result = DAG.getNode(ISD::SELECT, VT, Tmp2, Tmp1, Tmp3);
+ break;
+ }
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS: {
+ MVT::ValueType VT = Node->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ switch(Node->getOpcode()) {
+ case ISD::FSQRT:
+ LC = VT == MVT::f32 ? RTLIB::SQRT_F32 : RTLIB::SQRT_F64;
+ break;
+ case ISD::FSIN:
+ LC = VT == MVT::f32 ? RTLIB::SIN_F32 : RTLIB::SIN_F64;
+ break;
+ case ISD::FCOS:
+ LC = VT == MVT::f32 ? RTLIB::COS_F32 : RTLIB::COS_F64;
+ break;
+ default: assert(0 && "Unreachable!");
+ }
+ SDOperand Dummy;
+ Result = ExpandLibCall(TLI.getLibcallName(LC), Node,
+ false/*sign irrelevant*/, Dummy);
+ break;
+ }
+ }
+ break;
+ }
+ break;
+ case ISD::FPOWI: {
+ // We always lower FPOWI into a libcall; no target supports it natively yet.
+ RTLIB::Libcall LC = Node->getValueType(0) == MVT::f32
+ ? RTLIB::POWI_F32 : RTLIB::POWI_F64;
+ SDOperand Dummy;
+ Result = ExpandLibCall(TLI.getLibcallName(LC), Node,
+ false/*sign irrelevant*/, Dummy);
+ break;
+ }
+ case ISD::BIT_CONVERT:
+ if (!isTypeLegal(Node->getOperand(0).getValueType())) {
+ Result = ExpandBIT_CONVERT(Node->getValueType(0), Node->getOperand(0));
+ } else if (MVT::isVector(Op.getOperand(0).getValueType())) {
+ // The input is a vector type. Depending on whether that vector type is
+ // legal, we either bit-convert it directly, scalarize it, or (FIXME,
+ // unimplemented) store and reload it.
+ SDNode *InVal = Node->getOperand(0).Val;
+ unsigned NumElems = MVT::getVectorNumElements(InVal->getValueType(0));
+ MVT::ValueType EVT = MVT::getVectorElementType(InVal->getValueType(0));
+
+ // Figure out if there is a simple type corresponding to this Vector
+ // type. If so, convert to the vector type.
+ MVT::ValueType TVT = MVT::getVectorType(EVT, NumElems);
+ if (TLI.isTypeLegal(TVT)) {
+ // Turn this into a bit convert of the vector input.
+ Result = DAG.getNode(ISD::BIT_CONVERT, Node->getValueType(0),
+ LegalizeOp(Node->getOperand(0)));
+ break;
+ } else if (NumElems == 1) {
+ // Turn this into a bit convert of the scalar input.
+ Result = DAG.getNode(ISD::BIT_CONVERT, Node->getValueType(0),
+ ScalarizeVectorOp(Node->getOperand(0)));
+ break;
+ } else {
+ // FIXME: UNIMP! Store then reload
+ assert(0 && "Cast from unsupported vector type not implemented yet!");
+ }
+ } else {
+ switch (TLI.getOperationAction(ISD::BIT_CONVERT,
+ Node->getOperand(0).getValueType())) {
+ default: assert(0 && "Unknown operation action!");
+ case TargetLowering::Expand:
+ Result = ExpandBIT_CONVERT(Node->getValueType(0), Node->getOperand(0));
+ break;
+ case TargetLowering::Legal:
+ Tmp1 = LegalizeOp(Node->getOperand(0));
+ Result = DAG.UpdateNodeOperands(Result, Tmp1);
+ break;
+ }
+ }
+ break;
+
+ // Conversion operators. The source and destination have different types.
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: {
+ bool isSigned = Node->getOpcode() == ISD::SINT_TO_FP;
+ switch (getTypeAction(Node->getOperand(0).getValueType())) {
+ case Legal:
+ switch (TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(0).getValueType())) {
+ default: assert(0 && "Unknown operation action!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Tmp1 = LegalizeOp(Node->getOperand(0));
+ Result = DAG.UpdateNodeOperands(Result, Tmp1);
+ if (isCustom) {
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ }
+ break;
+ case TargetLowering::Expand:
+ Result = ExpandLegalINT_TO_FP(isSigned,
+ LegalizeOp(Node->getOperand(0)),
+ Node->getValueType(0));
+ break;
+ case TargetLowering::Promote:
+ Result = PromoteLegalINT_TO_FP(LegalizeOp(Node->getOperand(0)),
+ Node->getValueType(0),
+ isSigned);
+ break;
+ }
+ break;
+ case Expand:
+ Result = ExpandIntToFP(Node->getOpcode() == ISD::SINT_TO_FP,
+ Node->getValueType(0), Node->getOperand(0));
+ break;
+ case Promote:
+ Tmp1 = PromoteOp(Node->getOperand(0));
+ if (isSigned) {
+ Tmp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, Tmp1.getValueType(),
+ Tmp1, DAG.getValueType(Node->getOperand(0).getValueType()));
+ } else {
+ Tmp1 = DAG.getZeroExtendInReg(Tmp1,
+ Node->getOperand(0).getValueType());
+ }
+ Result = DAG.UpdateNodeOperands(Result, Tmp1);
+ Result = LegalizeOp(Result); // The 'op' is not necessarily legal!
+ break;
+ }
+ break;
+ }
+ case ISD::TRUNCATE:
+ switch (getTypeAction(Node->getOperand(0).getValueType())) {
+ case Legal:
+ Tmp1 = LegalizeOp(Node->getOperand(0));
+ Result = DAG.UpdateNodeOperands(Result, Tmp1);
+ break;
+ case Expand:
+ ExpandOp(Node->getOperand(0), Tmp1, Tmp2);
+
+ // Since the result is legal, we should just be able to truncate the low
+ // part of the source.
+ Result = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0), Tmp1);
+ break;
+ case Promote:
+ Result = PromoteOp(Node->getOperand(0));
+ Result = DAG.getNode(ISD::TRUNCATE, Op.getValueType(), Result);
+ break;
+ }
+ break;
+
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ switch (getTypeAction(Node->getOperand(0).getValueType())) {
+ case Legal:
+ Tmp1 = LegalizeOp(Node->getOperand(0));
+
+ switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))){
+ default: assert(0 && "Unknown operation action!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1);
+ if (isCustom) {
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ }
+ break;
+ case TargetLowering::Promote:
+ Result = PromoteLegalFP_TO_INT(Tmp1, Node->getValueType(0),
+ Node->getOpcode() == ISD::FP_TO_SINT);
+ break;
+ case TargetLowering::Expand:
+ if (Node->getOpcode() == ISD::FP_TO_UINT) {
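+ // Expand FP_TO_UINT in terms of FP_TO_SINT: if the input is below
+ // 2^(N-1), a signed conversion is already correct; otherwise convert
+ // (x - 2^(N-1)) and set the sign bit of the result with an XOR.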
+ SDOperand True, False;
+ MVT::ValueType VT = Node->getOperand(0).getValueType();
+ MVT::ValueType NVT = Node->getValueType(0);
+ unsigned ShiftAmt = MVT::getSizeInBits(Node->getValueType(0))-1;
+ Tmp2 = DAG.getConstantFP((double)(1ULL << ShiftAmt), VT);
+ Tmp3 = DAG.getSetCC(TLI.getSetCCResultTy(),
+ Node->getOperand(0), Tmp2, ISD::SETLT);
+ True = DAG.getNode(ISD::FP_TO_SINT, NVT, Node->getOperand(0));
+ False = DAG.getNode(ISD::FP_TO_SINT, NVT,
+ DAG.getNode(ISD::FSUB, VT, Node->getOperand(0),
+ Tmp2));
+ False = DAG.getNode(ISD::XOR, NVT, False,
+ DAG.getConstant(1ULL << ShiftAmt, NVT));
+ Result = DAG.getNode(ISD::SELECT, NVT, Tmp3, True, False);
+ break;
+ } else {
+ assert(0 && "Do not know how to expand FP_TO_SINT yet!");
+ }
+ break;
+ }
+ break;
+ case Expand: {
+ // Convert f32 / f64 to i32 / i64.
+ MVT::ValueType VT = Op.getValueType();
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ switch (Node->getOpcode()) {
+ case ISD::FP_TO_SINT:
+ if (Node->getOperand(0).getValueType() == MVT::f32)
+ LC = (VT == MVT::i32)
+ ? RTLIB::FPTOSINT_F32_I32 : RTLIB::FPTOSINT_F32_I64;
+ else
+ LC = (VT == MVT::i32)
+ ? RTLIB::FPTOSINT_F64_I32 : RTLIB::FPTOSINT_F64_I64;
+ break;
+ case ISD::FP_TO_UINT:
+ if (Node->getOperand(0).getValueType() == MVT::f32)
+ LC = (VT == MVT::i32)
+ ? RTLIB::FPTOUINT_F32_I32 : RTLIB::FPTOUINT_F32_I64;
+ else
+ LC = (VT == MVT::i32)
+ ? RTLIB::FPTOUINT_F64_I32 : RTLIB::FPTOUINT_F64_I64;
+ break;
+ default: assert(0 && "Unreachable!");
+ }
+ SDOperand Dummy;
+ Result = ExpandLibCall(TLI.getLibcallName(LC), Node,
+ false/*sign irrelevant*/, Dummy);
+ break;
+ }
+ case Promote:
+ Tmp1 = PromoteOp(Node->getOperand(0));
+ Result = DAG.UpdateNodeOperands(Result, LegalizeOp(Tmp1));
+ Result = LegalizeOp(Result);
+ break;
+ }
+ break;
+
+ case ISD::FP_ROUND:
+ if (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)) ==
+ TargetLowering::Expand) {
+ // The only way we can lower this is to turn it into a TRUNCSTORE,
+ // EXTLOAD pair, targeting a temporary location (a stack slot).
+
+ // NOTE: there is a choice here between constantly creating new stack
+ // slots and always reusing the same one. We currently always create
+ // new ones, as reuse may inhibit scheduling.
+ MVT::ValueType VT = Op.getValueType(); // The (smaller) destination type, e.g. f32.
+ const Type *Ty = MVT::getTypeForValueType(VT);
+ uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty);
+ unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI =
+ MF.getFrameInfo()->CreateStackObject(TySize, Align);
+ SDOperand StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ Result = DAG.getTruncStore(DAG.getEntryNode(), Node->getOperand(0),
+ StackSlot, NULL, 0, VT);
+ Result = DAG.getLoad(VT, Result, StackSlot, NULL, 0, VT);
+ break;
+ }
+ // FALL THROUGH
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::FP_EXTEND:
+ switch (getTypeAction(Node->getOperand(0).getValueType())) {
+ case Expand: assert(0 && "Shouldn't need to expand other operators here!");
+ case Legal:
+ Tmp1 = LegalizeOp(Node->getOperand(0));
+ Result = DAG.UpdateNodeOperands(Result, Tmp1);
+ break;
+ case Promote:
+ switch (Node->getOpcode()) {
+ case ISD::ANY_EXTEND:
+ Tmp1 = PromoteOp(Node->getOperand(0));
+ Result = DAG.getNode(ISD::ANY_EXTEND, Op.getValueType(), Tmp1);
+ break;
+ case ISD::ZERO_EXTEND:
+ Result = PromoteOp(Node->getOperand(0));
+ Result = DAG.getNode(ISD::ANY_EXTEND, Op.getValueType(), Result);
+ Result = DAG.getZeroExtendInReg(Result,
+ Node->getOperand(0).getValueType());
+ break;
+ case ISD::SIGN_EXTEND:
+ Result = PromoteOp(Node->getOperand(0));
+ Result = DAG.getNode(ISD::ANY_EXTEND, Op.getValueType(), Result);
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, Result.getValueType(),
+ Result,
+ DAG.getValueType(Node->getOperand(0).getValueType()));
+ break;
+ case ISD::FP_EXTEND:
+ Result = PromoteOp(Node->getOperand(0));
+ if (Result.getValueType() != Op.getValueType())
+ // Dynamically dead while we have only 2 FP types.
+ Result = DAG.getNode(ISD::FP_EXTEND, Op.getValueType(), Result);
+ break;
+ case ISD::FP_ROUND:
+ Result = PromoteOp(Node->getOperand(0));
+ Result = DAG.getNode(Node->getOpcode(), Op.getValueType(), Result);
+ break;
+ }
+ }
+ break;
+ case ISD::FP_ROUND_INREG:
+ case ISD::SIGN_EXTEND_INREG: {
+ Tmp1 = LegalizeOp(Node->getOperand(0));
+ MVT::ValueType ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+
+ // If this operation is not supported, convert it to a shl/shr or load/store
+ // pair.
+ switch (TLI.getOperationAction(Node->getOpcode(), ExtraVT)) {
+ default: assert(0 && "This action not supported for this op yet!");
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+ break;
+ case TargetLowering::Expand:
+ // If this is an integer extend and shifts are supported, do that.
+ if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ // NOTE: we could fall back on load/store here too for targets without
+ // SAR. However, it is doubtful that any exist.
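+ // e.g. sign-extending the low 8 bits of an i32 value:
+ // (x << 24) followed by an arithmetic shift right by 24.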
+ unsigned BitsDiff = MVT::getSizeInBits(Node->getValueType(0)) -
+ MVT::getSizeInBits(ExtraVT);
+ SDOperand ShiftCst = DAG.getConstant(BitsDiff, TLI.getShiftAmountTy());
+ Result = DAG.getNode(ISD::SHL, Node->getValueType(0),
+ Node->getOperand(0), ShiftCst);
+ Result = DAG.getNode(ISD::SRA, Node->getValueType(0),
+ Result, ShiftCst);
+ } else if (Node->getOpcode() == ISD::FP_ROUND_INREG) {
+ // The only way we can lower this is to turn it into a TRUNCSTORE,
+ // EXTLOAD pair, targeting a temporary location (a stack slot).
+
+ // NOTE: there is a choice here between constantly creating new stack
+ // slots and always reusing the same one. We currently always create
+ // new ones, as reuse may inhibit scheduling.
+ const Type *Ty = MVT::getTypeForValueType(ExtraVT);
+ uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty);
+ unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI =
+ MF.getFrameInfo()->CreateStackObject(TySize, Align);
+ SDOperand StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ Result = DAG.getTruncStore(DAG.getEntryNode(), Node->getOperand(0),
+ StackSlot, NULL, 0, ExtraVT);
+ Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0),
+ Result, StackSlot, NULL, 0, ExtraVT);
+ } else {
+ assert(0 && "Unknown op");
+ }
+ break;
+ }
+ break;
+ }
+ }
+
+ assert(Result.getValueType() == Op.getValueType() &&
+ "Bad legalization!");
+
+ // Make sure that the generated code is itself legal.
+ if (Result != Op)
+ Result = LegalizeOp(Result);
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ AddLegalizedOperand(Op, Result);
+ return Result;
+}
+
+/// PromoteOp - Given an operation that produces a value in an invalid type,
+/// promote it to compute the value in a larger type. The produced value will
+/// have the correct bits for the low portion of the register, but no guarantee
+/// is made about the top bits: it may be zero, sign-extended, or garbage.
+SDOperand SelectionDAGLegalize::PromoteOp(SDOperand Op) {
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType NVT = TLI.getTypeToTransformTo(VT);
+ assert(getTypeAction(VT) == Promote &&
+ "Caller should expand or legalize operands that are not promotable!");
+ assert(NVT > VT && MVT::isInteger(NVT) == MVT::isInteger(VT) &&
+ "Cannot promote to smaller type!");
+
+ SDOperand Tmp1, Tmp2, Tmp3;
+ SDOperand Result;
+ SDNode *Node = Op.Val;
+
+ DenseMap<SDOperand, SDOperand>::iterator I = PromotedNodes.find(Op);
+ if (I != PromotedNodes.end()) return I->second;
+
+ switch (Node->getOpcode()) {
+ case ISD::CopyFromReg:
+ assert(0 && "CopyFromReg must be legal!");
+ default:
+#ifndef NDEBUG
+ cerr << "NODE: "; Node->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to promote this operator!");
+ abort();
+ case ISD::UNDEF:
+ Result = DAG.getNode(ISD::UNDEF, NVT);
+ break;
+ case ISD::Constant:
+ if (VT != MVT::i1)
+ Result = DAG.getNode(ISD::SIGN_EXTEND, NVT, Op);
+ else
+ Result = DAG.getNode(ISD::ZERO_EXTEND, NVT, Op);
+ assert(isa<ConstantSDNode>(Result) && "Didn't constant fold zext?");
+ break;
+ case ISD::ConstantFP:
+ Result = DAG.getNode(ISD::FP_EXTEND, NVT, Op);
+ assert(isa<ConstantFPSDNode>(Result) && "Didn't constant fold fp_extend?");
+ break;
+
+ case ISD::SETCC:
+ assert(isTypeLegal(TLI.getSetCCResultTy()) && "SetCC type is not legal??");
+ Result = DAG.getNode(ISD::SETCC, TLI.getSetCCResultTy(),Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(2));
+ break;
+
+ case ISD::TRUNCATE:
+ switch (getTypeAction(Node->getOperand(0).getValueType())) {
+ case Legal:
+ Result = LegalizeOp(Node->getOperand(0));
+ assert(Result.getValueType() >= NVT &&
+ "This truncation doesn't make sense!");
+ if (Result.getValueType() > NVT) // Truncate to NVT instead of VT
+ Result = DAG.getNode(ISD::TRUNCATE, NVT, Result);
+ break;
+ case Promote:
+ // The truncation is not required, because we don't guarantee anything
+ // about high bits anyway.
+ Result = PromoteOp(Node->getOperand(0));
+ break;
+ case Expand:
+ ExpandOp(Node->getOperand(0), Tmp1, Tmp2);
+ // Truncate the low part of the expanded value to the result type
+ Result = DAG.getNode(ISD::TRUNCATE, NVT, Tmp1);
+ }
+ break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ switch (getTypeAction(Node->getOperand(0).getValueType())) {
+ case Expand: assert(0 && "BUG: Smaller reg should have been promoted!");
+ case Legal:
+ // Input is legal? Just do extend all the way to the larger type.
+ Result = DAG.getNode(Node->getOpcode(), NVT, Node->getOperand(0));
+ break;
+ case Promote:
+ // Promote the reg if it's smaller.
+ Result = PromoteOp(Node->getOperand(0));
+ // The high bits are not guaranteed to be anything. Insert an extend.
+ if (Node->getOpcode() == ISD::SIGN_EXTEND)
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Result,
+ DAG.getValueType(Node->getOperand(0).getValueType()));
+ else if (Node->getOpcode() == ISD::ZERO_EXTEND)
+ Result = DAG.getZeroExtendInReg(Result,
+ Node->getOperand(0).getValueType());
+ break;
+ }
+ break;
+ case ISD::BIT_CONVERT:
+ Result = ExpandBIT_CONVERT(Node->getValueType(0), Node->getOperand(0));
+ Result = PromoteOp(Result);
+ break;
+
+ case ISD::FP_EXTEND:
+ assert(0 && "Case not implemented. Dynamically dead with 2 FP types!");
+ case ISD::FP_ROUND:
+ switch (getTypeAction(Node->getOperand(0).getValueType())) {
+ case Expand: assert(0 && "BUG: Cannot expand FP regs!");
+ case Promote: assert(0 && "Unreachable with 2 FP types!");
+ case Legal:
+ // Input is legal? Do an FP_ROUND_INREG.
+ Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Node->getOperand(0),
+ DAG.getValueType(VT));
+ break;
+ }
+ break;
+
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ switch (getTypeAction(Node->getOperand(0).getValueType())) {
+ case Legal:
+ // No extra round required here.
+ Result = DAG.getNode(Node->getOpcode(), NVT, Node->getOperand(0));
+ break;
+
+ case Promote:
+ Result = PromoteOp(Node->getOperand(0));
+ if (Node->getOpcode() == ISD::SINT_TO_FP)
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, Result.getValueType(),
+ Result,
+ DAG.getValueType(Node->getOperand(0).getValueType()));
+ else
+ Result = DAG.getZeroExtendInReg(Result,
+ Node->getOperand(0).getValueType());
+ // No extra round required here.
+ Result = DAG.getNode(Node->getOpcode(), NVT, Result);
+ break;
+ case Expand:
+ Result = ExpandIntToFP(Node->getOpcode() == ISD::SINT_TO_FP, NVT,
+ Node->getOperand(0));
+ // Round if we cannot tolerate excess precision.
+ if (NoExcessFPPrecision)
+ Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result,
+ DAG.getValueType(VT));
+ break;
+ }
+ break;
+
+ case ISD::SIGN_EXTEND_INREG:
+ Result = PromoteOp(Node->getOperand(0));
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Result,
+ Node->getOperand(1));
+ break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ switch (getTypeAction(Node->getOperand(0).getValueType())) {
+ case Legal:
+ case Expand:
+ Tmp1 = Node->getOperand(0);
+ break;
+ case Promote:
+ // The input result is prerounded, so we don't have to do anything
+ // special.
+ Tmp1 = PromoteOp(Node->getOperand(0));
+ break;
+ }
+ // If we're promoting a UINT to a larger size, check to see if the new node
+ // will be legal. If it isn't, check to see if FP_TO_SINT is legal, since
+ // we can use that instead. This allows us to generate better code for
+ // FP_TO_UINT for small destination sizes on targets where FP_TO_UINT is not
+ // legal, such as PowerPC.
+ if (Node->getOpcode() == ISD::FP_TO_UINT &&
+ !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
+ (TLI.isOperationLegal(ISD::FP_TO_SINT, NVT) ||
+ TLI.getOperationAction(ISD::FP_TO_SINT, NVT)==TargetLowering::Custom)){
+ Result = DAG.getNode(ISD::FP_TO_SINT, NVT, Tmp1);
+ } else {
+ Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1);
+ }
+ break;
+
+ case ISD::FABS:
+ case ISD::FNEG:
+ Tmp1 = PromoteOp(Node->getOperand(0));
+ assert(Tmp1.getValueType() == NVT);
+ Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1);
+ // NOTE: we do not have to do any extra rounding here for
+ // NoExcessFPPrecision, because we know the input will have the appropriate
+ // precision, and these operations don't modify precision at all.
+ break;
+
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ Tmp1 = PromoteOp(Node->getOperand(0));
+ assert(Tmp1.getValueType() == NVT);
+ Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1);
+ if (NoExcessFPPrecision)
+ Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result,
+ DAG.getValueType(VT));
+ break;
+
+ case ISD::FPOWI: {
+ // Promote f32 powi to f64 powi. Note that this could insert a libcall
+ // directly as well, which may be better.
+ Tmp1 = PromoteOp(Node->getOperand(0));
+ assert(Tmp1.getValueType() == NVT);
+ Result = DAG.getNode(ISD::FPOWI, NVT, Tmp1, Node->getOperand(1));
+ if (NoExcessFPPrecision)
+ Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result,
+ DAG.getValueType(VT));
+ break;
+ }
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ // The input may have strange things in the top bits of the registers, but
+ // these operations don't care. They may have weird bits going out, but
+ // that too is okay if they are integer operations.
+ Tmp1 = PromoteOp(Node->getOperand(0));
+ Tmp2 = PromoteOp(Node->getOperand(1));
+ assert(Tmp1.getValueType() == NVT && Tmp2.getValueType() == NVT);
+ Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2);
+ break;
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ Tmp1 = PromoteOp(Node->getOperand(0));
+ Tmp2 = PromoteOp(Node->getOperand(1));
+ assert(Tmp1.getValueType() == NVT && Tmp2.getValueType() == NVT);
+ Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2);
+
+ // Floating point operations will give excess precision that we may not be
+ // able to tolerate. If we DO allow excess precision, just leave it,
+ // otherwise excise it.
+ // FIXME: Why would we need to round FP ops more than integer ones?
+ // Is Round(Add(Add(A,B),C)) != Round(Add(Round(Add(A,B)), C))?
+ if (NoExcessFPPrecision)
+ Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result,
+ DAG.getValueType(VT));
+ break;
+
+ case ISD::SDIV:
+ case ISD::SREM:
+ // These operators require that their input be sign extended.
+ Tmp1 = PromoteOp(Node->getOperand(0));
+ Tmp2 = PromoteOp(Node->getOperand(1));
+ if (MVT::isInteger(NVT)) {
+ Tmp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp1,
+ DAG.getValueType(VT));
+ Tmp2 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp2,
+ DAG.getValueType(VT));
+ }
+ Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2);
+
+ // Perform FP_ROUND: this is probably overly pessimistic.
+ if (MVT::isFloatingPoint(NVT) && NoExcessFPPrecision)
+ Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result,
+ DAG.getValueType(VT));
+ break;
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::FCOPYSIGN:
+ // These operators require that their input be fp extended.
+ switch (getTypeAction(Node->getOperand(0).getValueType())) {
+ case Legal:
+ Tmp1 = LegalizeOp(Node->getOperand(0));
+ break;
+ case Promote:
+ Tmp1 = PromoteOp(Node->getOperand(0));
+ break;
+ case Expand:
+ assert(0 && "not implemented");
+ }
+ switch (getTypeAction(Node->getOperand(1).getValueType())) {
+ case Legal:
+ Tmp2 = LegalizeOp(Node->getOperand(1));
+ break;
+ case Promote:
+ Tmp2 = PromoteOp(Node->getOperand(1));
+ break;
+ case Expand:
+ assert(0 && "not implemented");
+ }
+ Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2);
+
+ // Perform FP_ROUND: this is probably overly pessimistic.
+ if (NoExcessFPPrecision && Node->getOpcode() != ISD::FCOPYSIGN)
+ Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result,
+ DAG.getValueType(VT));
+ break;
+
+ case ISD::UDIV:
+ case ISD::UREM:
+ // These operators require that their input be zero extended.
+ Tmp1 = PromoteOp(Node->getOperand(0));
+ Tmp2 = PromoteOp(Node->getOperand(1));
+ assert(MVT::isInteger(NVT) && "Operators don't apply to FP!");
+ Tmp1 = DAG.getZeroExtendInReg(Tmp1, VT);
+ Tmp2 = DAG.getZeroExtendInReg(Tmp2, VT);
+ Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2);
+ break;
+
+ case ISD::SHL:
+ Tmp1 = PromoteOp(Node->getOperand(0));
+ Result = DAG.getNode(ISD::SHL, NVT, Tmp1, Node->getOperand(1));
+ break;
+ case ISD::SRA:
+ // The input value must be properly sign extended.
+ Tmp1 = PromoteOp(Node->getOperand(0));
+ Tmp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp1,
+ DAG.getValueType(VT));
+ Result = DAG.getNode(ISD::SRA, NVT, Tmp1, Node->getOperand(1));
+ break;
+ case ISD::SRL:
+ // The input value must be properly zero extended.
+ Tmp1 = PromoteOp(Node->getOperand(0));
+ Tmp1 = DAG.getZeroExtendInReg(Tmp1, VT);
+ Result = DAG.getNode(ISD::SRL, NVT, Tmp1, Node->getOperand(1));
+ break;
+
+ case ISD::VAARG:
+ Tmp1 = Node->getOperand(0); // Get the chain.
+ Tmp2 = Node->getOperand(1); // Get the pointer.
+ if (TLI.getOperationAction(ISD::VAARG, VT) == TargetLowering::Custom) {
+ Tmp3 = DAG.getVAArg(VT, Tmp1, Tmp2, Node->getOperand(2));
+ Result = TLI.CustomPromoteOperation(Tmp3, DAG);
+ } else {
+ SrcValueSDNode *SV = cast<SrcValueSDNode>(Node->getOperand(2));
+ SDOperand VAList = DAG.getLoad(TLI.getPointerTy(), Tmp1, Tmp2,
+ SV->getValue(), SV->getOffset());
+ // Increment the pointer, VAList, to the next vaarg
+ Tmp3 = DAG.getNode(ISD::ADD, TLI.getPointerTy(), VAList,
+ DAG.getConstant(MVT::getSizeInBits(VT)/8,
+ TLI.getPointerTy()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp3 = DAG.getStore(VAList.getValue(1), Tmp3, Tmp2, SV->getValue(),
+ SV->getOffset());
+ // Load the actual argument out of the pointer VAList
+ Result = DAG.getExtLoad(ISD::EXTLOAD, NVT, Tmp3, VAList, NULL, 0, VT);
+ }
+ // Remember that we legalized the chain.
+ AddLegalizedOperand(Op.getValue(1), LegalizeOp(Result.getValue(1)));
+ break;
+
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(Node)
+ ? ISD::EXTLOAD : LD->getExtensionType();
+ Result = DAG.getExtLoad(ExtType, NVT,
+ LD->getChain(), LD->getBasePtr(),
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->getLoadedVT(),
+ LD->isVolatile(),
+ LD->getAlignment());
+ // Remember that we legalized the chain.
+ AddLegalizedOperand(Op.getValue(1), LegalizeOp(Result.getValue(1)));
+ break;
+ }
+ case ISD::SELECT:
+ Tmp2 = PromoteOp(Node->getOperand(1)); // Promote the true value.
+ Tmp3 = PromoteOp(Node->getOperand(2)); // Promote the false value.
+ Result = DAG.getNode(ISD::SELECT, NVT, Node->getOperand(0), Tmp2, Tmp3);
+ break;
+ case ISD::SELECT_CC:
+ Tmp2 = PromoteOp(Node->getOperand(2)); // True
+ Tmp3 = PromoteOp(Node->getOperand(3)); // False
+ Result = DAG.getNode(ISD::SELECT_CC, NVT, Node->getOperand(0),
+ Node->getOperand(1), Tmp2, Tmp3, Node->getOperand(4));
+ break;
+ case ISD::BSWAP:
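+ // Promote by zero-extending to NVT, byte-swapping there, then shifting
+ // the interesting bytes back down to the bottom of the register.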
+ Tmp1 = Node->getOperand(0);
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, NVT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::BSWAP, NVT, Tmp1);
+ Result = DAG.getNode(ISD::SRL, NVT, Tmp1,
+ DAG.getConstant(MVT::getSizeInBits(NVT) -
+ MVT::getSizeInBits(VT),
+ TLI.getShiftAmountTy()));
+ break;
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ // Zero extend the argument
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, NVT, Node->getOperand(0));
+ // Perform the larger operation, then subtract if needed.
+ Tmp1 = DAG.getNode(Node->getOpcode(), NVT, Tmp1);
+ switch(Node->getOpcode()) {
+ case ISD::CTPOP:
+ Result = Tmp1;
+ break;
+ case ISD::CTTZ:
+ // if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT)
+ Tmp2 = DAG.getSetCC(TLI.getSetCCResultTy(), Tmp1,
+ DAG.getConstant(MVT::getSizeInBits(NVT), NVT),
+ ISD::SETEQ);
+ Result = DAG.getNode(ISD::SELECT, NVT, Tmp2,
+ DAG.getConstant(MVT::getSizeInBits(VT), NVT), Tmp1);
+ break;
+ case ISD::CTLZ:
+ // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
+ Result = DAG.getNode(ISD::SUB, NVT, Tmp1,
+ DAG.getConstant(MVT::getSizeInBits(NVT) -
+ MVT::getSizeInBits(VT), NVT));
+ break;
+ }
+ break;
+ case ISD::EXTRACT_SUBVECTOR:
+ Result = PromoteOp(ExpandEXTRACT_SUBVECTOR(Op));
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Result = PromoteOp(ExpandEXTRACT_VECTOR_ELT(Op));
+ break;
+ }
+
+ assert(Result.Val && "Didn't set a result!");
+
+ // Make sure the result is itself legal.
+ Result = LegalizeOp(Result);
+
+ // Remember that we promoted this!
+ AddPromotedOperand(Op, Result);
+ return Result;
+}
+
+/// ExpandEXTRACT_VECTOR_ELT - Expand an EXTRACT_VECTOR_ELT operation into
+/// a legal EXTRACT_VECTOR_ELT operation, scalar code, or memory traffic,
+/// based on the vector type. The return type of this matches the element type
+/// of the vector, which may not be legal for the target.
+SDOperand SelectionDAGLegalize::ExpandEXTRACT_VECTOR_ELT(SDOperand Op) {
+ // We know that operand #0 is the Vec vector. If the index is a constant
+ // or if the input vector is a supported hardware type, we can handle it
+ // directly. Otherwise, lower to a store followed by an indexed load.
+ SDOperand Vec = Op.getOperand(0);
+ SDOperand Idx = Op.getOperand(1);
+
+ SDNode *InVal = Vec.Val;
+ MVT::ValueType TVT = InVal->getValueType(0);
+ unsigned NumElems = MVT::getVectorNumElements(TVT);
+
+ switch (TLI.getOperationAction(ISD::EXTRACT_VECTOR_ELT, TVT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom: {
+ Vec = LegalizeOp(Vec);
+ Op = DAG.UpdateNodeOperands(Op, Vec, Idx);
+ SDOperand Tmp3 = TLI.LowerOperation(Op, DAG);
+ if (Tmp3.Val)
+ return Tmp3;
+ break;
+ }
+ case TargetLowering::Legal:
+ if (isTypeLegal(TVT)) {
+ Vec = LegalizeOp(Vec);
+ Op = DAG.UpdateNodeOperands(Op, Vec, Idx);
+ Op = LegalizeOp(Op);
+ }
+ break;
+ case TargetLowering::Expand:
+ break;
+ }
+
+ if (NumElems == 1) {
+ // This must be an access of the only element. Return it.
+ Op = ScalarizeVectorOp(Vec);
+ } else if (!TLI.isTypeLegal(TVT) && isa<ConstantSDNode>(Idx)) {
+ ConstantSDNode *CIdx = cast<ConstantSDNode>(Idx);
+ SDOperand Lo, Hi;
+ SplitVectorOp(Vec, Lo, Hi);
+ if (CIdx->getValue() < NumElems/2) {
+ Vec = Lo;
+ } else {
+ Vec = Hi;
+ Idx = DAG.getConstant(CIdx->getValue() - NumElems/2,
+ Idx.getValueType());
+ }
+
+ // It's now an extract from the appropriate high or low part. Recurse.
+ Op = DAG.UpdateNodeOperands(Op, Vec, Idx);
+ Op = ExpandEXTRACT_VECTOR_ELT(Op);
+ } else {
+ // Store the value to a temporary stack slot, then LOAD the scalar
+ // element back out.
+ SDOperand StackPtr = CreateStackTemporary(Vec.getValueType());
+ SDOperand Ch = DAG.getStore(DAG.getEntryNode(), Vec, StackPtr, NULL, 0);
+
+ // Add the offset to the index.
+ unsigned EltSize = MVT::getSizeInBits(Op.getValueType())/8;
+ Idx = DAG.getNode(ISD::MUL, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, Idx.getValueType()));
+ StackPtr = DAG.getNode(ISD::ADD, Idx.getValueType(), Idx, StackPtr);
+
+ Op = DAG.getLoad(Op.getValueType(), Ch, StackPtr, NULL, 0);
+ }
+ return Op;
+}
+
+/// ExpandEXTRACT_SUBVECTOR - Expand an EXTRACT_SUBVECTOR operation. For now
+/// we assume the operation can be split if it is not already legal.
+SDOperand SelectionDAGLegalize::ExpandEXTRACT_SUBVECTOR(SDOperand Op) {
+ // We know that operand #0 is the Vec vector. For now we assume the index
+ // is a constant and that the extracted result is a supported hardware type.
+ SDOperand Vec = Op.getOperand(0);
+ SDOperand Idx = LegalizeOp(Op.getOperand(1));
+
+ unsigned NumElems = MVT::getVectorNumElements(Vec.getValueType());
+
+ if (NumElems == MVT::getVectorNumElements(Op.getValueType())) {
+ // This must be an access of the desired vector length. Return it.
+ return Vec;
+ }
+
+ ConstantSDNode *CIdx = cast<ConstantSDNode>(Idx);
+ SDOperand Lo, Hi;
+ SplitVectorOp(Vec, Lo, Hi);
+ if (CIdx->getValue() < NumElems/2) {
+ Vec = Lo;
+ } else {
+ Vec = Hi;
+ Idx = DAG.getConstant(CIdx->getValue() - NumElems/2, Idx.getValueType());
+ }
+
+ // It's now an extract from the appropriate high or low part. Recurse.
+ Op = DAG.UpdateNodeOperands(Op, Vec, Idx);
+ return ExpandEXTRACT_SUBVECTOR(Op);
+}
+
+/// LegalizeSetCCOperands - Attempts to create a legal LHS and RHS for a SETCC
+/// with condition CC on the current target. This usually involves legalizing
+/// or promoting the arguments. In the case where LHS and RHS must be expanded,
+/// there may be no choice but to create a new SetCC node to represent the
+/// legalized value of setcc lhs, rhs. In this case, the value is returned in
+/// LHS, and the SDOperand returned in RHS has a nil SDNode value.
+void SelectionDAGLegalize::LegalizeSetCCOperands(SDOperand &LHS,
+ SDOperand &RHS,
+ SDOperand &CC) {
+ SDOperand Tmp1, Tmp2, Result;
+
+ switch (getTypeAction(LHS.getValueType())) {
+ case Legal:
+ Tmp1 = LegalizeOp(LHS); // LHS
+ Tmp2 = LegalizeOp(RHS); // RHS
+ break;
+ case Promote:
+ Tmp1 = PromoteOp(LHS); // LHS
+ Tmp2 = PromoteOp(RHS); // RHS
+
+ // If this is an FP compare, the operands have already been extended.
+ if (MVT::isInteger(LHS.getValueType())) {
+ MVT::ValueType VT = LHS.getValueType();
+ MVT::ValueType NVT = TLI.getTypeToTransformTo(VT);
+
+ // Otherwise, we have to insert explicit sign or zero extends. Note
+ // that we could insert sign extends for ALL conditions, but zero extend
+ // is cheaper on many machines (an AND instead of two shifts), so prefer
+ // it.
+ switch (cast<CondCodeSDNode>(CC)->get()) {
+ default: assert(0 && "Unknown integer comparison!");
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGE:
+ case ISD::SETUGT:
+ case ISD::SETULE:
+ case ISD::SETULT:
+ // ALL of these operations will work if we either sign or zero extend
+ // the operands (including the unsigned comparisons!). Zero extend is
+ // usually a simpler/cheaper operation, so prefer it.
+ Tmp1 = DAG.getZeroExtendInReg(Tmp1, VT);
+ Tmp2 = DAG.getZeroExtendInReg(Tmp2, VT);
+ break;
+ case ISD::SETGE:
+ case ISD::SETGT:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ Tmp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp1,
+ DAG.getValueType(VT));
+ Tmp2 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp2,
+ DAG.getValueType(VT));
+ break;
+ }
+ }
+ break;
+ case Expand: {
+ MVT::ValueType VT = LHS.getValueType();
+ if (VT == MVT::f32 || VT == MVT::f64) {
+ // Expand into one or more soft-fp libcall(s).
+ RTLIB::Libcall LC1, LC2 = RTLIB::UNKNOWN_LIBCALL;
+ switch (cast<CondCodeSDNode>(CC)->get()) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+ break;
+ case ISD::SETNE:
+ case ISD::SETUNE:
+ LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+ break;
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+ break;
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+ break;
+ case ISD::SETUO:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
+ break;
+ case ISD::SETO:
+ LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64;
+ break;
+ default:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
+ switch (cast<CondCodeSDNode>(CC)->get()) {
+ case ISD::SETONE:
+ // SETONE = SETOLT | SETOGT
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+ // Fallthrough
+ case ISD::SETUGT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
+ break;
+ case ISD::SETUGE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
+ break;
+ case ISD::SETULT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
+ break;
+ case ISD::SETULE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
+ break;
+ case ISD::SETUEQ:
+ LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+ break;
+ default: assert(0 && "Unsupported FP setcc!");
+ }
+ }
+
+ SDOperand Dummy;
+ Tmp1 = ExpandLibCall(TLI.getLibcallName(LC1),
+ DAG.getNode(ISD::MERGE_VALUES, VT, LHS, RHS).Val,
+ false /*sign irrelevant*/, Dummy);
+ Tmp2 = DAG.getConstant(0, MVT::i32);
+ CC = DAG.getCondCode(TLI.getCmpLibcallCC(LC1));
+ if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+ Tmp1 = DAG.getNode(ISD::SETCC, TLI.getSetCCResultTy(), Tmp1, Tmp2, CC);
+ LHS = ExpandLibCall(TLI.getLibcallName(LC2),
+ DAG.getNode(ISD::MERGE_VALUES, VT, LHS, RHS).Val,
+ false /*sign irrelevant*/, Dummy);
+ Tmp2 = DAG.getNode(ISD::SETCC, TLI.getSetCCResultTy(), LHS, Tmp2,
+ DAG.getCondCode(TLI.getCmpLibcallCC(LC2)));
+ Tmp1 = DAG.getNode(ISD::OR, Tmp1.getValueType(), Tmp1, Tmp2);
+ Tmp2 = SDOperand();
+ }
+ LHS = Tmp1;
+ RHS = Tmp2;
+ return;
+ }
+
+ SDOperand LHSLo, LHSHi, RHSLo, RHSHi;
+ ExpandOp(LHS, LHSLo, LHSHi);
+ ExpandOp(RHS, RHSLo, RHSHi);
+ switch (cast<CondCodeSDNode>(CC)->get()) {
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ if (RHSLo == RHSHi)
+ if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo))
+ if (RHSCST->isAllOnesValue()) {
+ // Comparison to -1.
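+            // (Lo, Hi) equals (-1, -1) exactly when (Lo & Hi) is all ones, so
+            // a single AND plus one compare suffices for EQ / NE against -1.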
+ Tmp1 = DAG.getNode(ISD::AND, LHSLo.getValueType(), LHSLo, LHSHi);
+ Tmp2 = RHSLo;
+ break;
+ }
+
+ Tmp1 = DAG.getNode(ISD::XOR, LHSLo.getValueType(), LHSLo, RHSLo);
+ Tmp2 = DAG.getNode(ISD::XOR, LHSLo.getValueType(), LHSHi, RHSHi);
+ Tmp1 = DAG.getNode(ISD::OR, Tmp1.getValueType(), Tmp1, Tmp2);
+ Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+ break;
+ default:
+ // If this is a comparison of the sign bit, just look at the top part.
+ // X > -1, x < 0
+ if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(RHS))
+ if ((cast<CondCodeSDNode>(CC)->get() == ISD::SETLT &&
+ CST->getValue() == 0) || // X < 0
+ (cast<CondCodeSDNode>(CC)->get() == ISD::SETGT &&
+ CST->isAllOnesValue())) { // X > -1
+ Tmp1 = LHSHi;
+ Tmp2 = RHSHi;
+ break;
+ }
+
+ // FIXME: This generated code sucks.
+ ISD::CondCode LowCC;
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+ switch (CCCode) {
+ default: assert(0 && "Unknown integer setcc!");
+ case ISD::SETLT:
+ case ISD::SETULT: LowCC = ISD::SETULT; break;
+ case ISD::SETGT:
+ case ISD::SETUGT: LowCC = ISD::SETUGT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: LowCC = ISD::SETULE; break;
+ case ISD::SETGE:
+ case ISD::SETUGE: LowCC = ISD::SETUGE; break;
+ }
+
+ // Tmp1 = lo(op1) < lo(op2) // Always unsigned comparison
+ // Tmp2 = hi(op1) < hi(op2) // Signedness depends on operands
+ // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2;
+
+ // NOTE: on targets without efficient SELECT of bools, we can always use
+ // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
+ TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, NULL);
+ Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultTy(), LHSLo, RHSLo, LowCC,
+ false, DagCombineInfo);
+ if (!Tmp1.Val)
+ Tmp1 = DAG.getSetCC(TLI.getSetCCResultTy(), LHSLo, RHSLo, LowCC);
+ Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultTy(), LHSHi, RHSHi,
+ CCCode, false, DagCombineInfo);
+ if (!Tmp2.Val)
+ Tmp2 = DAG.getNode(ISD::SETCC, TLI.getSetCCResultTy(), LHSHi, RHSHi, CC);
+
+ ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.Val);
+ ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.Val);
+ if ((Tmp1C && Tmp1C->getValue() == 0) ||
+ (Tmp2C && Tmp2C->getValue() == 0 &&
+ (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+ CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) ||
+ (Tmp2C && Tmp2C->getValue() == 1 &&
+ (CCCode == ISD::SETLT || CCCode == ISD::SETGT ||
+ CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) {
+      // If the low part is known false, return the high part.
+ // For LE / GE, if high part is known false, ignore the low part.
+ // For LT / GT, if high part is known true, ignore the low part.
+ Tmp1 = Tmp2;
+ Tmp2 = SDOperand();
+ } else {
+ Result = TLI.SimplifySetCC(TLI.getSetCCResultTy(), LHSHi, RHSHi,
+ ISD::SETEQ, false, DagCombineInfo);
+ if (!Result.Val)
+ Result=DAG.getSetCC(TLI.getSetCCResultTy(), LHSHi, RHSHi, ISD::SETEQ);
+ Result = LegalizeOp(DAG.getNode(ISD::SELECT, Tmp1.getValueType(),
+ Result, Tmp1, Tmp2));
+ Tmp1 = Result;
+ Tmp2 = SDOperand();
+ }
+ }
+ }
+ }
+ LHS = Tmp1;
+ RHS = Tmp2;
+}
+
+/// ExpandBIT_CONVERT - Expand a BIT_CONVERT node into a store/load combination.
+/// The resultant code need not be legal. Note that SrcOp is the input operand
+/// to the BIT_CONVERT, not the BIT_CONVERT node itself.
+SDOperand SelectionDAGLegalize::ExpandBIT_CONVERT(MVT::ValueType DestVT,
+ SDOperand SrcOp) {
+ // Create the stack frame object.
+ SDOperand FIPtr = CreateStackTemporary(DestVT);
+
+ // Emit a store to the stack slot.
+ SDOperand Store = DAG.getStore(DAG.getEntryNode(), SrcOp, FIPtr, NULL, 0);
+ // Result is a load from the stack slot.
+ return DAG.getLoad(DestVT, Store, FIPtr, NULL, 0);
+}
+
+SDOperand SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+ // Create a vector sized/aligned stack slot, store the value to element #0,
+ // then load the whole vector back out.
+ SDOperand StackPtr = CreateStackTemporary(Node->getValueType(0));
+ SDOperand Ch = DAG.getStore(DAG.getEntryNode(), Node->getOperand(0), StackPtr,
+ NULL, 0);
+ return DAG.getLoad(Node->getValueType(0), Ch, StackPtr, NULL, 0);
+}
+
+
+/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDOperand SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+
+ // If the only non-undef value is the low element, turn this into a
+ // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X.
+ unsigned NumElems = Node->getNumOperands();
+ bool isOnlyLowElement = true;
+ SDOperand SplatValue = Node->getOperand(0);
+ std::map<SDOperand, std::vector<unsigned> > Values;
+ Values[SplatValue].push_back(0);
+ bool isConstant = true;
+ if (!isa<ConstantFPSDNode>(SplatValue) && !isa<ConstantSDNode>(SplatValue) &&
+ SplatValue.getOpcode() != ISD::UNDEF)
+ isConstant = false;
+
+ for (unsigned i = 1; i < NumElems; ++i) {
+ SDOperand V = Node->getOperand(i);
+ Values[V].push_back(i);
+ if (V.getOpcode() != ISD::UNDEF)
+ isOnlyLowElement = false;
+ if (SplatValue != V)
+ SplatValue = SDOperand(0,0);
+
+ // If this isn't a constant element or an undef, we can't use a constant
+ // pool load.
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V) &&
+ V.getOpcode() != ISD::UNDEF)
+ isConstant = false;
+ }
+
+ if (isOnlyLowElement) {
+    // If the low element is an undef too, then this whole thing is an undef.
+ if (Node->getOperand(0).getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::UNDEF, Node->getValueType(0));
+ // Otherwise, turn this into a scalar_to_vector node.
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, Node->getValueType(0),
+ Node->getOperand(0));
+ }
+
+ // If all elements are constants, create a load from the constant pool.
+ if (isConstant) {
+ MVT::ValueType VT = Node->getValueType(0);
+ const Type *OpNTy =
+ MVT::getTypeForValueType(Node->getOperand(0).getValueType());
+ std::vector<Constant*> CV;
+ for (unsigned i = 0, e = NumElems; i != e; ++i) {
+ if (ConstantFPSDNode *V =
+ dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
+ CV.push_back(ConstantFP::get(OpNTy, V->getValue()));
+ } else if (ConstantSDNode *V =
+ dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
+ CV.push_back(ConstantInt::get(OpNTy, V->getValue()));
+ } else {
+ assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
+ CV.push_back(UndefValue::get(OpNTy));
+ }
+ }
+ Constant *CP = ConstantVector::get(CV);
+ SDOperand CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
+ return DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0);
+ }
+
+ if (SplatValue.Val) { // Splat of one value?
+ // Build the shuffle constant vector: <0, 0, 0, 0>
+ MVT::ValueType MaskVT =
+ MVT::getIntVectorWithNumElements(NumElems);
+ SDOperand Zero = DAG.getConstant(0, MVT::getVectorElementType(MaskVT));
+ std::vector<SDOperand> ZeroVec(NumElems, Zero);
+ SDOperand SplatMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &ZeroVec[0], ZeroVec.size());
+
+ // If the target supports VECTOR_SHUFFLE and this shuffle mask, use it.
+ if (isShuffleLegal(Node->getValueType(0), SplatMask)) {
+ // Get the splatted value into the low element of a vector register.
+ SDOperand LowValVec =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, Node->getValueType(0), SplatValue);
+
+ // Return shuffle(LowValVec, undef, <0,0,0,0>)
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, Node->getValueType(0), LowValVec,
+ DAG.getNode(ISD::UNDEF, Node->getValueType(0)),
+ SplatMask);
+ }
+ }
+
+ // If there are only two unique elements, we may be able to turn this into a
+ // vector shuffle.
+ if (Values.size() == 2) {
+ // Build the shuffle constant vector: e.g. <0, 4, 0, 4>
+ MVT::ValueType MaskVT =
+ MVT::getIntVectorWithNumElements(NumElems);
+ std::vector<SDOperand> MaskVec(NumElems);
+ unsigned i = 0;
+ for (std::map<SDOperand,std::vector<unsigned> >::iterator I=Values.begin(),
+ E = Values.end(); I != E; ++I) {
+ for (std::vector<unsigned>::iterator II = I->second.begin(),
+ EE = I->second.end(); II != EE; ++II)
+ MaskVec[*II] = DAG.getConstant(i, MVT::getVectorElementType(MaskVT));
+ i += NumElems;
+ }
+ SDOperand ShuffleMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &MaskVec[0], MaskVec.size());
+
+ // If the target supports VECTOR_SHUFFLE and this shuffle mask, use it.
+ if (TLI.isOperationLegal(ISD::SCALAR_TO_VECTOR, Node->getValueType(0)) &&
+ isShuffleLegal(Node->getValueType(0), ShuffleMask)) {
+ SmallVector<SDOperand, 8> Ops;
+ for(std::map<SDOperand,std::vector<unsigned> >::iterator I=Values.begin(),
+ E = Values.end(); I != E; ++I) {
+ SDOperand Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, Node->getValueType(0),
+ I->first);
+ Ops.push_back(Op);
+ }
+ Ops.push_back(ShuffleMask);
+
+ // Return shuffle(LoValVec, HiValVec, <0,1,0,1>)
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, Node->getValueType(0),
+ &Ops[0], Ops.size());
+ }
+ }
+
+ // Otherwise, we can't handle this case efficiently. Allocate a sufficiently
+ // aligned object on the stack, store each element into it, then load
+ // the result as a vector.
+ MVT::ValueType VT = Node->getValueType(0);
+ // Create the stack frame object.
+ SDOperand FIPtr = CreateStackTemporary(VT);
+
+ // Emit a store of each element to the stack slot.
+ SmallVector<SDOperand, 8> Stores;
+ unsigned TypeByteSize =
+ MVT::getSizeInBits(Node->getOperand(0).getValueType())/8;
+ // Store (in the right endianness) the elements to memory.
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ // Ignore undef elements.
+ if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ unsigned Offset = TypeByteSize*i;
+
+ SDOperand Idx = DAG.getConstant(Offset, FIPtr.getValueType());
+ Idx = DAG.getNode(ISD::ADD, FIPtr.getValueType(), FIPtr, Idx);
+
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), Node->getOperand(i), Idx,
+ NULL, 0));
+ }
+
+ SDOperand StoreChain;
+ if (!Stores.empty()) // Not all undef elements?
+ StoreChain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &Stores[0], Stores.size());
+ else
+ StoreChain = DAG.getEntryNode();
+
+ // Result is a load from the stack slot.
+ return DAG.getLoad(VT, StoreChain, FIPtr, NULL, 0);
+}
+
+/// CreateStackTemporary - Create a stack temporary, suitable for holding the
+/// specified value type.
+SDOperand SelectionDAGLegalize::CreateStackTemporary(MVT::ValueType VT) {
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ unsigned ByteSize = MVT::getSizeInBits(VT)/8;
+ const Type *Ty = MVT::getTypeForValueType(VT);
+ unsigned StackAlign = (unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty);
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign);
+ return DAG.getFrameIndex(FrameIdx, TLI.getPointerTy());
+}
+
+void SelectionDAGLegalize::ExpandShiftParts(unsigned NodeOp,
+ SDOperand Op, SDOperand Amt,
+ SDOperand &Lo, SDOperand &Hi) {
+ // Expand the subcomponents.
+ SDOperand LHSL, LHSH;
+ ExpandOp(Op, LHSL, LHSH);
+
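+  // The *_PARTS node takes (Lo, Hi, Amt) and produces the low and high halves
+  // of the double-width shift result as its two values.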
+ SDOperand Ops[] = { LHSL, LHSH, Amt };
+ MVT::ValueType VT = LHSL.getValueType();
+ Lo = DAG.getNode(NodeOp, DAG.getNodeValueTypes(VT, VT), 2, Ops, 3);
+ Hi = Lo.getValue(1);
+}
+
+
+/// ExpandShift - Try to find a clever way to expand this shift operation into
+/// smaller elements. If we can't find a way that is more efficient than a
+/// libcall on this target, return false. Otherwise, return true with the
+/// low-parts expanded into Lo and Hi.
+bool SelectionDAGLegalize::ExpandShift(unsigned Opc, SDOperand Op,SDOperand Amt,
+ SDOperand &Lo, SDOperand &Hi) {
+ assert((Opc == ISD::SHL || Opc == ISD::SRA || Opc == ISD::SRL) &&
+ "This is not a shift!");
+
+ MVT::ValueType NVT = TLI.getTypeToTransformTo(Op.getValueType());
+ SDOperand ShAmt = LegalizeOp(Amt);
+ MVT::ValueType ShTy = ShAmt.getValueType();
+ unsigned VTBits = MVT::getSizeInBits(Op.getValueType());
+ unsigned NVTBits = MVT::getSizeInBits(NVT);
+
+ // Handle the case when Amt is an immediate. Other cases are currently broken
+ // and are disabled.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Amt.Val)) {
+ unsigned Cst = CN->getValue();
+ // Expand the incoming operand to be shifted, so that we have its parts
+ SDOperand InL, InH;
+ ExpandOp(Op, InL, InH);
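+    // In the general sub-word case (0 < Cst < NVTBits) each result half is a
+    // shift of one input half OR'd with the bits that cross over from the
+    // other half (shifted by NVTBits-Cst in the opposite direction).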
+ switch(Opc) {
+ case ISD::SHL:
+ if (Cst > VTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = DAG.getConstant(0, NVT);
+ } else if (Cst > NVTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = DAG.getNode(ISD::SHL, NVT, InL, DAG.getConstant(Cst-NVTBits,ShTy));
+ } else if (Cst == NVTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = InL;
+ } else {
+ Lo = DAG.getNode(ISD::SHL, NVT, InL, DAG.getConstant(Cst, ShTy));
+ Hi = DAG.getNode(ISD::OR, NVT,
+ DAG.getNode(ISD::SHL, NVT, InH, DAG.getConstant(Cst, ShTy)),
+ DAG.getNode(ISD::SRL, NVT, InL, DAG.getConstant(NVTBits-Cst, ShTy)));
+ }
+ return true;
+ case ISD::SRL:
+ if (Cst > VTBits) {
+ Lo = DAG.getConstant(0, NVT);
+ Hi = DAG.getConstant(0, NVT);
+ } else if (Cst > NVTBits) {
+ Lo = DAG.getNode(ISD::SRL, NVT, InH, DAG.getConstant(Cst-NVTBits,ShTy));
+ Hi = DAG.getConstant(0, NVT);
+ } else if (Cst == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getConstant(0, NVT);
+ } else {
+ Lo = DAG.getNode(ISD::OR, NVT,
+ DAG.getNode(ISD::SRL, NVT, InL, DAG.getConstant(Cst, ShTy)),
+ DAG.getNode(ISD::SHL, NVT, InH, DAG.getConstant(NVTBits-Cst, ShTy)));
+ Hi = DAG.getNode(ISD::SRL, NVT, InH, DAG.getConstant(Cst, ShTy));
+ }
+ return true;
+ case ISD::SRA:
+ if (Cst > VTBits) {
+ Hi = Lo = DAG.getNode(ISD::SRA, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else if (Cst > NVTBits) {
+ Lo = DAG.getNode(ISD::SRA, NVT, InH,
+ DAG.getConstant(Cst-NVTBits, ShTy));
+ Hi = DAG.getNode(ISD::SRA, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else if (Cst == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getNode(ISD::SRA, NVT, InH,
+ DAG.getConstant(NVTBits-1, ShTy));
+ } else {
+ Lo = DAG.getNode(ISD::OR, NVT,
+ DAG.getNode(ISD::SRL, NVT, InL, DAG.getConstant(Cst, ShTy)),
+ DAG.getNode(ISD::SHL, NVT, InH, DAG.getConstant(NVTBits-Cst, ShTy)));
+ Hi = DAG.getNode(ISD::SRA, NVT, InH, DAG.getConstant(Cst, ShTy));
+ }
+ return true;
+ }
+ }
+
+ // Okay, the shift amount isn't constant. However, if we can tell that it is
+ // >= 32 or < 32, we can still simplify it, without knowing the actual value.
+ uint64_t Mask = NVTBits, KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(Amt, Mask, KnownZero, KnownOne);
+
+ // If we know that the high bit of the shift amount is one, then we can do
+ // this as a couple of simple shifts.
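+  // (The amount is then known to be >= NVTBits, so one input half is shifted
+  // out entirely and only the other half contributes.)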
+ if (KnownOne & Mask) {
+ // Mask out the high bit, which we know is set.
+ Amt = DAG.getNode(ISD::AND, Amt.getValueType(), Amt,
+ DAG.getConstant(NVTBits-1, Amt.getValueType()));
+
+ // Expand the incoming operand to be shifted, so that we have its parts
+ SDOperand InL, InH;
+ ExpandOp(Op, InL, InH);
+ switch(Opc) {
+ case ISD::SHL:
+ Lo = DAG.getConstant(0, NVT); // Low part is zero.
+ Hi = DAG.getNode(ISD::SHL, NVT, InL, Amt); // High part from Lo part.
+ return true;
+ case ISD::SRL:
+ Hi = DAG.getConstant(0, NVT); // Hi part is zero.
+ Lo = DAG.getNode(ISD::SRL, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ case ISD::SRA:
+ Hi = DAG.getNode(ISD::SRA, NVT, InH, // Sign extend high part.
+ DAG.getConstant(NVTBits-1, Amt.getValueType()));
+ Lo = DAG.getNode(ISD::SRA, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ }
+ }
+
+ // If we know that the high bit of the shift amount is zero, then we can do
+ // this as a couple of simple shifts.
+ if (KnownZero & Mask) {
+ // Compute 32-amt.
+ SDOperand Amt2 = DAG.getNode(ISD::SUB, Amt.getValueType(),
+ DAG.getConstant(NVTBits, Amt.getValueType()),
+ Amt);
+
+ // Expand the incoming operand to be shifted, so that we have its parts
+ SDOperand InL, InH;
+ ExpandOp(Op, InL, InH);
+ switch(Opc) {
+ case ISD::SHL:
+ Lo = DAG.getNode(ISD::SHL, NVT, InL, Amt);
+ Hi = DAG.getNode(ISD::OR, NVT,
+ DAG.getNode(ISD::SHL, NVT, InH, Amt),
+ DAG.getNode(ISD::SRL, NVT, InL, Amt2));
+ return true;
+ case ISD::SRL:
+ Hi = DAG.getNode(ISD::SRL, NVT, InH, Amt);
+ Lo = DAG.getNode(ISD::OR, NVT,
+ DAG.getNode(ISD::SRL, NVT, InL, Amt),
+ DAG.getNode(ISD::SHL, NVT, InH, Amt2));
+ return true;
+ case ISD::SRA:
+ Hi = DAG.getNode(ISD::SRA, NVT, InH, Amt);
+ Lo = DAG.getNode(ISD::OR, NVT,
+ DAG.getNode(ISD::SRL, NVT, InL, Amt),
+ DAG.getNode(ISD::SHL, NVT, InH, Amt2));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+// ExpandLibCall - Expand a node into a call to a libcall. If the result value
+// does not fit into a register, return the lo part and set the hi part to the
+// by-reg argument. If it does fit into a single register, return the result
+// and leave the Hi part unset.
+SDOperand SelectionDAGLegalize::ExpandLibCall(const char *Name, SDNode *Node,
+ bool isSigned, SDOperand &Hi) {
+ assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence.
+ SDOperand InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ MVT::ValueType ArgVT = Node->getOperand(i).getValueType();
+ const Type *ArgTy = MVT::getTypeForValueType(ArgVT);
+ Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Args.push_back(Entry);
+ }
+ SDOperand Callee = DAG.getExternalSymbol(Name, TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ const Type *RetTy = MVT::getTypeForValueType(Node->getValueType(0));
+ std::pair<SDOperand,SDOperand> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, false, CallingConv::C, false,
+ Callee, Args, DAG);
+
+ // Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+ SDOperand Result;
+ switch (getTypeAction(CallInfo.first.getValueType())) {
+ default: assert(0 && "Unknown thing");
+ case Legal:
+ Result = CallInfo.first;
+ break;
+ case Expand:
+ ExpandOp(CallInfo.first, Result, Hi);
+ break;
+ }
+ return Result;
+}
+
+
+/// ExpandIntToFP - Expand a [US]INT_TO_FP operation.
+///
+SDOperand SelectionDAGLegalize::
+ExpandIntToFP(bool isSigned, MVT::ValueType DestTy, SDOperand Source) {
+ assert(getTypeAction(Source.getValueType()) == Expand &&
+ "This is not an expansion!");
+ assert(Source.getValueType() == MVT::i64 && "Only handle expand from i64!");
+
+ if (!isSigned) {
+ assert(Source.getValueType() == MVT::i64 &&
+ "This only works for 64-bit -> FP");
+    // The 64-bit value loaded will be incorrect if the 'sign bit' of the
+ // incoming integer is set. To handle this, we dynamically test to see if
+ // it is set, and, if so, add a fudge factor.
+ SDOperand Lo, Hi;
+ ExpandOp(Source, Lo, Hi);
+
+ // If this is unsigned, and not supported, first perform the conversion to
+ // signed, then adjust the result if the sign bit is set.
+ SDOperand SignedConv = ExpandIntToFP(true, DestTy,
+ DAG.getNode(ISD::BUILD_PAIR, Source.getValueType(), Lo, Hi));
+
+ SDOperand SignSet = DAG.getSetCC(TLI.getSetCCResultTy(), Hi,
+ DAG.getConstant(0, Hi.getValueType()),
+ ISD::SETLT);
+ SDOperand Zero = getIntPtrConstant(0), Four = getIntPtrConstant(4);
+ SDOperand CstOffset = DAG.getNode(ISD::SELECT, Zero.getValueType(),
+ SignSet, Four, Zero);
+ uint64_t FF = 0x5f800000ULL;
+ if (TLI.isLittleEndian()) FF <<= 32;
+ static Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF);
+
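+    // 0x5f800000 is 2^64 as an IEEE single. The signed conversion above came
+    // out 2^64 too low when the sign bit of the i64 was set, so CstOffset
+    // selects either 0.0f or 2^64 out of this constant and it is added back.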
+ SDOperand CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
+ CPIdx = DAG.getNode(ISD::ADD, TLI.getPointerTy(), CPIdx, CstOffset);
+ SDOperand FudgeInReg;
+ if (DestTy == MVT::f32)
+ FudgeInReg = DAG.getLoad(MVT::f32, DAG.getEntryNode(), CPIdx, NULL, 0);
+ else {
+ assert(DestTy == MVT::f64 && "Unexpected conversion");
+      // FIXME: Avoid the extend by constructing the right constant pool?
+ FudgeInReg = DAG.getExtLoad(ISD::EXTLOAD, MVT::f64, DAG.getEntryNode(),
+ CPIdx, NULL, 0, MVT::f32);
+ }
+ MVT::ValueType SCVT = SignedConv.getValueType();
+ if (SCVT != DestTy) {
+      // Destination type needs to be expanded as well. The FADD we are now
+      // constructing will be expanded into a libcall.
+ if (MVT::getSizeInBits(SCVT) != MVT::getSizeInBits(DestTy)) {
+ assert(SCVT == MVT::i32 && DestTy == MVT::f64);
+ SignedConv = DAG.getNode(ISD::BUILD_PAIR, MVT::i64,
+ SignedConv, SignedConv.getValue(1));
+ }
+ SignedConv = DAG.getNode(ISD::BIT_CONVERT, DestTy, SignedConv);
+ }
+ return DAG.getNode(ISD::FADD, DestTy, SignedConv, FudgeInReg);
+ }
+
+ // Check to see if the target has a custom way to lower this. If so, use it.
+ switch (TLI.getOperationAction(ISD::SINT_TO_FP, Source.getValueType())) {
+ default: assert(0 && "This action not implemented for this operation!");
+ case TargetLowering::Legal:
+ case TargetLowering::Expand:
+ break; // This case is handled below.
+ case TargetLowering::Custom: {
+ SDOperand NV = TLI.LowerOperation(DAG.getNode(ISD::SINT_TO_FP, DestTy,
+ Source), DAG);
+ if (NV.Val)
+ return LegalizeOp(NV);
+ break; // The target decided this was legal after all
+ }
+ }
+
+ // Expand the source, then glue it back together for the call. We must expand
+ // the source in case it is shared (this pass of legalize must traverse it).
+ SDOperand SrcLo, SrcHi;
+ ExpandOp(Source, SrcLo, SrcHi);
+ Source = DAG.getNode(ISD::BUILD_PAIR, Source.getValueType(), SrcLo, SrcHi);
+
+ RTLIB::Libcall LC;
+ if (DestTy == MVT::f32)
+ LC = RTLIB::SINTTOFP_I64_F32;
+ else {
+ assert(DestTy == MVT::f64 && "Unknown fp value type!");
+ LC = RTLIB::SINTTOFP_I64_F64;
+ }
+
+ assert(TLI.getLibcallName(LC) && "Don't know how to expand this SINT_TO_FP!");
+ Source = DAG.getNode(ISD::SINT_TO_FP, DestTy, Source);
+ SDOperand UnusedHiPart;
+ return ExpandLibCall(TLI.getLibcallName(LC), Source.Val, isSigned,
+ UnusedHiPart);
+}
+
+/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
+/// INT_TO_FP operation of the specified operand when the target requests that
+/// we expand it. At this point, we know that the result and operand types are
+/// legal for the target.
+SDOperand SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
+ SDOperand Op0,
+ MVT::ValueType DestVT) {
+ if (Op0.getValueType() == MVT::i32) {
+ // simple 32-bit [signed|unsigned] integer to float/double expansion
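+    //
+    // The trick: store the 32-bit integer as the low word of a double whose
+    // high word is 0x43300000 (the exponent of 2^52). The value of that
+    // double is exactly 2^52 + bits, so subtracting the matching bias
+    // constant (2^52, or 2^52 + 2^31 after the sign-bit flip in the signed
+    // case) recovers the original integer value exactly.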
+
+    // get the stack frame index of an 8-byte buffer, pessimistically aligned
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Type *F64Type = MVT::getTypeForValueType(MVT::f64);
+ unsigned StackAlign =
+ (unsigned)TLI.getTargetData()->getPrefTypeAlignment(F64Type);
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, StackAlign);
+ // get address of 8 byte buffer
+ SDOperand StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ // word offset constant for Hi/Lo address computation
+ SDOperand WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy());
+ // set up Hi and Lo (into buffer) address based on endian
+ SDOperand Hi = StackSlot;
+ SDOperand Lo = DAG.getNode(ISD::ADD, TLI.getPointerTy(), StackSlot,WordOff);
+ if (TLI.isLittleEndian())
+ std::swap(Hi, Lo);
+
+ // if signed map to unsigned space
+ SDOperand Op0Mapped;
+ if (isSigned) {
+ // constant used to invert sign bit (signed to unsigned mapping)
+ SDOperand SignBit = DAG.getConstant(0x80000000u, MVT::i32);
+ Op0Mapped = DAG.getNode(ISD::XOR, MVT::i32, Op0, SignBit);
+ } else {
+ Op0Mapped = Op0;
+ }
+ // store the lo of the constructed double - based on integer input
+ SDOperand Store1 = DAG.getStore(DAG.getEntryNode(),
+ Op0Mapped, Lo, NULL, 0);
+ // initial hi portion of constructed double
+ SDOperand InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
+ // store the hi of the constructed double - biased exponent
+ SDOperand Store2=DAG.getStore(Store1, InitialHi, Hi, NULL, 0);
+ // load the constructed double
+ SDOperand Load = DAG.getLoad(MVT::f64, Store2, StackSlot, NULL, 0);
+ // FP constant to bias correct the final result
+ SDOperand Bias = DAG.getConstantFP(isSigned ?
+ BitsToDouble(0x4330000080000000ULL)
+ : BitsToDouble(0x4330000000000000ULL),
+ MVT::f64);
+ // subtract the bias
+ SDOperand Sub = DAG.getNode(ISD::FSUB, MVT::f64, Load, Bias);
+ // final result
+ SDOperand Result;
+ // handle final rounding
+ if (DestVT == MVT::f64) {
+ // do nothing
+ Result = Sub;
+ } else {
+      // destination is f32, so round the f64 result down to f32
+ Result = DAG.getNode(ISD::FP_ROUND, MVT::f32, Sub);
+ }
+ return Result;
+ }
+ assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+ SDOperand Tmp1 = DAG.getNode(ISD::SINT_TO_FP, DestVT, Op0);
+
+ SDOperand SignSet = DAG.getSetCC(TLI.getSetCCResultTy(), Op0,
+ DAG.getConstant(0, Op0.getValueType()),
+ ISD::SETLT);
+ SDOperand Zero = getIntPtrConstant(0), Four = getIntPtrConstant(4);
+ SDOperand CstOffset = DAG.getNode(ISD::SELECT, Zero.getValueType(),
+ SignSet, Four, Zero);
+
+ // If the sign bit of the integer is set, the large number will be treated
+ // as a negative number. To counteract this, the dynamic code adds an
+ // offset depending on the data type.
+ uint64_t FF;
+ switch (Op0.getValueType()) {
+ default: assert(0 && "Unsupported integer type!");
+ case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
+ case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
+ case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
+ case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float)
+ }
+ if (TLI.isLittleEndian()) FF <<= 32;
+ static Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF);
+
+ SDOperand CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
+ CPIdx = DAG.getNode(ISD::ADD, TLI.getPointerTy(), CPIdx, CstOffset);
+ SDOperand FudgeInReg;
+ if (DestVT == MVT::f32)
+ FudgeInReg = DAG.getLoad(MVT::f32, DAG.getEntryNode(), CPIdx, NULL, 0);
+ else {
+ assert(DestVT == MVT::f64 && "Unexpected conversion");
+ FudgeInReg = LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, MVT::f64,
+ DAG.getEntryNode(), CPIdx,
+ NULL, 0, MVT::f32));
+ }
+
+ return DAG.getNode(ISD::FADD, DestVT, Tmp1, FudgeInReg);
+}
+
+/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a
+/// *INT_TO_FP operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
+/// operation that takes a larger input.
+SDOperand SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDOperand LegalOp,
+ MVT::ValueType DestVT,
+ bool isSigned) {
+ // First step, figure out the appropriate *INT_TO_FP operation to use.
+ MVT::ValueType NewInTy = LegalOp.getValueType();
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewInTy = (MVT::ValueType)(NewInTy+1);
+ assert(MVT::isInteger(NewInTy) && "Ran out of possibilities!");
+
+ // If the target supports SINT_TO_FP of this type, use it.
+ switch (TLI.getOperationAction(ISD::SINT_TO_FP, NewInTy)) {
+ default: break;
+ case TargetLowering::Legal:
+ if (!TLI.isTypeLegal(NewInTy))
+ break; // Can't use this datatype.
+ // FALL THROUGH.
+ case TargetLowering::Custom:
+ OpToUse = ISD::SINT_TO_FP;
+ break;
+ }
+ if (OpToUse) break;
+ if (isSigned) continue;
+
+ // If the target supports UINT_TO_FP of this type, use it.
+ switch (TLI.getOperationAction(ISD::UINT_TO_FP, NewInTy)) {
+ default: break;
+ case TargetLowering::Legal:
+ if (!TLI.isTypeLegal(NewInTy))
+ break; // Can't use this datatype.
+ // FALL THROUGH.
+ case TargetLowering::Custom:
+ OpToUse = ISD::UINT_TO_FP;
+ break;
+ }
+ if (OpToUse) break;
+
+ // Otherwise, try a larger type.
+ }
+
+  // Okay, we found the operation and type to use. Sign or zero extend our
+  // input to the desired type, then run the operation on it.
+ return DAG.getNode(OpToUse, DestVT,
+ DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ NewInTy, LegalOp));
+}
+
+/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a
+/// FP_TO_*INT operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
+/// operation that returns a larger result.
+SDOperand SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDOperand LegalOp,
+ MVT::ValueType DestVT,
+ bool isSigned) {
+ // First step, figure out the appropriate FP_TO*INT operation to use.
+ MVT::ValueType NewOutTy = DestVT;
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewOutTy = (MVT::ValueType)(NewOutTy+1);
+ assert(MVT::isInteger(NewOutTy) && "Ran out of possibilities!");
+
+ // If the target supports FP_TO_SINT returning this type, use it.
+ switch (TLI.getOperationAction(ISD::FP_TO_SINT, NewOutTy)) {
+ default: break;
+ case TargetLowering::Legal:
+ if (!TLI.isTypeLegal(NewOutTy))
+ break; // Can't use this datatype.
+ // FALL THROUGH.
+ case TargetLowering::Custom:
+ OpToUse = ISD::FP_TO_SINT;
+ break;
+ }
+ if (OpToUse) break;
+
+ // If the target supports FP_TO_UINT of this type, use it.
+ switch (TLI.getOperationAction(ISD::FP_TO_UINT, NewOutTy)) {
+ default: break;
+ case TargetLowering::Legal:
+ if (!TLI.isTypeLegal(NewOutTy))
+ break; // Can't use this datatype.
+ // FALL THROUGH.
+ case TargetLowering::Custom:
+ OpToUse = ISD::FP_TO_UINT;
+ break;
+ }
+ if (OpToUse) break;
+
+ // Otherwise, try a larger type.
+ }
+
+ // Okay, we found the operation and type to use. Truncate the result of the
+ // extended FP_TO_*INT operation to the desired size.
+ return DAG.getNode(ISD::TRUNCATE, DestVT,
+ DAG.getNode(OpToUse, NewOutTy, LegalOp));
+}
+
+/// ExpandBSWAP - Open code the operations for BSWAP of the specified operation.
+///
+SDOperand SelectionDAGLegalize::ExpandBSWAP(SDOperand Op) {
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType SHVT = TLI.getShiftAmountTy();
+ SDOperand Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+ switch (VT) {
+ default: assert(0 && "Unhandled Expand type in BSWAP!"); abort();
+ case MVT::i16:
+ Tmp2 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(8, SHVT));
+ return DAG.getNode(ISD::OR, VT, Tmp1, Tmp2);
+ case MVT::i32:
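+    // Move each byte to its mirrored position and OR the pieces together,
+    // e.g. 0x12345678 -> 0x78563412.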
+ Tmp4 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp3 = DAG.getNode(ISD::AND, VT, Tmp3, DAG.getConstant(0xFF0000, VT));
+ Tmp2 = DAG.getNode(ISD::AND, VT, Tmp2, DAG.getConstant(0xFF00, VT));
+ Tmp4 = DAG.getNode(ISD::OR, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, VT, Tmp2, Tmp1);
+ return DAG.getNode(ISD::OR, VT, Tmp4, Tmp2);
+ case MVT::i64:
+ Tmp8 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(56, SHVT));
+ Tmp7 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(40, SHVT));
+ Tmp6 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp5 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp4 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(8, SHVT));
+ Tmp3 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(24, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(40, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(56, SHVT));
+ Tmp7 = DAG.getNode(ISD::AND, VT, Tmp7, DAG.getConstant(255ULL<<48, VT));
+ Tmp6 = DAG.getNode(ISD::AND, VT, Tmp6, DAG.getConstant(255ULL<<40, VT));
+ Tmp5 = DAG.getNode(ISD::AND, VT, Tmp5, DAG.getConstant(255ULL<<32, VT));
+ Tmp4 = DAG.getNode(ISD::AND, VT, Tmp4, DAG.getConstant(255ULL<<24, VT));
+ Tmp3 = DAG.getNode(ISD::AND, VT, Tmp3, DAG.getConstant(255ULL<<16, VT));
+ Tmp2 = DAG.getNode(ISD::AND, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT));
+ Tmp8 = DAG.getNode(ISD::OR, VT, Tmp8, Tmp7);
+ Tmp6 = DAG.getNode(ISD::OR, VT, Tmp6, Tmp5);
+ Tmp4 = DAG.getNode(ISD::OR, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, VT, Tmp2, Tmp1);
+ Tmp8 = DAG.getNode(ISD::OR, VT, Tmp8, Tmp6);
+ Tmp4 = DAG.getNode(ISD::OR, VT, Tmp4, Tmp2);
+ return DAG.getNode(ISD::OR, VT, Tmp8, Tmp4);
+ }
+}
+
+/// ExpandBitCount - Expand the specified bitcount instruction into operations.
+///
+SDOperand SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDOperand Op) {
+ switch (Opc) {
+ default: assert(0 && "Cannot expand this yet!");
+ case ISD::CTPOP: {
+ static const uint64_t mask[6] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+ };
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType ShVT = TLI.getShiftAmountTy();
+ unsigned len = MVT::getSizeInBits(VT);
+ for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+      // x = (x & mask[i]) + ((x >> (1 << i)) & mask[i])
+ SDOperand Tmp2 = DAG.getConstant(mask[i], VT);
+ SDOperand Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+ Op = DAG.getNode(ISD::ADD, VT, DAG.getNode(ISD::AND, VT, Op, Tmp2),
+ DAG.getNode(ISD::AND, VT,
+ DAG.getNode(ISD::SRL, VT, Op, Tmp3),Tmp2));
+ }
+ return Op;
+ }
+ case ISD::CTLZ: {
+ // for now, we do this:
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >>16);
+ // x = x | (x >>32); // for 64-bit input
+ // return popcount(~x);
+ //
+ // but see also: http://www.hackersdelight.org/HDcode/nlz.cc
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType ShVT = TLI.getShiftAmountTy();
+ unsigned len = MVT::getSizeInBits(VT);
+ for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+ SDOperand Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+ Op = DAG.getNode(ISD::OR, VT, Op, DAG.getNode(ISD::SRL, VT, Op, Tmp3));
+ }
+ Op = DAG.getNode(ISD::XOR, VT, Op, DAG.getConstant(~0ULL, VT));
+ return DAG.getNode(ISD::CTPOP, VT, Op);
+ }
+ case ISD::CTTZ: {
+ // for now, we use: { return popcount(~x & (x - 1)); }
+ // unless the target has ctlz but not ctpop, in which case we use:
+ // { return 32 - nlz(~x & (x-1)); }
+ // see also http://www.hackersdelight.org/HDcode/ntz.cc
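+    // (~x & (x - 1) sets exactly the bits below the lowest set bit of x, and
+    // is all ones when x == 0, so its population count is the number of
+    // trailing zeros.)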
+ MVT::ValueType VT = Op.getValueType();
+ SDOperand Tmp2 = DAG.getConstant(~0ULL, VT);
+ SDOperand Tmp3 = DAG.getNode(ISD::AND, VT,
+ DAG.getNode(ISD::XOR, VT, Op, Tmp2),
+ DAG.getNode(ISD::SUB, VT, Op, DAG.getConstant(1, VT)));
+ // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
+ if (!TLI.isOperationLegal(ISD::CTPOP, VT) &&
+ TLI.isOperationLegal(ISD::CTLZ, VT))
+ return DAG.getNode(ISD::SUB, VT,
+ DAG.getConstant(MVT::getSizeInBits(VT), VT),
+ DAG.getNode(ISD::CTLZ, VT, Tmp3));
+ return DAG.getNode(ISD::CTPOP, VT, Tmp3);
+ }
+ }
+}
+
+/// ExpandOp - Expand the specified SDOperand into its two component pieces
+/// Lo and Hi. Note that Op MUST be of an expanded type. As a result, the
+/// LegalizedNodes map is filled in for any results that are not expanded, the
+/// ExpandedNodes map is filled in for any results that are expanded, and the
+/// Lo/Hi values are returned.
+void SelectionDAGLegalize::ExpandOp(SDOperand Op, SDOperand &Lo, SDOperand &Hi){
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType NVT = TLI.getTypeToTransformTo(VT);
+ SDNode *Node = Op.Val;
+ assert(getTypeAction(VT) == Expand && "Not an expanded type!");
+ assert(((MVT::isInteger(NVT) && NVT < VT) || MVT::isFloatingPoint(VT) ||
+ MVT::isVector(VT)) &&
+ "Cannot expand to FP value or to larger int value!");
+
+ // See if we already expanded it.
+ DenseMap<SDOperand, std::pair<SDOperand, SDOperand> >::iterator I
+ = ExpandedNodes.find(Op);
+ if (I != ExpandedNodes.end()) {
+ Lo = I->second.first;
+ Hi = I->second.second;
+ return;
+ }
+
+ switch (Node->getOpcode()) {
+ case ISD::CopyFromReg:
+ assert(0 && "CopyFromReg must be legal!");
+ default:
+#ifndef NDEBUG
+ cerr << "NODE: "; Node->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Do not know how to expand this operator!");
+ abort();
+ case ISD::UNDEF:
+ NVT = TLI.getTypeToExpandTo(VT);
+ Lo = DAG.getNode(ISD::UNDEF, NVT);
+ Hi = DAG.getNode(ISD::UNDEF, NVT);
+ break;
+ case ISD::Constant: {
+ uint64_t Cst = cast<ConstantSDNode>(Node)->getValue();
+ Lo = DAG.getConstant(Cst, NVT);
+ Hi = DAG.getConstant(Cst >> MVT::getSizeInBits(NVT), NVT);
+ break;
+ }
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+ Lo = ExpandConstantFP(CFP, false, DAG, TLI);
+ if (getTypeAction(Lo.getValueType()) == Expand)
+ ExpandOp(Lo, Lo, Hi);
+ break;
+ }
+ case ISD::BUILD_PAIR:
+ // Return the operands.
+ Lo = Node->getOperand(0);
+ Hi = Node->getOperand(1);
+ break;
+
+ case ISD::SIGN_EXTEND_INREG:
+ ExpandOp(Node->getOperand(0), Lo, Hi);
+ // sext_inreg the low part if needed.
+ Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Lo, Node->getOperand(1));
+
+ // The high part gets the sign extension from the lo-part. This handles
+ // things like sextinreg V:i64 from i8.
+ Hi = DAG.getNode(ISD::SRA, NVT, Lo,
+ DAG.getConstant(MVT::getSizeInBits(NVT)-1,
+ TLI.getShiftAmountTy()));
+ break;
+
+ case ISD::BSWAP: {
+ ExpandOp(Node->getOperand(0), Lo, Hi);
+ SDOperand TempLo = DAG.getNode(ISD::BSWAP, NVT, Hi);
+ Hi = DAG.getNode(ISD::BSWAP, NVT, Lo);
+ Lo = TempLo;
+ break;
+ }
+
+ case ISD::CTPOP:
+ ExpandOp(Node->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::ADD, NVT, // ctpop(HL) -> ctpop(H)+ctpop(L)
+ DAG.getNode(ISD::CTPOP, NVT, Lo),
+ DAG.getNode(ISD::CTPOP, NVT, Hi));
+ Hi = DAG.getConstant(0, NVT);
+ break;
+
+ case ISD::CTLZ: {
+ // ctlz (HL) -> ctlz(H) != 32 ? ctlz(H) : (ctlz(L)+32)
+ ExpandOp(Node->getOperand(0), Lo, Hi);
+ SDOperand BitsC = DAG.getConstant(MVT::getSizeInBits(NVT), NVT);
+ SDOperand HLZ = DAG.getNode(ISD::CTLZ, NVT, Hi);
+ SDOperand TopNotZero = DAG.getSetCC(TLI.getSetCCResultTy(), HLZ, BitsC,
+ ISD::SETNE);
+ SDOperand LowPart = DAG.getNode(ISD::CTLZ, NVT, Lo);
+ LowPart = DAG.getNode(ISD::ADD, NVT, LowPart, BitsC);
+
+ Lo = DAG.getNode(ISD::SELECT, NVT, TopNotZero, HLZ, LowPart);
+ Hi = DAG.getConstant(0, NVT);
+ break;
+ }
+
+ case ISD::CTTZ: {
+ // cttz (HL) -> cttz(L) != 32 ? cttz(L) : (cttz(H)+32)
+ ExpandOp(Node->getOperand(0), Lo, Hi);
+ SDOperand BitsC = DAG.getConstant(MVT::getSizeInBits(NVT), NVT);
+ SDOperand LTZ = DAG.getNode(ISD::CTTZ, NVT, Lo);
+ SDOperand BotNotZero = DAG.getSetCC(TLI.getSetCCResultTy(), LTZ, BitsC,
+ ISD::SETNE);
+ SDOperand HiPart = DAG.getNode(ISD::CTTZ, NVT, Hi);
+ HiPart = DAG.getNode(ISD::ADD, NVT, HiPart, BitsC);
+
+ Lo = DAG.getNode(ISD::SELECT, NVT, BotNotZero, LTZ, HiPart);
+ Hi = DAG.getConstant(0, NVT);
+ break;
+ }
+
+ case ISD::VAARG: {
+ SDOperand Ch = Node->getOperand(0); // Legalize the chain.
+ SDOperand Ptr = Node->getOperand(1); // Legalize the pointer.
+ Lo = DAG.getVAArg(NVT, Ch, Ptr, Node->getOperand(2));
+ Hi = DAG.getVAArg(NVT, Lo.getValue(1), Ptr, Node->getOperand(2));
+
+ // Remember that we legalized the chain.
+ Hi = LegalizeOp(Hi);
+ AddLegalizedOperand(Op.getValue(1), Hi.getValue(1));
+ if (!TLI.isLittleEndian())
+ std::swap(Lo, Hi);
+ break;
+ }
+
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ SDOperand Ch = LD->getChain(); // Legalize the chain.
+ SDOperand Ptr = LD->getBasePtr(); // Legalize the pointer.
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ int SVOffset = LD->getSrcValueOffset();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+
+ if (ExtType == ISD::NON_EXTLOAD) {
+ Lo = DAG.getLoad(NVT, Ch, Ptr, LD->getSrcValue(), SVOffset,
+ isVolatile, Alignment);
+ if (VT == MVT::f32 || VT == MVT::f64) {
+ // f32->i32 or f64->i64 one to one expansion.
+ // Remember that we legalized the chain.
+ AddLegalizedOperand(SDOperand(Node, 1), LegalizeOp(Lo.getValue(1)));
+ // Recursively expand the new load.
+ if (getTypeAction(NVT) == Expand)
+ ExpandOp(Lo, Lo, Hi);
+ break;
+ }
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = MVT::getSizeInBits(Lo.getValueType())/8;
+ Ptr = DAG.getNode(ISD::ADD, Ptr.getValueType(), Ptr,
+ getIntPtrConstant(IncrementSize));
+ SVOffset += IncrementSize;
+ if (Alignment > IncrementSize)
+ Alignment = IncrementSize;
+ Hi = DAG.getLoad(NVT, Ch, Ptr, LD->getSrcValue(), SVOffset,
+ isVolatile, Alignment);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ SDOperand TF = DAG.getNode(ISD::TokenFactor, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Remember that we legalized the chain.
+ AddLegalizedOperand(Op.getValue(1), LegalizeOp(TF));
+ if (!TLI.isLittleEndian())
+ std::swap(Lo, Hi);
+ } else {
+ MVT::ValueType EVT = LD->getLoadedVT();
+
+ if (VT == MVT::f64 && EVT == MVT::f32) {
+ // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND
+ SDOperand Load = DAG.getLoad(EVT, Ch, Ptr, LD->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ // Remember that we legalized the chain.
+ AddLegalizedOperand(SDOperand(Node, 1), LegalizeOp(Load.getValue(1)));
+ ExpandOp(DAG.getNode(ISD::FP_EXTEND, VT, Load), Lo, Hi);
+ break;
+ }
+
+ if (EVT == NVT)
+ Lo = DAG.getLoad(NVT, Ch, Ptr, LD->getSrcValue(),
+ SVOffset, isVolatile, Alignment);
+ else
+ Lo = DAG.getExtLoad(ExtType, NVT, Ch, Ptr, LD->getSrcValue(),
+ SVOffset, EVT, isVolatile,
+ Alignment);
+
+ // Remember that we legalized the chain.
+ AddLegalizedOperand(SDOperand(Node, 1), LegalizeOp(Lo.getValue(1)));
+
+ if (ExtType == ISD::SEXTLOAD) {
+ // The high part is obtained by SRA'ing all but one of the bits of the
+ // lo part.
+ unsigned LoSize = MVT::getSizeInBits(Lo.getValueType());
+ Hi = DAG.getNode(ISD::SRA, NVT, Lo,
+ DAG.getConstant(LoSize-1, TLI.getShiftAmountTy()));
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ // The high part is just a zero.
+ Hi = DAG.getConstant(0, NVT);
+ } else /* if (ExtType == ISD::EXTLOAD) */ {
+ // The high part is undefined.
+ Hi = DAG.getNode(ISD::UNDEF, NVT);
+ }
+ }
+ break;
+ }
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: { // Simple logical operators -> two trivial pieces.
+ SDOperand LL, LH, RL, RH;
+ ExpandOp(Node->getOperand(0), LL, LH);
+ ExpandOp(Node->getOperand(1), RL, RH);
+ Lo = DAG.getNode(Node->getOpcode(), NVT, LL, RL);
+ Hi = DAG.getNode(Node->getOpcode(), NVT, LH, RH);
+ break;
+ }
+ case ISD::SELECT: {
+ SDOperand LL, LH, RL, RH;
+ ExpandOp(Node->getOperand(1), LL, LH);
+ ExpandOp(Node->getOperand(2), RL, RH);
+ if (getTypeAction(NVT) == Expand)
+ NVT = TLI.getTypeToExpandTo(NVT);
+ Lo = DAG.getNode(ISD::SELECT, NVT, Node->getOperand(0), LL, RL);
+ if (VT != MVT::f32)
+ Hi = DAG.getNode(ISD::SELECT, NVT, Node->getOperand(0), LH, RH);
+ break;
+ }
+ case ISD::SELECT_CC: {
+ SDOperand TL, TH, FL, FH;
+ ExpandOp(Node->getOperand(2), TL, TH);
+ ExpandOp(Node->getOperand(3), FL, FH);
+ if (getTypeAction(NVT) == Expand)
+ NVT = TLI.getTypeToExpandTo(NVT);
+ Lo = DAG.getNode(ISD::SELECT_CC, NVT, Node->getOperand(0),
+ Node->getOperand(1), TL, FL, Node->getOperand(4));
+ if (VT != MVT::f32)
+ Hi = DAG.getNode(ISD::SELECT_CC, NVT, Node->getOperand(0),
+ Node->getOperand(1), TH, FH, Node->getOperand(4));
+ break;
+ }
+ case ISD::ANY_EXTEND:
+ // The low part is any extension of the input (which degenerates to a copy).
+ Lo = DAG.getNode(ISD::ANY_EXTEND, NVT, Node->getOperand(0));
+ // The high part is undefined.
+ Hi = DAG.getNode(ISD::UNDEF, NVT);
+ break;
+ case ISD::SIGN_EXTEND: {
+ // The low part is just a sign extension of the input (which degenerates to
+ // a copy).
+ Lo = DAG.getNode(ISD::SIGN_EXTEND, NVT, Node->getOperand(0));
+
+ // The high part is obtained by SRA'ing all but one of the bits of the lo
+ // part.
+ unsigned LoSize = MVT::getSizeInBits(Lo.getValueType());
+ Hi = DAG.getNode(ISD::SRA, NVT, Lo,
+ DAG.getConstant(LoSize-1, TLI.getShiftAmountTy()));
+ break;
+ }
+ case ISD::ZERO_EXTEND:
+ // The low part is just a zero extension of the input (which degenerates to
+ // a copy).
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, NVT, Node->getOperand(0));
+
+ // The high part is just a zero.
+ Hi = DAG.getConstant(0, NVT);
+ break;
+
+ case ISD::TRUNCATE: {
+ // The input value must be larger than this value. Expand *it*.
+ SDOperand NewLo;
+ ExpandOp(Node->getOperand(0), NewLo, Hi);
+
+ // The low part is now either the right size, or it is closer. If not the
+ // right size, make an illegal truncate so we recursively expand it.
+ if (NewLo.getValueType() != Node->getValueType(0))
+ NewLo = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0), NewLo);
+ ExpandOp(NewLo, Lo, Hi);
+ break;
+ }
+
+ case ISD::BIT_CONVERT: {
+ SDOperand Tmp;
+ if (TLI.getOperationAction(ISD::BIT_CONVERT, VT) == TargetLowering::Custom){
+ // If the target wants to, allow it to lower this itself.
+ switch (getTypeAction(Node->getOperand(0).getValueType())) {
+ case Expand: assert(0 && "cannot expand FP!");
+ case Legal: Tmp = LegalizeOp(Node->getOperand(0)); break;
+ case Promote: Tmp = PromoteOp (Node->getOperand(0)); break;
+ }
+ Tmp = TLI.LowerOperation(DAG.getNode(ISD::BIT_CONVERT, VT, Tmp), DAG);
+ }
+
+ // f32 / f64 must be expanded to i32 / i64.
+ if (VT == MVT::f32 || VT == MVT::f64) {
+ Lo = DAG.getNode(ISD::BIT_CONVERT, NVT, Node->getOperand(0));
+ if (getTypeAction(NVT) == Expand)
+ ExpandOp(Lo, Lo, Hi);
+ break;
+ }
+
+    // If the source operand will be expanded to the same type as VT, i.e.
+ // i64 <- f64, i32 <- f32, expand the source operand instead.
+ MVT::ValueType VT0 = Node->getOperand(0).getValueType();
+ if (getTypeAction(VT0) == Expand && TLI.getTypeToTransformTo(VT0) == VT) {
+ ExpandOp(Node->getOperand(0), Lo, Hi);
+ break;
+ }
+
+ // Turn this into a load/store pair by default.
+ if (Tmp.Val == 0)
+ Tmp = ExpandBIT_CONVERT(VT, Node->getOperand(0));
+
+ ExpandOp(Tmp, Lo, Hi);
+ break;
+ }
+
+ case ISD::READCYCLECOUNTER:
+ assert(TLI.getOperationAction(ISD::READCYCLECOUNTER, VT) ==
+ TargetLowering::Custom &&
+ "Must custom expand ReadCycleCounter");
+ Lo = TLI.LowerOperation(Op, DAG);
+ assert(Lo.Val && "Node must be custom expanded!");
+ Hi = Lo.getValue(1);
+ AddLegalizedOperand(SDOperand(Node, 1), // Remember we legalized the chain.
+ LegalizeOp(Lo.getValue(2)));
+ break;
+
+  // These operators cannot be expanded directly; emit them as calls to
+ // library functions.
+ case ISD::FP_TO_SINT: {
+ if (TLI.getOperationAction(ISD::FP_TO_SINT, VT) == TargetLowering::Custom) {
+ SDOperand Op;
+ switch (getTypeAction(Node->getOperand(0).getValueType())) {
+ case Expand: assert(0 && "cannot expand FP!");
+ case Legal: Op = LegalizeOp(Node->getOperand(0)); break;
+ case Promote: Op = PromoteOp (Node->getOperand(0)); break;
+ }
+
+ Op = TLI.LowerOperation(DAG.getNode(ISD::FP_TO_SINT, VT, Op), DAG);
+
+ // Now that the custom expander is done, expand the result, which is still
+ // VT.
+ if (Op.Val) {
+ ExpandOp(Op, Lo, Hi);
+ break;
+ }
+ }
+
+ RTLIB::Libcall LC;
+ if (Node->getOperand(0).getValueType() == MVT::f32)
+ LC = RTLIB::FPTOSINT_F32_I64;
+ else
+ LC = RTLIB::FPTOSINT_F64_I64;
+ Lo = ExpandLibCall(TLI.getLibcallName(LC), Node,
+ false/*sign irrelevant*/, Hi);
+ break;
+ }
+
+ case ISD::FP_TO_UINT: {
+ if (TLI.getOperationAction(ISD::FP_TO_UINT, VT) == TargetLowering::Custom) {
+ SDOperand Op;
+ switch (getTypeAction(Node->getOperand(0).getValueType())) {
+ case Expand: assert(0 && "cannot expand FP!");
+ case Legal: Op = LegalizeOp(Node->getOperand(0)); break;
+ case Promote: Op = PromoteOp (Node->getOperand(0)); break;
+ }
+
+ Op = TLI.LowerOperation(DAG.getNode(ISD::FP_TO_UINT, VT, Op), DAG);
+
+ // Now that the custom expander is done, expand the result.
+ if (Op.Val) {
+ ExpandOp(Op, Lo, Hi);
+ break;
+ }
+ }
+
+ RTLIB::Libcall LC;
+ if (Node->getOperand(0).getValueType() == MVT::f32)
+ LC = RTLIB::FPTOUINT_F32_I64;
+ else
+ LC = RTLIB::FPTOUINT_F64_I64;
+ Lo = ExpandLibCall(TLI.getLibcallName(LC), Node,
+ false/*sign irrelevant*/, Hi);
+ break;
+ }
+
+ case ISD::SHL: {
+ // If the target wants custom lowering, do so.
+ SDOperand ShiftAmt = LegalizeOp(Node->getOperand(1));
+ if (TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Custom) {
+ SDOperand Op = DAG.getNode(ISD::SHL, VT, Node->getOperand(0), ShiftAmt);
+ Op = TLI.LowerOperation(Op, DAG);
+ if (Op.Val) {
+ // Now that the custom expander is done, expand the result, which is
+ // still VT.
+ ExpandOp(Op, Lo, Hi);
+ break;
+ }
+ }
+
+ // If ADDC/ADDE are supported and if the shift amount is a constant 1, emit
+ // this X << 1 as X+X.
+ if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ if (ShAmt->getValue() == 1 && TLI.isOperationLegal(ISD::ADDC, NVT) &&
+ TLI.isOperationLegal(ISD::ADDE, NVT)) {
+ SDOperand LoOps[2], HiOps[3];
+ ExpandOp(Node->getOperand(0), LoOps[0], HiOps[0]);
+ SDVTList VTList = DAG.getVTList(LoOps[0].getValueType(), MVT::Flag);
+ LoOps[1] = LoOps[0];
+ Lo = DAG.getNode(ISD::ADDC, VTList, LoOps, 2);
+
+ HiOps[1] = HiOps[0];
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, VTList, HiOps, 3);
+ break;
+ }
+ }
+
+ // If we can emit an efficient shift operation, do so now.
+ if (ExpandShift(ISD::SHL, Node->getOperand(0), ShiftAmt, Lo, Hi))
+ break;
+
+ // If this target supports SHL_PARTS, use it.
+ TargetLowering::LegalizeAction Action =
+ TLI.getOperationAction(ISD::SHL_PARTS, NVT);
+ if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+ Action == TargetLowering::Custom) {
+ ExpandShiftParts(ISD::SHL_PARTS, Node->getOperand(0), ShiftAmt, Lo, Hi);
+ break;
+ }
+
+ // Otherwise, emit a libcall.
+ Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SHL_I64), Node,
+ false/*left shift=unsigned*/, Hi);
+ break;
+ }
+
+ case ISD::SRA: {
+ // If the target wants custom lowering, do so.
+ SDOperand ShiftAmt = LegalizeOp(Node->getOperand(1));
+ if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Custom) {
+ SDOperand Op = DAG.getNode(ISD::SRA, VT, Node->getOperand(0), ShiftAmt);
+ Op = TLI.LowerOperation(Op, DAG);
+ if (Op.Val) {
+ // Now that the custom expander is done, expand the result, which is
+ // still VT.
+ ExpandOp(Op, Lo, Hi);
+ break;
+ }
+ }
+
+ // If we can emit an efficient shift operation, do so now.
+ if (ExpandShift(ISD::SRA, Node->getOperand(0), ShiftAmt, Lo, Hi))
+ break;
+
+ // If this target supports SRA_PARTS, use it.
+ TargetLowering::LegalizeAction Action =
+ TLI.getOperationAction(ISD::SRA_PARTS, NVT);
+ if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+ Action == TargetLowering::Custom) {
+ ExpandShiftParts(ISD::SRA_PARTS, Node->getOperand(0), ShiftAmt, Lo, Hi);
+ break;
+ }
+
+ // Otherwise, emit a libcall.
+ Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SRA_I64), Node,
+ true/*ashr is signed*/, Hi);
+ break;
+ }
+
+ case ISD::SRL: {
+ // If the target wants custom lowering, do so.
+ SDOperand ShiftAmt = LegalizeOp(Node->getOperand(1));
+ if (TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Custom) {
+ SDOperand Op = DAG.getNode(ISD::SRL, VT, Node->getOperand(0), ShiftAmt);
+ Op = TLI.LowerOperation(Op, DAG);
+ if (Op.Val) {
+ // Now that the custom expander is done, expand the result, which is
+ // still VT.
+ ExpandOp(Op, Lo, Hi);
+ break;
+ }
+ }
+
+ // If we can emit an efficient shift operation, do so now.
+ if (ExpandShift(ISD::SRL, Node->getOperand(0), ShiftAmt, Lo, Hi))
+ break;
+
+ // If this target supports SRL_PARTS, use it.
+ TargetLowering::LegalizeAction Action =
+ TLI.getOperationAction(ISD::SRL_PARTS, NVT);
+ if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+ Action == TargetLowering::Custom) {
+ ExpandShiftParts(ISD::SRL_PARTS, Node->getOperand(0), ShiftAmt, Lo, Hi);
+ break;
+ }
+
+ // Otherwise, emit a libcall.
+ Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SRL_I64), Node,
+ false/*lshr is unsigned*/, Hi);
+ break;
+ }
+
+ case ISD::ADD:
+ case ISD::SUB: {
+ // If the target wants to custom expand this, let them.
+ if (TLI.getOperationAction(Node->getOpcode(), VT) ==
+ TargetLowering::Custom) {
+ Op = TLI.LowerOperation(Op, DAG);
+ if (Op.Val) {
+ ExpandOp(Op, Lo, Hi);
+ break;
+ }
+ }
+
+ // Expand the subcomponents.
+ SDOperand LHSL, LHSH, RHSL, RHSH;
+ ExpandOp(Node->getOperand(0), LHSL, LHSH);
+ ExpandOp(Node->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+ SDOperand LoOps[2], HiOps[3];
+ LoOps[0] = LHSL;
+ LoOps[1] = RHSL;
+ HiOps[0] = LHSH;
+ HiOps[1] = RHSH;
+ if (Node->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADDC, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, VTList, HiOps, 3);
+ }
+ break;
+ }
+
+ case ISD::ADDC:
+ case ISD::SUBC: {
+ // Expand the subcomponents.
+ SDOperand LHSL, LHSH, RHSL, RHSH;
+ ExpandOp(Node->getOperand(0), LHSL, LHSH);
+ ExpandOp(Node->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+ SDOperand LoOps[2] = { LHSL, RHSL };
+ SDOperand HiOps[3] = { LHSH, RHSH };
+
+ if (Node->getOpcode() == ISD::ADDC) {
+ Lo = DAG.getNode(ISD::ADDC, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, VTList, HiOps, 3);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, VTList, LoOps, 2);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, VTList, HiOps, 3);
+ }
+ // Remember that we legalized the flag.
+ AddLegalizedOperand(Op.getValue(1), LegalizeOp(Hi.getValue(1)));
+ break;
+ }
+ case ISD::ADDE:
+ case ISD::SUBE: {
+ // Expand the subcomponents.
+ SDOperand LHSL, LHSH, RHSL, RHSH;
+ ExpandOp(Node->getOperand(0), LHSL, LHSH);
+ ExpandOp(Node->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+ SDOperand LoOps[3] = { LHSL, RHSL, Node->getOperand(2) };
+ SDOperand HiOps[3] = { LHSH, RHSH };
+
+ Lo = DAG.getNode(Node->getOpcode(), VTList, LoOps, 3);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(Node->getOpcode(), VTList, HiOps, 3);
+
+ // Remember that we legalized the flag.
+ AddLegalizedOperand(Op.getValue(1), LegalizeOp(Hi.getValue(1)));
+ break;
+ }
+ case ISD::MUL: {
+ // If the target wants to custom expand this, let them.
+ if (TLI.getOperationAction(ISD::MUL, VT) == TargetLowering::Custom) {
+ SDOperand New = TLI.LowerOperation(Op, DAG);
+ if (New.Val) {
+ ExpandOp(New, Lo, Hi);
+ break;
+ }
+ }
+
+ bool HasMULHS = TLI.isOperationLegal(ISD::MULHS, NVT);
+ bool HasMULHU = TLI.isOperationLegal(ISD::MULHU, NVT);
+ if (HasMULHS || HasMULHU) {
+ SDOperand LL, LH, RL, RH;
+ ExpandOp(Node->getOperand(0), LL, LH);
+ ExpandOp(Node->getOperand(1), RL, RH);
+ unsigned SH = MVT::getSizeInBits(RH.getValueType())-1;
+ // FIXME: Move this to the dag combiner.
+ // MULHS implicitly sign extends its inputs. Check to see if ExpandOp
+ // extended the sign bit of the low half through the upper half, and if so
+ // emit a MULHS instead of the alternate sequence that is valid for any
+ // i64 x i64 multiply.
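+      // For reference, writing X = XH*2^N + XL and Y = YH*2^N + YL (N = width
+      // of the low half), the product modulo 2^(2N) has
+      //   Lo = XL*YL   and   Hi = MULHU(XL,YL) + XL*YH + XH*YL,
+      // which is exactly what the HasMULHU sequence below computes.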
+ if (HasMULHS &&
+ // is RH an extension of the sign bit of RL?
+ RH.getOpcode() == ISD::SRA && RH.getOperand(0) == RL &&
+ RH.getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(RH.getOperand(1))->getValue() == SH &&
+ // is LH an extension of the sign bit of LL?
+ LH.getOpcode() == ISD::SRA && LH.getOperand(0) == LL &&
+ LH.getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(LH.getOperand(1))->getValue() == SH) {
+ // Low part:
+ Lo = DAG.getNode(ISD::MUL, NVT, LL, RL);
+ // High part:
+ Hi = DAG.getNode(ISD::MULHS, NVT, LL, RL);
+ break;
+ } else if (HasMULHU) {
+ // Low part:
+ Lo = DAG.getNode(ISD::MUL, NVT, LL, RL);
+
+ // High part:
+ Hi = DAG.getNode(ISD::MULHU, NVT, LL, RL);
+ RH = DAG.getNode(ISD::MUL, NVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, NVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, NVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, NVT, Hi, LH);
+ break;
+ }
+ }
+
+ Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::MUL_I64), Node,
+ false/*sign irrelevant*/, Hi);
+ break;
+ }
+ case ISD::SDIV:
+ Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SDIV_I64), Node, true, Hi);
+ break;
+ case ISD::UDIV:
+ Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::UDIV_I64), Node, true, Hi);
+ break;
+ case ISD::SREM:
+ Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SREM_I64), Node, true, Hi);
+ break;
+ case ISD::UREM:
+ Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::UREM_I64), Node, true, Hi);
+ break;
+
+ case ISD::FADD:
+ Lo = ExpandLibCall(TLI.getLibcallName((VT == MVT::f32)
+ ? RTLIB::ADD_F32 : RTLIB::ADD_F64),
+ Node, false, Hi);
+ break;
+ case ISD::FSUB:
+ Lo = ExpandLibCall(TLI.getLibcallName((VT == MVT::f32)
+ ? RTLIB::SUB_F32 : RTLIB::SUB_F64),
+ Node, false, Hi);
+ break;
+ case ISD::FMUL:
+ Lo = ExpandLibCall(TLI.getLibcallName((VT == MVT::f32)
+ ? RTLIB::MUL_F32 : RTLIB::MUL_F64),
+ Node, false, Hi);
+ break;
+ case ISD::FDIV:
+ Lo = ExpandLibCall(TLI.getLibcallName((VT == MVT::f32)
+ ? RTLIB::DIV_F32 : RTLIB::DIV_F64),
+ Node, false, Hi);
+ break;
+ case ISD::FP_EXTEND:
+ Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::FPEXT_F32_F64), Node, true,Hi);
+ break;
+ case ISD::FP_ROUND:
+ Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::FPROUND_F64_F32),Node,true,Hi);
+ break;
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS: {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ switch(Node->getOpcode()) {
+ case ISD::FSQRT:
+ LC = (VT == MVT::f32) ? RTLIB::SQRT_F32 : RTLIB::SQRT_F64;
+ break;
+ case ISD::FSIN:
+ LC = (VT == MVT::f32) ? RTLIB::SIN_F32 : RTLIB::SIN_F64;
+ break;
+ case ISD::FCOS:
+ LC = (VT == MVT::f32) ? RTLIB::COS_F32 : RTLIB::COS_F64;
+ break;
+ default: assert(0 && "Unreachable!");
+ }
+ Lo = ExpandLibCall(TLI.getLibcallName(LC), Node, false, Hi);
+ break;
+ }
+ case ISD::FABS: {
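+    // Expand fabs by clearing the IEEE sign bit of the value with an integer
+    // AND; the FNEG case below flips the sign bit with an XOR the same way.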
+ SDOperand Mask = (VT == MVT::f64)
+ ? DAG.getConstantFP(BitsToDouble(~(1ULL << 63)), VT)
+ : DAG.getConstantFP(BitsToFloat(~(1U << 31)), VT);
+ Mask = DAG.getNode(ISD::BIT_CONVERT, NVT, Mask);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, NVT, Node->getOperand(0));
+ Lo = DAG.getNode(ISD::AND, NVT, Lo, Mask);
+ if (getTypeAction(NVT) == Expand)
+ ExpandOp(Lo, Lo, Hi);
+ break;
+ }
+ case ISD::FNEG: {
+ SDOperand Mask = (VT == MVT::f64)
+ ? DAG.getConstantFP(BitsToDouble(1ULL << 63), VT)
+ : DAG.getConstantFP(BitsToFloat(1U << 31), VT);
+ Mask = DAG.getNode(ISD::BIT_CONVERT, NVT, Mask);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, NVT, Node->getOperand(0));
+ Lo = DAG.getNode(ISD::XOR, NVT, Lo, Mask);
+ if (getTypeAction(NVT) == Expand)
+ ExpandOp(Lo, Lo, Hi);
+ break;
+ }
+ case ISD::FCOPYSIGN: {
+ Lo = ExpandFCOPYSIGNToBitwiseOps(Node, NVT, DAG, TLI);
+ if (getTypeAction(NVT) == Expand)
+ ExpandOp(Lo, Lo, Hi);
+ break;
+ }
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: {
+ bool isSigned = Node->getOpcode() == ISD::SINT_TO_FP;
+ MVT::ValueType SrcVT = Node->getOperand(0).getValueType();
+ RTLIB::Libcall LC;
+ if (Node->getOperand(0).getValueType() == MVT::i64) {
+ if (VT == MVT::f32)
+ LC = isSigned ? RTLIB::SINTTOFP_I64_F32 : RTLIB::UINTTOFP_I64_F32;
+ else
+ LC = isSigned ? RTLIB::SINTTOFP_I64_F64 : RTLIB::UINTTOFP_I64_F64;
+ } else {
+ if (VT == MVT::f32)
+ LC = isSigned ? RTLIB::SINTTOFP_I32_F32 : RTLIB::UINTTOFP_I32_F32;
+ else
+ LC = isSigned ? RTLIB::SINTTOFP_I32_F64 : RTLIB::UINTTOFP_I32_F64;
+ }
+
+ // Promote the operand if needed.
+ if (getTypeAction(SrcVT) == Promote) {
+ SDOperand Tmp = PromoteOp(Node->getOperand(0));
+ Tmp = isSigned
+ ? DAG.getNode(ISD::SIGN_EXTEND_INREG, Tmp.getValueType(), Tmp,
+ DAG.getValueType(SrcVT))
+ : DAG.getZeroExtendInReg(Tmp, SrcVT);
+ Node = DAG.UpdateNodeOperands(Op, Tmp).Val;
+ }
+
+ const char *LibCall = TLI.getLibcallName(LC);
+ if (LibCall)
+ Lo = ExpandLibCall(TLI.getLibcallName(LC), Node, isSigned, Hi);
+ else {
+ Lo = ExpandIntToFP(Node->getOpcode() == ISD::SINT_TO_FP, VT,
+ Node->getOperand(0));
+ if (getTypeAction(Lo.getValueType()) == Expand)
+ ExpandOp(Lo, Lo, Hi);
+ }
+ break;
+ }
+ }
+
+ // Make sure the resultant values have been legalized themselves, unless this
+ // is a type that requires multi-step expansion.
+ if (getTypeAction(NVT) != Expand && NVT != MVT::isVoid) {
+ Lo = LegalizeOp(Lo);
+ if (Hi.Val)
+      // The high part may be missing if the op expanded to a single node.
+ Hi = LegalizeOp(Hi);
+ }
+
+ // Remember in a map if the values will be reused later.
+ bool isNew = ExpandedNodes.insert(std::make_pair(Op, std::make_pair(Lo, Hi)));
+ assert(isNew && "Value already expanded?!?");
+}
+
+/// SplitVectorOp - Given an operand of vector type, break it down into
+/// two smaller values, still of vector type.
+void SelectionDAGLegalize::SplitVectorOp(SDOperand Op, SDOperand &Lo,
+ SDOperand &Hi) {
+ assert(MVT::isVector(Op.getValueType()) && "Cannot split non-vector type!");
+ SDNode *Node = Op.Val;
+ unsigned NumElements = MVT::getVectorNumElements(Node->getValueType(0));
+ assert(NumElements > 1 && "Cannot split a single element vector!");
+ unsigned NewNumElts = NumElements/2;
+ MVT::ValueType NewEltVT = MVT::getVectorElementType(Node->getValueType(0));
+ MVT::ValueType NewVT = MVT::getVectorType(NewEltVT, NewNumElts);
+
+ // See if we already split it.
+ std::map<SDOperand, std::pair<SDOperand, SDOperand> >::iterator I
+ = SplitNodes.find(Op);
+ if (I != SplitNodes.end()) {
+ Lo = I->second.first;
+ Hi = I->second.second;
+ return;
+ }
+
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ Node->dump(&DAG);
+#endif
+ assert(0 && "Unhandled operation in SplitVectorOp!");
+ case ISD::BUILD_PAIR:
+ Lo = Node->getOperand(0);
+ Hi = Node->getOperand(1);
+ break;
+ case ISD::BUILD_VECTOR: {
+ SmallVector<SDOperand, 8> LoOps(Node->op_begin(),
+ Node->op_begin()+NewNumElts);
+ Lo = DAG.getNode(ISD::BUILD_VECTOR, NewVT, &LoOps[0], LoOps.size());
+
+ SmallVector<SDOperand, 8> HiOps(Node->op_begin()+NewNumElts,
+ Node->op_end());
+ Hi = DAG.getNode(ISD::BUILD_VECTOR, NewVT, &HiOps[0], HiOps.size());
+ break;
+ }
+ case ISD::CONCAT_VECTORS: {
+ unsigned NewNumSubvectors = Node->getNumOperands() / 2;
+ if (NewNumSubvectors == 1) {
+ Lo = Node->getOperand(0);
+ Hi = Node->getOperand(1);
+ } else {
+ SmallVector<SDOperand, 8> LoOps(Node->op_begin(),
+ Node->op_begin()+NewNumSubvectors);
+ Lo = DAG.getNode(ISD::CONCAT_VECTORS, NewVT, &LoOps[0], LoOps.size());
+
+ SmallVector<SDOperand, 8> HiOps(Node->op_begin()+NewNumSubvectors,
+ Node->op_end());
+ Hi = DAG.getNode(ISD::CONCAT_VECTORS, NewVT, &HiOps[0], HiOps.size());
+ }
+ break;
+ }
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::FDIV:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ SDOperand LL, LH, RL, RH;
+ SplitVectorOp(Node->getOperand(0), LL, LH);
+ SplitVectorOp(Node->getOperand(1), RL, RH);
+
+ Lo = DAG.getNode(Node->getOpcode(), NewVT, LL, RL);
+ Hi = DAG.getNode(Node->getOpcode(), NewVT, LH, RH);
+ break;
+ }
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ SDOperand Ch = LD->getChain();
+ SDOperand Ptr = LD->getBasePtr();
+ const Value *SV = LD->getSrcValue();
+ int SVOffset = LD->getSrcValueOffset();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+
+ Lo = DAG.getLoad(NewVT, Ch, Ptr, SV, SVOffset, isVolatile, Alignment);
+ unsigned IncrementSize = NewNumElts * MVT::getSizeInBits(NewEltVT)/8;
+ Ptr = DAG.getNode(ISD::ADD, Ptr.getValueType(), Ptr,
+ getIntPtrConstant(IncrementSize));
+ SVOffset += IncrementSize;
+ if (Alignment > IncrementSize)
+ Alignment = IncrementSize;
+ Hi = DAG.getLoad(NewVT, Ch, Ptr, SV, SVOffset, isVolatile, Alignment);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ SDOperand TF = DAG.getNode(ISD::TokenFactor, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Remember that we legalized the chain.
+ AddLegalizedOperand(Op.getValue(1), LegalizeOp(TF));
+ break;
+ }
+ case ISD::BIT_CONVERT: {
+ // We know the result is a vector. The input may be either a vector or a
+ // scalar value.
+ SDOperand InOp = Node->getOperand(0);
+ if (!MVT::isVector(InOp.getValueType()) ||
+ MVT::getVectorNumElements(InOp.getValueType()) == 1) {
+ // The input is a scalar or single-element vector.
+ // Lower to a store/load so that it can be split.
+      // FIXME: this could probably be improved.
+ SDOperand Ptr = CreateStackTemporary(InOp.getValueType());
+
+ SDOperand St = DAG.getStore(DAG.getEntryNode(),
+ InOp, Ptr, NULL, 0);
+ InOp = DAG.getLoad(Op.getValueType(), St, Ptr, NULL, 0);
+ }
+ // Split the vector and convert each of the pieces now.
+ SplitVectorOp(InOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, NewVT, Lo);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, NewVT, Hi);
+ break;
+ }
+ }
+
+ // Remember in a map if the values will be reused later.
+ bool isNew =
+ SplitNodes.insert(std::make_pair(Op, std::make_pair(Lo, Hi))).second;
+ assert(isNew && "Value already split?!?");
+}
+
+
+/// ScalarizeVectorOp - Given an operand of single-element vector type
+/// (e.g. v1f32), convert it into the equivalent operation that returns a
+/// scalar (e.g. f32) value.
+SDOperand SelectionDAGLegalize::ScalarizeVectorOp(SDOperand Op) {
+ assert(MVT::isVector(Op.getValueType()) &&
+ "Bad ScalarizeVectorOp invocation!");
+ SDNode *Node = Op.Val;
+ MVT::ValueType NewVT = MVT::getVectorElementType(Op.getValueType());
+ assert(MVT::getVectorNumElements(Op.getValueType()) == 1);
+
+ // See if we already scalarized it.
+ std::map<SDOperand, SDOperand>::iterator I = ScalarizedNodes.find(Op);
+ if (I != ScalarizedNodes.end()) return I->second;
+
+ SDOperand Result;
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ Node->dump(&DAG); cerr << "\n";
+#endif
+ assert(0 && "Unknown vector operation in ScalarizeVectorOp!");
+ case ISD::ADD:
+ case ISD::FADD:
+ case ISD::SUB:
+ case ISD::FSUB:
+ case ISD::MUL:
+ case ISD::FMUL:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::FDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ case ISD::FREM:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ Result = DAG.getNode(Node->getOpcode(),
+ NewVT,
+ ScalarizeVectorOp(Node->getOperand(0)),
+ ScalarizeVectorOp(Node->getOperand(1)));
+ break;
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ Result = DAG.getNode(Node->getOpcode(),
+ NewVT,
+ ScalarizeVectorOp(Node->getOperand(0)));
+ break;
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ SDOperand Ch = LegalizeOp(LD->getChain()); // Legalize the chain.
+ SDOperand Ptr = LegalizeOp(LD->getBasePtr()); // Legalize the pointer.
+
+ const Value *SV = LD->getSrcValue();
+ int SVOffset = LD->getSrcValueOffset();
+ Result = DAG.getLoad(NewVT, Ch, Ptr, SV, SVOffset,
+ LD->isVolatile(), LD->getAlignment());
+
+ // Remember that we legalized the chain.
+ AddLegalizedOperand(Op.getValue(1), LegalizeOp(Result.getValue(1)));
+ break;
+ }
+ case ISD::BUILD_VECTOR:
+ Result = Node->getOperand(0);
+ break;
+ case ISD::INSERT_VECTOR_ELT:
+ // Returning the inserted scalar element.
+ Result = Node->getOperand(1);
+ break;
+ case ISD::CONCAT_VECTORS:
+ assert(Node->getOperand(0).getValueType() == NewVT &&
+ "Concat of non-legal vectors not yet supported!");
+ Result = Node->getOperand(0);
+ break;
+ case ISD::VECTOR_SHUFFLE: {
+ // Figure out if the scalar is the LHS or RHS and return it.
+ SDOperand EltNum = Node->getOperand(2).getOperand(0);
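+    // A non-zero mask element selects the element from the RHS vector
+    // (operand 1); zero selects it from the LHS (operand 0).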
+ if (cast<ConstantSDNode>(EltNum)->getValue())
+ Result = ScalarizeVectorOp(Node->getOperand(1));
+ else
+ Result = ScalarizeVectorOp(Node->getOperand(0));
+ break;
+ }
+ case ISD::EXTRACT_SUBVECTOR:
+ Result = Node->getOperand(0);
+ assert(Result.getValueType() == NewVT);
+ break;
+ case ISD::BIT_CONVERT:
+ Result = DAG.getNode(ISD::BIT_CONVERT, NewVT, Op.getOperand(0));
+ break;
+ case ISD::SELECT:
+ Result = DAG.getNode(ISD::SELECT, NewVT, Op.getOperand(0),
+ ScalarizeVectorOp(Op.getOperand(1)),
+ ScalarizeVectorOp(Op.getOperand(2)));
+ break;
+ }
+
+ if (TLI.isTypeLegal(NewVT))
+ Result = LegalizeOp(Result);
+ bool isNew = ScalarizedNodes.insert(std::make_pair(Op, Result)).second;
+ assert(isNew && "Value already scalarized?");
+ return Result;
+}
+
+
+// SelectionDAG::Legalize - This is the entry point for the file.
+//
+void SelectionDAG::Legalize() {
+ if (ViewLegalizeDAGs) viewGraph();
+
+ /// run - This is the main entry point to this class.
+ ///
+ SelectionDAGLegalize(*this).LegalizeDAG();
+}
+
diff --git a/lib/CodeGen/SelectionDAG/Makefile b/lib/CodeGen/SelectionDAG/Makefile
new file mode 100644
index 0000000..6c50288
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/Makefile
@@ -0,0 +1,15 @@
+##===- lib/CodeGen/SelectionDAG/Makefile -------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file was developed by the LLVM research group and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMSelectionDAG
+PARALLEL_DIRS =
+BUILD_ARCHIVE = 1
+DONT_BUILD_RELINKED = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
new file mode 100644
index 0000000..06b2329
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
@@ -0,0 +1,725 @@
+//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a simple two pass scheduler. The first pass attempts to push
+// backward any lengthy instructions and critical paths. The second pass packs
+// instructions into semi-optimal time slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// BuildSchedUnits - Build SUnits from the selection dag that we are given as
+/// input.  This SUnit graph is similar to the SelectionDAG, but represents
+/// flagged-together nodes with a single SUnit.
+void ScheduleDAG::BuildSchedUnits() {
+ // Reserve entries in the vector for each of the SUnits we are creating. This
+  // ensures that reallocation of the vector won't happen, so SUnit*'s won't get
+ // invalidated.
+ SUnits.reserve(std::distance(DAG.allnodes_begin(), DAG.allnodes_end()));
+
+ const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+
+ for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); NI != E; ++NI) {
+ if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
+ continue;
+
+ // If this node has already been processed, stop now.
+ if (SUnitMap[NI]) continue;
+
+ SUnit *NodeSUnit = NewSUnit(NI);
+
+    // See if anything is flagged to this node; if so, add it to the flagged
+    // nodes.  Nodes can have at most one flag input and one flag output.  Flags
+    // are required to be the last operand and result of a node.
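+    // For example, an ADDC and the ADDE that consumes its carry are glued
+    // together by a Flag value and end up in the same SUnit.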
+
+ // Scan up, adding flagged preds to FlaggedNodes.
+ SDNode *N = NI;
+ if (N->getNumOperands() &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
+ do {
+ N = N->getOperand(N->getNumOperands()-1).Val;
+ NodeSUnit->FlaggedNodes.push_back(N);
+ SUnitMap[N] = NodeSUnit;
+ } while (N->getNumOperands() &&
+ N->getOperand(N->getNumOperands()-1).getValueType()== MVT::Flag);
+ std::reverse(NodeSUnit->FlaggedNodes.begin(),
+ NodeSUnit->FlaggedNodes.end());
+ }
+
+ // Scan down, adding this node and any flagged succs to FlaggedNodes if they
+ // have a user of the flag operand.
+ N = NI;
+ while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
+ SDOperand FlagVal(N, N->getNumValues()-1);
+
+ // There are either zero or one users of the Flag result.
+ bool HasFlagUse = false;
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI)
+ if (FlagVal.isOperand(*UI)) {
+ HasFlagUse = true;
+ NodeSUnit->FlaggedNodes.push_back(N);
+ SUnitMap[N] = NodeSUnit;
+ N = *UI;
+ break;
+ }
+ if (!HasFlagUse) break;
+ }
+
+ // Now all flagged nodes are in FlaggedNodes and N is the bottom-most node.
+ // Update the SUnit
+ NodeSUnit->Node = N;
+ SUnitMap[N] = NodeSUnit;
+
+ // Compute the latency for the node. We use the sum of the latencies for
+ // all nodes flagged together into this SUnit.
+ if (InstrItins.isEmpty()) {
+ // No latency information.
+ NodeSUnit->Latency = 1;
+ } else {
+ NodeSUnit->Latency = 0;
+ if (N->isTargetOpcode()) {
+ unsigned SchedClass = TII->getSchedClass(N->getTargetOpcode());
+ InstrStage *S = InstrItins.begin(SchedClass);
+ InstrStage *E = InstrItins.end(SchedClass);
+ for (; S != E; ++S)
+ NodeSUnit->Latency += S->Cycles;
+ }
+ for (unsigned i = 0, e = NodeSUnit->FlaggedNodes.size(); i != e; ++i) {
+ SDNode *FNode = NodeSUnit->FlaggedNodes[i];
+ if (FNode->isTargetOpcode()) {
+ unsigned SchedClass = TII->getSchedClass(FNode->getTargetOpcode());
+ InstrStage *S = InstrItins.begin(SchedClass);
+ InstrStage *E = InstrItins.end(SchedClass);
+ for (; S != E; ++S)
+ NodeSUnit->Latency += S->Cycles;
+ }
+ }
+ }
+ }
+
+ // Pass 2: add the preds, succs, etc.
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+ SUnit *SU = &SUnits[su];
+ SDNode *MainNode = SU->Node;
+
+ if (MainNode->isTargetOpcode()) {
+ unsigned Opc = MainNode->getTargetOpcode();
+ for (unsigned i = 0, ee = TII->getNumOperands(Opc); i != ee; ++i) {
+ if (TII->getOperandConstraint(Opc, i, TOI::TIED_TO) != -1) {
+ SU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (TII->isCommutableInstr(Opc))
+ SU->isCommutable = true;
+ }
+
+ // Find all predecessors and successors of the group.
+ // Temporarily add N to make code simpler.
+ SU->FlaggedNodes.push_back(MainNode);
+
+ for (unsigned n = 0, e = SU->FlaggedNodes.size(); n != e; ++n) {
+ SDNode *N = SU->FlaggedNodes[n];
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *OpN = N->getOperand(i).Val;
+ if (isPassiveNode(OpN)) continue; // Not scheduled.
+ SUnit *OpSU = SUnitMap[OpN];
+ assert(OpSU && "Node has no SUnit!");
+ if (OpSU == SU) continue; // In the same group.
+
+ MVT::ValueType OpVT = N->getOperand(i).getValueType();
+ assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
+ bool isChain = OpVT == MVT::Other;
+
+ if (SU->addPred(OpSU, isChain)) {
+ if (!isChain) {
+ SU->NumPreds++;
+ SU->NumPredsLeft++;
+ } else {
+ SU->NumChainPredsLeft++;
+ }
+ }
+ if (OpSU->addSucc(SU, isChain)) {
+ if (!isChain) {
+ OpSU->NumSuccs++;
+ OpSU->NumSuccsLeft++;
+ } else {
+ OpSU->NumChainSuccsLeft++;
+ }
+ }
+ }
+ }
+
+ // Remove MainNode from FlaggedNodes again.
+ SU->FlaggedNodes.pop_back();
+ }
+
+ return;
+}
+
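+/// CalculateDepths - Compute the depth of each SUnit: the maximum number of
+/// edges on any path from a predecessor-free SUnit to that SUnit.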
+void ScheduleDAG::CalculateDepths() {
+ std::vector<std::pair<SUnit*, unsigned> > WorkList;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i)
+ if (SUnits[i].Preds.size() == 0/* && &SUnits[i] != Entry*/)
+ WorkList.push_back(std::make_pair(&SUnits[i], 0U));
+
+ while (!WorkList.empty()) {
+ SUnit *SU = WorkList.back().first;
+ unsigned Depth = WorkList.back().second;
+ WorkList.pop_back();
+ if (SU->Depth == 0 || Depth > SU->Depth) {
+ SU->Depth = Depth;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ WorkList.push_back(std::make_pair(I->first, Depth+1));
+ }
+ }
+}
+
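+/// CalculateHeights - Compute the height of each SUnit: the maximum number of
+/// edges on any path from that SUnit to the root of the DAG.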
+void ScheduleDAG::CalculateHeights() {
+ std::vector<std::pair<SUnit*, unsigned> > WorkList;
+ SUnit *Root = SUnitMap[DAG.getRoot().Val];
+ WorkList.push_back(std::make_pair(Root, 0U));
+
+ while (!WorkList.empty()) {
+ SUnit *SU = WorkList.back().first;
+ unsigned Height = WorkList.back().second;
+ WorkList.pop_back();
+ if (SU->Height == 0 || Height > SU->Height) {
+ SU->Height = Height;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ WorkList.push_back(std::make_pair(I->first, Height+1));
+ }
+ }
+}
+
+/// CountResults - The results of target nodes have register or immediate
+/// operands first, then an optional chain, and optional flag operands (which do
+/// not go into the machine instrs.)
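+/// For example, a node whose result types are (i32, ch, flag) has one countable
+/// result; the trailing flag and chain results are skipped.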
+unsigned ScheduleDAG::CountResults(SDNode *Node) {
+ unsigned N = Node->getNumValues();
+ while (N && Node->getValueType(N - 1) == MVT::Flag)
+ --N;
+ if (N && Node->getValueType(N - 1) == MVT::Other)
+ --N; // Skip over chain result.
+ return N;
+}
+
+/// CountOperands - The inputs to target nodes have any actual inputs first,
+/// followed by an optional chain operand, then flag operands. Compute the
+/// number of actual operands that will go into the machine instr.
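+/// For example, an operand list of (x, y, ch, flag) yields a count of two; the
+/// trailing chain and flag operands are not counted.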
+unsigned ScheduleDAG::CountOperands(SDNode *Node) {
+ unsigned N = Node->getNumOperands();
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag)
+ --N;
+ if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+ --N; // Ignore chain if it exists.
+ return N;
+}
+
+static const TargetRegisterClass *getInstrOperandRegClass(
+ const MRegisterInfo *MRI,
+ const TargetInstrInfo *TII,
+ const TargetInstrDescriptor *II,
+ unsigned Op) {
+ if (Op >= II->numOperands) {
+ assert((II->Flags & M_VARIABLE_OPS)&& "Invalid operand # of instruction");
+ return NULL;
+ }
+ const TargetOperandInfo &toi = II->OpInfo[Op];
+ return (toi.Flags & M_LOOK_UP_PTR_REG_CLASS)
+ ? TII->getPointerRegClass() : MRI->getRegClass(toi.RegClass);
+}
+
+static void CreateVirtualRegisters(SDNode *Node,
+ unsigned NumResults,
+ const MRegisterInfo *MRI,
+ MachineInstr *MI,
+ SSARegMap *RegMap,
+ const TargetInstrInfo *TII,
+ const TargetInstrDescriptor &II,
+ DenseMap<SDOperand, unsigned> &VRBaseMap) {
+ for (unsigned i = 0; i < NumResults; ++i) {
+ // If the specific node value is only used by a CopyToReg and the dest reg
+ // is a vreg, use the CopyToReg'd destination register instead of creating
+ // a new vreg.
+ unsigned VRBase = 0;
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *Use = *UI;
+ if (Use->getOpcode() == ISD::CopyToReg &&
+ Use->getOperand(2).Val == Node &&
+ Use->getOperand(2).ResNo == i) {
+ unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+ if (MRegisterInfo::isVirtualRegister(Reg)) {
+ VRBase = Reg;
+ MI->addRegOperand(Reg, true);
+ break;
+ }
+ }
+ }
+
+ if (VRBase == 0) {
+ // Create the result registers for this node and add the result regs to
+ // the machine instruction.
+ const TargetRegisterClass *RC = getInstrOperandRegClass(MRI, TII, &II, i);
+ assert(RC && "Isn't a register operand!");
+ VRBase = RegMap->createVirtualRegister(RC);
+ MI->addRegOperand(VRBase, true);
+ }
+
+ bool isNew = VRBaseMap.insert(std::make_pair(SDOperand(Node,i), VRBase));
+ assert(isNew && "Node emitted out of order - early");
+ }
+}
+
+/// getVR - Return the virtual register corresponding to the specified result
+/// of the specified node.
+static unsigned getVR(SDOperand Op, DenseMap<SDOperand, unsigned> &VRBaseMap) {
+ DenseMap<SDOperand, unsigned>::iterator I = VRBaseMap.find(Op);
+ assert(I != VRBaseMap.end() && "Node emitted out of order - late");
+ return I->second;
+}
+
+
+/// AddOperand - Add the specified operand to the specified machine instr. II
+/// specifies the instruction information for the node, and IIOpNum is the
+/// operand number (in the II) that we are adding. IIOpNum and II are used for
+/// assertions only.
+void ScheduleDAG::AddOperand(MachineInstr *MI, SDOperand Op,
+ unsigned IIOpNum,
+ const TargetInstrDescriptor *II,
+ DenseMap<SDOperand, unsigned> &VRBaseMap) {
+ if (Op.isTargetOpcode()) {
+ // Note that this case is redundant with the final else block, but we
+ // include it because it is the most common and it makes the logic
+ // simpler here.
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Flag &&
+ "Chain and flag operands should occur at end of operand list!");
+
+ // Get/emit the operand.
+ unsigned VReg = getVR(Op, VRBaseMap);
+ const TargetInstrDescriptor *TID = MI->getInstrDescriptor();
+ bool isOptDef = (IIOpNum < TID->numOperands)
+ ? (TID->OpInfo[IIOpNum].Flags & M_OPTIONAL_DEF_OPERAND) : false;
+ MI->addRegOperand(VReg, isOptDef);
+
+ // Verify that it is right.
+ assert(MRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
+ if (II) {
+ const TargetRegisterClass *RC =
+ getInstrOperandRegClass(MRI, TII, II, IIOpNum);
+ assert(RC && "Don't have operand info for this instruction!");
+ const TargetRegisterClass *VRC = RegMap->getRegClass(VReg);
+ if (VRC != RC) {
+ cerr << "Register class of operand and regclass of use don't agree!\n";
+#ifndef NDEBUG
+ cerr << "Operand = " << IIOpNum << "\n";
+ cerr << "Op->Val = "; Op.Val->dump(&DAG); cerr << "\n";
+ cerr << "MI = "; MI->print(cerr);
+ cerr << "VReg = " << VReg << "\n";
+ cerr << "VReg RegClass size = " << VRC->getSize()
+ << ", align = " << VRC->getAlignment() << "\n";
+ cerr << "Expected RegClass size = " << RC->getSize()
+ << ", align = " << RC->getAlignment() << "\n";
+#endif
+ cerr << "Fatal error, aborting.\n";
+ abort();
+ }
+ }
+ } else if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Op)) {
+ MI->addImmOperand(C->getValue());
+ } else if (RegisterSDNode *R =
+ dyn_cast<RegisterSDNode>(Op)) {
+ MI->addRegOperand(R->getReg(), false);
+ } else if (GlobalAddressSDNode *TGA =
+ dyn_cast<GlobalAddressSDNode>(Op)) {
+ MI->addGlobalAddressOperand(TGA->getGlobal(), TGA->getOffset());
+ } else if (BasicBlockSDNode *BB =
+ dyn_cast<BasicBlockSDNode>(Op)) {
+ MI->addMachineBasicBlockOperand(BB->getBasicBlock());
+ } else if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(Op)) {
+ MI->addFrameIndexOperand(FI->getIndex());
+ } else if (JumpTableSDNode *JT =
+ dyn_cast<JumpTableSDNode>(Op)) {
+ MI->addJumpTableIndexOperand(JT->getIndex());
+ } else if (ConstantPoolSDNode *CP =
+ dyn_cast<ConstantPoolSDNode>(Op)) {
+ int Offset = CP->getOffset();
+ unsigned Align = CP->getAlignment();
+ const Type *Type = CP->getType();
+ // MachineConstantPool wants an explicit alignment.
+ if (Align == 0) {
+ Align = TM.getTargetData()->getPreferredTypeAlignmentShift(Type);
+ if (Align == 0) {
+ // Alignment of vector types. FIXME!
+ Align = TM.getTargetData()->getTypeSize(Type);
+ Align = Log2_64(Align);
+ }
+ }
+
+ unsigned Idx;
+ if (CP->isMachineConstantPoolEntry())
+ Idx = ConstPool->getConstantPoolIndex(CP->getMachineCPVal(), Align);
+ else
+ Idx = ConstPool->getConstantPoolIndex(CP->getConstVal(), Align);
+ MI->addConstantPoolIndexOperand(Idx, Offset);
+ } else if (ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(Op)) {
+ MI->addExternalSymbolOperand(ES->getSymbol());
+ } else {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Flag &&
+ "Chain and flag operands should occur at end of operand list!");
+ unsigned VReg = getVR(Op, VRBaseMap);
+ MI->addRegOperand(VReg, false);
+
+ // Verify that it is right.
+ assert(MRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
+ if (II) {
+ const TargetRegisterClass *RC =
+ getInstrOperandRegClass(MRI, TII, II, IIOpNum);
+ assert(RC && "Don't have operand info for this instruction!");
+ assert(RegMap->getRegClass(VReg) == RC &&
+ "Register class of operand and regclass of use don't agree!");
+ }
+ }
+
+}
+
+// Returns the Register Class of a physical register
+static const TargetRegisterClass *getPhysicalRegisterRegClass(
+ const MRegisterInfo *MRI,
+ MVT::ValueType VT,
+ unsigned reg) {
+ assert(MRegisterInfo::isPhysicalRegister(reg) &&
+ "reg must be a physical register");
+ // Pick the register class of the right type that contains this physreg.
+ for (MRegisterInfo::regclass_iterator I = MRI->regclass_begin(),
+ E = MRI->regclass_end(); I != E; ++I)
+ if ((*I)->hasType(VT) && (*I)->contains(reg))
+ return *I;
+ assert(false && "Couldn't find the register class");
+ return 0;
+}
+
+/// EmitNode - Generate machine code for a node and its needed dependencies.
+///
+void ScheduleDAG::EmitNode(SDNode *Node,
+ DenseMap<SDOperand, unsigned> &VRBaseMap) {
+  // If this node is a target machine instruction, emit it.
+ if (Node->isTargetOpcode()) {
+ unsigned Opc = Node->getTargetOpcode();
+ const TargetInstrDescriptor &II = TII->get(Opc);
+
+ unsigned NumResults = CountResults(Node);
+ unsigned NodeOperands = CountOperands(Node);
+ unsigned NumMIOperands = NodeOperands + NumResults;
+#ifndef NDEBUG
+ assert((unsigned(II.numOperands) == NumMIOperands ||
+ (II.Flags & M_VARIABLE_OPS)) &&
+ "#operands for dag node doesn't match .td file!");
+#endif
+
+ // Create the new machine instruction.
+ MachineInstr *MI = new MachineInstr(II);
+
+ // Add result register values for things that are defined by this
+ // instruction.
+ if (NumResults)
+ CreateVirtualRegisters(Node, NumResults, MRI, MI, RegMap,
+ TII, II, VRBaseMap);
+
+ // Emit all of the actual operands of this instruction, adding them to the
+ // instruction as appropriate.
+ for (unsigned i = 0; i != NodeOperands; ++i)
+ AddOperand(MI, Node->getOperand(i), i+NumResults, &II, VRBaseMap);
+
+ // Commute node if it has been determined to be profitable.
+ if (CommuteSet.count(Node)) {
+ MachineInstr *NewMI = TII->commuteInstruction(MI);
+ if (NewMI == 0)
+ DOUT << "Sched: COMMUTING FAILED!\n";
+ else {
+ DOUT << "Sched: COMMUTED TO: " << *NewMI;
+ if (MI != NewMI) {
+ delete MI;
+ MI = NewMI;
+ }
+ }
+ }
+
+ // Now that we have emitted all operands, emit this instruction itself.
+ if ((II.Flags & M_USES_CUSTOM_DAG_SCHED_INSERTION) == 0) {
+ BB->insert(BB->end(), MI);
+ } else {
+ // Insert this instruction into the end of the basic block, potentially
+ // taking some custom action.
+ BB = DAG.getTargetLoweringInfo().InsertAtEndOfBasicBlock(MI, BB);
+ }
+ } else {
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ Node->dump(&DAG);
+#endif
+ assert(0 && "This target-independent node should have been selected!");
+ case ISD::EntryToken: // fall thru
+ case ISD::TokenFactor:
+ case ISD::LABEL:
+ break;
+ case ISD::CopyToReg: {
+ unsigned InReg;
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(2)))
+ InReg = R->getReg();
+ else
+ InReg = getVR(Node->getOperand(2), VRBaseMap);
+ unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ if (InReg != DestReg) {// Coalesced away the copy?
+ const TargetRegisterClass *TRC = 0;
+ // Get the target register class
+ if (MRegisterInfo::isVirtualRegister(InReg))
+ TRC = RegMap->getRegClass(InReg);
+ else
+ TRC = getPhysicalRegisterRegClass(MRI,
+ Node->getOperand(2).getValueType(),
+ InReg);
+ MRI->copyRegToReg(*BB, BB->end(), DestReg, InReg, TRC);
+ }
+ break;
+ }
+ case ISD::CopyFromReg: {
+ unsigned VRBase = 0;
+ unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ if (MRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Just use the input register directly!
+ bool isNew = VRBaseMap.insert(std::make_pair(SDOperand(Node,0),SrcReg));
+ assert(isNew && "Node emitted out of order - early");
+ break;
+ }
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+ UI != E; ++UI) {
+ SDNode *Use = *UI;
+ if (Use->getOpcode() == ISD::CopyToReg &&
+ Use->getOperand(2).Val == Node) {
+ unsigned DestReg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+ if (MRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ break;
+ }
+ }
+ }
+
+ // Figure out the register class to create for the destreg.
+ const TargetRegisterClass *TRC = 0;
+ if (VRBase) {
+ TRC = RegMap->getRegClass(VRBase);
+ } else {
+ TRC = getPhysicalRegisterRegClass(MRI, Node->getValueType(0), SrcReg);
+
+ // Create the reg, emit the copy.
+ VRBase = RegMap->createVirtualRegister(TRC);
+ }
+ MRI->copyRegToReg(*BB, BB->end(), VRBase, SrcReg, TRC);
+
+ bool isNew = VRBaseMap.insert(std::make_pair(SDOperand(Node,0), VRBase));
+ assert(isNew && "Node emitted out of order - early");
+ break;
+ }
+ case ISD::INLINEASM: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
+ --NumOps; // Ignore the flag operand.
+
+ // Create the inline asm machine instruction.
+ MachineInstr *MI =
+ new MachineInstr(BB, TII->get(TargetInstrInfo::INLINEASM));
+
+ // Add the asm string as an external symbol operand.
+ const char *AsmStr =
+ cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol();
+ MI->addExternalSymbolOperand(AsmStr);
+
+ // Add all of the operand registers to the instruction.
+ for (unsigned i = 2; i != NumOps;) {
+ unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getValue();
+ unsigned NumVals = Flags >> 3;
+
+ MI->addImmOperand(Flags);
+ ++i; // Skip the ID value.
+
+ switch (Flags & 7) {
+ default: assert(0 && "Bad flags!");
+ case 1: // Use of register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MI->addRegOperand(Reg, false);
+ }
+ break;
+ case 2: // Def of register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MI->addRegOperand(Reg, true);
+ }
+ break;
+ case 3: { // Immediate.
+ assert(NumVals == 1 && "Unknown immediate value!");
+ if (ConstantSDNode *CS=dyn_cast<ConstantSDNode>(Node->getOperand(i))){
+ MI->addImmOperand(CS->getValue());
+ } else {
+ GlobalAddressSDNode *GA =
+ cast<GlobalAddressSDNode>(Node->getOperand(i));
+ MI->addGlobalAddressOperand(GA->getGlobal(), GA->getOffset());
+ }
+ ++i;
+ break;
+ }
+ case 4: // Addressing mode.
+ // The addressing mode has been selected, just add all of the
+ // operands to the machine instruction.
+ for (; NumVals; --NumVals, ++i)
+ AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap);
+ break;
+ }
+ }
+ break;
+ }
+ }
+ }
+}
+
+void ScheduleDAG::EmitNoop() {
+ TII->insertNoop(*BB, BB->end());
+}
+
+/// EmitSchedule - Emit the machine code in scheduled order.
+void ScheduleDAG::EmitSchedule() {
+ // If this is the first basic block in the function, and if it has live ins
+ // that need to be copied into vregs, emit the copies into the top of the
+ // block before emitting the code for the block.
+ MachineFunction &MF = DAG.getMachineFunction();
+ if (&MF.front() == BB && MF.livein_begin() != MF.livein_end()) {
+ for (MachineFunction::livein_iterator LI = MF.livein_begin(),
+ E = MF.livein_end(); LI != E; ++LI)
+ if (LI->second)
+ MRI->copyRegToReg(*MF.begin(), MF.begin()->end(), LI->second,
+ LI->first, RegMap->getRegClass(LI->second));
+ }
+
+
+ // Finally, emit the code for all of the scheduled instructions.
+ DenseMap<SDOperand, unsigned> VRBaseMap;
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i]) {
+ for (unsigned j = 0, ee = SU->FlaggedNodes.size(); j != ee; j++)
+ EmitNode(SU->FlaggedNodes[j], VRBaseMap);
+ EmitNode(SU->Node, VRBaseMap);
+ } else {
+ // Null SUnit* is a noop.
+ EmitNoop();
+ }
+ }
+}
+
+/// dump - dump the schedule.
+void ScheduleDAG::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(&DAG);
+ else
+ cerr << "**** NOOP ****\n";
+ }
+}
+
+
+/// Run - perform scheduling.
+///
+MachineBasicBlock *ScheduleDAG::Run() {
+ TII = TM.getInstrInfo();
+ MRI = TM.getRegisterInfo();
+ RegMap = BB->getParent()->getSSARegMap();
+ ConstPool = BB->getParent()->getConstantPool();
+
+ Schedule();
+ return BB;
+}
+
+/// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
+/// a group of nodes flagged together.
+void SUnit::dump(const SelectionDAG *G) const {
+ cerr << "SU(" << NodeNum << "): ";
+ Node->dump(G);
+ cerr << "\n";
+ if (FlaggedNodes.size() != 0) {
+ for (unsigned i = 0, e = FlaggedNodes.size(); i != e; i++) {
+ cerr << " ";
+ FlaggedNodes[i]->dump(G);
+ cerr << "\n";
+ }
+ }
+}
+
+void SUnit::dumpAll(const SelectionDAG *G) const {
+ dump(G);
+
+ cerr << " # preds left : " << NumPredsLeft << "\n";
+ cerr << " # succs left : " << NumSuccsLeft << "\n";
+ cerr << " # chain preds left : " << NumChainPredsLeft << "\n";
+ cerr << " # chain succs left : " << NumChainSuccsLeft << "\n";
+ cerr << " Latency : " << Latency << "\n";
+ cerr << " Depth : " << Depth << "\n";
+ cerr << " Height : " << Height << "\n";
+
+ if (Preds.size() != 0) {
+ cerr << " Predecessors:\n";
+ for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+ if (I->second)
+ cerr << " ch #";
+ else
+ cerr << " val #";
+ cerr << I->first << " - SU(" << I->first->NodeNum << ")\n";
+ }
+ }
+ if (Succs.size() != 0) {
+ cerr << " Successors:\n";
+ for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
+ I != E; ++I) {
+ if (I->second)
+ cerr << " ch #";
+ else
+ cerr << " val #";
+ cerr << I->first << " - SU(" << I->first->NodeNum << ")\n";
+ }
+ }
+ cerr << "\n";
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
new file mode 100644
index 0000000..9e4e46f
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -0,0 +1,531 @@
+//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/Statistic.h"
+#include <climits>
+#include <queue>
+using namespace llvm;
+
+STATISTIC(NumNoops , "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static RegisterScheduler
+ tdListDAGScheduler("list-td", " Top-down list scheduler",
+ createTDListDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGList - The actual list scheduler implementation. This supports
+/// top-down scheduling.
+///
+class VISIBILITY_HIDDEN ScheduleDAGList : public ScheduleDAG {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ SchedulingPriorityQueue *AvailableQueue;
+
+  /// PendingQueue - This contains all of the instructions whose operands have
+  /// been issued, but whose results are not ready yet (due to the latency of
+  /// the operation).  Once the operands become available, the instruction is
+  /// added to the AvailableQueue.  Each entry records the SUnit together with
+  /// the cycle at which it becomes available.
+ std::vector<std::pair<unsigned, SUnit*> > PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ HazardRecognizer *HazardRec;
+
+public:
+ ScheduleDAGList(SelectionDAG &dag, MachineBasicBlock *bb,
+ const TargetMachine &tm,
+ SchedulingPriorityQueue *availqueue,
+ HazardRecognizer *HR)
+ : ScheduleDAG(dag, bb, tm),
+ AvailableQueue(availqueue), HazardRec(HR) {
+ }
+
+ ~ScheduleDAGList() {
+ delete HazardRec;
+ delete AvailableQueue;
+ }
+
+ void Schedule();
+
+private:
+ void ReleaseSucc(SUnit *SuccSU, bool isChain);
+ void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void ListScheduleTopDown();
+};
+} // end anonymous namespace
+
+HazardRecognizer::~HazardRecognizer() {}
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGList::Schedule() {
+ DOUT << "********** List Scheduling **********\n";
+
+ // Build scheduling units.
+ BuildSchedUnits();
+
+ AvailableQueue->initNodes(SUnitMap, SUnits);
+
+ ListScheduleTopDown();
+
+ AvailableQueue->releaseState();
+
+ DOUT << "*** Final schedule ***\n";
+ DEBUG(dumpSchedule());
+ DOUT << "\n";
+
+ // Emit in scheduled order
+ EmitSchedule();
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero.
+void ScheduleDAGList::ReleaseSucc(SUnit *SuccSU, bool isChain) {
+ if (!isChain)
+ SuccSU->NumPredsLeft--;
+ else
+ SuccSU->NumChainPredsLeft--;
+
+ assert(SuccSU->NumPredsLeft >= 0 && SuccSU->NumChainPredsLeft >= 0 &&
+ "List scheduling internal error");
+
+ if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) {
+ // Compute how many cycles it will be before this actually becomes
+ // available. This is the max of the start time of all predecessors plus
+ // their latencies.
+ unsigned AvailableCycle = 0;
+ for (SUnit::pred_iterator I = SuccSU->Preds.begin(),
+ E = SuccSU->Preds.end(); I != E; ++I) {
+ // If this is a token edge, we don't need to wait for the latency of the
+      // preceding instruction (e.g. a long-latency load) unless there is also
+ // some other data dependence.
+ SUnit &Pred = *I->first;
+ unsigned PredDoneCycle = Pred.Cycle;
+ if (!I->second)
+ PredDoneCycle += Pred.Latency;
+ else if (Pred.Latency)
+ PredDoneCycle += 1;
+
+ AvailableCycle = std::max(AvailableCycle, PredDoneCycle);
+ }
+
+ PendingQueue.push_back(std::make_pair(AvailableCycle, SuccSU));
+ }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DOUT << "*** Scheduling [" << CurCycle << "]: ";
+ DEBUG(SU->dump(&DAG));
+
+ Sequence.push_back(SU);
+ SU->Cycle = CurCycle;
+
+  // Top down: release successors.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ ReleaseSucc(I->first, I->second);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGList::ListScheduleTopDown() {
+ unsigned CurCycle = 0;
+ SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
+
+  // Add all nodes with no predecessors to the Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
+ AvailableQueue->push(&SUnits[i]);
+ SUnits[i].isAvailable = SUnits[i].isPending = true;
+ }
+ }
+
+ // Emit the entry node first.
+ ScheduleNodeTopDown(Entry, CurCycle);
+ HazardRec->EmitInstruction(Entry->Node);
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ while (!AvailableQueue->empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i].first == CurCycle) {
+ AvailableQueue->push(PendingQueue[i].second);
+ PendingQueue[i].second->isAvailable = true;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ } else {
+ assert(PendingQueue[i].first > CurCycle && "Negative latency?");
+ }
+ }
+
+ // If there are no instructions available, don't try to issue anything, and
+ // don't advance the hazard recognizer.
+ if (AvailableQueue->empty()) {
+ ++CurCycle;
+ continue;
+ }
+
+ SUnit *FoundSUnit = 0;
+ SDNode *FoundNode = 0;
+
+ bool HasNoopHazards = false;
+ while (!AvailableQueue->empty()) {
+ SUnit *CurSUnit = AvailableQueue->pop();
+
+ // Get the node represented by this SUnit.
+ FoundNode = CurSUnit->Node;
+
+ // If this is a pseudo op, like copyfromreg, look to see if there is a
+ // real target node flagged to it. If so, use the target node.
+ for (unsigned i = 0, e = CurSUnit->FlaggedNodes.size();
+ FoundNode->getOpcode() < ISD::BUILTIN_OP_END && i != e; ++i)
+ FoundNode = CurSUnit->FlaggedNodes[i];
+
+ HazardRecognizer::HazardType HT = HazardRec->getHazardType(FoundNode);
+ if (HT == HazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == HazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue->push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule, do it now.
+ if (FoundSUnit) {
+ ScheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundNode);
+ FoundSUnit->isScheduled = true;
+ AvailableQueue->ScheduledNode(FoundSUnit);
+
+ // If this is a pseudo-op node, we don't want to increment the current
+ // cycle.
+ if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
+ ++CurCycle;
+ } else if (!HasNoopHazards) {
+      // Otherwise, we have a pipeline stall, but no other problem; just advance
+      // the current cycle and try again.
+ DOUT << "*** Advancing cycle, no work to do\n";
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ ++CurCycle;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ DOUT << "*** Emitting noop\n";
+ HazardRec->EmitNoop();
+ Sequence.push_back(0); // NULL SUnit* -> noop
+ ++NumNoops;
+ ++CurCycle;
+ }
+ }
+
+#ifndef NDEBUG
+ // Verify that all SUnits were scheduled.
+ bool AnyNotSched = false;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ if (SUnits[i].NumPredsLeft != 0 || SUnits[i].NumChainPredsLeft != 0) {
+ if (!AnyNotSched)
+ cerr << "*** List scheduling failed! ***\n";
+ SUnits[i].dump(&DAG);
+ cerr << "has not been scheduled!\n";
+ AnyNotSched = true;
+ }
+ }
+ assert(!AnyNotSched);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// LatencyPriorityQueue Implementation
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using latency information to
+// reduce the length of the critical path through the basic block.
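+// Note that std::priority_queue pops its largest element first, so the
+// latency_sort comparator below orders the node with the smaller latency as
+// "less than" the other: the node furthest from the block exit is issued first.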
+//
+namespace {
+ class LatencyPriorityQueue;
+
+ /// Sorting functions for the Available queue.
+ struct latency_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ LatencyPriorityQueue *PQ;
+ latency_sort(LatencyPriorityQueue *pq) : PQ(pq) {}
+ latency_sort(const latency_sort &RHS) : PQ(RHS.PQ) {}
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+ };
+} // end anonymous namespace
+
+namespace {
+ class LatencyPriorityQueue : public SchedulingPriorityQueue {
+ // SUnits - The SUnits for the current graph.
+ std::vector<SUnit> *SUnits;
+
+ // Latencies - The latency (max of latency from this node to the bb exit)
+ // for each node.
+ std::vector<int> Latencies;
+
+ /// NumNodesSolelyBlocking - This vector contains, for every node in the
+ /// Queue, the number of nodes that the node is the sole unscheduled
+ /// predecessor for. This is used as a tie-breaker heuristic for better
+ /// mobility.
+ std::vector<unsigned> NumNodesSolelyBlocking;
+
+ std::priority_queue<SUnit*, std::vector<SUnit*>, latency_sort> Queue;
+public:
+ LatencyPriorityQueue() : Queue(latency_sort(this)) {
+ }
+
+ void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+ std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ // Calculate node priorities.
+ CalculatePriorities();
+ }
+ void releaseState() {
+ SUnits = 0;
+ Latencies.clear();
+ }
+
+ unsigned getLatency(unsigned NodeNum) const {
+ assert(NodeNum < Latencies.size());
+ return Latencies[NodeNum];
+ }
+
+ unsigned getNumSolelyBlockNodes(unsigned NodeNum) const {
+ assert(NodeNum < NumNodesSolelyBlocking.size());
+ return NumNodesSolelyBlocking[NodeNum];
+ }
+
+ bool empty() const { return Queue.empty(); }
+
+ virtual void push(SUnit *U) {
+ push_impl(U);
+ }
+ void push_impl(SUnit *U);
+
+ void push_all(const std::vector<SUnit *> &Nodes) {
+ for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
+ push_impl(Nodes[i]);
+ }
+
+ SUnit *pop() {
+ if (empty()) return NULL;
+ SUnit *V = Queue.top();
+ Queue.pop();
+ return V;
+ }
+
+ // ScheduledNode - As nodes are scheduled, we look to see if there are any
+ // successor nodes that have a single unscheduled predecessor. If so, that
+ // single predecessor has a higher priority, since scheduling it will make
+ // the node available.
+ void ScheduledNode(SUnit *Node);
+
+private:
+ void CalculatePriorities();
+ int CalcLatency(const SUnit &SU);
+ void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
+ SUnit *getSingleUnscheduledPred(SUnit *SU);
+
+ /// RemoveFromPriorityQueue - This is a really inefficient way to remove a
+ /// node from a priority queue. We should roll our own heap to make this
+ /// better or something.
+ void RemoveFromPriorityQueue(SUnit *SU) {
+ std::vector<SUnit*> Temp;
+
+ assert(!Queue.empty() && "Not in queue!");
+ while (Queue.top() != SU) {
+ Temp.push_back(Queue.top());
+ Queue.pop();
+ assert(!Queue.empty() && "Not in queue!");
+ }
+
+ // Remove the node from the PQ.
+ Queue.pop();
+
+ // Add all the other nodes back.
+ for (unsigned i = 0, e = Temp.size(); i != e; ++i)
+ Queue.push(Temp[i]);
+ }
+ };
+}
+
+bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more other nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return LHSNum < RHSNum;
+}
+
+
+/// CalcLatency - Calculate the maximal path latency from the node to the exit.
+///
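+/// For illustration: if two available nodes A and X each have Latency 2, but A
+/// still has a chain of two unscheduled successors below it while X has none,
+/// CalcLatency gives A the value 6 and X the value 2, so latency_sort pops A
+/// first.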
+int LatencyPriorityQueue::CalcLatency(const SUnit &SU) {
+ int &Latency = Latencies[SU.NodeNum];
+ if (Latency != -1)
+ return Latency;
+
+ int MaxSuccLatency = 0;
+ for (SUnit::const_succ_iterator I = SU.Succs.begin(), E = SU.Succs.end();
+ I != E; ++I)
+ MaxSuccLatency = std::max(MaxSuccLatency, CalcLatency(*I->first));
+
+ return Latency = MaxSuccLatency + SU.Latency;
+}
+
+/// CalculatePriorities - Calculate priorities of all scheduling units.
+void LatencyPriorityQueue::CalculatePriorities() {
+ Latencies.assign(SUnits->size(), -1);
+ NumNodesSolelyBlocking.assign(SUnits->size(), 0);
+
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+ CalcLatency((*SUnits)[i]);
+}
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->first;
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return 0;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+
+ return OnlyAvailablePred;
+}
+
+void LatencyPriorityQueue::push_impl(SUnit *SU) {
+ // Look at all of the successors of this node. Count the number of nodes that
+  // this node is the sole unscheduled predecessor for.
+ unsigned NumNodesBlocking = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (getSingleUnscheduledPred(I->first) == SU)
+ ++NumNodesBlocking;
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+
+ Queue.push(SU);
+}
+
+
+// ScheduledNode - As nodes are scheduled, we look to see if there are any
+// successor nodes that have a single unscheduled predecessor. If so, that
+// single predecessor has a higher priority, since scheduling it will make
+// the node available.
+void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ AdjustPriorityOfUnscheduledPreds(I->first);
+}
+
+/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
+ if (SU->isPending) return; // All preds scheduled.
+
+ SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+ if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
+
+ // Okay, we found a single predecessor that is available, but not scheduled.
+ // Since it is available, it must be in the priority queue. First remove it.
+ RemoveFromPriorityQueue(OnlyAvailablePred);
+
+ // Reinsert the node into the priority queue, which recomputes its
+ // NumNodesSolelyBlocking value.
+ push(OnlyAvailablePred);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// createTDListDAGScheduler - This creates a top-down list scheduler with a
+/// new hazard recognizer. This scheduler takes ownership of the hazard
+/// recognizer and deletes it when done.
+ScheduleDAG* llvm::createTDListDAGScheduler(SelectionDAGISel *IS,
+ SelectionDAG *DAG,
+ MachineBasicBlock *BB) {
+ return new ScheduleDAGList(*DAG, BB, DAG->getTarget(),
+ new LatencyPriorityQueue(),
+ IS->CreateTargetHazardRecognizer());
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
new file mode 100644
index 0000000..f95be7d
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -0,0 +1,944 @@
+//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements bottom-up and top-down register pressure reduction list
+// schedulers, using standard algorithms. The basic approach uses a priority
+// queue of available nodes to schedule. One at a time, nodes are taken from
+// the priority queue (thus in priority order), checked for legality to
+// schedule, and emitted if legal.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/Statistic.h"
+#include <climits>
+#include <queue>
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static RegisterScheduler
+ burrListDAGScheduler("list-burr",
+ " Bottom-up register reduction list scheduling",
+ createBURRListDAGScheduler);
+static RegisterScheduler
+ tdrListrDAGScheduler("list-tdrr",
+ " Top-down register reduction list scheduling",
+ createTDRRListDAGScheduler);
+
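+// The names registered above ("list-burr" and "list-tdrr") are what select
+// these schedulers from the command line, typically via llc's
+// -pre-RA-sched=<name> option (e.g. -pre-RA-sched=list-burr).
+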
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGRRList - The actual register reduction list scheduler
+/// implementation. This supports both top-down and bottom-up scheduling.
+///
+
+class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAG {
+private:
+ /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
+ /// it is top-down.
+ bool isBottomUp;
+
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ SchedulingPriorityQueue *AvailableQueue;
+
+public:
+ ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb,
+ const TargetMachine &tm, bool isbottomup,
+ SchedulingPriorityQueue *availqueue)
+ : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup),
+ AvailableQueue(availqueue) {
+ }
+
+ ~ScheduleDAGRRList() {
+ delete AvailableQueue;
+ }
+
+ void Schedule();
+
+private:
+ void ReleasePred(SUnit *PredSU, bool isChain, unsigned CurCycle);
+ void ReleaseSucc(SUnit *SuccSU, bool isChain, unsigned CurCycle);
+ void ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle);
+ void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void ListScheduleTopDown();
+ void ListScheduleBottomUp();
+ void CommuteNodesToReducePressure();
+};
+} // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGRRList::Schedule() {
+ DOUT << "********** List Scheduling **********\n";
+
+ // Build scheduling units.
+ BuildSchedUnits();
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(&DAG));
+ CalculateDepths();
+ CalculateHeights();
+
+ AvailableQueue->initNodes(SUnitMap, SUnits);
+
+ // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
+ if (isBottomUp)
+ ListScheduleBottomUp();
+ else
+ ListScheduleTopDown();
+
+ AvailableQueue->releaseState();
+
+ CommuteNodesToReducePressure();
+
+ DOUT << "*** Final schedule ***\n";
+ DEBUG(dumpSchedule());
+ DOUT << "\n";
+
+ // Emit in scheduled order
+ EmitSchedule();
+}
+
+/// CommuteNodesToReducePressure - If a node is two-address and commutable, and
+/// it is not the last use of its first operand, add it to the CommuteSet if
+/// possible. It will be commuted when it is translated to an MI.
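+///
+/// For example (illustrative only): for a commutable two-address node
+///   t1 = add t2, t3
+/// where the tied operand t2 is still used further down the schedule while t3
+/// is not, commuting the operands makes t3 the clobbered value, so no extra
+/// copy is needed to keep t2 alive.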
+void ScheduleDAGRRList::CommuteNodesToReducePressure() {
+ SmallPtrSet<SUnit*, 4> OperandSeen;
+ for (unsigned i = Sequence.size()-1; i != 0; --i) { // Ignore first node.
+ SUnit *SU = Sequence[i];
+ if (!SU) continue;
+ if (SU->isCommutable) {
+ unsigned Opc = SU->Node->getTargetOpcode();
+ unsigned NumRes = CountResults(SU->Node);
+ unsigned NumOps = CountOperands(SU->Node);
+ for (unsigned j = 0; j != NumOps; ++j) {
+ if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) == -1)
+ continue;
+
+ SDNode *OpN = SU->Node->getOperand(j).Val;
+ SUnit *OpSU = SUnitMap[OpN];
+ if (OpSU && OperandSeen.count(OpSU) == 1) {
+ // Ok, so SU is not the last use of OpSU, but SU is two-address so
+ // it will clobber OpSU. Try to commute SU if no other source operands
+ // are live below.
+ bool DoCommute = true;
+ for (unsigned k = 0; k < NumOps; ++k) {
+ if (k != j) {
+ OpN = SU->Node->getOperand(k).Val;
+ OpSU = SUnitMap[OpN];
+ if (OpSU && OperandSeen.count(OpSU) == 1) {
+ DoCommute = false;
+ break;
+ }
+ }
+ }
+ if (DoCommute)
+ CommuteSet.insert(SU->Node);
+ }
+
+ // Only look at the first use&def node for now.
+ break;
+ }
+ }
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (!I->second)
+ OperandSeen.insert(I->first);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the Available queue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain,
+ unsigned CurCycle) {
+ // FIXME: the distance between two nodes is not always == the predecessor's
+ // latency. For example, the reader can very well read the register written
+ // by the predecessor later than the issue cycle. It also depends on the
+ // interrupt model (drain vs. freeze).
+ PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency);
+
+ if (!isChain)
+ PredSU->NumSuccsLeft--;
+ else
+ PredSU->NumChainSuccsLeft--;
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft < 0 || PredSU->NumChainSuccsLeft < 0) {
+ cerr << "*** List scheduling failed! ***\n";
+ PredSU->dump(&DAG);
+ cerr << " has been released too many times!\n";
+ assert(0);
+ }
+#endif
+
+ if ((PredSU->NumSuccsLeft + PredSU->NumChainSuccsLeft) == 0) {
+ // EntryToken has to go last! Special case it here.
+ if (PredSU->Node->getOpcode() != ISD::EntryToken) {
+ PredSU->isAvailable = true;
+ AvailableQueue->push(PredSU);
+ }
+ }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+ DOUT << "*** Scheduling [" << CurCycle << "]: ";
+ DEBUG(SU->dump(&DAG));
+ SU->Cycle = CurCycle;
+
+ AvailableQueue->ScheduledNode(SU);
+ Sequence.push_back(SU);
+
+ // Bottom up: release predecessors
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ ReleasePred(I->first, I->second, CurCycle);
+ SU->isScheduled = true;
+}
+
+/// isReady - True if the node's lower cycle bound is less than or equal to the
+/// current scheduling cycle. Always true if all nodes have uniform latency 1.
+static inline bool isReady(SUnit *SU, unsigned CurCycle) {
+ return SU->CycleBound <= CurCycle;
+}
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleBottomUp() {
+ unsigned CurCycle = 0;
+ // Add root to Available queue.
+ AvailableQueue->push(SUnitMap[DAG.getRoot().Val]);
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ while (!AvailableQueue->empty()) {
+ SUnit *CurNode = AvailableQueue->pop();
+ while (CurNode && !isReady(CurNode, CurCycle)) {
+ NotReady.push_back(CurNode);
+ CurNode = AvailableQueue->pop();
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ AvailableQueue->push_all(NotReady);
+ NotReady.clear();
+
+ if (CurNode != NULL)
+ ScheduleNodeBottomUp(CurNode, CurCycle);
+ CurCycle++;
+ }
+
+ // Add entry node last
+ if (DAG.getEntryNode().Val != DAG.getRoot().Val) {
+ SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
+ Sequence.push_back(Entry);
+ }
+
+  // Reverse the order, since the schedule was built bottom-up.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+
+#ifndef NDEBUG
+ // Verify that all SUnits were scheduled.
+ bool AnyNotSched = false;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ if (SUnits[i].NumSuccsLeft != 0 || SUnits[i].NumChainSuccsLeft != 0) {
+ if (!AnyNotSched)
+ cerr << "*** List scheduling failed! ***\n";
+ SUnits[i].dump(&DAG);
+ cerr << "has not been scheduled!\n";
+ AnyNotSched = true;
+ }
+ }
+ assert(!AnyNotSched);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the Available queue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain,
+ unsigned CurCycle) {
+ // FIXME: the distance between two nodes is not always == the predecessor's
+ // latency. For example, the reader can very well read the register written
+ // by the predecessor later than the issue cycle. It also depends on the
+ // interrupt model (drain vs. freeze).
+ SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency);
+
+ if (!isChain)
+ SuccSU->NumPredsLeft--;
+ else
+ SuccSU->NumChainPredsLeft--;
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft < 0 || SuccSU->NumChainPredsLeft < 0) {
+ cerr << "*** List scheduling failed! ***\n";
+ SuccSU->dump(&DAG);
+ cerr << " has been released too many times!\n";
+ assert(0);
+ }
+#endif
+
+ if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) {
+ SuccSU->isAvailable = true;
+ AvailableQueue->push(SuccSU);
+ }
+}
+
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DOUT << "*** Scheduling [" << CurCycle << "]: ";
+ DEBUG(SU->dump(&DAG));
+ SU->Cycle = CurCycle;
+
+ AvailableQueue->ScheduledNode(SU);
+ Sequence.push_back(SU);
+
+ // Top down: release successors
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ ReleaseSucc(I->first, I->second, CurCycle);
+ SU->isScheduled = true;
+}
+
+void ScheduleDAGRRList::ListScheduleTopDown() {
+ unsigned CurCycle = 0;
+ SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
+
+  // Add all leaves to the Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
+ AvailableQueue->push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // Emit the entry node first.
+ ScheduleNodeTopDown(Entry, CurCycle);
+ CurCycle++;
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ while (!AvailableQueue->empty()) {
+ SUnit *CurNode = AvailableQueue->pop();
+ while (CurNode && !isReady(CurNode, CurCycle)) {
+ NotReady.push_back(CurNode);
+ CurNode = AvailableQueue->pop();
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ AvailableQueue->push_all(NotReady);
+ NotReady.clear();
+
+ if (CurNode != NULL)
+ ScheduleNodeTopDown(CurNode, CurCycle);
+ CurCycle++;
+ }
+
+
+#ifndef NDEBUG
+ // Verify that all SUnits were scheduled.
+ bool AnyNotSched = false;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ if (!SUnits[i].isScheduled) {
+ if (!AnyNotSched)
+ cerr << "*** List scheduling failed! ***\n";
+ SUnits[i].dump(&DAG);
+ cerr << "has not been scheduled!\n";
+ AnyNotSched = true;
+ }
+ }
+ assert(!AnyNotSched);
+#endif
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// RegReductionPriorityQueue Implementation
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
+// to reduce register pressure.
+//
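+// With the numbering computed below, a leaf operand gets the value 1, and a
+// node whose non-chain operands all carry the same value v gets v plus the
+// number of extra operands tying at v. Roughly, the number grows with the
+// size of the subtree feeding a node, and the bottom-up and top-down queues
+// below combine it with several tie breakers to keep fewer values live at
+// once.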
+namespace {
+ template<class SF>
+ class RegReductionPriorityQueue;
+
+ /// Sorting functions for the Available queue.
+ struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
+ bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
+ bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+ };
+
+ struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
+ td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
+ td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+ };
+} // end anonymous namespace
+
+static inline bool isCopyFromLiveIn(const SUnit *SU) {
+ SDNode *N = SU->Node;
+ return N->getOpcode() == ISD::CopyFromReg &&
+ N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag;
+}
+
+namespace {
+ template<class SF>
+ class VISIBILITY_HIDDEN RegReductionPriorityQueue
+ : public SchedulingPriorityQueue {
+ std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue;
+
+ public:
+ RegReductionPriorityQueue() :
+ Queue(SF(this)) {}
+
+ virtual void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+ std::vector<SUnit> &sunits) {}
+ virtual void releaseState() {}
+
+ virtual unsigned getNodePriority(const SUnit *SU) const {
+ return 0;
+ }
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ Queue.push(U);
+ }
+ void push_all(const std::vector<SUnit *> &Nodes) {
+ for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
+ Queue.push(Nodes[i]);
+ }
+
+ SUnit *pop() {
+ if (empty()) return NULL;
+ SUnit *V = Queue.top();
+ Queue.pop();
+ return V;
+ }
+
+ virtual bool isDUOperand(const SUnit *SU1, const SUnit *SU2) {
+ return false;
+ }
+ };
+
+ template<class SF>
+ class VISIBILITY_HIDDEN BURegReductionPriorityQueue
+ : public RegReductionPriorityQueue<SF> {
+    // SUnitMap - SDNode to SUnit mapping (n -> 1).
+ DenseMap<SDNode*, SUnit*> *SUnitMap;
+
+ // SUnits - The SUnits for the current graph.
+ const std::vector<SUnit> *SUnits;
+
+ // SethiUllmanNumbers - The SethiUllman number for each node.
+ std::vector<unsigned> SethiUllmanNumbers;
+
+ const TargetInstrInfo *TII;
+ public:
+ BURegReductionPriorityQueue(const TargetInstrInfo *tii)
+ : TII(tii) {}
+
+ void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+ std::vector<SUnit> &sunits) {
+ SUnitMap = &sumap;
+ SUnits = &sunits;
+ // Add pseudo dependency edges for two-address nodes.
+ AddPseudoTwoAddrDeps();
+ // Calculate node priorities.
+ CalculateSethiUllmanNumbers();
+ }
+
+ void releaseState() {
+ SUnits = 0;
+ SethiUllmanNumbers.clear();
+ }
+
+ unsigned getNodePriority(const SUnit *SU) const {
+ assert(SU->NodeNum < SethiUllmanNumbers.size());
+ unsigned Opc = SU->Node->getOpcode();
+ if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU))
+ // CopyFromReg should be close to its def because it restricts
+ // allocation choices. But if it is a livein then perhaps we want it
+ // closer to its uses so it can be coalesced.
+ return 0xffff;
+ else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return 0;
+ else if (SU->NumSuccs == 0)
+ // If SU does not have a use, i.e. it doesn't produce a value that would
+ // be consumed (e.g. store), then it terminates a chain of computation.
+ // Give it a large SethiUllman number so it will be scheduled right
+        // before its predecessors, without lengthening their live ranges.
+ return 0xffff;
+ else if (SU->NumPreds == 0)
+ // If SU does not have a def, schedule it close to its uses because it
+ // does not lengthen any live ranges.
+ return 0;
+ else
+ return SethiUllmanNumbers[SU->NodeNum];
+ }
+
+ bool isDUOperand(const SUnit *SU1, const SUnit *SU2) {
+ unsigned Opc = SU1->Node->getTargetOpcode();
+ unsigned NumRes = ScheduleDAG::CountResults(SU1->Node);
+ unsigned NumOps = ScheduleDAG::CountOperands(SU1->Node);
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) == -1)
+ continue;
+ if (SU1->Node->getOperand(i).isOperand(SU2->Node))
+ return true;
+ }
+ return false;
+ }
+ private:
+ bool canClobber(SUnit *SU, SUnit *Op);
+ void AddPseudoTwoAddrDeps();
+ void CalculateSethiUllmanNumbers();
+ unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
+ };
+
+
+ template<class SF>
+ class TDRegReductionPriorityQueue : public RegReductionPriorityQueue<SF> {
+    // SUnitMap - SDNode to SUnit mapping (n -> 1).
+ DenseMap<SDNode*, SUnit*> *SUnitMap;
+
+ // SUnits - The SUnits for the current graph.
+ const std::vector<SUnit> *SUnits;
+
+ // SethiUllmanNumbers - The SethiUllman number for each node.
+ std::vector<unsigned> SethiUllmanNumbers;
+
+ public:
+ TDRegReductionPriorityQueue() {}
+
+ void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+ std::vector<SUnit> &sunits) {
+ SUnitMap = &sumap;
+ SUnits = &sunits;
+ // Calculate node priorities.
+ CalculateSethiUllmanNumbers();
+ }
+
+ void releaseState() {
+ SUnits = 0;
+ SethiUllmanNumbers.clear();
+ }
+
+ unsigned getNodePriority(const SUnit *SU) const {
+ assert(SU->NodeNum < SethiUllmanNumbers.size());
+ return SethiUllmanNumbers[SU->NodeNum];
+ }
+
+ private:
+ void CalculateSethiUllmanNumbers();
+ unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
+ };
+}
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+ unsigned MaxCycle = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ unsigned Cycle = I->first->Cycle;
+    // If a bunch of CopyToRegs are stacked up, they should be considered
+ // to be at the same position.
+ if (I->first->Node->getOpcode() == ISD::CopyToReg)
+ Cycle = closestSucc(I->first)+1;
+ if (Cycle > MaxCycle)
+ MaxCycle = Cycle;
+ }
+ return MaxCycle;
+}
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers. Live-in operands and live-out results don't count
+/// since they are "fixed".
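+/// (Concretely: each non-chain operand that is not a CopyFromReg adds 1, and
+/// each non-chain user that is not a CopyToReg adds 10, so results consumed
+/// inside the block weigh much more heavily than extra operands.)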
+static unsigned calcMaxScratches(const SUnit *SU) {
+ unsigned Scratches = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->second) continue; // ignore chain preds
+ if (I->first->Node->getOpcode() != ISD::CopyFromReg)
+ Scratches++;
+ }
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->second) continue; // ignore chain succs
+ if (I->first->Node->getOpcode() != ISD::CopyToReg)
+ Scratches += 10;
+ }
+ return Scratches;
+}
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+ // There used to be a special tie breaker here that looked for
+ // two-address instructions and preferred the instruction with a
+ // def&use operand. The special case triggered diagnostics when
+ // _GLIBCXX_DEBUG was enabled because it broke the strict weak
+ // ordering that priority_queue requires. It didn't help much anyway
+ // because AddPseudoTwoAddrDeps already covers many of the cases
+ // where it would have applied. In addition, it's counter-intuitive
+ // that a tie breaker would be the first thing attempted. There's a
+ // "real" tie breaker below that is the operation of last resort.
+ // The fact that the "special tie breaker" would trigger when there
+ // wasn't otherwise a tie is what broke the strict weak ordering
+ // constraint.
+
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+ if (LPriority > RPriority)
+ return true;
+ else if (LPriority == RPriority) {
+ // Try schedule def + use closer when Sethi-Ullman numbers are the same.
+ // e.g.
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // and the following instructions are both ready.
+ // t2 = op c3
+ // t4 = op c4
+ //
+ // Then schedule t2 = op first.
+ // i.e.
+ // t4 = op c4
+ // t2 = op c3
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // This creates more short live intervals.
+ unsigned LDist = closestSucc(left);
+ unsigned RDist = closestSucc(right);
+ if (LDist < RDist)
+ return true;
+ else if (LDist == RDist) {
+ // Intuitively, it's good to push down instructions whose results are
+ // liveout so their long live ranges won't conflict with other values
+ // which are needed inside the BB. Further prioritize liveout instructions
+ // by the number of operands which are calculated within the BB.
+ unsigned LScratch = calcMaxScratches(left);
+ unsigned RScratch = calcMaxScratches(right);
+ if (LScratch > RScratch)
+ return true;
+ else if (LScratch == RScratch)
+ if (left->Height > right->Height)
+ return true;
+ else if (left->Height == right->Height)
+ if (left->Depth < right->Depth)
+ return true;
+ else if (left->Depth == right->Depth)
+ if (left->CycleBound > right->CycleBound)
+ return true;
+ }
+ }
+ return false;
+}
+
+// FIXME: This is probably too slow!
+static void isReachable(SUnit *SU, SUnit *TargetSU,
+ SmallPtrSet<SUnit*, 32> &Visited, bool &Reached) {
+ if (Reached) return;
+ if (SU == TargetSU) {
+ Reached = true;
+ return;
+ }
+ if (!Visited.insert(SU)) return;
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E;
+ ++I)
+ isReachable(I->first, TargetSU, Visited, Reached);
+}
+
+static bool isReachable(SUnit *SU, SUnit *TargetSU) {
+ SmallPtrSet<SUnit*, 32> Visited;
+ bool Reached = false;
+ isReachable(SU, TargetSU, Visited, Reached);
+ return Reached;
+}
+
+template<class SF>
+bool BURegReductionPriorityQueue<SF>::canClobber(SUnit *SU, SUnit *Op) {
+ if (SU->isTwoAddress) {
+ unsigned Opc = SU->Node->getTargetOpcode();
+ unsigned NumRes = ScheduleDAG::CountResults(SU->Node);
+ unsigned NumOps = ScheduleDAG::CountOperands(SU->Node);
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) != -1) {
+ SDNode *DU = SU->Node->getOperand(i).Val;
+ if (Op == (*SUnitMap)[DU])
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule).
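+///
+/// For example (illustrative only): if A = op B, ... is two-address with B as
+/// the tied operand, and C also reads B, the pseudo edge orders C before A so
+/// that C reads B before A clobbers it, avoiding a copy of B.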
+template<class SF>
+void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = (SUnit *)&((*SUnits)[i]);
+ if (!SU->isTwoAddress)
+ continue;
+
+ SDNode *Node = SU->Node;
+ if (!Node->isTargetOpcode())
+ continue;
+
+ unsigned Opc = Node->getTargetOpcode();
+ unsigned NumRes = ScheduleDAG::CountResults(Node);
+ unsigned NumOps = ScheduleDAG::CountOperands(Node);
+ for (unsigned j = 0; j != NumOps; ++j) {
+ if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) != -1) {
+ SDNode *DU = SU->Node->getOperand(j).Val;
+ SUnit *DUSU = (*SUnitMap)[DU];
+ if (!DUSU) continue;
+ for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end();
+ I != E; ++I) {
+ if (I->second) continue;
+ SUnit *SuccSU = I->first;
+ if (SuccSU != SU &&
+ (!canClobber(SuccSU, DUSU) ||
+ (!SU->isCommutable && SuccSU->isCommutable))){
+ if (SuccSU->Depth == SU->Depth && !isReachable(SuccSU, SU)) {
+ DOUT << "Adding an edge from SU # " << SU->NodeNum
+ << " to SU #" << SuccSU->NodeNum << "\n";
+ if (SU->addPred(SuccSU, true))
+ SU->NumChainPredsLeft++;
+ if (SuccSU->addSucc(SU, true))
+ SuccSU->NumChainSuccsLeft++;
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number.
+/// Smaller number is the higher priority.
+template<class SF>
+unsigned BURegReductionPriorityQueue<SF>::
+CalcNodeSethiUllmanNumber(const SUnit *SU) {
+ unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
+ if (SethiUllmanNumber != 0)
+ return SethiUllmanNumber;
+
+ unsigned Extra = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->second) continue; // ignore chain preds
+ SUnit *PredSU = I->first;
+ unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
+ if (PredSethiUllman > SethiUllmanNumber) {
+ SethiUllmanNumber = PredSethiUllman;
+ Extra = 0;
+ } else if (PredSethiUllman == SethiUllmanNumber && !I->second)
+ Extra++;
+ }
+
+ SethiUllmanNumber += Extra;
+
+ if (SethiUllmanNumber == 0)
+ SethiUllmanNumber = 1;
+
+ return SethiUllmanNumber;
+}
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+template<class SF>
+void BURegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
+ SethiUllmanNumbers.assign(SUnits->size(), 0);
+
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+ CalcNodeSethiUllmanNumber(&(*SUnits)[i]);
+}
+
+static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) {
+ unsigned Sum = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ SUnit *SuccSU = I->first;
+ for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
+ EE = SuccSU->Preds.end(); II != EE; ++II) {
+ SUnit *PredSU = II->first;
+ if (!PredSU->isScheduled)
+ Sum++;
+ }
+ }
+
+ return Sum;
+}
+
+
+// Top down
+bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+ bool LIsTarget = left->Node->isTargetOpcode();
+ bool RIsTarget = right->Node->isTargetOpcode();
+ bool LIsFloater = LIsTarget && left->NumPreds == 0;
+ bool RIsFloater = RIsTarget && right->NumPreds == 0;
+ unsigned LBonus = (SumOfUnscheduledPredsOfSuccs(left) == 1) ? 2 : 0;
+ unsigned RBonus = (SumOfUnscheduledPredsOfSuccs(right) == 1) ? 2 : 0;
+
+ if (left->NumSuccs == 0 && right->NumSuccs != 0)
+ return false;
+ else if (left->NumSuccs != 0 && right->NumSuccs == 0)
+ return true;
+
+  // Special tie breaker: if two nodes share an operand, the one that uses it
+ // as a def&use operand is preferred.
+ if (LIsTarget && RIsTarget) {
+ if (left->isTwoAddress && !right->isTwoAddress) {
+ SDNode *DUNode = left->Node->getOperand(0).Val;
+ if (DUNode->isOperand(right->Node))
+ RBonus += 2;
+ }
+ if (!left->isTwoAddress && right->isTwoAddress) {
+ SDNode *DUNode = right->Node->getOperand(0).Val;
+ if (DUNode->isOperand(left->Node))
+ LBonus += 2;
+ }
+ }
+ if (LIsFloater)
+ LBonus -= 2;
+ if (RIsFloater)
+ RBonus -= 2;
+ if (left->NumSuccs == 1)
+ LBonus += 2;
+ if (right->NumSuccs == 1)
+ RBonus += 2;
+
+ if (LPriority+LBonus < RPriority+RBonus)
+ return true;
+ else if (LPriority == RPriority)
+ if (left->Depth < right->Depth)
+ return true;
+ else if (left->Depth == right->Depth)
+ if (left->NumSuccsLeft > right->NumSuccsLeft)
+ return true;
+ else if (left->NumSuccsLeft == right->NumSuccsLeft)
+ if (left->CycleBound > right->CycleBound)
+ return true;
+ return false;
+}
+
+/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number.
+/// Smaller number is the higher priority.
+template<class SF>
+unsigned TDRegReductionPriorityQueue<SF>::
+CalcNodeSethiUllmanNumber(const SUnit *SU) {
+ unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
+ if (SethiUllmanNumber != 0)
+ return SethiUllmanNumber;
+
+ unsigned Opc = SU->Node->getOpcode();
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ SethiUllmanNumber = 0xffff;
+ else if (SU->NumSuccsLeft == 0)
+ // If SU does not have a use, i.e. it doesn't produce a value that would
+ // be consumed (e.g. store), then it terminates a chain of computation.
+ // Give it a small SethiUllman number so it will be scheduled right before
+    // its predecessors, without lengthening their live ranges.
+ SethiUllmanNumber = 0;
+ else if (SU->NumPredsLeft == 0 &&
+ (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU)))
+ SethiUllmanNumber = 0xffff;
+ else {
+ int Extra = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->second) continue; // ignore chain preds
+ SUnit *PredSU = I->first;
+ unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
+ if (PredSethiUllman > SethiUllmanNumber) {
+ SethiUllmanNumber = PredSethiUllman;
+ Extra = 0;
+ } else if (PredSethiUllman == SethiUllmanNumber && !I->second)
+ Extra++;
+ }
+
+ SethiUllmanNumber += Extra;
+ }
+
+ return SethiUllmanNumber;
+}
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+template<class SF>
+void TDRegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
+ SethiUllmanNumbers.assign(SUnits->size(), 0);
+
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+ CalcNodeSethiUllmanNumber(&(*SUnits)[i]);
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
+ SelectionDAG *DAG,
+ MachineBasicBlock *BB) {
+ const TargetInstrInfo *TII = DAG->getTarget().getInstrInfo();
+ return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true,
+ new BURegReductionPriorityQueue<bu_ls_rr_sort>(TII));
+}
+
+llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
+ SelectionDAG *DAG,
+ MachineBasicBlock *BB) {
+ return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), false,
+ new TDRegReductionPriorityQueue<td_ls_rr_sort>());
+}
+
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSimple.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSimple.cpp
new file mode 100644
index 0000000..62854f7
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSimple.cpp
@@ -0,0 +1,1156 @@
+//===-- ScheduleDAGSimple.cpp - Implement a trivial DAG scheduler ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a simple two pass scheduler. The first pass attempts to push
+// backward any lengthy instructions and critical paths. The second pass packs
+// instructions into semi-optimal time slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+
+static RegisterScheduler
+ bfsDAGScheduler("none", " No scheduling: breadth first sequencing",
+ createBFS_DAGScheduler);
+static RegisterScheduler
+ simpleDAGScheduler("simple",
+ " Simple two pass scheduling: minimize critical path "
+ "and maximize processor utilization",
+ createSimpleDAGScheduler);
+static RegisterScheduler
+ noitinDAGScheduler("simple-noitin",
+ " Simple two pass scheduling: Same as simple "
+ "except using generic latency",
+ createNoItinsDAGScheduler);
+
+class NodeInfo;
+typedef NodeInfo *NodeInfoPtr;
+typedef std::vector<NodeInfoPtr> NIVector;
+typedef std::vector<NodeInfoPtr>::iterator NIIterator;
+
+//===--------------------------------------------------------------------===//
+///
+/// Node group - This struct is used to manage flagged node groups.
+///
+class NodeGroup {
+public:
+ NodeGroup *Next;
+private:
+ NIVector Members; // Group member nodes
+ NodeInfo *Dominator; // Node with highest latency
+ unsigned Latency; // Total latency of the group
+ int Pending; // Number of visits pending before
+ // adding to order
+
+public:
+ // Ctor.
+ NodeGroup() : Next(NULL), Dominator(NULL), Pending(0) {}
+
+ // Accessors
+ inline void setDominator(NodeInfo *D) { Dominator = D; }
+ inline NodeInfo *getTop() { return Members.front(); }
+ inline NodeInfo *getBottom() { return Members.back(); }
+ inline NodeInfo *getDominator() { return Dominator; }
+ inline void setLatency(unsigned L) { Latency = L; }
+ inline unsigned getLatency() { return Latency; }
+ inline int getPending() const { return Pending; }
+ inline void setPending(int P) { Pending = P; }
+ inline int addPending(int I) { return Pending += I; }
+
+ // Pass thru
+ inline bool group_empty() { return Members.empty(); }
+ inline NIIterator group_begin() { return Members.begin(); }
+ inline NIIterator group_end() { return Members.end(); }
+ inline void group_push_back(const NodeInfoPtr &NI) {
+ Members.push_back(NI);
+ }
+ inline NIIterator group_insert(NIIterator Pos, const NodeInfoPtr &NI) {
+ return Members.insert(Pos, NI);
+ }
+ inline void group_insert(NIIterator Pos, NIIterator First,
+ NIIterator Last) {
+ Members.insert(Pos, First, Last);
+ }
+
+ static void Add(NodeInfo *D, NodeInfo *U);
+};
+
+//===--------------------------------------------------------------------===//
+///
+/// NodeInfo - This struct tracks information used to schedule a node.
+///
+class NodeInfo {
+private:
+ int Pending; // Number of visits pending before
+ // adding to order
+public:
+ SDNode *Node; // DAG node
+ InstrStage *StageBegin; // First stage in itinerary
+ InstrStage *StageEnd; // Last+1 stage in itinerary
+ unsigned Latency; // Total cycles to complete instr
+ bool IsCall : 1; // Is function call
+ bool IsLoad : 1; // Is memory load
+ bool IsStore : 1; // Is memory store
+ unsigned Slot; // Node's time slot
+ NodeGroup *Group; // Grouping information
+#ifndef NDEBUG
+ unsigned Preorder; // Index before scheduling
+#endif
+
+ // Ctor.
+ NodeInfo(SDNode *N = NULL)
+ : Pending(0)
+ , Node(N)
+ , StageBegin(NULL)
+ , StageEnd(NULL)
+ , Latency(0)
+ , IsCall(false)
+ , Slot(0)
+ , Group(NULL)
+#ifndef NDEBUG
+ , Preorder(0)
+#endif
+ {}
+
+ // Accessors
+ inline bool isInGroup() const {
+    assert((!Group || !Group->group_empty()) && "Group with no members");
+ return Group != NULL;
+ }
+ inline bool isGroupDominator() const {
+ return isInGroup() && Group->getDominator() == this;
+ }
+ inline int getPending() const {
+ return Group ? Group->getPending() : Pending;
+ }
+ inline void setPending(int P) {
+ if (Group) Group->setPending(P);
+ else Pending = P;
+ }
+ inline int addPending(int I) {
+ if (Group) return Group->addPending(I);
+ else return Pending += I;
+ }
+};
+
+//===--------------------------------------------------------------------===//
+///
+/// NodeGroupIterator - Iterates over all the nodes indicated by the node
+/// info. If the node is in a group then iterate over the members of the
+/// group, otherwise just the node info.
+///
+class NodeGroupIterator {
+private:
+ NodeInfo *NI; // Node info
+ NIIterator NGI; // Node group iterator
+ NIIterator NGE; // Node group iterator end
+
+public:
+ // Ctor.
+ NodeGroupIterator(NodeInfo *N) : NI(N) {
+ // If the node is in a group then set up the group iterator. Otherwise
+ // the group iterators will trip first time out.
+ if (N->isInGroup()) {
+ // get Group
+ NodeGroup *Group = NI->Group;
+ NGI = Group->group_begin();
+ NGE = Group->group_end();
+      // Prevent this node from being used (it will be in the members list).
+ NI = NULL;
+ }
+ }
+
+ /// next - Return the next node info, otherwise NULL.
+ ///
+ NodeInfo *next() {
+ // If members list
+ if (NGI != NGE) return *NGI++;
+ // Use node as the result (may be NULL)
+ NodeInfo *Result = NI;
+ // Only use once
+ NI = NULL;
+ // Return node or NULL
+ return Result;
+ }
+};
+//===--------------------------------------------------------------------===//
+
+
+//===--------------------------------------------------------------------===//
+///
+/// NodeGroupOpIterator - Iterates over all the operands of a node. If the
+/// node is a member of a group, this iterates over all the operands of all
+/// the members of the group.
+///
+class NodeGroupOpIterator {
+private:
+ NodeInfo *NI; // Node containing operands
+ NodeGroupIterator GI; // Node group iterator
+ SDNode::op_iterator OI; // Operand iterator
+ SDNode::op_iterator OE; // Operand iterator end
+
+  /// CheckNode - Test if the node has more operands. If not, get the next node,
+ /// skipping over nodes that have no operands.
+ void CheckNode() {
+ // Only if operands are exhausted first
+ while (OI == OE) {
+ // Get next node info
+ NodeInfo *NI = GI.next();
+ // Exit if nodes are exhausted
+ if (!NI) return;
+ // Get node itself
+ SDNode *Node = NI->Node;
+ // Set up the operand iterators
+ OI = Node->op_begin();
+ OE = Node->op_end();
+ }
+ }
+
+public:
+ // Ctor.
+ NodeGroupOpIterator(NodeInfo *N)
+ : NI(N), GI(N), OI(SDNode::op_iterator()), OE(SDNode::op_iterator()) {}
+
+  /// isEnd - Returns true when no more operands are available.
+ ///
+ inline bool isEnd() { CheckNode(); return OI == OE; }
+
+ /// next - Returns the next available operand.
+ ///
+ inline SDOperand next() {
+ assert(OI != OE &&
+ "Not checking for end of NodeGroupOpIterator correctly");
+ return *OI++;
+ }
+};
+
+
+//===----------------------------------------------------------------------===//
+///
+/// BitsIterator - Provides iteration through individual bits in a bit vector.
+///
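+/// For example, iterating over the value 0b1010 yields 0b0010, then 0b1000,
+/// and then 0 once the bits are exhausted.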
+template<class T>
+class BitsIterator {
+private:
+ T Bits; // Bits left to iterate through
+
+public:
+ /// Ctor.
+ BitsIterator(T Initial) : Bits(Initial) {}
+
+ /// Next - Returns the next bit set or zero if exhausted.
+ inline T Next() {
+ // Get the rightmost bit set
+ T Result = Bits & -Bits;
+ // Remove from rest
+ Bits &= ~Result;
+ // Return single bit or zero
+ return Result;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+///
+/// ResourceTally - Manages the use of resources over time intervals. Each
+/// item (slot) in the tally vector represents the resources used at a given
+/// moment. A bit set to 1 indicates that a resource is in use; otherwise the
+/// resource is available. The tally is assumed to be large enough to schedule
+/// all current instructions (asserts otherwise).
+///
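+/// For example, with the RSInteger mask defined further below (two integer
+/// units), reserving an integer stage for two cycles sets one of the two
+/// RSInteger bits in two consecutive slots, leaving the other integer unit
+/// free for another instruction in those same slots.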
+template<class T>
+class ResourceTally {
+private:
+ std::vector<T> Tally; // Resources used per slot
+ typedef typename std::vector<T>::iterator Iter;
+ // Tally iterator
+
+ /// SlotsAvailable - Returns true if all units are available.
+ ///
+ bool SlotsAvailable(Iter Begin, unsigned N, unsigned ResourceSet,
+ unsigned &Resource) {
+ assert(N && "Must check availability with N != 0");
+ // Determine end of interval
+ Iter End = Begin + N;
+ assert(End <= Tally.end() && "Tally is not large enough for schedule");
+
+ // Iterate thru each resource
+ BitsIterator<T> Resources(ResourceSet & ~*Begin);
+ while (unsigned Res = Resources.Next()) {
+ // Check if resource is available for next N slots
+ Iter Interval = End;
+ do {
+ Interval--;
+ if (*Interval & Res) break;
+ } while (Interval != Begin);
+
+ // If available for N
+ if (Interval == Begin) {
+ // Success
+ Resource = Res;
+ return true;
+ }
+ }
+
+ // No luck
+ Resource = 0;
+ return false;
+ }
+
+ /// RetrySlot - Finds a good candidate slot to retry search.
+ Iter RetrySlot(Iter Begin, unsigned N, unsigned ResourceSet) {
+ assert(N && "Must check availability with N != 0");
+ // Determine end of interval
+ Iter End = Begin + N;
+ assert(End <= Tally.end() && "Tally is not large enough for schedule");
+
+ while (Begin != End--) {
+ // Clear units in use
+ ResourceSet &= ~*End;
+ // If no units left then we should go no further
+ if (!ResourceSet) return End + 1;
+ }
+ // Made it all the way through
+ return Begin;
+ }
+
+ /// FindAndReserveStages - Return true if the stages can be completed. If
+ /// so mark as busy.
+ bool FindAndReserveStages(Iter Begin,
+ InstrStage *Stage, InstrStage *StageEnd) {
+ // If at last stage then we're done
+ if (Stage == StageEnd) return true;
+ // Get number of cycles for current stage
+ unsigned N = Stage->Cycles;
+ // Check to see if N slots are available, if not fail
+ unsigned Resource;
+ if (!SlotsAvailable(Begin, N, Stage->Units, Resource)) return false;
+ // Check to see if remaining stages are available, if not fail
+ if (!FindAndReserveStages(Begin + N, Stage + 1, StageEnd)) return false;
+ // Reserve resource
+ Reserve(Begin, N, Resource);
+ // Success
+ return true;
+ }
+
+ /// Reserve - Mark busy (set) the specified N slots.
+ void Reserve(Iter Begin, unsigned N, unsigned Resource) {
+ // Determine end of interval
+ Iter End = Begin + N;
+ assert(End <= Tally.end() && "Tally is not large enough for schedule");
+
+ // Set resource bit in each slot
+ for (; Begin < End; Begin++)
+ *Begin |= Resource;
+ }
+
+ /// FindSlots - Starting from Begin, locate consecutive slots where all stages
+  /// can be completed. Returns an iterator to the first such slot.
+ Iter FindSlots(Iter Begin, InstrStage *StageBegin, InstrStage *StageEnd) {
+ // Track position
+ Iter Cursor = Begin;
+
+ // Try all possible slots forward
+ while (true) {
+ // Try at cursor, if successful return position.
+ if (FindAndReserveStages(Cursor, StageBegin, StageEnd)) return Cursor;
+ // Locate a better position
+ Cursor = RetrySlot(Cursor + 1, StageBegin->Cycles, StageBegin->Units);
+ }
+ }
+
+public:
+ /// Initialize - Resize and zero the tally to the specified number of time
+ /// slots.
+ inline void Initialize(unsigned N) {
+ Tally.assign(N, 0); // Initialize tally to all zeros.
+ }
+
+ // FindAndReserve - Locate an ideal slot for the specified stages and mark
+ // as busy.
+ unsigned FindAndReserve(unsigned Slot, InstrStage *StageBegin,
+ InstrStage *StageEnd) {
+ // Where to begin
+ Iter Begin = Tally.begin() + Slot;
+ // Find a free slot
+ Iter Where = FindSlots(Begin, StageBegin, StageEnd);
+ // Distance is slot number
+ unsigned Final = Where - Tally.begin();
+ return Final;
+ }
+
+};
+
+//===----------------------------------------------------------------------===//
+///
+/// ScheduleDAGSimple - Simple two pass scheduler.
+///
+class VISIBILITY_HIDDEN ScheduleDAGSimple : public ScheduleDAG {
+private:
+ bool NoSched; // Just do a BFS schedule, nothing fancy
+ bool NoItins; // Don't use itineraries?
+ ResourceTally<unsigned> Tally; // Resource usage tally
+ unsigned NSlots; // Total latency
+ static const unsigned NotFound = ~0U; // Search marker
+
+ unsigned NodeCount; // Number of nodes in DAG
+ std::map<SDNode *, NodeInfo *> Map; // Map nodes to info
+ bool HasGroups; // True if there are any groups
+ NodeInfo *Info; // Info for nodes being scheduled
+ NIVector Ordering; // Emit ordering of nodes
+ NodeGroup *HeadNG, *TailNG; // Keep track of allocated NodeGroups
+
+public:
+
+ // Ctor.
+ ScheduleDAGSimple(bool noSched, bool noItins, SelectionDAG &dag,
+ MachineBasicBlock *bb, const TargetMachine &tm)
+ : ScheduleDAG(dag, bb, tm), NoSched(noSched), NoItins(noItins), NSlots(0),
+ NodeCount(0), HasGroups(false), Info(NULL), HeadNG(NULL), TailNG(NULL) {
+ assert(&TII && "Target doesn't provide instr info?");
+ assert(&MRI && "Target doesn't provide register info?");
+ }
+
+ virtual ~ScheduleDAGSimple() {
+ if (Info)
+ delete[] Info;
+
+ NodeGroup *NG = HeadNG;
+ while (NG) {
+ NodeGroup *NextSU = NG->Next;
+ delete NG;
+ NG = NextSU;
+ }
+ }
+
+ void Schedule();
+
+ /// getNI - Returns the node info for the specified node.
+ ///
+ NodeInfo *getNI(SDNode *Node) { return Map[Node]; }
+
+private:
+ static bool isDefiner(NodeInfo *A, NodeInfo *B);
+ void IncludeNode(NodeInfo *NI);
+ void VisitAll();
+ void GatherSchedulingInfo();
+ void FakeGroupDominators();
+ bool isStrongDependency(NodeInfo *A, NodeInfo *B);
+ bool isWeakDependency(NodeInfo *A, NodeInfo *B);
+ void ScheduleBackward();
+ void ScheduleForward();
+
+ void AddToGroup(NodeInfo *D, NodeInfo *U);
+ /// PrepareNodeInfo - Set up the basic minimum node info for scheduling.
+ ///
+ void PrepareNodeInfo();
+
+ /// IdentifyGroups - Put flagged nodes into groups.
+ ///
+ void IdentifyGroups();
+
+ /// print - Print ordering to specified output stream.
+ ///
+ void print(std::ostream &O) const;
+ void print(std::ostream *O) const { if (O) print(*O); }
+
+ void dump(const char *tag) const;
+
+ virtual void dump() const;
+
+ /// EmitAll - Emit all nodes in schedule sorted order.
+ ///
+ void EmitAll();
+
+ /// printNI - Print node info.
+ ///
+ void printNI(std::ostream &O, NodeInfo *NI) const;
+ void printNI(std::ostream *O, NodeInfo *NI) const { if (O) printNI(*O, NI); }
+
+  /// printChanges - Highlight changes in order caused by scheduling.
+ ///
+ void printChanges(unsigned Index) const;
+};
+
+//===----------------------------------------------------------------------===//
+/// Special case itineraries.
+///
+enum {
+ CallLatency = 40, // To push calls back in time
+
+ RSInteger = 0xC0000000, // Two integer units
+ RSFloat = 0x30000000, // Two float units
+ RSLoadStore = 0x0C000000, // Two load store units
+ RSBranch = 0x02000000 // One branch unit
+};
+static InstrStage LoadStage = { 5, RSLoadStore };
+static InstrStage StoreStage = { 2, RSLoadStore };
+static InstrStage IntStage = { 2, RSInteger };
+static InstrStage FloatStage = { 3, RSFloat };
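+// (Read: a load occupies one of the two load/store units for 5 slots, a store
+// for 2 slots, an integer op an integer unit for 2 slots, and a float op a
+// float unit for 3 slots.)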
+//===----------------------------------------------------------------------===//
+
+} // namespace
+
+//===----------------------------------------------------------------------===//
+
+/// PrepareNodeInfo - Set up the basic minimum node info for scheduling.
+///
+void ScheduleDAGSimple::PrepareNodeInfo() {
+ // Allocate node information
+ Info = new NodeInfo[NodeCount];
+
+ unsigned i = 0;
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = DAG.allnodes_end(); I != E; ++I, ++i) {
+ // Fast reference to node schedule info
+ NodeInfo* NI = &Info[i];
+ // Set up map
+ Map[I] = NI;
+ // Set node
+ NI->Node = I;
+ // Set pending visit count
+ NI->setPending(I->use_size());
+ }
+}
+
+/// IdentifyGroups - Put flagged nodes into groups.
+///
+void ScheduleDAGSimple::IdentifyGroups() {
+ for (unsigned i = 0, N = NodeCount; i < N; i++) {
+ NodeInfo* NI = &Info[i];
+ SDNode *Node = NI->Node;
+
+ // For each operand (in reverse to only look at flags)
+ for (unsigned N = Node->getNumOperands(); 0 < N--;) {
+ // Get operand
+ SDOperand Op = Node->getOperand(N);
+ // No more flags to walk
+ if (Op.getValueType() != MVT::Flag) break;
+ // Add to node group
+ AddToGroup(getNI(Op.Val), NI);
+ // Let everyone else know
+ HasGroups = true;
+ }
+ }
+}
+
+/// CountInternalUses - Returns the number of edges between the two nodes.
+///
+static unsigned CountInternalUses(NodeInfo *D, NodeInfo *U) {
+ unsigned N = 0;
+ for (unsigned M = U->Node->getNumOperands(); 0 < M--;) {
+ SDOperand Op = U->Node->getOperand(M);
+ if (Op.Val == D->Node) N++;
+ }
+
+ return N;
+}
+
+//===----------------------------------------------------------------------===//
+/// AddToGroup - Adds a definer and user pair to a node group.
+///
+void ScheduleDAGSimple::AddToGroup(NodeInfo *D, NodeInfo *U) {
+ // Get current groups
+ NodeGroup *DGroup = D->Group;
+ NodeGroup *UGroup = U->Group;
+ // If both are members of groups
+ if (DGroup && UGroup) {
+    // There may already have been another edge connecting these two groups.
+ if (DGroup == UGroup) return;
+ // Add the pending users count
+ DGroup->addPending(UGroup->getPending());
+ // For each member of the users group
+ NodeGroupIterator UNGI(U);
+ while (NodeInfo *UNI = UNGI.next() ) {
+ // Change the group
+ UNI->Group = DGroup;
+ // For each member of the definers group
+ NodeGroupIterator DNGI(D);
+ while (NodeInfo *DNI = DNGI.next() ) {
+ // Remove internal edges
+ DGroup->addPending(-CountInternalUses(DNI, UNI));
+ }
+ }
+ // Merge the two lists
+ DGroup->group_insert(DGroup->group_end(),
+ UGroup->group_begin(), UGroup->group_end());
+ } else if (DGroup) {
+ // Make user member of definers group
+ U->Group = DGroup;
+ // Add users uses to definers group pending
+ DGroup->addPending(U->Node->use_size());
+ // For each member of the definers group
+ NodeGroupIterator DNGI(D);
+ while (NodeInfo *DNI = DNGI.next() ) {
+ // Remove internal edges
+ DGroup->addPending(-CountInternalUses(DNI, U));
+ }
+ DGroup->group_push_back(U);
+ } else if (UGroup) {
+ // Make definer member of users group
+ D->Group = UGroup;
+ // Add definers uses to users group pending
+ UGroup->addPending(D->Node->use_size());
+ // For each member of the users group
+ NodeGroupIterator UNGI(U);
+ while (NodeInfo *UNI = UNGI.next() ) {
+ // Remove internal edges
+ UGroup->addPending(-CountInternalUses(D, UNI));
+ }
+ UGroup->group_insert(UGroup->group_begin(), D);
+ } else {
+ D->Group = U->Group = DGroup = new NodeGroup();
+ DGroup->addPending(D->Node->use_size() + U->Node->use_size() -
+ CountInternalUses(D, U));
+ DGroup->group_push_back(D);
+ DGroup->group_push_back(U);
+
+ if (HeadNG == NULL)
+ HeadNG = DGroup;
+ if (TailNG != NULL)
+ TailNG->Next = DGroup;
+ TailNG = DGroup;
+ }
+}
+
+
+/// print - Print ordering to specified output stream.
+///
+void ScheduleDAGSimple::print(std::ostream &O) const {
+#ifndef NDEBUG
+ O << "Ordering\n";
+ for (unsigned i = 0, N = Ordering.size(); i < N; i++) {
+ NodeInfo *NI = Ordering[i];
+ printNI(O, NI);
+ O << "\n";
+ if (NI->isGroupDominator()) {
+ NodeGroup *Group = NI->Group;
+ for (NIIterator NII = Group->group_begin(), E = Group->group_end();
+ NII != E; NII++) {
+ O << " ";
+ printNI(O, *NII);
+ O << "\n";
+ }
+ }
+ }
+#endif
+}
+
+void ScheduleDAGSimple::dump(const char *tag) const {
+ cerr << tag; dump();
+}
+
+void ScheduleDAGSimple::dump() const {
+ print(cerr);
+}
+
+
+/// EmitAll - Emit all nodes in schedule sorted order.
+///
+void ScheduleDAGSimple::EmitAll() {
+ // If this is the first basic block in the function, and if it has live ins
+ // that need to be copied into vregs, emit the copies into the top of the
+ // block before emitting the code for the block.
+ MachineFunction &MF = DAG.getMachineFunction();
+ if (&MF.front() == BB && MF.livein_begin() != MF.livein_end()) {
+ for (MachineFunction::livein_iterator LI = MF.livein_begin(),
+ E = MF.livein_end(); LI != E; ++LI)
+ if (LI->second)
+ MRI->copyRegToReg(*MF.begin(), MF.begin()->end(), LI->second,
+ LI->first, RegMap->getRegClass(LI->second));
+ }
+
+ DenseMap<SDOperand, unsigned> VRBaseMap;
+
+ // For each node in the ordering
+ for (unsigned i = 0, N = Ordering.size(); i < N; i++) {
+ // Get the scheduling info
+ NodeInfo *NI = Ordering[i];
+ if (NI->isInGroup()) {
+ NodeGroupIterator NGI(Ordering[i]);
+ while (NodeInfo *NI = NGI.next()) EmitNode(NI->Node, VRBaseMap);
+ } else {
+ EmitNode(NI->Node, VRBaseMap);
+ }
+ }
+}
+
+/// isFlagDefiner - Returns true if the node defines a flag result.
+static bool isFlagDefiner(SDNode *A) {
+ unsigned N = A->getNumValues();
+ return N && A->getValueType(N - 1) == MVT::Flag;
+}
+
+/// isFlagUser - Returns true if the node uses a flag result.
+///
+static bool isFlagUser(SDNode *A) {
+ unsigned N = A->getNumOperands();
+ return N && A->getOperand(N - 1).getValueType() == MVT::Flag;
+}
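+
+// Illustrative note (a sketch, not part of the original patch): printNI below
+// uses these two predicates to tag glued nodes in debug dumps, e.g.
+//   if (isFlagDefiner(Node)) O << "<#";   // node's last result is MVT::Flag
+//   if (isFlagUser(Node))    O << ">#";   // node's last operand is MVT::Flag
+// which makes flag-glued producer/consumer pairs easy to spot in the output.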
+
+/// printNI - Print node info.
+///
+void ScheduleDAGSimple::printNI(std::ostream &O, NodeInfo *NI) const {
+#ifndef NDEBUG
+ SDNode *Node = NI->Node;
+ O << " "
+ << std::hex << Node << std::dec
+ << ", Lat=" << NI->Latency
+ << ", Slot=" << NI->Slot
+ << ", ARITY=(" << Node->getNumOperands() << ","
+ << Node->getNumValues() << ")"
+ << " " << Node->getOperationName(&DAG);
+ if (isFlagDefiner(Node)) O << "<#";
+ if (isFlagUser(Node)) O << ">#";
+#endif
+}
+
+/// printChanges - Highlight changes in order caused by scheduling.
+///
+void ScheduleDAGSimple::printChanges(unsigned Index) const {
+#ifndef NDEBUG
+ // Get the ordered node count
+ unsigned N = Ordering.size();
+ // Determine if any changes
+ unsigned i = 0;
+ for (; i < N; i++) {
+ NodeInfo *NI = Ordering[i];
+ if (NI->Preorder != i) break;
+ }
+
+ if (i < N) {
+ cerr << Index << ". New Ordering\n";
+
+ for (i = 0; i < N; i++) {
+ NodeInfo *NI = Ordering[i];
+ cerr << " " << NI->Preorder << ". ";
+ printNI(cerr, NI);
+ cerr << "\n";
+ if (NI->isGroupDominator()) {
+ NodeGroup *Group = NI->Group;
+ for (NIIterator NII = Group->group_begin(), E = Group->group_end();
+ NII != E; NII++) {
+ cerr << " ";
+ printNI(cerr, *NII);
+ cerr << "\n";
+ }
+ }
+ }
+ } else {
+ cerr << Index << ". No Changes\n";
+ }
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+/// isDefiner - Return true if node A is a definer for B.
+///
+bool ScheduleDAGSimple::isDefiner(NodeInfo *A, NodeInfo *B) {
+ // While there are A nodes
+ NodeGroupIterator NII(A);
+ while (NodeInfo *NI = NII.next()) {
+ // Extract node
+ SDNode *Node = NI->Node;
+ // While there are operands in nodes of B
+ NodeGroupOpIterator NGOI(B);
+ while (!NGOI.isEnd()) {
+ SDOperand Op = NGOI.next();
+ // If node from A defines a node in B
+ if (Node == Op.Val) return true;
+ }
+ }
+ return false;
+}
+
+/// IncludeNode - Add node to NodeInfo vector.
+///
+void ScheduleDAGSimple::IncludeNode(NodeInfo *NI) {
+ // Get node
+ SDNode *Node = NI->Node;
+ // Ignore entry node
+ if (Node->getOpcode() == ISD::EntryToken) return;
+ // Check current count for node
+ int Count = NI->getPending();
+ // If the node is already in list
+ if (Count < 0) return;
+ // Decrement count to indicate a visit
+ Count--;
+ // If count has gone to zero then add node to list
+ if (!Count) {
+ // Add node
+ if (NI->isInGroup()) {
+ Ordering.push_back(NI->Group->getDominator());
+ } else {
+ Ordering.push_back(NI);
+ }
+ // indicate node has been added
+ Count--;
+ }
+ // Mark as visited with new count
+ NI->setPending(Count);
+}
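+
+// Worked example (a sketch; it assumes the pending count is primed with the
+// node's remaining-use count, as the grouping code above suggests): for a node
+// with two users, IncludeNode behaves as follows on successive visits:
+//   visit 1: Count = 2 -> 1            (not added yet)
+//   visit 2: Count = 1 -> 0 -> -1      (appended to Ordering, marked done)
+// A negative count is the "already in the list" marker checked at the top.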
+
+/// GatherSchedulingInfo - Get latency and resource information about each node.
+///
+void ScheduleDAGSimple::GatherSchedulingInfo() {
+ // Get instruction itineraries for the target
+ const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+
+ // For each node
+ for (unsigned i = 0, N = NodeCount; i < N; i++) {
+ // Get node info
+ NodeInfo* NI = &Info[i];
+ SDNode *Node = NI->Node;
+
+ // If there are no itineraries, or they are disabled, guess the stages from the opcode and result type
+ if (InstrItins.isEmpty() || NoItins) {
+ // If machine opcode
+ if (Node->isTargetOpcode()) {
+ // Get return type to guess which processing unit
+ MVT::ValueType VT = Node->getValueType(0);
+ // Get machine opcode
+ MachineOpCode TOpc = Node->getTargetOpcode();
+ NI->IsCall = TII->isCall(TOpc);
+ NI->IsLoad = TII->isLoad(TOpc);
+ NI->IsStore = TII->isStore(TOpc);
+
+ if (TII->isLoad(TOpc)) NI->StageBegin = &LoadStage;
+ else if (TII->isStore(TOpc)) NI->StageBegin = &StoreStage;
+ else if (MVT::isInteger(VT)) NI->StageBegin = &IntStage;
+ else if (MVT::isFloatingPoint(VT)) NI->StageBegin = &FloatStage;
+ if (NI->StageBegin) NI->StageEnd = NI->StageBegin + 1;
+ }
+ } else if (Node->isTargetOpcode()) {
+ // get machine opcode
+ MachineOpCode TOpc = Node->getTargetOpcode();
+ // Check to see if it is a call
+ NI->IsCall = TII->isCall(TOpc);
+ // Get itinerary stages for instruction
+ unsigned II = TII->getSchedClass(TOpc);
+ NI->StageBegin = InstrItins.begin(II);
+ NI->StageEnd = InstrItins.end(II);
+ }
+
+ // One slot for the instruction itself
+ NI->Latency = 1;
+
+ // Add long latency for a call to push it back in time
+ if (NI->IsCall) NI->Latency += CallLatency;
+
+ // Sum up all the latencies
+ for (InstrStage *Stage = NI->StageBegin, *E = NI->StageEnd;
+ Stage != E; Stage++) {
+ NI->Latency += Stage->Cycles;
+ }
+
+ // Sum up all the latencies for max tally size
+ NSlots += NI->Latency;
+ }
+
+ // Unify metrics if in a group
+ if (HasGroups) {
+ for (unsigned i = 0, N = NodeCount; i < N; i++) {
+ NodeInfo* NI = &Info[i];
+
+ if (NI->isInGroup()) {
+ NodeGroup *Group = NI->Group;
+
+ if (!Group->getDominator()) {
+ NIIterator NGI = Group->group_begin(), NGE = Group->group_end();
+ NodeInfo *Dominator = *NGI;
+ unsigned Latency = 0;
+
+ for (NGI++; NGI != NGE; NGI++) {
+ NodeInfo* NGNI = *NGI;
+ Latency += NGNI->Latency;
+ if (Dominator->Latency < NGNI->Latency) Dominator = NGNI;
+ }
+
+ Dominator->Latency = Latency;
+ Group->setDominator(Dominator);
+ }
+ }
+ }
+ }
+}
+
+/// VisitAll - Visit each node breadth-wise to produce an initial ordering.
+/// Note that the ordering in the Nodes vector is reversed.
+void ScheduleDAGSimple::VisitAll() {
+ // Add first element to list
+ NodeInfo *NI = getNI(DAG.getRoot().Val);
+ if (NI->isInGroup()) {
+ Ordering.push_back(NI->Group->getDominator());
+ } else {
+ Ordering.push_back(NI);
+ }
+
+ // Iterate through all nodes that have been added
+ for (unsigned i = 0; i < Ordering.size(); i++) { // note: size() varies
+ // Visit all operands
+ NodeGroupOpIterator NGI(Ordering[i]);
+ while (!NGI.isEnd()) {
+ // Get next operand
+ SDOperand Op = NGI.next();
+ // Get node
+ SDNode *Node = Op.Val;
+ // Ignore passive nodes
+ if (isPassiveNode(Node)) continue;
+ // Check out node
+ IncludeNode(getNI(Node));
+ }
+ }
+
+ // Add entry node last (IncludeNode filters entry nodes)
+ if (DAG.getEntryNode().Val != DAG.getRoot().Val)
+ Ordering.push_back(getNI(DAG.getEntryNode().Val));
+
+ // Reverse the order
+ std::reverse(Ordering.begin(), Ordering.end());
+}
+
+/// FakeGroupDominators - Set dominators for non-scheduling.
+///
+void ScheduleDAGSimple::FakeGroupDominators() {
+ for (unsigned i = 0, N = NodeCount; i < N; i++) {
+ NodeInfo* NI = &Info[i];
+
+ if (NI->isInGroup()) {
+ NodeGroup *Group = NI->Group;
+
+ if (!Group->getDominator()) {
+ Group->setDominator(NI);
+ }
+ }
+ }
+}
+
+/// isStrongDependency - Return true if node A has results used by node B.
+/// I.E., B must wait for latency of A.
+bool ScheduleDAGSimple::isStrongDependency(NodeInfo *A, NodeInfo *B) {
+ // If A defines a value used by B then it's a strong dependency, or
+ // if a load follows a store (they may be dependent, so don't take the chance).
+ return isDefiner(A, B) || (A->IsStore && B->IsLoad);
+}
+
+/// isWeakDependency - Return true if node A produces a result that will
+/// conflict with operands of B. It is assumed that isStrongDependency has
+/// already been checked.
+bool ScheduleDAGSimple::isWeakDependency(NodeInfo *A, NodeInfo *B) {
+ // TODO check for conflicting real registers and aliases
+#if 0 // FIXME - Since we are in SSA form and not checking register aliasing
+ return A->Node->getOpcode() == ISD::EntryToken || isStrongDependency(B, A);
+#else
+ return A->Node->getOpcode() == ISD::EntryToken;
+#endif
+}
+
+/// ScheduleBackward - Schedule instructions so that any long latency
+/// instructions and the critical path get pushed back in time. Time is run in
+/// reverse to allow code reuse of the Tally and eliminate the overhead of
+/// biasing every slot index against NSlots.
+void ScheduleDAGSimple::ScheduleBackward() {
+ // Size and clear the resource tally
+ Tally.Initialize(NSlots);
+ // Get number of nodes to schedule
+ unsigned N = Ordering.size();
+
+ // For each node being scheduled
+ for (unsigned i = N; 0 < i--;) {
+ NodeInfo *NI = Ordering[i];
+ // Track insertion
+ unsigned Slot = NotFound;
+
+ // Compare against those previously scheduled nodes
+ unsigned j = i + 1;
+ for (; j < N; j++) {
+ // Get following instruction
+ NodeInfo *Other = Ordering[j];
+
+ // Check dependency against previously inserted nodes
+ if (isStrongDependency(NI, Other)) {
+ Slot = Other->Slot + Other->Latency;
+ break;
+ } else if (isWeakDependency(NI, Other)) {
+ Slot = Other->Slot;
+ break;
+ }
+ }
+
+ // If independent of others (or first entry)
+ if (Slot == NotFound) Slot = 0;
+
+#if 0 // FIXME - measure later
+ // Find a slot where the needed resources are available
+ if (NI->StageBegin != NI->StageEnd)
+ Slot = Tally.FindAndReserve(Slot, NI->StageBegin, NI->StageEnd);
+#endif
+
+ // Set node slot
+ NI->Slot = Slot;
+
+ // Insert sort based on slot
+ j = i + 1;
+ for (; j < N; j++) {
+ // Get following instruction
+ NodeInfo *Other = Ordering[j];
+ // Should we look further (remember slots are in reverse time)
+ if (Slot >= Other->Slot) break;
+ // Shuffle other into ordering
+ Ordering[j - 1] = Other;
+ }
+ // Insert node in proper slot
+ if (j != i + 1) Ordering[j - 1] = NI;
+ }
+}
+
+/// ScheduleForward - Schedule instructions to maximize packing.
+///
+void ScheduleDAGSimple::ScheduleForward() {
+ // Size and clear the resource tally
+ Tally.Initialize(NSlots);
+ // Get number of nodes to schedule
+ unsigned N = Ordering.size();
+
+ // For each node being scheduled
+ for (unsigned i = 0; i < N; i++) {
+ NodeInfo *NI = Ordering[i];
+ // Track insertion
+ unsigned Slot = NotFound;
+
+ // Compare against those previously scheduled nodes
+ unsigned j = i;
+ for (; 0 < j--;) {
+ // Get prior instruction
+ NodeInfo *Other = Ordering[j];
+
+ // Check dependency against previously inserted nodes
+ if (isStrongDependency(Other, NI)) {
+ Slot = Other->Slot + Other->Latency;
+ break;
+ } else if (Other->IsCall || isWeakDependency(Other, NI)) {
+ Slot = Other->Slot;
+ break;
+ }
+ }
+
+ // If independent of others (or first entry)
+ if (Slot == NotFound) Slot = 0;
+
+ // Find a slot where the needed resources are available
+ if (NI->StageBegin != NI->StageEnd)
+ Slot = Tally.FindAndReserve(Slot, NI->StageBegin, NI->StageEnd);
+
+ // Set node slot
+ NI->Slot = Slot;
+
+ // Insert sort based on slot
+ j = i;
+ for (; 0 < j--;) {
+ // Get prior instruction
+ NodeInfo *Other = Ordering[j];
+ // Should we look further
+ if (Slot >= Other->Slot) break;
+ // Shuffle other into ordering
+ Ordering[j + 1] = Other;
+ }
+ // Insert node in proper slot
+ if (j != i) Ordering[j + 1] = NI;
+ }
+}
+
+/// Schedule - Order nodes according to selected style.
+///
+void ScheduleDAGSimple::Schedule() {
+ // Number the nodes
+ NodeCount = std::distance(DAG.allnodes_begin(), DAG.allnodes_end());
+
+ // Set up minimum info for scheduling
+ PrepareNodeInfo();
+ // Construct node groups for flagged nodes
+ IdentifyGroups();
+
+ // Test to see if scheduling should occur
+ bool ShouldSchedule = NodeCount > 3 && !NoSched;
+ // Don't waste time if is only entry and return
+ if (ShouldSchedule) {
+ // Get latency and resource requirements
+ GatherSchedulingInfo();
+ } else if (HasGroups) {
+ // Make sure all the groups have dominators
+ FakeGroupDominators();
+ }
+
+ // Breadth first walk of DAG
+ VisitAll();
+
+#ifndef NDEBUG
+ static unsigned Count = 0;
+ Count++;
+ for (unsigned i = 0, N = Ordering.size(); i < N; i++) {
+ NodeInfo *NI = Ordering[i];
+ NI->Preorder = i;
+ }
+#endif
+
+ // Don't waste time if is only entry and return
+ if (ShouldSchedule) {
+ // Push back long instructions and critical path
+ ScheduleBackward();
+
+ // Pack instructions to maximize resource utilization
+ ScheduleForward();
+ }
+
+ DEBUG(printChanges(Count));
+
+ // Emit in scheduled order
+ EmitAll();
+}
+
+
+/// createSimpleDAGScheduler - This creates a simple two pass instruction
+/// scheduler using instruction itineraries.
+llvm::ScheduleDAG* llvm::createSimpleDAGScheduler(SelectionDAGISel *IS,
+ SelectionDAG *DAG,
+ MachineBasicBlock *BB) {
+ return new ScheduleDAGSimple(false, false, *DAG, BB, DAG->getTarget());
+}
+
+/// createNoItinsDAGScheduler - This creates a simple two pass instruction
+/// scheduler without using instruction itineraries.
+llvm::ScheduleDAG* llvm::createNoItinsDAGScheduler(SelectionDAGISel *IS,
+ SelectionDAG *DAG,
+ MachineBasicBlock *BB) {
+ return new ScheduleDAGSimple(false, true, *DAG, BB, DAG->getTarget());
+}
+
+/// createBFS_DAGScheduler - This creates a simple breadth first instruction
+/// scheduler.
+llvm::ScheduleDAG* llvm::createBFS_DAGScheduler(SelectionDAGISel *IS,
+ SelectionDAG *DAG,
+ MachineBasicBlock *BB) {
+ return new ScheduleDAGSimple(true, false, *DAG, BB, DAG->getTarget());
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
new file mode 100644
index 0000000..9803ab8
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -0,0 +1,3694 @@
+//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+/// makeVTList - Return an instance of the SDVTList struct initialized with the
+/// specified members.
+static SDVTList makeVTList(const MVT::ValueType *VTs, unsigned NumVTs) {
+ SDVTList Res = {VTs, NumVTs};
+ return Res;
+}
+
+//===----------------------------------------------------------------------===//
+// ConstantFPSDNode Class
+//===----------------------------------------------------------------------===//
+
+/// isExactlyValue - We don't rely on operator== working on double values, as
+/// it returns true for values that are not bit-identical, such as -0.0 and 0.0.
+/// As such, this method can be used to do an exact bit-for-bit comparison of
+/// two floating point values.
+bool ConstantFPSDNode::isExactlyValue(double V) const {
+ return DoubleToBits(V) == DoubleToBits(Value);
+}
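+
+// Illustrative use (a sketch, not part of the original patch): because the
+// comparison is bit-for-bit, a node holding -0.0 does not match +0.0 even
+// though the two compare equal as doubles:
+//   ConstantFPSDNode *FP = ...;    // hypothetical node holding -0.0
+//   FP->isExactlyValue(-0.0);      // true
+//   FP->isExactlyValue(0.0);       // false, despite -0.0 == 0.0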
+
+//===----------------------------------------------------------------------===//
+// ISD Namespace
+//===----------------------------------------------------------------------===//
+
+/// isBuildVectorAllOnes - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are ~0 or undef.
+bool ISD::isBuildVectorAllOnes(const SDNode *N) {
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BIT_CONVERT)
+ N = N->getOperand(0).Val;
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ unsigned i = 0, e = N->getNumOperands();
+
+ // Skip over all of the undef values.
+ while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+ ++i;
+
+ // Do not accept an all-undef vector.
+ if (i == e) return false;
+
+ // Do not accept build_vectors that aren't all constants or which have non-~0
+ // elements.
+ SDOperand NotZero = N->getOperand(i);
+ if (isa<ConstantSDNode>(NotZero)) {
+ if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue())
+ return false;
+ } else if (isa<ConstantFPSDNode>(NotZero)) {
+ MVT::ValueType VT = NotZero.getValueType();
+ if (VT== MVT::f64) {
+ if (DoubleToBits(cast<ConstantFPSDNode>(NotZero)->getValue()) !=
+ (uint64_t)-1)
+ return false;
+ } else {
+ if (FloatToBits(cast<ConstantFPSDNode>(NotZero)->getValue()) !=
+ (uint32_t)-1)
+ return false;
+ }
+ } else
+ return false;
+
+ // Okay, we have at least one ~0 value, check to see if the rest match or are
+ // undefs.
+ for (++i; i != e; ++i)
+ if (N->getOperand(i) != NotZero &&
+ N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+ return true;
+}
+
+
+/// isBuildVectorAllZeros - Return true if the specified node is a
+/// BUILD_VECTOR where all of the elements are 0 or undef.
+bool ISD::isBuildVectorAllZeros(const SDNode *N) {
+ // Look through a bit convert.
+ if (N->getOpcode() == ISD::BIT_CONVERT)
+ N = N->getOperand(0).Val;
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ unsigned i = 0, e = N->getNumOperands();
+
+ // Skip over all of the undef values.
+ while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+ ++i;
+
+ // Do not accept an all-undef vector.
+ if (i == e) return false;
+
+ // Do not accept build_vectors that aren't all constants or which have
+ // non-zero elements.
+ SDOperand Zero = N->getOperand(i);
+ if (isa<ConstantSDNode>(Zero)) {
+ if (!cast<ConstantSDNode>(Zero)->isNullValue())
+ return false;
+ } else if (isa<ConstantFPSDNode>(Zero)) {
+ if (!cast<ConstantFPSDNode>(Zero)->isExactlyValue(0.0))
+ return false;
+ } else
+ return false;
+
+ // Okay, we have at least one zero value, check to see if the rest match or are
+ // undefs.
+ for (++i; i != e; ++i)
+ if (N->getOperand(i) != Zero &&
+ N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+ return true;
+}
+
+/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
+/// when given the operation for (X op Y).
+ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
+ // To perform this operation, we just need to swap the L and G bits of the
+ // operation.
+ unsigned OldL = (Operation >> 2) & 1;
+ unsigned OldG = (Operation >> 1) & 1;
+ return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits
+ (OldL << 1) | // New G bit
+ (OldG << 2)); // New L bit.
+}
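+
+// Worked example (sketch): with the L bit at position 2 and the G bit at
+// position 1, swapping them turns "X op Y" into the equivalent "Y op' X":
+//   ISD::getSetCCSwappedOperands(ISD::SETLT)  == ISD::SETGT
+//   ISD::getSetCCSwappedOperands(ISD::SETULE) == ISD::SETUGE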
+
+/// getSetCCInverse - Return the operation corresponding to !(X op Y), where
+/// 'op' is a valid SetCC operation.
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
+ unsigned Operation = Op;
+ if (isInteger)
+ Operation ^= 7; // Flip L, G, E bits, but not U.
+ else
+ Operation ^= 15; // Flip all of the condition bits.
+ if (Operation > ISD::SETTRUE2)
+ Operation &= ~8; // Don't let N and U bits get set.
+ return ISD::CondCode(Operation);
+}
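+
+// Worked example (sketch): for integer compares only the L, G and E bits are
+// flipped, so the "unsigned" bit of a predicate is preserved:
+//   ISD::getSetCCInverse(ISD::SETEQ,  true) == ISD::SETNE
+//   ISD::getSetCCInverse(ISD::SETULT, true) == ISD::SETUGE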
+
+
+/// isSignedOp - For an integer comparison, return 1 if the comparison is a
+/// signed operation and 2 if the result is an unsigned comparison. Return zero
+/// if the operation does not depend on the sign of the input (setne and seteq).
+static int isSignedOp(ISD::CondCode Opcode) {
+ switch (Opcode) {
+ default: assert(0 && "Illegal integer setcc operation!");
+ case ISD::SETEQ:
+ case ISD::SETNE: return 0;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE: return 1;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ case ISD::SETUGE: return 2;
+ }
+}
+
+/// getSetCCOrOperation - Return the result of a logical OR between different
+/// comparisons of identical values: ((X op1 Y) | (X op2 Y)). This function
+/// returns SETCC_INVALID if it is not possible to represent the resultant
+/// comparison.
+ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool isInteger) {
+ if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed integer setcc with an unsigned integer setcc.
+ return ISD::SETCC_INVALID;
+
+ unsigned Op = Op1 | Op2; // Combine all of the condition bits.
+
+ // If the N and U bits get set then the resultant comparison DOES suddenly
+ // care about orderedness, and is true when ordered.
+ if (Op > ISD::SETTRUE2)
+ Op &= ~16; // Clear the U bit if the N bit is set.
+
+ // Canonicalize illegal integer setcc's.
+ if (isInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
+ Op = ISD::SETNE;
+
+ return ISD::CondCode(Op);
+}
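+
+// Worked example (sketch): OR-ing the condition bits of two compares of the
+// same operands yields the combined predicate, e.g.
+//   ISD::getSetCCOrOperation(ISD::SETLT, ISD::SETGT, true) == ISD::SETNE
+// i.e. (X < Y) | (X > Y) folds to (X != Y); mixing a signed with an unsigned
+// integer predicate returns ISD::SETCC_INVALID instead.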
+
+/// getSetCCAndOperation - Return the result of a logical AND between different
+/// comparisons of identical values: ((X op1 Y) & (X op2 Y)). This
+/// function returns zero if it is not possible to represent the resultant
+/// comparison.
+ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool isInteger) {
+ if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed setcc with an unsigned setcc.
+ return ISD::SETCC_INVALID;
+
+ // Combine all of the condition bits.
+ ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
+
+ // Canonicalize illegal integer setcc's.
+ if (isInteger) {
+ switch (Result) {
+ default: break;
+ case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT
+ case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE
+ case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE
+ case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE
+ }
+ }
+
+ return Result;
+}
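+
+// Worked example (sketch): AND-ing the bits works the same way, e.g.
+//   ISD::getSetCCAndOperation(ISD::SETLE, ISD::SETGE, true) == ISD::SETEQ
+// since (X <= Y) & (X >= Y) is exactly (X == Y).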
+
+const TargetMachine &SelectionDAG::getTarget() const {
+ return TLI.getTargetMachine();
+}
+
+//===----------------------------------------------------------------------===//
+// SDNode Profile Support
+//===----------------------------------------------------------------------===//
+
+/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
+///
+static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
+ ID.AddInteger(OpC);
+}
+
+/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them
+/// solely with their pointer.
+void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
+ ID.AddPointer(VTList.VTs);
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ const SDOperand *Ops, unsigned NumOps) {
+ for (; NumOps; --NumOps, ++Ops) {
+ ID.AddPointer(Ops->Val);
+ ID.AddInteger(Ops->ResNo);
+ }
+}
+
+static void AddNodeIDNode(FoldingSetNodeID &ID,
+ unsigned short OpC, SDVTList VTList,
+ const SDOperand *OpList, unsigned N) {
+ AddNodeIDOpcode(ID, OpC);
+ AddNodeIDValueTypes(ID, VTList);
+ AddNodeIDOperands(ID, OpList, N);
+}
+
+/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, SDNode *N) {
+ AddNodeIDOpcode(ID, N->getOpcode());
+ // Add the return value info.
+ AddNodeIDValueTypes(ID, N->getVTList());
+ // Add the operand info.
+ AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands());
+
+ // Handle SDNode leafs with special info.
+ switch (N->getOpcode()) {
+ default: break; // Normal nodes don't need extra info.
+ case ISD::TargetConstant:
+ case ISD::Constant:
+ ID.AddInteger(cast<ConstantSDNode>(N)->getValue());
+ break;
+ case ISD::TargetConstantFP:
+ case ISD::ConstantFP:
+ ID.AddDouble(cast<ConstantFPSDNode>(N)->getValue());
+ break;
+ case ISD::TargetGlobalAddress:
+ case ISD::GlobalAddress:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::GlobalTLSAddress: {
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+ ID.AddPointer(GA->getGlobal());
+ ID.AddInteger(GA->getOffset());
+ break;
+ }
+ case ISD::BasicBlock:
+ ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
+ break;
+ case ISD::Register:
+ ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
+ break;
+ case ISD::SRCVALUE: {
+ SrcValueSDNode *SV = cast<SrcValueSDNode>(N);
+ ID.AddPointer(SV->getValue());
+ ID.AddInteger(SV->getOffset());
+ break;
+ }
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
+ break;
+ case ISD::JumpTable:
+ case ISD::TargetJumpTable:
+ ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
+ break;
+ case ISD::ConstantPool:
+ case ISD::TargetConstantPool: {
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+ ID.AddInteger(CP->getAlignment());
+ ID.AddInteger(CP->getOffset());
+ if (CP->isMachineConstantPoolEntry())
+ CP->getMachineCPVal()->AddSelectionDAGCSEId(ID);
+ else
+ ID.AddPointer(CP->getConstVal());
+ break;
+ }
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ID.AddInteger(LD->getAddressingMode());
+ ID.AddInteger(LD->getExtensionType());
+ ID.AddInteger(LD->getLoadedVT());
+ ID.AddPointer(LD->getSrcValue());
+ ID.AddInteger(LD->getSrcValueOffset());
+ ID.AddInteger(LD->getAlignment());
+ ID.AddInteger(LD->isVolatile());
+ break;
+ }
+ case ISD::STORE: {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ ID.AddInteger(ST->getAddressingMode());
+ ID.AddInteger(ST->isTruncatingStore());
+ ID.AddInteger(ST->getStoredVT());
+ ID.AddPointer(ST->getSrcValue());
+ ID.AddInteger(ST->getSrcValueOffset());
+ ID.AddInteger(ST->getAlignment());
+ ID.AddInteger(ST->isVolatile());
+ break;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAG Class
+//===----------------------------------------------------------------------===//
+
+/// RemoveDeadNodes - This method deletes all unreachable nodes in the
+/// SelectionDAG.
+void SelectionDAG::RemoveDeadNodes() {
+ // Create a dummy node (which is not added to allnodes), that adds a reference
+ // to the root node, preventing it from being deleted.
+ HandleSDNode Dummy(getRoot());
+
+ SmallVector<SDNode*, 128> DeadNodes;
+
+ // Add all obviously-dead nodes to the DeadNodes worklist.
+ for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I)
+ if (I->use_empty())
+ DeadNodes.push_back(I);
+
+ // Process the worklist, deleting the nodes and adding their uses to the
+ // worklist.
+ while (!DeadNodes.empty()) {
+ SDNode *N = DeadNodes.back();
+ DeadNodes.pop_back();
+
+ // Take the node out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Next, brutally remove the operand list. This is safe to do, as there are
+ // no cycles in the graph.
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
+ SDNode *Operand = I->Val;
+ Operand->removeUser(N);
+
+ // Now that we removed this operand, see if there are no uses of it left.
+ if (Operand->use_empty())
+ DeadNodes.push_back(Operand);
+ }
+ if (N->OperandsNeedDelete)
+ delete[] N->OperandList;
+ N->OperandList = 0;
+ N->NumOperands = 0;
+
+ // Finally, remove N itself.
+ AllNodes.erase(N);
+ }
+
+ // If the root changed (e.g. it was a dead load), update the root.
+ setRoot(Dummy.getValue());
+}
+
+void SelectionDAG::RemoveDeadNode(SDNode *N, std::vector<SDNode*> &Deleted) {
+ SmallVector<SDNode*, 16> DeadNodes;
+ DeadNodes.push_back(N);
+
+ // Process the worklist, deleting the nodes and adding their uses to the
+ // worklist.
+ while (!DeadNodes.empty()) {
+ SDNode *N = DeadNodes.back();
+ DeadNodes.pop_back();
+
+ // Take the node out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Next, brutally remove the operand list. This is safe to do, as there are
+ // no cycles in the graph.
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
+ SDNode *Operand = I->Val;
+ Operand->removeUser(N);
+
+ // Now that we removed this operand, see if there are no uses of it left.
+ if (Operand->use_empty())
+ DeadNodes.push_back(Operand);
+ }
+ if (N->OperandsNeedDelete)
+ delete[] N->OperandList;
+ N->OperandList = 0;
+ N->NumOperands = 0;
+
+ // Finally, remove N itself.
+ Deleted.push_back(N);
+ AllNodes.erase(N);
+ }
+}
+
+void SelectionDAG::DeleteNode(SDNode *N) {
+ assert(N->use_empty() && "Cannot delete a node that is not dead!");
+
+ // First take this out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Finally, remove uses due to operands of this node, remove from the
+ // AllNodes list, and delete the node.
+ DeleteNodeNotInCSEMaps(N);
+}
+
+void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
+
+ // Remove it from the AllNodes list.
+ AllNodes.remove(N);
+
+ // Drop all of the operands and decrement used nodes use counts.
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I)
+ I->Val->removeUser(N);
+ if (N->OperandsNeedDelete)
+ delete[] N->OperandList;
+ N->OperandList = 0;
+ N->NumOperands = 0;
+
+ delete N;
+}
+
+/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
+/// corresponds to it. This is useful when we're about to delete or repurpose
+/// the node. We don't want future requests for structurally identical nodes
+/// to return N anymore.
+void SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
+ bool Erased = false;
+ switch (N->getOpcode()) {
+ case ISD::HANDLENODE: return; // noop.
+ case ISD::STRING:
+ Erased = StringNodes.erase(cast<StringSDNode>(N)->getValue());
+ break;
+ case ISD::CONDCODE:
+ assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
+ "Cond code doesn't exist!");
+ Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0;
+ CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0;
+ break;
+ case ISD::ExternalSymbol:
+ Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+ break;
+ case ISD::TargetExternalSymbol:
+ Erased =
+ TargetExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+ break;
+ case ISD::VALUETYPE:
+ Erased = ValueTypeNodes[cast<VTSDNode>(N)->getVT()] != 0;
+ ValueTypeNodes[cast<VTSDNode>(N)->getVT()] = 0;
+ break;
+ default:
+ // Remove it from the CSE Map.
+ Erased = CSEMap.RemoveNode(N);
+ break;
+ }
+#ifndef NDEBUG
+ // Verify that the node was actually in one of the CSE maps, unless it has a
+ // flag result (which cannot be CSE'd) or is one of the special cases that are
+ // not subject to CSE.
+ if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag &&
+ !N->isTargetOpcode()) {
+ N->dump(this);
+ cerr << "\n";
+ assert(0 && "Node is not in map!");
+ }
+#endif
+}
+
+/// AddNonLeafNodeToCSEMaps - Add the specified node back to the CSE maps. It
+/// has been taken out and modified in some way. If the specified node already
+/// exists in the CSE maps, do not modify the maps, but return the existing node
+/// instead. If it doesn't exist, add it and return null.
+///
+SDNode *SelectionDAG::AddNonLeafNodeToCSEMaps(SDNode *N) {
+ assert(N->getNumOperands() && "This is a leaf node!");
+ if (N->getOpcode() == ISD::HANDLENODE || N->getValueType(0) == MVT::Flag)
+ return 0; // Never add these nodes.
+
+ // Check that remaining values produced are not flags.
+ for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+ if (N->getValueType(i) == MVT::Flag)
+ return 0; // Never CSE anything that produces a flag.
+
+ SDNode *New = CSEMap.GetOrInsertNode(N);
+ if (New != N) return New; // Node already existed.
+ return 0;
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDOperand Op,
+ void *&InsertPos) {
+ if (N->getOpcode() == ISD::HANDLENODE || N->getValueType(0) == MVT::Flag)
+ return 0; // Never add these nodes.
+
+ // Check that remaining values produced are not flags.
+ for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+ if (N->getValueType(i) == MVT::Flag)
+ return 0; // Never CSE anything that produces a flag.
+
+ SDOperand Ops[] = { Op };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1);
+ return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+ SDOperand Op1, SDOperand Op2,
+ void *&InsertPos) {
+ if (N->getOpcode() == ISD::HANDLENODE || N->getValueType(0) == MVT::Flag)
+ return 0; // Never add these nodes.
+
+ // Check that remaining values produced are not flags.
+ for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+ if (N->getValueType(i) == MVT::Flag)
+ return 0; // Never CSE anything that produces a flag.
+
+ SDOperand Ops[] = { Op1, Op2 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2);
+ return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+}
+
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+ const SDOperand *Ops,unsigned NumOps,
+ void *&InsertPos) {
+ if (N->getOpcode() == ISD::HANDLENODE || N->getValueType(0) == MVT::Flag)
+ return 0; // Never add these nodes.
+
+ // Check that remaining values produced are not flags.
+ for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+ if (N->getValueType(i) == MVT::Flag)
+ return 0; // Never CSE anything that produces a flag.
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps);
+
+ if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ ID.AddInteger(LD->getAddressingMode());
+ ID.AddInteger(LD->getExtensionType());
+ ID.AddInteger(LD->getLoadedVT());
+ ID.AddPointer(LD->getSrcValue());
+ ID.AddInteger(LD->getSrcValueOffset());
+ ID.AddInteger(LD->getAlignment());
+ ID.AddInteger(LD->isVolatile());
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ ID.AddInteger(ST->getAddressingMode());
+ ID.AddInteger(ST->isTruncatingStore());
+ ID.AddInteger(ST->getStoredVT());
+ ID.AddPointer(ST->getSrcValue());
+ ID.AddInteger(ST->getSrcValueOffset());
+ ID.AddInteger(ST->getAlignment());
+ ID.AddInteger(ST->isVolatile());
+ }
+
+ return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+}
+
+
+SelectionDAG::~SelectionDAG() {
+ while (!AllNodes.empty()) {
+ SDNode *N = AllNodes.begin();
+ N->SetNextInBucket(0);
+ if (N->OperandsNeedDelete)
+ delete [] N->OperandList;
+ N->OperandList = 0;
+ N->NumOperands = 0;
+ AllNodes.pop_front();
+ }
+}
+
+SDOperand SelectionDAG::getZeroExtendInReg(SDOperand Op, MVT::ValueType VT) {
+ if (Op.getValueType() == VT) return Op;
+ int64_t Imm = ~0ULL >> (64-MVT::getSizeInBits(VT));
+ return getNode(ISD::AND, Op.getValueType(), Op,
+ getConstant(Imm, Op.getValueType()));
+}
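+
+// Illustrative use (a sketch, assuming an i32-typed operand X): zero-extending
+// the low i8 of X in place is just an AND with 0xFF in the wider type:
+//   SDOperand Ext = DAG.getZeroExtendInReg(X, MVT::i8);
+//   // same as getNode(ISD::AND, MVT::i32, X, getConstant(0xFF, MVT::i32))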
+
+SDOperand SelectionDAG::getString(const std::string &Val) {
+ StringSDNode *&N = StringNodes[Val];
+ if (!N) {
+ N = new StringSDNode(Val);
+ AllNodes.push_back(N);
+ }
+ return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getConstant(uint64_t Val, MVT::ValueType VT, bool isT) {
+ assert(MVT::isInteger(VT) && "Cannot create FP integer constant!");
+ assert(!MVT::isVector(VT) && "Cannot create Vector ConstantSDNodes!");
+
+ // Mask out any bits that are not valid for this constant.
+ Val &= MVT::getIntVTBitMask(VT);
+
+ unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(Val);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ SDNode *N = new ConstantSDNode(isT, Val, VT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
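+
+// Illustrative use (a sketch): out-of-range bits are masked to the value type,
+// and structurally identical requests come back as the same CSE'd node:
+//   SDOperand A = DAG.getConstant(0x1FF, MVT::i8);  // stored as 0xFF
+//   SDOperand B = DAG.getConstant(0xFF,  MVT::i8);
+//   assert(A == B && "same node thanks to the CSE map");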
+
+
+SDOperand SelectionDAG::getConstantFP(double Val, MVT::ValueType VT,
+ bool isTarget) {
+ assert(MVT::isFloatingPoint(VT) && "Cannot create integer FP constant!");
+ MVT::ValueType EltVT =
+ MVT::isVector(VT) ? MVT::getVectorElementType(VT) : VT;
+ if (EltVT == MVT::f32)
+ Val = (float)Val; // Mask out extra precision.
+
+ // Do the map lookup using the actual bit pattern for the floating point
+ // value, so that we don't have problems with 0.0 comparing equal to -0.0, and
+ // we don't have issues with SNANs.
+ unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+ ID.AddDouble(Val);
+ void *IP = 0;
+ SDNode *N = NULL;
+ if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+ if (!MVT::isVector(VT))
+ return SDOperand(N, 0);
+ if (!N) {
+ N = new ConstantFPSDNode(isTarget, Val, EltVT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ }
+
+ SDOperand Result(N, 0);
+ if (MVT::isVector(VT)) {
+ SmallVector<SDOperand, 8> Ops;
+ Ops.assign(MVT::getVectorNumElements(VT), Result);
+ Result = getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
+ }
+ return Result;
+}
+
+SDOperand SelectionDAG::getGlobalAddress(const GlobalValue *GV,
+ MVT::ValueType VT, int Offset,
+ bool isTargetGA) {
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ unsigned Opc;
+ if (GVar && GVar->isThreadLocal())
+ Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
+ else
+ Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddPointer(GV);
+ ID.AddInteger(Offset);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ SDNode *N = new GlobalAddressSDNode(isTargetGA, GV, VT, Offset);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getFrameIndex(int FI, MVT::ValueType VT,
+ bool isTarget) {
+ unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(FI);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ SDNode *N = new FrameIndexSDNode(FI, VT, isTarget);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getJumpTable(int JTI, MVT::ValueType VT, bool isTarget){
+ unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(JTI);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ SDNode *N = new JumpTableSDNode(JTI, VT, isTarget);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getConstantPool(Constant *C, MVT::ValueType VT,
+ unsigned Alignment, int Offset,
+ bool isTarget) {
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ ID.AddPointer(C);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ SDNode *N = new ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+
+SDOperand SelectionDAG::getConstantPool(MachineConstantPoolValue *C,
+ MVT::ValueType VT,
+ unsigned Alignment, int Offset,
+ bool isTarget) {
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ C->AddSelectionDAGCSEId(ID);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ SDNode *N = new ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+
+SDOperand SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(MBB);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ SDNode *N = new BasicBlockSDNode(MBB);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getValueType(MVT::ValueType VT) {
+ if ((unsigned)VT >= ValueTypeNodes.size())
+ ValueTypeNodes.resize(VT+1);
+ if (ValueTypeNodes[VT] == 0) {
+ ValueTypeNodes[VT] = new VTSDNode(VT);
+ AllNodes.push_back(ValueTypeNodes[VT]);
+ }
+
+ return SDOperand(ValueTypeNodes[VT], 0);
+}
+
+SDOperand SelectionDAG::getExternalSymbol(const char *Sym, MVT::ValueType VT) {
+ SDNode *&N = ExternalSymbols[Sym];
+ if (N) return SDOperand(N, 0);
+ N = new ExternalSymbolSDNode(false, Sym, VT);
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getTargetExternalSymbol(const char *Sym,
+ MVT::ValueType VT) {
+ SDNode *&N = TargetExternalSymbols[Sym];
+ if (N) return SDOperand(N, 0);
+ N = new ExternalSymbolSDNode(true, Sym, VT);
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getCondCode(ISD::CondCode Cond) {
+ if ((unsigned)Cond >= CondCodeNodes.size())
+ CondCodeNodes.resize(Cond+1);
+
+ if (CondCodeNodes[Cond] == 0) {
+ CondCodeNodes[Cond] = new CondCodeSDNode(Cond);
+ AllNodes.push_back(CondCodeNodes[Cond]);
+ }
+ return SDOperand(CondCodeNodes[Cond], 0);
+}
+
+SDOperand SelectionDAG::getRegister(unsigned RegNo, MVT::ValueType VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0);
+ ID.AddInteger(RegNo);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ SDNode *N = new RegisterSDNode(RegNo, VT);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getSrcValue(const Value *V, int Offset) {
+ assert((!V || isa<PointerType>(V->getType())) &&
+ "SrcValue is not a pointer?");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(V);
+ ID.AddInteger(Offset);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ SDNode *N = new SrcValueSDNode(V, Offset);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::FoldSetCC(MVT::ValueType VT, SDOperand N1,
+ SDOperand N2, ISD::CondCode Cond) {
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return getConstant(0, VT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: return getConstant(1, VT);
+
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ case ISD::SETO:
+ case ISD::SETUO:
+ case ISD::SETUEQ:
+ case ISD::SETUNE:
+ assert(!MVT::isInteger(N1.getValueType()) && "Illegal setcc for integer!");
+ break;
+ }
+
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.Val)) {
+ uint64_t C2 = N2C->getValue();
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val)) {
+ uint64_t C1 = N1C->getValue();
+
+ // Sign extend the operands if required
+ if (ISD::isSignedIntSetCC(Cond)) {
+ C1 = N1C->getSignExtended();
+ C2 = N2C->getSignExtended();
+ }
+
+ switch (Cond) {
+ default: assert(0 && "Unknown integer setcc!");
+ case ISD::SETEQ: return getConstant(C1 == C2, VT);
+ case ISD::SETNE: return getConstant(C1 != C2, VT);
+ case ISD::SETULT: return getConstant(C1 < C2, VT);
+ case ISD::SETUGT: return getConstant(C1 > C2, VT);
+ case ISD::SETULE: return getConstant(C1 <= C2, VT);
+ case ISD::SETUGE: return getConstant(C1 >= C2, VT);
+ case ISD::SETLT: return getConstant((int64_t)C1 < (int64_t)C2, VT);
+ case ISD::SETGT: return getConstant((int64_t)C1 > (int64_t)C2, VT);
+ case ISD::SETLE: return getConstant((int64_t)C1 <= (int64_t)C2, VT);
+ case ISD::SETGE: return getConstant((int64_t)C1 >= (int64_t)C2, VT);
+ }
+ }
+ }
+ if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.Val))
+ if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.Val)) {
+ double C1 = N1C->getValue(), C2 = N2C->getValue();
+
+ switch (Cond) {
+ default: break; // FIXME: Implement the rest of these!
+ case ISD::SETEQ: return getConstant(C1 == C2, VT);
+ case ISD::SETNE: return getConstant(C1 != C2, VT);
+ case ISD::SETLT: return getConstant(C1 < C2, VT);
+ case ISD::SETGT: return getConstant(C1 > C2, VT);
+ case ISD::SETLE: return getConstant(C1 <= C2, VT);
+ case ISD::SETGE: return getConstant(C1 >= C2, VT);
+ }
+ } else {
+ // Ensure that the constant occurs on the RHS.
+ return getSetCC(VT, N2, N1, ISD::getSetCCSwappedOperands(Cond));
+ }
+
+ // Could not fold it.
+ return SDOperand();
+}
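+
+// Illustrative use (a sketch): callers check the returned SDOperand for a null
+// Val to see whether the compare folded:
+//   SDOperand F = DAG.FoldSetCC(MVT::i1, DAG.getConstant(1, MVT::i32),
+//                               DAG.getConstant(2, MVT::i32), ISD::SETLT);
+//   // F is getConstant(1, MVT::i1); an unfoldable compare yields F.Val == 0.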
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
+/// this predicate to simplify operations downstream. Mask is known to be zero
+/// for bits that V cannot have.
+bool SelectionDAG::MaskedValueIsZero(SDOperand Op, uint64_t Mask,
+ unsigned Depth) const {
+ // The masks are not wide enough to represent this type! Should use APInt.
+ if (Op.getValueType() == MVT::i128)
+ return false;
+
+ uint64_t KnownZero, KnownOne;
+ ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ return (KnownZero & Mask) == Mask;
+}
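+
+// Illustrative use (a sketch): asking whether the low byte of a value is known
+// to be zero, as a combiner might before dropping a redundant AND:
+//   if (DAG.MaskedValueIsZero(X, 0xFF))
+//     ; // every bit selected by the mask is provably zero in X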
+
+/// ComputeMaskedBits - Determine which of the bits specified in Mask are
+/// known to be either zero or one and return them in the KnownZero/KnownOne
+/// bitsets. This code only analyzes bits in Mask, in order to short-circuit
+/// processing.
+void SelectionDAG::ComputeMaskedBits(SDOperand Op, uint64_t Mask,
+ uint64_t &KnownZero, uint64_t &KnownOne,
+ unsigned Depth) const {
+ KnownZero = KnownOne = 0; // Don't know anything.
+ if (Depth == 6 || Mask == 0)
+ return; // Limit search depth.
+
+ // The masks are not wide enough to represent this type! Should use APInt.
+ if (Op.getValueType() == MVT::i128)
+ return;
+
+ uint64_t KnownZero2, KnownOne2;
+
+ switch (Op.getOpcode()) {
+ case ISD::Constant:
+ // We know all of the bits for a constant!
+ KnownOne = cast<ConstantSDNode>(Op)->getValue() & Mask;
+ KnownZero = ~KnownOne & Mask;
+ return;
+ case ISD::AND:
+ // If either the LHS or the RHS are Zero, the result is zero.
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ Mask &= ~KnownZero;
+ ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ KnownZero |= KnownZero2;
+ return;
+ case ISD::OR:
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ Mask &= ~KnownOne;
+ ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ KnownOne |= KnownOne2;
+ return;
+ case ISD::XOR: {
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ uint64_t KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+ KnownZero = KnownZeroOut;
+ return;
+ }
+ case ISD::SELECT:
+ ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case ISD::SELECT_CC:
+ ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ return;
+ case ISD::SETCC:
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI.getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult)
+ KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
+ return;
+ case ISD::SHL:
+ // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ ComputeMaskedBits(Op.getOperand(0), Mask >> SA->getValue(),
+ KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero <<= SA->getValue();
+ KnownOne <<= SA->getValue();
+ KnownZero |= (1ULL << SA->getValue())-1; // low bits known zero.
+ }
+ return;
+ case ISD::SRL:
+ // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ MVT::ValueType VT = Op.getValueType();
+ unsigned ShAmt = SA->getValue();
+
+ uint64_t TypeMask = MVT::getIntVTBitMask(VT);
+ ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt) & TypeMask,
+ KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero &= TypeMask;
+ KnownOne &= TypeMask;
+ KnownZero >>= ShAmt;
+ KnownOne >>= ShAmt;
+
+ uint64_t HighBits = (1ULL << ShAmt)-1;
+ HighBits <<= MVT::getSizeInBits(VT)-ShAmt;
+ KnownZero |= HighBits; // High bits known zero.
+ }
+ return;
+ case ISD::SRA:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ MVT::ValueType VT = Op.getValueType();
+ unsigned ShAmt = SA->getValue();
+
+ // Compute the new bits that are at the top now.
+ uint64_t TypeMask = MVT::getIntVTBitMask(VT);
+
+ uint64_t InDemandedMask = (Mask << ShAmt) & TypeMask;
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ uint64_t HighBits = (1ULL << ShAmt)-1;
+ HighBits <<= MVT::getSizeInBits(VT) - ShAmt;
+ if (HighBits & Mask)
+ InDemandedMask |= MVT::getIntVTSignBit(VT);
+
+ ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero &= TypeMask;
+ KnownOne &= TypeMask;
+ KnownZero >>= ShAmt;
+ KnownOne >>= ShAmt;
+
+ // Handle the sign bits.
+ uint64_t SignBit = MVT::getIntVTSignBit(VT);
+ SignBit >>= ShAmt; // Adjust to where it is now in the mask.
+
+ if (KnownZero & SignBit) {
+ KnownZero |= HighBits; // New bits are known zero.
+ } else if (KnownOne & SignBit) {
+ KnownOne |= HighBits; // New bits are known one.
+ }
+ }
+ return;
+ case ISD::SIGN_EXTEND_INREG: {
+ MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & Mask;
+
+ uint64_t InSignBit = MVT::getIntVTSignBit(EVT);
+ int64_t InputDemandedBits = Mask & MVT::getIntVTBitMask(EVT);
+
+ // If the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ if (NewBits)
+ InputDemandedBits |= InSignBit;
+
+ ComputeMaskedBits(Op.getOperand(0), InputDemandedBits,
+ KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+ if (KnownZero & InSignBit) { // Input sign bit known clear
+ KnownZero |= NewBits;
+ KnownOne &= ~NewBits;
+ } else if (KnownOne & InSignBit) { // Input sign bit known set
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Input sign bit unknown
+ KnownZero &= ~NewBits;
+ KnownOne &= ~NewBits;
+ }
+ return;
+ }
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP: {
+ MVT::ValueType VT = Op.getValueType();
+ unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1;
+ KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT);
+ KnownOne = 0;
+ return;
+ }
+ case ISD::LOAD: {
+ if (ISD::isZEXTLoad(Op.Val)) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ MVT::ValueType VT = LD->getLoadedVT();
+ KnownZero |= ~MVT::getIntVTBitMask(VT) & Mask;
+ }
+ return;
+ }
+ case ISD::ZERO_EXTEND: {
+ uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType());
+ uint64_t NewBits = (~InMask) & Mask;
+ ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
+ KnownOne, Depth+1);
+ KnownZero |= NewBits & Mask;
+ KnownOne &= ~NewBits;
+ return;
+ }
+ case ISD::SIGN_EXTEND: {
+ MVT::ValueType InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = MVT::getSizeInBits(InVT);
+ uint64_t InMask = MVT::getIntVTBitMask(InVT);
+ uint64_t InSignBit = 1ULL << (InBits-1);
+ uint64_t NewBits = (~InMask) & Mask;
+ uint64_t InDemandedBits = Mask & InMask;
+
+ // If any of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ if (NewBits & Mask)
+ InDemandedBits |= InSignBit;
+
+ ComputeMaskedBits(Op.getOperand(0), InDemandedBits, KnownZero,
+ KnownOne, Depth+1);
+ // If the sign bit is known zero or one, the top bits match.
+ if (KnownZero & InSignBit) {
+ KnownZero |= NewBits;
+ KnownOne &= ~NewBits;
+ } else if (KnownOne & InSignBit) {
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Otherwise, top bits aren't known.
+ KnownOne &= ~NewBits;
+ KnownZero &= ~NewBits;
+ }
+ return;
+ }
+ case ISD::ANY_EXTEND: {
+ MVT::ValueType VT = Op.getOperand(0).getValueType();
+ ComputeMaskedBits(Op.getOperand(0), Mask & MVT::getIntVTBitMask(VT),
+ KnownZero, KnownOne, Depth+1);
+ return;
+ }
+ case ISD::TRUNCATE: {
+ ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType());
+ KnownZero &= OutMask;
+ KnownOne &= OutMask;
+ break;
+ }
+ case ISD::AssertZext: {
+ MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ uint64_t InMask = MVT::getIntVTBitMask(VT);
+ ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
+ KnownOne, Depth+1);
+ KnownZero |= (~InMask) & Mask;
+ return;
+ }
+ case ISD::ADD: {
+ // Compute the known bits of both operands.
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // The low bits of the output are known to be zero up to the number of
+ // trailing zero bits common to both the LHS and RHS. For example, 8+(X<<3)
+ // is known to have the low 3 bits clear.
+ uint64_t KnownZeroOut = std::min(CountTrailingZeros_64(~KnownZero),
+ CountTrailingZeros_64(~KnownZero2));
+
+ KnownZero = (1ULL << KnownZeroOut) - 1;
+ KnownOne = 0;
+ return;
+ }
+ case ISD::SUB: {
+ ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+ if (!CLHS) return;
+
+ // We know that the top bits of C-X are clear if X contains less bits
+ // than C (i.e. no wrap-around can happen). For example, 20-X is
+ // positive if we can prove that X is >= 0 and < 16.
+ MVT::ValueType VT = CLHS->getValueType(0);
+ if ((CLHS->getValue() & MVT::getIntVTSignBit(VT)) == 0) { // sign bit clear
+ unsigned NLZ = CountLeadingZeros_64(CLHS->getValue()+1);
+ uint64_t MaskV = (1ULL << (63-NLZ))-1; // NLZ can't be 64 with no sign bit
+ MaskV = ~MaskV & MVT::getIntVTBitMask(VT);
+ ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero, KnownOne, Depth+1);
+
+ // If all of the MaskV bits are known to be zero, then we know the output
+ // top bits are zero, because we now know that the output is from [0-C].
+ if ((KnownZero & MaskV) == MaskV) {
+ unsigned NLZ2 = CountLeadingZeros_64(CLHS->getValue());
+ KnownZero = ~((1ULL << (64-NLZ2))-1) & Mask; // Top bits known zero.
+ KnownOne = 0; // No one bits known.
+ } else {
+ KnownZero = KnownOne = 0; // Otherwise, nothing known.
+ }
+ }
+ return;
+ }
+ default:
+ // Allow the target to implement this method for its nodes.
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this);
+ }
+ return;
+ }
+}
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits. We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information. For example, immediately after an "SRA X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
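+///
+/// As an informal usage sketch (the SelectionDAG 'DAG' and the i32 operand
+/// 'X' below are hypothetical names, not defined here):
+///
+///   unsigned SignBits = DAG.ComputeNumSignBits(X, 0);
+///   if (SignBits == 32)
+///     ;  // every bit equals the sign bit, so X can only be 0 or -1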
+unsigned SelectionDAG::ComputeNumSignBits(SDOperand Op, unsigned Depth) const{
+ MVT::ValueType VT = Op.getValueType();
+ assert(MVT::isInteger(VT) && "Invalid VT!");
+ unsigned VTBits = MVT::getSizeInBits(VT);
+ unsigned Tmp, Tmp2;
+
+ if (Depth == 6)
+ return 1; // Limit search depth.
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::AssertSext:
+ Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
+ return VTBits-Tmp+1;
+ case ISD::AssertZext:
+ Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
+ return VTBits-Tmp;
+
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(Op)->getValue();
+ // If negative, invert the bits, then look at it.
+ if (Val & MVT::getIntVTSignBit(VT))
+ Val = ~Val;
+
+ // Shift the bits so they are the leading bits in the int64_t.
+ Val <<= 64-VTBits;
+
+ // Return # leading zeros. We use 'min' here in case Val was zero before
+ // shifting. We don't want to return '64' as for an i32 "0".
+ return std::min(VTBits, CountLeadingZeros_64(Val));
+ }
+
+ case ISD::SIGN_EXTEND:
+ Tmp = VTBits-MVT::getSizeInBits(Op.getOperand(0).getValueType());
+ return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
+
+ case ISD::SIGN_EXTEND_INREG:
+ // Max of the input and what this extends.
+ Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
+ Tmp = VTBits-Tmp+1;
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ return std::max(Tmp, Tmp2);
+
+ case ISD::SRA:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ // SRA X, C -> adds C sign bits.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Tmp += C->getValue();
+ if (Tmp > VTBits) Tmp = VTBits;
+ }
+ return Tmp;
+ case ISD::SHL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ // shl destroys sign bits.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (C->getValue() >= VTBits || // Bad shift.
+ C->getValue() >= Tmp) break; // Shifted all sign bits out.
+ return Tmp - C->getValue();
+ }
+ break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: // NOT is handled here.
+    // Logical binary ops preserve at least the minimum number of sign bits
+    // of the two operands.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ return std::min(Tmp, Tmp2);
+
+ case ISD::SELECT:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ return std::min(Tmp, Tmp2);
+
+ case ISD::SETCC:
+ // If setcc returns 0/-1, all bits are sign bits.
+ if (TLI.getSetCCResultContents() ==
+ TargetLowering::ZeroOrNegativeOneSetCCResult)
+ return VTBits;
+ break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned RotAmt = C->getValue() & (VTBits-1);
+
+      // Handle rotate right by N like a rotate left by VTBits-N.
+ if (Op.getOpcode() == ISD::ROTR)
+ RotAmt = (VTBits-RotAmt) & (VTBits-1);
+
+ // If we aren't rotating out all of the known-in sign bits, return the
+ // number that are left. This handles rotl(sext(x), 1) for example.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp > RotAmt+1) return Tmp-RotAmt;
+ }
+ break;
+ case ISD::ADD:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+
+ // Special case decrementing a value (ADD X, -1):
+ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
+ if (CRHS->isAllOnesValue()) {
+ uint64_t KnownZero, KnownOne;
+ uint64_t Mask = MVT::getIntVTBitMask(VT);
+ ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero|1) == Mask)
+ return VTBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (KnownZero & MVT::getIntVTSignBit(VT))
+ return Tmp;
+ }
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+ return std::min(Tmp, Tmp2)-1;
+ break;
+
+ case ISD::SUB:
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+
+ // Handle NEG.
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
+ if (CLHS->getValue() == 0) {
+ uint64_t KnownZero, KnownOne;
+ uint64_t Mask = MVT::getIntVTBitMask(VT);
+ ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero|1) == Mask)
+ return VTBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the input.
+ if (KnownZero & MVT::getIntVTSignBit(VT))
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
+
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2)-1;
+ break;
+ case ISD::TRUNCATE:
+ // FIXME: it's tricky to do anything useful for this, but it is an important
+ // case for targets like X86.
+ break;
+ }
+
+  // Handle LOADX separately here.  The EXTLOAD case will fall through.
+ if (Op.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ unsigned ExtType = LD->getExtensionType();
+ switch (ExtType) {
+ default: break;
+    case ISD::SEXTLOAD:    // e.g. sextload i16 -> i32 = '17' sign bits known
+ Tmp = MVT::getSizeInBits(LD->getLoadedVT());
+ return VTBits-Tmp+1;
+    case ISD::ZEXTLOAD:    // e.g. zextload i16 -> i32 = '16' sign bits known
+ Tmp = MVT::getSizeInBits(LD->getLoadedVT());
+ return VTBits-Tmp;
+ }
+ }
+
+ // Allow the target to implement this method for its nodes.
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) {
+ unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth);
+ if (NumBits > 1) return NumBits;
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ uint64_t KnownZero, KnownOne;
+ uint64_t Mask = MVT::getIntVTBitMask(VT);
+ ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+
+ uint64_t SignBit = MVT::getIntVTSignBit(VT);
+ if (KnownZero & SignBit) { // SignBit is 0
+ Mask = KnownZero;
+ } else if (KnownOne & SignBit) { // SignBit is 1;
+ Mask = KnownOne;
+ } else {
+ // Nothing known.
+ return 1;
+ }
+
+ // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
+ // the number of identical bits in the top of the input value.
+ Mask ^= ~0ULL;
+ Mask <<= 64-VTBits;
+  // Return # leading zeros.  We use 'min' here in case the mask was zero
+  // before shifting.  We don't want to return '64' as for an i32 "0".
+ return std::min(VTBits, CountLeadingZeros_64(Mask));
+}
+
+
+/// getNode - Gets or creates the specified node.
+///
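+/// For illustration (the operands 'A' and 'B' of type MVT::i32 below are
+/// hypothetical), a caller can build an add with one of the overloads below:
+///
+///   SDOperand Sum = DAG.getNode(ISD::ADD, MVT::i32, A, B);
+///
+/// Because most nodes are memoized in the CSE map, requesting the same
+/// opcode, type, and operands again normally returns the existing node.
+///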
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ SDNode *N = new SDNode(Opcode, SDNode::getSDVTList(VT));
+ CSEMap.InsertNode(N, IP);
+
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
+ SDOperand Operand) {
+ unsigned Tmp1;
+ // Constant fold unary operations with an integer constant operand.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.Val)) {
+ uint64_t Val = C->getValue();
+ switch (Opcode) {
+ default: break;
+ case ISD::SIGN_EXTEND: return getConstant(C->getSignExtended(), VT);
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND: return getConstant(Val, VT);
+ case ISD::TRUNCATE: return getConstant(Val, VT);
+ case ISD::SINT_TO_FP: return getConstantFP(C->getSignExtended(), VT);
+ case ISD::UINT_TO_FP: return getConstantFP(C->getValue(), VT);
+ case ISD::BIT_CONVERT:
+ if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+ return getConstantFP(BitsToFloat(Val), VT);
+ else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+ return getConstantFP(BitsToDouble(Val), VT);
+ break;
+ case ISD::BSWAP:
+ switch(VT) {
+ default: assert(0 && "Invalid bswap!"); break;
+ case MVT::i16: return getConstant(ByteSwap_16((unsigned short)Val), VT);
+ case MVT::i32: return getConstant(ByteSwap_32((unsigned)Val), VT);
+ case MVT::i64: return getConstant(ByteSwap_64(Val), VT);
+ }
+ break;
+ case ISD::CTPOP:
+ switch(VT) {
+ default: assert(0 && "Invalid ctpop!"); break;
+ case MVT::i1: return getConstant(Val != 0, VT);
+ case MVT::i8:
+ Tmp1 = (unsigned)Val & 0xFF;
+ return getConstant(CountPopulation_32(Tmp1), VT);
+ case MVT::i16:
+ Tmp1 = (unsigned)Val & 0xFFFF;
+ return getConstant(CountPopulation_32(Tmp1), VT);
+ case MVT::i32:
+ return getConstant(CountPopulation_32((unsigned)Val), VT);
+ case MVT::i64:
+ return getConstant(CountPopulation_64(Val), VT);
+ }
+ case ISD::CTLZ:
+ switch(VT) {
+ default: assert(0 && "Invalid ctlz!"); break;
+ case MVT::i1: return getConstant(Val == 0, VT);
+ case MVT::i8:
+ Tmp1 = (unsigned)Val & 0xFF;
+ return getConstant(CountLeadingZeros_32(Tmp1)-24, VT);
+ case MVT::i16:
+ Tmp1 = (unsigned)Val & 0xFFFF;
+ return getConstant(CountLeadingZeros_32(Tmp1)-16, VT);
+ case MVT::i32:
+ return getConstant(CountLeadingZeros_32((unsigned)Val), VT);
+ case MVT::i64:
+ return getConstant(CountLeadingZeros_64(Val), VT);
+ }
+ case ISD::CTTZ:
+ switch(VT) {
+ default: assert(0 && "Invalid cttz!"); break;
+ case MVT::i1: return getConstant(Val == 0, VT);
+ case MVT::i8:
+ Tmp1 = (unsigned)Val | 0x100;
+ return getConstant(CountTrailingZeros_32(Tmp1), VT);
+ case MVT::i16:
+ Tmp1 = (unsigned)Val | 0x10000;
+ return getConstant(CountTrailingZeros_32(Tmp1), VT);
+ case MVT::i32:
+ return getConstant(CountTrailingZeros_32((unsigned)Val), VT);
+ case MVT::i64:
+ return getConstant(CountTrailingZeros_64(Val), VT);
+ }
+ }
+ }
+
+  // Constant fold unary operations with a floating point constant operand.
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.Val))
+ switch (Opcode) {
+ case ISD::FNEG:
+ return getConstantFP(-C->getValue(), VT);
+ case ISD::FABS:
+ return getConstantFP(fabs(C->getValue()), VT);
+ case ISD::FP_ROUND:
+ case ISD::FP_EXTEND:
+ return getConstantFP(C->getValue(), VT);
+ case ISD::FP_TO_SINT:
+ return getConstant((int64_t)C->getValue(), VT);
+ case ISD::FP_TO_UINT:
+ return getConstant((uint64_t)C->getValue(), VT);
+ case ISD::BIT_CONVERT:
+ if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ return getConstant(FloatToBits(C->getValue()), VT);
+ else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+ return getConstant(DoubleToBits(C->getValue()), VT);
+ break;
+ }
+
+ unsigned OpOpcode = Operand.Val->getOpcode();
+ switch (Opcode) {
+ case ISD::TokenFactor:
+ return Operand; // Factor of one node? No factor.
+ case ISD::FP_ROUND:
+ case ISD::FP_EXTEND:
+ assert(MVT::isFloatingPoint(VT) &&
+ MVT::isFloatingPoint(Operand.getValueType()) && "Invalid FP cast!");
+ break;
+ case ISD::SIGN_EXTEND:
+ assert(MVT::isInteger(VT) && MVT::isInteger(Operand.getValueType()) &&
+ "Invalid SIGN_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType() < VT && "Invalid sext node, dst < src!");
+ if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
+ return getNode(OpOpcode, VT, Operand.Val->getOperand(0));
+ break;
+ case ISD::ZERO_EXTEND:
+ assert(MVT::isInteger(VT) && MVT::isInteger(Operand.getValueType()) &&
+ "Invalid ZERO_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType() < VT && "Invalid zext node, dst < src!");
+ if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
+ return getNode(ISD::ZERO_EXTEND, VT, Operand.Val->getOperand(0));
+ break;
+ case ISD::ANY_EXTEND:
+ assert(MVT::isInteger(VT) && MVT::isInteger(Operand.getValueType()) &&
+ "Invalid ANY_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType() < VT && "Invalid anyext node, dst < src!");
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND)
+ // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
+ return getNode(OpOpcode, VT, Operand.Val->getOperand(0));
+ break;
+ case ISD::TRUNCATE:
+ assert(MVT::isInteger(VT) && MVT::isInteger(Operand.getValueType()) &&
+ "Invalid TRUNCATE!");
+ if (Operand.getValueType() == VT) return Operand; // noop truncate
+ assert(Operand.getValueType() > VT && "Invalid truncate node, src < dst!");
+ if (OpOpcode == ISD::TRUNCATE)
+ return getNode(ISD::TRUNCATE, VT, Operand.Val->getOperand(0));
+ else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND) {
+ // If the source is smaller than the dest, we still need an extend.
+ if (Operand.Val->getOperand(0).getValueType() < VT)
+ return getNode(OpOpcode, VT, Operand.Val->getOperand(0));
+ else if (Operand.Val->getOperand(0).getValueType() > VT)
+ return getNode(ISD::TRUNCATE, VT, Operand.Val->getOperand(0));
+ else
+ return Operand.Val->getOperand(0);
+ }
+ break;
+ case ISD::BIT_CONVERT:
+ // Basic sanity checking.
+ assert(MVT::getSizeInBits(VT) == MVT::getSizeInBits(Operand.getValueType())
+ && "Cannot BIT_CONVERT between types of different sizes!");
+ if (VT == Operand.getValueType()) return Operand; // noop conversion.
+ if (OpOpcode == ISD::BIT_CONVERT) // bitconv(bitconv(x)) -> bitconv(x)
+ return getNode(ISD::BIT_CONVERT, VT, Operand.getOperand(0));
+ if (OpOpcode == ISD::UNDEF)
+ return getNode(ISD::UNDEF, VT);
+ break;
+ case ISD::SCALAR_TO_VECTOR:
+ assert(MVT::isVector(VT) && !MVT::isVector(Operand.getValueType()) &&
+ MVT::getVectorElementType(VT) == Operand.getValueType() &&
+ "Illegal SCALAR_TO_VECTOR node!");
+ break;
+ case ISD::FNEG:
+ if (OpOpcode == ISD::FSUB) // -(X-Y) -> (Y-X)
+ return getNode(ISD::FSUB, VT, Operand.Val->getOperand(1),
+ Operand.Val->getOperand(0));
+ if (OpOpcode == ISD::FNEG) // --X -> X
+ return Operand.Val->getOperand(0);
+ break;
+ case ISD::FABS:
+ if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
+ return getNode(ISD::FABS, VT, Operand.Val->getOperand(0));
+ break;
+ }
+
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Flag) { // Don't CSE flag producing nodes
+ FoldingSetNodeID ID;
+ SDOperand Ops[1] = { Operand };
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ N = new UnarySDNode(Opcode, VTs, Operand);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new UnarySDNode(Opcode, VTs, Operand);
+ }
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
+ SDOperand N1, SDOperand N2) {
+#ifndef NDEBUG
+ switch (Opcode) {
+ case ISD::TokenFactor:
+ assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
+ N2.getValueType() == MVT::Other && "Invalid token factor!");
+ break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::MULHU:
+ case ISD::MULHS:
+ assert(MVT::isInteger(VT) && "This operator does not apply to FP types!");
+ // fall through
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::SREM:
+ assert(MVT::isInteger(N1.getValueType()) && "Should use F* for FP ops");
+ // fall through.
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
+ case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
+ assert(N1.getValueType() == VT &&
+ MVT::isFloatingPoint(N1.getValueType()) &&
+ MVT::isFloatingPoint(N2.getValueType()) &&
+ "Invalid FCOPYSIGN!");
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ assert(VT == N1.getValueType() &&
+ "Shift operators return type must be the same as their first arg");
+ assert(MVT::isInteger(VT) && MVT::isInteger(N2.getValueType()) &&
+ VT != MVT::i1 && "Shifts only work on integers");
+ break;
+ case ISD::FP_ROUND_INREG: {
+ MVT::ValueType EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg round!");
+ assert(MVT::isFloatingPoint(VT) && MVT::isFloatingPoint(EVT) &&
+ "Cannot FP_ROUND_INREG integer types");
+ assert(EVT <= VT && "Not rounding down!");
+ break;
+ }
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ case ISD::SIGN_EXTEND_INREG: {
+ MVT::ValueType EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(MVT::isInteger(VT) && MVT::isInteger(EVT) &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(EVT <= VT && "Not extending!");
+ }
+
+ default: break;
+ }
+#endif
+
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.Val);
+ if (N1C) {
+ if (Opcode == ISD::SIGN_EXTEND_INREG) {
+ int64_t Val = N1C->getValue();
+ unsigned FromBits = MVT::getSizeInBits(cast<VTSDNode>(N2)->getVT());
+ Val <<= 64-FromBits;
+ Val >>= 64-FromBits;
+ return getConstant(Val, VT);
+ }
+
+ if (N2C) {
+ uint64_t C1 = N1C->getValue(), C2 = N2C->getValue();
+ switch (Opcode) {
+ case ISD::ADD: return getConstant(C1 + C2, VT);
+ case ISD::SUB: return getConstant(C1 - C2, VT);
+ case ISD::MUL: return getConstant(C1 * C2, VT);
+ case ISD::UDIV:
+ if (C2) return getConstant(C1 / C2, VT);
+ break;
+ case ISD::UREM :
+ if (C2) return getConstant(C1 % C2, VT);
+ break;
+ case ISD::SDIV :
+ if (C2) return getConstant(N1C->getSignExtended() /
+ N2C->getSignExtended(), VT);
+ break;
+ case ISD::SREM :
+ if (C2) return getConstant(N1C->getSignExtended() %
+ N2C->getSignExtended(), VT);
+ break;
+ case ISD::AND : return getConstant(C1 & C2, VT);
+ case ISD::OR : return getConstant(C1 | C2, VT);
+ case ISD::XOR : return getConstant(C1 ^ C2, VT);
+ case ISD::SHL : return getConstant(C1 << C2, VT);
+ case ISD::SRL : return getConstant(C1 >> C2, VT);
+ case ISD::SRA : return getConstant(N1C->getSignExtended() >>(int)C2, VT);
+ case ISD::ROTL :
+ return getConstant((C1 << C2) | (C1 >> (MVT::getSizeInBits(VT) - C2)),
+ VT);
+ case ISD::ROTR :
+ return getConstant((C1 >> C2) | (C1 << (MVT::getSizeInBits(VT) - C2)),
+ VT);
+ default: break;
+ }
+    } else {      // Canonicalize constant to RHS if commutative
+ if (isCommutativeBinOp(Opcode)) {
+ std::swap(N1C, N2C);
+ std::swap(N1, N2);
+ }
+ }
+ }
+
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.Val);
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.Val);
+ if (N1CFP) {
+ if (N2CFP) {
+ double C1 = N1CFP->getValue(), C2 = N2CFP->getValue();
+ switch (Opcode) {
+ case ISD::FADD: return getConstantFP(C1 + C2, VT);
+ case ISD::FSUB: return getConstantFP(C1 - C2, VT);
+ case ISD::FMUL: return getConstantFP(C1 * C2, VT);
+ case ISD::FDIV:
+ if (C2) return getConstantFP(C1 / C2, VT);
+ break;
+ case ISD::FREM :
+ if (C2) return getConstantFP(fmod(C1, C2), VT);
+ break;
+ case ISD::FCOPYSIGN: {
+ union {
+ double F;
+ uint64_t I;
+ } u1;
+ u1.F = C1;
+ if (int64_t(DoubleToBits(C2)) < 0) // Sign bit of RHS set?
+ u1.I |= 1ULL << 63; // Set the sign bit of the LHS.
+ else
+ u1.I &= (1ULL << 63)-1; // Clear the sign bit of the LHS.
+ return getConstantFP(u1.F, VT);
+ }
+ default: break;
+ }
+    } else {      // Canonicalize constant to RHS if commutative
+ if (isCommutativeBinOp(Opcode)) {
+ std::swap(N1CFP, N2CFP);
+ std::swap(N1, N2);
+ }
+ }
+ }
+
+ // Canonicalize an UNDEF to the RHS, even over a constant.
+ if (N1.getOpcode() == ISD::UNDEF) {
+ if (isCommutativeBinOp(Opcode)) {
+ std::swap(N1, N2);
+ } else {
+ switch (Opcode) {
+ case ISD::FP_ROUND_INREG:
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::SUB:
+ case ISD::FSUB:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SRA:
+ return N1; // fold op(undef, arg2) -> undef
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!MVT::isVector(VT))
+ return getConstant(0, VT); // fold op(undef, arg2) -> 0
+        // For vectors, we can't easily build an all zero vector, just return
+        // the other (defined) operand.
+ return N2;
+ }
+ }
+ }
+
+ // Fold a bunch of operators when the RHS is undef.
+ if (N2.getOpcode() == ISD::UNDEF) {
+ switch (Opcode) {
+ case ISD::ADD:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUB:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::XOR:
+ return N2; // fold op(arg1, undef) -> undef
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!MVT::isVector(VT))
+ return getConstant(0, VT); // fold op(arg1, undef) -> 0
+ // For vectors, we can't easily build an all zero vector, just return
+ // the LHS.
+ return N1;
+ case ISD::OR:
+ if (!MVT::isVector(VT))
+ return getConstant(MVT::getIntVTBitMask(VT), VT);
+ // For vectors, we can't easily build an all one vector, just return
+ // the LHS.
+ return N1;
+ case ISD::SRA:
+ return N1;
+ }
+ }
+
+ // Fold operations.
+ switch (Opcode) {
+ case ISD::TokenFactor:
+ // Fold trivial token factors.
+ if (N1.getOpcode() == ISD::EntryToken) return N2;
+ if (N2.getOpcode() == ISD::EntryToken) return N1;
+ break;
+
+ case ISD::AND:
+ // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
+ // worth handling here.
+ if (N2C && N2C->getValue() == 0)
+ return N2;
+ break;
+ case ISD::OR:
+ case ISD::XOR:
+ // (X ^| 0) -> X. This commonly occurs when legalizing i64 values, so it's
+ // worth handling here.
+ if (N2C && N2C->getValue() == 0)
+ return N1;
+ break;
+ case ISD::FP_ROUND_INREG:
+ if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding.
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ MVT::ValueType EVT = cast<VTSDNode>(N2)->getVT();
+ if (EVT == VT) return N1; // Not actually extending
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT:
+ assert(N2C && "Bad EXTRACT_VECTOR_ELT!");
+
+ // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
+ // expanding copies of large vectors from registers.
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N1.getNumOperands() > 0) {
+ unsigned Factor =
+ MVT::getVectorNumElements(N1.getOperand(0).getValueType());
+ return getNode(ISD::EXTRACT_VECTOR_ELT, VT,
+ N1.getOperand(N2C->getValue() / Factor),
+ getConstant(N2C->getValue() % Factor, N2.getValueType()));
+ }
+
+ // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is
+ // expanding large vector constants.
+ if (N1.getOpcode() == ISD::BUILD_VECTOR)
+ return N1.getOperand(N2C->getValue());
+
+ // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
+ // operations are lowered to scalars.
+ if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT)
+ if (ConstantSDNode *IEC = dyn_cast<ConstantSDNode>(N1.getOperand(2))) {
+ if (IEC == N2C)
+ return N1.getOperand(1);
+ else
+ return getNode(ISD::EXTRACT_VECTOR_ELT, VT, N1.getOperand(0), N2);
+ }
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ assert(N2C && (unsigned)N2C->getValue() < 2 && "Bad EXTRACT_ELEMENT!");
+
+ // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
+ // 64-bit integers into 32-bit parts. Instead of building the extract of
+ // the BUILD_PAIR, only to have legalize rip it apart, just do it now.
+ if (N1.getOpcode() == ISD::BUILD_PAIR)
+ return N1.getOperand(N2C->getValue());
+
+ // EXTRACT_ELEMENT of a constant int is also very common.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ unsigned Shift = MVT::getSizeInBits(VT) * N2C->getValue();
+ return getConstant(C->getValue() >> Shift, VT);
+ }
+ break;
+
+ // FIXME: figure out how to safely handle things like
+ // int foo(int x) { return 1 << (x & 255); }
+ // int bar() { return foo(256); }
+#if 0
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ if (N2.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(N2.getOperand(1))->getVT() != MVT::i1)
+ return getNode(Opcode, VT, N1, N2.getOperand(0));
+ else if (N2.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N2.getOperand(1))) {
+          // If the and is only masking out bits that cannot affect the shift,
+ // eliminate the and.
+ unsigned NumBits = MVT::getSizeInBits(VT);
+ if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+ return getNode(Opcode, VT, N1, N2.getOperand(0));
+ }
+ break;
+#endif
+ }
+
+ // Memoize this node if possible.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Flag) {
+ SDOperand Ops[] = { N1, N2 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ N = new BinarySDNode(Opcode, VTs, N1, N2);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new BinarySDNode(Opcode, VTs, N1, N2);
+ }
+
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
+ SDOperand N1, SDOperand N2, SDOperand N3) {
+ // Perform various simplifications.
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.Val);
+ switch (Opcode) {
+ case ISD::SETCC: {
+ // Use FoldSetCC to simplify SETCC's.
+ SDOperand Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get());
+ if (Simp.Val) return Simp;
+ break;
+ }
+ case ISD::SELECT:
+ if (N1C)
+ if (N1C->getValue())
+ return N2; // select true, X, Y -> X
+ else
+ return N3; // select false, X, Y -> Y
+
+ if (N2 == N3) return N2; // select C, X, X -> X
+ break;
+ case ISD::BRCOND:
+ if (N2C)
+ if (N2C->getValue()) // Unconditional branch
+ return getNode(ISD::BR, MVT::Other, N1, N3);
+ else
+ return N1; // Never-taken branch
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ assert(VT == N1.getValueType() && VT == N2.getValueType() &&
+ MVT::isVector(VT) && MVT::isVector(N3.getValueType()) &&
+ N3.getOpcode() == ISD::BUILD_VECTOR &&
+ MVT::getVectorNumElements(VT) == N3.getNumOperands() &&
+ "Illegal VECTOR_SHUFFLE node!");
+ break;
+ case ISD::BIT_CONVERT:
+ // Fold bit_convert nodes from a type to themselves.
+ if (N1.getValueType() == VT)
+ return N1;
+ break;
+ }
+
+ // Memoize node if it doesn't produce a flag.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Flag) {
+ SDOperand Ops[] = { N1, N2, N3 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ N = new TernarySDNode(Opcode, VTs, N1, N2, N3);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new TernarySDNode(Opcode, VTs, N1, N2, N3);
+ }
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
+ SDOperand N1, SDOperand N2, SDOperand N3,
+ SDOperand N4) {
+ SDOperand Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, VT, Ops, 4);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
+ SDOperand N1, SDOperand N2, SDOperand N3,
+ SDOperand N4, SDOperand N5) {
+ SDOperand Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, VT, Ops, 5);
+}
+
+SDOperand SelectionDAG::getLoad(MVT::ValueType VT,
+ SDOperand Chain, SDOperand Ptr,
+ const Value *SV, int SVOffset,
+ bool isVolatile, unsigned Alignment) {
+ if (Alignment == 0) { // Ensure that codegen never sees alignment 0
+ const Type *Ty = 0;
+ if (VT != MVT::iPTR) {
+ Ty = MVT::getTypeForValueType(VT);
+ } else if (SV) {
+ const PointerType *PT = dyn_cast<PointerType>(SV->getType());
+ assert(PT && "Value for load must be a pointer");
+ Ty = PT->getElementType();
+ }
+ assert(Ty && "Could not get type information for load");
+ Alignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+ }
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ SDOperand Undef = getNode(ISD::UNDEF, Ptr.getValueType());
+ SDOperand Ops[] = { Chain, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+ ID.AddInteger(ISD::UNINDEXED);
+ ID.AddInteger(ISD::NON_EXTLOAD);
+ ID.AddInteger(VT);
+ ID.AddPointer(SV);
+ ID.AddInteger(SVOffset);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(isVolatile);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ SDNode *N = new LoadSDNode(Ops, VTs, ISD::UNINDEXED,
+ ISD::NON_EXTLOAD, VT, SV, SVOffset, Alignment,
+ isVolatile);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, MVT::ValueType VT,
+ SDOperand Chain, SDOperand Ptr,
+ const Value *SV,
+ int SVOffset, MVT::ValueType EVT,
+ bool isVolatile, unsigned Alignment) {
+  // If the caller asks for an extending load from and to the same type,
+  // return a normal load.
+ if (VT == EVT)
+ ExtType = ISD::NON_EXTLOAD;
+
+ if (MVT::isVector(VT))
+ assert(EVT == MVT::getVectorElementType(VT) && "Invalid vector extload!");
+ else
+ assert(EVT < VT && "Should only be an extending load, not truncating!");
+ assert((ExtType == ISD::EXTLOAD || MVT::isInteger(VT)) &&
+ "Cannot sign/zero extend a FP/Vector load!");
+ assert(MVT::isInteger(VT) == MVT::isInteger(EVT) &&
+ "Cannot convert from FP to Int or Int -> FP!");
+
+ if (Alignment == 0) { // Ensure that codegen never sees alignment 0
+ const Type *Ty = 0;
+ if (VT != MVT::iPTR) {
+ Ty = MVT::getTypeForValueType(VT);
+ } else if (SV) {
+ const PointerType *PT = dyn_cast<PointerType>(SV->getType());
+ assert(PT && "Value for load must be a pointer");
+ Ty = PT->getElementType();
+ }
+ assert(Ty && "Could not get type information for load");
+ Alignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+ }
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ SDOperand Undef = getNode(ISD::UNDEF, Ptr.getValueType());
+ SDOperand Ops[] = { Chain, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+ ID.AddInteger(ISD::UNINDEXED);
+ ID.AddInteger(ExtType);
+ ID.AddInteger(EVT);
+ ID.AddPointer(SV);
+ ID.AddInteger(SVOffset);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(isVolatile);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ SDNode *N = new LoadSDNode(Ops, VTs, ISD::UNINDEXED, ExtType, EVT,
+ SV, SVOffset, Alignment, isVolatile);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDOperand
+SelectionDAG::getIndexedLoad(SDOperand OrigLoad, SDOperand Base,
+ SDOperand Offset, ISD::MemIndexedMode AM) {
+ LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+ assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
+         "Load is already an indexed load!");
+ MVT::ValueType VT = OrigLoad.getValueType();
+ SDVTList VTs = getVTList(VT, Base.getValueType(), MVT::Other);
+ SDOperand Ops[] = { LD->getChain(), Base, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+ ID.AddInteger(AM);
+ ID.AddInteger(LD->getExtensionType());
+ ID.AddInteger(LD->getLoadedVT());
+ ID.AddPointer(LD->getSrcValue());
+ ID.AddInteger(LD->getSrcValueOffset());
+ ID.AddInteger(LD->getAlignment());
+ ID.AddInteger(LD->isVolatile());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ SDNode *N = new LoadSDNode(Ops, VTs, AM,
+ LD->getExtensionType(), LD->getLoadedVT(),
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->getAlignment(), LD->isVolatile());
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getStore(SDOperand Chain, SDOperand Val,
+ SDOperand Ptr, const Value *SV, int SVOffset,
+ bool isVolatile, unsigned Alignment) {
+ MVT::ValueType VT = Val.getValueType();
+
+ if (Alignment == 0) { // Ensure that codegen never sees alignment 0
+ const Type *Ty = 0;
+ if (VT != MVT::iPTR) {
+ Ty = MVT::getTypeForValueType(VT);
+ } else if (SV) {
+ const PointerType *PT = dyn_cast<PointerType>(SV->getType());
+ assert(PT && "Value for store must be a pointer");
+ Ty = PT->getElementType();
+ }
+ assert(Ty && "Could not get type information for store");
+ Alignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+ }
+ SDVTList VTs = getVTList(MVT::Other);
+ SDOperand Undef = getNode(ISD::UNDEF, Ptr.getValueType());
+ SDOperand Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(ISD::UNINDEXED);
+ ID.AddInteger(false);
+ ID.AddInteger(VT);
+ ID.AddPointer(SV);
+ ID.AddInteger(SVOffset);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(isVolatile);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ SDNode *N = new StoreSDNode(Ops, VTs, ISD::UNINDEXED, false,
+ VT, SV, SVOffset, Alignment, isVolatile);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getTruncStore(SDOperand Chain, SDOperand Val,
+ SDOperand Ptr, const Value *SV,
+ int SVOffset, MVT::ValueType SVT,
+ bool isVolatile, unsigned Alignment) {
+ MVT::ValueType VT = Val.getValueType();
+ bool isTrunc = VT != SVT;
+
+ assert(VT > SVT && "Not a truncation?");
+ assert(MVT::isInteger(VT) == MVT::isInteger(SVT) &&
+ "Can't do FP-INT conversion!");
+
+ if (Alignment == 0) { // Ensure that codegen never sees alignment 0
+ const Type *Ty = 0;
+ if (VT != MVT::iPTR) {
+ Ty = MVT::getTypeForValueType(VT);
+ } else if (SV) {
+ const PointerType *PT = dyn_cast<PointerType>(SV->getType());
+ assert(PT && "Value for store must be a pointer");
+ Ty = PT->getElementType();
+ }
+ assert(Ty && "Could not get type information for store");
+ Alignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+ }
+ SDVTList VTs = getVTList(MVT::Other);
+ SDOperand Undef = getNode(ISD::UNDEF, Ptr.getValueType());
+ SDOperand Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(ISD::UNINDEXED);
+ ID.AddInteger(isTrunc);
+ ID.AddInteger(SVT);
+ ID.AddPointer(SV);
+ ID.AddInteger(SVOffset);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(isVolatile);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ SDNode *N = new StoreSDNode(Ops, VTs, ISD::UNINDEXED, isTrunc,
+ SVT, SV, SVOffset, Alignment, isVolatile);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDOperand
+SelectionDAG::getIndexedStore(SDOperand OrigStore, SDOperand Base,
+ SDOperand Offset, ISD::MemIndexedMode AM) {
+ StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+ assert(ST->getOffset().getOpcode() == ISD::UNDEF &&
+         "Store is already an indexed store!");
+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+ SDOperand Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ ID.AddInteger(AM);
+ ID.AddInteger(ST->isTruncatingStore());
+ ID.AddInteger(ST->getStoredVT());
+ ID.AddPointer(ST->getSrcValue());
+ ID.AddInteger(ST->getSrcValueOffset());
+ ID.AddInteger(ST->getAlignment());
+ ID.AddInteger(ST->isVolatile());
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ SDNode *N = new StoreSDNode(Ops, VTs, AM,
+ ST->isTruncatingStore(), ST->getStoredVT(),
+ ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getAlignment(), ST->isVolatile());
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getVAArg(MVT::ValueType VT,
+ SDOperand Chain, SDOperand Ptr,
+ SDOperand SV) {
+ SDOperand Ops[] = { Chain, Ptr, SV };
+ return getNode(ISD::VAARG, getVTList(VT, MVT::Other), Ops, 3);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
+ const SDOperand *Ops, unsigned NumOps) {
+ switch (NumOps) {
+ case 0: return getNode(Opcode, VT);
+ case 1: return getNode(Opcode, VT, Ops[0]);
+ case 2: return getNode(Opcode, VT, Ops[0], Ops[1]);
+ case 3: return getNode(Opcode, VT, Ops[0], Ops[1], Ops[2]);
+ default: break;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case ISD::SELECT_CC: {
+ assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
+ assert(Ops[0].getValueType() == Ops[1].getValueType() &&
+ "LHS and RHS of condition must have same type!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "True and False arms of SelectCC must have same type!");
+ assert(Ops[2].getValueType() == VT &&
+ "select_cc node must be of same type as true and false value!");
+ break;
+ }
+ case ISD::BR_CC: {
+ assert(NumOps == 5 && "BR_CC takes 5 operands!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "LHS/RHS of comparison should match types!");
+ break;
+ }
+ }
+
+ // Memoize nodes.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Flag) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ N = new SDNode(Opcode, VTs, Ops, NumOps);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new SDNode(Opcode, VTs, Ops, NumOps);
+ }
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode,
+ std::vector<MVT::ValueType> &ResultTys,
+ const SDOperand *Ops, unsigned NumOps) {
+ return getNode(Opcode, getNodeValueTypes(ResultTys), ResultTys.size(),
+ Ops, NumOps);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode,
+ const MVT::ValueType *VTs, unsigned NumVTs,
+ const SDOperand *Ops, unsigned NumOps) {
+ if (NumVTs == 1)
+ return getNode(Opcode, VTs[0], Ops, NumOps);
+ return getNode(Opcode, makeVTList(VTs, NumVTs), Ops, NumOps);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, SDVTList VTList,
+ const SDOperand *Ops, unsigned NumOps) {
+ if (VTList.NumVTs == 1)
+ return getNode(Opcode, VTList.VTs[0], Ops, NumOps);
+
+ switch (Opcode) {
+ // FIXME: figure out how to safely handle things like
+ // int foo(int x) { return 1 << (x & 255); }
+ // int bar() { return foo(256); }
+#if 0
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS:
+ case ISD::SHL_PARTS:
+ if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
+ return getNode(Opcode, VT, N1, N2, N3.getOperand(0));
+ else if (N3.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
+        // If the and is only masking out bits that cannot affect the shift,
+ // eliminate the and.
+ unsigned NumBits = MVT::getSizeInBits(VT)*2;
+ if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+ return getNode(Opcode, VT, N1, N2, N3.getOperand(0));
+ }
+ break;
+#endif
+ }
+
+ // Memoize the node unless it returns a flag.
+ SDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDOperand(E, 0);
+ if (NumOps == 1)
+ N = new UnarySDNode(Opcode, VTList, Ops[0]);
+ else if (NumOps == 2)
+ N = new BinarySDNode(Opcode, VTList, Ops[0], Ops[1]);
+ else if (NumOps == 3)
+ N = new TernarySDNode(Opcode, VTList, Ops[0], Ops[1], Ops[2]);
+ else
+ N = new SDNode(Opcode, VTList, Ops, NumOps);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ if (NumOps == 1)
+ N = new UnarySDNode(Opcode, VTList, Ops[0]);
+ else if (NumOps == 2)
+ N = new BinarySDNode(Opcode, VTList, Ops[0], Ops[1]);
+ else if (NumOps == 3)
+ N = new TernarySDNode(Opcode, VTList, Ops[0], Ops[1], Ops[2]);
+ else
+ N = new SDNode(Opcode, VTList, Ops, NumOps);
+ }
+ AllNodes.push_back(N);
+ return SDOperand(N, 0);
+}
+
+SDVTList SelectionDAG::getVTList(MVT::ValueType VT) {
+ if (!MVT::isExtendedVT(VT))
+ return makeVTList(SDNode::getValueTypeList(VT), 1);
+
+ for (std::list<std::vector<MVT::ValueType> >::iterator I = VTList.begin(),
+ E = VTList.end(); I != E; ++I) {
+ if (I->size() == 1 && (*I)[0] == VT)
+ return makeVTList(&(*I)[0], 1);
+ }
+ std::vector<MVT::ValueType> V;
+ V.push_back(VT);
+ VTList.push_front(V);
+ return makeVTList(&(*VTList.begin())[0], 1);
+}
+
+SDVTList SelectionDAG::getVTList(MVT::ValueType VT1, MVT::ValueType VT2) {
+ for (std::list<std::vector<MVT::ValueType> >::iterator I = VTList.begin(),
+ E = VTList.end(); I != E; ++I) {
+ if (I->size() == 2 && (*I)[0] == VT1 && (*I)[1] == VT2)
+ return makeVTList(&(*I)[0], 2);
+ }
+ std::vector<MVT::ValueType> V;
+ V.push_back(VT1);
+ V.push_back(VT2);
+ VTList.push_front(V);
+ return makeVTList(&(*VTList.begin())[0], 2);
+}
+SDVTList SelectionDAG::getVTList(MVT::ValueType VT1, MVT::ValueType VT2,
+ MVT::ValueType VT3) {
+ for (std::list<std::vector<MVT::ValueType> >::iterator I = VTList.begin(),
+ E = VTList.end(); I != E; ++I) {
+ if (I->size() == 3 && (*I)[0] == VT1 && (*I)[1] == VT2 &&
+ (*I)[2] == VT3)
+ return makeVTList(&(*I)[0], 3);
+ }
+ std::vector<MVT::ValueType> V;
+ V.push_back(VT1);
+ V.push_back(VT2);
+ V.push_back(VT3);
+ VTList.push_front(V);
+ return makeVTList(&(*VTList.begin())[0], 3);
+}
+
+SDVTList SelectionDAG::getVTList(const MVT::ValueType *VTs, unsigned NumVTs) {
+ switch (NumVTs) {
+ case 0: assert(0 && "Cannot have nodes without results!");
+ case 1: return getVTList(VTs[0]);
+ case 2: return getVTList(VTs[0], VTs[1]);
+ case 3: return getVTList(VTs[0], VTs[1], VTs[2]);
+ default: break;
+ }
+
+ for (std::list<std::vector<MVT::ValueType> >::iterator I = VTList.begin(),
+ E = VTList.end(); I != E; ++I) {
+ if (I->size() != NumVTs || VTs[0] != (*I)[0] || VTs[1] != (*I)[1]) continue;
+
+ bool NoMatch = false;
+ for (unsigned i = 2; i != NumVTs; ++i)
+ if (VTs[i] != (*I)[i]) {
+ NoMatch = true;
+ break;
+ }
+ if (!NoMatch)
+ return makeVTList(&*I->begin(), NumVTs);
+ }
+
+ VTList.push_front(std::vector<MVT::ValueType>(VTs, VTs+NumVTs));
+ return makeVTList(&*VTList.begin()->begin(), NumVTs);
+}
+
+
+/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+/// specified operands. If the resultant node already exists in the DAG,
+/// this does not modify the specified node, instead it returns the node that
+/// already exists. If the resultant node does not exist in the DAG, the
+/// input node is returned. As a degenerate case, if you specify the same
+/// input operands as the node already has, the input node is returned.
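+///
+/// A rough usage sketch (the value 'N' with two operands and the
+/// replacements 'NewLHS'/'NewRHS' are hypothetical):
+///
+///   SDOperand Res = DAG.UpdateNodeOperands(N, NewLHS, NewRHS);
+///   // Res may be N itself or a pre-existing equivalent node; keep using
+///   // Res rather than assuming N survived unchanged.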
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand InN, SDOperand Op) {
+ SDNode *N = InN.Val;
+ assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op == N->getOperand(0)) return InN;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
+ return SDOperand(Existing, InN.ResNo);
+
+  // Nope it doesn't.  Remove the node from its current place in the maps.
+ if (InsertPos)
+ RemoveNodeFromCSEMaps(N);
+
+ // Now we update the operands.
+ N->OperandList[0].Val->removeUser(N);
+ Op.Val->addUser(N);
+ N->OperandList[0] = Op;
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return InN;
+}
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand InN, SDOperand Op1, SDOperand Op2) {
+ SDNode *N = InN.Val;
+ assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
+ return InN; // No operands changed, just return the input node.
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
+ return SDOperand(Existing, InN.ResNo);
+
+  // Nope it doesn't.  Remove the node from its current place in the maps.
+ if (InsertPos)
+ RemoveNodeFromCSEMaps(N);
+
+ // Now we update the operands.
+ if (N->OperandList[0] != Op1) {
+ N->OperandList[0].Val->removeUser(N);
+ Op1.Val->addUser(N);
+ N->OperandList[0] = Op1;
+ }
+ if (N->OperandList[1] != Op2) {
+ N->OperandList[1].Val->removeUser(N);
+ Op2.Val->addUser(N);
+ N->OperandList[1] = Op2;
+ }
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return InN;
+}
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand N, SDOperand Op1, SDOperand Op2, SDOperand Op3) {
+ SDOperand Ops[] = { Op1, Op2, Op3 };
+ return UpdateNodeOperands(N, Ops, 3);
+}
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand N, SDOperand Op1, SDOperand Op2,
+ SDOperand Op3, SDOperand Op4) {
+ SDOperand Ops[] = { Op1, Op2, Op3, Op4 };
+ return UpdateNodeOperands(N, Ops, 4);
+}
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand N, SDOperand Op1, SDOperand Op2,
+ SDOperand Op3, SDOperand Op4, SDOperand Op5) {
+ SDOperand Ops[] = { Op1, Op2, Op3, Op4, Op5 };
+ return UpdateNodeOperands(N, Ops, 5);
+}
+
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand InN, SDOperand *Ops, unsigned NumOps) {
+ SDNode *N = InN.Val;
+ assert(N->getNumOperands() == NumOps &&
+ "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ bool AnyChange = false;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (Ops[i] != N->getOperand(i)) {
+ AnyChange = true;
+ break;
+ }
+ }
+
+ // No operands changed, just return the input node.
+ if (!AnyChange) return InN;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos))
+ return SDOperand(Existing, InN.ResNo);
+
+  // Nope it doesn't.  Remove the node from its current place in the maps.
+ if (InsertPos)
+ RemoveNodeFromCSEMaps(N);
+
+ // Now we update the operands.
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (N->OperandList[i] != Ops[i]) {
+ N->OperandList[i].Val->removeUser(N);
+ Ops[i].Val->addUser(N);
+ N->OperandList[i] = Ops[i];
+ }
+ }
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return InN;
+}
+
+
+/// MorphNodeTo - This frees the operands of the current node, resets the
+/// opcode, types, and operands to the specified values.  This should only be
+/// used by the SelectionDAG class.
+void SDNode::MorphNodeTo(unsigned Opc, SDVTList L,
+ const SDOperand *Ops, unsigned NumOps) {
+ NodeType = Opc;
+ ValueList = L.VTs;
+ NumValues = L.NumVTs;
+
+ // Clear the operands list, updating used nodes to remove this from their
+ // use list.
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
+ I->Val->removeUser(this);
+
+ // If NumOps is larger than the # of operands we currently have, reallocate
+ // the operand list.
+ if (NumOps > NumOperands) {
+ if (OperandsNeedDelete)
+ delete [] OperandList;
+ OperandList = new SDOperand[NumOps];
+ OperandsNeedDelete = true;
+ }
+
+ // Assign the new operands.
+ NumOperands = NumOps;
+
+ for (unsigned i = 0, e = NumOps; i != e; ++i) {
+ OperandList[i] = Ops[i];
+ SDNode *N = OperandList[i].Val;
+ N->Uses.push_back(this);
+ }
+}
+
+/// SelectNodeTo - These are used for target selectors to *mutate* the
+/// specified node to have the specified return type, Target opcode, and
+/// operands. Note that target opcodes are stored as
+/// ISD::BUILTIN_OP_END+TargetOpcode in the node opcode field.
+///
+/// Note that SelectNodeTo returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.
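+///
+/// Sketch of a call from an instruction selector (the names 'CurDAG', 'N',
+/// 'Op1', 'Op2', and the target opcode below are placeholders):
+///
+///   SDNode *Result = CurDAG->SelectNodeTo(N, TargetOpcode, MVT::i32,
+///                                         Op1, Op2);
+///   // Use Result; it may differ from N if an equivalent node already
+///   // existed in the CSE maps.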
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
+ MVT::ValueType VT) {
+ SDVTList VTs = getVTList(VT);
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, 0, 0);
+ void *IP = 0;
+ if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return ON;
+
+ RemoveNodeFromCSEMaps(N);
+
+ N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, 0, 0);
+
+ CSEMap.InsertNode(N, IP);
+ return N;
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
+ MVT::ValueType VT, SDOperand Op1) {
+           "Shift operators' return type must be the same as their first arg");
+ SDVTList VTs = getVTList(VT);
+ SDOperand Ops[] = { Op1 };
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 1);
+ void *IP = 0;
+ if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return ON;
+
+ RemoveNodeFromCSEMaps(N);
+ N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 1);
+ CSEMap.InsertNode(N, IP);
+ return N;
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
+ MVT::ValueType VT, SDOperand Op1,
+ SDOperand Op2) {
+ // If an identical node already exists, use it.
+ SDVTList VTs = getVTList(VT);
+ SDOperand Ops[] = { Op1, Op2 };
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 2);
+ void *IP = 0;
+ if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return ON;
+
+ RemoveNodeFromCSEMaps(N);
+
+ N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 2);
+
+ CSEMap.InsertNode(N, IP); // Memoize the new node.
+ return N;
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
+ MVT::ValueType VT, SDOperand Op1,
+ SDOperand Op2, SDOperand Op3) {
+ // If an identical node already exists, use it.
+ SDVTList VTs = getVTList(VT);
+ SDOperand Ops[] = { Op1, Op2, Op3 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 3);
+ void *IP = 0;
+ if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return ON;
+
+ RemoveNodeFromCSEMaps(N);
+
+ N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 3);
+
+ CSEMap.InsertNode(N, IP); // Memoize the new node.
+ return N;
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
+ MVT::ValueType VT, const SDOperand *Ops,
+ unsigned NumOps) {
+ // If an identical node already exists, use it.
+ SDVTList VTs = getVTList(VT);
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, NumOps);
+ void *IP = 0;
+ if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return ON;
+
+ RemoveNodeFromCSEMaps(N);
+ N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, NumOps);
+
+ CSEMap.InsertNode(N, IP); // Memoize the new node.
+ return N;
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
+ MVT::ValueType VT1, MVT::ValueType VT2,
+ SDOperand Op1, SDOperand Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ FoldingSetNodeID ID;
+ SDOperand Ops[] = { Op1, Op2 };
+ AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 2);
+ void *IP = 0;
+ if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return ON;
+
+ RemoveNodeFromCSEMaps(N);
+ N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 2);
+ CSEMap.InsertNode(N, IP); // Memoize the new node.
+ return N;
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc,
+ MVT::ValueType VT1, MVT::ValueType VT2,
+ SDOperand Op1, SDOperand Op2,
+ SDOperand Op3) {
+ // If an identical node already exists, use it.
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDOperand Ops[] = { Op1, Op2, Op3 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 3);
+ void *IP = 0;
+ if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return ON;
+
+ RemoveNodeFromCSEMaps(N);
+
+ N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 3);
+ CSEMap.InsertNode(N, IP); // Memoize the new node.
+ return N;
+}
+
+
+/// getTargetNode - These are used for target selectors to create a new node
+/// with specified return type(s), target opcode, and operands.
+///
+/// Note that getTargetNode returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.
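+///
+/// For illustration only (the opcode and operand names below are
+/// placeholders):
+///
+///   SDNode *MI = DAG.getTargetNode(TargetOpcode, MVT::i32, LHS, RHS);
+///
+/// As the forwarding overloads below show, this is simply getNode with
+/// ISD::BUILTIN_OP_END added to the opcode.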
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT) {
+ return getNode(ISD::BUILTIN_OP_END+Opcode, VT).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT,
+ SDOperand Op1) {
+ return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT,
+ SDOperand Op1, SDOperand Op2) {
+ return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1, Op2).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT,
+ SDOperand Op1, SDOperand Op2,
+ SDOperand Op3) {
+ return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1, Op2, Op3).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT,
+ const SDOperand *Ops, unsigned NumOps) {
+ return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Ops, NumOps).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1,
+ MVT::ValueType VT2, SDOperand Op1) {
+ const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2);
+ return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 2, &Op1, 1).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1,
+ MVT::ValueType VT2, SDOperand Op1,
+ SDOperand Op2) {
+ const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2);
+ SDOperand Ops[] = { Op1, Op2 };
+ return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 2, Ops, 2).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1,
+ MVT::ValueType VT2, SDOperand Op1,
+ SDOperand Op2, SDOperand Op3) {
+ const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2);
+ SDOperand Ops[] = { Op1, Op2, Op3 };
+ return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 2, Ops, 3).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1,
+ MVT::ValueType VT2,
+ const SDOperand *Ops, unsigned NumOps) {
+ const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2);
+ return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 2, Ops, NumOps).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1,
+ MVT::ValueType VT2, MVT::ValueType VT3,
+ SDOperand Op1, SDOperand Op2) {
+ const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2, VT3);
+ SDOperand Ops[] = { Op1, Op2 };
+ return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 3, Ops, 2).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1,
+ MVT::ValueType VT2, MVT::ValueType VT3,
+ SDOperand Op1, SDOperand Op2,
+ SDOperand Op3) {
+ const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2, VT3);
+ SDOperand Ops[] = { Op1, Op2, Op3 };
+ return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 3, Ops, 3).Val;
+}
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1,
+ MVT::ValueType VT2, MVT::ValueType VT3,
+ const SDOperand *Ops, unsigned NumOps) {
+ const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2, VT3);
+ return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 3, Ops, NumOps).Val;
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From/To have a single result value.
+///
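+/// A sketched use after simplifying a node ('OldVal' and 'NewVal' are
+/// hypothetical single-result values):
+///
+///   std::vector<SDNode*> Dead;
+///   DAG.ReplaceAllUsesWith(OldVal, NewVal, &Dead);
+///   // Dead now lists any nodes that were merged away and deleted.
+///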
+void SelectionDAG::ReplaceAllUsesWith(SDOperand FromN, SDOperand ToN,
+ std::vector<SDNode*> *Deleted) {
+ SDNode *From = FromN.Val, *To = ToN.Val;
+ assert(From->getNumValues() == 1 && To->getNumValues() == 1 &&
+ "Cannot replace with this method!");
+  assert(From != To && "Cannot replace uses of a node with itself");
+
+ while (!From->use_empty()) {
+ // Process users until they are all gone.
+ SDNode *U = *From->use_begin();
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(U);
+
+ for (SDOperand *I = U->OperandList, *E = U->OperandList+U->NumOperands;
+ I != E; ++I)
+ if (I->Val == From) {
+ From->removeUser(U);
+ I->Val = To;
+ To->addUser(U);
+ }
+
+ // Now that we have modified U, add it back to the CSE maps. If it already
+ // exists there, recursively merge the results together.
+ if (SDNode *Existing = AddNonLeafNodeToCSEMaps(U)) {
+ ReplaceAllUsesWith(U, Existing, Deleted);
+ // U is now dead.
+ if (Deleted) Deleted->push_back(U);
+ DeleteNodeNotInCSEMaps(U);
+ }
+ }
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From/To have matching types and numbers of result
+/// values.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
+ std::vector<SDNode*> *Deleted) {
+  assert(From != To && "Cannot replace uses of a node with itself");
+ assert(From->getNumValues() == To->getNumValues() &&
+ "Cannot use this version of ReplaceAllUsesWith!");
+ if (From->getNumValues() == 1) { // If possible, use the faster version.
+ ReplaceAllUsesWith(SDOperand(From, 0), SDOperand(To, 0), Deleted);
+ return;
+ }
+
+ while (!From->use_empty()) {
+ // Process users until they are all gone.
+ SDNode *U = *From->use_begin();
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(U);
+
+ for (SDOperand *I = U->OperandList, *E = U->OperandList+U->NumOperands;
+ I != E; ++I)
+ if (I->Val == From) {
+ From->removeUser(U);
+ I->Val = To;
+ To->addUser(U);
+ }
+
+ // Now that we have modified U, add it back to the CSE maps. If it already
+ // exists there, recursively merge the results together.
+ if (SDNode *Existing = AddNonLeafNodeToCSEMaps(U)) {
+ ReplaceAllUsesWith(U, Existing, Deleted);
+ // U is now dead.
+ if (Deleted) Deleted->push_back(U);
+ DeleteNodeNotInCSEMaps(U);
+ }
+ }
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version can replace From with any result values. To must match the
+/// number and types of values returned by From.
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
+ const SDOperand *To,
+ std::vector<SDNode*> *Deleted) {
+ if (From->getNumValues() == 1 && To[0].Val->getNumValues() == 1) {
+ // Degenerate case handled above.
+ ReplaceAllUsesWith(SDOperand(From, 0), To[0], Deleted);
+ return;
+ }
+
+ while (!From->use_empty()) {
+ // Process users until they are all gone.
+ SDNode *U = *From->use_begin();
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(U);
+
+ for (SDOperand *I = U->OperandList, *E = U->OperandList+U->NumOperands;
+ I != E; ++I)
+ if (I->Val == From) {
+ const SDOperand &ToOp = To[I->ResNo];
+ From->removeUser(U);
+ *I = ToOp;
+ ToOp.Val->addUser(U);
+ }
+
+ // Now that we have modified U, add it back to the CSE maps. If it already
+ // exists there, recursively merge the results together.
+ if (SDNode *Existing = AddNonLeafNodeToCSEMaps(U)) {
+ ReplaceAllUsesWith(U, Existing, Deleted);
+ // U is now dead.
+ if (Deleted) Deleted->push_back(U);
+ DeleteNodeNotInCSEMaps(U);
+ }
+ }
+}
+
+/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.Val alone. The Deleted vector is
+/// handled the same way as for ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDOperand From, SDOperand To,
+ std::vector<SDNode*> &Deleted) {
+ assert(From != To && "Cannot replace a value with itself");
+ // Handle the simple, trivial, case efficiently.
+ if (From.Val->getNumValues() == 1 && To.Val->getNumValues() == 1) {
+ ReplaceAllUsesWith(From, To, &Deleted);
+ return;
+ }
+
+ // Get all of the users of From.Val. We want these in a nice,
+ // deterministically ordered and uniqued set, so we use a SmallSetVector.
+ SmallSetVector<SDNode*, 16> Users(From.Val->use_begin(), From.Val->use_end());
+
+ while (!Users.empty()) {
+ // We know that this user uses some value of From. If it is the right
+ // value, update it.
+ SDNode *User = Users.back();
+ Users.pop_back();
+
+ for (SDOperand *Op = User->OperandList,
+ *E = User->OperandList+User->NumOperands; Op != E; ++Op) {
+ if (*Op == From) {
+ // Okay, we know this user needs to be updated. Remove its old self
+ // from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // Update all operands that match "From".
+ for (; Op != E; ++Op) {
+ if (*Op == From) {
+ From.Val->removeUser(User);
+ *Op = To;
+ To.Val->addUser(User);
+ }
+ }
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ if (SDNode *Existing = AddNonLeafNodeToCSEMaps(User)) {
+ unsigned NumDeleted = Deleted.size();
+ ReplaceAllUsesWith(User, Existing, &Deleted);
+
+ // User is now dead.
+ Deleted.push_back(User);
+ DeleteNodeNotInCSEMaps(User);
+
+ // We have to be careful here, because ReplaceAllUsesWith could have
+ // deleted a user of From, which means there may be dangling pointers
+ // in the "Users" setvector. Scan over the deleted node pointers and
+ // remove them from the setvector.
+ for (unsigned i = NumDeleted, e = Deleted.size(); i != e; ++i)
+ Users.remove(Deleted[i]);
+ }
+ break; // Exit the operand scanning loop.
+ }
+ }
+ }
+}
+
+
+/// AssignNodeIds - Assign a unique node id for each node in the DAG based on
+/// their allnodes order. It returns the maximum id.
+unsigned SelectionDAG::AssignNodeIds() {
+ unsigned Id = 0;
+ for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I){
+ SDNode *N = I;
+ N->setNodeId(Id++);
+ }
+ return Id;
+}
+
+/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
+/// based on their topological order. It returns the maximum id and a vector
+/// of the SDNodes* in assigned order by reference.
+unsigned SelectionDAG::AssignTopologicalOrder(std::vector<SDNode*> &TopOrder) {
+ unsigned DAGSize = AllNodes.size();
+ std::vector<unsigned> InDegree(DAGSize);
+ std::vector<SDNode*> Sources;
+
+ // Use a two pass approach to avoid using a std::map which is slow.
+ unsigned Id = 0;
+ for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I){
+ SDNode *N = I;
+ N->setNodeId(Id++);
+ unsigned Degree = N->use_size();
+ InDegree[N->getNodeId()] = Degree;
+ if (Degree == 0)
+ Sources.push_back(N);
+ }
+
+ TopOrder.clear();
+ while (!Sources.empty()) {
+ SDNode *N = Sources.back();
+ Sources.pop_back();
+ TopOrder.push_back(N);
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
+ SDNode *P = I->Val;
+ unsigned Degree = --InDegree[P->getNodeId()];
+ if (Degree == 0)
+ Sources.push_back(P);
+ }
+ }
+
+ // Second pass, assign the actual topological order as node ids.
+ Id = 0;
+ for (std::vector<SDNode*>::iterator TI = TopOrder.begin(),TE = TopOrder.end();
+ TI != TE; ++TI)
+ (*TI)->setNodeId(Id++);
+
+ return Id;
+}
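+
+// The loop above is Kahn's algorithm specialized to the SelectionDAG: a
+// node's in-degree is its use count, the initial "sources" are nodes with no
+// uses (the DAG roots), and edges run from a user to its operands, so the
+// resulting order lists every node before any of its operands. A generic
+// sketch of the same two-pass idea over a plain adjacency list (illustrative
+// only; nothing in this file calls it):
+static void ExampleKahnTopoSort(const std::vector<std::vector<unsigned> > &Succ,
+                                std::vector<unsigned> &Order) {
+  std::vector<unsigned> InDegree(Succ.size(), 0), Worklist;
+  for (unsigned u = 0, n = Succ.size(); u != n; ++u)
+    for (unsigned i = 0, e = Succ[u].size(); i != e; ++i)
+      ++InDegree[Succ[u][i]];            // count incoming edges
+  for (unsigned u = 0, n = Succ.size(); u != n; ++u)
+    if (InDegree[u] == 0)
+      Worklist.push_back(u);             // start from the sources
+  while (!Worklist.empty()) {
+    unsigned u = Worklist.back(); Worklist.pop_back();
+    Order.push_back(u);                  // u precedes all of its successors
+    for (unsigned i = 0, e = Succ[u].size(); i != e; ++i)
+      if (--InDegree[Succ[u][i]] == 0)
+        Worklist.push_back(Succ[u][i]);
+  }
+}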
+
+
+
+//===----------------------------------------------------------------------===//
+// SDNode Class
+//===----------------------------------------------------------------------===//
+
+// Out-of-line virtual method to give class a home.
+void SDNode::ANCHOR() {}
+void UnarySDNode::ANCHOR() {}
+void BinarySDNode::ANCHOR() {}
+void TernarySDNode::ANCHOR() {}
+void HandleSDNode::ANCHOR() {}
+void StringSDNode::ANCHOR() {}
+void ConstantSDNode::ANCHOR() {}
+void ConstantFPSDNode::ANCHOR() {}
+void GlobalAddressSDNode::ANCHOR() {}
+void FrameIndexSDNode::ANCHOR() {}
+void JumpTableSDNode::ANCHOR() {}
+void ConstantPoolSDNode::ANCHOR() {}
+void BasicBlockSDNode::ANCHOR() {}
+void SrcValueSDNode::ANCHOR() {}
+void RegisterSDNode::ANCHOR() {}
+void ExternalSymbolSDNode::ANCHOR() {}
+void CondCodeSDNode::ANCHOR() {}
+void VTSDNode::ANCHOR() {}
+void LoadSDNode::ANCHOR() {}
+void StoreSDNode::ANCHOR() {}
+
+HandleSDNode::~HandleSDNode() {
+ SDVTList VTs = { 0, 0 };
+ MorphNodeTo(ISD::HANDLENODE, VTs, 0, 0); // Drops operand uses.
+}
+
+GlobalAddressSDNode::GlobalAddressSDNode(bool isTarget, const GlobalValue *GA,
+ MVT::ValueType VT, int o)
+ : SDNode(isa<GlobalVariable>(GA) &&
+ dyn_cast<GlobalVariable>(GA)->isThreadLocal() ?
+ // Thread Local
+ (isTarget ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress) :
+ // Non Thread Local
+ (isTarget ? ISD::TargetGlobalAddress : ISD::GlobalAddress),
+ getSDVTList(VT)), Offset(o) {
+ TheGlobal = const_cast<GlobalValue*>(GA);
+}
+
+/// Profile - Gather unique data for the node.
+///
+void SDNode::Profile(FoldingSetNodeID &ID) {
+ AddNodeIDNode(ID, this);
+}
+
+/// getValueTypeList - Return a pointer to the specified value type.
+///
+MVT::ValueType *SDNode::getValueTypeList(MVT::ValueType VT) {
+ static MVT::ValueType VTs[MVT::LAST_VALUETYPE];
+ VTs[VT] = VT;
+ return &VTs[VT];
+}
+
+/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
+/// indicated value. This method ignores uses of other values defined by this
+/// operation.
+bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ // If there is only one value, this is easy.
+ if (getNumValues() == 1)
+ return use_size() == NUses;
+ if (Uses.size() < NUses) return false;
+
+ SDOperand TheValue(const_cast<SDNode *>(this), Value);
+
+ SmallPtrSet<SDNode*, 32> UsersHandled;
+
+ for (SDNode::use_iterator UI = Uses.begin(), E = Uses.end(); UI != E; ++UI) {
+ SDNode *User = *UI;
+ if (User->getNumOperands() == 1 ||
+ UsersHandled.insert(User)) // First time we've seen this?
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
+ if (User->getOperand(i) == TheValue) {
+ if (NUses == 0)
+ return false; // too many uses
+ --NUses;
+ }
+ }
+
+ // Found exactly the right number of uses?
+ return NUses == 0;
+}
+
+
+/// isOnlyUse - Return true if this node is the only use of N.
+///
+bool SDNode::isOnlyUse(SDNode *N) const {
+ bool Seen = false;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDNode *User = *I;
+ if (User == this)
+ Seen = true;
+ else
+ return false;
+ }
+
+ return Seen;
+}
+
+/// isOperand - Return true if this node is an operand of N.
+///
+bool SDOperand::isOperand(SDNode *N) const {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (*this == N->getOperand(i))
+ return true;
+ return false;
+}
+
+bool SDNode::isOperand(SDNode *N) const {
+ for (unsigned i = 0, e = N->NumOperands; i != e; ++i)
+ if (this == N->OperandList[i].Val)
+ return true;
+ return false;
+}
+
+static void findPredecessor(SDNode *N, const SDNode *P, bool &found,
+ SmallPtrSet<SDNode *, 32> &Visited) {
+ if (found || !Visited.insert(N))
+ return;
+
+ for (unsigned i = 0, e = N->getNumOperands(); !found && i != e; ++i) {
+ SDNode *Op = N->getOperand(i).Val;
+ if (Op == P) {
+ found = true;
+ return;
+ }
+ findPredecessor(Op, P, found, Visited);
+ }
+}
+
+/// isPredecessor - Return true if this node is a predecessor of N. This node
+/// is either an operand of N or it can be reached by recursively traversing
+/// up the operands.
+/// NOTE: this is an expensive method. Use it carefully.
+bool SDNode::isPredecessor(SDNode *N) const {
+ SmallPtrSet<SDNode *, 32> Visited;
+ bool found = false;
+ findPredecessor(N, this, found, Visited);
+ return found;
+}
+
+uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
+ assert(Num < NumOperands && "Invalid child # of SDNode!");
+ return cast<ConstantSDNode>(OperandList[Num])->getValue();
+}
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+ switch (getOpcode()) {
+ default:
+ if (getOpcode() < ISD::BUILTIN_OP_END)
+ return "<<Unknown DAG Node>>";
+ else {
+ if (G) {
+ if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+ if (getOpcode()-ISD::BUILTIN_OP_END < TII->getNumOpcodes())
+ return TII->getName(getOpcode()-ISD::BUILTIN_OP_END);
+
+ TargetLowering &TLI = G->getTargetLoweringInfo();
+ const char *Name =
+ TLI.getTargetNodeName(getOpcode());
+ if (Name) return Name;
+ }
+
+ return "<<Unknown Target Node>>";
+ }
+
+ case ISD::PCMARKER: return "PCMarker";
+ case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+ case ISD::SRCVALUE: return "SrcValue";
+ case ISD::EntryToken: return "EntryToken";
+ case ISD::TokenFactor: return "TokenFactor";
+ case ISD::AssertSext: return "AssertSext";
+ case ISD::AssertZext: return "AssertZext";
+
+ case ISD::STRING: return "String";
+ case ISD::BasicBlock: return "BasicBlock";
+ case ISD::VALUETYPE: return "ValueType";
+ case ISD::Register: return "Register";
+
+ case ISD::Constant: return "Constant";
+ case ISD::ConstantFP: return "ConstantFP";
+ case ISD::GlobalAddress: return "GlobalAddress";
+ case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+ case ISD::FrameIndex: return "FrameIndex";
+ case ISD::JumpTable: return "JumpTable";
+ case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+ case ISD::RETURNADDR: return "RETURNADDR";
+ case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+ case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
+ case ISD::EHSELECTION: return "EHSELECTION";
+ case ISD::EH_RETURN: return "EH_RETURN";
+ case ISD::ConstantPool: return "ConstantPool";
+ case ISD::ExternalSymbol: return "ExternalSymbol";
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IID = cast<ConstantSDNode>(getOperand(0))->getValue();
+ return Intrinsic::getName((Intrinsic::ID)IID);
+ }
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IID = cast<ConstantSDNode>(getOperand(1))->getValue();
+ return Intrinsic::getName((Intrinsic::ID)IID);
+ }
+
+ case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
+ case ISD::TargetConstant: return "TargetConstant";
+ case ISD::TargetConstantFP:return "TargetConstantFP";
+ case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
+ case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
+ case ISD::TargetFrameIndex: return "TargetFrameIndex";
+ case ISD::TargetJumpTable: return "TargetJumpTable";
+ case ISD::TargetConstantPool: return "TargetConstantPool";
+ case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+
+ case ISD::CopyToReg: return "CopyToReg";
+ case ISD::CopyFromReg: return "CopyFromReg";
+ case ISD::UNDEF: return "undef";
+ case ISD::MERGE_VALUES: return "merge_values";
+ case ISD::INLINEASM: return "inlineasm";
+ case ISD::LABEL: return "label";
+ case ISD::HANDLENODE: return "handlenode";
+ case ISD::FORMAL_ARGUMENTS: return "formal_arguments";
+ case ISD::CALL: return "call";
+
+ // Unary operators
+ case ISD::FABS: return "fabs";
+ case ISD::FNEG: return "fneg";
+ case ISD::FSQRT: return "fsqrt";
+ case ISD::FSIN: return "fsin";
+ case ISD::FCOS: return "fcos";
+ case ISD::FPOWI: return "fpowi";
+
+ // Binary operators
+ case ISD::ADD: return "add";
+ case ISD::SUB: return "sub";
+ case ISD::MUL: return "mul";
+ case ISD::MULHU: return "mulhu";
+ case ISD::MULHS: return "mulhs";
+ case ISD::SDIV: return "sdiv";
+ case ISD::UDIV: return "udiv";
+ case ISD::SREM: return "srem";
+ case ISD::UREM: return "urem";
+ case ISD::AND: return "and";
+ case ISD::OR: return "or";
+ case ISD::XOR: return "xor";
+ case ISD::SHL: return "shl";
+ case ISD::SRA: return "sra";
+ case ISD::SRL: return "srl";
+ case ISD::ROTL: return "rotl";
+ case ISD::ROTR: return "rotr";
+ case ISD::FADD: return "fadd";
+ case ISD::FSUB: return "fsub";
+ case ISD::FMUL: return "fmul";
+ case ISD::FDIV: return "fdiv";
+ case ISD::FREM: return "frem";
+ case ISD::FCOPYSIGN: return "fcopysign";
+
+ case ISD::SETCC: return "setcc";
+ case ISD::SELECT: return "select";
+ case ISD::SELECT_CC: return "select_cc";
+ case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
+ case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
+ case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
+ case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
+ case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::CARRY_FALSE: return "carry_false";
+ case ISD::ADDC: return "addc";
+ case ISD::ADDE: return "adde";
+ case ISD::SUBC: return "subc";
+ case ISD::SUBE: return "sube";
+ case ISD::SHL_PARTS: return "shl_parts";
+ case ISD::SRA_PARTS: return "sra_parts";
+ case ISD::SRL_PARTS: return "srl_parts";
+
+ // Conversion operators.
+ case ISD::SIGN_EXTEND: return "sign_extend";
+ case ISD::ZERO_EXTEND: return "zero_extend";
+ case ISD::ANY_EXTEND: return "any_extend";
+ case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+ case ISD::TRUNCATE: return "truncate";
+ case ISD::FP_ROUND: return "fp_round";
+ case ISD::FP_ROUND_INREG: return "fp_round_inreg";
+ case ISD::FP_EXTEND: return "fp_extend";
+
+ case ISD::SINT_TO_FP: return "sint_to_fp";
+ case ISD::UINT_TO_FP: return "uint_to_fp";
+ case ISD::FP_TO_SINT: return "fp_to_sint";
+ case ISD::FP_TO_UINT: return "fp_to_uint";
+ case ISD::BIT_CONVERT: return "bit_convert";
+
+ // Control flow instructions
+ case ISD::BR: return "br";
+ case ISD::BRIND: return "brind";
+ case ISD::BR_JT: return "br_jt";
+ case ISD::BRCOND: return "brcond";
+ case ISD::BR_CC: return "br_cc";
+ case ISD::RET: return "ret";
+ case ISD::CALLSEQ_START: return "callseq_start";
+ case ISD::CALLSEQ_END: return "callseq_end";
+
+ // Other operators
+ case ISD::LOAD: return "load";
+ case ISD::STORE: return "store";
+ case ISD::VAARG: return "vaarg";
+ case ISD::VACOPY: return "vacopy";
+ case ISD::VAEND: return "vaend";
+ case ISD::VASTART: return "vastart";
+ case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+ case ISD::EXTRACT_ELEMENT: return "extract_element";
+ case ISD::BUILD_PAIR: return "build_pair";
+ case ISD::STACKSAVE: return "stacksave";
+ case ISD::STACKRESTORE: return "stackrestore";
+
+ // Block memory operations.
+ case ISD::MEMSET: return "memset";
+ case ISD::MEMCPY: return "memcpy";
+ case ISD::MEMMOVE: return "memmove";
+
+ // Bit manipulation
+ case ISD::BSWAP: return "bswap";
+ case ISD::CTPOP: return "ctpop";
+ case ISD::CTTZ: return "cttz";
+ case ISD::CTLZ: return "ctlz";
+
+ // Debug info
+ case ISD::LOCATION: return "location";
+ case ISD::DEBUG_LOC: return "debug_loc";
+
+ case ISD::CONDCODE:
+ switch (cast<CondCodeSDNode>(this)->get()) {
+ default: assert(0 && "Unknown setcc condition!");
+ case ISD::SETOEQ: return "setoeq";
+ case ISD::SETOGT: return "setogt";
+ case ISD::SETOGE: return "setoge";
+ case ISD::SETOLT: return "setolt";
+ case ISD::SETOLE: return "setole";
+ case ISD::SETONE: return "setone";
+
+ case ISD::SETO: return "seto";
+ case ISD::SETUO: return "setuo";
+ case ISD::SETUEQ: return "setue";
+ case ISD::SETUGT: return "setugt";
+ case ISD::SETUGE: return "setuge";
+ case ISD::SETULT: return "setult";
+ case ISD::SETULE: return "setule";
+ case ISD::SETUNE: return "setune";
+
+ case ISD::SETEQ: return "seteq";
+ case ISD::SETGT: return "setgt";
+ case ISD::SETGE: return "setge";
+ case ISD::SETLT: return "setlt";
+ case ISD::SETLE: return "setle";
+ case ISD::SETNE: return "setne";
+ }
+ }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+ switch (AM) {
+ default:
+ return "";
+ case ISD::PRE_INC:
+ return "<pre-inc>";
+ case ISD::PRE_DEC:
+ return "<pre-dec>";
+ case ISD::POST_INC:
+ return "<post-inc>";
+ case ISD::POST_DEC:
+ return "<post-dec>";
+ }
+}
+
+void SDNode::dump() const { dump(0); }
+void SDNode::dump(const SelectionDAG *G) const {
+ cerr << (void*)this << ": ";
+
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+ if (i) cerr << ",";
+ if (getValueType(i) == MVT::Other)
+ cerr << "ch";
+ else
+ cerr << MVT::getValueTypeString(getValueType(i));
+ }
+ cerr << " = " << getOperationName(G);
+
+ cerr << " ";
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (i) cerr << ", ";
+ cerr << (void*)getOperand(i).Val;
+ if (unsigned RN = getOperand(i).ResNo)
+ cerr << ":" << RN;
+ }
+
+ if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+ cerr << "<" << CSDN->getValue() << ">";
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+ cerr << "<" << CSDN->getValue() << ">";
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(this)) {
+ int offset = GADN->getOffset();
+ cerr << "<";
+ WriteAsOperand(*cerr.stream(), GADN->getGlobal()) << ">";
+ if (offset > 0)
+ cerr << " + " << offset;
+ else
+ cerr << " " << offset;
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+ cerr << "<" << FIDN->getIndex() << ">";
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+ cerr << "<" << JTDN->getIndex() << ">";
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+ int offset = CP->getOffset();
+ if (CP->isMachineConstantPoolEntry())
+ cerr << "<" << *CP->getMachineCPVal() << ">";
+ else
+ cerr << "<" << *CP->getConstVal() << ">";
+ if (offset > 0)
+ cerr << " + " << offset;
+ else
+ cerr << " " << offset;
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+ cerr << "<";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ cerr << LBB->getName() << " ";
+ cerr << (const void*)BBDN->getBasicBlock() << ">";
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+ if (G && R->getReg() && MRegisterInfo::isPhysicalRegister(R->getReg())) {
+ cerr << " " <<G->getTarget().getRegisterInfo()->getName(R->getReg());
+ } else {
+ cerr << " #" << R->getReg();
+ }
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(this)) {
+ cerr << "'" << ES->getSymbol() << "'";
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+ if (M->getValue())
+ cerr << "<" << M->getValue() << ":" << M->getOffset() << ">";
+ else
+ cerr << "<null:" << M->getOffset() << ">";
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+ cerr << ":" << MVT::getValueTypeString(N->getVT());
+ } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD:
+ cerr << " <anyext ";
+ break;
+ case ISD::SEXTLOAD:
+ cerr << " <sext ";
+ break;
+ case ISD::ZEXTLOAD:
+ cerr << " <zext ";
+ break;
+ }
+ if (doExt)
+ cerr << MVT::getValueTypeString(LD->getLoadedVT()) << ">";
+
+ const char *AM = getIndexedModeName(LD->getAddressingMode());
+ if (AM != "")
+ cerr << " " << AM;
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+ if (ST->isTruncatingStore())
+ cerr << " <trunc "
+ << MVT::getValueTypeString(ST->getStoredVT()) << ">";
+
+ const char *AM = getIndexedModeName(ST->getAddressingMode());
+ if (AM != "")
+ cerr << " " << AM;
+ }
+}
+
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).Val->hasOneUse())
+ DumpNodes(N->getOperand(i).Val, indent+2, G);
+ else
+ cerr << "\n" << std::string(indent+2, ' ')
+ << (void*)N->getOperand(i).Val << ": <multiple use>";
+
+
+ cerr << "\n" << std::string(indent, ' ');
+ N->dump(G);
+}
+
+void SelectionDAG::dump() const {
+ cerr << "SelectionDAG has " << AllNodes.size() << " nodes:";
+ std::vector<const SDNode*> Nodes;
+ for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+ I != E; ++I)
+ Nodes.push_back(I);
+
+ std::sort(Nodes.begin(), Nodes.end());
+
+ for (unsigned i = 0, e = Nodes.size(); i != e; ++i) {
+ if (!Nodes[i]->hasOneUse() && Nodes[i] != getRoot().Val)
+ DumpNodes(Nodes[i], 2, this);
+ }
+
+ if (getRoot().Val) DumpNodes(getRoot().Val, 2, this);
+
+ cerr << "\n\n";
+}
+
+const Type *ConstantPoolSDNode::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
new file mode 100644
index 0000000..9256faa
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -0,0 +1,4904 @@
+//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAGISel class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Constants.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/ParameterAttributes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+using namespace llvm;
+
+#ifndef NDEBUG
+static cl::opt<bool>
+ViewISelDAGs("view-isel-dags", cl::Hidden,
+ cl::desc("Pop up a window to show isel dags as they are selected"));
+static cl::opt<bool>
+ViewSchedDAGs("view-sched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show sched dags as they are processed"));
+#else
+static const bool ViewISelDAGs = 0, ViewSchedDAGs = 0;
+#endif
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterScheduler class - Track the registration of instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterScheduler::Registry;
+
+//===---------------------------------------------------------------------===//
+///
+/// ISHeuristic command line option for instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+namespace {
+ cl::opt<RegisterScheduler::FunctionPassCtor, false,
+ RegisterPassParser<RegisterScheduler> >
+ ISHeuristic("pre-RA-sched",
+ cl::init(&createDefaultScheduler),
+ cl::desc("Instruction schedulers available (before register allocation):"));
+
+ static RegisterScheduler
+ defaultListDAGScheduler("default", " Best scheduler for the target",
+ createDefaultScheduler);
+} // namespace
+
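+// Following the registration pattern above, an out-of-tree tool or target
+// could expose an extra named entry for the -pre-RA-sched option. The entry
+// below is purely illustrative (the "example-burr" name is invented); it
+// simply reuses the existing bottom-up register-reduction factory that
+// createDefaultScheduler also picks for register-pressure targets.
+static RegisterScheduler
+  exampleBURRListDAGScheduler("example-burr",
+                              "  Bottom-up register reduction (illustrative)",
+                              createBURRListDAGScheduler);
+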
+namespace { struct AsmOperandInfo; }
+
+namespace {
+ /// RegsForValue - This struct represents the physical registers that a
+ /// particular value is assigned and the type information about the value.
+  /// This is needed because a value can be promoted into a larger register, or
+  /// expanded into multiple registers that are each smaller than the value.
+ struct VISIBILITY_HIDDEN RegsForValue {
+ /// Regs - This list holds the register (for legal and promoted values)
+ /// or register set (for expanded values) that the value should be assigned
+ /// to.
+ std::vector<unsigned> Regs;
+
+ /// RegVT - The value type of each register.
+ ///
+ MVT::ValueType RegVT;
+
+ /// ValueVT - The value type of the LLVM value, which may be promoted from
+ /// RegVT or made from merging the two expanded parts.
+ MVT::ValueType ValueVT;
+
+ RegsForValue() : RegVT(MVT::Other), ValueVT(MVT::Other) {}
+
+ RegsForValue(unsigned Reg, MVT::ValueType regvt, MVT::ValueType valuevt)
+ : RegVT(regvt), ValueVT(valuevt) {
+ Regs.push_back(Reg);
+ }
+ RegsForValue(const std::vector<unsigned> &regs,
+ MVT::ValueType regvt, MVT::ValueType valuevt)
+ : Regs(regs), RegVT(regvt), ValueVT(valuevt) {
+ }
+
+ /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+ /// this value and returns the result as a ValueVT value. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ SDOperand getCopyFromRegs(SelectionDAG &DAG,
+ SDOperand &Chain, SDOperand *Flag) const;
+
+ /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+ /// specified value into the registers specified by this object. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ void getCopyToRegs(SDOperand Val, SelectionDAG &DAG,
+ SDOperand &Chain, SDOperand *Flag) const;
+
+ /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+ /// operand list. This adds the code marker and includes the number of
+ /// values added into it.
+ void AddInlineAsmOperands(unsigned Code, SelectionDAG &DAG,
+ std::vector<SDOperand> &Ops) const;
+ };
+}
+
+namespace llvm {
+ //===--------------------------------------------------------------------===//
+ /// createDefaultScheduler - This creates an instruction scheduler appropriate
+ /// for the target.
+ ScheduleDAG* createDefaultScheduler(SelectionDAGISel *IS,
+ SelectionDAG *DAG,
+ MachineBasicBlock *BB) {
+ TargetLowering &TLI = IS->getTargetLowering();
+
+ if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency) {
+ return createTDListDAGScheduler(IS, DAG, BB);
+ } else {
+ assert(TLI.getSchedulingPreference() ==
+ TargetLowering::SchedulingForRegPressure && "Unknown sched type!");
+ return createBURRListDAGScheduler(IS, DAG, BB);
+ }
+ }
+
+
+ //===--------------------------------------------------------------------===//
+ /// FunctionLoweringInfo - This contains information that is global to a
+ /// function that is used when lowering a region of the function.
+ class FunctionLoweringInfo {
+ public:
+ TargetLowering &TLI;
+ Function &Fn;
+ MachineFunction &MF;
+ SSARegMap *RegMap;
+
+ FunctionLoweringInfo(TargetLowering &TLI, Function &Fn,MachineFunction &MF);
+
+ /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
+ std::map<const BasicBlock*, MachineBasicBlock *> MBBMap;
+
+    /// ValueMap - Since we emit code for the function a basic block at a time,
+    /// we must remember which virtual registers hold values that are used
+    /// across basic blocks.
+ DenseMap<const Value*, unsigned> ValueMap;
+
+ /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
+ /// the entry block. This allows the allocas to be efficiently referenced
+ /// anywhere in the function.
+ std::map<const AllocaInst*, int> StaticAllocaMap;
+
+#ifndef NDEBUG
+ SmallSet<Instruction*, 8> CatchInfoLost;
+ SmallSet<Instruction*, 8> CatchInfoFound;
+#endif
+
+ unsigned MakeReg(MVT::ValueType VT) {
+ return RegMap->createVirtualRegister(TLI.getRegClassFor(VT));
+ }
+
+ /// isExportedInst - Return true if the specified value is an instruction
+ /// exported from its block.
+ bool isExportedInst(const Value *V) {
+ return ValueMap.count(V);
+ }
+
+ unsigned CreateRegForValue(const Value *V);
+
+ unsigned InitializeRegForValue(const Value *V) {
+ unsigned &R = ValueMap[V];
+ assert(R == 0 && "Already initialized this value register!");
+ return R = CreateRegForValue(V);
+ }
+ };
+}
+
+/// isSelector - Return true if this instruction is a call to the
+/// eh.selector intrinsic.
+static bool isSelector(Instruction *I) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ return II->getIntrinsicID() == Intrinsic::eh_selector;
+ return false;
+}
+
+/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
+/// PHI nodes or outside of the basic block that defines it, or used by a
+/// switch instruction, which may expand to multiple basic blocks.
+static bool isUsedOutsideOfDefiningBlock(Instruction *I) {
+ if (isa<PHINode>(I)) return true;
+ BasicBlock *BB = I->getParent();
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
+ if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI) ||
+ // FIXME: Remove switchinst special case.
+ isa<SwitchInst>(*UI))
+ return true;
+ return false;
+}
+
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true. This includes arguments used by switches, since
+/// the switch may expand into multiple basic blocks.
+static bool isOnlyUsedInEntryBlock(Argument *A) {
+ BasicBlock *Entry = A->getParent()->begin();
+ for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI)
+ if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI))
+ return false; // Use not in entry block.
+ return true;
+}
+
+FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli,
+ Function &fn, MachineFunction &mf)
+ : TLI(tli), Fn(fn), MF(mf), RegMap(MF.getSSARegMap()) {
+
+ // Create a vreg for each argument register that is not dead and is used
+ // outside of the entry block for the function.
+ for (Function::arg_iterator AI = Fn.arg_begin(), E = Fn.arg_end();
+ AI != E; ++AI)
+ if (!isOnlyUsedInEntryBlock(AI))
+ InitializeRegForValue(AI);
+
+ // Initialize the mapping of values to registers. This is only set up for
+ // instruction values that are used outside of the block that defines
+ // them.
+ Function::iterator BB = Fn.begin(), EB = Fn.end();
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+ const Type *Ty = AI->getAllocatedType();
+ uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+ AI->getAlignment());
+
+ TySize *= CUI->getZExtValue(); // Get total allocated size.
+ if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+ StaticAllocaMap[AI] =
+ MF.getFrameInfo()->CreateStackObject(TySize, Align);
+ }
+
+ for (; BB != EB; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I))
+ if (!isa<AllocaInst>(I) ||
+ !StaticAllocaMap.count(cast<AllocaInst>(I)))
+ InitializeRegForValue(I);
+
+ // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
+ // also creates the initial PHI MachineInstrs, though none of the input
+ // operands are populated.
+ for (BB = Fn.begin(), EB = Fn.end(); BB != EB; ++BB) {
+ MachineBasicBlock *MBB = new MachineBasicBlock(BB);
+ MBBMap[BB] = MBB;
+ MF.getBasicBlockList().push_back(MBB);
+
+ // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
+ // appropriate.
+ PHINode *PN;
+ for (BasicBlock::iterator I = BB->begin();(PN = dyn_cast<PHINode>(I)); ++I){
+ if (PN->use_empty()) continue;
+
+ MVT::ValueType VT = TLI.getValueType(PN->getType());
+ unsigned NumRegisters = TLI.getNumRegisters(VT);
+ unsigned PHIReg = ValueMap[PN];
+ assert(PHIReg && "PHI node does not have an assigned virtual register!");
+ const TargetInstrInfo *TII = TLI.getTargetMachine().getInstrInfo();
+ for (unsigned i = 0; i != NumRegisters; ++i)
+ BuildMI(MBB, TII->get(TargetInstrInfo::PHI), PHIReg+i);
+ }
+ }
+}
+
+/// CreateRegForValue - Allocate the appropriate number of virtual registers of
+/// the correctly promoted or expanded types. Assign these registers
+/// consecutive vreg numbers and return the first assigned number.
+unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
+ MVT::ValueType VT = TLI.getValueType(V->getType());
+
+ unsigned NumRegisters = TLI.getNumRegisters(VT);
+ MVT::ValueType RegisterVT = TLI.getRegisterType(VT);
+
+ unsigned R = MakeReg(RegisterVT);
+ for (unsigned i = 1; i != NumRegisters; ++i)
+ MakeReg(RegisterVT);
+
+ return R;
+}
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLowering - This is the common target-independent lowering
+/// implementation that is parameterized by a TargetLowering object.
+/// Also, targets can overload any lowering method.
+///
+namespace llvm {
+class SelectionDAGLowering {
+ MachineBasicBlock *CurMBB;
+
+ DenseMap<const Value*, SDOperand> NodeMap;
+
+ /// PendingLoads - Loads are not emitted to the program immediately. We bunch
+ /// them up and then emit token factor nodes when possible. This allows us to
+ /// get simple disambiguation between loads without worrying about alias
+ /// analysis.
+ std::vector<SDOperand> PendingLoads;
+
+ /// Case - A struct to record the Value for a switch case, and the
+ /// case's target basic block.
+ struct Case {
+ Constant* Low;
+ Constant* High;
+ MachineBasicBlock* BB;
+
+ Case() : Low(0), High(0), BB(0) { }
+ Case(Constant* low, Constant* high, MachineBasicBlock* bb) :
+ Low(low), High(high), BB(bb) { }
+ uint64_t size() const {
+ uint64_t rHigh = cast<ConstantInt>(High)->getSExtValue();
+ uint64_t rLow = cast<ConstantInt>(Low)->getSExtValue();
+ return (rHigh - rLow + 1ULL);
+ }
+ };
+
+ struct CaseBits {
+ uint64_t Mask;
+ MachineBasicBlock* BB;
+ unsigned Bits;
+
+ CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits):
+ Mask(mask), BB(bb), Bits(bits) { }
+ };
+
+ typedef std::vector<Case> CaseVector;
+ typedef std::vector<CaseBits> CaseBitsVector;
+ typedef CaseVector::iterator CaseItr;
+ typedef std::pair<CaseItr, CaseItr> CaseRange;
+
+ /// CaseRec - A struct with ctor used in lowering switches to a binary tree
+ /// of conditional branches.
+ struct CaseRec {
+ CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) :
+ CaseBB(bb), LT(lt), GE(ge), Range(r) {}
+
+ /// CaseBB - The MBB in which to emit the compare and branch
+ MachineBasicBlock *CaseBB;
+ /// LT, GE - If nonzero, we know the current case value must be less-than or
+ /// greater-than-or-equal-to these Constants.
+ Constant *LT;
+ Constant *GE;
+ /// Range - A pair of iterators representing the range of case values to be
+ /// processed at this point in the binary search tree.
+ CaseRange Range;
+ };
+
+ typedef std::vector<CaseRec> CaseRecVector;
+
+ /// The comparison function for sorting the switch case values in the vector.
+ /// WARNING: Case ranges should be disjoint!
+ struct CaseCmp {
+ bool operator () (const Case& C1, const Case& C2) {
+ assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
+ const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+ const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+ return CI1->getValue().slt(CI2->getValue());
+ }
+ };
+
+ struct CaseBitsCmp {
+ bool operator () (const CaseBits& C1, const CaseBits& C2) {
+ return C1.Bits > C2.Bits;
+ }
+ };
+
+ unsigned Clusterify(CaseVector& Cases, const SwitchInst &SI);
+
+public:
+ // TLI - This is information that describes the available target features we
+ // need for lowering. This indicates when operations are unavailable,
+ // implemented with a libcall, etc.
+ TargetLowering &TLI;
+ SelectionDAG &DAG;
+ const TargetData *TD;
+
+ /// SwitchCases - Vector of CaseBlock structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<SelectionDAGISel::CaseBlock> SwitchCases;
+ /// JTCases - Vector of JumpTable structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<SelectionDAGISel::JumpTableBlock> JTCases;
+ std::vector<SelectionDAGISel::BitTestBlock> BitTestCases;
+
+ /// FuncInfo - Information about the function as a whole.
+ ///
+ FunctionLoweringInfo &FuncInfo;
+
+ SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli,
+ FunctionLoweringInfo &funcinfo)
+ : TLI(tli), DAG(dag), TD(DAG.getTarget().getTargetData()),
+ FuncInfo(funcinfo) {
+ }
+
+ /// getRoot - Return the current virtual root of the Selection DAG.
+ ///
+ SDOperand getRoot() {
+ if (PendingLoads.empty())
+ return DAG.getRoot();
+
+ if (PendingLoads.size() == 1) {
+ SDOperand Root = PendingLoads[0];
+ DAG.setRoot(Root);
+ PendingLoads.clear();
+ return Root;
+ }
+
+ // Otherwise, we have to make a token factor node.
+ SDOperand Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &PendingLoads[0], PendingLoads.size());
+ PendingLoads.clear();
+ DAG.setRoot(Root);
+ return Root;
+ }
+
+ SDOperand CopyValueToVirtualRegister(Value *V, unsigned Reg);
+
+ void visit(Instruction &I) { visit(I.getOpcode(), I); }
+
+ void visit(unsigned Opcode, User &I) {
+ // Note: this doesn't use InstVisitor, because it has to work with
+ // ConstantExpr's in addition to instructions.
+ switch (Opcode) {
+ default: assert(0 && "Unknown instruction type encountered!");
+ abort();
+ // Build the switch statement using the Instruction.def file.
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE:return visit##OPCODE((CLASS&)I);
+#include "llvm/Instruction.def"
+ }
+ }
+
+ void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; }
+
+ SDOperand getLoadFrom(const Type *Ty, SDOperand Ptr,
+ const Value *SV, SDOperand Root,
+ bool isVolatile, unsigned Alignment);
+
+ SDOperand getIntPtrConstant(uint64_t Val) {
+ return DAG.getConstant(Val, TLI.getPointerTy());
+ }
+
+ SDOperand getValue(const Value *V);
+
+ void setValue(const Value *V, SDOperand NewN) {
+ SDOperand &N = NodeMap[V];
+ assert(N.Val == 0 && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void GetRegistersForValue(AsmOperandInfo &OpInfo, bool HasEarlyClobber,
+ std::set<unsigned> &OutputRegs,
+ std::set<unsigned> &InputRegs);
+
+ void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+ unsigned Opc);
+ bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB);
+ void ExportFromCurrentBlock(Value *V);
+ void LowerCallTo(Instruction &I,
+ const Type *CalledValueTy, unsigned CallingConv,
+ bool IsTailCall, SDOperand Callee, unsigned OpIdx,
+ MachineBasicBlock *LandingPad = NULL);
+
+ // Terminator instructions.
+ void visitRet(ReturnInst &I);
+ void visitBr(BranchInst &I);
+ void visitSwitch(SwitchInst &I);
+ void visitUnreachable(UnreachableInst &I) { /* noop */ }
+
+ // Helpers for visitSwitch
+ bool handleSmallSwitchRange(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default);
+ bool handleJTSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default);
+ bool handleBTSplitSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default);
+ bool handleBitTestsSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default);
+ void visitSwitchCase(SelectionDAGISel::CaseBlock &CB);
+ void visitBitTestHeader(SelectionDAGISel::BitTestBlock &B);
+ void visitBitTestCase(MachineBasicBlock* NextMBB,
+ unsigned Reg,
+ SelectionDAGISel::BitTestCase &B);
+ void visitJumpTable(SelectionDAGISel::JumpTable &JT);
+ void visitJumpTableHeader(SelectionDAGISel::JumpTable &JT,
+ SelectionDAGISel::JumpTableHeader &JTH);
+
+ // These all get lowered before this pass.
+ void visitInvoke(InvokeInst &I);
+ void visitUnwind(UnwindInst &I);
+
+ void visitBinary(User &I, unsigned OpCode);
+ void visitShift(User &I, unsigned Opcode);
+ void visitAdd(User &I) {
+ if (I.getType()->isFPOrFPVector())
+ visitBinary(I, ISD::FADD);
+ else
+ visitBinary(I, ISD::ADD);
+ }
+ void visitSub(User &I);
+ void visitMul(User &I) {
+ if (I.getType()->isFPOrFPVector())
+ visitBinary(I, ISD::FMUL);
+ else
+ visitBinary(I, ISD::MUL);
+ }
+ void visitURem(User &I) { visitBinary(I, ISD::UREM); }
+ void visitSRem(User &I) { visitBinary(I, ISD::SREM); }
+ void visitFRem(User &I) { visitBinary(I, ISD::FREM); }
+ void visitUDiv(User &I) { visitBinary(I, ISD::UDIV); }
+ void visitSDiv(User &I) { visitBinary(I, ISD::SDIV); }
+ void visitFDiv(User &I) { visitBinary(I, ISD::FDIV); }
+ void visitAnd (User &I) { visitBinary(I, ISD::AND); }
+ void visitOr (User &I) { visitBinary(I, ISD::OR); }
+ void visitXor (User &I) { visitBinary(I, ISD::XOR); }
+ void visitShl (User &I) { visitShift(I, ISD::SHL); }
+ void visitLShr(User &I) { visitShift(I, ISD::SRL); }
+ void visitAShr(User &I) { visitShift(I, ISD::SRA); }
+ void visitICmp(User &I);
+ void visitFCmp(User &I);
+ // Visit the conversion instructions
+ void visitTrunc(User &I);
+ void visitZExt(User &I);
+ void visitSExt(User &I);
+ void visitFPTrunc(User &I);
+ void visitFPExt(User &I);
+ void visitFPToUI(User &I);
+ void visitFPToSI(User &I);
+ void visitUIToFP(User &I);
+ void visitSIToFP(User &I);
+ void visitPtrToInt(User &I);
+ void visitIntToPtr(User &I);
+ void visitBitCast(User &I);
+
+ void visitExtractElement(User &I);
+ void visitInsertElement(User &I);
+ void visitShuffleVector(User &I);
+
+ void visitGetElementPtr(User &I);
+ void visitSelect(User &I);
+
+ void visitMalloc(MallocInst &I);
+ void visitFree(FreeInst &I);
+ void visitAlloca(AllocaInst &I);
+ void visitLoad(LoadInst &I);
+ void visitStore(StoreInst &I);
+ void visitPHI(PHINode &I) { } // PHI nodes are handled specially.
+ void visitCall(CallInst &I);
+ void visitInlineAsm(CallInst &I);
+ const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic);
+ void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic);
+
+ void visitVAStart(CallInst &I);
+ void visitVAArg(VAArgInst &I);
+ void visitVAEnd(CallInst &I);
+ void visitVACopy(CallInst &I);
+
+ void visitMemIntrinsic(CallInst &I, unsigned Op);
+
+ void visitUserOp1(Instruction &I) {
+ assert(0 && "UserOp1 should not exist at instruction selection time!");
+ abort();
+ }
+ void visitUserOp2(Instruction &I) {
+ assert(0 && "UserOp2 should not exist at instruction selection time!");
+ abort();
+ }
+};
+} // end namespace llvm
+
+
+/// getCopyFromParts - Create a value that contains the
+/// specified legal parts combined into the value they represent.
+static SDOperand getCopyFromParts(SelectionDAG &DAG,
+ const SDOperand *Parts,
+ unsigned NumParts,
+ MVT::ValueType PartVT,
+ MVT::ValueType ValueVT,
+ ISD::NodeType AssertOp = ISD::DELETED_NODE) {
+ if (!MVT::isVector(ValueVT) || NumParts == 1) {
+ SDOperand Val = Parts[0];
+
+ // If the value was expanded, copy from the top part.
+ if (NumParts > 1) {
+ assert(NumParts == 2 &&
+ "Cannot expand to more than 2 elts yet!");
+ SDOperand Hi = Parts[1];
+ if (!DAG.getTargetLoweringInfo().isLittleEndian())
+ std::swap(Val, Hi);
+ return DAG.getNode(ISD::BUILD_PAIR, ValueVT, Val, Hi);
+ }
+
+ // Otherwise, if the value was promoted or extended, truncate it to the
+ // appropriate type.
+ if (PartVT == ValueVT)
+ return Val;
+
+ if (MVT::isVector(PartVT)) {
+ assert(MVT::isVector(ValueVT) && "Unknown vector conversion!");
+      return DAG.getNode(ISD::BIT_CONVERT, ValueVT, Val);
+ }
+
+ if (MVT::isInteger(PartVT) &&
+ MVT::isInteger(ValueVT)) {
+ if (ValueVT < PartVT) {
+ // For a truncate, see if we have any information to
+ // indicate whether the truncated bits will always be
+ // zero or sign-extension.
+ if (AssertOp != ISD::DELETED_NODE)
+ Val = DAG.getNode(AssertOp, PartVT, Val,
+ DAG.getValueType(ValueVT));
+ return DAG.getNode(ISD::TRUNCATE, ValueVT, Val);
+ } else {
+ return DAG.getNode(ISD::ANY_EXTEND, ValueVT, Val);
+ }
+ }
+
+ if (MVT::isFloatingPoint(PartVT) &&
+ MVT::isFloatingPoint(ValueVT))
+ return DAG.getNode(ISD::FP_ROUND, ValueVT, Val);
+
+ if (MVT::getSizeInBits(PartVT) ==
+ MVT::getSizeInBits(ValueVT))
+ return DAG.getNode(ISD::BIT_CONVERT, ValueVT, Val);
+
+ assert(0 && "Unknown mismatch!");
+ }
+
+ // Handle a multi-element vector.
+ MVT::ValueType IntermediateVT, RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs =
+ DAG.getTargetLoweringInfo()
+ .getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates,
+ RegisterVT);
+
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+ assert(RegisterVT == Parts[0].getValueType() &&
+ "Part type doesn't match part!");
+
+ // Assemble the parts into intermediate operands.
+ SmallVector<SDOperand, 8> Ops(NumIntermediates);
+ if (NumIntermediates == NumParts) {
+ // If the register was not expanded, truncate or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ Ops[i] = getCopyFromParts(DAG, &Parts[i], 1,
+ PartVT, IntermediateVT);
+ } else if (NumParts > 0) {
+ // If the intermediate type was expanded, build the intermediate operands
+ // from the parts.
+    assert(NumParts % NumIntermediates == 0 &&
+           "Must expand into a divisible number of parts!");
+    unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ Ops[i] = getCopyFromParts(DAG, &Parts[i * Factor], Factor,
+ PartVT, IntermediateVT);
+ }
+
+ // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the intermediate
+ // operands.
+ return DAG.getNode(MVT::isVector(IntermediateVT) ?
+ ISD::CONCAT_VECTORS :
+ ISD::BUILD_VECTOR,
+ ValueVT, &Ops[0], NumParts);
+}
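+
+// Illustrative sketch of the common expanded-integer case handled above (a
+// hypothetical helper, assuming a 32-bit target where i64 is illegal): an i64
+// value carried as two i32 parts is reassembled with BUILD_PAIR, and the
+// halves are swapped first on big-endian targets, where the most significant
+// part comes first.
+static SDOperand ExampleRebuildI64(SelectionDAG &DAG,
+                                   SDOperand Part0, SDOperand Part1) {
+  SDOperand Parts[] = { Part0, Part1 };
+  return getCopyFromParts(DAG, Parts, 2, MVT::i32, MVT::i64);
+}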
+
+/// getCopyToParts - Create a series of nodes that contain the
+/// specified value split into legal parts.
+static void getCopyToParts(SelectionDAG &DAG,
+ SDOperand Val,
+ SDOperand *Parts,
+ unsigned NumParts,
+ MVT::ValueType PartVT) {
+ MVT::ValueType ValueVT = Val.getValueType();
+
+ if (!MVT::isVector(ValueVT) || NumParts == 1) {
+    // If the value needs to be expanded into multiple parts, split it up.
+ if (NumParts > 1) {
+ for (unsigned i = 0; i != NumParts; ++i)
+ Parts[i] = DAG.getNode(ISD::EXTRACT_ELEMENT, PartVT, Val,
+ DAG.getConstant(i, MVT::i32));
+ if (!DAG.getTargetLoweringInfo().isLittleEndian())
+ std::reverse(Parts, Parts + NumParts);
+ return;
+ }
+
+ // If there is a single part and the types differ, this must be
+ // a promotion.
+ if (PartVT != ValueVT) {
+ if (MVT::isVector(PartVT)) {
+ assert(MVT::isVector(ValueVT) &&
+ "Not a vector-vector cast?");
+ Val = DAG.getNode(ISD::BIT_CONVERT, PartVT, Val);
+ } else if (MVT::isInteger(PartVT) && MVT::isInteger(ValueVT)) {
+ if (PartVT < ValueVT)
+ Val = DAG.getNode(ISD::TRUNCATE, PartVT, Val);
+ else
+ Val = DAG.getNode(ISD::ANY_EXTEND, PartVT, Val);
+ } else if (MVT::isFloatingPoint(PartVT) &&
+ MVT::isFloatingPoint(ValueVT)) {
+ Val = DAG.getNode(ISD::FP_EXTEND, PartVT, Val);
+ } else if (MVT::getSizeInBits(PartVT) ==
+ MVT::getSizeInBits(ValueVT)) {
+ Val = DAG.getNode(ISD::BIT_CONVERT, PartVT, Val);
+ } else {
+ assert(0 && "Unknown mismatch!");
+ }
+ }
+ Parts[0] = Val;
+ return;
+ }
+
+ // Handle a multi-element vector.
+ MVT::ValueType IntermediateVT, RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs =
+ DAG.getTargetLoweringInfo()
+ .getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates,
+ RegisterVT);
+ unsigned NumElements = MVT::getVectorNumElements(ValueVT);
+
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+
+ // Split the vector into intermediate operands.
+ SmallVector<SDOperand, 8> Ops(NumIntermediates);
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ if (MVT::isVector(IntermediateVT))
+ Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR,
+ IntermediateVT, Val,
+ DAG.getConstant(i * (NumElements / NumIntermediates),
+ MVT::i32));
+ else
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ IntermediateVT, Val,
+ DAG.getConstant(i, MVT::i32));
+
+ // Split the intermediate operands into legal parts.
+ if (NumParts == NumIntermediates) {
+ // If the register was not expanded, promote or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ getCopyToParts(DAG, Ops[i], &Parts[i], 1, PartVT);
+ } else if (NumParts > 0) {
+    // If the intermediate type was expanded, split each intermediate value
+    // into legal parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ getCopyToParts(DAG, Ops[i], &Parts[i * Factor], Factor, PartVT);
+ }
+}
+
+
+SDOperand SelectionDAGLowering::getValue(const Value *V) {
+ SDOperand &N = NodeMap[V];
+ if (N.Val) return N;
+
+ const Type *VTy = V->getType();
+ MVT::ValueType VT = TLI.getValueType(VTy);
+ if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ visit(CE->getOpcode(), *CE);
+ SDOperand N1 = NodeMap[V];
+ assert(N1.Val && "visit didn't populate the ValueMap!");
+ return N1;
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
+ return N = DAG.getGlobalAddress(GV, VT);
+ } else if (isa<ConstantPointerNull>(C)) {
+ return N = DAG.getConstant(0, TLI.getPointerTy());
+ } else if (isa<UndefValue>(C)) {
+ if (!isa<VectorType>(VTy))
+ return N = DAG.getNode(ISD::UNDEF, VT);
+
+ // Create a BUILD_VECTOR of undef nodes.
+ const VectorType *PTy = cast<VectorType>(VTy);
+ unsigned NumElements = PTy->getNumElements();
+ MVT::ValueType PVT = TLI.getValueType(PTy->getElementType());
+
+ SmallVector<SDOperand, 8> Ops;
+ Ops.assign(NumElements, DAG.getNode(ISD::UNDEF, PVT));
+
+ // Create a VConstant node with generic Vector type.
+ MVT::ValueType VT = MVT::getVectorType(PVT, NumElements);
+ return N = DAG.getNode(ISD::BUILD_VECTOR, VT,
+ &Ops[0], Ops.size());
+ } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ return N = DAG.getConstantFP(CFP->getValue(), VT);
+ } else if (const VectorType *PTy = dyn_cast<VectorType>(VTy)) {
+ unsigned NumElements = PTy->getNumElements();
+ MVT::ValueType PVT = TLI.getValueType(PTy->getElementType());
+
+ // Now that we know the number and type of the elements, push a
+ // Constant or ConstantFP node onto the ops list for each element of
+ // the vector constant.
+ SmallVector<SDOperand, 8> Ops;
+ if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+ for (unsigned i = 0; i != NumElements; ++i)
+ Ops.push_back(getValue(CP->getOperand(i)));
+ } else {
+ assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
+ SDOperand Op;
+ if (MVT::isFloatingPoint(PVT))
+ Op = DAG.getConstantFP(0, PVT);
+ else
+ Op = DAG.getConstant(0, PVT);
+ Ops.assign(NumElements, Op);
+ }
+
+ // Create a BUILD_VECTOR node.
+ MVT::ValueType VT = MVT::getVectorType(PVT, NumElements);
+ return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0],
+ Ops.size());
+ } else {
+ // Canonicalize all constant ints to be unsigned.
+ return N = DAG.getConstant(cast<ConstantInt>(C)->getZExtValue(),VT);
+ }
+ }
+
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ std::map<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
+ }
+
+ unsigned InReg = FuncInfo.ValueMap[V];
+ assert(InReg && "Value not in map!");
+
+ MVT::ValueType RegisterVT = TLI.getRegisterType(VT);
+ unsigned NumRegs = TLI.getNumRegisters(VT);
+
+ std::vector<unsigned> Regs(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs[i] = InReg + i;
+
+ RegsForValue RFV(Regs, RegisterVT, VT);
+ SDOperand Chain = DAG.getEntryNode();
+
+ return RFV.getCopyFromRegs(DAG, Chain, NULL);
+}
+
+
+void SelectionDAGLowering::visitRet(ReturnInst &I) {
+ if (I.getNumOperands() == 0) {
+ DAG.setRoot(DAG.getNode(ISD::RET, MVT::Other, getRoot()));
+ return;
+ }
+ SmallVector<SDOperand, 8> NewValues;
+ NewValues.push_back(getRoot());
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ SDOperand RetOp = getValue(I.getOperand(i));
+
+ // If this is an integer return value, we need to promote it ourselves to
+ // the full width of a register, since getCopyToParts and Legalize will use
+ // ANY_EXTEND rather than sign/zero.
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling conventions.
+ if (MVT::isInteger(RetOp.getValueType()) &&
+ RetOp.getValueType() < MVT::i64) {
+ MVT::ValueType TmpVT;
+ if (TLI.getTypeAction(MVT::i32) == TargetLowering::Promote)
+ TmpVT = TLI.getTypeToTransformTo(MVT::i32);
+ else
+ TmpVT = MVT::i32;
+ const FunctionType *FTy = I.getParent()->getParent()->getFunctionType();
+ const ParamAttrsList *Attrs = FTy->getParamAttrs();
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+ if (Attrs && Attrs->paramHasAttr(0, ParamAttr::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ if (Attrs && Attrs->paramHasAttr(0, ParamAttr::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+ RetOp = DAG.getNode(ExtendKind, TmpVT, RetOp);
+ NewValues.push_back(RetOp);
+ NewValues.push_back(DAG.getConstant(false, MVT::i32));
+ } else {
+ MVT::ValueType VT = RetOp.getValueType();
+ unsigned NumParts = TLI.getNumRegisters(VT);
+ MVT::ValueType PartVT = TLI.getRegisterType(VT);
+ SmallVector<SDOperand, 4> Parts(NumParts);
+ getCopyToParts(DAG, RetOp, &Parts[0], NumParts, PartVT);
+ for (unsigned i = 0; i < NumParts; ++i) {
+ NewValues.push_back(Parts[i]);
+ NewValues.push_back(DAG.getConstant(false, MVT::i32));
+ }
+ }
+ }
+ DAG.setRoot(DAG.getNode(ISD::RET, MVT::Other,
+ &NewValues[0], NewValues.size()));
+}
+
+/// ExportFromCurrentBlock - If this condition isn't known to be exported from
+/// the current basic block, add it to ValueMap now so that we'll get a
+/// CopyTo/FromReg.
+void SelectionDAGLowering::ExportFromCurrentBlock(Value *V) {
+ // No need to export constants.
+ if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
+
+ // Already exported?
+ if (FuncInfo.isExportedInst(V)) return;
+
+ unsigned Reg = FuncInfo.InitializeRegForValue(V);
+ PendingLoads.push_back(CopyValueToVirtualRegister(V, Reg));
+}
+
+bool SelectionDAGLowering::isExportableFromCurrentBlock(Value *V,
+ const BasicBlock *FromBB) {
+ // The operands of the setcc have to be in this block. We don't know
+ // how to export them from some other block.
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ // Can export from current BB.
+ if (VI->getParent() == FromBB)
+ return true;
+
+ // Is already exported, noop.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // If this is an argument, we can export it if the BB is the entry block or
+ // if it is already exported.
+ if (isa<Argument>(V)) {
+ if (FromBB == &FromBB->getParent()->getEntryBlock())
+ return true;
+
+ // Otherwise, can only export this if it is already exported.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // Otherwise, constants can always be exported.
+ return true;
+}
+
+static bool InBlock(const Value *V, const BasicBlock *BB) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == BB;
+ return true;
+}
+
+/// FindMergedConditions - If Cond is an expression like (X && Y) or (X || Y),
+/// recursively decompose it, emitting one CaseBlock per leaf comparison so
+/// the whole condition lowers to a cascade of conditional branches.
+void SelectionDAGLowering::FindMergedConditions(Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ unsigned Opc) {
+ // If this node is not part of the or/and tree, emit it as a branch.
+ Instruction *BOp = dyn_cast<Instruction>(Cond);
+
+ if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
+ (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+ BOp->getParent() != CurBB->getBasicBlock() ||
+ !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+ !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+ const BasicBlock *BB = CurBB->getBasicBlock();
+
+ // If the leaf of the tree is a comparison, merge the condition into
+ // the caseblock.
+ if ((isa<ICmpInst>(Cond) || isa<FCmpInst>(Cond)) &&
+ // The operands of the cmp have to be in this block. We don't know
+ // how to export them from some other block. If this is the first block
+ // of the sequence, no exporting is needed.
+ (CurBB == CurMBB ||
+ (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
+ isExportableFromCurrentBlock(BOp->getOperand(1), BB)))) {
+ BOp = cast<Instruction>(Cond);
+ ISD::CondCode Condition;
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+ switch (IC->getPredicate()) {
+ default: assert(0 && "Unknown icmp predicate opcode!");
+ case ICmpInst::ICMP_EQ: Condition = ISD::SETEQ; break;
+ case ICmpInst::ICMP_NE: Condition = ISD::SETNE; break;
+ case ICmpInst::ICMP_SLE: Condition = ISD::SETLE; break;
+ case ICmpInst::ICMP_ULE: Condition = ISD::SETULE; break;
+ case ICmpInst::ICMP_SGE: Condition = ISD::SETGE; break;
+ case ICmpInst::ICMP_UGE: Condition = ISD::SETUGE; break;
+ case ICmpInst::ICMP_SLT: Condition = ISD::SETLT; break;
+ case ICmpInst::ICMP_ULT: Condition = ISD::SETULT; break;
+ case ICmpInst::ICMP_SGT: Condition = ISD::SETGT; break;
+ case ICmpInst::ICMP_UGT: Condition = ISD::SETUGT; break;
+ }
+ } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+ ISD::CondCode FPC, FOC;
+ switch (FC->getPredicate()) {
+ default: assert(0 && "Unknown fcmp predicate opcode!");
+ case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
+ case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
+ case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
+ case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
+ case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
+ case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
+ case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break;
+ case FCmpInst::FCMP_ORD: FOC = ISD::SETEQ; FPC = ISD::SETO; break;
+ case FCmpInst::FCMP_UNO: FOC = ISD::SETNE; FPC = ISD::SETUO; break;
+ case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
+ case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
+ case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
+ case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break;
+ case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break;
+ case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
+ case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break;
+ }
+ if (FiniteOnlyFPMath())
+ Condition = FOC;
+ else
+ Condition = FPC;
+ } else {
+ Condition = ISD::SETEQ; // silence warning.
+ assert(0 && "Unknown compare instruction");
+ }
+
+ SelectionDAGISel::CaseBlock CB(Condition, BOp->getOperand(0),
+ BOp->getOperand(1), NULL, TBB, FBB, CurBB);
+ SwitchCases.push_back(CB);
+ return;
+ }
+
+ // Create a CaseBlock record representing this branch.
+ SelectionDAGISel::CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(),
+ NULL, TBB, FBB, CurBB);
+ SwitchCases.push_back(CB);
+ return;
+ }
+
+
+ // Create TmpBB after CurBB.
+ MachineFunction::iterator BBI = CurBB;
+ MachineBasicBlock *TmpBB = new MachineBasicBlock(CurBB->getBasicBlock());
+ CurBB->getParent()->getBasicBlockList().insert(++BBI, TmpBB);
+
+ if (Opc == Instruction::Or) {
+ // Codegen X | Y as:
+ // jmp_if_X TBB
+ // jmp TmpBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc);
+
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+ } else {
+ assert(Opc == Instruction::And && "Unknown merge op!");
+ // Codegen X & Y as:
+ // jmp_if_X TmpBB
+ // jmp FBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+ // This requires creation of TmpBB after CurBB.
+
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc);
+
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+ }
+}
+
+/// If the set of cases should be emitted as a series of branches, return true.
+/// If we should emit this as a bunch of and/or'd together conditions, return
+/// false.
+static bool
+ShouldEmitAsBranches(const std::vector<SelectionDAGISel::CaseBlock> &Cases) {
+ if (Cases.size() != 2) return true;
+
+ // If this is two comparisons of the same values or'd or and'd together, they
+ // will get folded into a single comparison, so don't emit two blocks.
+ if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+ Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+ (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+ Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+ return false;
+ }
+
+ return true;
+}
+
+void SelectionDAGLowering::visitBr(BranchInst &I) {
+ // Update machine-CFG edges.
+ MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ if (I.isUnconditional()) {
+ // If this is not a fall-through branch, emit the branch.
+ if (Succ0MBB != NextBlock)
+ DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, getRoot(),
+ DAG.getBasicBlock(Succ0MBB)));
+
+ // Update machine-CFG edges.
+ CurMBB->addSuccessor(Succ0MBB);
+
+ return;
+ }
+
+ // If this condition is one of the special cases we handle, do special stuff
+ // now.
+ Value *CondVal = I.getCondition();
+ MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ // If this is a series of conditions that are or'd or and'd together, emit
+ // this as a sequence of branches instead of setcc's with and/or operations.
+ // For example, instead of something like:
+ // cmp A, B
+ // C = seteq
+ // cmp D, E
+ // F = setle
+ // or C, F
+ // jnz foo
+ // Emit:
+ // cmp A, B
+ // je foo
+ // cmp D, E
+ // jle foo
+ //
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+ if (BOp->hasOneUse() &&
+ (BOp->getOpcode() == Instruction::And ||
+ BOp->getOpcode() == Instruction::Or)) {
+ FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode());
+ // If the compares in later blocks need to use values not currently
+ // exported from this block, export them now. This block should always
+ // be the first entry.
+ assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!");
+
+ // Allow some cases to be rejected.
+ if (ShouldEmitAsBranches(SwitchCases)) {
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
+ ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
+ ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
+ }
+
+ // Emit the branch for this block.
+ visitSwitchCase(SwitchCases[0]);
+ SwitchCases.erase(SwitchCases.begin());
+ return;
+ }
+
+ // Okay, we decided not to do this, remove any inserted MBB's and clear
+ // SwitchCases.
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
+ CurMBB->getParent()->getBasicBlockList().erase(SwitchCases[i].ThisBB);
+
+ SwitchCases.clear();
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ SelectionDAGISel::CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(),
+ NULL, Succ0MBB, Succ1MBB, CurMBB);
+ // Use visitSwitchCase to actually insert the fast branch sequence for this
+ // cond branch.
+ visitSwitchCase(CB);
+}
+
+/// visitSwitchCase - Emits the necessary code to represent a single node in
+/// the binary search tree resulting from lowering a switch instruction.
+void SelectionDAGLowering::visitSwitchCase(SelectionDAGISel::CaseBlock &CB) {
+ SDOperand Cond;
+ SDOperand CondLHS = getValue(CB.CmpLHS);
+
+ // Build the setcc now.
+ if (CB.CmpMHS == NULL) {
+ // Fold "(X == true)" to X and "(X == false)" to !X to
+ // handle common cases produced by branch lowering.
+ if (CB.CmpRHS == ConstantInt::getTrue() && CB.CC == ISD::SETEQ)
+ Cond = CondLHS;
+ else if (CB.CmpRHS == ConstantInt::getFalse() && CB.CC == ISD::SETEQ) {
+ SDOperand True = DAG.getConstant(1, CondLHS.getValueType());
+ Cond = DAG.getNode(ISD::XOR, CondLHS.getValueType(), CondLHS, True);
+ } else
+ Cond = DAG.getSetCC(MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+ } else {
+ assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
+
+ uint64_t Low = cast<ConstantInt>(CB.CmpLHS)->getSExtValue();
+ uint64_t High = cast<ConstantInt>(CB.CmpRHS)->getSExtValue();
+
+ SDOperand CmpOp = getValue(CB.CmpMHS);
+ MVT::ValueType VT = CmpOp.getValueType();
+
+ if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+ Cond = DAG.getSetCC(MVT::i1, CmpOp, DAG.getConstant(High, VT), ISD::SETLE);
+ } else {
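+      // Emit "Low <= X <= High" as a single unsigned comparison: (X - Low)
+      // is unsigned-less-or-equal to (High - Low) exactly when X lies in
+      // [Low, High], so one SUB plus one SETULE covers the whole range.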
+ SDOperand SUB = DAG.getNode(ISD::SUB, VT, CmpOp, DAG.getConstant(Low, VT));
+ Cond = DAG.getSetCC(MVT::i1, SUB,
+ DAG.getConstant(High-Low, VT), ISD::SETULE);
+ }
+
+ }
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ // If the lhs block is the next block, invert the condition so that we can
+ // fall through to the lhs instead of the rhs block.
+ if (CB.TrueBB == NextBlock) {
+ std::swap(CB.TrueBB, CB.FalseBB);
+ SDOperand True = DAG.getConstant(1, Cond.getValueType());
+ Cond = DAG.getNode(ISD::XOR, Cond.getValueType(), Cond, True);
+ }
+ SDOperand BrCond = DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(), Cond,
+ DAG.getBasicBlock(CB.TrueBB));
+ if (CB.FalseBB == NextBlock)
+ DAG.setRoot(BrCond);
+ else
+ DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, BrCond,
+ DAG.getBasicBlock(CB.FalseBB)));
+ // Update successor info
+ CurMBB->addSuccessor(CB.TrueBB);
+ CurMBB->addSuccessor(CB.FalseBB);
+}
+
+/// visitJumpTable - Emit the JumpTable node in the current MBB.
+void SelectionDAGLowering::visitJumpTable(SelectionDAGISel::JumpTable &JT) {
+ // Emit the code for the jump table
+ assert(JT.Reg != -1U && "Should lower JT Header first!");
+ MVT::ValueType PTy = TLI.getPointerTy();
+ SDOperand Index = DAG.getCopyFromReg(getRoot(), JT.Reg, PTy);
+ SDOperand Table = DAG.getJumpTable(JT.JTI, PTy);
+ DAG.setRoot(DAG.getNode(ISD::BR_JT, MVT::Other, Index.getValue(1),
+ Table, Index));
+ return;
+}
+
+/// visitJumpTableHeader - This function emits the code necessary to produce an
+/// index into the jump table from the value being switched on.
+void SelectionDAGLowering::visitJumpTableHeader(SelectionDAGISel::JumpTable &JT,
+ SelectionDAGISel::JumpTableHeader &JTH) {
+ // Subtract the lowest switch case value from the value being switched on
+ // and conditional branch to default mbb if the result is greater than the
+ // difference between smallest and largest cases.
+ SDOperand SwitchOp = getValue(JTH.SValue);
+ MVT::ValueType VT = SwitchOp.getValueType();
+ SDOperand SUB = DAG.getNode(ISD::SUB, VT, SwitchOp,
+ DAG.getConstant(JTH.First, VT));
+
+ // The SDNode we just created, which holds the value being switched on
+  // minus the smallest case value, needs to be copied to a virtual
+ // register so it can be used as an index into the jump table in a
+ // subsequent basic block. This value may be smaller or larger than the
+  // target's pointer type, and therefore may require extension or truncation.
+ if (MVT::getSizeInBits(VT) > MVT::getSizeInBits(TLI.getPointerTy()))
+ SwitchOp = DAG.getNode(ISD::TRUNCATE, TLI.getPointerTy(), SUB);
+ else
+ SwitchOp = DAG.getNode(ISD::ZERO_EXTEND, TLI.getPointerTy(), SUB);
+
+ unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy());
+ SDOperand CopyTo = DAG.getCopyToReg(getRoot(), JumpTableReg, SwitchOp);
+ JT.Reg = JumpTableReg;
+
+ // Emit the range check for the jump table, and branch to the default
+ // block for the switch statement if the value being switched on exceeds
+ // the largest case in the switch.
+ SDOperand CMP = DAG.getSetCC(TLI.getSetCCResultTy(), SUB,
+ DAG.getConstant(JTH.Last-JTH.First,VT),
+ ISD::SETUGT);
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ SDOperand BrCond = DAG.getNode(ISD::BRCOND, MVT::Other, CopyTo, CMP,
+ DAG.getBasicBlock(JT.Default));
+
+ if (JT.MBB == NextBlock)
+ DAG.setRoot(BrCond);
+ else
+ DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, BrCond,
+ DAG.getBasicBlock(JT.MBB)));
+
+ return;
+}
+
+/// visitBitTestHeader - This function emits the code necessary to produce a
+/// value suitable for "bit tests".
+void SelectionDAGLowering::visitBitTestHeader(SelectionDAGISel::BitTestBlock &B) {
+ // Subtract the minimum value
+ SDOperand SwitchOp = getValue(B.SValue);
+ MVT::ValueType VT = SwitchOp.getValueType();
+ SDOperand SUB = DAG.getNode(ISD::SUB, VT, SwitchOp,
+ DAG.getConstant(B.First, VT));
+
+ // Check range
+ SDOperand RangeCmp = DAG.getSetCC(TLI.getSetCCResultTy(), SUB,
+ DAG.getConstant(B.Range, VT),
+ ISD::SETUGT);
+
+ SDOperand ShiftOp;
+ if (MVT::getSizeInBits(VT) > MVT::getSizeInBits(TLI.getShiftAmountTy()))
+ ShiftOp = DAG.getNode(ISD::TRUNCATE, TLI.getShiftAmountTy(), SUB);
+ else
+ ShiftOp = DAG.getNode(ISD::ZERO_EXTEND, TLI.getShiftAmountTy(), SUB);
+
+ // Make desired shift
+ SDOperand SwitchVal = DAG.getNode(ISD::SHL, TLI.getPointerTy(),
+ DAG.getConstant(1, TLI.getPointerTy()),
+ ShiftOp);
+
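+  // SwitchVal is now the one-hot value 1 << (SValue - First).  It is copied
+  // to a virtual register so each subsequent bit-test block only has to AND
+  // it against its own case mask and branch on the result.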
+ unsigned SwitchReg = FuncInfo.MakeReg(TLI.getPointerTy());
+ SDOperand CopyTo = DAG.getCopyToReg(getRoot(), SwitchReg, SwitchVal);
+ B.Reg = SwitchReg;
+
+ SDOperand BrRange = DAG.getNode(ISD::BRCOND, MVT::Other, CopyTo, RangeCmp,
+ DAG.getBasicBlock(B.Default));
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ MachineBasicBlock* MBB = B.Cases[0].ThisBB;
+ if (MBB == NextBlock)
+ DAG.setRoot(BrRange);
+ else
+    DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, BrRange,
+ DAG.getBasicBlock(MBB)));
+
+ CurMBB->addSuccessor(B.Default);
+ CurMBB->addSuccessor(MBB);
+
+ return;
+}
+
+/// visitBitTestCase - This function produces one "bit test".
+void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB,
+ unsigned Reg,
+ SelectionDAGISel::BitTestCase &B) {
+ // Emit bit tests and jumps
+ SDOperand SwitchVal = DAG.getCopyFromReg(getRoot(), Reg, TLI.getPointerTy());
+
+ SDOperand AndOp = DAG.getNode(ISD::AND, TLI.getPointerTy(),
+ SwitchVal,
+ DAG.getConstant(B.Mask,
+ TLI.getPointerTy()));
+ SDOperand AndCmp = DAG.getSetCC(TLI.getSetCCResultTy(), AndOp,
+ DAG.getConstant(0, TLI.getPointerTy()),
+ ISD::SETNE);
+ SDOperand BrAnd = DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(),
+ AndCmp, DAG.getBasicBlock(B.TargetBB));
+
+ // Set NextBlock to be the MBB immediately after the current one, if any.
+ // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CurMBB;
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ if (NextMBB == NextBlock)
+ DAG.setRoot(BrAnd);
+ else
+ DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, BrAnd,
+ DAG.getBasicBlock(NextMBB)));
+
+ CurMBB->addSuccessor(B.TargetBB);
+ CurMBB->addSuccessor(NextMBB);
+
+ return;
+}
+
+void SelectionDAGLowering::visitInvoke(InvokeInst &I) {
+ // Retrieve successors.
+ MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
+ MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ LowerCallTo(I, I.getCalledValue()->getType(),
+ I.getCallingConv(),
+ false,
+ getValue(I.getOperand(0)),
+ 3, LandingPad);
+
+ // If the value of the invoke is used outside of its defining block, make it
+ // available as a virtual register.
+ if (!I.use_empty()) {
+ DenseMap<const Value*, unsigned>::iterator VMI = FuncInfo.ValueMap.find(&I);
+ if (VMI != FuncInfo.ValueMap.end())
+ DAG.setRoot(CopyValueToVirtualRegister(&I, VMI->second));
+ }
+
+ // Drop into normal successor.
+ DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, getRoot(),
+ DAG.getBasicBlock(Return)));
+
+ // Update successor info
+ CurMBB->addSuccessor(Return);
+ CurMBB->addSuccessor(LandingPad);
+}
+
+void SelectionDAGLowering::visitUnwind(UnwindInst &I) {
+}
+
+/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
+/// small case ranges).
+bool SelectionDAGLowering::handleSmallSwitchRange(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default) {
+ Case& BackCase = *(CR.Range.second-1);
+
+ // Size is the number of Cases represented by this range.
+ unsigned Size = CR.Range.second - CR.Range.first;
+ if (Size > 3)
+ return false;
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = CurMBB->getParent();
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CR.CaseBB;
+
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+  // TODO: If any two of the cases have the same destination, and if one value
+ // is the same as the other, but has one bit unset that the other has set,
+ // use bit manipulation to do two compares at once. For example:
+ // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+
+ // Rearrange the case blocks so that the last one falls through if possible.
+ if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
+ // The last case block won't fall through into 'NextBlock' if we emit the
+ // branches in this order. See if rearranging a case value would help.
+ for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
+ if (I->BB == NextBlock) {
+ std::swap(*I, BackCase);
+ break;
+ }
+ }
+ }
+
+ // Create a CaseBlock record representing a conditional branch to
+ // the Case's target mbb if the value being switched on SV is equal
+ // to C.
+ MachineBasicBlock *CurBlock = CR.CaseBB;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+ MachineBasicBlock *FallThrough;
+ if (I != E-1) {
+ FallThrough = new MachineBasicBlock(CurBlock->getBasicBlock());
+ CurMF->getBasicBlockList().insert(BBI, FallThrough);
+ } else {
+ // If the last case doesn't match, go to the default block.
+ FallThrough = Default;
+ }
+
+ Value *RHS, *LHS, *MHS;
+ ISD::CondCode CC;
+ if (I->High == I->Low) {
+      // This is just a small case range :) containing exactly 1 case.
+ CC = ISD::SETEQ;
+ LHS = SV; RHS = I->High; MHS = NULL;
+ } else {
+ CC = ISD::SETLE;
+ LHS = I->Low; MHS = SV; RHS = I->High;
+ }
+ SelectionDAGISel::CaseBlock CB(CC, LHS, RHS, MHS,
+ I->BB, FallThrough, CurBlock);
+
+ // If emitting the first comparison, just call visitSwitchCase to emit the
+ // code into the current block. Otherwise, push the CaseBlock onto the
+ // vector to be later processed by SDISel, and insert the node's MBB
+ // before the next MBB.
+ if (CurBlock == CurMBB)
+ visitSwitchCase(CB);
+ else
+ SwitchCases.push_back(CB);
+
+ CurBlock = FallThrough;
+ }
+
+ return true;
+}
+
+static inline bool areJTsAllowed(const TargetLowering &TLI) {
+ return (TLI.isOperationLegal(ISD::BR_JT, MVT::Other) ||
+ TLI.isOperationLegal(ISD::BRIND, MVT::Other));
+}
+
+/// handleJTSwitchCase - Emit a jump table for the current switch case range.
+bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default) {
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+
+ int64_t First = cast<ConstantInt>(FrontCase.Low)->getSExtValue();
+ int64_t Last = cast<ConstantInt>(BackCase.High)->getSExtValue();
+
+ uint64_t TSize = 0;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I)
+ TSize += I->size();
+
+ if (!areJTsAllowed(TLI) || TSize <= 3)
+ return false;
+
+ double Density = (double)TSize / (double)((Last - First) + 1ULL);
+ if (Density < 0.4)
+ return false;
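+  // For example, 4 cases covering the values 0, 1, 2 and 9 span 10 values,
+  // giving a density of 4/10 = 0.4, which is just dense enough for a jump
+  // table; 4 cases spread over 100 values (density 0.04) would be rejected.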
+
+ DOUT << "Lowering jump table\n"
+ << "First entry: " << First << ". Last entry: " << Last << "\n"
+ << "Size: " << TSize << ". Density: " << Density << "\n\n";
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = CurMBB->getParent();
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CR.CaseBB;
+
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ // Create a new basic block to hold the code for loading the address
+ // of the jump table, and jumping to it. Update successor information;
+ // we will either branch to the default case for the switch, or the jump
+ // table.
+ MachineBasicBlock *JumpTableBB = new MachineBasicBlock(LLVMBB);
+ CurMF->getBasicBlockList().insert(BBI, JumpTableBB);
+ CR.CaseBB->addSuccessor(Default);
+ CR.CaseBB->addSuccessor(JumpTableBB);
+
+ // Build a vector of destination BBs, corresponding to each target
+ // of the jump table. If the value of the jump table slot corresponds to
+ // a case statement, push the case's BB onto the vector, otherwise, push
+ // the default BB.
+ std::vector<MachineBasicBlock*> DestBBs;
+ int64_t TEI = First;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
+ int64_t Low = cast<ConstantInt>(I->Low)->getSExtValue();
+ int64_t High = cast<ConstantInt>(I->High)->getSExtValue();
+
+ if ((Low <= TEI) && (TEI <= High)) {
+ DestBBs.push_back(I->BB);
+ if (TEI==High)
+ ++I;
+ } else {
+ DestBBs.push_back(Default);
+ }
+ }
+
+ // Update successor info. Add one edge to each unique successor.
+ BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
+ for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
+ E = DestBBs.end(); I != E; ++I) {
+ if (!SuccsHandled[(*I)->getNumber()]) {
+ SuccsHandled[(*I)->getNumber()] = true;
+ JumpTableBB->addSuccessor(*I);
+ }
+ }
+
+ // Create a jump table index for this jump table, or return an existing
+ // one.
+ unsigned JTI = CurMF->getJumpTableInfo()->getJumpTableIndex(DestBBs);
+
+ // Set the jump table information so that we can codegen it as a second
+ // MachineBasicBlock
+ SelectionDAGISel::JumpTable JT(-1U, JTI, JumpTableBB, Default);
+ SelectionDAGISel::JumpTableHeader JTH(First, Last, SV, CR.CaseBB,
+ (CR.CaseBB == CurMBB));
+ if (CR.CaseBB == CurMBB)
+ visitJumpTableHeader(JT, JTH);
+
+ JTCases.push_back(SelectionDAGISel::JumpTableBlock(JTH, JT));
+
+ return true;
+}
+
+/// handleBTSplitSwitchCase - Emit a comparison and split the binary search
+/// tree into two subtrees.
+bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default) {
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = CurMBB->getParent();
+
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+ MachineFunction::iterator BBI = CR.CaseBB;
+
+ if (++BBI != CurMBB->getParent()->end())
+ NextBlock = BBI;
+
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ // Size is the number of Cases represented by this range.
+ unsigned Size = CR.Range.second - CR.Range.first;
+
+ int64_t First = cast<ConstantInt>(FrontCase.Low)->getSExtValue();
+ int64_t Last = cast<ConstantInt>(BackCase.High)->getSExtValue();
+ double FMetric = 0;
+ CaseItr Pivot = CR.Range.first + Size/2;
+
+ // Select optimal pivot, maximizing sum density of LHS and RHS. This will
+ // (heuristically) allow us to emit JumpTable's later.
+ uint64_t TSize = 0;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I)
+ TSize += I->size();
+
+ uint64_t LSize = FrontCase.size();
+ uint64_t RSize = TSize-LSize;
+ DOUT << "Selecting best pivot: \n"
+ << "First: " << First << ", Last: " << Last <<"\n"
+ << "LSize: " << LSize << ", RSize: " << RSize << "\n";
+ for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
+ J!=E; ++I, ++J) {
+ int64_t LEnd = cast<ConstantInt>(I->High)->getSExtValue();
+ int64_t RBegin = cast<ConstantInt>(J->Low)->getSExtValue();
+ assert((RBegin-LEnd>=1) && "Invalid case distance");
+ double LDensity = (double)LSize / (double)((LEnd - First) + 1ULL);
+ double RDensity = (double)RSize / (double)((Last - RBegin) + 1ULL);
+ double Metric = Log2_64(RBegin-LEnd)*(LDensity+RDensity);
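+    // The metric rewards a pivot that leaves a large gap between the two
+    // halves (the Log2 factor) while keeping both halves dense, so each half
+    // is more likely to be emitted as a jump table later.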
+ // Should always split in some non-trivial place
+ DOUT <<"=>Step\n"
+ << "LEnd: " << LEnd << ", RBegin: " << RBegin << "\n"
+ << "LDensity: " << LDensity << ", RDensity: " << RDensity << "\n"
+ << "Metric: " << Metric << "\n";
+ if (FMetric < Metric) {
+ Pivot = J;
+ FMetric = Metric;
+ DOUT << "Current metric set to: " << FMetric << "\n";
+ }
+
+ LSize += J->size();
+ RSize -= J->size();
+ }
+ if (areJTsAllowed(TLI)) {
+ // If our case is dense we *really* should handle it earlier!
+ assert((FMetric > 0) && "Should handle dense range earlier!");
+ } else {
+ Pivot = CR.Range.first + Size/2;
+ }
+
+ CaseRange LHSR(CR.Range.first, Pivot);
+ CaseRange RHSR(Pivot, CR.Range.second);
+ Constant *C = Pivot->Low;
+ MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
+
+ // We know that we branch to the LHS if the Value being switched on is
+ // less than the Pivot value, C. We use this to optimize our binary
+ // tree a bit, by recognizing that if SV is greater than or equal to the
+ // LHS's Case Value, and that Case Value is exactly one less than the
+ // Pivot's Value, then we can branch directly to the LHS's Target,
+ // rather than creating a leaf node for it.
+ if ((LHSR.second - LHSR.first) == 1 &&
+ LHSR.first->High == CR.GE &&
+ cast<ConstantInt>(C)->getSExtValue() ==
+ (cast<ConstantInt>(CR.GE)->getSExtValue() + 1LL)) {
+ TrueBB = LHSR.first->BB;
+ } else {
+ TrueBB = new MachineBasicBlock(LLVMBB);
+ CurMF->getBasicBlockList().insert(BBI, TrueBB);
+ WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
+ }
+
+ // Similar to the optimization above, if the Value being switched on is
+ // known to be less than the Constant CR.LT, and the current Case Value
+ // is CR.LT - 1, then we can branch directly to the target block for
+ // the current Case Value, rather than emitting a RHS leaf node for it.
+ if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
+ cast<ConstantInt>(RHSR.first->Low)->getSExtValue() ==
+ (cast<ConstantInt>(CR.LT)->getSExtValue() - 1LL)) {
+ FalseBB = RHSR.first->BB;
+ } else {
+ FalseBB = new MachineBasicBlock(LLVMBB);
+ CurMF->getBasicBlockList().insert(BBI, FalseBB);
+ WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
+ }
+
+ // Create a CaseBlock record representing a conditional branch to
+ // the LHS node if the value being switched on SV is less than C.
+  // Otherwise, branch to RHS.
+ SelectionDAGISel::CaseBlock CB(ISD::SETLT, SV, C, NULL,
+ TrueBB, FalseBB, CR.CaseBB);
+
+ if (CR.CaseBB == CurMBB)
+ visitSwitchCase(CB);
+ else
+ SwitchCases.push_back(CB);
+
+ return true;
+}
+
+/// handleBitTestsSwitchCase - If the current case range has few destinations
+/// and spans fewer values than the machine word bitwidth, encode the case
+/// range into a series of masks and emit bit tests with these masks.
+bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ Value* SV,
+ MachineBasicBlock* Default){
+ unsigned IntPtrBits = MVT::getSizeInBits(TLI.getPointerTy());
+
+ Case& FrontCase = *CR.Range.first;
+ Case& BackCase = *(CR.Range.second-1);
+
+ // Get the MachineFunction which holds the current MBB. This is used when
+ // inserting any additional MBBs necessary to represent the switch.
+ MachineFunction *CurMF = CurMBB->getParent();
+
+ unsigned numCmps = 0;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second;
+ I!=E; ++I) {
+    // A single case counts as one comparison, a case range as two.
+ if (I->Low == I->High)
+ numCmps +=1;
+ else
+ numCmps +=2;
+ }
+
+ // Count unique destinations
+ SmallSet<MachineBasicBlock*, 4> Dests;
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+ Dests.insert(I->BB);
+ if (Dests.size() > 3)
+      // Don't bother with the code below if there are too many unique destinations.
+ return false;
+ }
+ DOUT << "Total number of unique destinations: " << Dests.size() << "\n"
+ << "Total number of comparisons: " << numCmps << "\n";
+
+ // Compute span of values.
+ Constant* minValue = FrontCase.Low;
+ Constant* maxValue = BackCase.High;
+ uint64_t range = cast<ConstantInt>(maxValue)->getSExtValue() -
+ cast<ConstantInt>(minValue)->getSExtValue();
+ DOUT << "Compare range: " << range << "\n"
+ << "Low bound: " << cast<ConstantInt>(minValue)->getSExtValue() << "\n"
+ << "High bound: " << cast<ConstantInt>(maxValue)->getSExtValue() << "\n";
+
+ if (range>=IntPtrBits ||
+ (!(Dests.size() == 1 && numCmps >= 3) &&
+ !(Dests.size() == 2 && numCmps >= 5) &&
+ !(Dests.size() >= 3 && numCmps >= 6)))
+ return false;
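+  // Bit tests only pay off when the whole span fits in a machine word and
+  // enough comparisons are saved for the number of destinations: at least 3
+  // comparisons for one destination, 5 for two, and 6 for three.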
+
+ DOUT << "Emitting bit tests\n";
+ int64_t lowBound = 0;
+
+  // If all the case values already fit in a machine word without subtracting
+  // minValue, we can optimize away the subtraction.
+ if (cast<ConstantInt>(minValue)->getSExtValue() >= 0 &&
+ cast<ConstantInt>(maxValue)->getSExtValue() < IntPtrBits) {
+ range = cast<ConstantInt>(maxValue)->getSExtValue();
+ } else {
+ lowBound = cast<ConstantInt>(minValue)->getSExtValue();
+ }
+
+ CaseBitsVector CasesBits;
+ unsigned i, count = 0;
+
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+ MachineBasicBlock* Dest = I->BB;
+ for (i = 0; i < count; ++i)
+ if (Dest == CasesBits[i].BB)
+ break;
+
+ if (i == count) {
+      assert((count < 3) && "Too many destinations to test!");
+ CasesBits.push_back(CaseBits(0, Dest, 0));
+ count++;
+ }
+
+ uint64_t lo = cast<ConstantInt>(I->Low)->getSExtValue() - lowBound;
+ uint64_t hi = cast<ConstantInt>(I->High)->getSExtValue() - lowBound;
+
+ for (uint64_t j = lo; j <= hi; j++) {
+ CasesBits[i].Mask |= 1ULL << j;
+ CasesBits[i].Bits++;
+ }
+
+ }
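+  // Each CasesBits entry now holds a per-destination bitmask of accepted
+  // values.  For example, with lowBound 0 and the cases 1, 3 and 5 all
+  // branching to the same block, that block's mask is 0b101010, so a single
+  // AND plus compare replaces three separate equality tests.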
+ std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
+
+ SelectionDAGISel::BitTestInfo BTC;
+
+ // Figure out which block is immediately after the current one.
+ MachineFunction::iterator BBI = CR.CaseBB;
+ ++BBI;
+
+ const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+ DOUT << "Cases:\n";
+ for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
+ DOUT << "Mask: " << CasesBits[i].Mask << ", Bits: " << CasesBits[i].Bits
+ << ", BB: " << CasesBits[i].BB << "\n";
+
+ MachineBasicBlock *CaseBB = new MachineBasicBlock(LLVMBB);
+ CurMF->getBasicBlockList().insert(BBI, CaseBB);
+ BTC.push_back(SelectionDAGISel::BitTestCase(CasesBits[i].Mask,
+ CaseBB,
+ CasesBits[i].BB));
+ }
+
+ SelectionDAGISel::BitTestBlock BTB(lowBound, range, SV,
+ -1U, (CR.CaseBB == CurMBB),
+ CR.CaseBB, Default, BTC);
+
+ if (CR.CaseBB == CurMBB)
+ visitBitTestHeader(BTB);
+
+ BitTestCases.push_back(BTB);
+
+ return true;
+}
+
+
+// Clusterify - Transform a simple list of Cases into a list of CaseRange's.
+unsigned SelectionDAGLowering::Clusterify(CaseVector& Cases,
+ const SwitchInst& SI) {
+ unsigned numCmps = 0;
+
+ // Start with "simple" cases
+ for (unsigned i = 1; i < SI.getNumSuccessors(); ++i) {
+ MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
+ Cases.push_back(Case(SI.getSuccessorValue(i),
+ SI.getSuccessorValue(i),
+ SMBB));
+ }
+  std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+  // Merge cases into clusters.
+ if (Cases.size()>=2)
+ // Must recompute end() each iteration because it may be
+ // invalidated by erase if we hold on to it
+ for (CaseItr I=Cases.begin(), J=++(Cases.begin()); J!=Cases.end(); ) {
+ int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
+ int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
+ MachineBasicBlock* nextBB = J->BB;
+ MachineBasicBlock* currentBB = I->BB;
+
+ // If the two neighboring cases go to the same destination, merge them
+ // into a single case.
+ if ((nextValue-currentValue==1) && (currentBB == nextBB)) {
+ I->High = J->High;
+ J = Cases.erase(J);
+ } else {
+ I = J++;
+ }
+ }
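+  // For example, the cases 1, 2 and 3 branching to BB1 plus the case 7
+  // branching to BB2 cluster into the ranges [1, 3] -> BB1 and [7, 7] -> BB2.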
+
+ for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+ if (I->Low != I->High)
+ // A range counts double, since it requires two compares.
+ ++numCmps;
+ }
+
+ return numCmps;
+}
+
+void SelectionDAGLowering::visitSwitch(SwitchInst &SI) {
+ // Figure out which block is immediately after the current one.
+ MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+ MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+ // If there is only the default destination, branch to it if it is not the
+ // next basic block. Otherwise, just fall through.
+ if (SI.getNumOperands() == 2) {
+ // Update machine-CFG edges.
+
+ // If this is not a fall-through branch, emit the branch.
+ if (Default != NextBlock)
+ DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, getRoot(),
+ DAG.getBasicBlock(Default)));
+
+ CurMBB->addSuccessor(Default);
+ return;
+ }
+
+ // If there are any non-default case statements, create a vector of Cases
+ // representing each one, and sort the vector so that we can efficiently
+ // create a binary search tree from them.
+ CaseVector Cases;
+ unsigned numCmps = Clusterify(Cases, SI);
+ DOUT << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << "\n";
+
+ // Get the Value to be switched on and default basic blocks, which will be
+ // inserted into CaseBlock records, representing basic blocks in the binary
+ // search tree.
+ Value *SV = SI.getOperand(0);
+
+ // Push the initial CaseRec onto the worklist
+ CaseRecVector WorkList;
+ WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end())));
+
+ while (!WorkList.empty()) {
+ // Grab a record representing a case range to process off the worklist
+ CaseRec CR = WorkList.back();
+ WorkList.pop_back();
+
+ if (handleBitTestsSwitchCase(CR, WorkList, SV, Default))
+ continue;
+
+ // If the range has few cases (two or less) emit a series of specific
+ // tests.
+ if (handleSmallSwitchRange(CR, WorkList, SV, Default))
+ continue;
+
+    // If the switch has more than three case values, is at least 40% dense,
+    // and the target supports indirect branches, then emit a jump table rather
+    // than lowering the switch to a binary tree of conditional branches.
+ if (handleJTSwitchCase(CR, WorkList, SV, Default))
+ continue;
+
+    // Emit a binary tree. We need to pick a pivot, and push the left and right
+    // ranges onto the worklist. Leaves are handled via handleSmallSwitchRange().
+ handleBTSplitSwitchCase(CR, WorkList, SV, Default);
+ }
+}
+
+
+void SelectionDAGLowering::visitSub(User &I) {
+ // -0.0 - X --> fneg
+ const Type *Ty = I.getType();
+ if (isa<VectorType>(Ty)) {
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
+ const VectorType *DestTy = cast<VectorType>(I.getType());
+ const Type *ElTy = DestTy->getElementType();
+ if (ElTy->isFloatingPoint()) {
+ unsigned VL = DestTy->getNumElements();
+ std::vector<Constant*> NZ(VL, ConstantFP::get(ElTy, -0.0));
+ Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
+ if (CV == CNZ) {
+ SDOperand Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FNEG, Op2.getValueType(), Op2));
+ return;
+ }
+ }
+ }
+ }
+ if (Ty->isFloatingPoint()) {
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
+ if (CFP->isExactlyValue(-0.0)) {
+ SDOperand Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FNEG, Op2.getValueType(), Op2));
+ return;
+ }
+ }
+
+ visitBinary(I, Ty->isFPOrFPVector() ? ISD::FSUB : ISD::SUB);
+}
+
+void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) {
+ SDOperand Op1 = getValue(I.getOperand(0));
+ SDOperand Op2 = getValue(I.getOperand(1));
+
+ setValue(&I, DAG.getNode(OpCode, Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) {
+ SDOperand Op1 = getValue(I.getOperand(0));
+ SDOperand Op2 = getValue(I.getOperand(1));
+
+ if (MVT::getSizeInBits(TLI.getShiftAmountTy()) <
+ MVT::getSizeInBits(Op2.getValueType()))
+ Op2 = DAG.getNode(ISD::TRUNCATE, TLI.getShiftAmountTy(), Op2);
+ else if (TLI.getShiftAmountTy() > Op2.getValueType())
+ Op2 = DAG.getNode(ISD::ANY_EXTEND, TLI.getShiftAmountTy(), Op2);
+
+ setValue(&I, DAG.getNode(Opcode, Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGLowering::visitICmp(User &I) {
+ ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(&I))
+ predicate = IC->getPredicate();
+ else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+ predicate = ICmpInst::Predicate(IC->getPredicate());
+ SDOperand Op1 = getValue(I.getOperand(0));
+ SDOperand Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Opcode;
+ switch (predicate) {
+ case ICmpInst::ICMP_EQ : Opcode = ISD::SETEQ; break;
+ case ICmpInst::ICMP_NE : Opcode = ISD::SETNE; break;
+ case ICmpInst::ICMP_UGT : Opcode = ISD::SETUGT; break;
+ case ICmpInst::ICMP_UGE : Opcode = ISD::SETUGE; break;
+ case ICmpInst::ICMP_ULT : Opcode = ISD::SETULT; break;
+ case ICmpInst::ICMP_ULE : Opcode = ISD::SETULE; break;
+ case ICmpInst::ICMP_SGT : Opcode = ISD::SETGT; break;
+ case ICmpInst::ICMP_SGE : Opcode = ISD::SETGE; break;
+ case ICmpInst::ICMP_SLT : Opcode = ISD::SETLT; break;
+ case ICmpInst::ICMP_SLE : Opcode = ISD::SETLE; break;
+ default:
+ assert(!"Invalid ICmp predicate value");
+ Opcode = ISD::SETEQ;
+ break;
+ }
+ setValue(&I, DAG.getSetCC(MVT::i1, Op1, Op2, Opcode));
+}
+
+void SelectionDAGLowering::visitFCmp(User &I) {
+ FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+ if (FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+ predicate = FC->getPredicate();
+ else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+ predicate = FCmpInst::Predicate(FC->getPredicate());
+ SDOperand Op1 = getValue(I.getOperand(0));
+ SDOperand Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Condition, FOC, FPC;
+ switch (predicate) {
+ case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
+ case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
+ case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
+ case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
+ case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
+ case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
+ case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break;
+ case FCmpInst::FCMP_ORD: FOC = ISD::SETEQ; FPC = ISD::SETO; break;
+ case FCmpInst::FCMP_UNO: FOC = ISD::SETNE; FPC = ISD::SETUO; break;
+ case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
+ case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
+ case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
+ case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break;
+ case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break;
+ case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
+ case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break;
+ default:
+ assert(!"Invalid FCmp predicate value");
+ FOC = FPC = ISD::SETFALSE;
+ break;
+ }
+ if (FiniteOnlyFPMath())
+ Condition = FOC;
+ else
+ Condition = FPC;
+ setValue(&I, DAG.getSetCC(MVT::i1, Op1, Op2, Condition));
+}
+
+void SelectionDAGLowering::visitSelect(User &I) {
+ SDOperand Cond = getValue(I.getOperand(0));
+ SDOperand TrueVal = getValue(I.getOperand(1));
+ SDOperand FalseVal = getValue(I.getOperand(2));
+ setValue(&I, DAG.getNode(ISD::SELECT, TrueVal.getValueType(), Cond,
+ TrueVal, FalseVal));
+}
+
+
+void SelectionDAGLowering::visitTrunc(User &I) {
+ // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
+ SDOperand N = getValue(I.getOperand(0));
+ MVT::ValueType DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::TRUNCATE, DestVT, N));
+}
+
+void SelectionDAGLowering::visitZExt(User &I) {
+ // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // ZExt also can't be a cast to bool for the same reason. So, nothing much to do.
+ SDOperand N = getValue(I.getOperand(0));
+ MVT::ValueType DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, DestVT, N));
+}
+
+void SelectionDAGLowering::visitSExt(User &I) {
+ // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // SExt also can't be a cast to bool for the same reason. So, nothing much to do.
+ SDOperand N = getValue(I.getOperand(0));
+ MVT::ValueType DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPTrunc(User &I) {
+ // FPTrunc is never a no-op cast, no need to check
+ SDOperand N = getValue(I.getOperand(0));
+ MVT::ValueType DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_ROUND, DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPExt(User &I){
+  // FPExt is never a no-op cast, no need to check
+ SDOperand N = getValue(I.getOperand(0));
+ MVT::ValueType DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_EXTEND, DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPToUI(User &I) {
+ // FPToUI is never a no-op cast, no need to check
+ SDOperand N = getValue(I.getOperand(0));
+ MVT::ValueType DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_UINT, DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPToSI(User &I) {
+ // FPToSI is never a no-op cast, no need to check
+ SDOperand N = getValue(I.getOperand(0));
+ MVT::ValueType DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_SINT, DestVT, N));
+}
+
+void SelectionDAGLowering::visitUIToFP(User &I) {
+ // UIToFP is never a no-op cast, no need to check
+ SDOperand N = getValue(I.getOperand(0));
+ MVT::ValueType DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::UINT_TO_FP, DestVT, N));
+}
+
+void SelectionDAGLowering::visitSIToFP(User &I){
+  // SIToFP is never a no-op cast, no need to check
+ SDOperand N = getValue(I.getOperand(0));
+ MVT::ValueType DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::SINT_TO_FP, DestVT, N));
+}
+
+void SelectionDAGLowering::visitPtrToInt(User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDOperand N = getValue(I.getOperand(0));
+ MVT::ValueType SrcVT = N.getValueType();
+ MVT::ValueType DestVT = TLI.getValueType(I.getType());
+ SDOperand Result;
+ if (MVT::getSizeInBits(DestVT) < MVT::getSizeInBits(SrcVT))
+ Result = DAG.getNode(ISD::TRUNCATE, DestVT, N);
+ else
+ // Note: ZERO_EXTEND can handle cases where the sizes are equal too
+ Result = DAG.getNode(ISD::ZERO_EXTEND, DestVT, N);
+ setValue(&I, Result);
+}
+
+void SelectionDAGLowering::visitIntToPtr(User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDOperand N = getValue(I.getOperand(0));
+ MVT::ValueType SrcVT = N.getValueType();
+ MVT::ValueType DestVT = TLI.getValueType(I.getType());
+ if (MVT::getSizeInBits(DestVT) < MVT::getSizeInBits(SrcVT))
+ setValue(&I, DAG.getNode(ISD::TRUNCATE, DestVT, N));
+ else
+ // Note: ZERO_EXTEND can handle cases where the sizes are equal too
+ setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, DestVT, N));
+}
+
+void SelectionDAGLowering::visitBitCast(User &I) {
+ SDOperand N = getValue(I.getOperand(0));
+ MVT::ValueType DestVT = TLI.getValueType(I.getType());
+
+ // BitCast assures us that source and destination are the same size so this
+ // is either a BIT_CONVERT or a no-op.
+ if (DestVT != N.getValueType())
+ setValue(&I, DAG.getNode(ISD::BIT_CONVERT, DestVT, N)); // convert types
+ else
+ setValue(&I, N); // noop cast.
+}
+
+void SelectionDAGLowering::visitInsertElement(User &I) {
+ SDOperand InVec = getValue(I.getOperand(0));
+ SDOperand InVal = getValue(I.getOperand(1));
+ SDOperand InIdx = DAG.getNode(ISD::ZERO_EXTEND, TLI.getPointerTy(),
+ getValue(I.getOperand(2)));
+
+ setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT,
+ TLI.getValueType(I.getType()),
+ InVec, InVal, InIdx));
+}
+
+void SelectionDAGLowering::visitExtractElement(User &I) {
+ SDOperand InVec = getValue(I.getOperand(0));
+ SDOperand InIdx = DAG.getNode(ISD::ZERO_EXTEND, TLI.getPointerTy(),
+ getValue(I.getOperand(1)));
+ setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ TLI.getValueType(I.getType()), InVec, InIdx));
+}
+
+void SelectionDAGLowering::visitShuffleVector(User &I) {
+ SDOperand V1 = getValue(I.getOperand(0));
+ SDOperand V2 = getValue(I.getOperand(1));
+ SDOperand Mask = getValue(I.getOperand(2));
+
+ setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE,
+ TLI.getValueType(I.getType()),
+ V1, V2, Mask));
+}
+
+
+void SelectionDAGLowering::visitGetElementPtr(User &I) {
+ SDOperand N = getValue(I.getOperand(0));
+ const Type *Ty = I.getOperand(0)->getType();
+
+ for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end();
+ OI != E; ++OI) {
+ Value *Idx = *OI;
+ if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+ unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
+ N = DAG.getNode(ISD::ADD, N.getValueType(), N,
+ getIntPtrConstant(Offset));
+ }
+ Ty = StTy->getElementType(Field);
+ } else {
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // If this is a constant subscript, handle it quickly.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->getZExtValue() == 0) continue;
+ uint64_t Offs =
+ TD->getTypeSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+ N = DAG.getNode(ISD::ADD, N.getValueType(), N, getIntPtrConstant(Offs));
+ continue;
+ }
+
+ // N = N + Idx * ElementSize;
+ uint64_t ElementSize = TD->getTypeSize(Ty);
+ SDOperand IdxN = getValue(Idx);
+
+ // If the index is smaller or larger than intptr_t, truncate or extend
+ // it.
+ if (IdxN.getValueType() < N.getValueType()) {
+ IdxN = DAG.getNode(ISD::SIGN_EXTEND, N.getValueType(), IdxN);
+ } else if (IdxN.getValueType() > N.getValueType())
+ IdxN = DAG.getNode(ISD::TRUNCATE, N.getValueType(), IdxN);
+
+ // If this is a multiply by a power of two, turn it into a shl
+ // immediately. This is a very common case.
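+      // For example, indexing an array of 8-byte elements becomes IdxN << 3.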
+ if (isPowerOf2_64(ElementSize)) {
+ unsigned Amt = Log2_64(ElementSize);
+ IdxN = DAG.getNode(ISD::SHL, N.getValueType(), IdxN,
+ DAG.getConstant(Amt, TLI.getShiftAmountTy()));
+ N = DAG.getNode(ISD::ADD, N.getValueType(), N, IdxN);
+ continue;
+ }
+
+ SDOperand Scale = getIntPtrConstant(ElementSize);
+ IdxN = DAG.getNode(ISD::MUL, N.getValueType(), IdxN, Scale);
+ N = DAG.getNode(ISD::ADD, N.getValueType(), N, IdxN);
+ }
+ }
+ setValue(&I, N);
+}
+
+void SelectionDAGLowering::visitAlloca(AllocaInst &I) {
+ // If this is a fixed sized alloca in the entry block of the function,
+ // allocate it statically on the stack.
+ if (FuncInfo.StaticAllocaMap.count(&I))
+ return; // getValue will auto-populate this.
+
+ const Type *Ty = I.getAllocatedType();
+ uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+ I.getAlignment());
+
+ SDOperand AllocSize = getValue(I.getArraySize());
+ MVT::ValueType IntPtr = TLI.getPointerTy();
+ if (IntPtr < AllocSize.getValueType())
+ AllocSize = DAG.getNode(ISD::TRUNCATE, IntPtr, AllocSize);
+ else if (IntPtr > AllocSize.getValueType())
+ AllocSize = DAG.getNode(ISD::ZERO_EXTEND, IntPtr, AllocSize);
+
+ AllocSize = DAG.getNode(ISD::MUL, IntPtr, AllocSize,
+ getIntPtrConstant(TySize));
+
+ // Handle alignment. If the requested alignment is less than the stack
+ // alignment, ignore it and round the size of the allocation up to the stack
+  // alignment size. If the requested alignment is greater than or equal to
+  // the stack alignment, we note it in the DYNAMIC_STACKALLOC node.
+ unsigned StackAlign =
+ TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+ if (Align < StackAlign) {
+ Align = 0;
+ // Add SA-1 to the size.
+ AllocSize = DAG.getNode(ISD::ADD, AllocSize.getValueType(), AllocSize,
+ getIntPtrConstant(StackAlign-1));
+ // Mask out the low bits for alignment purposes.
+ AllocSize = DAG.getNode(ISD::AND, AllocSize.getValueType(), AllocSize,
+ getIntPtrConstant(~(uint64_t)(StackAlign-1)));
+ }
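+  // For example, with a 16-byte stack alignment a request for 20 bytes is
+  // rounded up to (20 + 15) & ~15 = 32 bytes.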
+
+ SDOperand Ops[] = { getRoot(), AllocSize, getIntPtrConstant(Align) };
+ const MVT::ValueType *VTs = DAG.getNodeValueTypes(AllocSize.getValueType(),
+ MVT::Other);
+ SDOperand DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, VTs, 2, Ops, 3);
+ setValue(&I, DSA);
+ DAG.setRoot(DSA.getValue(1));
+
+ // Inform the Frame Information that we have just allocated a variable-sized
+ // object.
+ CurMBB->getParent()->getFrameInfo()->CreateVariableSizedObject();
+}
+
+void SelectionDAGLowering::visitLoad(LoadInst &I) {
+ SDOperand Ptr = getValue(I.getOperand(0));
+
+ SDOperand Root;
+ if (I.isVolatile())
+ Root = getRoot();
+ else {
+ // Do not serialize non-volatile loads against each other.
+ Root = DAG.getRoot();
+ }
+
+ setValue(&I, getLoadFrom(I.getType(), Ptr, I.getOperand(0),
+ Root, I.isVolatile(), I.getAlignment()));
+}
+
+SDOperand SelectionDAGLowering::getLoadFrom(const Type *Ty, SDOperand Ptr,
+ const Value *SV, SDOperand Root,
+ bool isVolatile,
+ unsigned Alignment) {
+ SDOperand L =
+ DAG.getLoad(TLI.getValueType(Ty), Root, Ptr, SV, 0,
+ isVolatile, Alignment);
+
+ if (isVolatile)
+ DAG.setRoot(L.getValue(1));
+ else
+ PendingLoads.push_back(L.getValue(1));
+
+ return L;
+}
+
+
+void SelectionDAGLowering::visitStore(StoreInst &I) {
+ Value *SrcV = I.getOperand(0);
+ SDOperand Src = getValue(SrcV);
+ SDOperand Ptr = getValue(I.getOperand(1));
+ DAG.setRoot(DAG.getStore(getRoot(), Src, Ptr, I.getOperand(1), 0,
+ I.isVolatile(), I.getAlignment()));
+}
+
+/// IntrinsicCannotAccessMemory - Return true if the specified intrinsic cannot
+/// access memory and has no other side effects at all.
+static bool IntrinsicCannotAccessMemory(unsigned IntrinsicID) {
+#define GET_NO_MEMORY_INTRINSICS
+#include "llvm/Intrinsics.gen"
+#undef GET_NO_MEMORY_INTRINSICS
+ return false;
+}
+
+// IntrinsicOnlyReadsMemory - Return true if the specified intrinsic doesn't
+// have any side-effects or if it only reads memory.
+static bool IntrinsicOnlyReadsMemory(unsigned IntrinsicID) {
+#define GET_SIDE_EFFECT_INFO
+#include "llvm/Intrinsics.gen"
+#undef GET_SIDE_EFFECT_INFO
+ return false;
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I,
+ unsigned Intrinsic) {
+ bool HasChain = !IntrinsicCannotAccessMemory(Intrinsic);
+ bool OnlyLoad = HasChain && IntrinsicOnlyReadsMemory(Intrinsic);
+
+ // Build the operand list.
+ SmallVector<SDOperand, 8> Ops;
+ if (HasChain) { // If this intrinsic has side-effects, chainify it.
+ if (OnlyLoad) {
+ // We don't need to serialize loads against other loads.
+ Ops.push_back(DAG.getRoot());
+ } else {
+ Ops.push_back(getRoot());
+ }
+ }
+
+ // Add the intrinsic ID as an integer operand.
+ Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
+
+ // Add all operands of the call to the operand list.
+ for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
+ SDOperand Op = getValue(I.getOperand(i));
+ assert(TLI.isTypeLegal(Op.getValueType()) &&
+ "Intrinsic uses a non-legal type?");
+ Ops.push_back(Op);
+ }
+
+ std::vector<MVT::ValueType> VTs;
+ if (I.getType() != Type::VoidTy) {
+ MVT::ValueType VT = TLI.getValueType(I.getType());
+ if (MVT::isVector(VT)) {
+ const VectorType *DestTy = cast<VectorType>(I.getType());
+ MVT::ValueType EltVT = TLI.getValueType(DestTy->getElementType());
+
+ VT = MVT::getVectorType(EltVT, DestTy->getNumElements());
+ assert(VT != MVT::Other && "Intrinsic uses a non-legal type?");
+ }
+
+ assert(TLI.isTypeLegal(VT) && "Intrinsic uses a non-legal type?");
+ VTs.push_back(VT);
+ }
+ if (HasChain)
+ VTs.push_back(MVT::Other);
+
+ const MVT::ValueType *VTList = DAG.getNodeValueTypes(VTs);
+
+ // Create the node.
+ SDOperand Result;
+ if (!HasChain)
+ Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VTList, VTs.size(),
+ &Ops[0], Ops.size());
+ else if (I.getType() != Type::VoidTy)
+ Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, VTList, VTs.size(),
+ &Ops[0], Ops.size());
+ else
+ Result = DAG.getNode(ISD::INTRINSIC_VOID, VTList, VTs.size(),
+ &Ops[0], Ops.size());
+
+ if (HasChain) {
+ SDOperand Chain = Result.getValue(Result.Val->getNumValues()-1);
+ if (OnlyLoad)
+ PendingLoads.push_back(Chain);
+ else
+ DAG.setRoot(Chain);
+ }
+ if (I.getType() != Type::VoidTy) {
+ if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+ MVT::ValueType VT = TLI.getValueType(PTy);
+ Result = DAG.getNode(ISD::BIT_CONVERT, VT, Result);
+ }
+ setValue(&I, Result);
+ }
+}
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+static GlobalVariable *ExtractTypeInfo (Value *V) {
+ V = IntrinsicInst::StripPointerCasts(V);
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+  assert((GV || isa<ConstantPointerNull>(V)) &&
+         "TypeInfo must be a global variable or NULL");
+ return GV;
+}
+
+/// addCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+static void addCatchInfo(CallInst &I, MachineModuleInfo *MMI,
+ MachineBasicBlock *MBB) {
+ // Inform the MachineModuleInfo of the personality for this landing pad.
+ ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2));
+ assert(CE->getOpcode() == Instruction::BitCast &&
+ isa<Function>(CE->getOperand(0)) &&
+ "Personality should be a function");
+ MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
+
+ // Gather all the type infos for this landing pad and pass them along to
+ // MachineModuleInfo.
+ std::vector<GlobalVariable *> TyInfo;
+ unsigned N = I.getNumOperands();
+
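+  // Walk the type info operands (3 .. N-1) backwards.  A ConstantInt operand
+  // introduces a filter: it gives the filter's length and is followed by that
+  // many type infos; all other operands are catch type infos.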
+ for (unsigned i = N - 1; i > 2; --i) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) {
+ unsigned FilterLength = CI->getZExtValue();
+ unsigned FirstCatch = i + FilterLength + 1;
+ assert (FirstCatch <= N && "Invalid filter length");
+
+ if (FirstCatch < N) {
+ TyInfo.reserve(N - FirstCatch);
+ for (unsigned j = FirstCatch; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+ }
+
+ TyInfo.reserve(FilterLength);
+ for (unsigned j = i + 1; j < FirstCatch; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ MMI->addFilterTypeInfo(MBB, TyInfo);
+ TyInfo.clear();
+
+ N = i;
+ }
+ }
+
+ if (N > 3) {
+ TyInfo.reserve(N - 3);
+ for (unsigned j = 3; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ MMI->addCatchTypeInfo(MBB, TyInfo);
+ }
+}
+
+/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
+/// we want to emit this as a call to a named external function, return the
+/// name; otherwise, lower it and return null.
+const char *
+SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
+ switch (Intrinsic) {
+ default:
+ // By default, turn this into a target intrinsic node.
+ visitTargetIntrinsic(I, Intrinsic);
+ return 0;
+ case Intrinsic::vastart: visitVAStart(I); return 0;
+ case Intrinsic::vaend: visitVAEnd(I); return 0;
+ case Intrinsic::vacopy: visitVACopy(I); return 0;
+ case Intrinsic::returnaddress:
+ setValue(&I, DAG.getNode(ISD::RETURNADDR, TLI.getPointerTy(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::frameaddress:
+ setValue(&I, DAG.getNode(ISD::FRAMEADDR, TLI.getPointerTy(),
+ getValue(I.getOperand(1))));
+ return 0;
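+ // Note: the string arithmetic below selects between the underscored and
+ // plain names; e.g. "_setjmp"+1 is the string "setjmp", so adding
+ // !TLI.usesUnderscoreSetJmp() skips the leading underscore when the target
+ // does not use it.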
+ case Intrinsic::setjmp:
+ return "_setjmp"+!TLI.usesUnderscoreSetJmp();
+ break;
+ case Intrinsic::longjmp:
+ return "_longjmp"+!TLI.usesUnderscoreLongJmp();
+ break;
+ case Intrinsic::memcpy_i32:
+ case Intrinsic::memcpy_i64:
+ visitMemIntrinsic(I, ISD::MEMCPY);
+ return 0;
+ case Intrinsic::memset_i32:
+ case Intrinsic::memset_i64:
+ visitMemIntrinsic(I, ISD::MEMSET);
+ return 0;
+ case Intrinsic::memmove_i32:
+ case Intrinsic::memmove_i64:
+ visitMemIntrinsic(I, ISD::MEMMOVE);
+ return 0;
+
+ case Intrinsic::dbg_stoppoint: {
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+ DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
+ if (MMI && SPI.getContext() && MMI->Verify(SPI.getContext())) {
+ SDOperand Ops[5];
+
+ Ops[0] = getRoot();
+ Ops[1] = getValue(SPI.getLineValue());
+ Ops[2] = getValue(SPI.getColumnValue());
+
+ DebugInfoDesc *DD = MMI->getDescFor(SPI.getContext());
+ assert(DD && "Not a debug information descriptor");
+ CompileUnitDesc *CompileUnit = cast<CompileUnitDesc>(DD);
+
+ Ops[3] = DAG.getString(CompileUnit->getFileName());
+ Ops[4] = DAG.getString(CompileUnit->getDirectory());
+
+ DAG.setRoot(DAG.getNode(ISD::LOCATION, MVT::Other, Ops, 5));
+ }
+
+ return 0;
+ }
+ case Intrinsic::dbg_region_start: {
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+ DbgRegionStartInst &RSI = cast<DbgRegionStartInst>(I);
+ if (MMI && RSI.getContext() && MMI->Verify(RSI.getContext())) {
+ unsigned LabelID = MMI->RecordRegionStart(RSI.getContext());
+ DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other, getRoot(),
+ DAG.getConstant(LabelID, MVT::i32)));
+ }
+
+ return 0;
+ }
+ case Intrinsic::dbg_region_end: {
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+ DbgRegionEndInst &REI = cast<DbgRegionEndInst>(I);
+ if (MMI && REI.getContext() && MMI->Verify(REI.getContext())) {
+ unsigned LabelID = MMI->RecordRegionEnd(REI.getContext());
+ DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other,
+ getRoot(), DAG.getConstant(LabelID, MVT::i32)));
+ }
+
+ return 0;
+ }
+ case Intrinsic::dbg_func_start: {
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+ DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I);
+ if (MMI && FSI.getSubprogram() &&
+ MMI->Verify(FSI.getSubprogram())) {
+ unsigned LabelID = MMI->RecordRegionStart(FSI.getSubprogram());
+ DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other,
+ getRoot(), DAG.getConstant(LabelID, MVT::i32)));
+ }
+
+ return 0;
+ }
+ case Intrinsic::dbg_declare: {
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+ DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+ if (MMI && DI.getVariable() && MMI->Verify(DI.getVariable())) {
+ SDOperand AddressOp = getValue(DI.getAddress());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(AddressOp))
+ MMI->RecordVariable(DI.getVariable(), FI->getIndex());
+ }
+
+ return 0;
+ }
+
+ case Intrinsic::eh_exception: {
+ if (ExceptionHandling) {
+ if (!CurMBB->isLandingPad()) {
+ // FIXME: Mark exception register as live in. Hack for PR1508.
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ if (Reg) CurMBB->addLiveIn(Reg);
+ }
+ // Insert the EXCEPTIONADDR instruction.
+ SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ SDOperand Ops[1];
+ Ops[0] = DAG.getRoot();
+ SDOperand Op = DAG.getNode(ISD::EXCEPTIONADDR, VTs, Ops, 1);
+ setValue(&I, Op);
+ DAG.setRoot(Op.getValue(1));
+ } else {
+ setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
+ }
+ return 0;
+ }
+
+ case Intrinsic::eh_selector: {
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+
+ if (ExceptionHandling && MMI) {
+ if (CurMBB->isLandingPad())
+ addCatchInfo(I, MMI, CurMBB);
+ else {
+#ifndef NDEBUG
+ FuncInfo.CatchInfoLost.insert(&I);
+#endif
+ // FIXME: Mark exception selector register as live in. Hack for PR1508.
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) CurMBB->addLiveIn(Reg);
+ }
+
+ // Insert the EHSELECTION instruction.
+ SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+ SDOperand Ops[2];
+ Ops[0] = getValue(I.getOperand(1));
+ Ops[1] = getRoot();
+ SDOperand Op = DAG.getNode(ISD::EHSELECTION, VTs, Ops, 2);
+ setValue(&I, Op);
+ DAG.setRoot(Op.getValue(1));
+ } else {
+ setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
+ }
+
+ return 0;
+ }
+
+ case Intrinsic::eh_typeid_for: {
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+
+ if (MMI) {
+ // Find the type id for the given typeinfo.
+ GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1));
+
+ unsigned TypeID = MMI->getTypeIDFor(GV);
+ setValue(&I, DAG.getConstant(TypeID, MVT::i32));
+ } else {
+ // Return something different from what eh_selector returns.
+ setValue(&I, DAG.getConstant(1, MVT::i32));
+ }
+
+ return 0;
+ }
+
+ case Intrinsic::eh_return: {
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+
+ if (MMI && ExceptionHandling) {
+ MMI->setCallsEHReturn(true);
+ DAG.setRoot(DAG.getNode(ISD::EH_RETURN,
+ MVT::Other,
+ getRoot(),
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2))));
+ } else {
+ setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
+ }
+
+ return 0;
+ }
+
+ case Intrinsic::eh_unwind_init: {
+ if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
+ MMI->setCallsUnwindInit(true);
+ }
+
+ return 0;
+ }
+
+ case Intrinsic::eh_dwarf_cfa: {
+ if (ExceptionHandling) {
+ MVT::ValueType VT = getValue(I.getOperand(1)).getValueType();
+ SDOperand Offset = DAG.getNode(ISD::ADD,
+ TLI.getPointerTy(),
+ DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET,
+ VT),
+ getValue(I.getOperand(1)));
+ setValue(&I, DAG.getNode(ISD::ADD,
+ TLI.getPointerTy(),
+ DAG.getNode(ISD::FRAMEADDR,
+ TLI.getPointerTy(),
+ DAG.getConstant(0,
+ TLI.getPointerTy())),
+ Offset));
+ } else {
+ setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
+ }
+
+ return 0;
+ }
+
+ case Intrinsic::sqrt_f32:
+ case Intrinsic::sqrt_f64:
+ setValue(&I, DAG.getNode(ISD::FSQRT,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::powi_f32:
+ case Intrinsic::powi_f64:
+ setValue(&I, DAG.getNode(ISD::FPOWI,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2))));
+ return 0;
+ case Intrinsic::pcmarker: {
+ SDOperand Tmp = getValue(I.getOperand(1));
+ DAG.setRoot(DAG.getNode(ISD::PCMARKER, MVT::Other, getRoot(), Tmp));
+ return 0;
+ }
+ case Intrinsic::readcyclecounter: {
+ SDOperand Op = getRoot();
+ SDOperand Tmp = DAG.getNode(ISD::READCYCLECOUNTER,
+ DAG.getNodeValueTypes(MVT::i64, MVT::Other), 2,
+ &Op, 1);
+ setValue(&I, Tmp);
+ DAG.setRoot(Tmp.getValue(1));
+ return 0;
+ }
+ case Intrinsic::part_select: {
+ // Currently not implemented: just abort
+ assert(0 && "part_select intrinsic not implemented");
+ abort();
+ }
+ case Intrinsic::part_set: {
+ // Currently not implemented: just abort
+ assert(0 && "part_set intrinsic not implemented");
+ abort();
+ }
+ case Intrinsic::bswap:
+ setValue(&I, DAG.getNode(ISD::BSWAP,
+ getValue(I.getOperand(1)).getValueType(),
+ getValue(I.getOperand(1))));
+ return 0;
+ case Intrinsic::cttz: {
+ SDOperand Arg = getValue(I.getOperand(1));
+ MVT::ValueType Ty = Arg.getValueType();
+ SDOperand result = DAG.getNode(ISD::CTTZ, Ty, Arg);
+ if (Ty < MVT::i32)
+ result = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, result);
+ else if (Ty > MVT::i32)
+ result = DAG.getNode(ISD::TRUNCATE, MVT::i32, result);
+ setValue(&I, result);
+ return 0;
+ }
+ case Intrinsic::ctlz: {
+ SDOperand Arg = getValue(I.getOperand(1));
+ MVT::ValueType Ty = Arg.getValueType();
+ SDOperand result = DAG.getNode(ISD::CTLZ, Ty, Arg);
+ if (Ty < MVT::i32)
+ result = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, result);
+ else if (Ty > MVT::i32)
+ result = DAG.getNode(ISD::TRUNCATE, MVT::i32, result);
+ setValue(&I, result);
+ return 0;
+ }
+ case Intrinsic::ctpop: {
+ SDOperand Arg = getValue(I.getOperand(1));
+ MVT::ValueType Ty = Arg.getValueType();
+ SDOperand result = DAG.getNode(ISD::CTPOP, Ty, Arg);
+ if (Ty < MVT::i32)
+ result = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, result);
+ else if (Ty > MVT::i32)
+ result = DAG.getNode(ISD::TRUNCATE, MVT::i32, result);
+ setValue(&I, result);
+ return 0;
+ }
+ case Intrinsic::stacksave: {
+ SDOperand Op = getRoot();
+ SDOperand Tmp = DAG.getNode(ISD::STACKSAVE,
+ DAG.getNodeValueTypes(TLI.getPointerTy(), MVT::Other), 2, &Op, 1);
+ setValue(&I, Tmp);
+ DAG.setRoot(Tmp.getValue(1));
+ return 0;
+ }
+ case Intrinsic::stackrestore: {
+ SDOperand Tmp = getValue(I.getOperand(1));
+ DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, MVT::Other, getRoot(), Tmp));
+ return 0;
+ }
+ case Intrinsic::prefetch:
+ // FIXME: Currently discarding prefetches.
+ return 0;
+
+ case Intrinsic::var_annotation:
+ // Discard annotate attributes
+ return 0;
+ }
+}
+
+
+void SelectionDAGLowering::LowerCallTo(Instruction &I,
+ const Type *CalledValueTy,
+ unsigned CallingConv,
+ bool IsTailCall,
+ SDOperand Callee, unsigned OpIdx,
+ MachineBasicBlock *LandingPad) {
+ const PointerType *PT = cast<PointerType>(CalledValueTy);
+ const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ const ParamAttrsList *Attrs = FTy->getParamAttrs();
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+ unsigned BeginLabel = 0, EndLabel = 0;
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Args.reserve(I.getNumOperands());
+ for (unsigned i = OpIdx, e = I.getNumOperands(); i != e; ++i) {
+ Value *Arg = I.getOperand(i);
+ SDOperand ArgNode = getValue(Arg);
+ Entry.Node = ArgNode; Entry.Ty = Arg->getType();
+
+ unsigned attrInd = i - OpIdx + 1;
+ Entry.isSExt = Attrs && Attrs->paramHasAttr(attrInd, ParamAttr::SExt);
+ Entry.isZExt = Attrs && Attrs->paramHasAttr(attrInd, ParamAttr::ZExt);
+ Entry.isInReg = Attrs && Attrs->paramHasAttr(attrInd, ParamAttr::InReg);
+ Entry.isSRet = Attrs && Attrs->paramHasAttr(attrInd, ParamAttr::StructRet);
+ Args.push_back(Entry);
+ }
+
+ if (ExceptionHandling && MMI) {
+ // Insert a label before the invoke call to mark the try range. This can be
+ // used to detect deletion of the invoke via the MachineModuleInfo.
+ BeginLabel = MMI->NextLabelID();
+ DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other, getRoot(),
+ DAG.getConstant(BeginLabel, MVT::i32)));
+ }
+
+ std::pair<SDOperand,SDOperand> Result =
+ TLI.LowerCallTo(getRoot(), I.getType(),
+ Attrs && Attrs->paramHasAttr(0, ParamAttr::SExt),
+ FTy->isVarArg(), CallingConv, IsTailCall,
+ Callee, Args, DAG);
+ if (I.getType() != Type::VoidTy)
+ setValue(&I, Result.first);
+ DAG.setRoot(Result.second);
+
+ if (ExceptionHandling && MMI) {
+ // Insert a label at the end of the invoke call to mark the try range. This
+ // can be used to detect deletion of the invoke via the MachineModuleInfo.
+ EndLabel = MMI->NextLabelID();
+ DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other, getRoot(),
+ DAG.getConstant(EndLabel, MVT::i32)));
+
+ // Inform MachineModuleInfo of range.
+ MMI->addInvoke(LandingPad, BeginLabel, EndLabel);
+ }
+}
+
+
+void SelectionDAGLowering::visitCall(CallInst &I) {
+ const char *RenameFn = 0;
+ if (Function *F = I.getCalledFunction()) {
+ if (F->isDeclaration())
+ if (unsigned IID = F->getIntrinsicID()) {
+ RenameFn = visitIntrinsicCall(I, IID);
+ if (!RenameFn)
+ return;
+ } else { // Not an LLVM intrinsic.
+ const std::string &Name = F->getName();
+ if (Name[0] == 'c' && (Name == "copysign" || Name == "copysignf")) {
+ if (I.getNumOperands() == 3 && // Basic sanity checks.
+ I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getType() == I.getOperand(1)->getType() &&
+ I.getType() == I.getOperand(2)->getType()) {
+ SDOperand LHS = getValue(I.getOperand(1));
+ SDOperand RHS = getValue(I.getOperand(2));
+ setValue(&I, DAG.getNode(ISD::FCOPYSIGN, LHS.getValueType(),
+ LHS, RHS));
+ return;
+ }
+ } else if (Name[0] == 'f' && (Name == "fabs" || Name == "fabsf")) {
+ if (I.getNumOperands() == 2 && // Basic sanity checks.
+ I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getType() == I.getOperand(1)->getType()) {
+ SDOperand Tmp = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FABS, Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if (Name[0] == 's' && (Name == "sin" || Name == "sinf")) {
+ if (I.getNumOperands() == 2 && // Basic sanity checks.
+ I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getType() == I.getOperand(1)->getType()) {
+ SDOperand Tmp = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FSIN, Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if (Name[0] == 'c' && (Name == "cos" || Name == "cosf")) {
+ if (I.getNumOperands() == 2 && // Basic sanity checks.
+ I.getOperand(1)->getType()->isFloatingPoint() &&
+ I.getType() == I.getOperand(1)->getType()) {
+ SDOperand Tmp = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FCOS, Tmp.getValueType(), Tmp));
+ return;
+ }
+ }
+ }
+ } else if (isa<InlineAsm>(I.getOperand(0))) {
+ visitInlineAsm(I);
+ return;
+ }
+
+ SDOperand Callee;
+ if (!RenameFn)
+ Callee = getValue(I.getOperand(0));
+ else
+ Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
+
+ LowerCallTo(I, I.getCalledValue()->getType(),
+ I.getCallingConv(),
+ I.isTailCall(),
+ Callee,
+ 1);
+}
+
+
+/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copy the value
+/// out of this object's registers, returning it as a ValueVT value. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+SDOperand RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
+ SDOperand &Chain, SDOperand *Flag) const {
+ // Copy the legal parts from the registers.
+ unsigned NumParts = Regs.size();
+ SmallVector<SDOperand, 8> Parts(NumParts);
+ for (unsigned i = 0; i != NumParts; ++i) {
+ SDOperand Part = Flag ?
+ DAG.getCopyFromReg(Chain, Regs[i], RegVT, *Flag) :
+ DAG.getCopyFromReg(Chain, Regs[i], RegVT);
+ Chain = Part.getValue(1);
+ if (Flag)
+ *Flag = Part.getValue(2);
+ Parts[i] = Part;
+ }
+
+ // Assemble the legal parts into the final value.
+ return getCopyFromParts(DAG, &Parts[0], NumParts, RegVT, ValueVT);
+}
+
+/// getCopyToRegs - Emit a series of CopyToReg nodes that copy the
+/// specified value into the registers specified by this object. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+void RegsForValue::getCopyToRegs(SDOperand Val, SelectionDAG &DAG,
+ SDOperand &Chain, SDOperand *Flag) const {
+ // Get the list of the value's legal parts.
+ unsigned NumParts = Regs.size();
+ SmallVector<SDOperand, 8> Parts(NumParts);
+ getCopyToParts(DAG, Val, &Parts[0], NumParts, RegVT);
+
+ // Copy the parts into the registers.
+ for (unsigned i = 0; i != NumParts; ++i) {
+ SDOperand Part = Flag ?
+ DAG.getCopyToReg(Chain, Regs[i], Parts[i], *Flag) :
+ DAG.getCopyToReg(Chain, Regs[i], Parts[i]);
+ Chain = Part.getValue(0);
+ if (Flag)
+ *Flag = Part.getValue(1);
+ }
+}
+
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list. This adds the code marker and includes the number of
+/// values added into it.
+void RegsForValue::AddInlineAsmOperands(unsigned Code, SelectionDAG &DAG,
+ std::vector<SDOperand> &Ops) const {
+ MVT::ValueType IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy();
+ Ops.push_back(DAG.getTargetConstant(Code | (Regs.size() << 3), IntPtrTy));
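+ // For instance, a REGDEF marker (code 2) covering three registers is
+ // encoded as 2 | (3 << 3) == 26, followed by the three register operands.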
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(Regs[i], RegVT));
+}
+
+/// isAllocatableRegister - If the specified register is safe to allocate,
+/// i.e. it isn't a stack pointer or some other special register, return the
+/// register class for the register. Otherwise, return null.
+static const TargetRegisterClass *
+isAllocatableRegister(unsigned Reg, MachineFunction &MF,
+ const TargetLowering &TLI, const MRegisterInfo *MRI) {
+ MVT::ValueType FoundVT = MVT::Other;
+ const TargetRegisterClass *FoundRC = 0;
+ for (MRegisterInfo::regclass_iterator RCI = MRI->regclass_begin(),
+ E = MRI->regclass_end(); RCI != E; ++RCI) {
+ MVT::ValueType ThisVT = MVT::Other;
+
+ const TargetRegisterClass *RC = *RCI;
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (TLI.isTypeLegal(*I)) {
+ // If we have already found this register in a different register class,
+ // choose the one with the largest VT specified. For example, on
+ // PowerPC, we favor f64 register classes over f32.
+ if (FoundVT == MVT::Other ||
+ MVT::getSizeInBits(FoundVT) < MVT::getSizeInBits(*I)) {
+ ThisVT = *I;
+ break;
+ }
+ }
+ }
+
+ if (ThisVT == MVT::Other) continue;
+
+ // NOTE: This isn't ideal. In particular, this might allocate the
+ // frame pointer in functions that need it (they haven't been taken out of
+ // the allocation order yet, because a variable-sized allocation hasn't been
+ // seen). This is a slight code pessimization, but should still work.
+ for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
+ E = RC->allocation_order_end(MF); I != E; ++I)
+ if (*I == Reg) {
+ // We found a matching register class. Keep looking at others in case
+ // we find one with larger registers that this physreg is also in.
+ FoundRC = RC;
+ FoundVT = ThisVT;
+ break;
+ }
+ }
+ return FoundRC;
+}
+
+
+namespace {
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+struct AsmOperandInfo : public InlineAsm::ConstraintInfo {
+ /// ConstraintCode - This contains the actual string for the code, like "m".
+ std::string ConstraintCode;
+
+ /// ConstraintType - Information about the constraint code, e.g. Register,
+ /// RegisterClass, Memory, Other, Unknown.
+ TargetLowering::ConstraintType ConstraintType;
+
+ /// CallOperand/CallOperandVal - If this is the result output operand or a
+ /// clobber, this is null; otherwise it is the incoming operand to the
+ /// CallInst. This gets modified as the asm is processed.
+ SDOperand CallOperand;
+ Value *CallOperandVal;
+
+ /// ConstraintVT - The ValueType for the operand value.
+ MVT::ValueType ConstraintVT;
+
+ /// AssignedRegs - If this is a register or register class operand, this
+ /// contains the set of registers corresponding to the operand.
+ RegsForValue AssignedRegs;
+
+ AsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+ : InlineAsm::ConstraintInfo(info),
+ ConstraintType(TargetLowering::C_Unknown),
+ CallOperand(0,0), CallOperandVal(0), ConstraintVT(MVT::Other) {
+ }
+
+ void ComputeConstraintToUse(const TargetLowering &TLI);
+
+ /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
+ /// busy in OutputRegs/InputRegs.
+ void MarkAllocatedRegs(bool isOutReg, bool isInReg,
+ std::set<unsigned> &OutputRegs,
+ std::set<unsigned> &InputRegs) const {
+ if (isOutReg)
+ OutputRegs.insert(AssignedRegs.Regs.begin(), AssignedRegs.Regs.end());
+ if (isInReg)
+ InputRegs.insert(AssignedRegs.Regs.begin(), AssignedRegs.Regs.end());
+ }
+};
+} // end anon namespace.
+
+/// getConstraintGenerality - Return an integer indicating how general CT is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+ switch (CT) {
+ default: assert(0 && "Unknown constraint type!");
+ case TargetLowering::C_Other:
+ case TargetLowering::C_Unknown:
+ return 0;
+ case TargetLowering::C_Register:
+ return 1;
+ case TargetLowering::C_RegisterClass:
+ return 2;
+ case TargetLowering::C_Memory:
+ return 3;
+ }
+}
+
+void AsmOperandInfo::ComputeConstraintToUse(const TargetLowering &TLI) {
+ assert(!Codes.empty() && "Must have at least one constraint");
+
+ std::string *Current = &Codes[0];
+ TargetLowering::ConstraintType CurType = TLI.getConstraintType(*Current);
+ if (Codes.size() == 1) { // Single-letter constraints ('r') are very common.
+ ConstraintCode = *Current;
+ ConstraintType = CurType;
+ return;
+ }
+
+ unsigned CurGenerality = getConstraintGenerality(CurType);
+
+ // If we have multiple constraints, try to pick the most general one ahead
+ // of time. This isn't a wonderful solution, but handles common cases.
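+ // For example, assuming the usual mapping of 'i' to Other, 'r' to
+ // RegisterClass and 'm' to Memory, the multi-letter constraint "imr"
+ // resolves to 'm', the most general of the three.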
+ for (unsigned j = 1, e = Codes.size(); j != e; ++j) {
+ TargetLowering::ConstraintType ThisType = TLI.getConstraintType(Codes[j]);
+ unsigned ThisGenerality = getConstraintGenerality(ThisType);
+ if (ThisGenerality > CurGenerality) {
+ // This constraint letter is more general than the previous one,
+ // use it.
+ CurType = ThisType;
+ Current = &Codes[j];
+ CurGenerality = ThisGenerality;
+ }
+ }
+
+ ConstraintCode = *Current;
+ ConstraintType = CurType;
+}
+
+
+void SelectionDAGLowering::
+GetRegistersForValue(AsmOperandInfo &OpInfo, bool HasEarlyClobber,
+ std::set<unsigned> &OutputRegs,
+ std::set<unsigned> &InputRegs) {
+ // Compute whether this value requires an input register, an output register,
+ // or both.
+ bool isOutReg = false;
+ bool isInReg = false;
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ isOutReg = true;
+
+ // If this is an early-clobber output, or if there is an input
+ // constraint that matches this, we need to reserve the input register
+ // so no other inputs allocate to it.
+ isInReg = OpInfo.isEarlyClobber || OpInfo.hasMatchingInput;
+ break;
+ case InlineAsm::isInput:
+ isInReg = true;
+ isOutReg = false;
+ break;
+ case InlineAsm::isClobber:
+ isOutReg = true;
+ isInReg = true;
+ break;
+ }
+
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ std::vector<unsigned> Regs;
+
+ // If this is a constraint for a single physreg, or a constraint for a
+ // register class, find it.
+ std::pair<unsigned, const TargetRegisterClass*> PhysReg =
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+
+ unsigned NumRegs = 1;
+ if (OpInfo.ConstraintVT != MVT::Other)
+ NumRegs = TLI.getNumRegisters(OpInfo.ConstraintVT);
+ MVT::ValueType RegVT;
+ MVT::ValueType ValueVT = OpInfo.ConstraintVT;
+
+
+ // If this is a constraint for a specific physical register, like {r17},
+ // assign it now.
+ if (PhysReg.first) {
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = *PhysReg.second->vt_begin();
+
+ // Get the actual register value type. This is important, because the user
+ // may have asked for (e.g.) the AX register in i32 type. We need to
+ // remember that AX is actually i16 to get the right extension.
+ RegVT = *PhysReg.second->vt_begin();
+
+ // This is an explicit reference to a physical register.
+ Regs.push_back(PhysReg.first);
+
+ // If this is an expanded reference, add the rest of the regs to Regs.
+ if (NumRegs != 1) {
+ TargetRegisterClass::iterator I = PhysReg.second->begin();
+ TargetRegisterClass::iterator E = PhysReg.second->end();
+ for (; *I != PhysReg.first; ++I)
+ assert(I != E && "Didn't find reg!");
+
+ // Already added the first reg.
+ --NumRegs; ++I;
+ for (; NumRegs; --NumRegs, ++I) {
+ assert(I != E && "Ran out of registers to allocate!");
+ Regs.push_back(*I);
+ }
+ }
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+ OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs);
+ return;
+ }
+
+ // Otherwise, if this was a reference to an LLVM register class, create vregs
+ // for this reference.
+ std::vector<unsigned> RegClassRegs;
+ const TargetRegisterClass *RC = PhysReg.second;
+ if (RC) {
+ // If this is an early clobber or tied register, our regalloc doesn't know
+ // how to maintain the constraint. If it isn't, go ahead and create vregs
+ // and let the regalloc do the right thing.
+ if (!OpInfo.hasMatchingInput && !OpInfo.isEarlyClobber &&
+ // If there is some other early clobber and this is an input register,
+ // then we are forced to pre-allocate the input reg so it doesn't
+ // conflict with the earlyclobber.
+ !(OpInfo.Type == InlineAsm::isInput && HasEarlyClobber)) {
+ RegVT = *PhysReg.second->vt_begin();
+
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = RegVT;
+
+ // Create the appropriate number of virtual registers.
+ SSARegMap *RegMap = MF.getSSARegMap();
+ for (; NumRegs; --NumRegs)
+ Regs.push_back(RegMap->createVirtualRegister(PhysReg.second));
+
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+ OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs);
+ return;
+ }
+
+ // Otherwise, we can't allocate it. Let the code below figure out how to
+ // maintain these constraints.
+ RegClassRegs.assign(PhysReg.second->begin(), PhysReg.second->end());
+
+ } else {
+ // This is a reference to a register class that doesn't directly correspond
+ // to an LLVM register class. Allocate NumRegs consecutive, available,
+ // registers from the class.
+ RegClassRegs = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ }
+
+ const MRegisterInfo *MRI = DAG.getTarget().getRegisterInfo();
+ unsigned NumAllocated = 0;
+ for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
+ unsigned Reg = RegClassRegs[i];
+ // See if this register is available.
+ if ((isOutReg && OutputRegs.count(Reg)) || // Already used.
+ (isInReg && InputRegs.count(Reg))) { // Already used.
+ // Make sure we find consecutive registers.
+ NumAllocated = 0;
+ continue;
+ }
+
+ // Check to see if this register is allocatable (i.e. don't give out the
+ // stack pointer).
+ if (RC == 0) {
+ RC = isAllocatableRegister(Reg, MF, TLI, MRI);
+ if (!RC) { // Couldn't allocate this register.
+ // Reset NumAllocated to make sure we return consecutive registers.
+ NumAllocated = 0;
+ continue;
+ }
+ }
+
+ // Okay, this register is good, we can use it.
+ ++NumAllocated;
+
+ // If we allocated enough consecutive registers, succeed.
+ if (NumAllocated == NumRegs) {
+ unsigned RegStart = (i-NumAllocated)+1;
+ unsigned RegEnd = i+1;
+ // Mark all of the allocated registers used.
+ for (unsigned i = RegStart; i != RegEnd; ++i)
+ Regs.push_back(RegClassRegs[i]);
+
+ OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(),
+ OpInfo.ConstraintVT);
+ OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs);
+ return;
+ }
+ }
+
+ // Otherwise, we couldn't allocate enough registers for this.
+ return;
+}
+
+
+/// visitInlineAsm - Handle a call to an InlineAsm object.
+///
+void SelectionDAGLowering::visitInlineAsm(CallInst &I) {
+ InlineAsm *IA = cast<InlineAsm>(I.getOperand(0));
+
+ /// ConstraintOperands - Information about all of the constraints.
+ std::vector<AsmOperandInfo> ConstraintOperands;
+
+ SDOperand Chain = getRoot();
+ SDOperand Flag;
+
+ std::set<unsigned> OutputRegs, InputRegs;
+
+ // Do a prepass over the constraints, canonicalizing them, and building up the
+ // ConstraintOperands list.
+ std::vector<InlineAsm::ConstraintInfo>
+ ConstraintInfos = IA->ParseConstraints();
+
+ // SawEarlyClobber - Keep track of whether we saw an earlyclobber output
+ // constraint. If so, we can't let the register allocator allocate any input
+ // registers, because it will not know to avoid the earlyclobbered output reg.
+ bool SawEarlyClobber = false;
+
+ unsigned OpNo = 1; // OpNo - The operand of the CallInst.
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i]));
+ AsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ MVT::ValueType OpVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ if (!OpInfo.isIndirect) {
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(I.getType() != Type::VoidTy && "Bad inline asm!");
+ OpVT = TLI.getValueType(I.getType());
+ } else {
+ OpInfo.CallOperandVal = I.getOperand(OpNo++);
+ }
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = I.getOperand(OpNo++);
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ // If this is an input or an indirect output, process the call argument.
+ if (OpInfo.CallOperandVal) {
+ OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+ const Type *OpTy = OpInfo.CallOperandVal->getType();
+ // If this is an indirect operand, the operand is a pointer to the
+ // accessed type.
+ if (OpInfo.isIndirect)
+ OpTy = cast<PointerType>(OpTy)->getElementType();
+
+ // If OpTy is not a first-class value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isFirstClassType() && OpTy->isSized()) {
+ unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ OpTy = IntegerType::get(BitSize);
+ break;
+ }
+ }
+
+ OpVT = TLI.getValueType(OpTy, true);
+ }
+
+ OpInfo.ConstraintVT = OpVT;
+
+ // Compute the constraint code and ConstraintType to use.
+ OpInfo.ComputeConstraintToUse(TLI);
+
+ // Keep track of whether we see an earlyclobber.
+ SawEarlyClobber |= OpInfo.isEarlyClobber;
+
+ // If this is a memory input and the operand is not indirect, do whatever is
+ // needed to provide an address for the memory input.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ !OpInfo.isIndirect) {
+ assert(OpInfo.Type == InlineAsm::isInput &&
+ "Can only indirectify direct input operands!");
+
+ // Memory operands really want the address of the value. If we don't have
+ // an indirect input, put it in the constpool if we can, otherwise spill
+ // it to a stack slot.
+
+ // If the operand is a float, integer, or vector constant, spill to a
+ // constant pool entry to get its address.
+ Value *OpVal = OpInfo.CallOperandVal;
+ if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+ isa<ConstantVector>(OpVal)) {
+ OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
+ TLI.getPointerTy());
+ } else {
+ // Otherwise, create a stack slot and emit a store to it before the
+ // asm.
+ const Type *Ty = OpVal->getType();
+ uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty);
+ unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align);
+ SDOperand StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ Chain = DAG.getStore(Chain, OpInfo.CallOperand, StackSlot, NULL, 0);
+ OpInfo.CallOperand = StackSlot;
+ }
+
+ // There is no longer a Value* corresponding to this operand.
+ OpInfo.CallOperandVal = 0;
+ // It is now an indirect operand.
+ OpInfo.isIndirect = true;
+ }
+
+ // If this constraint is for a specific register, allocate it before
+ // anything else.
+ if (OpInfo.ConstraintType == TargetLowering::C_Register)
+ GetRegistersForValue(OpInfo, SawEarlyClobber, OutputRegs, InputRegs);
+ }
+ ConstraintInfos.clear();
+
+
+ // Second pass - Loop over all of the operands, assigning virtual or physregs
+ // to registerclass operands.
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ AsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ // C_Register operands have already been allocated, Other/Memory don't need
+ // to be.
+ if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+ GetRegistersForValue(OpInfo, SawEarlyClobber, OutputRegs, InputRegs);
+ }
+
+ // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+ std::vector<SDOperand> AsmNodeOperands;
+ AsmNodeOperands.push_back(SDOperand()); // reserve space for input chain
+ AsmNodeOperands.push_back(
+ DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other));
+
+
+ // Loop over all of the inputs, copying the operand values into the
+ // appropriate registers and processing the output regs.
+ RegsForValue RetValRegs;
+
+ // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
+ std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ AsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput: {
+ if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
+ OpInfo.ConstraintType != TargetLowering::C_Register) {
+ // Memory output, or 'other' output (e.g. 'X' constraint).
+ assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+
+ // Add information to the INLINEASM node to know about this output.
+ unsigned ResOpType = 4/*MEM*/ | (1 << 3);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(OpInfo.CallOperand);
+ break;
+ }
+
+ // Otherwise, this is a register or register class output.
+
+ // Copy the output from the appropriate register. Find a register that
+ // we can use.
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ cerr << "Couldn't allocate output reg for contraint '"
+ << OpInfo.ConstraintCode << "'!\n";
+ exit(1);
+ }
+
+ if (!OpInfo.isIndirect) {
+ // This is the result value of the call.
+ assert(RetValRegs.Regs.empty() &&
+ "Cannot have multiple output constraints yet!");
+ assert(I.getType() != Type::VoidTy && "Bad inline asm!");
+ RetValRegs = OpInfo.AssignedRegs;
+ } else {
+ IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
+ OpInfo.CallOperandVal));
+ }
+
+ // Add information to the INLINEASM node to know that this register is
+ // set.
+ OpInfo.AssignedRegs.AddInlineAsmOperands(2 /*REGDEF*/, DAG,
+ AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isInput: {
+ SDOperand InOperandVal = OpInfo.CallOperand;
+
+ if (isdigit(OpInfo.ConstraintCode[0])) { // Matching constraint?
+ // If this is required to match an output register we have already set,
+ // just use its register.
+ unsigned OperandNo = atoi(OpInfo.ConstraintCode.c_str());
+
+ // Scan until we find the definition we already emitted of this operand.
+ // When we find it, create a RegsForValue operand.
+ unsigned CurOp = 2; // The first operand.
+ for (; OperandNo; --OperandNo) {
+ // Advance to the next operand.
+ unsigned NumOps =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getValue();
+ assert(((NumOps & 7) == 2 /*REGDEF*/ ||
+ (NumOps & 7) == 4 /*MEM*/) &&
+ "Skipped past definitions?");
+ CurOp += (NumOps>>3)+1;
+ }
+
+ unsigned NumOps =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getValue();
+ if ((NumOps & 7) == 2 /*REGDEF*/) {
+ // Add NumOps>>3 registers to MatchedRegs.
+ RegsForValue MatchedRegs;
+ MatchedRegs.ValueVT = InOperandVal.getValueType();
+ MatchedRegs.RegVT = AsmNodeOperands[CurOp+1].getValueType();
+ for (unsigned i = 0, e = NumOps>>3; i != e; ++i) {
+ unsigned Reg =
+ cast<RegisterSDNode>(AsmNodeOperands[++CurOp])->getReg();
+ MatchedRegs.Regs.push_back(Reg);
+ }
+
+ // Use the produced MatchedRegs object to copy the input value into the
+ // matched registers and record them as operands of the INLINEASM node.
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, Chain, &Flag);
+ MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/, DAG, AsmNodeOperands);
+ break;
+ } else {
+ assert((NumOps & 7) == 4/*MEM*/ && "Unknown matching constraint!");
+ assert(0 && "matching constraints for memory operands unimp");
+ }
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+ assert(!OpInfo.isIndirect &&
+ "Don't know how to handle indirect other inputs yet!");
+
+ InOperandVal = TLI.isOperandValidForConstraint(InOperandVal,
+ OpInfo.ConstraintCode[0],
+ DAG);
+ if (!InOperandVal.Val) {
+ cerr << "Invalid operand for inline asm constraint '"
+ << OpInfo.ConstraintCode << "'!\n";
+ exit(1);
+ }
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType = 3 /*IMM*/ | (1 << 3);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(InOperandVal);
+ break;
+ } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+ assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+ assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+ "Memory operands expect pointer values");
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType = 4/*MEM*/ | (1 << 3);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(InOperandVal);
+ break;
+ }
+
+ assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+ OpInfo.ConstraintType == TargetLowering::C_Register) &&
+ "Unknown constraint type!");
+ assert(!OpInfo.isIndirect &&
+ "Don't know how to handle indirect register inputs yet!");
+
+ // Copy the input into the appropriate registers.
+ assert(!OpInfo.AssignedRegs.Regs.empty() &&
+ "Couldn't allocate input reg!");
+
+ OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, Chain, &Flag);
+
+ OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, DAG,
+ AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isClobber: {
+ // Add the clobbered value to the operand list, so that the register
+ // allocator is aware that the physreg got clobbered.
+ if (!OpInfo.AssignedRegs.Regs.empty())
+ OpInfo.AssignedRegs.AddInlineAsmOperands(2/*REGDEF*/, DAG,
+ AsmNodeOperands);
+ break;
+ }
+ }
+ }
+
+ // Finish up input operands.
+ AsmNodeOperands[0] = Chain;
+ if (Flag.Val) AsmNodeOperands.push_back(Flag);
+
+ Chain = DAG.getNode(ISD::INLINEASM,
+ DAG.getNodeValueTypes(MVT::Other, MVT::Flag), 2,
+ &AsmNodeOperands[0], AsmNodeOperands.size());
+ Flag = Chain.getValue(1);
+
+ // If this asm returns a register value, copy the result from that register
+ // and set it as the value of the call.
+ if (!RetValRegs.Regs.empty()) {
+ SDOperand Val = RetValRegs.getCopyFromRegs(DAG, Chain, &Flag);
+
+ // If the result of the inline asm is a vector, it may have the wrong
+ // width/num elts. Make sure to convert it to the right type with
+ // bit_convert.
+ if (MVT::isVector(Val.getValueType())) {
+ const VectorType *VTy = cast<VectorType>(I.getType());
+ MVT::ValueType DesiredVT = TLI.getValueType(VTy);
+
+ Val = DAG.getNode(ISD::BIT_CONVERT, DesiredVT, Val);
+ }
+
+ setValue(&I, Val);
+ }
+
+ std::vector<std::pair<SDOperand, Value*> > StoresToEmit;
+
+ // Process indirect outputs, first output all of the flagged copies out of
+ // physregs.
+ for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
+ RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
+ Value *Ptr = IndirectStoresToEmit[i].second;
+ SDOperand OutVal = OutRegs.getCopyFromRegs(DAG, Chain, &Flag);
+ StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+ }
+
+ // Emit the non-flagged stores from the physregs.
+ SmallVector<SDOperand, 8> OutChains;
+ for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i)
+ OutChains.push_back(DAG.getStore(Chain, StoresToEmit[i].first,
+ getValue(StoresToEmit[i].second),
+ StoresToEmit[i].second, 0));
+ if (!OutChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &OutChains[0], OutChains.size());
+ DAG.setRoot(Chain);
+}
+
+
+void SelectionDAGLowering::visitMalloc(MallocInst &I) {
+ SDOperand Src = getValue(I.getOperand(0));
+
+ MVT::ValueType IntPtr = TLI.getPointerTy();
+
+ if (IntPtr < Src.getValueType())
+ Src = DAG.getNode(ISD::TRUNCATE, IntPtr, Src);
+ else if (IntPtr > Src.getValueType())
+ Src = DAG.getNode(ISD::ZERO_EXTEND, IntPtr, Src);
+
+ // Scale the source by the type size.
+ uint64_t ElementSize = TD->getTypeSize(I.getType()->getElementType());
+ Src = DAG.getNode(ISD::MUL, Src.getValueType(),
+ Src, getIntPtrConstant(ElementSize));
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Src;
+ Entry.Ty = TLI.getTargetData()->getIntPtrType();
+ Args.push_back(Entry);
+
+ std::pair<SDOperand,SDOperand> Result =
+ TLI.LowerCallTo(getRoot(), I.getType(), false, false, CallingConv::C, true,
+ DAG.getExternalSymbol("malloc", IntPtr),
+ Args, DAG);
+ setValue(&I, Result.first); // Pointers always fit in registers
+ DAG.setRoot(Result.second);
+}
+
+void SelectionDAGLowering::visitFree(FreeInst &I) {
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = getValue(I.getOperand(0));
+ Entry.Ty = TLI.getTargetData()->getIntPtrType();
+ Args.push_back(Entry);
+ MVT::ValueType IntPtr = TLI.getPointerTy();
+ std::pair<SDOperand,SDOperand> Result =
+ TLI.LowerCallTo(getRoot(), Type::VoidTy, false, false, CallingConv::C, true,
+ DAG.getExternalSymbol("free", IntPtr), Args, DAG);
+ DAG.setRoot(Result.second);
+}
+
+// InsertAtEndOfBasicBlock - This method should be implemented by targets that
+// mark instructions with the 'usesCustomDAGSchedInserter' flag. These
+// instructions are special in various ways, which require special support to
+// insert. The specified MachineInstr is created but not inserted into any
+// basic blocks, and the scheduler passes ownership of it to this method.
+MachineBasicBlock *TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ cerr << "If a target marks an instruction with "
+ << "'usesCustomDAGSchedInserter', it must implement "
+ << "TargetLowering::InsertAtEndOfBasicBlock!\n";
+ abort();
+ return 0;
+}
+
+void SelectionDAGLowering::visitVAStart(CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VASTART, MVT::Other, getRoot(),
+ getValue(I.getOperand(1)),
+ DAG.getSrcValue(I.getOperand(1))));
+}
+
+void SelectionDAGLowering::visitVAArg(VAArgInst &I) {
+ SDOperand V = DAG.getVAArg(TLI.getValueType(I.getType()), getRoot(),
+ getValue(I.getOperand(0)),
+ DAG.getSrcValue(I.getOperand(0)));
+ setValue(&I, V);
+ DAG.setRoot(V.getValue(1));
+}
+
+void SelectionDAGLowering::visitVAEnd(CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VAEND, MVT::Other, getRoot(),
+ getValue(I.getOperand(1)),
+ DAG.getSrcValue(I.getOperand(1))));
+}
+
+void SelectionDAGLowering::visitVACopy(CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VACOPY, MVT::Other, getRoot(),
+ getValue(I.getOperand(1)),
+ getValue(I.getOperand(2)),
+ DAG.getSrcValue(I.getOperand(1)),
+ DAG.getSrcValue(I.getOperand(2))));
+}
+
+/// TargetLowering::LowerArguments - This is the default LowerArguments
+/// implementation, which just inserts a FORMAL_ARGUMENTS node. FIXME: When all
+/// targets are migrated to using FORMAL_ARGUMENTS, this hook should be
+/// integrated into SDISel.
+std::vector<SDOperand>
+TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
+ const FunctionType *FTy = F.getFunctionType();
+ const ParamAttrsList *Attrs = FTy->getParamAttrs();
+ // Add CC# and isVararg as operands to the FORMAL_ARGUMENTS node.
+ std::vector<SDOperand> Ops;
+ Ops.push_back(DAG.getRoot());
+ Ops.push_back(DAG.getConstant(F.getCallingConv(), getPointerTy()));
+ Ops.push_back(DAG.getConstant(F.isVarArg(), getPointerTy()));
+
+ // Add one result value for each formal argument.
+ std::vector<MVT::ValueType> RetVals;
+ unsigned j = 1;
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I, ++j) {
+ MVT::ValueType VT = getValueType(I->getType());
+ unsigned Flags = ISD::ParamFlags::NoFlagSet;
+ unsigned OriginalAlignment =
+ getTargetData()->getABITypeAlignment(I->getType());
+
+ // FIXME: Distinguish a formal with no [sz]ext attribute from one
+ // that is zero extended!
+ if (Attrs && Attrs->paramHasAttr(j, ParamAttr::ZExt))
+ Flags &= ~(ISD::ParamFlags::SExt);
+ if (Attrs && Attrs->paramHasAttr(j, ParamAttr::SExt))
+ Flags |= ISD::ParamFlags::SExt;
+ if (Attrs && Attrs->paramHasAttr(j, ParamAttr::InReg))
+ Flags |= ISD::ParamFlags::InReg;
+ if (Attrs && Attrs->paramHasAttr(j, ParamAttr::StructRet))
+ Flags |= ISD::ParamFlags::StructReturn;
+ if (Attrs && Attrs->paramHasAttr(j, ParamAttr::ByVal))
+ Flags |= ISD::ParamFlags::ByVal;
+ Flags |= (OriginalAlignment << ISD::ParamFlags::OrigAlignmentOffs);
+
+ switch (getTypeAction(VT)) {
+ default: assert(0 && "Unknown type action!");
+ case Legal:
+ RetVals.push_back(VT);
+ Ops.push_back(DAG.getConstant(Flags, MVT::i32));
+ break;
+ case Promote:
+ RetVals.push_back(getTypeToTransformTo(VT));
+ Ops.push_back(DAG.getConstant(Flags, MVT::i32));
+ break;
+ case Expand: {
+ // If this is an illegal type, it needs to be broken up to fit into
+ // registers.
+ MVT::ValueType RegisterVT = getRegisterType(VT);
+ unsigned NumRegs = getNumRegisters(VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ RetVals.push_back(RegisterVT);
+ // If it isn't the first piece, the alignment must be 1.
+ if (i > 0)
+ Flags = (Flags & (~ISD::ParamFlags::OrigAlignment)) |
+ (1 << ISD::ParamFlags::OrigAlignmentOffs);
+ Ops.push_back(DAG.getConstant(Flags, MVT::i32));
+ }
+ break;
+ }
+ }
+ }
+
+ RetVals.push_back(MVT::Other);
+
+ // Create the node.
+ SDNode *Result = DAG.getNode(ISD::FORMAL_ARGUMENTS,
+ DAG.getNodeValueTypes(RetVals), RetVals.size(),
+ &Ops[0], Ops.size()).Val;
+ unsigned NumArgRegs = Result->getNumValues() - 1;
+ DAG.setRoot(SDOperand(Result, NumArgRegs));
+
+ // Set up the return result vector.
+ Ops.clear();
+ unsigned i = 0;
+ unsigned Idx = 1;
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
+ ++I, ++Idx) {
+ MVT::ValueType VT = getValueType(I->getType());
+
+ switch (getTypeAction(VT)) {
+ default: assert(0 && "Unknown type action!");
+ case Legal:
+ Ops.push_back(SDOperand(Result, i++));
+ break;
+ case Promote: {
+ SDOperand Op(Result, i++);
+ if (MVT::isInteger(VT)) {
+ if (Attrs && Attrs->paramHasAttr(Idx, ParamAttr::SExt))
+ Op = DAG.getNode(ISD::AssertSext, Op.getValueType(), Op,
+ DAG.getValueType(VT));
+ else if (Attrs && Attrs->paramHasAttr(Idx, ParamAttr::ZExt))
+ Op = DAG.getNode(ISD::AssertZext, Op.getValueType(), Op,
+ DAG.getValueType(VT));
+ Op = DAG.getNode(ISD::TRUNCATE, VT, Op);
+ } else {
+ assert(MVT::isFloatingPoint(VT) && "Not int or FP?");
+ Op = DAG.getNode(ISD::FP_ROUND, VT, Op);
+ }
+ Ops.push_back(Op);
+ break;
+ }
+ case Expand: {
+ MVT::ValueType PartVT = getRegisterType(VT);
+ unsigned NumParts = getNumRegisters(VT);
+ SmallVector<SDOperand, 4> Parts(NumParts);
+ for (unsigned j = 0; j != NumParts; ++j)
+ Parts[j] = SDOperand(Result, i++);
+ Ops.push_back(getCopyFromParts(DAG, &Parts[0], NumParts, PartVT, VT));
+ break;
+ }
+ }
+ }
+ assert(i == NumArgRegs && "Argument register count mismatch!");
+ return Ops;
+}
+
+
+/// TargetLowering::LowerCallTo - This is the default LowerCallTo
+/// implementation, which just inserts an ISD::CALL node, which is later custom
+/// lowered by the target to something concrete. FIXME: When all targets are
+/// migrated to using ISD::CALL, this hook should be integrated into SDISel.
+std::pair<SDOperand, SDOperand>
+TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy,
+ bool RetTyIsSigned, bool isVarArg,
+ unsigned CallingConv, bool isTailCall,
+ SDOperand Callee,
+ ArgListTy &Args, SelectionDAG &DAG) {
+ SmallVector<SDOperand, 32> Ops;
+ Ops.push_back(Chain); // Op#0 - Chain
+ Ops.push_back(DAG.getConstant(CallingConv, getPointerTy())); // Op#1 - CC
+ Ops.push_back(DAG.getConstant(isVarArg, getPointerTy())); // Op#2 - VarArg
+ Ops.push_back(DAG.getConstant(isTailCall, getPointerTy())); // Op#3 - Tail
+ Ops.push_back(Callee);
+
+ // Handle all of the outgoing arguments.
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ MVT::ValueType VT = getValueType(Args[i].Ty);
+ SDOperand Op = Args[i].Node;
+ unsigned Flags = ISD::ParamFlags::NoFlagSet;
+ unsigned OriginalAlignment =
+ getTargetData()->getABITypeAlignment(Args[i].Ty);
+
+ if (Args[i].isSExt)
+ Flags |= ISD::ParamFlags::SExt;
+ if (Args[i].isZExt)
+ Flags |= ISD::ParamFlags::ZExt;
+ if (Args[i].isInReg)
+ Flags |= ISD::ParamFlags::InReg;
+ if (Args[i].isSRet)
+ Flags |= ISD::ParamFlags::StructReturn;
+ Flags |= OriginalAlignment << ISD::ParamFlags::OrigAlignmentOffs;
+
+ switch (getTypeAction(VT)) {
+ default: assert(0 && "Unknown type action!");
+ case Legal:
+ Ops.push_back(Op);
+ Ops.push_back(DAG.getConstant(Flags, MVT::i32));
+ break;
+ case Promote:
+ if (MVT::isInteger(VT)) {
+ unsigned ExtOp;
+ if (Args[i].isSExt)
+ ExtOp = ISD::SIGN_EXTEND;
+ else if (Args[i].isZExt)
+ ExtOp = ISD::ZERO_EXTEND;
+ else
+ ExtOp = ISD::ANY_EXTEND;
+ Op = DAG.getNode(ExtOp, getTypeToTransformTo(VT), Op);
+ } else {
+ assert(MVT::isFloatingPoint(VT) && "Not int or FP?");
+ Op = DAG.getNode(ISD::FP_EXTEND, getTypeToTransformTo(VT), Op);
+ }
+ Ops.push_back(Op);
+ Ops.push_back(DAG.getConstant(Flags, MVT::i32));
+ break;
+ case Expand: {
+ MVT::ValueType PartVT = getRegisterType(VT);
+ unsigned NumParts = getNumRegisters(VT);
+ SmallVector<SDOperand, 4> Parts(NumParts);
+ getCopyToParts(DAG, Op, &Parts[0], NumParts, PartVT);
+ for (unsigned i = 0; i != NumParts; ++i) {
+ // If it isn't the first piece, the alignment must be 1.
+ unsigned MyFlags = Flags;
+ if (i != 0)
+ MyFlags = (MyFlags & (~ISD::ParamFlags::OrigAlignment)) |
+ (1 << ISD::ParamFlags::OrigAlignmentOffs);
+
+ Ops.push_back(Parts[i]);
+ Ops.push_back(DAG.getConstant(MyFlags, MVT::i32));
+ }
+ break;
+ }
+ }
+ }
+
+ // Figure out the result value types.
+ MVT::ValueType VT = getValueType(RetTy);
+ MVT::ValueType RegisterVT = getRegisterType(VT);
+ unsigned NumRegs = getNumRegisters(VT);
+ SmallVector<MVT::ValueType, 4> RetTys(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ RetTys[i] = RegisterVT;
+
+ RetTys.push_back(MVT::Other); // Always has a chain.
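+ // E.g. an i64 return value on a target with 32-bit registers gives
+ // NumRegs == 2 and RetTys == { i32, i32, Other }.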
+
+ // Create the CALL node.
+ SDOperand Res = DAG.getNode(ISD::CALL,
+ DAG.getVTList(&RetTys[0], NumRegs + 1),
+ &Ops[0], Ops.size());
+ SDOperand Chain = Res.getValue(NumRegs);
+
+ // Gather up the call result into a single value.
+ if (RetTy != Type::VoidTy) {
+ ISD::NodeType AssertOp = ISD::AssertSext;
+ if (!RetTyIsSigned)
+ AssertOp = ISD::AssertZext;
+ SmallVector<SDOperand, 4> Results(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Results[i] = Res.getValue(i);
+ Res = getCopyFromParts(DAG, &Results[0], NumRegs, RegisterVT, VT, AssertOp);
+ }
+
+ return std::make_pair(Res, Chain);
+}
+
+SDOperand TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
+ assert(0 && "LowerOperation not implemented for this target!");
+ abort();
+ return SDOperand();
+}
+
+SDOperand TargetLowering::CustomPromoteOperation(SDOperand Op,
+ SelectionDAG &DAG) {
+ assert(0 && "CustomPromoteOperation not implemented for this target!");
+ abort();
+ return SDOperand();
+}
+
+/// getMemsetValue - Build a value of type VT in which every byte equals the
+/// (i8) memset value operand, i.e. the byte value splatted across VT.
+static SDOperand getMemsetValue(SDOperand Value, MVT::ValueType VT,
+ SelectionDAG &DAG) {
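+ // For example, a constant byte value of 0x41 splatted into MVT::i32 yields
+ // the constant 0x41414141; a non-constant value gets the same splat built
+ // out of SHL/OR nodes below.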
+ MVT::ValueType CurVT = VT;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+ uint64_t Val = C->getValue() & 255;
+ unsigned Shift = 8;
+ while (CurVT != MVT::i8) {
+ Val = (Val << Shift) | Val;
+ Shift <<= 1;
+ CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
+ }
+ return DAG.getConstant(Val, VT);
+ } else {
+ Value = DAG.getNode(ISD::ZERO_EXTEND, VT, Value);
+ unsigned Shift = 8;
+ while (CurVT != MVT::i8) {
+ Value =
+ DAG.getNode(ISD::OR, VT,
+ DAG.getNode(ISD::SHL, VT, Value,
+ DAG.getConstant(Shift, MVT::i8)), Value);
+ Shift <<= 1;
+ CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
+ }
+
+ return Value;
+ }
+}
+
+/// getMemsetStringVal - Similar to getMemsetValue, except this is only used
+/// when a memcpy is turned into a memset because the source is a constant
+/// string pointer.
+static SDOperand getMemsetStringVal(MVT::ValueType VT,
+ SelectionDAG &DAG, TargetLowering &TLI,
+ std::string &Str, unsigned Offset) {
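+ // For example, Str = "abcd" with Offset = 0 and VT = MVT::i32 on a
+ // little-endian target reads the bytes back-to-front and produces the
+ // constant 0x64636261.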
+ uint64_t Val = 0;
+ unsigned MSB = MVT::getSizeInBits(VT) / 8;
+ if (TLI.isLittleEndian())
+ Offset = Offset + MSB - 1;
+ for (unsigned i = 0; i != MSB; ++i) {
+ Val = (Val << 8) | (unsigned char)Str[Offset];
+ Offset += TLI.isLittleEndian() ? -1 : 1;
+ }
+ return DAG.getConstant(Val, VT);
+}
+
+/// getMemBasePlusOffset - Returns a node computing Base plus the given Offset.
+static SDOperand getMemBasePlusOffset(SDOperand Base, unsigned Offset,
+ SelectionDAG &DAG, TargetLowering &TLI) {
+ MVT::ValueType VT = Base.getValueType();
+ return DAG.getNode(ISD::ADD, VT, Base, DAG.getConstant(Offset, VT));
+}
+
+/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
+/// to replace the memset / memcpy is below the threshold. It also returns the
+/// types of the sequence of memory ops to perform memset / memcpy.
+static bool MeetsMaxMemopRequirement(std::vector<MVT::ValueType> &MemOps,
+ unsigned Limit, uint64_t Size,
+ unsigned Align, TargetLowering &TLI) {
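+ // Illustrative case: Size = 7 and Align = 4, with no unaligned accesses and
+ // i32 as the widest legal integer type, decomposes into { i32, i16, i8 }
+ // (4 + 2 + 1 bytes), i.e. three memory ops.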
+ MVT::ValueType VT;
+
+ if (TLI.allowsUnalignedMemoryAccesses()) {
+ VT = MVT::i64;
+ } else {
+ switch (Align & 7) {
+ case 0:
+ VT = MVT::i64;
+ break;
+ case 4:
+ VT = MVT::i32;
+ break;
+ case 2:
+ VT = MVT::i16;
+ break;
+ default:
+ VT = MVT::i8;
+ break;
+ }
+ }
+
+ MVT::ValueType LVT = MVT::i64;
+ while (!TLI.isTypeLegal(LVT))
+ LVT = (MVT::ValueType)((unsigned)LVT - 1);
+ assert(MVT::isInteger(LVT));
+
+ if (VT > LVT)
+ VT = LVT;
+
+ unsigned NumMemOps = 0;
+ while (Size != 0) {
+ unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+ while (VTSize > Size) {
+ VT = (MVT::ValueType)((unsigned)VT - 1);
+ VTSize >>= 1;
+ }
+ assert(MVT::isInteger(VT));
+
+ if (++NumMemOps > Limit)
+ return false;
+ MemOps.push_back(VT);
+ Size -= VTSize;
+ }
+
+ return true;
+}
+
+void SelectionDAGLowering::visitMemIntrinsic(CallInst &I, unsigned Op) {
+ SDOperand Op1 = getValue(I.getOperand(1));
+ SDOperand Op2 = getValue(I.getOperand(2));
+ SDOperand Op3 = getValue(I.getOperand(3));
+ SDOperand Op4 = getValue(I.getOperand(4));
+ unsigned Align = (unsigned)cast<ConstantSDNode>(Op4)->getValue();
+ if (Align == 0) Align = 1;
+
+ if (ConstantSDNode *Size = dyn_cast<ConstantSDNode>(Op3)) {
+ std::vector<MVT::ValueType> MemOps;
+
+ // Expand memset / memcpy to a series of load / store ops
+ // if the size operand falls below a certain threshold.
+ SmallVector<SDOperand, 8> OutChains;
+ switch (Op) {
+ default: break; // Do nothing for now.
+ case ISD::MEMSET: {
+ if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemset(),
+ Size->getValue(), Align, TLI)) {
+ unsigned NumMemOps = MemOps.size();
+ unsigned Offset = 0;
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ MVT::ValueType VT = MemOps[i];
+ unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+ SDOperand Value = getMemsetValue(Op2, VT, DAG);
+ SDOperand Store = DAG.getStore(getRoot(), Value,
+ getMemBasePlusOffset(Op1, Offset, DAG, TLI),
+ I.getOperand(1), Offset);
+ OutChains.push_back(Store);
+ Offset += VTSize;
+ }
+ }
+ break;
+ }
+ case ISD::MEMCPY: {
+ if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemcpy(),
+ Size->getValue(), Align, TLI)) {
+ unsigned NumMemOps = MemOps.size();
+ unsigned SrcOff = 0, DstOff = 0, SrcDelta = 0;
+ GlobalAddressSDNode *G = NULL;
+ std::string Str;
+ bool CopyFromStr = false;
+
+ if (Op2.getOpcode() == ISD::GlobalAddress)
+ G = cast<GlobalAddressSDNode>(Op2);
+ else if (Op2.getOpcode() == ISD::ADD &&
+ Op2.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+ Op2.getOperand(1).getOpcode() == ISD::Constant) {
+ G = cast<GlobalAddressSDNode>(Op2.getOperand(0));
+ SrcDelta = cast<ConstantSDNode>(Op2.getOperand(1))->getValue();
+ }
+ if (G) {
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
+ if (GV && GV->isConstant()) {
+ Str = GV->getStringValue(false);
+ if (!Str.empty()) {
+ CopyFromStr = true;
+ SrcOff += SrcDelta;
+ }
+ }
+ }
+
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ MVT::ValueType VT = MemOps[i];
+ unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+ SDOperand Value, Chain, Store;
+
+ if (CopyFromStr) {
+ Value = getMemsetStringVal(VT, DAG, TLI, Str, SrcOff);
+ Chain = getRoot();
+ Store =
+ DAG.getStore(Chain, Value,
+ getMemBasePlusOffset(Op1, DstOff, DAG, TLI),
+ I.getOperand(1), DstOff);
+ } else {
+ Value = DAG.getLoad(VT, getRoot(),
+ getMemBasePlusOffset(Op2, SrcOff, DAG, TLI),
+ I.getOperand(2), SrcOff);
+ Chain = Value.getValue(1);
+ Store =
+ DAG.getStore(Chain, Value,
+ getMemBasePlusOffset(Op1, DstOff, DAG, TLI),
+ I.getOperand(1), DstOff);
+ }
+ OutChains.push_back(Store);
+ SrcOff += VTSize;
+ DstOff += VTSize;
+ }
+ }
+ break;
+ }
+ }
+
+ if (!OutChains.empty()) {
+ DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &OutChains[0], OutChains.size()));
+ return;
+ }
+ }
+
+ DAG.setRoot(DAG.getNode(Op, MVT::Other, getRoot(), Op1, Op2, Op3, Op4));
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAGISel code
+//===----------------------------------------------------------------------===//
+
+unsigned SelectionDAGISel::MakeReg(MVT::ValueType VT) {
+ return RegMap->createVirtualRegister(TLI.getRegClassFor(VT));
+}
+
+void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.setPreservesAll();
+}
+
+
+
+bool SelectionDAGISel::runOnFunction(Function &Fn) {
+ MachineFunction &MF = MachineFunction::construct(&Fn, TLI.getTargetMachine());
+ RegMap = MF.getSSARegMap();
+ DOUT << "\n\n\n=== " << Fn.getName() << "\n";
+
+ FunctionLoweringInfo FuncInfo(TLI, Fn, MF);
+
+ if (ExceptionHandling)
+ for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ if (InvokeInst *Invoke = dyn_cast<InvokeInst>(I->getTerminator()))
+ // Mark landing pad.
+ FuncInfo.MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
+
+ for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ SelectBasicBlock(I, MF, FuncInfo);
+
+ // Add function live-ins to entry block live-in set.
+ BasicBlock *EntryBB = &Fn.getEntryBlock();
+ BB = FuncInfo.MBBMap[EntryBB];
+ if (!MF.livein_empty())
+ for (MachineFunction::livein_iterator I = MF.livein_begin(),
+ E = MF.livein_end(); I != E; ++I)
+ BB->addLiveIn(I->first);
+
+#ifndef NDEBUG
+ assert(FuncInfo.CatchInfoFound.size() == FuncInfo.CatchInfoLost.size() &&
+ "Not all catch info was assigned to a landing pad!");
+#endif
+
+ return true;
+}
+
+SDOperand SelectionDAGLowering::CopyValueToVirtualRegister(Value *V,
+ unsigned Reg) {
+ SDOperand Op = getValue(V);
+ assert((Op.getOpcode() != ISD::CopyFromReg ||
+ cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+ "Copy from a reg to the same reg!");
+
+ MVT::ValueType SrcVT = Op.getValueType();
+ MVT::ValueType RegisterVT = TLI.getRegisterType(SrcVT);
+ unsigned NumRegs = TLI.getNumRegisters(SrcVT);
+ SmallVector<SDOperand, 8> Regs(NumRegs);
+ SmallVector<SDOperand, 8> Chains(NumRegs);
+
+ // Copy the value by legal parts into sequential virtual registers.
+ getCopyToParts(DAG, Op, &Regs[0], NumRegs, RegisterVT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Chains[i] = DAG.getCopyToReg(getRoot(), Reg + i, Regs[i]);
+ return DAG.getNode(ISD::TokenFactor, MVT::Other, &Chains[0], NumRegs);
+}
+
+void SelectionDAGISel::
+LowerArguments(BasicBlock *LLVMBB, SelectionDAGLowering &SDL,
+ std::vector<SDOperand> &UnorderedChains) {
+ // If this is the entry block, emit arguments.
+ Function &F = *LLVMBB->getParent();
+ FunctionLoweringInfo &FuncInfo = SDL.FuncInfo;
+ SDOperand OldRoot = SDL.DAG.getRoot();
+ std::vector<SDOperand> Args = TLI.LowerArguments(F, SDL.DAG);
+
+ unsigned a = 0;
+ for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
+ AI != E; ++AI, ++a)
+ if (!AI->use_empty()) {
+ SDL.setValue(AI, Args[a]);
+
+      // If this argument is live outside of the entry block, insert a copy from
+      // wherever we got it to the vreg that other BBs will reference it as.
+ DenseMap<const Value*, unsigned>::iterator VMI=FuncInfo.ValueMap.find(AI);
+ if (VMI != FuncInfo.ValueMap.end()) {
+ SDOperand Copy = SDL.CopyValueToVirtualRegister(AI, VMI->second);
+ UnorderedChains.push_back(Copy);
+ }
+ }
+
+ // Finally, if the target has anything special to do, allow it to do so.
+ // FIXME: this should insert code into the DAG!
+ EmitFunctionEntryCode(F, SDL.DAG.getMachineFunction());
+}
+
+static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
+ MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
+ assert(!FLI.MBBMap[SrcBB]->isLandingPad() &&
+ "Copying catch info out of a landing pad!");
+ for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I)
+ if (isSelector(I)) {
+ // Apply the catch info to DestBB.
+ addCatchInfo(cast<CallInst>(*I), MMI, FLI.MBBMap[DestBB]);
+#ifndef NDEBUG
+ FLI.CatchInfoFound.insert(I);
+#endif
+ }
+}
+
+void SelectionDAGISel::BuildSelectionDAG(SelectionDAG &DAG, BasicBlock *LLVMBB,
+ std::vector<std::pair<MachineInstr*, unsigned> > &PHINodesToUpdate,
+ FunctionLoweringInfo &FuncInfo) {
+ SelectionDAGLowering SDL(DAG, TLI, FuncInfo);
+
+ std::vector<SDOperand> UnorderedChains;
+
+ // Lower any arguments needed in this block if this is the entry block.
+ if (LLVMBB == &LLVMBB->getParent()->getEntryBlock())
+ LowerArguments(LLVMBB, SDL, UnorderedChains);
+
+ BB = FuncInfo.MBBMap[LLVMBB];
+ SDL.setCurrentBasicBlock(BB);
+
+ MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+
+ if (ExceptionHandling && MMI && BB->isLandingPad()) {
+ // Add a label to mark the beginning of the landing pad. Deletion of the
+ // landing pad can thus be detected via the MachineModuleInfo.
+ unsigned LabelID = MMI->addLandingPad(BB);
+ DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other, DAG.getEntryNode(),
+ DAG.getConstant(LabelID, MVT::i32)));
+
+ // Mark exception register as live in.
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ if (Reg) BB->addLiveIn(Reg);
+
+ // Mark exception selector register as live in.
+ Reg = TLI.getExceptionSelectorRegister();
+ if (Reg) BB->addLiveIn(Reg);
+
+ // FIXME: Hack around an exception handling flaw (PR1508): the personality
+ // function and list of typeids logically belong to the invoke (or, if you
+ // like, the basic block containing the invoke), and need to be associated
+ // with it in the dwarf exception handling tables. Currently however the
+ // information is provided by an intrinsic (eh.selector) that can be moved
+ // to unexpected places by the optimizers: if the unwind edge is critical,
+ // then breaking it can result in the intrinsics being in the successor of
+ // the landing pad, not the landing pad itself. This results in exceptions
+ // not being caught because no typeids are associated with the invoke.
+ // This may not be the only way things can go wrong, but it is the only way
+ // we try to work around for the moment.
+ BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
+
+ if (Br && Br->isUnconditional()) { // Critical edge?
+ BasicBlock::iterator I, E;
+ for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
+ if (isSelector(I))
+ break;
+
+ if (I == E)
+ // No catch info found - try to extract some from the successor.
+ copyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, FuncInfo);
+ }
+ }
+
+ // Lower all of the non-terminator instructions.
+ for (BasicBlock::iterator I = LLVMBB->begin(), E = --LLVMBB->end();
+ I != E; ++I)
+ SDL.visit(*I);
+
+ // Ensure that all instructions which are used outside of their defining
+ // blocks are available as virtual registers. Invoke is handled elsewhere.
+ for (BasicBlock::iterator I = LLVMBB->begin(), E = LLVMBB->end(); I != E;++I)
+ if (!I->use_empty() && !isa<PHINode>(I) && !isa<InvokeInst>(I)) {
+ DenseMap<const Value*, unsigned>::iterator VMI =FuncInfo.ValueMap.find(I);
+ if (VMI != FuncInfo.ValueMap.end())
+ UnorderedChains.push_back(
+ SDL.CopyValueToVirtualRegister(I, VMI->second));
+ }
+
+ // Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
+ // ensure constants are generated when needed. Remember the virtual registers
+ // that need to be added to the Machine PHI nodes as input. We cannot just
+  // directly add them, because expansion might result in multiple MBBs for one
+ // BB. As such, the start of the BB might correspond to a different MBB than
+ // the end.
+ //
+ TerminatorInst *TI = LLVMBB->getTerminator();
+
+ // Emit constants only once even if used by multiple PHI nodes.
+ std::map<Constant*, unsigned> ConstantsOut;
+
+ // Vector bool would be better, but vector<bool> is really slow.
+ std::vector<unsigned char> SuccsHandled;
+ if (TI->getNumSuccessors())
+ SuccsHandled.resize(BB->getParent()->getNumBlockIDs());
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ unsigned SuccMBBNo = SuccMBB->getNumber();
+ if (SuccsHandled[SuccMBBNo]) continue;
+ SuccsHandled[SuccMBBNo] = true;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+ PHINode *PN;
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::iterator I = SuccBB->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I) {
+      // Ignore dead PHIs.
+ if (PN->use_empty()) continue;
+
+ unsigned Reg;
+ Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ if (Constant *C = dyn_cast<Constant>(PHIOp)) {
+ unsigned &RegOut = ConstantsOut[C];
+ if (RegOut == 0) {
+ RegOut = FuncInfo.CreateRegForValue(C);
+ UnorderedChains.push_back(
+ SDL.CopyValueToVirtualRegister(C, RegOut));
+ }
+ Reg = RegOut;
+ } else {
+ Reg = FuncInfo.ValueMap[PHIOp];
+ if (Reg == 0) {
+ assert(isa<AllocaInst>(PHIOp) &&
+ FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+ "Didn't codegen value into a register!??");
+ Reg = FuncInfo.CreateRegForValue(PHIOp);
+ UnorderedChains.push_back(
+ SDL.CopyValueToVirtualRegister(PHIOp, Reg));
+ }
+ }
+
+        // Remember that this register needs to be added to the machine PHI
+        // node as the input for this MBB.
+ MVT::ValueType VT = TLI.getValueType(PN->getType());
+ unsigned NumRegisters = TLI.getNumRegisters(VT);
+ for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+ PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+ }
+ }
+ ConstantsOut.clear();
+
+ // Turn all of the unordered chains into one factored node.
+ if (!UnorderedChains.empty()) {
+ SDOperand Root = SDL.getRoot();
+ if (Root.getOpcode() != ISD::EntryToken) {
+ unsigned i = 0, e = UnorderedChains.size();
+ for (; i != e; ++i) {
+ assert(UnorderedChains[i].Val->getNumOperands() > 1);
+ if (UnorderedChains[i].Val->getOperand(0) == Root)
+ break; // Don't add the root if we already indirectly depend on it.
+ }
+
+ if (i == e)
+ UnorderedChains.push_back(Root);
+ }
+ DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &UnorderedChains[0], UnorderedChains.size()));
+ }
+
+ // Lower the terminator after the copies are emitted.
+ SDL.visit(*LLVMBB->getTerminator());
+
+ // Copy over any CaseBlock records that may now exist due to SwitchInst
+ // lowering, as well as any jump table information.
+ SwitchCases.clear();
+ SwitchCases = SDL.SwitchCases;
+ JTCases.clear();
+ JTCases = SDL.JTCases;
+ BitTestCases.clear();
+ BitTestCases = SDL.BitTestCases;
+
+ // Make sure the root of the DAG is up-to-date.
+ DAG.setRoot(SDL.getRoot());
+}
+
+void SelectionDAGISel::CodeGenAndEmitDAG(SelectionDAG &DAG) {
+ // Get alias analysis for load/store combining.
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+ // Run the DAG combiner in pre-legalize mode.
+ DAG.Combine(false, AA);
+
+ DOUT << "Lowered selection DAG:\n";
+ DEBUG(DAG.dump());
+
+ // Second step, hack on the DAG until it only uses operations and types that
+ // the target supports.
+ DAG.Legalize();
+
+ DOUT << "Legalized selection DAG:\n";
+ DEBUG(DAG.dump());
+
+ // Run the DAG combiner in post-legalize mode.
+ DAG.Combine(true, AA);
+
+ if (ViewISelDAGs) DAG.viewGraph();
+
+ // Third, instruction select all of the operations to machine code, adding the
+ // code to the MachineBasicBlock.
+ InstructionSelectBasicBlock(DAG);
+
+ DOUT << "Selected machine code:\n";
+ DEBUG(BB->dump());
+}
+
+void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, MachineFunction &MF,
+ FunctionLoweringInfo &FuncInfo) {
+ std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
+ {
+ SelectionDAG DAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>());
+ CurDAG = &DAG;
+
+ // First step, lower LLVM code to some DAG. This DAG may use operations and
+ // types that are not supported by the target.
+ BuildSelectionDAG(DAG, LLVMBB, PHINodesToUpdate, FuncInfo);
+
+ // Second step, emit the lowered DAG as machine code.
+ CodeGenAndEmitDAG(DAG);
+ }
+
+ DOUT << "Total amount of phi nodes to update: "
+ << PHINodesToUpdate.size() << "\n";
+ DEBUG(for (unsigned i = 0, e = PHINodesToUpdate.size(); i != e; ++i)
+ DOUT << "Node " << i << " : (" << PHINodesToUpdate[i].first
+ << ", " << PHINodesToUpdate[i].second << ")\n";);
+
+ // Next, now that we know what the last MBB the LLVM BB expanded is, update
+ // PHI nodes in successors.
+ if (SwitchCases.empty() && JTCases.empty() && BitTestCases.empty()) {
+ for (unsigned i = 0, e = PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstr *PHI = PHINodesToUpdate[i].first;
+ assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+ "This is not a machine PHI node that we are updating!");
+ PHI->addRegOperand(PHINodesToUpdate[i].second, false);
+ PHI->addMachineBasicBlockOperand(BB);
+ }
+ return;
+ }
+
+ for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i) {
+ // Lower header first, if it wasn't already lowered
+ if (!BitTestCases[i].Emitted) {
+ SelectionDAG HSDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>());
+ CurDAG = &HSDAG;
+ SelectionDAGLowering HSDL(HSDAG, TLI, FuncInfo);
+ // Set the current basic block to the mbb we wish to insert the code into
+ BB = BitTestCases[i].Parent;
+ HSDL.setCurrentBasicBlock(BB);
+ // Emit the code
+ HSDL.visitBitTestHeader(BitTestCases[i]);
+ HSDAG.setRoot(HSDL.getRoot());
+ CodeGenAndEmitDAG(HSDAG);
+ }
+
+ for (unsigned j = 0, ej = BitTestCases[i].Cases.size(); j != ej; ++j) {
+ SelectionDAG BSDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>());
+ CurDAG = &BSDAG;
+ SelectionDAGLowering BSDL(BSDAG, TLI, FuncInfo);
+ // Set the current basic block to the mbb we wish to insert the code into
+ BB = BitTestCases[i].Cases[j].ThisBB;
+ BSDL.setCurrentBasicBlock(BB);
+ // Emit the code
+ if (j+1 != ej)
+ BSDL.visitBitTestCase(BitTestCases[i].Cases[j+1].ThisBB,
+ BitTestCases[i].Reg,
+ BitTestCases[i].Cases[j]);
+ else
+ BSDL.visitBitTestCase(BitTestCases[i].Default,
+ BitTestCases[i].Reg,
+ BitTestCases[i].Cases[j]);
+
+
+ BSDAG.setRoot(BSDL.getRoot());
+ CodeGenAndEmitDAG(BSDAG);
+ }
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = PHINodesToUpdate.size(); pi != pe; ++pi) {
+ MachineInstr *PHI = PHINodesToUpdate[pi].first;
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+ "This is not a machine PHI node that we are updating!");
+ // This is "default" BB. We have two jumps to it. From "header" BB and
+ // from last "case" BB.
+ if (PHIBB == BitTestCases[i].Default) {
+ PHI->addRegOperand(PHINodesToUpdate[pi].second, false);
+ PHI->addMachineBasicBlockOperand(BitTestCases[i].Parent);
+ PHI->addRegOperand(PHINodesToUpdate[pi].second, false);
+ PHI->addMachineBasicBlockOperand(BitTestCases[i].Cases.back().ThisBB);
+ }
+ // One of "cases" BB.
+ for (unsigned j = 0, ej = BitTestCases[i].Cases.size(); j != ej; ++j) {
+ MachineBasicBlock* cBB = BitTestCases[i].Cases[j].ThisBB;
+ if (cBB->succ_end() !=
+ std::find(cBB->succ_begin(),cBB->succ_end(), PHIBB)) {
+ PHI->addRegOperand(PHINodesToUpdate[pi].second, false);
+ PHI->addMachineBasicBlockOperand(cBB);
+ }
+ }
+ }
+ }
+
+ // If the JumpTable record is filled in, then we need to emit a jump table.
+ // Updating the PHI nodes is tricky in this case, since we need to determine
+ // whether the PHI is a successor of the range check MBB or the jump table MBB
+ for (unsigned i = 0, e = JTCases.size(); i != e; ++i) {
+ // Lower header first, if it wasn't already lowered
+ if (!JTCases[i].first.Emitted) {
+ SelectionDAG HSDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>());
+ CurDAG = &HSDAG;
+ SelectionDAGLowering HSDL(HSDAG, TLI, FuncInfo);
+ // Set the current basic block to the mbb we wish to insert the code into
+ BB = JTCases[i].first.HeaderBB;
+ HSDL.setCurrentBasicBlock(BB);
+ // Emit the code
+ HSDL.visitJumpTableHeader(JTCases[i].second, JTCases[i].first);
+ HSDAG.setRoot(HSDL.getRoot());
+ CodeGenAndEmitDAG(HSDAG);
+ }
+
+ SelectionDAG JSDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>());
+ CurDAG = &JSDAG;
+ SelectionDAGLowering JSDL(JSDAG, TLI, FuncInfo);
+ // Set the current basic block to the mbb we wish to insert the code into
+ BB = JTCases[i].second.MBB;
+ JSDL.setCurrentBasicBlock(BB);
+ // Emit the code
+ JSDL.visitJumpTable(JTCases[i].second);
+ JSDAG.setRoot(JSDL.getRoot());
+ CodeGenAndEmitDAG(JSDAG);
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = PHINodesToUpdate.size(); pi != pe; ++pi) {
+ MachineInstr *PHI = PHINodesToUpdate[pi].first;
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+ "This is not a machine PHI node that we are updating!");
+ // "default" BB. We can go there only from header BB.
+ if (PHIBB == JTCases[i].second.Default) {
+ PHI->addRegOperand(PHINodesToUpdate[pi].second, false);
+ PHI->addMachineBasicBlockOperand(JTCases[i].first.HeaderBB);
+ }
+ // JT BB. Just iterate over successors here
+ if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) {
+ PHI->addRegOperand(PHINodesToUpdate[pi].second, false);
+ PHI->addMachineBasicBlockOperand(BB);
+ }
+ }
+ }
+
+ // If the switch block involved a branch to one of the actual successors, we
+ // need to update PHI nodes in that block.
+ for (unsigned i = 0, e = PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstr *PHI = PHINodesToUpdate[i].first;
+ assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
+ "This is not a machine PHI node that we are updating!");
+ if (BB->isSuccessor(PHI->getParent())) {
+ PHI->addRegOperand(PHINodesToUpdate[i].second, false);
+ PHI->addMachineBasicBlockOperand(BB);
+ }
+ }
+
+ // If we generated any switch lowering information, build and codegen any
+ // additional DAGs necessary.
+ for (unsigned i = 0, e = SwitchCases.size(); i != e; ++i) {
+ SelectionDAG SDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>());
+ CurDAG = &SDAG;
+ SelectionDAGLowering SDL(SDAG, TLI, FuncInfo);
+
+ // Set the current basic block to the mbb we wish to insert the code into
+ BB = SwitchCases[i].ThisBB;
+ SDL.setCurrentBasicBlock(BB);
+
+ // Emit the code
+ SDL.visitSwitchCase(SwitchCases[i]);
+ SDAG.setRoot(SDL.getRoot());
+ CodeGenAndEmitDAG(SDAG);
+
+ // Handle any PHI nodes in successors of this chunk, as if we were coming
+ // from the original BB before switch expansion. Note that PHI nodes can
+ // occur multiple times in PHINodesToUpdate. We have to be very careful to
+ // handle them the right number of times.
+ while ((BB = SwitchCases[i].TrueBB)) { // Handle LHS and RHS.
+ for (MachineBasicBlock::iterator Phi = BB->begin();
+ Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){
+ // This value for this PHI node is recorded in PHINodesToUpdate, get it.
+ for (unsigned pn = 0; ; ++pn) {
+ assert(pn != PHINodesToUpdate.size() && "Didn't find PHI entry!");
+ if (PHINodesToUpdate[pn].first == Phi) {
+ Phi->addRegOperand(PHINodesToUpdate[pn].second, false);
+ Phi->addMachineBasicBlockOperand(SwitchCases[i].ThisBB);
+ break;
+ }
+ }
+ }
+
+ // Don't process RHS if same block as LHS.
+ if (BB == SwitchCases[i].FalseBB)
+ SwitchCases[i].FalseBB = 0;
+
+ // If we haven't handled the RHS, do so now. Otherwise, we're done.
+ SwitchCases[i].TrueBB = SwitchCases[i].FalseBB;
+ SwitchCases[i].FalseBB = 0;
+ }
+ assert(SwitchCases[i].TrueBB == 0 && SwitchCases[i].FalseBB == 0);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+/// ScheduleAndEmitDAG - Pick a safe ordering and emit instructions for each
+/// target node in the graph.
+void SelectionDAGISel::ScheduleAndEmitDAG(SelectionDAG &DAG) {
+ if (ViewSchedDAGs) DAG.viewGraph();
+
+ RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault();
+
+ if (!Ctor) {
+ Ctor = ISHeuristic;
+ RegisterScheduler::setDefault(Ctor);
+ }
+
+ ScheduleDAG *SL = Ctor(this, &DAG, BB);
+ BB = SL->Run();
+ delete SL;
+}
+
+
+HazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() {
+ return new HazardRecognizer();
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions used by the generated instruction selector.
+//===----------------------------------------------------------------------===//
+// Calls to these methods are generated by tblgen.
+
+/// CheckAndMask - The isel is trying to match something like (and X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
+bool SelectionDAGISel::CheckAndMask(SDOperand LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) {
+ uint64_t ActualMask = RHS->getValue();
+ uint64_t DesiredMask =DesiredMaskS & MVT::getIntVTBitMask(LHS.getValueType());
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+  // If the actual AND mask allows disallowed bits, this doesn't match.
+ if (ActualMask & ~DesiredMask)
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ uint64_t NeededMask = DesiredMask & ~ActualMask;
+ if (CurDAG->MaskedValueIsZero(LHS, NeededMask))
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+/// CheckOrMask - The isel is trying to match something like (or X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
+bool SelectionDAGISel::CheckOrMask(SDOperand LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) {
+ uint64_t ActualMask = RHS->getValue();
+ uint64_t DesiredMask =DesiredMaskS & MVT::getIntVTBitMask(LHS.getValueType());
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+  // If the actual OR mask allows disallowed bits, this doesn't match.
+ if (ActualMask & ~DesiredMask)
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ uint64_t NeededMask = DesiredMask & ~ActualMask;
+
+ uint64_t KnownZero, KnownOne;
+ CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne);
+
+ // If all the missing bits in the or are already known to be set, match!
+ if ((NeededMask & KnownOne) == NeededMask)
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
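
A small worked example of the mask check above, using made-up values: suppose the .td pattern asks for (and X, 255), the DAG combiner has shrunk the constant to 15, and bits 0xF0 of X are already known zero. The match should still succeed, and the checks below mirror that reasoning:

// Worked example of the CheckAndMask logic with made-up values.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t DesiredMask = 0xFF;   // mask written in the .td pattern
  uint64_t ActualMask  = 0x0F;   // mask left in the DAG after combining
  uint64_t KnownZero   = 0xF0;   // bits of the LHS known to be zero

  assert((ActualMask & ~DesiredMask) == 0);   // no disallowed bits are set
  uint64_t NeededMask = DesiredMask & ~ActualMask;
  assert((NeededMask & ~KnownZero) == 0);     // missing bits are known zero,
  return 0;                                   // so the pattern still matches
}
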
+
+
+/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+/// by tblgen. Others should not call it.
+void SelectionDAGISel::
+SelectInlineAsmMemoryOperands(std::vector<SDOperand> &Ops, SelectionDAG &DAG) {
+ std::vector<SDOperand> InOps;
+ std::swap(InOps, Ops);
+
+ Ops.push_back(InOps[0]); // input chain.
+ Ops.push_back(InOps[1]); // input asm string.
+
+ unsigned i = 2, e = InOps.size();
+ if (InOps[e-1].getValueType() == MVT::Flag)
+ --e; // Don't process a flag operand if it is here.
+
+ while (i != e) {
+ unsigned Flags = cast<ConstantSDNode>(InOps[i])->getValue();
+ if ((Flags & 7) != 4 /*MEM*/) {
+ // Just skip over this operand, copying the operands verbatim.
+ Ops.insert(Ops.end(), InOps.begin()+i, InOps.begin()+i+(Flags >> 3) + 1);
+ i += (Flags >> 3) + 1;
+ } else {
+ assert((Flags >> 3) == 1 && "Memory operand with multiple values?");
+ // Otherwise, this is a memory operand. Ask the target to select it.
+ std::vector<SDOperand> SelOps;
+ if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps, DAG)) {
+ cerr << "Could not match memory address. Inline asm failure!\n";
+ exit(1);
+ }
+
+ // Add this to the output node.
+ MVT::ValueType IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy();
+ Ops.push_back(DAG.getTargetConstant(4/*MEM*/ | (SelOps.size() << 3),
+ IntPtrTy));
+ Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+ i += 2;
+ }
+ }
+
+ // Add the flag input back if present.
+ if (e != InOps.size())
+ Ops.push_back(InOps.back());
+}
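
The loop above decodes one flag word per inline asm operand: the low three bits give the operand kind (4 meaning a memory operand) and the remaining bits give how many SDOperand values follow. A tiny sketch of that encoding, using only the constants visible in the code above:

// Sketch of the flag word decoded by the loop above: the low 3 bits hold
// the operand kind (4 means "memory"), the bits above hold the value count.
#include <cstdio>

int main() {
  unsigned Flags = 4 /*MEM*/ | (1u << 3);   // one memory value follows
  unsigned Kind      = Flags & 7;           // -> 4
  unsigned NumValues = Flags >> 3;          // -> 1
  std::printf("kind=%u values=%u\n", Kind, NumValues);
  return 0;
}
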
+
+char SelectionDAGISel::ID = 0;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
new file mode 100644
index 0000000..12b5682
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -0,0 +1,245 @@
+//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include <fstream>
+#include <sstream>
+using namespace llvm;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
+ static std::string getGraphName(const SelectionDAG *G) {
+ return G->getMachineFunction().getFunction()->getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool hasNodeAddressLabel(const SDNode *Node,
+ const SelectionDAG *Graph) {
+ return true;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ template<typename EdgeIter>
+ static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
+ SDOperand Op = EI.getNode()->getOperand(EI.getOperand());
+ MVT::ValueType VT = Op.getValueType();
+ if (VT == MVT::Flag)
+ return "color=red,style=bold";
+ else if (VT == MVT::Other)
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ static std::string getNodeLabel(const SDNode *Node,
+ const SelectionDAG *Graph);
+ static std::string getNodeAttributes(const SDNode *N,
+ const SelectionDAG *Graph) {
+#ifndef NDEBUG
+ const std::string &Attrs = Graph->getGraphAttrs(N);
+ if (!Attrs.empty()) {
+ if (Attrs.find("shape=") == std::string::npos)
+ return std::string("shape=Mrecord,") + Attrs;
+ else
+ return Attrs;
+ }
+#endif
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(SelectionDAG *G,
+ GraphWriter<SelectionDAG*> &GW) {
+ GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+ if (G->getRoot().Val)
+ GW.emitEdge(0, -1, G->getRoot().Val, -1, "");
+ }
+ };
+}
+
+std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ std::string Op = Node->getOperationName(G);
+
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ if (Node->getValueType(i) == MVT::Other)
+ Op += ":ch";
+ else
+ Op = Op + ":" + MVT::getValueTypeString(Node->getValueType(i));
+
+ if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Node)) {
+ Op += ": " + utostr(CSDN->getValue());
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(Node)) {
+ Op += ": " + ftostr(CSDN->getValue());
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(Node)) {
+ int offset = GADN->getOffset();
+ Op += ": " + GADN->getGlobal()->getName();
+ if (offset > 0)
+ Op += "+" + itostr(offset);
+ else
+ Op += itostr(offset);
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(Node)) {
+ Op += " " + itostr(FIDN->getIndex());
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(Node)) {
+ Op += " " + itostr(JTDN->getIndex());
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Node)){
+ if (CP->isMachineConstantPoolEntry()) {
+ std::ostringstream SS;
+ CP->getMachineCPVal()->print(SS);
+ Op += "<" + SS.str() + ">";
+ } else {
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ Op += "<" + ftostr(CFP->getValue()) + ">";
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(CP->getConstVal()))
+ Op += "<" + utostr(CI->getZExtValue()) + ">";
+ else {
+ std::ostringstream SS;
+ WriteAsOperand(SS, CP->getConstVal(), false);
+ Op += "<" + SS.str() + ">";
+ }
+ }
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(Node)) {
+ Op = "BB: ";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ Op += LBB->getName();
+ //Op += " " + (const void*)BBDN->getBasicBlock();
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node)) {
+ if (G && R->getReg() != 0 &&
+ MRegisterInfo::isPhysicalRegister(R->getReg())) {
+ Op = Op + " " + G->getTarget().getRegisterInfo()->getName(R->getReg());
+ } else {
+ Op += " #" + utostr(R->getReg());
+ }
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(Node)) {
+ Op += "'" + std::string(ES->getSymbol()) + "'";
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(Node)) {
+ if (M->getValue())
+ Op += "<" + M->getValue()->getName() + ":" + itostr(M->getOffset()) + ">";
+ else
+ Op += "<null:" + itostr(M->getOffset()) + ">";
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(Node)) {
+ Op = Op + " VT=" + MVT::getValueTypeString(N->getVT());
+ } else if (const StringSDNode *N = dyn_cast<StringSDNode>(Node)) {
+ Op = Op + "\"" + N->getValue() + "\"";
+ } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(Node)) {
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD:
+ Op = Op + "<anyext ";
+ break;
+ case ISD::SEXTLOAD:
+ Op = Op + " <sext ";
+ break;
+ case ISD::ZEXTLOAD:
+ Op = Op + " <zext ";
+ break;
+ }
+ if (doExt)
+ Op = Op + MVT::getValueTypeString(LD->getLoadedVT()) + ">";
+
+ Op += LD->getIndexedModeName(LD->getAddressingMode());
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(Node)) {
+ if (ST->isTruncatingStore())
+ Op = Op + "<trunc " + MVT::getValueTypeString(ST->getStoredVT()) + ">";
+ Op += ST->getIndexedModeName(ST->getAddressingMode());
+ }
+
+ return Op;
+}
+
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void SelectionDAG::viewGraph() {
+// This code is only for debugging!
+#ifndef NDEBUG
+ ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName());
+#else
+ cerr << "SelectionDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+
+/// clearGraphAttrs - Clear all previously defined node graph attributes.
+/// Intended to be used from a debugging tool (eg. gdb).
+void SelectionDAG::clearGraphAttrs() {
+#ifndef NDEBUG
+ NodeGraphAttrs.clear();
+#else
+ cerr << "SelectionDAG::clearGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".)
+///
+void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = Attrs;
+#else
+ cerr << "SelectionDAG::setGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".)
+/// Used from getNodeAttributes.
+const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
+#ifndef NDEBUG
+ std::map<const SDNode *, std::string>::const_iterator I =
+ NodeGraphAttrs.find(N);
+
+ if (I != NodeGraphAttrs.end())
+ return I->second;
+ else
+ return "";
+#else
+ cerr << "SelectionDAG::getGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+ return std::string("");
+#endif
+}
+
+/// setGraphColor - Convenience for setting node color attribute.
+///
+void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = std::string("color=") + Color;
+#else
+ cerr << "SelectionDAG::setGraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
new file mode 100644
index 0000000..1b7b436
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -0,0 +1,1753 @@
+//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// InitLibcallNames - Set default libcall names.
+///
+static void InitLibcallNames(const char **Names) {
+ Names[RTLIB::SHL_I32] = "__ashlsi3";
+ Names[RTLIB::SHL_I64] = "__ashldi3";
+ Names[RTLIB::SRL_I32] = "__lshrsi3";
+ Names[RTLIB::SRL_I64] = "__lshrdi3";
+ Names[RTLIB::SRA_I32] = "__ashrsi3";
+ Names[RTLIB::SRA_I64] = "__ashrdi3";
+ Names[RTLIB::MUL_I32] = "__mulsi3";
+ Names[RTLIB::MUL_I64] = "__muldi3";
+ Names[RTLIB::SDIV_I32] = "__divsi3";
+ Names[RTLIB::SDIV_I64] = "__divdi3";
+ Names[RTLIB::UDIV_I32] = "__udivsi3";
+ Names[RTLIB::UDIV_I64] = "__udivdi3";
+ Names[RTLIB::SREM_I32] = "__modsi3";
+ Names[RTLIB::SREM_I64] = "__moddi3";
+ Names[RTLIB::UREM_I32] = "__umodsi3";
+ Names[RTLIB::UREM_I64] = "__umoddi3";
+ Names[RTLIB::NEG_I32] = "__negsi2";
+ Names[RTLIB::NEG_I64] = "__negdi2";
+ Names[RTLIB::ADD_F32] = "__addsf3";
+ Names[RTLIB::ADD_F64] = "__adddf3";
+ Names[RTLIB::SUB_F32] = "__subsf3";
+ Names[RTLIB::SUB_F64] = "__subdf3";
+ Names[RTLIB::MUL_F32] = "__mulsf3";
+ Names[RTLIB::MUL_F64] = "__muldf3";
+ Names[RTLIB::DIV_F32] = "__divsf3";
+ Names[RTLIB::DIV_F64] = "__divdf3";
+ Names[RTLIB::REM_F32] = "fmodf";
+ Names[RTLIB::REM_F64] = "fmod";
+ Names[RTLIB::NEG_F32] = "__negsf2";
+ Names[RTLIB::NEG_F64] = "__negdf2";
+ Names[RTLIB::POWI_F32] = "__powisf2";
+ Names[RTLIB::POWI_F64] = "__powidf2";
+ Names[RTLIB::SQRT_F32] = "sqrtf";
+ Names[RTLIB::SQRT_F64] = "sqrt";
+ Names[RTLIB::SIN_F32] = "sinf";
+ Names[RTLIB::SIN_F64] = "sin";
+ Names[RTLIB::COS_F32] = "cosf";
+ Names[RTLIB::COS_F64] = "cos";
+ Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
+ Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
+ Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
+ Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
+ Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
+ Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
+ Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
+ Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
+ Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
+ Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
+ Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
+ Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
+ Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
+ Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
+ Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
+ Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
+ Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
+ Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
+ Names[RTLIB::OEQ_F32] = "__eqsf2";
+ Names[RTLIB::OEQ_F64] = "__eqdf2";
+ Names[RTLIB::UNE_F32] = "__nesf2";
+ Names[RTLIB::UNE_F64] = "__nedf2";
+ Names[RTLIB::OGE_F32] = "__gesf2";
+ Names[RTLIB::OGE_F64] = "__gedf2";
+ Names[RTLIB::OLT_F32] = "__ltsf2";
+ Names[RTLIB::OLT_F64] = "__ltdf2";
+ Names[RTLIB::OLE_F32] = "__lesf2";
+ Names[RTLIB::OLE_F64] = "__ledf2";
+ Names[RTLIB::OGT_F32] = "__gtsf2";
+ Names[RTLIB::OGT_F64] = "__gtdf2";
+ Names[RTLIB::UO_F32] = "__unordsf2";
+ Names[RTLIB::UO_F64] = "__unorddf2";
+ Names[RTLIB::O_F32] = "__unordsf2";
+ Names[RTLIB::O_F64] = "__unorddf2";
+}
+
+/// InitCmpLibcallCCs - Set default comparison libcall CC.
+///
+static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
+ memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
+ CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
+ CCs[RTLIB::UNE_F32] = ISD::SETNE;
+ CCs[RTLIB::UNE_F64] = ISD::SETNE;
+ CCs[RTLIB::OGE_F32] = ISD::SETGE;
+ CCs[RTLIB::OGE_F64] = ISD::SETGE;
+ CCs[RTLIB::OLT_F32] = ISD::SETLT;
+ CCs[RTLIB::OLT_F64] = ISD::SETLT;
+ CCs[RTLIB::OLE_F32] = ISD::SETLE;
+ CCs[RTLIB::OLE_F64] = ISD::SETLE;
+ CCs[RTLIB::OGT_F32] = ISD::SETGT;
+ CCs[RTLIB::OGT_F64] = ISD::SETGT;
+ CCs[RTLIB::UO_F32] = ISD::SETNE;
+ CCs[RTLIB::UO_F64] = ISD::SETNE;
+ CCs[RTLIB::O_F32] = ISD::SETEQ;
+ CCs[RTLIB::O_F64] = ISD::SETEQ;
+}
+
+TargetLowering::TargetLowering(TargetMachine &tm)
+ : TM(tm), TD(TM.getTargetData()) {
+ assert(ISD::BUILTIN_OP_END <= 156 &&
+ "Fixed size array in TargetLowering is not large enough!");
+ // All operations default to being supported.
+ memset(OpActions, 0, sizeof(OpActions));
+ memset(LoadXActions, 0, sizeof(LoadXActions));
+ memset(&StoreXActions, 0, sizeof(StoreXActions));
+ memset(&IndexedModeActions, 0, sizeof(IndexedModeActions));
+
+ // Set all indexed load / store to expand.
+ for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
+ for (unsigned IM = (unsigned)ISD::PRE_INC;
+ IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
+ setIndexedLoadAction(IM, (MVT::ValueType)VT, Expand);
+ setIndexedStoreAction(IM, (MVT::ValueType)VT, Expand);
+ }
+ }
+
+ IsLittleEndian = TD->isLittleEndian();
+ UsesGlobalOffsetTable = false;
+ ShiftAmountTy = SetCCResultTy = PointerTy = getValueType(TD->getIntPtrType());
+ ShiftAmtHandling = Undefined;
+ memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
+ memset(TargetDAGCombineArray, 0,
+ sizeof(TargetDAGCombineArray)/sizeof(TargetDAGCombineArray[0]));
+ maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
+ allowUnalignedMemoryAccesses = false;
+ UseUnderscoreSetJmp = false;
+ UseUnderscoreLongJmp = false;
+ SelectIsExpensive = false;
+ IntDivIsCheap = false;
+ Pow2DivIsCheap = false;
+ StackPointerRegisterToSaveRestore = 0;
+ ExceptionPointerRegister = 0;
+ ExceptionSelectorRegister = 0;
+ SchedPreferenceInfo = SchedulingForLatency;
+ JumpBufSize = 0;
+ JumpBufAlignment = 0;
+ IfCvtBlockSizeLimit = 2;
+
+ InitLibcallNames(LibcallRoutineNames);
+ InitCmpLibcallCCs(CmpLibcallCCs);
+}
+
+TargetLowering::~TargetLowering() {}
+
+/// computeRegisterProperties - Once all of the register classes are added,
+/// this allows us to compute derived properties we expose.
+void TargetLowering::computeRegisterProperties() {
+ assert(MVT::LAST_VALUETYPE <= 32 &&
+ "Too many value types for ValueTypeActions to hold!");
+
+ // Everything defaults to needing one register.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ NumRegistersForVT[i] = 1;
+ RegisterTypeForVT[i] = TransformToType[i] = i;
+ }
+ // ...except isVoid, which doesn't need any registers.
+ NumRegistersForVT[MVT::isVoid] = 0;
+
+ // Find the largest integer register class.
+ unsigned LargestIntReg = MVT::i128;
+ for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
+ assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
+
+ // Every integer value type larger than this largest register takes twice as
+ // many registers to represent as the previous ValueType.
+ for (MVT::ValueType ExpandedReg = LargestIntReg + 1;
+ MVT::isInteger(ExpandedReg); ++ExpandedReg) {
+ NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
+ RegisterTypeForVT[ExpandedReg] = LargestIntReg;
+ TransformToType[ExpandedReg] = ExpandedReg - 1;
+ ValueTypeActions.setTypeAction(ExpandedReg, Expand);
+ }
+
+  // Inspect all of the ValueTypes smaller than the largest integer
+ // register to see which ones need promotion.
+ MVT::ValueType LegalIntReg = LargestIntReg;
+ for (MVT::ValueType IntReg = LargestIntReg - 1;
+ IntReg >= MVT::i1; --IntReg) {
+ if (isTypeLegal(IntReg)) {
+ LegalIntReg = IntReg;
+ } else {
+ RegisterTypeForVT[IntReg] = TransformToType[IntReg] = LegalIntReg;
+ ValueTypeActions.setTypeAction(IntReg, Promote);
+ }
+ }
+
+ // Decide how to handle f64. If the target does not have native f64 support,
+ // expand it to i64 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
+ RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
+ TransformToType[MVT::f64] = MVT::i64;
+ ValueTypeActions.setTypeAction(MVT::f64, Expand);
+ }
+
+ // Decide how to handle f32. If the target does not have native support for
+ // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
+ if (!isTypeLegal(MVT::f32)) {
+ if (isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
+ TransformToType[MVT::f32] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::f32, Promote);
+ } else {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
+ TransformToType[MVT::f32] = MVT::i32;
+ ValueTypeActions.setTypeAction(MVT::f32, Expand);
+ }
+ }
+
+ // Loop over all of the vector value types to see which need transformations.
+ for (MVT::ValueType i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ if (!isTypeLegal(i)) {
+ MVT::ValueType IntermediateVT, RegisterVT;
+ unsigned NumIntermediates;
+ NumRegistersForVT[i] =
+ getVectorTypeBreakdown(i,
+ IntermediateVT, NumIntermediates,
+ RegisterVT);
+ RegisterTypeForVT[i] = RegisterVT;
+ TransformToType[i] = MVT::Other; // this isn't actually used
+ ValueTypeActions.setTypeAction(i, Expand);
+ }
+ }
+}
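
As a concrete instance of the expansion rule above (assuming a target whose widest legal integer type is i32): i64 is recorded as needing 2 registers and i128 as needing 4, each doubling the count of the next smaller type. A trivial standalone sketch of that doubling, under that assumption:

// Register counts for integer types wider than the widest legal one,
// doubling per halving step; i32 is assumed to be the widest legal type.
#include <cstdio>

int main() {
  unsigned LegalBits = 32;
  for (unsigned Bits = 64; Bits <= 128; Bits *= 2)
    std::printf("i%u needs %u registers\n", Bits, Bits / LegalBits);
  // prints: i64 needs 2 registers, i128 needs 4 registers
  return 0;
}
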
+
+const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ return NULL;
+}
+
+/// getVectorTypeBreakdown - Vector types are broken down into some number of
+/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
+/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
+/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
+///
+/// This method returns the number of registers needed, and the VT for each
+/// register. It also returns the VT and quantity of the intermediate values
+/// before they are promoted/expanded.
+///
+unsigned TargetLowering::getVectorTypeBreakdown(MVT::ValueType VT,
+ MVT::ValueType &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT::ValueType &RegisterVT) const {
+ // Figure out the right, legal destination reg to copy into.
+ unsigned NumElts = MVT::getVectorNumElements(VT);
+ MVT::ValueType EltTy = MVT::getVectorElementType(VT);
+
+ unsigned NumVectorRegs = 1;
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 &&
+ !isTypeLegal(MVT::getVectorType(EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ MVT::ValueType NewVT = MVT::getVectorType(EltTy, NumElts);
+ if (!isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ MVT::ValueType DestVT = getTypeToTransformTo(NewVT);
+ RegisterVT = DestVT;
+ if (DestVT < NewVT) {
+ // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(MVT::getSizeInBits(NewVT)/MVT::getSizeInBits(DestVT));
+ } else {
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+ }
+
+ return 1;
+}
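
Reusing the v8f32 example from the comment above, and assuming only 4-element vectors (and scalars) of f32 are legal on the target: the halving loop stops at v4f32 with NumVectorRegs equal to 2, so the breakdown is 2 x v4f32. A standalone sketch of just that loop (isLegalVector stands in for isTypeLegal and is an assumption of this example):

// Standalone sketch of the halving loop for the v8f32 example, assuming
// only 4-element vectors of f32 are legal on the target.
#include <cstdio>

static bool isLegalVector(unsigned NumElts) { return NumElts == 4; }

int main() {
  unsigned NumElts = 8, NumVectorRegs = 1;
  while (NumElts > 1 && !isLegalVector(NumElts)) {
    NumElts >>= 1;         // 8 -> 4
    NumVectorRegs <<= 1;   // 1 -> 2
  }
  std::printf("%u x v%uf32\n", NumVectorRegs, NumElts);  // prints: 2 x v4f32
  return 0;
}
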
+
+//===----------------------------------------------------------------------===//
+// Optimization Methods
+//===----------------------------------------------------------------------===//
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// specified instruction is a constant integer. If so, check to see if there
+/// are any bits set in the constant that are not demanded. If so, shrink the
+/// constant and return true.
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDOperand Op,
+ uint64_t Demanded) {
+ // FIXME: ISD::SELECT, ISD::SELECT_CC
+ switch(Op.getOpcode()) {
+ default: break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ if ((~Demanded & C->getValue()) != 0) {
+ MVT::ValueType VT = Op.getValueType();
+ SDOperand New = DAG.getNode(Op.getOpcode(), VT, Op.getOperand(0),
+ DAG.getConstant(Demanded & C->getValue(),
+ VT));
+ return CombineTo(Op, New);
+ }
+ break;
+ }
+ return false;
+}
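
A worked example of the shrinking test above, with made-up numbers: if only the low byte of (and X, 0x1FF) is demanded downstream, bit 8 of the constant lies outside the demanded mask, so the constant can be shrunk to 0xFF:

// Worked example of the shrink test with made-up values: only the low byte
// of (and X, 0x1FF) is demanded, so the constant can be shrunk to 0xFF.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Demanded = 0xFF;        // bits used downstream
  uint64_t C        = 0x1FF;       // constant operand of the AND
  assert((~Demanded & C) != 0);    // bit 8 is set but never demanded...
  uint64_t NewC = Demanded & C;    // ...so shrink the constant
  assert(NewC == 0xFF);
  return 0;
}
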
+
+/// SimplifyDemandedBits - Look at Op. At this point, we know that only the
+/// DemandedMask bits of the result of Op are ever used downstream. If we can
+/// use this information to simplify Op, create a new simplified DAG node and
+/// return true, returning the original and new nodes in Old and New. Otherwise,
+/// analyze the expression and return a mask of KnownOne and KnownZero bits for
+/// the expression (used to simplify the caller). The KnownZero/One bits may
+/// only be accurate for those bits in the DemandedMask.
+bool TargetLowering::SimplifyDemandedBits(SDOperand Op, uint64_t DemandedMask,
+ uint64_t &KnownZero,
+ uint64_t &KnownOne,
+ TargetLoweringOpt &TLO,
+ unsigned Depth) const {
+ KnownZero = KnownOne = 0; // Don't know anything.
+
+ // The masks are not wide enough to represent this type! Should use APInt.
+ if (Op.getValueType() == MVT::i128)
+ return false;
+
+ // Other users may use these bits.
+ if (!Op.Val->hasOneUse()) {
+ if (Depth != 0) {
+      // If not at the root, just compute the KnownZero/KnownOne bits to
+ // simplify things downstream.
+ TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
+ return false;
+ }
+ // If this is the root being simplified, allow it to have multiple uses,
+ // just set the DemandedMask to all bits.
+ DemandedMask = MVT::getIntVTBitMask(Op.getValueType());
+ } else if (DemandedMask == 0) {
+ // Not demanding any bits from Op.
+ if (Op.getOpcode() != ISD::UNDEF)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::UNDEF, Op.getValueType()));
+ return false;
+ } else if (Depth == 6) { // Limit search depth.
+ return false;
+ }
+
+ uint64_t KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
+ switch (Op.getOpcode()) {
+ case ISD::Constant:
+ // We know all of the bits for a constant!
+ KnownOne = cast<ConstantSDNode>(Op)->getValue() & DemandedMask;
+ KnownZero = ~KnownOne & DemandedMask;
+ return false; // Don't fall through, will infinitely loop.
+ case ISD::AND:
+ // If the RHS is a constant, check to see if the LHS would be zero without
+ // using the bits from the RHS. Below, we use knowledge about the RHS to
+ // simplify the LHS, here we're using information from the LHS to simplify
+ // the RHS.
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ uint64_t LHSZero, LHSOne;
+ TLO.DAG.ComputeMaskedBits(Op.getOperand(0), DemandedMask,
+ LHSZero, LHSOne, Depth+1);
+      // If the LHS already has zeros where RHSC does, this AND is dead.
+ if ((LHSZero & DemandedMask) == (~RHSC->getValue() & DemandedMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ // If any of the set bits in the RHS are known zero on the LHS, shrink
+ // the constant.
+ if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & DemandedMask))
+ return true;
+ }
+
+ if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & ~KnownZero,
+ KnownZero2, KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known one on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if ((DemandedMask & ~KnownZero2 & KnownOne)==(DemandedMask & ~KnownZero2))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((DemandedMask & ~KnownZero & KnownOne2)==(DemandedMask & ~KnownZero))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((DemandedMask & (KnownZero|KnownZero2)) == DemandedMask)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType()));
+ // If the RHS is a constant, see if we can simplify it.
+ if (TLO.ShrinkDemandedConstant(Op, DemandedMask & ~KnownZero2))
+ return true;
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+    // Output known-0 bits are clear if they are zero in either the LHS or RHS.
+ KnownZero |= KnownZero2;
+ break;
+ case ISD::OR:
+ if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & ~KnownOne,
+ KnownZero2, KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if ((DemandedMask & ~KnownOne2 & KnownZero) == (DemandedMask & ~KnownOne2))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((DemandedMask & ~KnownOne & KnownZero2) == (DemandedMask & ~KnownOne))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((DemandedMask & (~KnownZero) & KnownOne2) ==
+ (DemandedMask & (~KnownZero)))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((DemandedMask & (~KnownZero2) & KnownOne) ==
+ (DemandedMask & (~KnownZero2)))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If the RHS is a constant, see if we can simplify it.
+ if (TLO.ShrinkDemandedConstant(Op, DemandedMask))
+ return true;
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+    // Output known-1 bits are set if they are set in either the LHS or RHS.
+ KnownOne |= KnownOne2;
+ break;
+ case ISD::XOR:
+ if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'xor'.
+ if ((DemandedMask & KnownZero) == DemandedMask)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((DemandedMask & KnownZero2) == DemandedMask)
+ return TLO.CombineTo(Op, Op.getOperand(1));
+
+ // If all of the unknown bits are known to be zero on one side or the other
+ // (but not both) turn this into an *inclusive* or.
+ // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ if ((DemandedMask & ~KnownZero & ~KnownZero2) == 0)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, Op.getValueType(),
+ Op.getOperand(0),
+ Op.getOperand(1)));
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    // Output known-1 bits are set if set in exactly one of the LHS and RHS.
+ KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+
+ // If all of the demanded bits on one side are known, and all of the set
+ // bits on that side are also known to be set on the other side, turn this
+ // into an AND, as we know the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) { // all known
+ if ((KnownOne & KnownOne2) == KnownOne) {
+ MVT::ValueType VT = Op.getValueType();
+ SDOperand ANDC = TLO.DAG.getConstant(~KnownOne & DemandedMask, VT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, VT, Op.getOperand(0),
+ ANDC));
+ }
+ }
+
+ // If the RHS is a constant, see if we can simplify it.
+ // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
+ if (TLO.ShrinkDemandedConstant(Op, DemandedMask))
+ return true;
+
+ KnownZero = KnownZeroOut;
+ KnownOne = KnownOneOut;
+ break;
+ case ISD::SETCC:
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult)
+ KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
+ break;
+ case ISD::SELECT:
+ if (SimplifyDemandedBits(Op.getOperand(2), DemandedMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, DemandedMask))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SELECT_CC:
+ if (SimplifyDemandedBits(Op.getOperand(3), DemandedMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(2), DemandedMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, DemandedMask))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SHL:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getValue();
+ SDOperand InOp = Op.getOperand(0);
+
+ // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the bottom bits (which are shifted
+ // out) are never demanded.
+ if (InOp.getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(InOp.getOperand(1))) {
+ if (ShAmt && (DemandedMask & ((1ULL << ShAmt)-1)) == 0) {
+ unsigned C1 = cast<ConstantSDNode>(InOp.getOperand(1))->getValue();
+ unsigned Opc = ISD::SHL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SRL;
+ }
+
+ SDOperand NewSA =
+ TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+ MVT::ValueType VT = Op.getValueType();
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, VT,
+ InOp.getOperand(0), NewSA));
+ }
+ }
+
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask >> ShAmt,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero <<= SA->getValue();
+ KnownOne <<= SA->getValue();
+ KnownZero |= (1ULL << SA->getValue())-1; // low bits known zero.
+ }
+ break;
+ case ISD::SRL:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ MVT::ValueType VT = Op.getValueType();
+ unsigned ShAmt = SA->getValue();
+ uint64_t TypeMask = MVT::getIntVTBitMask(VT);
+ unsigned VTSize = MVT::getSizeInBits(VT);
+ SDOperand InOp = Op.getOperand(0);
+
+ // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the top bits (which are shifted out)
+ // are never demanded.
+ if (InOp.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(InOp.getOperand(1))) {
+ if (ShAmt && (DemandedMask & (~0ULL << (VTSize-ShAmt))) == 0) {
+ unsigned C1 = cast<ConstantSDNode>(InOp.getOperand(1))->getValue();
+ unsigned Opc = ISD::SRL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SHL;
+ }
+
+ SDOperand NewSA =
+ TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, VT,
+ InOp.getOperand(0), NewSA));
+ }
+ }
+
+ // Compute the new bits that are at the top now.
+ if (SimplifyDemandedBits(InOp, (DemandedMask << ShAmt) & TypeMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero &= TypeMask;
+ KnownOne &= TypeMask;
+ KnownZero >>= ShAmt;
+ KnownOne >>= ShAmt;
+
+ uint64_t HighBits = (1ULL << ShAmt)-1;
+ HighBits <<= VTSize - ShAmt;
+ KnownZero |= HighBits; // High bits known zero.
+ }
+ break;
+ case ISD::SRA:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ MVT::ValueType VT = Op.getValueType();
+ unsigned ShAmt = SA->getValue();
+
+ // Compute the new bits that are at the top now.
+ uint64_t TypeMask = MVT::getIntVTBitMask(VT);
+
+ uint64_t InDemandedMask = (DemandedMask << ShAmt) & TypeMask;
+
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ uint64_t HighBits = (1ULL << ShAmt)-1;
+ HighBits <<= MVT::getSizeInBits(VT) - ShAmt;
+ if (HighBits & DemandedMask)
+ InDemandedMask |= MVT::getIntVTSignBit(VT);
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero &= TypeMask;
+ KnownOne &= TypeMask;
+ KnownZero >>= ShAmt;
+ KnownOne >>= ShAmt;
+
+ // Handle the sign bits.
+ uint64_t SignBit = MVT::getIntVTSignBit(VT);
+ SignBit >>= ShAmt; // Adjust to where it is now in the mask.
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
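+ // For example, if X is already known non-negative, (X >>s 4) and (X >>u 4)
+ // produce identical results, so the arithmetic shift can be rewritten as
+ // the logical form.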
+ if ((KnownZero & SignBit) || (HighBits & ~DemandedMask) == HighBits) {
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, VT, Op.getOperand(0),
+ Op.getOperand(1)));
+ } else if (KnownOne & SignBit) { // New bits are known one.
+ KnownOne |= HighBits;
+ }
+ }
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & DemandedMask;
+
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ if (NewBits == 0)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+
+ uint64_t InSignBit = MVT::getIntVTSignBit(EVT);
+ int64_t InputDemandedBits = DemandedMask & MVT::getIntVTBitMask(EVT);
+
+ // Since the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InputDemandedBits |= InSignBit;
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+
+ // If the input sign bit is known zero, convert this into a zero extension.
+ if (KnownZero & InSignBit)
+ return TLO.CombineTo(Op,
+ TLO.DAG.getZeroExtendInReg(Op.getOperand(0), EVT));
+
+ if (KnownOne & InSignBit) { // Input sign bit known set
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Input sign bit unknown
+ KnownZero &= ~NewBits;
+ KnownOne &= ~NewBits;
+ }
+ break;
+ }
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP: {
+ MVT::ValueType VT = Op.getValueType();
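+ // Illustrative note: for i32 the count is at most 32, which fits in
+ // Log2_32(32)+1 = 6 bits, so every higher result bit is known zero.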
+ unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1;
+ KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT);
+ KnownOne = 0;
+ break;
+ }
+ case ISD::LOAD: {
+ if (ISD::isZEXTLoad(Op.Val)) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ MVT::ValueType VT = LD->getLoadedVT();
+ KnownZero |= ~MVT::getIntVTBitMask(VT) & DemandedMask;
+ }
+ break;
+ }
+ case ISD::ZERO_EXTEND: {
+ uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType());
+
+ // If none of the top bits are demanded, convert this into an any_extend.
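+ // Illustrative: (zero_extend i8 X to i32) whose users only inspect the low
+ // 8 bits needs no zeroing of the high bits, so an any_extend is sufficient.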
+ uint64_t NewBits = (~InMask) & DemandedMask;
+ if (NewBits == 0)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero |= NewBits;
+ break;
+ }
+ case ISD::SIGN_EXTEND: {
+ MVT::ValueType InVT = Op.getOperand(0).getValueType();
+ uint64_t InMask = MVT::getIntVTBitMask(InVT);
+ uint64_t InSignBit = MVT::getIntVTSignBit(InVT);
+ uint64_t NewBits = (~InMask) & DemandedMask;
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ if (NewBits == 0)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, Op.getValueType(),
+ Op.getOperand(0)));
+
+ // Since some of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ uint64_t InDemandedBits = DemandedMask & InMask;
+ InDemandedBits |= InSignBit;
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+
+ // If the sign bit is known zero, convert this to a zero extend.
+ if (KnownZero & InSignBit)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ // If the sign bit is known one, the top bits match.
+ if (KnownOne & InSignBit) {
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Otherwise, top bits aren't known.
+ KnownOne &= ~NewBits;
+ KnownZero &= ~NewBits;
+ }
+ break;
+ }
+ case ISD::ANY_EXTEND: {
+ uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType());
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ break;
+ }
+ case ISD::TRUNCATE: {
+ // Simplify the input, using demanded bit information, and compute the known
+ // zero/one bits live out.
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+
+ // If the input is only used by this truncate, see if we can shrink it based
+ // on the known demanded bits.
+ if (Op.getOperand(0).Val->hasOneUse()) {
+ SDOperand In = Op.getOperand(0);
+ switch (In.getOpcode()) {
+ default: break;
+ case ISD::SRL:
+ // Shrink SRL by a constant if none of the high bits shifted in are
+ // demanded.
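+ // Illustrative: (trunc (X >>u 8) to i32) with an i64 input, when bits
+ // 24-31 of the result are not demanded, becomes ((trunc X to i32) >>u 8).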
+ if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){
+ uint64_t HighBits = MVT::getIntVTBitMask(In.getValueType());
+ HighBits &= ~MVT::getIntVTBitMask(Op.getValueType());
+ HighBits >>= ShAmt->getValue();
+
+ if (ShAmt->getValue() < MVT::getSizeInBits(Op.getValueType()) &&
+ (DemandedMask & HighBits) == 0) {
+ // None of the shifted in bits are needed. Add a truncate of the
+ // shift input, then shift it.
+ SDOperand NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE,
+ Op.getValueType(),
+ In.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL,Op.getValueType(),
+ NewTrunc, In.getOperand(1)));
+ }
+ }
+ break;
+ }
+ }
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType());
+ KnownZero &= OutMask;
+ KnownOne &= OutMask;
+ break;
+ }
+ case ISD::AssertZext: {
+ MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ uint64_t InMask = MVT::getIntVTBitMask(VT);
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero |= ~InMask & DemandedMask;
+ break;
+ }
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ // Just use ComputeMaskedBits to compute output bits.
+ TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
+ break;
+ }
+
+ // If we know the value of all of the demanded bits, return this as a
+ // constant.
+ if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
+
+ return false;
+}
+
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified
+/// in Mask are known to be either zero or one and return them in the
+/// KnownZero/KnownOne bitsets.
+void TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
+ uint64_t Mask,
+ uint64_t &KnownZero,
+ uint64_t &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+ KnownZero = 0;
+ KnownOne = 0;
+}
+
+/// ComputeNumSignBitsForTargetNode - This method can be implemented by
+/// targets that want to expose additional information about sign bits to the
+/// DAG Combiner.
+unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDOperand Op,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use ComputeNumSignBits if you don't know whether Op"
+ " is a target node!");
+ return 1;
+}
+
+
+/// SimplifySetCC - Try to simplify a setcc built with the specified operands
+/// and cc. If it is unable to simplify it, return a null SDOperand.
+SDOperand
+TargetLowering::SimplifySetCC(MVT::ValueType VT, SDOperand N0, SDOperand N1,
+ ISD::CondCode Cond, bool foldBooleans,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return DAG.getConstant(0, VT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: return DAG.getConstant(1, VT);
+ }
+
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val)) {
+ uint64_t C1 = N1C->getValue();
+ if (isa<ConstantSDNode>(N0.Val)) {
+ return DAG.FoldSetCC(VT, N0, N1, Cond);
+ } else {
+ // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
+ // equality comparison, then we're just comparing whether X itself is
+ // zero.
+ if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
+ N0.getOperand(0).getOpcode() == ISD::CTLZ &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ ShAmt == Log2_32(MVT::getSizeInBits(N0.getValueType()))) {
+ if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+ // (srl (ctlz x), 5) == 0 -> X != 0
+ // (srl (ctlz x), 5) != 1 -> X != 0
+ Cond = ISD::SETNE;
+ } else {
+ // (srl (ctlz x), 5) != 0 -> X == 0
+ // (srl (ctlz x), 5) == 1 -> X == 0
+ Cond = ISD::SETEQ;
+ }
+ SDOperand Zero = DAG.getConstant(0, N0.getValueType());
+ return DAG.getSetCC(VT, N0.getOperand(0).getOperand(0),
+ Zero, Cond);
+ }
+ }
+
+ // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+ unsigned InSize = MVT::getSizeInBits(N0.getOperand(0).getValueType());
+
+ // If the comparison constant has bits in the upper part, the
+ // zero-extended value could never match.
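+ // Illustrative: (zext i8 X to i32) == 256 can never hold because the
+ // zero-extended value is at most 255, so the setcc folds to a constant.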
+ if (C1 & (~0ULL << InSize)) {
+ unsigned VSize = MVT::getSizeInBits(N0.getValueType());
+ switch (Cond) {
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETEQ: return DAG.getConstant(0, VT);
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETNE: return DAG.getConstant(1, VT);
+ case ISD::SETGT:
+ case ISD::SETGE:
+ // True if the sign bit of C1 is set.
+ return DAG.getConstant((C1 & (1ULL << (VSize-1))) != 0, VT);
+ case ISD::SETLT:
+ case ISD::SETLE:
+ // True if the sign bit of C1 isn't set.
+ return DAG.getConstant((C1 & (1ULL << (VSize-1))) == 0, VT);
+ default:
+ break;
+ }
+ }
+
+ // Otherwise, we can perform the comparison with the low bits.
+ switch (Cond) {
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ return DAG.getSetCC(VT, N0.getOperand(0),
+ DAG.getConstant(C1, N0.getOperand(0).getValueType()),
+ Cond);
+ default:
+ break; // todo, be more careful with signed comparisons
+ }
+ } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ MVT::ValueType ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
+ unsigned ExtSrcTyBits = MVT::getSizeInBits(ExtSrcTy);
+ MVT::ValueType ExtDstTy = N0.getValueType();
+ unsigned ExtDstTyBits = MVT::getSizeInBits(ExtDstTy);
+
+ // If the extended part has any inconsistent bits, it cannot ever
+ // compare equal. In other words, they have to be all ones or all
+ // zeros.
+ uint64_t ExtBits =
+ (~0ULL >> (64-ExtSrcTyBits)) & (~0ULL << (ExtDstTyBits-1));
+ if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits)
+ return DAG.getConstant(Cond == ISD::SETNE, VT);
+
+ SDOperand ZextOp;
+ MVT::ValueType Op0Ty = N0.getOperand(0).getValueType();
+ if (Op0Ty == ExtSrcTy) {
+ ZextOp = N0.getOperand(0);
+ } else {
+ int64_t Imm = ~0ULL >> (64-ExtSrcTyBits);
+ ZextOp = DAG.getNode(ISD::AND, Op0Ty, N0.getOperand(0),
+ DAG.getConstant(Imm, Op0Ty));
+ }
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(ZextOp.Val);
+ // Otherwise, make this a use of a zext.
+ return DAG.getSetCC(VT, ZextOp,
+ DAG.getConstant(C1 & (~0ULL>>(64-ExtSrcTyBits)),
+ ExtDstTy),
+ Cond);
+ } else if ((N1C->getValue() == 0 || N1C->getValue() == 1) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+
+ // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
+ if (N0.getOpcode() == ISD::SETCC) {
+ bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getValue() != 1);
+ if (TrueWhenTrue)
+ return N0;
+
+ // Invert the condition.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ CC = ISD::getSetCCInverse(CC,
+ MVT::isInteger(N0.getOperand(0).getValueType()));
+ return DAG.getSetCC(VT, N0.getOperand(0), N0.getOperand(1), CC);
+ }
+
+ if ((N0.getOpcode() == ISD::XOR ||
+ (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR &&
+ N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ cast<ConstantSDNode>(N0.getOperand(1))->getValue() == 1) {
+ // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
+ // can only do this if the top bits are known zero.
+ if (DAG.MaskedValueIsZero(N0,
+ MVT::getIntVTBitMask(N0.getValueType())-1)){
+ // Okay, get the un-inverted input value.
+ SDOperand Val;
+ if (N0.getOpcode() == ISD::XOR)
+ Val = N0.getOperand(0);
+ else {
+ assert(N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR);
+ // ((X^1)&1)^1 -> X & 1
+ Val = DAG.getNode(ISD::AND, N0.getValueType(),
+ N0.getOperand(0).getOperand(0),
+ N0.getOperand(1));
+ }
+ return DAG.getSetCC(VT, Val, N1,
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ }
+ }
+
+ uint64_t MinVal, MaxVal;
+ unsigned OperandBitSize = MVT::getSizeInBits(N1C->getValueType(0));
+ if (ISD::isSignedIntSetCC(Cond)) {
+ MinVal = 1ULL << (OperandBitSize-1);
+ if (OperandBitSize != 1) // Avoid X >> 64, which is undefined.
+ MaxVal = ~0ULL >> (65-OperandBitSize);
+ else
+ MaxVal = 0;
+ } else {
+ MinVal = 0;
+ MaxVal = ~0ULL >> (64-OperandBitSize);
+ }
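+ // Illustrative: for an i8 operand this yields 0x80/0x7f under the signed
+ // interpretation and 0x00/0xff under the unsigned one, held as raw values.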
+
+ // Canonicalize GE/LE comparisons to use GT/LT comparisons.
+ if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
+ if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true
+ --C1; // X >= C0 --> X > (C0-1)
+ return DAG.getSetCC(VT, N0, DAG.getConstant(C1, N1.getValueType()),
+ (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
+ }
+
+ if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
+ if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true
+ ++C1; // X <= C0 --> X < (C0+1)
+ return DAG.getSetCC(VT, N0, DAG.getConstant(C1, N1.getValueType()),
+ (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
+ }
+
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
+ return DAG.getConstant(0, VT); // X < MIN --> false
+ if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
+ return DAG.getConstant(1, VT); // X >= MIN --> true
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
+ return DAG.getConstant(0, VT); // X > MAX --> false
+ if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
+ return DAG.getConstant(1, VT); // X <= MAX --> true
+
+ // Canonicalize setgt X, Min --> setne X, Min
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
+ return DAG.getSetCC(VT, N0, N1, ISD::SETNE);
+ // Canonicalize setlt X, Max --> setne X, Max
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
+ return DAG.getSetCC(VT, N0, N1, ISD::SETNE);
+
+ // If we have setult X, 1, turn it into seteq X, 0
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
+ return DAG.getSetCC(VT, N0, DAG.getConstant(MinVal, N0.getValueType()),
+ ISD::SETEQ);
+ // If we have setugt X, Max-1, turn it into seteq X, Max
+ else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
+ return DAG.getSetCC(VT, N0, DAG.getConstant(MaxVal, N0.getValueType()),
+ ISD::SETEQ);
+
+ // If we have "setcc X, C0", check to see if we can shrink the immediate
+ // by changing cc.
+
+ // SETUGT X, SINTMAX -> SETLT X, 0
+ if (Cond == ISD::SETUGT && OperandBitSize != 1 &&
+ C1 == (~0ULL >> (65-OperandBitSize)))
+ return DAG.getSetCC(VT, N0, DAG.getConstant(0, N1.getValueType()),
+ ISD::SETLT);
+
+ // FIXME: Implement the rest of these.
+
+ // Fold bit comparisons when we can.
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ VT == N0.getValueType() && N0.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS =
+ dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
+ // Perform the xform if the AND RHS is a single bit.
+ if (isPowerOf2_64(AndRHS->getValue())) {
+ return DAG.getNode(ISD::SRL, VT, N0,
+ DAG.getConstant(Log2_64(AndRHS->getValue()),
+ getShiftAmountTy()));
+ }
+ } else if (Cond == ISD::SETEQ && C1 == AndRHS->getValue()) {
+ // (X & 8) == 8 --> (X & 8) >> 3
+ // Perform the xform if C1 is a single bit.
+ if (isPowerOf2_64(C1)) {
+ return DAG.getNode(ISD::SRL, VT, N0,
+ DAG.getConstant(Log2_64(C1), getShiftAmountTy()));
+ }
+ }
+ }
+ }
+ } else if (isa<ConstantSDNode>(N0.Val)) {
+ // Ensure that the constant occurs on the RHS.
+ return DAG.getSetCC(VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
+ }
+
+ if (isa<ConstantFPSDNode>(N0.Val)) {
+ // Constant fold or commute setcc.
+ SDOperand O = DAG.FoldSetCC(VT, N0, N1, Cond);
+ if (O.Val) return O;
+ }
+
+ if (N0 == N1) {
+ // We can always fold X == X for integer setcc's.
+ if (MVT::isInteger(N0.getValueType()))
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ unsigned UOF = ISD::getUnorderedFlavor(Cond);
+ if (UOF == 2) // FP operators that are undefined on NaNs.
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
+ return DAG.getConstant(UOF, VT);
+ // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
+ // if it is not already.
+ ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
+ if (NewCond != Cond)
+ return DAG.getSetCC(VT, N0, N1, NewCond);
+ }
+
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ MVT::isInteger(N0.getValueType())) {
+ if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
+ N0.getOpcode() == ISD::XOR) {
+ // Simplify (X+Y) == (X+Z) --> Y == Z
+ if (N0.getOpcode() == N1.getOpcode()) {
+ if (N0.getOperand(0) == N1.getOperand(0))
+ return DAG.getSetCC(VT, N0.getOperand(1), N1.getOperand(1), Cond);
+ if (N0.getOperand(1) == N1.getOperand(1))
+ return DAG.getSetCC(VT, N0.getOperand(0), N1.getOperand(0), Cond);
+ if (DAG.isCommutativeBinOp(N0.getOpcode())) {
+ // If X op Y == Y op X, try other combinations.
+ if (N0.getOperand(0) == N1.getOperand(1))
+ return DAG.getSetCC(VT, N0.getOperand(1), N1.getOperand(0), Cond);
+ if (N0.getOperand(1) == N1.getOperand(0))
+ return DAG.getSetCC(VT, N0.getOperand(0), N1.getOperand(1), Cond);
+ }
+ }
+
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+ if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ // Turn (X+C1) == C2 --> X == C2-C1
+ if (N0.getOpcode() == ISD::ADD && N0.Val->hasOneUse()) {
+ return DAG.getSetCC(VT, N0.getOperand(0),
+ DAG.getConstant(RHSC->getValue()-LHSR->getValue(),
+ N0.getValueType()), Cond);
+ }
+
+ // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
+ if (N0.getOpcode() == ISD::XOR)
+ // If we know that all of the inverted bits are zero, don't bother
+ // performing the inversion.
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getValue()))
+ return DAG.getSetCC(VT, N0.getOperand(0),
+ DAG.getConstant(LHSR->getValue()^RHSC->getValue(),
+ N0.getValueType()), Cond);
+ }
+
+ // Turn (C1-X) == C2 --> X == C1-C2
+ if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+ if (N0.getOpcode() == ISD::SUB && N0.Val->hasOneUse()) {
+ return DAG.getSetCC(VT, N0.getOperand(1),
+ DAG.getConstant(SUBC->getValue()-RHSC->getValue(),
+ N0.getValueType()), Cond);
+ }
+ }
+ }
+
+ // Simplify (X+Z) == X --> Z == 0
+ if (N0.getOperand(0) == N1)
+ return DAG.getSetCC(VT, N0.getOperand(1),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ if (N0.getOperand(1) == N1) {
+ if (DAG.isCommutativeBinOp(N0.getOpcode()))
+ return DAG.getSetCC(VT, N0.getOperand(0),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ else if (N0.Val->hasOneUse()) {
+ assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // (Z-X) == X --> Z == X<<1
+ SDOperand SH = DAG.getNode(ISD::SHL, N1.getValueType(),
+ N1,
+ DAG.getConstant(1, getShiftAmountTy()));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.Val);
+ return DAG.getSetCC(VT, N0.getOperand(0), SH, Cond);
+ }
+ }
+ }
+
+ if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
+ N1.getOpcode() == ISD::XOR) {
+ // Simplify X == (X+Z) --> Z == 0
+ if (N1.getOperand(0) == N0) {
+ return DAG.getSetCC(VT, N1.getOperand(1),
+ DAG.getConstant(0, N1.getValueType()), Cond);
+ } else if (N1.getOperand(1) == N0) {
+ if (DAG.isCommutativeBinOp(N1.getOpcode())) {
+ return DAG.getSetCC(VT, N1.getOperand(0),
+ DAG.getConstant(0, N1.getValueType()), Cond);
+ } else if (N1.Val->hasOneUse()) {
+ assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // X == (Z-X) --> X<<1 == Z
+ SDOperand SH = DAG.getNode(ISD::SHL, N1.getValueType(), N0,
+ DAG.getConstant(1, getShiftAmountTy()));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.Val);
+ return DAG.getSetCC(VT, SH, N1.getOperand(0), Cond);
+ }
+ }
+ }
+ }
+
+ // Fold away ALL boolean setcc's.
+ SDOperand Temp;
+ if (N0.getValueType() == MVT::i1 && foldBooleans) {
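+ // Note: in i1 the value 1 sign-extends to -1, so 1 is the smaller value
+ // under signed orderings; this is why SETGT maps to "X == 0 & Y == 1".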
+ switch (Cond) {
+ default: assert(0 && "Unknown integer setcc!");
+ case ISD::SETEQ: // X == Y -> (X^Y)^1
+ Temp = DAG.getNode(ISD::XOR, MVT::i1, N0, N1);
+ N0 = DAG.getNode(ISD::XOR, MVT::i1, Temp, DAG.getConstant(1, MVT::i1));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.Val);
+ break;
+ case ISD::SETNE: // X != Y --> (X^Y)
+ N0 = DAG.getNode(ISD::XOR, MVT::i1, N0, N1);
+ break;
+ case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> X^1 & Y
+ case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> X^1 & Y
+ Temp = DAG.getNode(ISD::XOR, MVT::i1, N0, DAG.getConstant(1, MVT::i1));
+ N0 = DAG.getNode(ISD::AND, MVT::i1, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.Val);
+ break;
+ case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> Y^1 & X
+ case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> Y^1 & X
+ Temp = DAG.getNode(ISD::XOR, MVT::i1, N1, DAG.getConstant(1, MVT::i1));
+ N0 = DAG.getNode(ISD::AND, MVT::i1, N0, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.Val);
+ break;
+ case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> X^1 | Y
+ case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> X^1 | Y
+ Temp = DAG.getNode(ISD::XOR, MVT::i1, N0, DAG.getConstant(1, MVT::i1));
+ N0 = DAG.getNode(ISD::OR, MVT::i1, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.Val);
+ break;
+ case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> Y^1 | X
+ case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> Y^1 | X
+ Temp = DAG.getNode(ISD::XOR, MVT::i1, N1, DAG.getConstant(1, MVT::i1));
+ N0 = DAG.getNode(ISD::OR, MVT::i1, N0, Temp);
+ break;
+ }
+ if (VT != MVT::i1) {
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(N0.Val);
+ // FIXME: If running after legalize, we probably can't do this.
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, VT, N0);
+ }
+ return N0;
+ }
+
+ // Could not fold it.
+ return SDOperand();
+}
+
+SDOperand TargetLowering::
+PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+ // Default implementation: no optimization.
+ return SDOperand();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembler Implementation Methods
+//===----------------------------------------------------------------------===//
+
+TargetLowering::ConstraintType
+TargetLowering::getConstraintType(const std::string &Constraint) const {
+ // FIXME: lots more standard ones to handle.
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': return C_RegisterClass;
+ case 'm': // memory
+ case 'o': // offsetable
+ case 'V': // not offsetable
+ return C_Memory;
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 's': // Relocatable Constant
+ case 'X': // Allow ANY value.
+ case 'I': // Target registers.
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ return C_Other;
+ }
+ }
+
+ if (Constraint.size() > 1 && Constraint[0] == '{' &&
+ Constraint[Constraint.size()-1] == '}')
+ return C_Register;
+ return C_Unknown;
+}
+
+/// isOperandValidForConstraint - Return the specified operand (possibly
+/// modified) if the specified SDOperand is valid for the specified target
+/// constraint letter, otherwise return null.
+SDOperand TargetLowering::isOperandValidForConstraint(SDOperand Op,
+ char ConstraintLetter,
+ SelectionDAG &DAG) {
+ switch (ConstraintLetter) {
+ default: break;
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 's': // Relocatable Constant
+ case 'X': { // Allows any operand.
+ // These operands are interested in values of the form (GV+C), where C may
+ // be folded in as an offset of GV, or it may be explicitly added. Also, it
+ // is possible and fine if either GV or C is missing.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+
+ // If we have "(add GV, C)", pull out GV/C
+ if (Op.getOpcode() == ISD::ADD) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+ if (C == 0 || GA == 0) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
+ }
+ if (C == 0 || GA == 0)
+ C = 0, GA = 0;
+ }
+
+ // If we find a valid operand, map to the TargetXXX version so that the
+ // value itself doesn't get selected.
+ if (GA) { // Either &GV or &GV+C
+ if (ConstraintLetter != 'n') {
+ int64_t Offs = GA->getOffset();
+ if (C) Offs += C->getValue();
+ return DAG.getTargetGlobalAddress(GA->getGlobal(), Op.getValueType(),
+ Offs);
+ }
+ }
+ if (C) { // just C, no GV.
+ // Simple constants are not allowed for 's'.
+ if (ConstraintLetter != 's')
+ return DAG.getTargetConstant(C->getValue(), Op.getValueType());
+ }
+ break;
+ }
+ }
+ return SDOperand(0,0);
+}
+
+std::vector<unsigned> TargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const {
+ return std::vector<unsigned>();
+}
+
+
+std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const {
+ if (Constraint[0] != '{')
+ return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+ assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+
+ // Remove the braces from around the name.
+ std::string RegName(Constraint.begin()+1, Constraint.end()-1);
+
+ // Figure out which register class contains this reg.
+ const MRegisterInfo *RI = TM.getRegisterInfo();
+ for (MRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
+ E = RI->regclass_end(); RCI != E; ++RCI) {
+ const TargetRegisterClass *RC = *RCI;
+
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ bool isLegal = false;
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (isTypeLegal(*I)) {
+ isLegal = true;
+ break;
+ }
+ }
+
+ if (!isLegal) continue;
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ if (StringsEqualNoCase(RegName, RI->get(*I).Name))
+ return std::make_pair(*I, RC);
+ }
+ }
+
+ return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ // The default implementation accepts only a conservative RISC-like set of
+ // addressing modes: r+r and r+i.
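+ // Illustrative: "r1 + r2" and "r1 + 1234" are accepted, while
+ // "r1 + r2 + 8" and "2*r1 + 4" are rejected by the checks below.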
+
+ // Allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // Only support r+r,
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ }
+
+ return true;
+}
+
+// Magic for divide replacement
+
+struct ms {
+ int64_t m; // magic number
+ int64_t s; // shift amount
+};
+
+struct mu {
+ uint64_t m; // magic number
+ int64_t a; // add indicator
+ int64_t s; // shift amount
+};
+
+/// magic - calculate the magic numbers required to codegen an integer sdiv as
+/// a sequence of multiply and shifts. Requires that the divisor not be 0, 1,
+/// or -1.
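+/// Illustrative: for d = 7 this computes m = 0x92492493 (negative when
+/// viewed as an int32_t) and s = 2.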
+static ms magic32(int32_t d) {
+ int32_t p;
+ uint32_t ad, anc, delta, q1, r1, q2, r2, t;
+ const uint32_t two31 = 0x80000000U;
+ struct ms mag;
+
+ ad = abs(d);
+ t = two31 + ((uint32_t)d >> 31);
+ anc = t - 1 - t%ad; // absolute value of nc
+ p = 31; // initialize p
+ q1 = two31/anc; // initialize q1 = 2p/abs(nc)
+ r1 = two31 - q1*anc; // initialize r1 = rem(2p,abs(nc))
+ q2 = two31/ad; // initialize q2 = 2p/abs(d)
+ r2 = two31 - q2*ad; // initialize r2 = rem(2p,abs(d))
+ do {
+ p = p + 1;
+ q1 = 2*q1; // update q1 = 2p/abs(nc)
+ r1 = 2*r1; // update r1 = rem(2p/abs(nc))
+ if (r1 >= anc) { // must be unsigned comparison
+ q1 = q1 + 1;
+ r1 = r1 - anc;
+ }
+ q2 = 2*q2; // update q2 = 2p/abs(d)
+ r2 = 2*r2; // update r2 = rem(2p/abs(d))
+ if (r2 >= ad) { // must be unsigned comparison
+ q2 = q2 + 1;
+ r2 = r2 - ad;
+ }
+ delta = ad - r2;
+ } while (q1 < delta || (q1 == delta && r1 == 0));
+
+ mag.m = (int32_t)(q2 + 1); // make sure to sign extend
+ if (d < 0) mag.m = -mag.m; // resulting magic number
+ mag.s = p - 32; // resulting shift
+ return mag;
+}
+
+/// magicu - calculate the magic numbers required to codegen an integer udiv as
+/// a sequence of multiply, add and shifts. Requires that the divisor not be 0.
+static mu magicu32(uint32_t d) {
+ int32_t p;
+ uint32_t nc, delta, q1, r1, q2, r2;
+ struct mu magu;
+ magu.a = 0; // initialize "add" indicator
+ nc = - 1 - (-d)%d;
+ p = 31; // initialize p
+ q1 = 0x80000000/nc; // initialize q1 = 2p/nc
+ r1 = 0x80000000 - q1*nc; // initialize r1 = rem(2p,nc)
+ q2 = 0x7FFFFFFF/d; // initialize q2 = (2p-1)/d
+ r2 = 0x7FFFFFFF - q2*d; // initialize r2 = rem((2p-1),d)
+ do {
+ p = p + 1;
+ if (r1 >= nc - r1 ) {
+ q1 = 2*q1 + 1; // update q1
+ r1 = 2*r1 - nc; // update r1
+ }
+ else {
+ q1 = 2*q1; // update q1
+ r1 = 2*r1; // update r1
+ }
+ if (r2 + 1 >= d - r2) {
+ if (q2 >= 0x7FFFFFFF) magu.a = 1;
+ q2 = 2*q2 + 1; // update q2
+ r2 = 2*r2 + 1 - d; // update r2
+ }
+ else {
+ if (q2 >= 0x80000000) magu.a = 1;
+ q2 = 2*q2; // update q2
+ r2 = 2*r2 + 1; // update r2
+ }
+ delta = d - 1 - r2;
+ } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0)));
+ magu.m = q2 + 1; // resulting magic number
+ magu.s = p - 32; // resulting shift
+ return magu;
+}
+
+/// magic - calculate the magic numbers required to codegen an integer sdiv as
+/// a sequence of multiply and shifts. Requires that the divisor not be 0, 1,
+/// or -1.
+static ms magic64(int64_t d) {
+ int64_t p;
+ uint64_t ad, anc, delta, q1, r1, q2, r2, t;
+ const uint64_t two63 = 9223372036854775808ULL; // 2^63
+ struct ms mag;
+
+ ad = d >= 0 ? d : -d;
+ t = two63 + ((uint64_t)d >> 63);
+ anc = t - 1 - t%ad; // absolute value of nc
+ p = 63; // initialize p
+ q1 = two63/anc; // initialize q1 = 2p/abs(nc)
+ r1 = two63 - q1*anc; // initialize r1 = rem(2p,abs(nc))
+ q2 = two63/ad; // initialize q2 = 2p/abs(d)
+ r2 = two63 - q2*ad; // initialize r2 = rem(2p,abs(d))
+ do {
+ p = p + 1;
+ q1 = 2*q1; // update q1 = 2p/abs(nc)
+ r1 = 2*r1; // update r1 = rem(2p/abs(nc))
+ if (r1 >= anc) { // must be unsigned comparison
+ q1 = q1 + 1;
+ r1 = r1 - anc;
+ }
+ q2 = 2*q2; // update q2 = 2p/abs(d)
+ r2 = 2*r2; // update r2 = rem(2p/abs(d))
+ if (r2 >= ad) { // must be unsigned comparison
+ q2 = q2 + 1;
+ r2 = r2 - ad;
+ }
+ delta = ad - r2;
+ } while (q1 < delta || (q1 == delta && r1 == 0));
+
+ mag.m = q2 + 1;
+ if (d < 0) mag.m = -mag.m; // resulting magic number
+ mag.s = p - 64; // resulting shift
+ return mag;
+}
+
+/// magicu - calculate the magic numbers required to codegen an integer udiv as
+/// a sequence of multiply, add and shifts. Requires that the divisor not be 0.
+static mu magicu64(uint64_t d)
+{
+ int64_t p;
+ uint64_t nc, delta, q1, r1, q2, r2;
+ struct mu magu;
+ magu.a = 0; // initialize "add" indicator
+ nc = - 1 - (-d)%d;
+ p = 63; // initialize p
+ q1 = 0x8000000000000000ull/nc; // initialize q1 = 2p/nc
+ r1 = 0x8000000000000000ull - q1*nc; // initialize r1 = rem(2p,nc)
+ q2 = 0x7FFFFFFFFFFFFFFFull/d; // initialize q2 = (2p-1)/d
+ r2 = 0x7FFFFFFFFFFFFFFFull - q2*d; // initialize r2 = rem((2p-1),d)
+ do {
+ p = p + 1;
+ if (r1 >= nc - r1 ) {
+ q1 = 2*q1 + 1; // update q1
+ r1 = 2*r1 - nc; // update r1
+ }
+ else {
+ q1 = 2*q1; // update q1
+ r1 = 2*r1; // update r1
+ }
+ if (r2 + 1 >= d - r2) {
+ if (q2 >= 0x7FFFFFFFFFFFFFFFull) magu.a = 1;
+ q2 = 2*q2 + 1; // update q2
+ r2 = 2*r2 + 1 - d; // update r2
+ }
+ else {
+ if (q2 >= 0x8000000000000000ull) magu.a = 1;
+ q2 = 2*q2; // update q2
+ r2 = 2*r2 + 1; // update r2
+ }
+ delta = d - 1 - r2;
+ } while (p < 128 && (q1 < delta || (q1 == delta && r1 == 0)));
+ magu.m = q2 + 1; // resulting magic number
+ magu.s = p - 64; // resulting shift
+ return magu;
+}
+
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
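+/// Illustrative: for X sdiv 7 on i32 the emitted sequence is roughly
+/// Q = mulhs(X, m); Q = Q + X; Q = Q >>s 2; Q = Q + (Q >>u 31),
+/// given the magic32() result for 7 (negative m, shift 2).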
+SDOperand TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
+ std::vector<SDNode*>* Created) const {
+ MVT::ValueType VT = N->getValueType(0);
+
+ // Check to see if we can do this.
+ if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
+ return SDOperand(); // BuildSDIV only operates on i32 or i64
+ if (!isOperationLegal(ISD::MULHS, VT))
+ return SDOperand(); // Make sure the target supports MULHS.
+
+ int64_t d = cast<ConstantSDNode>(N->getOperand(1))->getSignExtended();
+ ms magics = (VT == MVT::i32) ? magic32(d) : magic64(d);
+
+ // Multiply the numerator (operand 0) by the magic value
+ SDOperand Q = DAG.getNode(ISD::MULHS, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, VT));
+ // If d > 0 and m < 0, add the numerator
+ if (d > 0 && magics.m < 0) {
+ Q = DAG.getNode(ISD::ADD, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.Val);
+ }
+ // If d < 0 and m > 0, subtract the numerator.
+ if (d < 0 && magics.m > 0) {
+ Q = DAG.getNode(ISD::SUB, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.Val);
+ }
+ // Shift right algebraic if shift value is nonzero
+ if (magics.s > 0) {
+ Q = DAG.getNode(ISD::SRA, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy()));
+ if (Created)
+ Created->push_back(Q.Val);
+ }
+ // Extract the sign bit and add it to the quotient
+ SDOperand T =
+ DAG.getNode(ISD::SRL, VT, Q, DAG.getConstant(MVT::getSizeInBits(VT)-1,
+ getShiftAmountTy()));
+ if (Created)
+ Created->push_back(T.Val);
+ return DAG.getNode(ISD::ADD, VT, Q, T);
+}
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDOperand TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+ std::vector<SDNode*>* Created) const {
+ MVT::ValueType VT = N->getValueType(0);
+
+ // Check to see if we can do this.
+ if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
+ return SDOperand(); // BuildUDIV only operates on i32 or i64
+ if (!isOperationLegal(ISD::MULHU, VT))
+ return SDOperand(); // Make sure the target supports MULHU.
+
+ uint64_t d = cast<ConstantSDNode>(N->getOperand(1))->getValue();
+ mu magics = (VT == MVT::i32) ? magicu32(d) : magicu64(d);
+
+ // Multiply the numerator (operand 0) by the magic value
+ SDOperand Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, VT));
+ if (Created)
+ Created->push_back(Q.Val);
+
+ if (magics.a == 0) {
+ return DAG.getNode(ISD::SRL, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy()));
+ } else {
+ SDOperand NPQ = DAG.getNode(ISD::SUB, VT, N->getOperand(0), Q);
+ if (Created)
+ Created->push_back(NPQ.Val);
+ NPQ = DAG.getNode(ISD::SRL, VT, NPQ,
+ DAG.getConstant(1, getShiftAmountTy()));
+ if (Created)
+ Created->push_back(NPQ.Val);
+ NPQ = DAG.getNode(ISD::ADD, VT, NPQ, Q);
+ if (Created)
+ Created->push_back(NPQ.Val);
+ return DAG.getNode(ISD::SRL, VT, NPQ,
+ DAG.getConstant(magics.s-1, getShiftAmountTy()));
+ }
+}
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
new file mode 100644
index 0000000..3d8618f
--- /dev/null
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -0,0 +1,1138 @@
+//===-- SimpleRegisterCoalescing.cpp - Register Coalescing ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple register coalescing pass that attempts to
+// aggressively coalesce every register copy that it can.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simpleregistercoalescing"
+#include "llvm/CodeGen/SimpleRegisterCoalescing.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "VirtRegMap.h"
+#include "llvm/Value.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+STATISTIC(numJoins , "Number of interval joins performed");
+STATISTIC(numPeep , "Number of identity moves eliminated after coalescing");
+STATISTIC(numAborts , "Number of times interval joining aborted");
+
+char SimpleRegisterCoalescing::ID = 0;
+namespace {
+ static cl::opt<bool>
+ EnableJoining("join-liveintervals",
+ cl::desc("Coalesce copies (default=true)"),
+ cl::init(true));
+
+ RegisterPass<SimpleRegisterCoalescing>
+ X("simple-register-coalescing",
+ "Simple register coalescing to eliminate all possible register copies");
+}
+
+const PassInfo *llvm::SimpleRegisterCoalescingID = X.getPassInfo();
+
+void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
+ //AU.addPreserved<LiveVariables>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreservedID(PHIEliminationID);
+ AU.addPreservedID(TwoAddressInstructionPassID);
+ AU.addRequired<LiveVariables>();
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<LoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// being the source and IntB being the dest, thus this defines a value number
+/// in IntB. If the source value number (in IntA) is defined by a copy from B,
+/// see if we can merge these two pieces of B into a single value number,
+/// eliminating a copy. For example:
+///
+/// A3 = B0
+/// ...
+/// B1 = A3 <- this copy
+///
+/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
+/// value number to be replaced with B0 (which simplifies the B liveinterval).
+///
+/// This returns true if an interval was modified.
+///
+bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, LiveInterval &IntB,
+ MachineInstr *CopyMI) {
+ unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+ unsigned BValNo = BLR->ValId;
+
+ // Get the location that B is defined at. Two options: either this value has
+ // an unknown definition point or it is defined at CopyIdx. If unknown, we
+ // can't process it.
+ unsigned BValNoDefIdx = IntB.getInstForValNum(BValNo);
+ if (BValNoDefIdx == ~0U) return false;
+ assert(BValNoDefIdx == CopyIdx &&
+ "Copy doesn't define the value?");
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ LiveInterval::iterator AValLR = IntA.FindLiveRangeContaining(CopyIdx-1);
+ unsigned AValNo = AValLR->ValId;
+
+ // If AValNo is defined as a copy from IntB, we can potentially process this.
+
+ // Get the instruction that defines this value number.
+ unsigned SrcReg = IntA.getSrcRegForValNum(AValNo);
+ if (!SrcReg) return false; // Not defined by a copy.
+
+ // If the value number is not defined by a copy instruction, ignore it.
+
+ // If the source register comes from an interval other than IntB, we can't
+ // handle this.
+ if (rep(SrcReg) != IntB.reg) return false;
+
+ // Get the LiveRange in IntB that this value number starts with.
+ unsigned AValNoInstIdx = IntA.getInstForValNum(AValNo);
+ LiveInterval::iterator ValLR = IntB.FindLiveRangeContaining(AValNoInstIdx-1);
+
+ // Make sure that the end of the live range is inside the same block as
+ // CopyMI.
+ MachineInstr *ValLREndInst = li_->getInstructionFromIndex(ValLR->end-1);
+ if (!ValLREndInst ||
+ ValLREndInst->getParent() != CopyMI->getParent()) return false;
+
+ // Okay, we now know that ValLR ends in the same block that the CopyMI
+ // live-range starts. If there are no intervening live ranges between them in
+ // IntB, we can merge them.
+ if (ValLR+1 != BLR) return false;
+
+ DOUT << "\nExtending: "; IntB.print(DOUT, mri_);
+
+ // We are about to delete CopyMI, so need to remove it as the 'instruction
+ // that defines this value #'.
+ IntB.setValueNumberInfo(BValNo, std::make_pair(~0U, 0));
+
+ // Okay, we can merge them. We need to insert a new liverange:
+ // [ValLR.end, BLR.begin) of either value number, then we merge the
+ // two value numbers.
+ unsigned FillerStart = ValLR->end, FillerEnd = BLR->start;
+ IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
+
+ // If the IntB live range is assigned to a physical register, and if that
+ // physreg has sub-registers, their live intervals must cover the new range
+ // as well.
+ if (MRegisterInfo::isPhysicalRegister(IntB.reg)) {
+ // Update the liveintervals of sub-registers.
+ for (const unsigned *AS = mri_->getSubRegisters(IntB.reg); *AS; ++AS) {
+ LiveInterval &AliasLI = li_->getInterval(*AS);
+ AliasLI.addRange(LiveRange(FillerStart, FillerEnd,
+ AliasLI.getNextValue(~0U, 0)));
+ }
+ }
+
+ // Okay, merge "B1" into the same value number as "B0".
+ if (BValNo != ValLR->ValId)
+ IntB.MergeValueNumberInto(BValNo, ValLR->ValId);
+ DOUT << " result = "; IntB.print(DOUT, mri_);
+ DOUT << "\n";
+
+ // If the source instruction was killing the source register before the
+ // merge, unset the isKill marker given the live range has been extended.
+ int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
+ if (UIdx != -1)
+ ValLREndInst->getOperand(UIdx).unsetIsKill();
+
+ // Finally, delete the copy instruction.
+ li_->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
+ ++numPeep;
+ return true;
+}
+
+/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+/// which are the src/dst of the copy instruction CopyMI. This returns true
+/// if the copy was successfully coalesced away, or if it is never possible
+/// to coalesce this copy, due to register constraints. It returns
+/// false if it is not currently possible to coalesce this interval, but
+/// it may be possible if other things get coalesced.
+bool SimpleRegisterCoalescing::JoinCopy(MachineInstr *CopyMI,
+ unsigned SrcReg, unsigned DstReg, bool PhysOnly) {
+ DOUT << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI;
+
+ // Get representative registers.
+ unsigned repSrcReg = rep(SrcReg);
+ unsigned repDstReg = rep(DstReg);
+
+ // If they are already joined we continue.
+ if (repSrcReg == repDstReg) {
+ DOUT << "\tCopy already coalesced.\n";
+ return true; // Not coalescable.
+ }
+
+ bool SrcIsPhys = MRegisterInfo::isPhysicalRegister(repSrcReg);
+ bool DstIsPhys = MRegisterInfo::isPhysicalRegister(repDstReg);
+ if (PhysOnly && !SrcIsPhys && !DstIsPhys)
+ // Only joining physical registers with virtual registers in this round.
+ return true;
+
+ // If they are both physical registers, we cannot join them.
+ if (SrcIsPhys && DstIsPhys) {
+ DOUT << "\tCan not coalesce physregs.\n";
+ return true; // Not coalescable.
+ }
+
+ // We only join virtual registers with allocatable physical registers.
+ if (SrcIsPhys && !allocatableRegs_[repSrcReg]) {
+ DOUT << "\tSrc reg is unallocatable physreg.\n";
+ return true; // Not coalescable.
+ }
+ if (DstIsPhys && !allocatableRegs_[repDstReg]) {
+ DOUT << "\tDst reg is unallocatable physreg.\n";
+ return true; // Not coalescable.
+ }
+
+ // If they are not of the same register class, we cannot join them.
+ if (differingRegisterClasses(repSrcReg, repDstReg)) {
+ DOUT << "\tSrc/Dest are different register classes.\n";
+ return true; // Not coalescable.
+ }
+
+ LiveInterval &SrcInt = li_->getInterval(repSrcReg);
+ LiveInterval &DstInt = li_->getInterval(repDstReg);
+ assert(SrcInt.reg == repSrcReg && DstInt.reg == repDstReg &&
+ "Register mapping is horribly broken!");
+
+ DOUT << "\t\tInspecting "; SrcInt.print(DOUT, mri_);
+ DOUT << " and "; DstInt.print(DOUT, mri_);
+ DOUT << ": ";
+
+ // Check if it is necessary to propagate "isDead" property before intervals
+ // are joined.
+ MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg);
+ bool isDead = mopd->isDead();
+ bool isShorten = false;
+ unsigned SrcStart = 0, RemoveStart = 0;
+ unsigned SrcEnd = 0, RemoveEnd = 0;
+ if (isDead) {
+ unsigned CopyIdx = li_->getInstructionIndex(CopyMI);
+ LiveInterval::iterator SrcLR =
+ SrcInt.FindLiveRangeContaining(li_->getUseIndex(CopyIdx));
+ RemoveStart = SrcStart = SrcLR->start;
+ RemoveEnd = SrcEnd = SrcLR->end;
+ // The instruction which defines the src is only truly dead if there are
+ // no intermediate uses and there isn't a use beyond the copy.
+ // FIXME: find the last use, mark it as a kill and shorten the live range.
+ if (SrcEnd > li_->getDefIndex(CopyIdx)) {
+ isDead = false;
+ } else {
+ MachineOperand *MOU;
+ MachineInstr *LastUse= lastRegisterUse(SrcStart, CopyIdx, repSrcReg, MOU);
+ if (LastUse) {
+ // Shorten the liveinterval to the end of last use.
+ MOU->setIsKill();
+ isDead = false;
+ isShorten = true;
+ RemoveStart = li_->getDefIndex(li_->getInstructionIndex(LastUse));
+ RemoveEnd = SrcEnd;
+ } else {
+ MachineInstr *SrcMI = li_->getInstructionFromIndex(SrcStart);
+ if (SrcMI) {
+ MachineOperand *mops = findDefOperand(SrcMI, repSrcReg);
+ if (mops)
+ // A dead def should have a single cycle interval.
+ ++RemoveStart;
+ }
+ }
+ }
+ }
+
+ // We need to be careful about coalescing a source physical register with a
+ // virtual register. Once the coalescing is done, it cannot be broken and
+ // these are not spillable! If the destination interval uses are far away,
+ // think twice about coalescing them!
+ if (!mopd->isDead() && (SrcIsPhys || DstIsPhys)) {
+ LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt;
+ unsigned JoinVReg = SrcIsPhys ? repDstReg : repSrcReg;
+ unsigned JoinPReg = SrcIsPhys ? repSrcReg : repDstReg;
+ const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(JoinVReg);
+ unsigned Threshold = allocatableRCRegs_[RC].count();
+
+ // If the virtual register live interval is long and it has low use density,
+ // do not join them; instead mark the physical register as its allocation
+ // preference.
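+ // Illustrative: with 8 allocatable registers in the class, an interval
+ // longer than 8 instructions with fewer than one use per 8 instructions
+ // is left uncoalesced.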
+ unsigned Length = JoinVInt.getSize() / InstrSlots::NUM;
+ LiveVariables::VarInfo &vi = lv_->getVarInfo(JoinVReg);
+ if (Length > Threshold &&
+ (((float)vi.NumUses / Length) < (1.0 / Threshold))) {
+ JoinVInt.preference = JoinPReg;
+ ++numAborts;
+ DOUT << "\tMay tie down a physical register, abort!\n";
+ return false;
+ }
+ }
+
+ // Okay, attempt to join these two intervals. On failure, this returns false.
+ // Otherwise, if one of the intervals being joined is a physreg, this method
+ // always canonicalizes DstInt to be it. The output "SrcInt" will not have
+ // been modified, so we can use this information below to update aliases.
+ if (JoinIntervals(DstInt, SrcInt)) {
+ if (isDead) {
+ // Result of the copy is dead. Propagate this property.
+ if (SrcStart == 0) {
+ assert(MRegisterInfo::isPhysicalRegister(repSrcReg) &&
+ "Live-in must be a physical register!");
+ // Live-in to the function but dead. Remove it from entry live-in set.
+ // JoinIntervals may end up swapping the two intervals.
+ mf_->begin()->removeLiveIn(repSrcReg);
+ } else {
+ MachineInstr *SrcMI = li_->getInstructionFromIndex(SrcStart);
+ if (SrcMI) {
+ MachineOperand *mops = findDefOperand(SrcMI, repSrcReg);
+ if (mops)
+ mops->setIsDead();
+ }
+ }
+ }
+
+ if (isShorten || isDead) {
+ // Shorten the live interval.
+ LiveInterval &LiveInInt = (repSrcReg == DstInt.reg) ? DstInt : SrcInt;
+ LiveInInt.removeRange(RemoveStart, RemoveEnd);
+ }
+ } else {
+ // Coalescing failed.
+
+ // If we can eliminate the copy without merging the live ranges, do so now.
+ if (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI))
+ return true;
+
+ // Otherwise, we are unable to join the intervals.
+ DOUT << "Interference!\n";
+ return false;
+ }
+
+ bool Swapped = repSrcReg == DstInt.reg;
+ if (Swapped)
+ std::swap(repSrcReg, repDstReg);
+ assert(MRegisterInfo::isVirtualRegister(repSrcReg) &&
+ "LiveInterval::join didn't work right!");
+
+ // If we're about to merge live ranges into a physical register live range,
+ // we have to update any aliased register's live ranges to indicate that they
+ // have clobbered values for this range.
+ if (MRegisterInfo::isPhysicalRegister(repDstReg)) {
+ // Unset unnecessary kills.
+ if (!DstInt.containsOneValue()) {
+ for (LiveInterval::Ranges::const_iterator I = SrcInt.begin(),
+ E = SrcInt.end(); I != E; ++I)
+ unsetRegisterKills(I->start, I->end, repDstReg);
+ }
+
+ // Update the liveintervals of sub-registers.
+ for (const unsigned *AS = mri_->getSubRegisters(repDstReg); *AS; ++AS)
+ li_->getInterval(*AS).MergeInClobberRanges(SrcInt);
+ } else {
+ // Merge use info if the destination is a virtual register.
+ LiveVariables::VarInfo& dVI = lv_->getVarInfo(repDstReg);
+ LiveVariables::VarInfo& sVI = lv_->getVarInfo(repSrcReg);
+ dVI.NumUses += sVI.NumUses;
+ }
+
+ DOUT << "\n\t\tJoined. Result = "; DstInt.print(DOUT, mri_);
+ DOUT << "\n";
+
+ // Remember these liveintervals have been joined.
+ JoinedLIs.set(repSrcReg - MRegisterInfo::FirstVirtualRegister);
+ if (MRegisterInfo::isVirtualRegister(repDstReg))
+ JoinedLIs.set(repDstReg - MRegisterInfo::FirstVirtualRegister);
+
+ // If the intervals were swapped by Join, swap them back so that the register
+ // mapping (in the r2i map) is correct.
+ if (Swapped) SrcInt.swap(DstInt);
+ li_->removeInterval(repSrcReg);
+ r2rMap_[repSrcReg] = repDstReg;
+
+ // Finally, delete the copy instruction.
+ li_->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
+ ++numPeep;
+ ++numJoins;
+ return true;
+}
+
+/// ComputeUltimateVN - Assuming we are going to join two live intervals,
+/// compute what the resultant value numbers for each value in the input two
+/// ranges will be. This is complicated by copies between the two which can
+/// and will commonly cause multiple value numbers to be merged into one.
+///
+/// VN is the value number that we're trying to resolve. ValueNumberInfo
+/// accumulates the value number information computed for the result
+/// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of
+/// whether a value in this or other is a copy from the opposite set.
+/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have
+/// already been assigned.
+///
+/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
+/// contains the value number the copy is from.
+///
+static unsigned ComputeUltimateVN(unsigned VN,
+ SmallVector<std::pair<unsigned,
+ unsigned>, 16> &ValueNumberInfo,
+ SmallVector<int, 16> &ThisFromOther,
+ SmallVector<int, 16> &OtherFromThis,
+ SmallVector<int, 16> &ThisValNoAssignments,
+ SmallVector<int, 16> &OtherValNoAssignments,
+ LiveInterval &ThisLI, LiveInterval &OtherLI) {
+ // If the VN has already been computed, just return it.
+ if (ThisValNoAssignments[VN] >= 0)
+ return ThisValNoAssignments[VN];
+// assert(ThisValNoAssignments[VN] != -2 && "Cyclic case?");
+
+ // If this val is not a copy from the other val, then it must be a new value
+ // number in the destination.
+ int OtherValNo = ThisFromOther[VN];
+ if (OtherValNo == -1) {
+ ValueNumberInfo.push_back(ThisLI.getValNumInfo(VN));
+ return ThisValNoAssignments[VN] = ValueNumberInfo.size()-1;
+ }
+
+ // Otherwise, this *is* a copy from the RHS. If the other side has already
+ // been computed, return it.
+ if (OtherValNoAssignments[OtherValNo] >= 0)
+ return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo];
+
+ // Mark this value number as currently being computed, then ask what the
+ // ultimate value # of the other value is.
+ ThisValNoAssignments[VN] = -2;
+ unsigned UltimateVN =
+ ComputeUltimateVN(OtherValNo, ValueNumberInfo,
+ OtherFromThis, ThisFromOther,
+ OtherValNoAssignments, ThisValNoAssignments,
+ OtherLI, ThisLI);
+ return ThisValNoAssignments[VN] = UltimateVN;
+}
+
+static bool InVector(unsigned Val, const SmallVector<unsigned, 8> &V) {
+ return std::find(V.begin(), V.end(), Val) != V.end();
+}
+
+/// SimpleJoin - Attempt to join the specified interval into this one. The
+/// caller of this method must guarantee that the RHS only contains a single
+/// value number and that the RHS is not defined by a copy from this
+/// interval. This returns false if the intervals are not joinable, or it
+/// joins them and returns true.
+bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS) {
+ assert(RHS.containsOneValue());
+
+ // Some number (potentially more than one) value numbers in the current
+ // interval may be defined as copies from the RHS. Scan the overlapping
+ // portions of the LHS and RHS, keeping track of this and looking for
+ // overlapping live ranges that are NOT defined as copies. If these exist, we
+ // cannot coalesce.
+
+ LiveInterval::iterator LHSIt = LHS.begin(), LHSEnd = LHS.end();
+ LiveInterval::iterator RHSIt = RHS.begin(), RHSEnd = RHS.end();
+
+ if (LHSIt->start < RHSIt->start) {
+ LHSIt = std::upper_bound(LHSIt, LHSEnd, RHSIt->start);
+ if (LHSIt != LHS.begin()) --LHSIt;
+ } else if (RHSIt->start < LHSIt->start) {
+ RHSIt = std::upper_bound(RHSIt, RHSEnd, LHSIt->start);
+ if (RHSIt != RHS.begin()) --RHSIt;
+ }
+
+ SmallVector<unsigned, 8> EliminatedLHSVals;
+
+ while (1) {
+ // Determine if these live intervals overlap.
+ bool Overlaps = false;
+ if (LHSIt->start <= RHSIt->start)
+ Overlaps = LHSIt->end > RHSIt->start;
+ else
+ Overlaps = RHSIt->end > LHSIt->start;
+
+ // If the live intervals overlap, there are two interesting cases: if the
+ // LHS interval is defined by a copy from the RHS, it's ok and we record
+ // that the LHS value # is the same as the RHS. If it's not, then we cannot
+ // coalesce these live ranges and we bail out.
+ if (Overlaps) {
+ // If we haven't already recorded that this value # is safe, check it.
+ if (!InVector(LHSIt->ValId, EliminatedLHSVals)) {
+ // Copy from the RHS?
+ unsigned SrcReg = LHS.getSrcRegForValNum(LHSIt->ValId);
+ if (rep(SrcReg) != RHS.reg)
+ return false; // Nope, bail out.
+
+ EliminatedLHSVals.push_back(LHSIt->ValId);
+ }
+
+ // We know this entire LHS live range is okay, so skip it now.
+ if (++LHSIt == LHSEnd) break;
+ continue;
+ }
+
+ if (LHSIt->end < RHSIt->end) {
+ if (++LHSIt == LHSEnd) break;
+ } else {
+ // One interesting case to check here. It's possible that we have
+ // something like "X3 = Y" which defines a new value number in the LHS,
+ // and is the last use of this liverange of the RHS. In this case, we
+ // want to notice this copy (so that it gets coalesced away) even though
+ // the live ranges don't actually overlap.
+ if (LHSIt->start == RHSIt->end) {
+ if (InVector(LHSIt->ValId, EliminatedLHSVals)) {
+ // We already know that this value number is going to be merged in
+ // if coalescing succeeds. Just skip the liverange.
+ if (++LHSIt == LHSEnd) break;
+ } else {
+ // Otherwise, if this is a copy from the RHS, mark it as being merged
+ // in.
+ if (rep(LHS.getSrcRegForValNum(LHSIt->ValId)) == RHS.reg) {
+ EliminatedLHSVals.push_back(LHSIt->ValId);
+
+ // We know this entire LHS live range is okay, so skip it now.
+ if (++LHSIt == LHSEnd) break;
+ }
+ }
+ }
+
+ if (++RHSIt == RHSEnd) break;
+ }
+ }
+
+ // If we got here, we know that the coalescing will be successful and that
+ // the value numbers in EliminatedLHSVals will all be merged together. Since
+ // the most common case is that EliminatedLHSVals has a single number, we
+ // optimize for it: if there is more than one value, we merge them all into
+ // the lowest numbered one, then handle the interval as if we were merging
+ // with one value number.
+ unsigned LHSValNo;
+ if (EliminatedLHSVals.size() > 1) {
+ // Loop through all the equal value numbers merging them into the smallest
+ // one.
+ unsigned Smallest = EliminatedLHSVals[0];
+ for (unsigned i = 1, e = EliminatedLHSVals.size(); i != e; ++i) {
+ if (EliminatedLHSVals[i] < Smallest) {
+ // Merge the current notion of the smallest into the smaller one.
+ LHS.MergeValueNumberInto(Smallest, EliminatedLHSVals[i]);
+ Smallest = EliminatedLHSVals[i];
+ } else {
+ // Merge into the smallest.
+ LHS.MergeValueNumberInto(EliminatedLHSVals[i], Smallest);
+ }
+ }
+ LHSValNo = Smallest;
+ } else {
+ assert(!EliminatedLHSVals.empty() && "No copies from the RHS?");
+ LHSValNo = EliminatedLHSVals[0];
+ }
+
+ // Okay, now that there is a single LHS value number that we're merging the
+ // RHS into, update the value number info for the LHS to indicate that the
+ // value number is defined where the RHS value number was.
+ LHS.setValueNumberInfo(LHSValNo, RHS.getValNumInfo(0));
+
+ // Okay, the final step is to loop over the RHS live intervals, adding them to
+ // the LHS.
+ LHS.MergeRangesInAsValue(RHS, LHSValNo);
+ LHS.weight += RHS.weight;
+ if (RHS.preference && !LHS.preference)
+ LHS.preference = RHS.preference;
+
+ return true;
+}
+
+/// JoinIntervals - Attempt to join these two intervals. On failure, this
+/// returns false. Otherwise, if one of the intervals being joined is a
+/// physreg, this method always canonicalizes LHS to be it. The output
+/// "RHS" will not have been modified, so we can use this information
+/// below to update aliases.
+bool SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS) {
+ // Compute the final value assignment, assuming that the live ranges can be
+ // coalesced.
+ SmallVector<int, 16> LHSValNoAssignments;
+ SmallVector<int, 16> RHSValNoAssignments;
+ SmallVector<std::pair<unsigned,unsigned>, 16> ValueNumberInfo;
+
+ // If a live interval is a physical register, conservatively check if any
+ // of its sub-registers is overlapping the live interval of the virtual
+ // register. If so, do not coalesce.
+ if (MRegisterInfo::isPhysicalRegister(LHS.reg) &&
+ *mri_->getSubRegisters(LHS.reg)) {
+ for (const unsigned* SR = mri_->getSubRegisters(LHS.reg); *SR; ++SR)
+ if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+ DOUT << "Interfere with sub-register ";
+ DEBUG(li_->getInterval(*SR).print(DOUT, mri_));
+ return false;
+ }
+ } else if (MRegisterInfo::isPhysicalRegister(RHS.reg) &&
+ *mri_->getSubRegisters(RHS.reg)) {
+ for (const unsigned* SR = mri_->getSubRegisters(RHS.reg); *SR; ++SR)
+ if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
+ DOUT << "Interfere with sub-register ";
+ DEBUG(li_->getInterval(*SR).print(DOUT, mri_));
+ return false;
+ }
+ }
+
+ // Compute ultimate value numbers for the LHS and RHS values.
+ if (RHS.containsOneValue()) {
+ // Copies from a liveinterval with a single value are simple to handle and
+    // very common, so handle that special case here. This is important because
+ // often RHS is small and LHS is large (e.g. a physreg).
+
+ // Find out if the RHS is defined as a copy from some value in the LHS.
+ int RHSValID = -1;
+ std::pair<unsigned,unsigned> RHSValNoInfo;
+ unsigned RHSSrcReg = RHS.getSrcRegForValNum(0);
+ if ((RHSSrcReg == 0 || rep(RHSSrcReg) != LHS.reg)) {
+ // If RHS is not defined as a copy from the LHS, we can use simpler and
+ // faster checks to see if the live ranges are coalescable. This joiner
+ // can't swap the LHS/RHS intervals though.
+ if (!MRegisterInfo::isPhysicalRegister(RHS.reg)) {
+ return SimpleJoin(LHS, RHS);
+ } else {
+ RHSValNoInfo = RHS.getValNumInfo(0);
+ }
+ } else {
+ // It was defined as a copy from the LHS, find out what value # it is.
+ unsigned ValInst = RHS.getInstForValNum(0);
+ RHSValID = LHS.getLiveRangeContaining(ValInst-1)->ValId;
+ RHSValNoInfo = LHS.getValNumInfo(RHSValID);
+ }
+
+ LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
+ RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
+ ValueNumberInfo.resize(LHS.getNumValNums());
+
+ // Okay, *all* of the values in LHS that are defined as a copy from RHS
+ // should now get updated.
+ for (unsigned VN = 0, e = LHS.getNumValNums(); VN != e; ++VN) {
+ if (unsigned LHSSrcReg = LHS.getSrcRegForValNum(VN)) {
+ if (rep(LHSSrcReg) != RHS.reg) {
+ // If this is not a copy from the RHS, its value number will be
+ // unmodified by the coalescing.
+ ValueNumberInfo[VN] = LHS.getValNumInfo(VN);
+ LHSValNoAssignments[VN] = VN;
+ } else if (RHSValID == -1) {
+ // Otherwise, it is a copy from the RHS, and we don't already have a
+ // value# for it. Keep the current value number, but remember it.
+ LHSValNoAssignments[VN] = RHSValID = VN;
+ ValueNumberInfo[VN] = RHSValNoInfo;
+ } else {
+ // Otherwise, use the specified value #.
+ LHSValNoAssignments[VN] = RHSValID;
+ if (VN != (unsigned)RHSValID)
+ ValueNumberInfo[VN].first = ~1U;
+ else
+ ValueNumberInfo[VN] = RHSValNoInfo;
+ }
+ } else {
+ ValueNumberInfo[VN] = LHS.getValNumInfo(VN);
+ LHSValNoAssignments[VN] = VN;
+ }
+ }
+
+ assert(RHSValID != -1 && "Didn't find value #?");
+ RHSValNoAssignments[0] = RHSValID;
+
+ } else {
+ // Loop over the value numbers of the LHS, seeing if any are defined from
+ // the RHS.
+ SmallVector<int, 16> LHSValsDefinedFromRHS;
+ LHSValsDefinedFromRHS.resize(LHS.getNumValNums(), -1);
+ for (unsigned VN = 0, e = LHS.getNumValNums(); VN != e; ++VN) {
+ unsigned ValSrcReg = LHS.getSrcRegForValNum(VN);
+ if (ValSrcReg == 0) // Src not defined by a copy?
+ continue;
+
+ // DstReg is known to be a register in the LHS interval. If the src is
+ // from the RHS interval, we can use its value #.
+ if (rep(ValSrcReg) != RHS.reg)
+ continue;
+
+ // Figure out the value # from the RHS.
+ unsigned ValInst = LHS.getInstForValNum(VN);
+ LHSValsDefinedFromRHS[VN] = RHS.getLiveRangeContaining(ValInst-1)->ValId;
+ }
+
+ // Loop over the value numbers of the RHS, seeing if any are defined from
+ // the LHS.
+ SmallVector<int, 16> RHSValsDefinedFromLHS;
+ RHSValsDefinedFromLHS.resize(RHS.getNumValNums(), -1);
+ for (unsigned VN = 0, e = RHS.getNumValNums(); VN != e; ++VN) {
+ unsigned ValSrcReg = RHS.getSrcRegForValNum(VN);
+ if (ValSrcReg == 0) // Src not defined by a copy?
+ continue;
+
+ // DstReg is known to be a register in the RHS interval. If the src is
+ // from the LHS interval, we can use its value #.
+ if (rep(ValSrcReg) != LHS.reg)
+ continue;
+
+ // Figure out the value # from the LHS.
+ unsigned ValInst = RHS.getInstForValNum(VN);
+ RHSValsDefinedFromLHS[VN] = LHS.getLiveRangeContaining(ValInst-1)->ValId;
+ }
+
+ LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
+ RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
+ ValueNumberInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
+
+ for (unsigned VN = 0, e = LHS.getNumValNums(); VN != e; ++VN) {
+ if (LHSValNoAssignments[VN] >= 0 || LHS.getInstForValNum(VN) == ~2U)
+ continue;
+ ComputeUltimateVN(VN, ValueNumberInfo,
+ LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
+ LHSValNoAssignments, RHSValNoAssignments, LHS, RHS);
+ }
+ for (unsigned VN = 0, e = RHS.getNumValNums(); VN != e; ++VN) {
+ if (RHSValNoAssignments[VN] >= 0 || RHS.getInstForValNum(VN) == ~2U)
+ continue;
+ // If this value number isn't a copy from the LHS, it's a new number.
+ if (RHSValsDefinedFromLHS[VN] == -1) {
+ ValueNumberInfo.push_back(RHS.getValNumInfo(VN));
+ RHSValNoAssignments[VN] = ValueNumberInfo.size()-1;
+ continue;
+ }
+
+ ComputeUltimateVN(VN, ValueNumberInfo,
+ RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
+ RHSValNoAssignments, LHSValNoAssignments, RHS, LHS);
+ }
+ }
+
+ // Armed with the mappings of LHS/RHS values to ultimate values, walk the
+ // interval lists to see if these intervals are coalescable.
+ LiveInterval::const_iterator I = LHS.begin();
+ LiveInterval::const_iterator IE = LHS.end();
+ LiveInterval::const_iterator J = RHS.begin();
+ LiveInterval::const_iterator JE = RHS.end();
+
+ // Skip ahead until the first place of potential sharing.
+ if (I->start < J->start) {
+ I = std::upper_bound(I, IE, J->start);
+ if (I != LHS.begin()) --I;
+ } else if (J->start < I->start) {
+ J = std::upper_bound(J, JE, I->start);
+ if (J != RHS.begin()) --J;
+ }
+
+ while (1) {
+ // Determine if these two live ranges overlap.
+ bool Overlaps;
+ if (I->start < J->start) {
+ Overlaps = I->end > J->start;
+ } else {
+ Overlaps = J->end > I->start;
+ }
+
+ // If so, check value # info to determine if they are really different.
+ if (Overlaps) {
+ // If the live range overlap will map to the same value number in the
+ // result liverange, we can still coalesce them. If not, we can't.
+ if (LHSValNoAssignments[I->ValId] != RHSValNoAssignments[J->ValId])
+ return false;
+ }
+
+ if (I->end < J->end) {
+ ++I;
+ if (I == IE) break;
+ } else {
+ ++J;
+ if (J == JE) break;
+ }
+ }
+
+ // If we get here, we know that we can coalesce the live ranges. Ask the
+ // intervals to coalesce themselves now.
+ LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0],
+ ValueNumberInfo);
+ return true;
+}
+
+namespace {
+  // DepthMBBCompare - Comparison predicate that sorts first based on the loop
+ // depth of the basic block (the unsigned), and then on the MBB number.
+ struct DepthMBBCompare {
+ typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
+ bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
+ if (LHS.first > RHS.first) return true; // Deeper loops first
+ return LHS.first == RHS.first &&
+ LHS.second->getNumber() < RHS.second->getNumber();
+ }
+ };
+}
+
+void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
+ std::vector<CopyRec> *TryAgain, bool PhysOnly) {
+ DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n";
+
+ for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+ MII != E;) {
+ MachineInstr *Inst = MII++;
+
+ // If this isn't a copy, we can't join intervals.
+ unsigned SrcReg, DstReg;
+ if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg)) continue;
+
+ if (TryAgain && !JoinCopy(Inst, SrcReg, DstReg, PhysOnly))
+ TryAgain->push_back(getCopyRec(Inst, SrcReg, DstReg));
+ }
+}
+
+void SimpleRegisterCoalescing::joinIntervals() {
+ DOUT << "********** JOINING INTERVALS ***********\n";
+
+ JoinedLIs.resize(li_->getNumIntervals());
+ JoinedLIs.reset();
+
+ std::vector<CopyRec> TryAgainList;
+ const LoopInfo &LI = getAnalysis<LoopInfo>();
+ if (LI.begin() == LI.end()) {
+ // If there are no loops in the function, join intervals in function order.
+ for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
+ I != E; ++I)
+ CopyCoalesceInMBB(I, &TryAgainList);
+ } else {
+ // Otherwise, join intervals in inner loops before other intervals.
+ // Unfortunately we can't just iterate over loop hierarchy here because
+ // there may be more MBB's than BB's. Collect MBB's for sorting.
+
+ // Join intervals in the function prolog first. We want to join physical
+    // registers with virtual registers before the intervals get too long.
+ std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
+ for (MachineFunction::iterator I = mf_->begin(), E = mf_->end(); I != E;++I)
+ MBBs.push_back(std::make_pair(LI.getLoopDepth(I->getBasicBlock()), I));
+
+ // Sort by loop depth.
+ std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
+
+ // Finally, join intervals in loop nest order.
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
+ CopyCoalesceInMBB(MBBs[i].second, NULL, true);
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
+ CopyCoalesceInMBB(MBBs[i].second, &TryAgainList, false);
+ }
+
+ // Joining intervals can allow other intervals to be joined. Iteratively join
+ // until we make no progress.
+ bool ProgressMade = true;
+ while (ProgressMade) {
+ ProgressMade = false;
+
+ for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
+ CopyRec &TheCopy = TryAgainList[i];
+ if (TheCopy.MI &&
+ JoinCopy(TheCopy.MI, TheCopy.SrcReg, TheCopy.DstReg)) {
+ TheCopy.MI = 0; // Mark this one as done.
+ ProgressMade = true;
+ }
+ }
+ }
+
+  // Some live ranges have been lengthened due to coalescing; eliminate the
+  // unnecessary kills.
+ int RegNum = JoinedLIs.find_first();
+ while (RegNum != -1) {
+ unsigned Reg = RegNum + MRegisterInfo::FirstVirtualRegister;
+ unsigned repReg = rep(Reg);
+ LiveInterval &LI = li_->getInterval(repReg);
+ LiveVariables::VarInfo& svi = lv_->getVarInfo(Reg);
+ for (unsigned i = 0, e = svi.Kills.size(); i != e; ++i) {
+ MachineInstr *Kill = svi.Kills[i];
+ // Suppose vr1 = op vr2, x
+ // and vr1 and vr2 are coalesced. vr2 should still be marked kill
+ // unless it is a two-address operand.
+ if (li_->isRemoved(Kill) || hasRegisterDef(Kill, repReg))
+ continue;
+ if (LI.liveAt(li_->getInstructionIndex(Kill) + InstrSlots::NUM))
+ unsetRegisterKill(Kill, repReg);
+ }
+ RegNum = JoinedLIs.find_next(RegNum);
+ }
+
+ DOUT << "*** Register mapping ***\n";
+ for (int i = 0, e = r2rMap_.size(); i != e; ++i)
+ if (r2rMap_[i]) {
+ DOUT << " reg " << i << " -> ";
+ DEBUG(printRegName(r2rMap_[i]));
+ DOUT << "\n";
+ }
+}
+
+/// Return true if the two specified registers belong to different register
+/// classes. The registers may be either phys or virt regs.
+bool SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA,
+ unsigned RegB) const {
+
+ // Get the register classes for the first reg.
+ if (MRegisterInfo::isPhysicalRegister(RegA)) {
+ assert(MRegisterInfo::isVirtualRegister(RegB) &&
+ "Shouldn't consider two physregs!");
+ return !mf_->getSSARegMap()->getRegClass(RegB)->contains(RegA);
+ }
+
+ // Compare against the regclass for the second reg.
+ const TargetRegisterClass *RegClass = mf_->getSSARegMap()->getRegClass(RegA);
+ if (MRegisterInfo::isVirtualRegister(RegB))
+ return RegClass != mf_->getSSARegMap()->getRegClass(RegB);
+ else
+ return !RegClass->contains(RegB);
+}
+
+/// lastRegisterUse - Returns the last use of the specified register between
+/// cycles Start and End. It also returns the use operand by reference. It
+/// returns NULL if there are no uses.
+MachineInstr *
+SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End, unsigned Reg,
+ MachineOperand *&MOU) {
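+  // Scan backwards, one instruction slot at a time, from the slot containing End-1 down toward Start.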
+ int e = (End-1) / InstrSlots::NUM * InstrSlots::NUM;
+ int s = Start;
+ while (e >= s) {
+ // Skip deleted instructions
+ MachineInstr *MI = li_->getInstructionFromIndex(e);
+ while ((e - InstrSlots::NUM) >= s && !MI) {
+ e -= InstrSlots::NUM;
+ MI = li_->getInstructionFromIndex(e);
+ }
+ if (e < s || MI == NULL)
+ return NULL;
+
+ for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse() && MO.getReg() &&
+ mri_->regsOverlap(rep(MO.getReg()), Reg)) {
+ MOU = &MO;
+ return MI;
+ }
+ }
+
+ e -= InstrSlots::NUM;
+ }
+
+ return NULL;
+}
+
+
+/// findDefOperand - Returns the MachineOperand that is a def of the specified
+/// register. It returns NULL if the def is not found.
+MachineOperand *SimpleRegisterCoalescing::findDefOperand(MachineInstr *MI, unsigned Reg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() &&
+ mri_->regsOverlap(rep(MO.getReg()), Reg))
+ return &MO;
+ }
+ return NULL;
+}
+
+/// unsetRegisterKill - Unset the IsKill property of all uses of the specified
+/// register in the specified instruction.
+void SimpleRegisterCoalescing::unsetRegisterKill(MachineInstr *MI, unsigned Reg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse() && MO.isKill() && MO.getReg() &&
+ mri_->regsOverlap(rep(MO.getReg()), Reg))
+ MO.unsetIsKill();
+ }
+}
+
+/// unsetRegisterKills - Unset the IsKill property of all uses of the specified
+/// register between cycles Start and End.
+void SimpleRegisterCoalescing::unsetRegisterKills(unsigned Start, unsigned End,
+ unsigned Reg) {
+ int e = (End-1) / InstrSlots::NUM * InstrSlots::NUM;
+ int s = Start;
+ while (e >= s) {
+ // Skip deleted instructions
+ MachineInstr *MI = li_->getInstructionFromIndex(e);
+ while ((e - InstrSlots::NUM) >= s && !MI) {
+ e -= InstrSlots::NUM;
+ MI = li_->getInstructionFromIndex(e);
+ }
+ if (e < s || MI == NULL)
+ return;
+
+ for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse() && MO.isKill() && MO.getReg() &&
+ mri_->regsOverlap(rep(MO.getReg()), Reg)) {
+ MO.unsetIsKill();
+ }
+ }
+
+ e -= InstrSlots::NUM;
+ }
+}
+
+/// hasRegisterDef - True if the instruction defines the specified register.
+///
+bool SimpleRegisterCoalescing::hasRegisterDef(MachineInstr *MI, unsigned Reg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() &&
+ mri_->regsOverlap(rep(MO.getReg()), Reg))
+ return true;
+ }
+ return false;
+}
+
+void SimpleRegisterCoalescing::printRegName(unsigned reg) const {
+ if (MRegisterInfo::isPhysicalRegister(reg))
+ cerr << mri_->getName(reg);
+ else
+ cerr << "%reg" << reg;
+}
+
+void SimpleRegisterCoalescing::releaseMemory() {
+ r2rMap_.clear();
+ JoinedLIs.clear();
+}
+
+static bool isZeroLengthInterval(LiveInterval *li) {
+ for (LiveInterval::Ranges::const_iterator
+ i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)
+ if (i->end - i->start > LiveIntervals::InstrSlots::NUM)
+ return false;
+ return true;
+}
+
+bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ tm_ = &fn.getTarget();
+ mri_ = tm_->getRegisterInfo();
+ tii_ = tm_->getInstrInfo();
+ li_ = &getAnalysis<LiveIntervals>();
+ lv_ = &getAnalysis<LiveVariables>();
+
+ DOUT << "********** SIMPLE REGISTER COALESCING **********\n"
+ << "********** Function: "
+ << ((Value*)mf_->getFunction())->getName() << '\n';
+
+ allocatableRegs_ = mri_->getAllocatableSet(fn);
+ for (MRegisterInfo::regclass_iterator I = mri_->regclass_begin(),
+ E = mri_->regclass_end(); I != E; ++I)
+ allocatableRCRegs_.insert(std::make_pair(*I,mri_->getAllocatableSet(fn, *I)));
+
+ r2rMap_.grow(mf_->getSSARegMap()->getLastVirtReg());
+
+ // Join (coalesce) intervals if requested.
+ if (EnableJoining) {
+ joinIntervals();
+ DOUT << "********** INTERVALS POST JOINING **********\n";
+ for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) {
+ I->second.print(DOUT, mri_);
+ DOUT << "\n";
+ }
+ }
+
+ // perform a final pass over the instructions and compute spill
+ // weights, coalesce virtual registers and remove identity moves.
+ const LoopInfo &loopInfo = getAnalysis<LoopInfo>();
+
+ for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+ mbbi != mbbe; ++mbbi) {
+ MachineBasicBlock* mbb = mbbi;
+ unsigned loopDepth = loopInfo.getLoopDepth(mbb->getBasicBlock());
+
+ for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
+ mii != mie; ) {
+      // if the move will be an identity move, delete it
+ unsigned srcReg, dstReg, RegRep;
+ if (tii_->isMoveInstr(*mii, srcReg, dstReg) &&
+ (RegRep = rep(srcReg)) == rep(dstReg)) {
+ // remove from def list
+ LiveInterval &RegInt = li_->getOrCreateInterval(RegRep);
+ MachineOperand *MO = mii->findRegisterDefOperand(dstReg);
+        // If the def of this move instruction is dead, remove its live range
+        // from the destination register's live interval.
+ if (MO->isDead()) {
+ unsigned MoveIdx = li_->getDefIndex(li_->getInstructionIndex(mii));
+ LiveInterval::iterator MLR = RegInt.FindLiveRangeContaining(MoveIdx);
+ RegInt.removeRange(MLR->start, MoveIdx+1);
+ if (RegInt.empty())
+ li_->removeInterval(RegRep);
+ }
+ li_->RemoveMachineInstrFromMaps(mii);
+ mii = mbbi->erase(mii);
+ ++numPeep;
+ } else {
+ SmallSet<unsigned, 4> UniqueUses;
+ for (unsigned i = 0, e = mii->getNumOperands(); i != e; ++i) {
+ const MachineOperand &mop = mii->getOperand(i);
+ if (mop.isRegister() && mop.getReg() &&
+ MRegisterInfo::isVirtualRegister(mop.getReg())) {
+ // replace register with representative register
+ unsigned reg = rep(mop.getReg());
+ mii->getOperand(i).setReg(reg);
+
+ // Multiple uses of reg by the same instruction. It should not
+ // contribute to spill weight again.
+ if (UniqueUses.count(reg) != 0)
+ continue;
+ LiveInterval &RegInt = li_->getInterval(reg);
+ float w = (mop.isUse()+mop.isDef()) * powf(10.0F, (float)loopDepth);
+ // If the definition instruction is re-materializable, its spill
+ // weight is half of what it would have been normally unless it's
+ // a load from fixed stack slot.
+ int Dummy;
+ if (RegInt.remat && !tii_->isLoadFromStackSlot(RegInt.remat, Dummy))
+ w /= 2;
+ RegInt.weight += w;
+ UniqueUses.insert(reg);
+ }
+ }
+ ++mii;
+ }
+ }
+ }
+
+ for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) {
+ LiveInterval &LI = I->second;
+ if (MRegisterInfo::isVirtualRegister(LI.reg)) {
+ // If the live interval length is essentially zero, i.e. in every live
+ // range the use follows def immediately, it doesn't make sense to spill
+ // it and hope it will be easier to allocate for this li.
+ if (isZeroLengthInterval(&LI))
+ LI.weight = HUGE_VALF;
+
+ // Slightly prefer live interval that has been assigned a preferred reg.
+ if (LI.preference)
+ LI.weight *= 1.01F;
+
+ // Divide the weight of the interval by its size. This encourages
+ // spilling of intervals that are large and have few uses, and
+ // discourages spilling of small intervals with many uses.
+ LI.weight /= LI.getSize();
+ }
+ }
+
+ DEBUG(dump());
+ return true;
+}
+
+/// print - Implement the dump method.
+void SimpleRegisterCoalescing::print(std::ostream &O, const Module* m) const {
+ li_->print(O, m);
+}
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
new file mode 100644
index 0000000..372b1b3
--- /dev/null
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -0,0 +1,228 @@
+//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TwoAddress instruction pass which is used
+// by most register allocators. Two-Address instructions are rewritten
+// from:
+//
+// A = B op C
+//
+// to:
+//
+// A = B
+// A op= C
+//
+// Note that if a register allocator chooses to use this pass, it
+// has to be capable of handling the non-SSA nature of these rewritten
+// virtual registers.
+//
+// It is also worth noting that the duplicate operand of the two
+// address instruction is removed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "twoaddrinstr"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
+STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
+STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
+
+namespace {
+ struct VISIBILITY_HIDDEN TwoAddressInstructionPass
+ : public MachineFunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ TwoAddressInstructionPass() : MachineFunctionPass((intptr_t)&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ /// runOnMachineFunction - pass entry point
+ bool runOnMachineFunction(MachineFunction&);
+ };
+
+ char TwoAddressInstructionPass::ID = 0;
+ RegisterPass<TwoAddressInstructionPass>
+ X("twoaddressinstruction", "Two-Address instruction pass");
+}
+
+const PassInfo *llvm::TwoAddressInstructionPassID = X.getPassInfo();
+
+void TwoAddressInstructionPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveVariables>();
+ AU.addPreserved<LiveVariables>();
+ AU.addPreservedID(PHIEliminationID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// runOnMachineFunction - Reduce two-address instructions to two
+/// operands.
+///
+bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
+ DOUT << "Machine Function\n";
+ const TargetMachine &TM = MF.getTarget();
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const MRegisterInfo &MRI = *TM.getRegisterInfo();
+ LiveVariables &LV = getAnalysis<LiveVariables>();
+
+ bool MadeChange = false;
+
+ DOUT << "********** REWRITING TWO-ADDR INSTRS **********\n";
+ DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+
+ for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+ mbbi != mbbe; ++mbbi) {
+ for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+ mi != me; ++mi) {
+ const TargetInstrDescriptor *TID = mi->getInstrDescriptor();
+
+ bool FirstTied = true;
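+      // Scan the operands for ones that are constrained (TIED_TO) to an
+      // earlier operand; these are the two-address operands.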
+ for (unsigned si = 1, e = TID->numOperands; si < e; ++si) {
+ int ti = TID->getOperandConstraint(si, TOI::TIED_TO);
+ if (ti == -1)
+ continue;
+
+ if (FirstTied) {
+ ++NumTwoAddressInstrs;
+ DOUT << '\t'; DEBUG(mi->print(*cerr.stream(), &TM));
+ }
+ FirstTied = false;
+
+ assert(mi->getOperand(si).isRegister() && mi->getOperand(si).getReg() &&
+ mi->getOperand(si).isUse() && "two address instruction invalid");
+
+        // If the two operands are the same, we just remove the use and mark
+        // the def as def&use; otherwise we have to insert a copy.
+ if (mi->getOperand(ti).getReg() != mi->getOperand(si).getReg()) {
+ // rewrite:
+ // a = b op c
+ // to:
+ // a = b
+ // a = a op c
+ unsigned regA = mi->getOperand(ti).getReg();
+ unsigned regB = mi->getOperand(si).getReg();
+
+ assert(MRegisterInfo::isVirtualRegister(regA) &&
+ MRegisterInfo::isVirtualRegister(regB) &&
+ "cannot update physical register live information");
+
+#ifndef NDEBUG
+ // First, verify that we don't have a use of a in the instruction (a =
+ // b + a for example) because our transformation will not work. This
+ // should never occur because we are in SSA form.
+ for (unsigned i = 0; i != mi->getNumOperands(); ++i)
+ assert((int)i == ti ||
+ !mi->getOperand(i).isRegister() ||
+ mi->getOperand(i).getReg() != regA);
+#endif
+
+ // If this instruction is not the killing user of B, see if we can
+ // rearrange the code to make it so. Making it the killing user will
+ // allow us to coalesce A and B together, eliminating the copy we are
+ // about to insert.
+ if (!LV.KillsRegister(mi, regB)) {
+ // If this instruction is commutative, check to see if C dies. If
+ // so, swap the B and C operands. This makes the live ranges of A
+ // and C joinable.
+ // FIXME: This code also works for A := B op C instructions.
+ if ((TID->Flags & M_COMMUTABLE) && mi->getNumOperands() == 3) {
+ assert(mi->getOperand(3-si).isRegister() &&
+ "Not a proper commutative instruction!");
+ unsigned regC = mi->getOperand(3-si).getReg();
+ if (LV.KillsRegister(mi, regC)) {
+ DOUT << "2addr: COMMUTING : " << *mi;
+ MachineInstr *NewMI = TII.commuteInstruction(mi);
+ if (NewMI == 0) {
+ DOUT << "2addr: COMMUTING FAILED!\n";
+ } else {
+ DOUT << "2addr: COMMUTED TO: " << *NewMI;
+ // If the instruction changed to commute it, update livevar.
+ if (NewMI != mi) {
+ LV.instructionChanged(mi, NewMI); // Update live variables
+ mbbi->insert(mi, NewMI); // Insert the new inst
+ mbbi->erase(mi); // Nuke the old inst.
+ mi = NewMI;
+ }
+
+ ++NumCommuted;
+ regB = regC;
+ goto InstructionRearranged;
+ }
+ }
+ }
+
+ // If this instruction is potentially convertible to a true
+ // three-address instruction,
+          if (TID->Flags & M_CONVERTIBLE_TO_3_ADDR) {
+ // FIXME: This assumes there are no more operands which are tied
+ // to another register.
+#ifndef NDEBUG
+ for (unsigned i = si+1, e = TID->numOperands; i < e; ++i)
+ assert(TID->getOperandConstraint(i, TOI::TIED_TO) == -1);
+#endif
+
+ if (MachineInstr *New = TII.convertToThreeAddress(mbbi, mi, LV)) {
+ DOUT << "2addr: CONVERTING 2-ADDR: " << *mi;
+ DOUT << "2addr: TO 3-ADDR: " << *New;
+ mbbi->erase(mi); // Nuke the old inst.
+ mi = New;
+ ++NumConvertedTo3Addr;
+ // Done with this instruction.
+ break;
+          }
+          } // end of M_CONVERTIBLE_TO_3_ADDR check
+ }
+
+ InstructionRearranged:
+ const TargetRegisterClass* rc = MF.getSSARegMap()->getRegClass(regA);
+ MRI.copyRegToReg(*mbbi, mi, regA, regB, rc);
+
+ MachineBasicBlock::iterator prevMi = prior(mi);
+ DOUT << "\t\tprepend:\t"; DEBUG(prevMi->print(*cerr.stream(), &TM));
+
+ // Update live variables for regA
+ LiveVariables::VarInfo& varInfo = LV.getVarInfo(regA);
+ varInfo.DefInst = prevMi;
+
+ if (LV.removeVirtualRegisterKilled(regB, mbbi, mi))
+ LV.addVirtualRegisterKilled(regB, prevMi);
+
+ if (LV.removeVirtualRegisterDead(regB, mbbi, mi))
+ LV.addVirtualRegisterDead(regB, prevMi);
+
+        // replace all occurrences of regB with regA
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+ if (mi->getOperand(i).isRegister() &&
+ mi->getOperand(i).getReg() == regB)
+ mi->getOperand(i).setReg(regA);
+ }
+ }
+
+ assert(mi->getOperand(ti).isDef() && mi->getOperand(si).isUse());
+ mi->getOperand(ti).setReg(mi->getOperand(si).getReg());
+ MadeChange = true;
+
+ DOUT << "\t\trewrite to:\t"; DEBUG(mi->print(*cerr.stream(), &TM));
+ }
+ }
+ }
+
+ return MadeChange;
+}
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
new file mode 100644
index 0000000..0a611ab
--- /dev/null
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -0,0 +1,81 @@
+//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is an extremely simple version of the SimplifyCFG pass. Its sole
+// job is to delete LLVM basic blocks that are not reachable from the entry
+// node. To do this, it performs a simple depth first traversal of the CFG,
+// then deletes any unvisited nodes.
+//
+// Note that this pass is really a hack. In particular, the instruction
+// selectors for various targets should just not generate code for unreachable
+// blocks. Until LLVM has a more systematic way of defining instruction
+// selectors, however, we cannot really expect them to handle additional
+// complexity.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Constant.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+using namespace llvm;
+
+namespace {
+ class VISIBILITY_HIDDEN UnreachableBlockElim : public FunctionPass {
+ virtual bool runOnFunction(Function &F);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableBlockElim() : FunctionPass((intptr_t)&ID) {}
+ };
+ char UnreachableBlockElim::ID = 0;
+ RegisterPass<UnreachableBlockElim>
+ X("unreachableblockelim", "Remove unreachable blocks from the CFG");
+}
+
+FunctionPass *llvm::createUnreachableBlockEliminationPass() {
+ return new UnreachableBlockElim();
+}
+
+bool UnreachableBlockElim::runOnFunction(Function &F) {
+ std::set<BasicBlock*> Reachable;
+
+ // Mark all reachable blocks.
+ for (df_ext_iterator<Function*> I = df_ext_begin(&F, Reachable),
+ E = df_ext_end(&F, Reachable); I != E; ++I)
+ /* Mark all reachable blocks */;
+
+ // Loop over all dead blocks, remembering them and deleting all instructions
+ // in them.
+ std::vector<BasicBlock*> DeadBlocks;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ if (!Reachable.count(I)) {
+ BasicBlock *BB = I;
+ DeadBlocks.push_back(BB);
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ PN->replaceAllUsesWith(Constant::getNullValue(PN->getType()));
+ BB->getInstList().pop_front();
+ }
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+ (*SI)->removePredecessor(BB);
+ BB->dropAllReferences();
+ }
+
+ if (DeadBlocks.empty()) return false;
+
+ // Actually remove the blocks now.
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i)
+ F.getBasicBlockList().erase(DeadBlocks[i]);
+
+ return true;
+}
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
new file mode 100644
index 0000000..196e849
--- /dev/null
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -0,0 +1,1118 @@
+//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the VirtRegMap class.
+//
+// It also contains implementations of the Spiller interface, which, given a
+// virtual register map and a machine function, eliminates all virtual
+// references by replacing them with physical register references - adding spill
+// code as necessary.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spiller"
+#include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumSpills, "Number of register spills");
+STATISTIC(NumReMats, "Number of re-materializations");
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumReused, "Number of values reused");
+STATISTIC(NumDSE , "Number of dead stores elided");
+STATISTIC(NumDCE , "Number of copies elided");
+
+namespace {
+ enum SpillerName { simple, local };
+
+ static cl::opt<SpillerName>
+ SpillerOpt("spiller",
+ cl::desc("Spiller to use: (default: local)"),
+ cl::Prefix,
+ cl::values(clEnumVal(simple, " simple spiller"),
+ clEnumVal(local, " local spiller"),
+ clEnumValEnd),
+ cl::init(local));
+}
+
+//===----------------------------------------------------------------------===//
+// VirtRegMap implementation
+//===----------------------------------------------------------------------===//
+
+VirtRegMap::VirtRegMap(MachineFunction &mf)
+ : TII(*mf.getTarget().getInstrInfo()), MF(mf),
+ Virt2PhysMap(NO_PHYS_REG), Virt2StackSlotMap(NO_STACK_SLOT),
+ ReMatId(MAX_STACK_SLOT+1) {
+ grow();
+}
+
+void VirtRegMap::grow() {
+ Virt2PhysMap.grow(MF.getSSARegMap()->getLastVirtReg());
+ Virt2StackSlotMap.grow(MF.getSSARegMap()->getLastVirtReg());
+}
+
+int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
+ assert(MRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ const TargetRegisterClass* RC = MF.getSSARegMap()->getRegClass(virtReg);
+ int frameIndex = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment());
+ Virt2StackSlotMap[virtReg] = frameIndex;
+ ++NumSpills;
+ return frameIndex;
+}
+
+void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int frameIndex) {
+ assert(MRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ assert((frameIndex >= 0 ||
+ (frameIndex >= MF.getFrameInfo()->getObjectIndexBegin())) &&
+ "illegal fixed frame index");
+ Virt2StackSlotMap[virtReg] = frameIndex;
+}
+
+int VirtRegMap::assignVirtReMatId(unsigned virtReg) {
+ assert(MRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign re-mat id to already spilled register");
+ const MachineInstr *DefMI = getReMaterializedMI(virtReg);
+ int FrameIdx;
+ if (TII.isLoadFromStackSlot((MachineInstr*)DefMI, FrameIdx)) {
+    // A load from a stack slot is re-materialized as a reload from that stack slot!
+ Virt2StackSlotMap[virtReg] = FrameIdx;
+ return FrameIdx;
+ }
+ Virt2StackSlotMap[virtReg] = ReMatId;
+ return ReMatId++;
+}
+
+void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI,
+ unsigned OpNo, MachineInstr *NewMI) {
+ // Move previous memory references folded to new instruction.
+ MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI);
+ for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI),
+ E = MI2VirtMap.end(); I != E && I->first == OldMI; ) {
+ MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second));
+ MI2VirtMap.erase(I++);
+ }
+
+ ModRef MRInfo;
+ const TargetInstrDescriptor *TID = OldMI->getInstrDescriptor();
+ if (TID->getOperandConstraint(OpNo, TOI::TIED_TO) != -1 ||
+ TID->findTiedToSrcOperand(OpNo) != -1) {
+ // Folded a two-address operand.
+ MRInfo = isModRef;
+ } else if (OldMI->getOperand(OpNo).isDef()) {
+ MRInfo = isMod;
+ } else {
+ MRInfo = isRef;
+ }
+
+ // add new memory reference
+ MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo)));
+}
+
+void VirtRegMap::print(std::ostream &OS) const {
+ const MRegisterInfo* MRI = MF.getTarget().getRegisterInfo();
+
+ OS << "********** REGISTER MAP **********\n";
+ for (unsigned i = MRegisterInfo::FirstVirtualRegister,
+ e = MF.getSSARegMap()->getLastVirtReg(); i <= e; ++i) {
+ if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG)
+ OS << "[reg" << i << " -> " << MRI->getName(Virt2PhysMap[i]) << "]\n";
+
+ }
+
+ for (unsigned i = MRegisterInfo::FirstVirtualRegister,
+ e = MF.getSSARegMap()->getLastVirtReg(); i <= e; ++i)
+ if (Virt2StackSlotMap[i] != VirtRegMap::NO_STACK_SLOT)
+ OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i] << "]\n";
+ OS << '\n';
+}
+
+void VirtRegMap::dump() const {
+ print(DOUT);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Simple Spiller Implementation
+//===----------------------------------------------------------------------===//
+
+Spiller::~Spiller() {}
+
+namespace {
+ struct VISIBILITY_HIDDEN SimpleSpiller : public Spiller {
+ bool runOnMachineFunction(MachineFunction& mf, VirtRegMap &VRM);
+ };
+}
+
+bool SimpleSpiller::runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) {
+ DOUT << "********** REWRITE MACHINE CODE **********\n";
+ DOUT << "********** Function: " << MF.getFunction()->getName() << '\n';
+ const TargetMachine &TM = MF.getTarget();
+ const MRegisterInfo &MRI = *TM.getRegisterInfo();
+
+ // LoadedRegs - Keep track of which vregs are loaded, so that we only load
+ // each vreg once (in the case where a spilled vreg is used by multiple
+ // operands). This is always smaller than the number of operands to the
+ // current machine instr, so it should be small.
+ std::vector<unsigned> LoadedRegs;
+
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ DOUT << MBBI->getBasicBlock()->getName() << ":\n";
+ MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::iterator MII = MBB.begin(),
+ E = MBB.end(); MII != E; ++MII) {
+ MachineInstr &MI = *MII;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.isRegister() && MO.getReg())
+ if (MRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned VirtReg = MO.getReg();
+ unsigned PhysReg = VRM.getPhys(VirtReg);
+ if (VRM.hasStackSlot(VirtReg)) {
+ int StackSlot = VRM.getStackSlot(VirtReg);
+ const TargetRegisterClass* RC =
+ MF.getSSARegMap()->getRegClass(VirtReg);
+
+ if (MO.isUse() &&
+ std::find(LoadedRegs.begin(), LoadedRegs.end(), VirtReg)
+ == LoadedRegs.end()) {
+ MRI.loadRegFromStackSlot(MBB, &MI, PhysReg, StackSlot, RC);
+ LoadedRegs.push_back(VirtReg);
+ ++NumLoads;
+ DOUT << '\t' << *prior(MII);
+ }
+
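+              // The instruction defines this vreg; store the new value back
+              // to its stack slot right after the instruction.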
+ if (MO.isDef()) {
+ MRI.storeRegToStackSlot(MBB, next(MII), PhysReg, StackSlot, RC);
+ ++NumStores;
+ }
+ }
+ MF.setPhysRegUsed(PhysReg);
+ MI.getOperand(i).setReg(PhysReg);
+ } else {
+ MF.setPhysRegUsed(MO.getReg());
+ }
+ }
+
+ DOUT << '\t' << MI;
+ LoadedRegs.clear();
+ }
+ }
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Local Spiller Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// LocalSpiller - This spiller does a simple pass over the machine basic
+  /// block to attempt to keep spilled values in registers as much as possible
+  /// for
+ /// blocks that have low register pressure (the vreg may be spilled due to
+ /// register pressure in other blocks).
+ class VISIBILITY_HIDDEN LocalSpiller : public Spiller {
+ const MRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ public:
+ bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) {
+ MRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
+ DOUT << "\n**** Local spiller rewriting function '"
+ << MF.getFunction()->getName() << "':\n";
+
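+      // Re-materialized instructions collected by RewriteMBB; they are
+      // deleted once all blocks have been rewritten.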
+ std::vector<MachineInstr *> ReMatedMIs;
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB)
+ RewriteMBB(*MBB, VRM, ReMatedMIs);
+ for (unsigned i = 0, e = ReMatedMIs.size(); i != e; ++i)
+ delete ReMatedMIs[i];
+ return true;
+ }
+ private:
+ void RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM,
+ std::vector<MachineInstr*> &ReMatedMIs);
+ };
+}
+
+/// AvailableSpills - As the local spiller is scanning and rewriting an MBB from
+/// top down, keep track of which spill slots are available in each register.
+///
+/// Note that not all physregs are created equal here. In particular, some
+/// physregs are reloads that we are allowed to clobber or ignore at any time.
+/// Other physregs are values that the register-allocated program is using that
+/// we cannot CHANGE, but we can read if we like. We keep track of this on a
+/// per-stack-slot basis as the low bit in the value of the SpillSlotsAvailable
+/// entries. The predicate 'canClobberPhysReg()' checks this bit and
+/// addAvailable sets it if CanClobber is true.
+namespace {
+class VISIBILITY_HIDDEN AvailableSpills {
+ const MRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+
+ // SpillSlotsAvailable - This map keeps track of all of the spilled virtual
+ // register values that are still available, due to being loaded or stored to,
+ // but not invalidated yet.
+ std::map<int, unsigned> SpillSlotsAvailable;
+
+ // PhysRegsAvailable - This is the inverse of SpillSlotsAvailable, indicating
+ // which stack slot values are currently held by a physreg. This is used to
+ // invalidate entries in SpillSlotsAvailable when a physreg is modified.
+ std::multimap<unsigned, int> PhysRegsAvailable;
+
+ void disallowClobberPhysRegOnly(unsigned PhysReg);
+
+ void ClobberPhysRegOnly(unsigned PhysReg);
+public:
+ AvailableSpills(const MRegisterInfo *mri, const TargetInstrInfo *tii)
+ : MRI(mri), TII(tii) {
+ }
+
+ const MRegisterInfo *getRegInfo() const { return MRI; }
+
+ /// getSpillSlotPhysReg - If the specified stack slot is available in a
+ /// physical register, return that PhysReg, otherwise return 0.
+ unsigned getSpillSlotPhysReg(int Slot) const {
+ std::map<int, unsigned>::const_iterator I = SpillSlotsAvailable.find(Slot);
+ if (I != SpillSlotsAvailable.end()) {
+ return I->second >> 1; // Remove the CanClobber bit.
+ }
+ return 0;
+ }
+
+ /// addAvailable - Mark that the specified stack slot is available in the
+ /// specified physreg. If CanClobber is true, the physreg can be modified at
+ /// any time without changing the semantics of the program.
+ void addAvailable(int Slot, MachineInstr *MI, unsigned Reg,
+ bool CanClobber = true) {
+ // If this stack slot is thought to be available in some other physreg,
+ // remove its record.
+ ModifyStackSlot(Slot);
+
+ PhysRegsAvailable.insert(std::make_pair(Reg, Slot));
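+    // Encode the physreg in the upper bits and the CanClobber flag in the low bit.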
+ SpillSlotsAvailable[Slot] = (Reg << 1) | (unsigned)CanClobber;
+
+ if (Slot > VirtRegMap::MAX_STACK_SLOT)
+ DOUT << "Remembering RM#" << Slot-VirtRegMap::MAX_STACK_SLOT-1;
+ else
+ DOUT << "Remembering SS#" << Slot;
+ DOUT << " in physreg " << MRI->getName(Reg) << "\n";
+ }
+
+ /// canClobberPhysReg - Return true if the spiller is allowed to change the
+ /// value of the specified stackslot register if it desires. The specified
+ /// stack slot must be available in a physreg for this query to make sense.
+ bool canClobberPhysReg(int Slot) const {
+ assert(SpillSlotsAvailable.count(Slot) && "Slot not available!");
+ return SpillSlotsAvailable.find(Slot)->second & 1;
+ }
+
+ /// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+ /// stackslot register. The register is still available but is no longer
+    /// allowed to be modified.
+ void disallowClobberPhysReg(unsigned PhysReg);
+
+ /// ClobberPhysReg - This is called when the specified physreg changes
+    /// value. We use this to invalidate any info about stuff we think lives in
+ /// it and any of its aliases.
+ void ClobberPhysReg(unsigned PhysReg);
+
+ /// ModifyStackSlot - This method is called when the value in a stack slot
+ /// changes. This removes information about which register the previous value
+ /// for this slot lives in (as the previous value is dead now).
+ void ModifyStackSlot(int Slot);
+};
+}
+
+/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified
+/// stackslot register. The register is still available but is no longer
+/// allowed to be modified.
+void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) {
+ std::multimap<unsigned, int>::iterator I =
+ PhysRegsAvailable.lower_bound(PhysReg);
+ while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+ int Slot = I->second;
+ I++;
+ assert((SpillSlotsAvailable[Slot] >> 1) == PhysReg &&
+ "Bidirectional map mismatch!");
+ SpillSlotsAvailable[Slot] &= ~1;
+ DOUT << "PhysReg " << MRI->getName(PhysReg)
+ << " copied, it is available for use but can no longer be modified\n";
+ }
+}
+
+/// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+/// stackslot register and its aliases. The register and its aliases may
+/// still be available but are no longer allowed to be modified.
+void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) {
+ for (const unsigned *AS = MRI->getAliasSet(PhysReg); *AS; ++AS)
+ disallowClobberPhysRegOnly(*AS);
+ disallowClobberPhysRegOnly(PhysReg);
+}
+
+/// ClobberPhysRegOnly - This is called when the specified physreg changes
+/// value. We use this to invalidate any info about stuff we think lives in it.
+void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) {
+ std::multimap<unsigned, int>::iterator I =
+ PhysRegsAvailable.lower_bound(PhysReg);
+ while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+ int Slot = I->second;
+ PhysRegsAvailable.erase(I++);
+ assert((SpillSlotsAvailable[Slot] >> 1) == PhysReg &&
+ "Bidirectional map mismatch!");
+ SpillSlotsAvailable.erase(Slot);
+ DOUT << "PhysReg " << MRI->getName(PhysReg)
+ << " clobbered, invalidating ";
+ if (Slot > VirtRegMap::MAX_STACK_SLOT)
+ DOUT << "RM#" << Slot-VirtRegMap::MAX_STACK_SLOT-1 << "\n";
+ else
+ DOUT << "SS#" << Slot << "\n";
+ }
+}
+
+/// ClobberPhysReg - This is called when the specified physreg changes
+/// value. We use this to invalidate any info about stuff we think lives in
+/// it and any of its aliases.
+void AvailableSpills::ClobberPhysReg(unsigned PhysReg) {
+ for (const unsigned *AS = MRI->getAliasSet(PhysReg); *AS; ++AS)
+ ClobberPhysRegOnly(*AS);
+ ClobberPhysRegOnly(PhysReg);
+}
+
+/// ModifyStackSlot - This method is called when the value in a stack slot
+/// changes. This removes information about which register the previous value
+/// for this slot lives in (as the previous value is dead now).
+void AvailableSpills::ModifyStackSlot(int Slot) {
+ std::map<int, unsigned>::iterator It = SpillSlotsAvailable.find(Slot);
+ if (It == SpillSlotsAvailable.end()) return;
+ unsigned Reg = It->second >> 1;
+ SpillSlotsAvailable.erase(It);
+
+ // This register may hold the value of multiple stack slots, only remove this
+ // stack slot from the set of values the register contains.
+ std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg);
+ for (; ; ++I) {
+ assert(I != PhysRegsAvailable.end() && I->first == Reg &&
+ "Map inverse broken!");
+ if (I->second == Slot) break;
+ }
+ PhysRegsAvailable.erase(I);
+}
+
+
+
+/// InvalidateKills - MI is going to be deleted. If any of its operands are
+/// marked kill, then invalidate the information.
+static void InvalidateKills(MachineInstr &MI, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (KillOps[Reg] == &MO) {
+ RegKills.reset(Reg);
+ KillOps[Reg] = NULL;
+ }
+ }
+}
+
+/// UpdateKills - Track and update kill info. If an MI reads a register that is
+/// marked kill, then it must be due to register reuse. Transfer the kill info
+/// over.
+static void UpdateKills(MachineInstr &MI, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ const TargetInstrDescriptor *TID = MI.getInstrDescriptor();
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ if (RegKills[Reg]) {
+ // That can't be right. Register is killed but not re-defined and it's
+ // being reused. Let's fix that.
+ KillOps[Reg]->unsetIsKill();
+ if (i < TID->numOperands &&
+ TID->getOperandConstraint(i, TOI::TIED_TO) == -1)
+ // Unless it's a two-address operand, this is the new kill.
+ MO.setIsKill();
+ }
+
+ if (MO.isKill()) {
+ RegKills.set(Reg);
+ KillOps[Reg] = &MO;
+ }
+ }
+
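+  // A def of a register invalidates any previously recorded kill of that register.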
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ RegKills.reset(Reg);
+ KillOps[Reg] = NULL;
+ }
+}
+
+
+// ReusedOp - For each reused operand, we keep track of a bit of information, in
+// case we need to rollback upon processing a new operand. See comments below.
+namespace {
+ struct ReusedOp {
+ // The MachineInstr operand that reused an available value.
+ unsigned Operand;
+
+ // StackSlot - The spill slot of the value being reused.
+ unsigned StackSlot;
+
+ // PhysRegReused - The physical register the value was available in.
+ unsigned PhysRegReused;
+
+ // AssignedPhysReg - The physreg that was assigned for use by the reload.
+ unsigned AssignedPhysReg;
+
+ // VirtReg - The virtual register itself.
+ unsigned VirtReg;
+
+ ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr,
+ unsigned vreg)
+ : Operand(o), StackSlot(ss), PhysRegReused(prr), AssignedPhysReg(apr),
+ VirtReg(vreg) {}
+ };
+
+ /// ReuseInfo - This maintains a collection of ReuseOp's for each operand that
+ /// is reused instead of reloaded.
+ class VISIBILITY_HIDDEN ReuseInfo {
+ MachineInstr &MI;
+ std::vector<ReusedOp> Reuses;
+ BitVector PhysRegsClobbered;
+ public:
+ ReuseInfo(MachineInstr &mi, const MRegisterInfo *mri) : MI(mi) {
+ PhysRegsClobbered.resize(mri->getNumRegs());
+ }
+
+ bool hasReuses() const {
+ return !Reuses.empty();
+ }
+
+ /// addReuse - If we choose to reuse a virtual register that is already
+ /// available instead of reloading it, remember that we did so.
+ void addReuse(unsigned OpNo, unsigned StackSlot,
+ unsigned PhysRegReused, unsigned AssignedPhysReg,
+ unsigned VirtReg) {
+ // If the reload is to the assigned register anyway, no undo will be
+ // required.
+ if (PhysRegReused == AssignedPhysReg) return;
+
+ // Otherwise, remember this.
+ Reuses.push_back(ReusedOp(OpNo, StackSlot, PhysRegReused,
+ AssignedPhysReg, VirtReg));
+ }
+
+ void markClobbered(unsigned PhysReg) {
+ PhysRegsClobbered.set(PhysReg);
+ }
+
+ bool isClobbered(unsigned PhysReg) const {
+ return PhysRegsClobbered.test(PhysReg);
+ }
+
+ /// GetRegForReload - We are about to emit a reload into PhysReg. If there
+ /// is some other operand that is using the specified register, either pick
+ /// a new register to use, or evict the previous reload and use this reg.
+ unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI,
+ AvailableSpills &Spills,
+ std::map<int, MachineInstr*> &MaybeDeadStores,
+ SmallSet<unsigned, 8> &Rejected,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ if (Reuses.empty()) return PhysReg; // This is most often empty.
+
+ for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) {
+ ReusedOp &Op = Reuses[ro];
+ // If we find some other reuse that was supposed to use this register
+ // exactly for its reload, we can change this reload to use ITS reload
+ // register. That is, unless its reload register has already been
+ // considered and subsequently rejected because it has also been reused
+ // by another operand.
+ if (Op.PhysRegReused == PhysReg &&
+ Rejected.count(Op.AssignedPhysReg) == 0) {
+ // Yup, use the reload register that we didn't use before.
+ unsigned NewReg = Op.AssignedPhysReg;
+ Rejected.insert(PhysReg);
+ return GetRegForReload(NewReg, MI, Spills, MaybeDeadStores, Rejected,
+ RegKills, KillOps);
+ } else {
+ // Otherwise, we might also have a problem if a previously reused
+ // value aliases the new register. If so, codegen the previous reload
+ // and use this one.
+ unsigned PRRU = Op.PhysRegReused;
+ const MRegisterInfo *MRI = Spills.getRegInfo();
+ if (MRI->areAliases(PRRU, PhysReg)) {
+ // Okay, we found out that an alias of a reused register
+ // was used. This isn't good because it means we have
+ // to undo a previous reuse.
+ MachineBasicBlock *MBB = MI->getParent();
+ const TargetRegisterClass *AliasRC =
+ MBB->getParent()->getSSARegMap()->getRegClass(Op.VirtReg);
+
+ // Copy Op out of the vector and remove it, we're going to insert an
+ // explicit load for it.
+ ReusedOp NewOp = Op;
+ Reuses.erase(Reuses.begin()+ro);
+
+            // Ok, we're going to try to reload the stack slot into the
+            // physreg it was originally assigned, as we were supposed to in
+            // the first place.  However, that register could hold a reuse.
+            // Check to see if it conflicts or would prefer us to use a
+            // different register.
+ unsigned NewPhysReg = GetRegForReload(NewOp.AssignedPhysReg,
+ MI, Spills, MaybeDeadStores,
+ Rejected, RegKills, KillOps);
+
+ MRI->loadRegFromStackSlot(*MBB, MI, NewPhysReg,
+ NewOp.StackSlot, AliasRC);
+ Spills.ClobberPhysReg(NewPhysReg);
+ Spills.ClobberPhysReg(NewOp.PhysRegReused);
+
+ // Any stores to this stack slot are not dead anymore.
+ MaybeDeadStores.erase(NewOp.StackSlot);
+
+ MI->getOperand(NewOp.Operand).setReg(NewPhysReg);
+
+ Spills.addAvailable(NewOp.StackSlot, MI, NewPhysReg);
+ ++NumLoads;
+ MachineBasicBlock::iterator MII = MI;
+ --MII;
+ UpdateKills(*MII, RegKills, KillOps);
+ DOUT << '\t' << *MII;
+
+ DOUT << "Reuse undone!\n";
+ --NumReused;
+
+ // Finally, PhysReg is now available, go ahead and use it.
+ return PhysReg;
+ }
+ }
+ }
+ return PhysReg;
+ }
+
+ /// GetRegForReload - Helper for the above GetRegForReload(). Add a
+ /// 'Rejected' set to remember which registers have been considered and
+    /// rejected for the reload. This avoids infinite looping in cases like
+    /// this:
+    ///            t1 := op t2, t3
+    ///            t2 <- assigned r0 for use by the reload but ended up reusing r1
+    ///            t3 <- assigned r1 for use by the reload but ended up reusing r0
+ /// t1 <- desires r1
+ /// sees r1 is taken by t2, tries t2's reload register r0
+ /// sees r0 is taken by t3, tries t3's reload register r1
+ /// sees r1 is taken by t2, tries t2's reload register r0 ...
+ unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI,
+ AvailableSpills &Spills,
+ std::map<int, MachineInstr*> &MaybeDeadStores,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ SmallSet<unsigned, 8> Rejected;
+ return GetRegForReload(PhysReg, MI, Spills, MaybeDeadStores, Rejected,
+ RegKills, KillOps);
+ }
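+
+    // Continuing the hypothetical trace above: before trying t2's reload
+    // register r0, r1 is inserted into Rejected, so when the reuse of r0 then
+    // points back at r1 that proposal is skipped and the recursion terminates
+    // instead of cycling between r0 and r1.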
+ };
+}
+
+
+/// RewriteMBB - Keep track of which spills are available even after the
+/// register allocator is done with them.  If possible, avoid reloading vregs.
+void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM,
+ std::vector<MachineInstr*> &ReMatedMIs) {
+ DOUT << MBB.getBasicBlock()->getName() << ":\n";
+
+ // Spills - Keep track of which spilled values are available in physregs so
+ // that we can choose to reuse the physregs instead of emitting reloads.
+ AvailableSpills Spills(MRI, TII);
+
+  // MaybeDeadStores - When we need to write a value back into a stack slot,
+  // keep track of the inserted store.  If the stack slot value is never read
+  // (because the value was used from some available register, for example)
+  // before the slot is stored to again, the original store is dead.  This map
+  // keeps track of inserted stores that are not used.  If we see a subsequent
+  // store to the same stack slot, the original store is deleted.
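+  // For example (hypothetical registers and slots): a spill of R0 to SS#3
+  // whose later uses are all satisfied from registers, followed by another
+  // store to SS#3, means the first store was never read and can be removed.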
+ std::map<int, MachineInstr*> MaybeDeadStores;
+
+ // Keep track of kill information.
+ BitVector RegKills(MRI->getNumRegs());
+ std::vector<MachineOperand*> KillOps;
+ KillOps.resize(MRI->getNumRegs(), NULL);
+
+ MachineFunction &MF = *MBB.getParent();
+ for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+ MII != E; ) {
+ MachineInstr &MI = *MII;
+ MachineBasicBlock::iterator NextMII = MII; ++NextMII;
+ VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+
+ bool Erased = false;
+ bool BackTracked = false;
+
+ /// ReusedOperands - Keep track of operand reuse in case we need to undo
+ /// reuse.
+ ReuseInfo ReusedOperands(MI, MRI);
+
+    const TargetInstrDescriptor *TID = MI.getInstrDescriptor();
+
+ // If this instruction is being rematerialized, just remove it!
+ int FrameIdx;
+ if (TII->isTriviallyReMaterializable(&MI) ||
+ TII->isLoadFromStackSlot(&MI, FrameIdx)) {
+ Erased = true;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isRegister() || MO.getReg() == 0)
+ continue; // Ignore non-register operands.
+ if (MO.isDef() && !VRM.isReMaterialized(MO.getReg())) {
+ Erased = false;
+ break;
+ }
+ }
+ if (Erased) {
+ VRM.RemoveFromFoldedVirtMap(&MI);
+ ReMatedMIs.push_back(MI.removeFromParent());
+ goto ProcessNextInst;
+ }
+ }
+
+    // Loop over all of the implicit defs, clearing them from our available
+    // sets.
+    if (TID->ImplicitDefs) {
+ const unsigned *ImpDef = TID->ImplicitDefs;
+ for ( ; *ImpDef; ++ImpDef) {
+ MF.setPhysRegUsed(*ImpDef);
+ ReusedOperands.markClobbered(*ImpDef);
+ Spills.ClobberPhysReg(*ImpDef);
+ }
+ }
+
+    // Process all of the spilled uses and all non-spilled register references.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isRegister() || MO.getReg() == 0)
+ continue; // Ignore non-register operands.
+
+ if (MRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ // Ignore physregs for spilling, but remember that it is used by this
+ // function.
+ MF.setPhysRegUsed(MO.getReg());
+ ReusedOperands.markClobbered(MO.getReg());
+ continue;
+ }
+
+ assert(MRegisterInfo::isVirtualRegister(MO.getReg()) &&
+ "Not a virtual or a physical register?");
+
+ unsigned VirtReg = MO.getReg();
+ if (!VRM.hasStackSlot(VirtReg)) {
+ // This virtual register was assigned a physreg!
+ unsigned Phys = VRM.getPhys(VirtReg);
+ MF.setPhysRegUsed(Phys);
+ if (MO.isDef())
+ ReusedOperands.markClobbered(Phys);
+ MI.getOperand(i).setReg(Phys);
+ continue;
+ }
+
+ // This virtual register is now known to be a spilled value.
+ if (!MO.isUse())
+ continue; // Handle defs in the loop below (handle use&def here though)
+
+ bool doReMat = VRM.isReMaterialized(VirtReg);
+ int StackSlot = VRM.getStackSlot(VirtReg);
+ unsigned PhysReg;
+
+ // Check to see if this stack slot is available.
+ if ((PhysReg = Spills.getSpillSlotPhysReg(StackSlot))) {
+        // This spilled operand might be the use half of a two-address operand
+        // pair.  If so, changing it will necessarily require changing the
+        // tied def of the instruction as well.  However, in some cases, we
+        // aren't allowed to modify the reused register.  If none of these
+        // cases apply, reuse it.
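+        // For example (hypothetical operands): in 'r1 = add r1, r2', where the
+        // first source is tied to the def, reusing an available copy of that
+        // value is only safe if we are allowed to clobber it, because the add
+        // will overwrite whichever register we pick.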
+ bool CanReuse = true;
+ int ti = TID->getOperandConstraint(i, TOI::TIED_TO);
+ if (ti != -1 &&
+ MI.getOperand(ti).isReg() &&
+ MI.getOperand(ti).getReg() == VirtReg) {
+ // Okay, we have a two address operand. We can reuse this physreg as
+ // long as we are allowed to clobber the value and there isn't an
+ // earlier def that has already clobbered the physreg.
+ CanReuse = Spills.canClobberPhysReg(StackSlot) &&
+ !ReusedOperands.isClobbered(PhysReg);
+ }
+
+ if (CanReuse) {
+ // If this stack slot value is already available, reuse it!
+ if (StackSlot > VirtRegMap::MAX_STACK_SLOT)
+ DOUT << "Reusing RM#" << StackSlot-VirtRegMap::MAX_STACK_SLOT-1;
+ else
+ DOUT << "Reusing SS#" << StackSlot;
+ DOUT << " from physreg "
+ << MRI->getName(PhysReg) << " for vreg"
+ << VirtReg <<" instead of reloading into physreg "
+ << MRI->getName(VRM.getPhys(VirtReg)) << "\n";
+ MI.getOperand(i).setReg(PhysReg);
+
+          // The one subtlety is that we don't know whether PhysReg will be
+          // clobbered by a stack slot reloaded later in this instruction.  In
+          // particular, consider 'op V1, V2'.
+ // If V1 is available in physreg R0, we would choose to reuse it
+ // here, instead of reloading it into the register the allocator
+ // indicated (say R1). However, V2 might have to be reloaded
+ // later, and it might indicate that it needs to live in R0. When
+ // this occurs, we need to have information available that
+ // indicates it is safe to use R1 for the reload instead of R0.
+ //
+ // To further complicate matters, we might conflict with an alias,
+ // or R0 and R1 might not be compatible with each other. In this
+ // case, we actually insert a reload for V1 in R1, ensuring that
+ // we can get at R0 or its alias.
+ ReusedOperands.addReuse(i, StackSlot, PhysReg,
+ VRM.getPhys(VirtReg), VirtReg);
+ if (ti != -1)
+ // Only mark it clobbered if this is a use&def operand.
+ ReusedOperands.markClobbered(PhysReg);
+ ++NumReused;
+ continue;
+ }
+
+        // Otherwise, we have a two-address instruction whose mod/ref operand
+        // needs to be reloaded.  This reload is already
+ // available in some register "PhysReg", but if we used PhysReg as the
+ // operand to our 2-addr instruction, the instruction would modify
+ // PhysReg. This isn't cool if something later uses PhysReg and expects
+ // to get its initial value.
+ //
+ // To avoid this problem, and to avoid doing a load right after a store,
+ // we emit a copy from PhysReg into the designated register for this
+ // operand.
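+        // For example (hypothetical registers): if the value is available in
+        // R0 but this mod/ref operand was assigned R3, we emit 'R3 = copy R0'
+        // and let the instruction clobber R3, so later readers of R0 still
+        // see the original value.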
+ unsigned DesignatedReg = VRM.getPhys(VirtReg);
+ assert(DesignatedReg && "Must map virtreg to physreg!");
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ DesignatedReg = ReusedOperands.GetRegForReload(DesignatedReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps);
+
+ // If the mapped designated register is actually the physreg we have
+        // incoming, we don't need to insert a dead copy.
+ if (DesignatedReg == PhysReg) {
+ // If this stack slot value is already available, reuse it!
+ if (StackSlot > VirtRegMap::MAX_STACK_SLOT)
+ DOUT << "Reusing RM#" << StackSlot-VirtRegMap::MAX_STACK_SLOT-1;
+ else
+ DOUT << "Reusing SS#" << StackSlot;
+ DOUT << " from physreg " << MRI->getName(PhysReg) << " for vreg"
+ << VirtReg
+ << " instead of reloading into same physreg.\n";
+ MI.getOperand(i).setReg(PhysReg);
+ ReusedOperands.markClobbered(PhysReg);
+ ++NumReused;
+ continue;
+ }
+
+ const TargetRegisterClass* RC = MF.getSSARegMap()->getRegClass(VirtReg);
+ MF.setPhysRegUsed(DesignatedReg);
+ ReusedOperands.markClobbered(DesignatedReg);
+ MRI->copyRegToReg(MBB, &MI, DesignatedReg, PhysReg, RC);
+
+ MachineInstr *CopyMI = prior(MII);
+ UpdateKills(*CopyMI, RegKills, KillOps);
+
+ // This invalidates DesignatedReg.
+ Spills.ClobberPhysReg(DesignatedReg);
+
+ Spills.addAvailable(StackSlot, &MI, DesignatedReg);
+ MI.getOperand(i).setReg(DesignatedReg);
+ DOUT << '\t' << *prior(MII);
+ ++NumReused;
+ continue;
+ }
+
+ // Otherwise, reload it and remember that we have it.
+ PhysReg = VRM.getPhys(VirtReg);
+ assert(PhysReg && "Must map virtreg to physreg!");
+ const TargetRegisterClass* RC = MF.getSSARegMap()->getRegClass(VirtReg);
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps);
+
+ MF.setPhysRegUsed(PhysReg);
+ ReusedOperands.markClobbered(PhysReg);
+ if (doReMat) {
+ MRI->reMaterialize(MBB, &MI, PhysReg, VRM.getReMaterializedMI(VirtReg));
+ ++NumReMats;
+ } else {
+ MRI->loadRegFromStackSlot(MBB, &MI, PhysReg, StackSlot, RC);
+ ++NumLoads;
+ }
+ // This invalidates PhysReg.
+ Spills.ClobberPhysReg(PhysReg);
+
+ // Any stores to this stack slot are not dead anymore.
+ if (!doReMat)
+ MaybeDeadStores.erase(StackSlot);
+ Spills.addAvailable(StackSlot, &MI, PhysReg);
+ // Assumes this is the last use. IsKill will be unset if reg is reused
+ // unless it's a two-address operand.
+ if (TID->getOperandConstraint(i, TOI::TIED_TO) == -1)
+ MI.getOperand(i).setIsKill();
+ MI.getOperand(i).setReg(PhysReg);
+ UpdateKills(*prior(MII), RegKills, KillOps);
+ DOUT << '\t' << *prior(MII);
+ }
+
+ DOUT << '\t' << MI;
+
+ // If we have folded references to memory operands, make sure we clear all
+ // physical registers that may contain the value of the spilled virtual
+    // register.
+ for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
+ DOUT << "Folded vreg: " << I->second.first << " MR: "
+ << I->second.second;
+ unsigned VirtReg = I->second.first;
+ VirtRegMap::ModRef MR = I->second.second;
+ if (!VRM.hasStackSlot(VirtReg)) {
+ DOUT << ": No stack slot!\n";
+ continue;
+ }
+ int SS = VRM.getStackSlot(VirtReg);
+ DOUT << " - StackSlot: " << SS << "\n";
+
+ // If this folded instruction is just a use, check to see if it's a
+ // straight load from the virt reg slot.
+ if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) {
+ int FrameIdx;
+ if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
+ if (FrameIdx == SS) {
+ // If this spill slot is available, turn it into a copy (or nothing)
+ // instead of leaving it as a load!
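+            // For example (hypothetical registers): a folded reload of SS#2
+            // into R4 becomes 'R4 = copy R1' if SS#2 is already live in R1,
+            // or is deleted outright when R4 and R1 are the same register.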
+ if (unsigned InReg = Spills.getSpillSlotPhysReg(SS)) {
+ DOUT << "Promoted Load To Copy: " << MI;
+ if (DestReg != InReg) {
+ MRI->copyRegToReg(MBB, &MI, DestReg, InReg,
+ MF.getSSARegMap()->getRegClass(VirtReg));
+ // Revisit the copy so we make sure to notice the effects of the
+ // operation on the destreg (either needing to RA it if it's
+ // virtual or needing to clobber any values if it's physical).
+ NextMII = &MI;
+ --NextMII; // backtrack to the copy.
+ BackTracked = true;
+ } else
+ DOUT << "Removing now-noop copy: " << MI;
+
+ VRM.RemoveFromFoldedVirtMap(&MI);
+ MBB.erase(&MI);
+ Erased = true;
+ goto ProcessNextInst;
+ }
+ }
+ }
+ }
+
+ // If this reference is not a use, any previous store is now dead.
+ // Otherwise, the store to this stack slot is not dead anymore.
+ std::map<int, MachineInstr*>::iterator MDSI = MaybeDeadStores.find(SS);
+ if (MDSI != MaybeDeadStores.end()) {
+ if (MR & VirtRegMap::isRef) // Previous store is not dead.
+ MaybeDeadStores.erase(MDSI);
+ else {
+ // If we get here, the store is dead, nuke it now.
+          assert((MR & VirtRegMap::isMod) && "Can't be modref!");
+ DOUT << "Removed dead store:\t" << *MDSI->second;
+ InvalidateKills(*MDSI->second, RegKills, KillOps);
+ MBB.erase(MDSI->second);
+ VRM.RemoveFromFoldedVirtMap(MDSI->second);
+ MaybeDeadStores.erase(MDSI);
+ ++NumDSE;
+ }
+ }
+
+ // If the spill slot value is available, and this is a new definition of
+ // the value, the value is not available anymore.
+ if (MR & VirtRegMap::isMod) {
+ // Notice that the value in this stack slot has been modified.
+ Spills.ModifyStackSlot(SS);
+
+ // If this is *just* a mod of the value, check to see if this is just a
+ // store to the spill slot (i.e. the spill got merged into the copy). If
+ // so, realize that the vreg is available now, and add the store to the
+ // MaybeDeadStore info.
+ int StackSlot;
+ if (!(MR & VirtRegMap::isRef)) {
+ if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) {
+ assert(MRegisterInfo::isPhysicalRegister(SrcReg) &&
+ "Src hasn't been allocated yet?");
+ // Okay, this is certainly a store of SrcReg to [StackSlot]. Mark
+ // this as a potentially dead store in case there is a subsequent
+ // store into the stack slot without a read from it.
+ MaybeDeadStores[StackSlot] = &MI;
+
+            // If the stack slot value was previously available in some other
+            // register, change it now.  Otherwise, make the register
+            // available in SrcReg.
+ Spills.addAvailable(StackSlot, &MI, SrcReg, false/*don't clobber*/);
+ }
+ }
+ }
+ }
+
+ // Process all of the spilled defs.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.isRegister() && MO.getReg() && MO.isDef()) {
+ unsigned VirtReg = MO.getReg();
+
+ if (!MRegisterInfo::isVirtualRegister(VirtReg)) {
+ // Check to see if this is a noop copy. If so, eliminate the
+ // instruction before considering the dest reg to be changed.
+ unsigned Src, Dst;
+ if (TII->isMoveInstr(MI, Src, Dst) && Src == Dst) {
+ ++NumDCE;
+ DOUT << "Removing now-noop copy: " << MI;
+ MBB.erase(&MI);
+ Erased = true;
+ VRM.RemoveFromFoldedVirtMap(&MI);
+ Spills.disallowClobberPhysReg(VirtReg);
+ goto ProcessNextInst;
+ }
+
+ // If it's not a no-op copy, it clobbers the value in the destreg.
+ Spills.ClobberPhysReg(VirtReg);
+ ReusedOperands.markClobbered(VirtReg);
+
+ // Check to see if this instruction is a load from a stack slot into
+ // a register. If so, this provides the stack slot value in the reg.
+ int FrameIdx;
+ if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
+ assert(DestReg == VirtReg && "Unknown load situation!");
+
+            // If it wasn't available before, remember that it is now!
+ Spills.addAvailable(FrameIdx, &MI, DestReg);
+ goto ProcessNextInst;
+ }
+
+ continue;
+ }
+
+ // The only vregs left are stack slot definitions.
+ int StackSlot = VRM.getStackSlot(VirtReg);
+ const TargetRegisterClass *RC = MF.getSSARegMap()->getRegClass(VirtReg);
+
+ // If this def is part of a two-address operand, make sure to execute
+ // the store from the correct physical register.
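+        // For example (hypothetical): if this def is tied to source operand 1
+        // and that source was rewritten to R2 above, the spill store must be
+        // issued from R2 rather than from the register originally assigned to
+        // this vreg.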
+ unsigned PhysReg;
+ int TiedOp = MI.getInstrDescriptor()->findTiedToSrcOperand(i);
+ if (TiedOp != -1)
+ PhysReg = MI.getOperand(TiedOp).getReg();
+ else {
+ PhysReg = VRM.getPhys(VirtReg);
+ if (ReusedOperands.isClobbered(PhysReg)) {
+ // Another def has taken the assigned physreg. It must have been a
+ // use&def which got it due to reuse. Undo the reuse!
+ PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps);
+ }
+ }
+
+ MF.setPhysRegUsed(PhysReg);
+ ReusedOperands.markClobbered(PhysReg);
+ MRI->storeRegToStackSlot(MBB, next(MII), PhysReg, StackSlot, RC);
+ DOUT << "Store:\t" << *next(MII);
+ MI.getOperand(i).setReg(PhysReg);
+
+ // If there is a dead store to this stack slot, nuke it now.
+ MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
+ if (LastStore) {
+ DOUT << "Removed dead store:\t" << *LastStore;
+ ++NumDSE;
+ InvalidateKills(*LastStore, RegKills, KillOps);
+ MBB.erase(LastStore);
+ VRM.RemoveFromFoldedVirtMap(LastStore);
+ }
+ LastStore = next(MII);
+
+ // If the stack slot value was previously available in some other
+ // register, change it now. Otherwise, make the register available,
+ // in PhysReg.
+ Spills.ModifyStackSlot(StackSlot);
+ Spills.ClobberPhysReg(PhysReg);
+ Spills.addAvailable(StackSlot, LastStore, PhysReg);
+ ++NumStores;
+
+ // Check to see if this is a noop copy. If so, eliminate the
+ // instruction before considering the dest reg to be changed.
+ {
+ unsigned Src, Dst;
+ if (TII->isMoveInstr(MI, Src, Dst) && Src == Dst) {
+ ++NumDCE;
+ DOUT << "Removing now-noop copy: " << MI;
+ MBB.erase(&MI);
+ Erased = true;
+ VRM.RemoveFromFoldedVirtMap(&MI);
+ UpdateKills(*LastStore, RegKills, KillOps);
+ goto ProcessNextInst;
+ }
+ }
+ }
+ }
+ ProcessNextInst:
+ if (!Erased && !BackTracked)
+ for (MachineBasicBlock::iterator II = MI; II != NextMII; ++II)
+ UpdateKills(*II, RegKills, KillOps);
+ MII = NextMII;
+ }
+}
+
+
+llvm::Spiller* llvm::createSpiller() {
+ switch (SpillerOpt) {
+ default: assert(0 && "Unreachable!");
+ case local:
+ return new LocalSpiller();
+ case simple:
+ return new SimpleSpiller();
+ }
+}
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
new file mode 100644
index 0000000..b7cbe51c
--- /dev/null
+++ b/lib/CodeGen/VirtRegMap.h
@@ -0,0 +1,211 @@
+//===-- llvm/CodeGen/VirtRegMap.h - Virtual Register Map -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a virtual register map. This maps virtual registers to
+// physical registers and virtual registers to stack slots. It is created and
+// updated by a register allocator and then used by a machine code rewriter that
+// adds spill code and rewrites virtual into physical register references.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_VIRTREGMAP_H
+#define LLVM_CODEGEN_VIRTREGMAP_H
+
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/Support/Streams.h"
+#include <map>
+
+namespace llvm {
+ class MachineInstr;
+ class TargetInstrInfo;
+
+ class VirtRegMap {
+ public:
+ enum {
+ NO_PHYS_REG = 0,
+ NO_STACK_SLOT = (1L << 30)-1,
+ MAX_STACK_SLOT = (1L << 18)-1
+ };
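+    // Values returned by getStackSlot() at or below MAX_STACK_SLOT are real
+    // frame indexes; ids handed out for re-materialized values (see ReMatId
+    // below) start above MAX_STACK_SLOT, which is how clients such as the
+    // spiller tell the two apart.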
+
+ enum ModRef { isRef = 1, isMod = 2, isModRef = 3 };
+ typedef std::multimap<MachineInstr*,
+ std::pair<unsigned, ModRef> > MI2VirtMapTy;
+
+ private:
+ const TargetInstrInfo &TII;
+
+ MachineFunction &MF;
+    /// Virt2PhysMap - This is a virtual to physical register
+    /// mapping. Each virtual register is required to have an entry in
+    /// it, even spilled virtual registers (the physical register mapped
+    /// to a spilled virtual register is the temporary used to load it
+    /// from the stack).
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysMap;
+    /// Virt2StackSlotMap - This is a virtual register to stack slot
+    /// mapping. Each spilled virtual register has an entry in it
+    /// which corresponds to the stack slot this register is spilled
+    /// to.
+ IndexedMap<int, VirtReg2IndexFunctor> Virt2StackSlotMap;
+    /// MI2VirtMap - This is a MachineInstr to virtual register
+ /// mapping. In the case of memory spill code being folded into
+ /// instructions, we need to know which virtual register was
+ /// read/written by this instruction.
+ MI2VirtMapTy MI2VirtMap;
+
+ /// ReMatMap - This is virtual register to re-materialized instruction
+ /// mapping. Each virtual register whose definition is going to be
+ /// re-materialized has an entry in it.
+ std::map<unsigned, const MachineInstr*> ReMatMap;
+
+    /// ReMatId - Instead of assigning a stack slot to a virtual register that
+    /// will be rematerialized, a unique id is assigned. This keeps track of
+    /// the highest id used so far. Note that this starts at (1<<18) to avoid
+    /// conflicts with stack slot numbers.
+ int ReMatId;
+
+ VirtRegMap(const VirtRegMap&); // DO NOT IMPLEMENT
+ void operator=(const VirtRegMap&); // DO NOT IMPLEMENT
+
+ public:
+ VirtRegMap(MachineFunction &mf);
+
+ void grow();
+
+ /// @brief returns true if the specified virtual register is
+ /// mapped to a physical register
+ bool hasPhys(unsigned virtReg) const {
+ return getPhys(virtReg) != NO_PHYS_REG;
+ }
+
+ /// @brief returns the physical register mapped to the specified
+ /// virtual register
+ unsigned getPhys(unsigned virtReg) const {
+ assert(MRegisterInfo::isVirtualRegister(virtReg));
+ return Virt2PhysMap[virtReg];
+ }
+
+ /// @brief creates a mapping for the specified virtual register to
+ /// the specified physical register
+ void assignVirt2Phys(unsigned virtReg, unsigned physReg) {
+ assert(MRegisterInfo::isVirtualRegister(virtReg) &&
+ MRegisterInfo::isPhysicalRegister(physReg));
+ assert(Virt2PhysMap[virtReg] == NO_PHYS_REG &&
+ "attempt to assign physical register to already mapped "
+ "virtual register");
+ Virt2PhysMap[virtReg] = physReg;
+ }
+
+    /// @brief clears the specified virtual register's physical
+    /// register mapping
+ void clearVirt(unsigned virtReg) {
+ assert(MRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2PhysMap[virtReg] != NO_PHYS_REG &&
+ "attempt to clear a not assigned virtual register");
+ Virt2PhysMap[virtReg] = NO_PHYS_REG;
+ }
+
+ /// @brief clears all virtual to physical register mappings
+ void clearAllVirt() {
+ Virt2PhysMap.clear();
+ grow();
+ }
+
+    /// @brief returns true if the specified virtual register is
+ /// mapped to a stack slot
+ bool hasStackSlot(unsigned virtReg) const {
+ return getStackSlot(virtReg) != NO_STACK_SLOT;
+ }
+
+ /// @brief returns the stack slot mapped to the specified virtual
+ /// register
+ int getStackSlot(unsigned virtReg) const {
+ assert(MRegisterInfo::isVirtualRegister(virtReg));
+ return Virt2StackSlotMap[virtReg];
+ }
+
+    /// @brief create a mapping for the specified virtual register to
+ /// the next available stack slot
+ int assignVirt2StackSlot(unsigned virtReg);
+ /// @brief create a mapping for the specified virtual register to
+ /// the specified stack slot
+ void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
+
+    /// @brief assign a unique re-materialization id to the specified
+ /// virtual register.
+ int assignVirtReMatId(unsigned virtReg);
+
+ /// @brief returns true if the specified virtual register is being
+ /// re-materialized.
+ bool isReMaterialized(unsigned virtReg) const {
+ return ReMatMap.count(virtReg) != 0;
+ }
+
+ /// @brief returns the original machine instruction being re-issued
+ /// to re-materialize the specified virtual register.
+ const MachineInstr *getReMaterializedMI(unsigned virtReg) {
+ return ReMatMap[virtReg];
+ }
+
+    /// @brief records that the specified virtual register will be
+    /// re-materialized and the original instruction which will be re-issued
+    /// for this purpose.
+ void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) {
+ ReMatMap[virtReg] = def;
+ }
+
+    /// @brief Updates information about the specified virtual register's value
+    /// folded into the NewMI machine instruction.  The OpNum argument indicates
+    /// the operand number of OldMI that is folded.
+ void virtFolded(unsigned VirtReg, MachineInstr *OldMI, unsigned OpNum,
+ MachineInstr *NewMI);
+
+    /// @brief returns the virtual registers' values folded into the memory
+    /// operands of this instruction
+ std::pair<MI2VirtMapTy::const_iterator, MI2VirtMapTy::const_iterator>
+ getFoldedVirts(MachineInstr* MI) const {
+ return MI2VirtMap.equal_range(MI);
+ }
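+
+    // Typical use (a sketch mirroring how the spiller walks folded operands):
+    //   VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+    //   for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
+    //     unsigned VirtReg = I->second.first;
+    //     VirtRegMap::ModRef MR = I->second.second;
+    //     ...
+    //   }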
+
+ /// RemoveFromFoldedVirtMap - If the specified machine instruction is in
+ /// the folded instruction map, remove its entry from the map.
+ void RemoveFromFoldedVirtMap(MachineInstr *MI) {
+ MI2VirtMap.erase(MI);
+ }
+
+ void print(std::ostream &OS) const;
+ void print(std::ostream *OS) const { if (OS) print(*OS); }
+ void dump() const;
+ };
+
+ inline std::ostream *operator<<(std::ostream *OS, const VirtRegMap &VRM) {
+ VRM.print(OS);
+ return OS;
+ }
+ inline std::ostream &operator<<(std::ostream &OS, const VirtRegMap &VRM) {
+ VRM.print(OS);
+ return OS;
+ }
+
+ /// Spiller interface: Implementations of this interface assign spilled
+ /// virtual registers to stack slots, rewriting the code.
+ struct Spiller {
+ virtual ~Spiller();
+ virtual bool runOnMachineFunction(MachineFunction &MF,
+ VirtRegMap &VRM) = 0;
+ };
+
+  /// createSpiller - Create and return a spiller object, as specified on the
+ /// command line.
+ Spiller* createSpiller();
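+
+  // A rough sketch of the intended flow (hypothetical allocator code): the
+  // register allocator records its decisions in the VirtRegMap and then hands
+  // the function to a spiller for rewriting:
+  //   VirtRegMap VRM(MF);
+  //   VRM.assignVirt2Phys(ColoredVReg, SomePhysReg);  // vreg got a physreg
+  //   VRM.assignVirt2StackSlot(SpilledVReg);          // vreg got a stack slot
+  //   std::auto_ptr<Spiller> S(createSpiller());
+  //   S->runOnMachineFunction(MF, VRM);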
+
+} // End llvm namespace
+
+#endif