diff options
author | Dan Gohman <djg@cray.com> | 2007-07-18 16:29:46 +0000 |
---|---|---|
committer | Dan Gohman <djg@cray.com> | 2007-07-18 16:29:46 +0000 |
commit | f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc (patch) | |
tree | ebb79ea1ee5e3bc1fdf38541a811a8b804f0679a /lib/CodeGen | |
download | external_llvm-f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc.zip external_llvm-f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc.tar.gz external_llvm-f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc.tar.bz2 |
It's not necessary to do rounding for alloca operations when the requested
alignment is equal to the stack alignment.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@40004 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/CodeGen')
47 files changed, 47212 insertions, 0 deletions
diff --git a/lib/CodeGen/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter.cpp new file mode 100644 index 0000000..586472c --- /dev/null +++ b/lib/CodeGen/AsmPrinter.cpp @@ -0,0 +1,1210 @@ +//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AsmPrinter class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Constants.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Streams.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include <cerrno> +using namespace llvm; + +static cl::opt<bool> +AsmVerbose("asm-verbose", cl::Hidden, cl::desc("Add comments to directives.")); + +char AsmPrinter::ID = 0; +AsmPrinter::AsmPrinter(std::ostream &o, TargetMachine &tm, + const TargetAsmInfo *T) + : MachineFunctionPass((intptr_t)&ID), FunctionNumber(0), O(o), TM(tm), TAI(T) +{} + +std::string AsmPrinter::getSectionForFunction(const Function &F) const { + return TAI->getTextSection(); +} + + +/// SwitchToTextSection - Switch to the specified text section of the executable +/// if we are not already in it! +/// +void AsmPrinter::SwitchToTextSection(const char *NewSection, + const GlobalValue *GV) { + std::string NS; + if (GV && GV->hasSection()) + NS = TAI->getSwitchToSectionDirective() + GV->getSection(); + else + NS = NewSection; + + // If we're already in this section, we're done. + if (CurrentSection == NS) return; + + // Close the current section, if applicable. + if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty()) + O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << "\n"; + + CurrentSection = NS; + + if (!CurrentSection.empty()) + O << CurrentSection << TAI->getTextSectionStartSuffix() << '\n'; +} + +/// SwitchToDataSection - Switch to the specified data section of the executable +/// if we are not already in it! +/// +void AsmPrinter::SwitchToDataSection(const char *NewSection, + const GlobalValue *GV) { + std::string NS; + if (GV && GV->hasSection()) + NS = TAI->getSwitchToSectionDirective() + GV->getSection(); + else + NS = NewSection; + + // If we're already in this section, we're done. + if (CurrentSection == NS) return; + + // Close the current section, if applicable. 
+ if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty()) + O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << "\n"; + + CurrentSection = NS; + + if (!CurrentSection.empty()) + O << CurrentSection << TAI->getDataSectionStartSuffix() << '\n'; +} + + +bool AsmPrinter::doInitialization(Module &M) { + Mang = new Mangler(M, TAI->getGlobalPrefix()); + + if (!M.getModuleInlineAsm().empty()) + O << TAI->getCommentString() << " Start of file scope inline assembly\n" + << M.getModuleInlineAsm() + << "\n" << TAI->getCommentString() + << " End of file scope inline assembly\n"; + + SwitchToDataSection(""); // Reset back to no section. + + if (MachineModuleInfo *MMI = getAnalysisToUpdate<MachineModuleInfo>()) { + MMI->AnalyzeModule(M); + } + + return false; +} + +bool AsmPrinter::doFinalization(Module &M) { + if (TAI->getWeakRefDirective()) { + if (!ExtWeakSymbols.empty()) + SwitchToDataSection(""); + + for (std::set<const GlobalValue*>::iterator i = ExtWeakSymbols.begin(), + e = ExtWeakSymbols.end(); i != e; ++i) { + const GlobalValue *GV = *i; + std::string Name = Mang->getValueName(GV); + O << TAI->getWeakRefDirective() << Name << "\n"; + } + } + + if (TAI->getSetDirective()) { + if (!M.alias_empty()) + SwitchToTextSection(TAI->getTextSection()); + + O << "\n"; + for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); + I!=E; ++I) { + std::string Name = Mang->getValueName(I); + std::string Target; + + if (const GlobalValue *GV = I->getAliasedGlobal()) + Target = Mang->getValueName(GV); + else + assert(0 && "Unsupported aliasee"); + + if (I->hasExternalLinkage()) + O << "\t.globl\t" << Name << "\n"; + else if (I->hasWeakLinkage()) + O << TAI->getWeakRefDirective() << Name << "\n"; + else if (!I->hasInternalLinkage()) + assert(0 && "Invalid alias linkage"); + + O << TAI->getSetDirective() << Name << ", " << Target << "\n"; + } + } + + delete Mang; Mang = 0; + return false; +} + +void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { + // What's my mangled name? + CurrentFnName = Mang->getValueName(MF.getFunction()); + IncrementFunctionNumber(); +} + +/// EmitConstantPool - Print to the current output stream assembly +/// representations of the constants in the constant pool MCP. This is +/// used to print out constants which have been "spilled to memory" by +/// the code generator. +/// +void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) { + const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants(); + if (CP.empty()) return; + + // Some targets require 4-, 8-, and 16- byte constant literals to be placed + // in special sections. 
+ std::vector<std::pair<MachineConstantPoolEntry,unsigned> > FourByteCPs; + std::vector<std::pair<MachineConstantPoolEntry,unsigned> > EightByteCPs; + std::vector<std::pair<MachineConstantPoolEntry,unsigned> > SixteenByteCPs; + std::vector<std::pair<MachineConstantPoolEntry,unsigned> > OtherCPs; + std::vector<std::pair<MachineConstantPoolEntry,unsigned> > TargetCPs; + for (unsigned i = 0, e = CP.size(); i != e; ++i) { + MachineConstantPoolEntry CPE = CP[i]; + const Type *Ty = CPE.getType(); + if (TAI->getFourByteConstantSection() && + TM.getTargetData()->getTypeSize(Ty) == 4) + FourByteCPs.push_back(std::make_pair(CPE, i)); + else if (TAI->getEightByteConstantSection() && + TM.getTargetData()->getTypeSize(Ty) == 8) + EightByteCPs.push_back(std::make_pair(CPE, i)); + else if (TAI->getSixteenByteConstantSection() && + TM.getTargetData()->getTypeSize(Ty) == 16) + SixteenByteCPs.push_back(std::make_pair(CPE, i)); + else + OtherCPs.push_back(std::make_pair(CPE, i)); + } + + unsigned Alignment = MCP->getConstantPoolAlignment(); + EmitConstantPool(Alignment, TAI->getFourByteConstantSection(), FourByteCPs); + EmitConstantPool(Alignment, TAI->getEightByteConstantSection(), EightByteCPs); + EmitConstantPool(Alignment, TAI->getSixteenByteConstantSection(), + SixteenByteCPs); + EmitConstantPool(Alignment, TAI->getConstantPoolSection(), OtherCPs); +} + +void AsmPrinter::EmitConstantPool(unsigned Alignment, const char *Section, + std::vector<std::pair<MachineConstantPoolEntry,unsigned> > &CP) { + if (CP.empty()) return; + + SwitchToDataSection(Section); + EmitAlignment(Alignment); + for (unsigned i = 0, e = CP.size(); i != e; ++i) { + O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' + << CP[i].second << ":\t\t\t\t\t" << TAI->getCommentString() << " "; + WriteTypeSymbolic(O, CP[i].first.getType(), 0) << '\n'; + if (CP[i].first.isMachineConstantPoolEntry()) + EmitMachineConstantPoolValue(CP[i].first.Val.MachineCPVal); + else + EmitGlobalConstant(CP[i].first.Val.ConstVal); + if (i != e-1) { + const Type *Ty = CP[i].first.getType(); + unsigned EntSize = + TM.getTargetData()->getTypeSize(Ty); + unsigned ValEnd = CP[i].first.getOffset() + EntSize; + // Emit inter-object padding for alignment. + EmitZeros(CP[i+1].first.getOffset()-ValEnd); + } + } +} + +/// EmitJumpTableInfo - Print assembly representations of the jump tables used +/// by the current function to the current output stream. +/// +void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, + MachineFunction &MF) { + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + if (JT.empty()) return; + bool IsPic = TM.getRelocationModel() == Reloc::PIC_; + + // Use JumpTableDirective otherwise honor the entry size from the jump table + // info. + const char *JTEntryDirective = TAI->getJumpTableDirective(); + bool HadJTEntryDirective = JTEntryDirective != NULL; + if (!HadJTEntryDirective) { + JTEntryDirective = MJTI->getEntrySize() == 4 ? + TAI->getData32bitsDirective() : TAI->getData64bitsDirective(); + } + + // Pick the directive to use to print the jump table entries, and switch to + // the appropriate section. + TargetLowering *LoweringInfo = TM.getTargetLowering(); + + const char* JumpTableDataSection = TAI->getJumpTableDataSection(); + if ((IsPic && !(LoweringInfo && LoweringInfo->usesGlobalOffsetTable())) || + !JumpTableDataSection) { + // In PIC mode, we need to emit the jump table to the same section as the + // function body itself, otherwise the label differences won't make sense. 
+ // We should also do if the section name is NULL. + const Function *F = MF.getFunction(); + SwitchToTextSection(getSectionForFunction(*F).c_str(), F); + } else { + SwitchToDataSection(JumpTableDataSection); + } + + EmitAlignment(Log2_32(MJTI->getAlignment())); + + for (unsigned i = 0, e = JT.size(); i != e; ++i) { + const std::vector<MachineBasicBlock*> &JTBBs = JT[i].MBBs; + + // If this jump table was deleted, ignore it. + if (JTBBs.empty()) continue; + + // For PIC codegen, if possible we want to use the SetDirective to reduce + // the number of relocations the assembler will generate for the jump table. + // Set directives are all printed before the jump table itself. + std::set<MachineBasicBlock*> EmittedSets; + if (TAI->getSetDirective() && IsPic) + for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) + if (EmittedSets.insert(JTBBs[ii]).second) + printSetLabel(i, JTBBs[ii]); + + // On some targets (e.g. darwin) we want to emit two consequtive labels + // before each jump table. The first label is never referenced, but tells + // the assembler and linker the extents of the jump table object. The + // second label is actually referenced by the code. + if (const char *JTLabelPrefix = TAI->getJumpTableSpecialLabelPrefix()) + O << JTLabelPrefix << "JTI" << getFunctionNumber() << '_' << i << ":\n"; + + O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << i << ":\n"; + + for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { + O << JTEntryDirective << ' '; + // If we have emitted set directives for the jump table entries, print + // them rather than the entries themselves. If we're emitting PIC, then + // emit the table entries as differences between two text section labels. + // If we're emitting non-PIC code, then emit the entries as direct + // references to the target basic blocks. + if (!EmittedSets.empty()) { + O << TAI->getPrivateGlobalPrefix() << getFunctionNumber() + << '_' << i << "_set_" << JTBBs[ii]->getNumber(); + } else if (IsPic) { + printBasicBlockLabel(JTBBs[ii], false, false); + // If the arch uses custom Jump Table directives, don't calc relative to + // JT + if (!HadJTEntryDirective) + O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" + << getFunctionNumber() << '_' << i; + } else { + printBasicBlockLabel(JTBBs[ii], false, false); + } + O << '\n'; + } + } +} + +/// EmitSpecialLLVMGlobal - Check to see if the specified global is a +/// special global used by LLVM. If so, emit it and return true, otherwise +/// do nothing and return false. +bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { + // Ignore debug and non-emitted data. + if (GV->getSection() == "llvm.metadata") return true; + + if (!GV->hasAppendingLinkage()) return false; + + assert(GV->hasInitializer() && "Not a special LLVM global!"); + + if (GV->getName() == "llvm.used") { + if (TAI->getUsedDirective() != 0) // No need to emit this at all. 
+ EmitLLVMUsedList(GV->getInitializer()); + return true; + } + + const TargetData *TD = TM.getTargetData(); + unsigned Align = Log2_32(TD->getPointerPrefAlignment()); + if (GV->getName() == "llvm.global_ctors" && GV->use_empty()) { + SwitchToDataSection(TAI->getStaticCtorsSection()); + EmitAlignment(Align, 0); + EmitXXStructorList(GV->getInitializer()); + return true; + } + + if (GV->getName() == "llvm.global_dtors" && GV->use_empty()) { + SwitchToDataSection(TAI->getStaticDtorsSection()); + EmitAlignment(Align, 0); + EmitXXStructorList(GV->getInitializer()); + return true; + } + + return false; +} + +/// EmitLLVMUsedList - For targets that define a TAI::UsedDirective, mark each +/// global in the specified llvm.used list as being used with this directive. +void AsmPrinter::EmitLLVMUsedList(Constant *List) { + const char *Directive = TAI->getUsedDirective(); + + // Should be an array of 'sbyte*'. + ConstantArray *InitList = dyn_cast<ConstantArray>(List); + if (InitList == 0) return; + + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { + O << Directive; + EmitConstantValueOnly(InitList->getOperand(i)); + O << "\n"; + } +} + +/// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the +/// function pointers, ignoring the init priority. +void AsmPrinter::EmitXXStructorList(Constant *List) { + // Should be an array of '{ int, void ()* }' structs. The first value is the + // init priority, which we ignore. + if (!isa<ConstantArray>(List)) return; + ConstantArray *InitList = cast<ConstantArray>(List); + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){ + if (CS->getNumOperands() != 2) return; // Not array of 2-element structs. + + if (CS->getOperand(1)->isNullValue()) + return; // Found a null terminator, exit printing. + // Emit the function pointer. + EmitGlobalConstant(CS->getOperand(1)); + } +} + +/// getGlobalLinkName - Returns the asm/link name of of the specified +/// global variable. Should be overridden by each target asm printer to +/// generate the appropriate value. +const std::string AsmPrinter::getGlobalLinkName(const GlobalVariable *GV) const{ + std::string LinkName; + + if (isa<Function>(GV)) { + LinkName += TAI->getFunctionAddrPrefix(); + LinkName += Mang->getValueName(GV); + LinkName += TAI->getFunctionAddrSuffix(); + } else { + LinkName += TAI->getGlobalVarAddrPrefix(); + LinkName += Mang->getValueName(GV); + LinkName += TAI->getGlobalVarAddrSuffix(); + } + + return LinkName; +} + +/// EmitExternalGlobal - Emit the external reference to a global variable. +/// Should be overridden if an indirect reference should be used. +void AsmPrinter::EmitExternalGlobal(const GlobalVariable *GV) { + O << getGlobalLinkName(GV); +} + + + +//===----------------------------------------------------------------------===// +/// LEB 128 number encoding. + +/// PrintULEB128 - Print a series of hexidecimal values (separated by commas) +/// representing an unsigned leb128 value. +void AsmPrinter::PrintULEB128(unsigned Value) const { + do { + unsigned Byte = Value & 0x7f; + Value >>= 7; + if (Value) Byte |= 0x80; + O << "0x" << std::hex << Byte << std::dec; + if (Value) O << ", "; + } while (Value); +} + +/// SizeULEB128 - Compute the number of bytes required for an unsigned leb128 +/// value. 
+unsigned AsmPrinter::SizeULEB128(unsigned Value) { + unsigned Size = 0; + do { + Value >>= 7; + Size += sizeof(int8_t); + } while (Value); + return Size; +} + +/// PrintSLEB128 - Print a series of hexidecimal values (separated by commas) +/// representing a signed leb128 value. +void AsmPrinter::PrintSLEB128(int Value) const { + int Sign = Value >> (8 * sizeof(Value) - 1); + bool IsMore; + + do { + unsigned Byte = Value & 0x7f; + Value >>= 7; + IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0; + if (IsMore) Byte |= 0x80; + O << "0x" << std::hex << Byte << std::dec; + if (IsMore) O << ", "; + } while (IsMore); +} + +/// SizeSLEB128 - Compute the number of bytes required for a signed leb128 +/// value. +unsigned AsmPrinter::SizeSLEB128(int Value) { + unsigned Size = 0; + int Sign = Value >> (8 * sizeof(Value) - 1); + bool IsMore; + + do { + unsigned Byte = Value & 0x7f; + Value >>= 7; + IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0; + Size += sizeof(int8_t); + } while (IsMore); + return Size; +} + +//===--------------------------------------------------------------------===// +// Emission and print routines +// + +/// PrintHex - Print a value as a hexidecimal value. +/// +void AsmPrinter::PrintHex(int Value) const { + O << "0x" << std::hex << Value << std::dec; +} + +/// EOL - Print a newline character to asm stream. If a comment is present +/// then it will be printed first. Comments should not contain '\n'. +void AsmPrinter::EOL() const { + O << "\n"; +} +void AsmPrinter::EOL(const std::string &Comment) const { + if (AsmVerbose && !Comment.empty()) { + O << "\t" + << TAI->getCommentString() + << " " + << Comment; + } + O << "\n"; +} + +/// EmitULEB128Bytes - Emit an assembler byte data directive to compose an +/// unsigned leb128 value. +void AsmPrinter::EmitULEB128Bytes(unsigned Value) const { + if (TAI->hasLEB128()) { + O << "\t.uleb128\t" + << Value; + } else { + O << TAI->getData8bitsDirective(); + PrintULEB128(Value); + } +} + +/// EmitSLEB128Bytes - print an assembler byte data directive to compose a +/// signed leb128 value. +void AsmPrinter::EmitSLEB128Bytes(int Value) const { + if (TAI->hasLEB128()) { + O << "\t.sleb128\t" + << Value; + } else { + O << TAI->getData8bitsDirective(); + PrintSLEB128(Value); + } +} + +/// EmitInt8 - Emit a byte directive and value. +/// +void AsmPrinter::EmitInt8(int Value) const { + O << TAI->getData8bitsDirective(); + PrintHex(Value & 0xFF); +} + +/// EmitInt16 - Emit a short directive and value. +/// +void AsmPrinter::EmitInt16(int Value) const { + O << TAI->getData16bitsDirective(); + PrintHex(Value & 0xFFFF); +} + +/// EmitInt32 - Emit a long directive and value. +/// +void AsmPrinter::EmitInt32(int Value) const { + O << TAI->getData32bitsDirective(); + PrintHex(Value); +} + +/// EmitInt64 - Emit a long long directive and value. +/// +void AsmPrinter::EmitInt64(uint64_t Value) const { + if (TAI->getData64bitsDirective()) { + O << TAI->getData64bitsDirective(); + PrintHex(Value); + } else { + if (TM.getTargetData()->isBigEndian()) { + EmitInt32(unsigned(Value >> 32)); O << "\n"; + EmitInt32(unsigned(Value)); + } else { + EmitInt32(unsigned(Value)); O << "\n"; + EmitInt32(unsigned(Value >> 32)); + } + } +} + +/// toOctal - Convert the low order bits of X into an octal digit. +/// +static inline char toOctal(int X) { + return (X&7)+'0'; +} + +/// printStringChar - Print a char, escaped if necessary. 
+/// +static void printStringChar(std::ostream &O, unsigned char C) { + if (C == '"') { + O << "\\\""; + } else if (C == '\\') { + O << "\\\\"; + } else if (isprint(C)) { + O << C; + } else { + switch(C) { + case '\b': O << "\\b"; break; + case '\f': O << "\\f"; break; + case '\n': O << "\\n"; break; + case '\r': O << "\\r"; break; + case '\t': O << "\\t"; break; + default: + O << '\\'; + O << toOctal(C >> 6); + O << toOctal(C >> 3); + O << toOctal(C >> 0); + break; + } + } +} + +/// EmitString - Emit a string with quotes and a null terminator. +/// Special characters are emitted properly. +/// \literal (Eg. '\t') \endliteral +void AsmPrinter::EmitString(const std::string &String) const { + const char* AscizDirective = TAI->getAscizDirective(); + if (AscizDirective) + O << AscizDirective; + else + O << TAI->getAsciiDirective(); + O << "\""; + for (unsigned i = 0, N = String.size(); i < N; ++i) { + unsigned char C = String[i]; + printStringChar(O, C); + } + if (AscizDirective) + O << "\""; + else + O << "\\0\""; +} + + +//===----------------------------------------------------------------------===// + +// EmitAlignment - Emit an alignment directive to the specified power of +// two boundary. For example, if you pass in 3 here, you will get an 8 +// byte alignment. If a global value is specified, and if that global has +// an explicit alignment requested, it will unconditionally override the +// alignment request. However, if ForcedAlignBits is specified, this value +// has final say: the ultimate alignment will be the max of ForcedAlignBits +// and the alignment computed with NumBits and the global. +// +// The algorithm is: +// Align = NumBits; +// if (GV && GV->hasalignment) Align = GV->getalignment(); +// Align = std::max(Align, ForcedAlignBits); +// +void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV, + unsigned ForcedAlignBits) const { + if (GV && GV->getAlignment()) + NumBits = Log2_32(GV->getAlignment()); + NumBits = std::max(NumBits, ForcedAlignBits); + + if (NumBits == 0) return; // No need to emit alignment. + if (TAI->getAlignmentIsInBytes()) NumBits = 1 << NumBits; + O << TAI->getAlignDirective() << NumBits << "\n"; +} + + +/// EmitZeros - Emit a block of zeros. +/// +void AsmPrinter::EmitZeros(uint64_t NumZeros) const { + if (NumZeros) { + if (TAI->getZeroDirective()) { + O << TAI->getZeroDirective() << NumZeros; + if (TAI->getZeroDirectiveSuffix()) + O << TAI->getZeroDirectiveSuffix(); + O << "\n"; + } else { + for (; NumZeros; --NumZeros) + O << TAI->getData8bitsDirective() << "0\n"; + } + } +} + +// Print out the specified constant, without a storage class. Only the +// constants valid in constant expressions can occur here. +void AsmPrinter::EmitConstantValueOnly(const Constant *CV) { + if (CV->isNullValue() || isa<UndefValue>(CV)) + O << "0"; + else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { + O << CI->getZExtValue(); + } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { + // This is a constant address for a global variable or function. 
Use the + // name of the variable or function as the address value, possibly + // decorating it with GlobalVarAddrPrefix/Suffix or + // FunctionAddrPrefix/Suffix (these all default to "" ) + if (isa<Function>(GV)) { + O << TAI->getFunctionAddrPrefix() + << Mang->getValueName(GV) + << TAI->getFunctionAddrSuffix(); + } else { + O << TAI->getGlobalVarAddrPrefix() + << Mang->getValueName(GV) + << TAI->getGlobalVarAddrSuffix(); + } + } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { + const TargetData *TD = TM.getTargetData(); + unsigned Opcode = CE->getOpcode(); + switch (Opcode) { + case Instruction::GetElementPtr: { + // generate a symbolic expression for the byte address + const Constant *ptrVal = CE->getOperand(0); + SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end()); + if (int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0], + idxVec.size())) { + if (Offset) + O << "("; + EmitConstantValueOnly(ptrVal); + if (Offset > 0) + O << ") + " << Offset; + else if (Offset < 0) + O << ") - " << -Offset; + } else { + EmitConstantValueOnly(ptrVal); + } + break; + } + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + assert(0 && "FIXME: Don't yet support this kind of constant cast expr"); + break; + case Instruction::BitCast: + return EmitConstantValueOnly(CE->getOperand(0)); + + case Instruction::IntToPtr: { + // Handle casts to pointers by changing them into casts to the appropriate + // integer type. This promotes constant folding and simplifies this code. + Constant *Op = CE->getOperand(0); + Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(), false/*ZExt*/); + return EmitConstantValueOnly(Op); + } + + + case Instruction::PtrToInt: { + // Support only foldable casts to/from pointers that can be eliminated by + // changing the pointer to the appropriately sized integer type. + Constant *Op = CE->getOperand(0); + const Type *Ty = CE->getType(); + + // We can emit the pointer value into this slot if the slot is an + // integer slot greater or equal to the size of the pointer. + if (Ty->isInteger() && + TD->getTypeSize(Ty) >= TD->getTypeSize(Op->getType())) + return EmitConstantValueOnly(Op); + + assert(0 && "FIXME: Don't yet support this kind of constant cast expr"); + EmitConstantValueOnly(Op); + break; + } + case Instruction::Add: + case Instruction::Sub: + O << "("; + EmitConstantValueOnly(CE->getOperand(0)); + O << (Opcode==Instruction::Add ? ") + (" : ") - ("); + EmitConstantValueOnly(CE->getOperand(1)); + O << ")"; + break; + default: + assert(0 && "Unsupported operator!"); + } + } else { + assert(0 && "Unknown constant value!"); + } +} + +/// printAsCString - Print the specified array as a C compatible string, only if +/// the predicate isString is true. +/// +static void printAsCString(std::ostream &O, const ConstantArray *CVA, + unsigned LastElt) { + assert(CVA->isString() && "Array is not string compatible!"); + + O << "\""; + for (unsigned i = 0; i != LastElt; ++i) { + unsigned char C = + (unsigned char)cast<ConstantInt>(CVA->getOperand(i))->getZExtValue(); + printStringChar(O, C); + } + O << "\""; +} + +/// EmitString - Emit a zero-byte-terminated string constant. 
+/// +void AsmPrinter::EmitString(const ConstantArray *CVA) const { + unsigned NumElts = CVA->getNumOperands(); + if (TAI->getAscizDirective() && NumElts && + cast<ConstantInt>(CVA->getOperand(NumElts-1))->getZExtValue() == 0) { + O << TAI->getAscizDirective(); + printAsCString(O, CVA, NumElts-1); + } else { + O << TAI->getAsciiDirective(); + printAsCString(O, CVA, NumElts); + } + O << "\n"; +} + +/// EmitGlobalConstant - Print a general LLVM constant to the .s file. +/// +void AsmPrinter::EmitGlobalConstant(const Constant *CV) { + const TargetData *TD = TM.getTargetData(); + + if (CV->isNullValue() || isa<UndefValue>(CV)) { + EmitZeros(TD->getTypeSize(CV->getType())); + return; + } else if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) { + if (CVA->isString()) { + EmitString(CVA); + } else { // Not a string. Print the values in successive locations + for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i) + EmitGlobalConstant(CVA->getOperand(i)); + } + return; + } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) { + // Print the fields in successive locations. Pad to align if needed! + const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType()); + uint64_t sizeSoFar = 0; + for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) { + const Constant* field = CVS->getOperand(i); + + // Check if padding is needed and insert one or more 0s. + uint64_t fieldSize = TD->getTypeSize(field->getType()); + uint64_t padSize = ((i == e-1? cvsLayout->getSizeInBytes() + : cvsLayout->getElementOffset(i+1)) + - cvsLayout->getElementOffset(i)) - fieldSize; + sizeSoFar += fieldSize + padSize; + + // Now print the actual field value + EmitGlobalConstant(field); + + // Insert the field padding unless it's zero bytes... + EmitZeros(padSize); + } + assert(sizeSoFar == cvsLayout->getSizeInBytes() && + "Layout of constant struct may be incorrect!"); + return; + } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { + // FP Constants are printed as integer constants to avoid losing + // precision... 
+ double Val = CFP->getValue(); + if (CFP->getType() == Type::DoubleTy) { + if (TAI->getData64bitsDirective()) + O << TAI->getData64bitsDirective() << DoubleToBits(Val) << "\t" + << TAI->getCommentString() << " double value: " << Val << "\n"; + else if (TD->isBigEndian()) { + O << TAI->getData32bitsDirective() << unsigned(DoubleToBits(Val) >> 32) + << "\t" << TAI->getCommentString() + << " double most significant word " << Val << "\n"; + O << TAI->getData32bitsDirective() << unsigned(DoubleToBits(Val)) + << "\t" << TAI->getCommentString() + << " double least significant word " << Val << "\n"; + } else { + O << TAI->getData32bitsDirective() << unsigned(DoubleToBits(Val)) + << "\t" << TAI->getCommentString() + << " double least significant word " << Val << "\n"; + O << TAI->getData32bitsDirective() << unsigned(DoubleToBits(Val) >> 32) + << "\t" << TAI->getCommentString() + << " double most significant word " << Val << "\n"; + } + return; + } else { + O << TAI->getData32bitsDirective() << FloatToBits(Val) + << "\t" << TAI->getCommentString() << " float " << Val << "\n"; + return; + } + } else if (CV->getType() == Type::Int64Ty) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { + uint64_t Val = CI->getZExtValue(); + + if (TAI->getData64bitsDirective()) + O << TAI->getData64bitsDirective() << Val << "\n"; + else if (TD->isBigEndian()) { + O << TAI->getData32bitsDirective() << unsigned(Val >> 32) + << "\t" << TAI->getCommentString() + << " Double-word most significant word " << Val << "\n"; + O << TAI->getData32bitsDirective() << unsigned(Val) + << "\t" << TAI->getCommentString() + << " Double-word least significant word " << Val << "\n"; + } else { + O << TAI->getData32bitsDirective() << unsigned(Val) + << "\t" << TAI->getCommentString() + << " Double-word least significant word " << Val << "\n"; + O << TAI->getData32bitsDirective() << unsigned(Val >> 32) + << "\t" << TAI->getCommentString() + << " Double-word most significant word " << Val << "\n"; + } + return; + } + } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) { + const VectorType *PTy = CP->getType(); + + for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I) + EmitGlobalConstant(CP->getOperand(I)); + + return; + } + + const Type *type = CV->getType(); + printDataDirective(type); + EmitConstantValueOnly(CV); + O << "\n"; +} + +void +AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { + // Target doesn't support this yet! + abort(); +} + +/// PrintSpecial - Print information related to the specified machine instr +/// that is independent of the operand, and may be independent of the instr +/// itself. This can be useful for portably encoding the comment character +/// or other bits of target-specific knowledge into the asmstrings. The +/// syntax used is ${:comment}. Targets can override this to add support +/// for their own strange codes. +void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) { + if (!strcmp(Code, "private")) { + O << TAI->getPrivateGlobalPrefix(); + } else if (!strcmp(Code, "comment")) { + O << TAI->getCommentString(); + } else if (!strcmp(Code, "uid")) { + // Assign a unique ID to this machine instruction. + static const MachineInstr *LastMI = 0; + static const Function *F = 0; + static unsigned Counter = 0U-1; + + // Comparing the address of MI isn't sufficient, because machineinstrs may + // be allocated to the same address across functions. 
+ const Function *ThisF = MI->getParent()->getParent()->getFunction(); + + // If this is a new machine instruction, bump the counter. + if (LastMI != MI || F != ThisF) { + ++Counter; + LastMI = MI; + F = ThisF; + } + O << Counter; + } else { + cerr << "Unknown special formatter '" << Code + << "' for machine instr: " << *MI; + exit(1); + } +} + + +/// printInlineAsm - This method formats and prints the specified machine +/// instruction that is an inline asm. +void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { + unsigned NumOperands = MI->getNumOperands(); + + // Count the number of register definitions. + unsigned NumDefs = 0; + for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); + ++NumDefs) + assert(NumDefs != NumOperands-1 && "No asm string?"); + + assert(MI->getOperand(NumDefs).isExternalSymbol() && "No asm string?"); + + // Disassemble the AsmStr, printing out the literal pieces, the operands, etc. + const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + + // If this asmstr is empty, don't bother printing the #APP/#NOAPP markers. + if (AsmStr[0] == 0) { + O << "\n"; // Tab already printed, avoid double indenting next instr. + return; + } + + O << TAI->getInlineAsmStart() << "\n\t"; + + // The variant of the current asmprinter. + int AsmPrinterVariant = TAI->getAssemblerDialect(); + + int CurVariant = -1; // The number of the {.|.|.} region we are in. + const char *LastEmitted = AsmStr; // One past the last character emitted. + + while (*LastEmitted) { + switch (*LastEmitted) { + default: { + // Not a special case, emit the string section literally. + const char *LiteralEnd = LastEmitted+1; + while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' && + *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n') + ++LiteralEnd; + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) + O.write(LastEmitted, LiteralEnd-LastEmitted); + LastEmitted = LiteralEnd; + break; + } + case '\n': + ++LastEmitted; // Consume newline character. + O << "\n"; // Indent code with newline. + break; + case '$': { + ++LastEmitted; // Consume '$' character. + bool Done = true; + + // Handle escapes. + switch (*LastEmitted) { + default: Done = false; break; + case '$': // $$ -> $ + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) + O << '$'; + ++LastEmitted; // Consume second '$' character. + break; + case '(': // $( -> same as GCC's { character. + ++LastEmitted; // Consume '(' character. + if (CurVariant != -1) { + cerr << "Nested variants found in inline asm string: '" + << AsmStr << "'\n"; + exit(1); + } + CurVariant = 0; // We're in the first variant now. + break; + case '|': + ++LastEmitted; // consume '|' character. + if (CurVariant == -1) { + cerr << "Found '|' character outside of variant in inline asm " + << "string: '" << AsmStr << "'\n"; + exit(1); + } + ++CurVariant; // We're in the next variant. + break; + case ')': // $) -> same as GCC's } char. + ++LastEmitted; // consume ')' character. + if (CurVariant == -1) { + cerr << "Found '}' character outside of variant in inline asm " + << "string: '" << AsmStr << "'\n"; + exit(1); + } + CurVariant = -1; + break; + } + if (Done) break; + + bool HasCurlyBraces = false; + if (*LastEmitted == '{') { // ${variable} + ++LastEmitted; // Consume '{' character. + HasCurlyBraces = true; + } + + const char *IDStart = LastEmitted; + char *IDEnd; + errno = 0; + long Val = strtol(IDStart, &IDEnd, 10); // We only accept numbers for IDs. 
+ if (!isdigit(*IDStart) || (Val == 0 && errno == EINVAL)) { + cerr << "Bad $ operand number in inline asm string: '" + << AsmStr << "'\n"; + exit(1); + } + LastEmitted = IDEnd; + + char Modifier[2] = { 0, 0 }; + + if (HasCurlyBraces) { + // If we have curly braces, check for a modifier character. This + // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm. + if (*LastEmitted == ':') { + ++LastEmitted; // Consume ':' character. + if (*LastEmitted == 0) { + cerr << "Bad ${:} expression in inline asm string: '" + << AsmStr << "'\n"; + exit(1); + } + + Modifier[0] = *LastEmitted; + ++LastEmitted; // Consume modifier character. + } + + if (*LastEmitted != '}') { + cerr << "Bad ${} expression in inline asm string: '" + << AsmStr << "'\n"; + exit(1); + } + ++LastEmitted; // Consume '}' character. + } + + if ((unsigned)Val >= NumOperands-1) { + cerr << "Invalid $ operand number in inline asm string: '" + << AsmStr << "'\n"; + exit(1); + } + + // Okay, we finally have a value number. Ask the target to print this + // operand! + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) { + unsigned OpNo = 1; + + bool Error = false; + + // Scan to find the machine operand number for the operand. + for (; Val; --Val) { + if (OpNo >= MI->getNumOperands()) break; + unsigned OpFlags = MI->getOperand(OpNo).getImmedValue(); + OpNo += (OpFlags >> 3) + 1; + } + + if (OpNo >= MI->getNumOperands()) { + Error = true; + } else { + unsigned OpFlags = MI->getOperand(OpNo).getImmedValue(); + ++OpNo; // Skip over the ID number. + + AsmPrinter *AP = const_cast<AsmPrinter*>(this); + if ((OpFlags & 7) == 4 /*ADDR MODE*/) { + Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant, + Modifier[0] ? Modifier : 0); + } else { + Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant, + Modifier[0] ? Modifier : 0); + } + } + if (Error) { + cerr << "Invalid operand found in inline asm: '" + << AsmStr << "'\n"; + MI->dump(); + exit(1); + } + } + break; + } + } + } + O << "\n\t" << TAI->getInlineAsmEnd() << "\n"; +} + +/// printLabel - This method prints a local label used by debug and +/// exception handling tables. +void AsmPrinter::printLabel(const MachineInstr *MI) const { + O << "\n" + << TAI->getPrivateGlobalPrefix() + << "label" + << MI->getOperand(0).getImmedValue() + << ":\n"; +} + +/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM +/// instruction, using the specified assembler variant. Targets should +/// overried this to format as appropriate. +bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode) { + // Target doesn't support this yet! + return true; +} + +bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + // Target doesn't support this yet! 
+ return true; +} + +/// printBasicBlockLabel - This method prints the label for the specified +/// MachineBasicBlock +void AsmPrinter::printBasicBlockLabel(const MachineBasicBlock *MBB, + bool printColon, + bool printComment) const { + O << TAI->getPrivateGlobalPrefix() << "BB" << FunctionNumber << "_" + << MBB->getNumber(); + if (printColon) + O << ':'; + if (printComment && MBB->getBasicBlock()) + O << '\t' << TAI->getCommentString() << MBB->getBasicBlock()->getName(); +} + +/// printSetLabel - This method prints a set label for the specified +/// MachineBasicBlock +void AsmPrinter::printSetLabel(unsigned uid, + const MachineBasicBlock *MBB) const { + if (!TAI->getSetDirective()) + return; + + O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix() + << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ','; + printBasicBlockLabel(MBB, false, false); + O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << uid << '\n'; +} + +void AsmPrinter::printSetLabel(unsigned uid, unsigned uid2, + const MachineBasicBlock *MBB) const { + if (!TAI->getSetDirective()) + return; + + O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix() + << getFunctionNumber() << '_' << uid << '_' << uid2 + << "_set_" << MBB->getNumber() << ','; + printBasicBlockLabel(MBB, false, false); + O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << uid << '_' << uid2 << '\n'; +} + +/// printDataDirective - This method prints the asm directive for the +/// specified type. +void AsmPrinter::printDataDirective(const Type *type) { + const TargetData *TD = TM.getTargetData(); + switch (type->getTypeID()) { + case Type::IntegerTyID: { + unsigned BitWidth = cast<IntegerType>(type)->getBitWidth(); + if (BitWidth <= 8) + O << TAI->getData8bitsDirective(); + else if (BitWidth <= 16) + O << TAI->getData16bitsDirective(); + else if (BitWidth <= 32) + O << TAI->getData32bitsDirective(); + else if (BitWidth <= 64) { + assert(TAI->getData64bitsDirective() && + "Target cannot handle 64-bit constant exprs!"); + O << TAI->getData64bitsDirective(); + } + break; + } + case Type::PointerTyID: + if (TD->getPointerSize() == 8) { + assert(TAI->getData64bitsDirective() && + "Target cannot handle 64-bit pointer exprs!"); + O << TAI->getData64bitsDirective(); + } else { + O << TAI->getData32bitsDirective(); + } + break; + case Type::FloatTyID: case Type::DoubleTyID: + assert (0 && "Should have already output floating point constant."); + default: + assert (0 && "Can't handle printing this type of thing"); + break; + } +} + diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp new file mode 100644 index 0000000..0fca985 --- /dev/null +++ b/lib/CodeGen/BranchFolding.cpp @@ -0,0 +1,1093 @@ +//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass forwards branches to unconditional branches to make them branch +// directly to the target block. This pass often results in dead MBB's, which +// it then removes. +// +// Note that this pass must be run after register allocation, it cannot handle +// SSA form. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "branchfolding" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/MRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); +STATISTIC(NumBranchOpts, "Number of branches optimized"); +STATISTIC(NumTailMerge , "Number of block tails merged"); +static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge", + cl::init(cl::BOU_UNSET), cl::Hidden); +namespace { + // Throttle for huge numbers of predecessors (compile speed problems) + cl::opt<unsigned> + TailMergeThreshold("tail-merge-threshold", + cl::desc("Max number of predecessors to consider tail merging"), + cl::init(100), cl::Hidden); + + struct BranchFolder : public MachineFunctionPass { + static char ID; + BranchFolder(bool defaultEnableTailMerge) : + MachineFunctionPass((intptr_t)&ID) { + switch (FlagEnableTailMerge) { + case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; + case cl::BOU_TRUE: EnableTailMerge = true; break; + case cl::BOU_FALSE: EnableTailMerge = false; break; + } + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + virtual const char *getPassName() const { return "Control Flow Optimizer"; } + const TargetInstrInfo *TII; + MachineModuleInfo *MMI; + bool MadeChange; + private: + // Tail Merging. + bool EnableTailMerge; + bool TailMergeBlocks(MachineFunction &MF); + bool TryMergeBlocks(MachineBasicBlock* SuccBB, + MachineBasicBlock* PredBB); + void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, + MachineBasicBlock *NewDest); + MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, + MachineBasicBlock::iterator BBI1); + + std::vector<std::pair<unsigned,MachineBasicBlock*> > MergePotentials; + const MRegisterInfo *RegInfo; + RegScavenger *RS; + // Branch optzn. + bool OptimizeBranches(MachineFunction &MF); + void OptimizeBlock(MachineBasicBlock *MBB); + void RemoveDeadBlock(MachineBasicBlock *MBB); + + bool CanFallThrough(MachineBasicBlock *CurBB); + bool CanFallThrough(MachineBasicBlock *CurBB, bool BranchUnAnalyzable, + MachineBasicBlock *TBB, MachineBasicBlock *FBB, + const std::vector<MachineOperand> &Cond); + }; + char BranchFolder::ID = 0; +} + +FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) { + return new BranchFolder(DefaultEnableTailMerge); } + +/// RemoveDeadBlock - Remove the specified dead machine basic block from the +/// function, updating the CFG. +void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { + assert(MBB->pred_empty() && "MBB must be dead!"); + DOUT << "\nRemoving MBB: " << *MBB; + + MachineFunction *MF = MBB->getParent(); + // drop all successors. + while (!MBB->succ_empty()) + MBB->removeSuccessor(MBB->succ_end()-1); + + // If there is DWARF info to active, check to see if there are any LABEL + // records in the basic block. If so, unregister them from MachineModuleInfo. 
+ if (MMI && !MBB->empty()) { + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + if ((unsigned)I->getOpcode() == TargetInstrInfo::LABEL) { + // The label ID # is always operand #0, an immediate. + MMI->InvalidateLabel(I->getOperand(0).getImm()); + } + } + } + + // Remove the block. + MF->getBasicBlockList().erase(MBB); +} + +bool BranchFolder::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getTarget().getInstrInfo(); + if (!TII) return false; + + // Fix CFG. The later algorithms expect it to be right. + bool EverMadeChange = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) { + MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0; + std::vector<MachineOperand> Cond; + if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) + EverMadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); + } + + RegInfo = MF.getTarget().getRegisterInfo(); + RS = RegInfo->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL; + + MMI = getAnalysisToUpdate<MachineModuleInfo>(); + + bool MadeChangeThisIteration = true; + while (MadeChangeThisIteration) { + MadeChangeThisIteration = false; + MadeChangeThisIteration |= TailMergeBlocks(MF); + MadeChangeThisIteration |= OptimizeBranches(MF); + EverMadeChange |= MadeChangeThisIteration; + } + + // See if any jump tables have become mergable or dead as the code generator + // did its thing. + MachineJumpTableInfo *JTI = MF.getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JTs = JTI->getJumpTables(); + if (!JTs.empty()) { + // Figure out how these jump tables should be merged. + std::vector<unsigned> JTMapping; + JTMapping.reserve(JTs.size()); + + // We always keep the 0th jump table. + JTMapping.push_back(0); + + // Scan the jump tables, seeing if there are any duplicates. Note that this + // is N^2, which should be fixed someday. + for (unsigned i = 1, e = JTs.size(); i != e; ++i) + JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs)); + + // If a jump table was merge with another one, walk the function rewriting + // references to jump tables to reference the new JT ID's. Keep track of + // whether we see a jump table idx, if not, we can delete the JT. + std::vector<bool> JTIsLive; + JTIsLive.resize(JTs.size()); + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); + BB != E; ++BB) { + for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); + I != E; ++I) + for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { + MachineOperand &Op = I->getOperand(op); + if (!Op.isJumpTableIndex()) continue; + unsigned NewIdx = JTMapping[Op.getJumpTableIndex()]; + Op.setJumpTableIndex(NewIdx); + + // Remember that this JT is live. + JTIsLive[NewIdx] = true; + } + } + + // Finally, remove dead jump tables. This happens either because the + // indirect jump was unreachable (and thus deleted) or because the jump + // table was merged with some other one. + for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i) + if (!JTIsLive[i]) { + JTI->RemoveJumpTable(i); + EverMadeChange = true; + } + } + + delete RS; + return EverMadeChange; +} + +//===----------------------------------------------------------------------===// +// Tail Merging of Blocks +//===----------------------------------------------------------------------===// + +/// HashMachineInstr - Compute a hash value for MI and its operands. 
+static unsigned HashMachineInstr(const MachineInstr *MI) { + unsigned Hash = MI->getOpcode(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &Op = MI->getOperand(i); + + // Merge in bits from the operand if easy. + unsigned OperandHash = 0; + switch (Op.getType()) { + case MachineOperand::MO_Register: OperandHash = Op.getReg(); break; + case MachineOperand::MO_Immediate: OperandHash = Op.getImm(); break; + case MachineOperand::MO_MachineBasicBlock: + OperandHash = Op.getMachineBasicBlock()->getNumber(); + break; + case MachineOperand::MO_FrameIndex: OperandHash = Op.getFrameIndex(); break; + case MachineOperand::MO_ConstantPoolIndex: + OperandHash = Op.getConstantPoolIndex(); + break; + case MachineOperand::MO_JumpTableIndex: + OperandHash = Op.getJumpTableIndex(); + break; + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + // Global address / external symbol are too hard, don't bother, but do + // pull in the offset. + OperandHash = Op.getOffset(); + break; + default: break; + } + + Hash += ((OperandHash << 3) | Op.getType()) << (i&31); + } + return Hash; +} + +/// HashEndOfMBB - Hash the last few instructions in the MBB. For blocks +/// with no successors, we hash two instructions, because cross-jumping +/// only saves code when at least two instructions are removed (since a +/// branch must be inserted). For blocks with a successor, one of the +/// two blocks to be tail-merged will end with a branch already, so +/// it gains to cross-jump even for one instruction. + +static unsigned HashEndOfMBB(const MachineBasicBlock *MBB, + unsigned minCommonTailLength) { + MachineBasicBlock::const_iterator I = MBB->end(); + if (I == MBB->begin()) + return 0; // Empty MBB. + + --I; + unsigned Hash = HashMachineInstr(I); + + if (I == MBB->begin() || minCommonTailLength == 1) + return Hash; // Single instr MBB. + + --I; + // Hash in the second-to-last instruction. + Hash ^= HashMachineInstr(I) << 2; + return Hash; +} + +/// ComputeCommonTailLength - Given two machine basic blocks, compute the number +/// of instructions they actually have in common together at their end. Return +/// iterators for the first shared instruction in each block. +static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, + MachineBasicBlock *MBB2, + MachineBasicBlock::iterator &I1, + MachineBasicBlock::iterator &I2) { + I1 = MBB1->end(); + I2 = MBB2->end(); + + unsigned TailLen = 0; + while (I1 != MBB1->begin() && I2 != MBB2->begin()) { + --I1; --I2; + if (!I1->isIdenticalTo(I2)) { + ++I1; ++I2; + break; + } + ++TailLen; + } + return TailLen; +} + +/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything +/// after it, replacing it with an unconditional branch to NewDest. This +/// returns true if OldInst's block is modified, false if NewDest is modified. +void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, + MachineBasicBlock *NewDest) { + MachineBasicBlock *OldBB = OldInst->getParent(); + + // Remove all the old successors of OldBB from the CFG. + while (!OldBB->succ_empty()) + OldBB->removeSuccessor(OldBB->succ_begin()); + + // Remove all the dead instructions from the end of OldBB. + OldBB->erase(OldInst, OldBB->end()); + + // If OldBB isn't immediately before OldBB, insert a branch to it. 
+ if (++MachineFunction::iterator(OldBB) != MachineFunction::iterator(NewDest)) + TII->InsertBranch(*OldBB, NewDest, 0, std::vector<MachineOperand>()); + OldBB->addSuccessor(NewDest); + ++NumTailMerge; +} + +/// SplitMBBAt - Given a machine basic block and an iterator into it, split the +/// MBB so that the part before the iterator falls into the part starting at the +/// iterator. This returns the new MBB. +MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, + MachineBasicBlock::iterator BBI1) { + // Create the fall-through block. + MachineFunction::iterator MBBI = &CurMBB; + MachineBasicBlock *NewMBB = new MachineBasicBlock(CurMBB.getBasicBlock()); + CurMBB.getParent()->getBasicBlockList().insert(++MBBI, NewMBB); + + // Move all the successors of this block to the specified block. + while (!CurMBB.succ_empty()) { + MachineBasicBlock *S = *(CurMBB.succ_end()-1); + NewMBB->addSuccessor(S); + CurMBB.removeSuccessor(S); + } + + // Add an edge from CurMBB to NewMBB for the fall-through. + CurMBB.addSuccessor(NewMBB); + + // Splice the code over. + NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end()); + + // For targets that use the register scavenger, we must maintain LiveIns. + if (RS) { + RS->enterBasicBlock(&CurMBB); + if (!CurMBB.empty()) + RS->forward(prior(CurMBB.end())); + BitVector RegsLiveAtExit(RegInfo->getNumRegs()); + RS->getRegsUsed(RegsLiveAtExit, false); + for (unsigned int i=0, e=RegInfo->getNumRegs(); i!=e; i++) + if (RegsLiveAtExit[i]) + NewMBB->addLiveIn(i); + } + + return NewMBB; +} + +/// EstimateRuntime - Make a rough estimate for how long it will take to run +/// the specified code. +static unsigned EstimateRuntime(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E, + const TargetInstrInfo *TII) { + unsigned Time = 0; + for (; I != E; ++I) { + const TargetInstrDescriptor &TID = TII->get(I->getOpcode()); + if (TID.Flags & M_CALL_FLAG) + Time += 10; + else if (TID.Flags & (M_LOAD_FLAG|M_STORE_FLAG)) + Time += 2; + else + ++Time; + } + return Time; +} + +/// ShouldSplitFirstBlock - We need to either split MBB1 at MBB1I or MBB2 at +/// MBB2I and then insert an unconditional branch in the other block. Determine +/// which is the best to split +static bool ShouldSplitFirstBlock(MachineBasicBlock *MBB1, + MachineBasicBlock::iterator MBB1I, + MachineBasicBlock *MBB2, + MachineBasicBlock::iterator MBB2I, + const TargetInstrInfo *TII, + MachineBasicBlock *PredBB) { + // If one block is the entry block, split the other one; we can't generate + // a branch to the entry block, as its label is not emitted. + MachineBasicBlock *Entry = MBB1->getParent()->begin(); + if (MBB1 == Entry) + return false; + if (MBB2 == Entry) + return true; + + // If one block falls through into the common successor, choose that + // one to split; it is one instruction less to do that. + if (PredBB) { + if (MBB1 == PredBB) + return true; + else if (MBB2 == PredBB) + return false; + } + // TODO: if we had some notion of which block was hotter, we could split + // the hot block, so it is the fall-through. Since we don't have profile info + // make a decision based on which will hurt most to split. + unsigned MBB1Time = EstimateRuntime(MBB1->begin(), MBB1I, TII); + unsigned MBB2Time = EstimateRuntime(MBB2->begin(), MBB2I, TII); + + // If the MBB1 prefix takes "less time" to run than the MBB2 prefix, split the + // MBB1 block so it falls through. This will penalize the MBB2 path, but will + // have a lower overall impact on the program execution. 
+ return MBB1Time < MBB2Time; +} + +// CurMBB needs to add an unconditional branch to SuccMBB (we removed these +// branches temporarily for tail merging). In the case where CurMBB ends +// with a conditional branch to the next block, optimize by reversing the +// test and conditionally branching to SuccMBB instead. + +static void FixTail(MachineBasicBlock* CurMBB, MachineBasicBlock *SuccBB, + const TargetInstrInfo *TII) { + MachineFunction *MF = CurMBB->getParent(); + MachineFunction::iterator I = next(MachineFunction::iterator(CurMBB)); + MachineBasicBlock *TBB = 0, *FBB = 0; + std::vector<MachineOperand> Cond; + if (I != MF->end() && + !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond)) { + MachineBasicBlock *NextBB = I; + if (TBB == NextBB && Cond.size() && !FBB) { + if (!TII->ReverseBranchCondition(Cond)) { + TII->RemoveBranch(*CurMBB); + TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond); + return; + } + } + } + TII->InsertBranch(*CurMBB, SuccBB, NULL, std::vector<MachineOperand>()); +} + +static bool MergeCompare(const std::pair<unsigned,MachineBasicBlock*> &p, + const std::pair<unsigned,MachineBasicBlock*> &q) { + if (p.first < q.first) + return true; + else if (p.first > q.first) + return false; + else if (p.second->getNumber() < q.second->getNumber()) + return true; + else if (p.second->getNumber() > q.second->getNumber()) + return false; + else { + // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing + // an object with itself. +#ifndef _GLIBCXX_DEBUG + assert(0 && "Predecessor appears twice"); +#endif + return(false); + } +} + +// See if any of the blocks in MergePotentials (which all have a common single +// successor, or all have no successor) can be tail-merged. If there is a +// successor, any blocks in MergePotentials that are not tail-merged and +// are not immediately before Succ must have an unconditional branch to +// Succ added (but the predecessor/successor lists need no adjustment). +// The lone predecessor of Succ that falls through into Succ, +// if any, is given in PredBB. + +bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB, + MachineBasicBlock* PredBB) { + unsigned minCommonTailLength = (SuccBB ? 1 : 2); + MadeChange = false; + + // Sort by hash value so that blocks with identical end sequences sort + // together. + std::stable_sort(MergePotentials.begin(), MergePotentials.end(), MergeCompare); + + // Walk through equivalence sets looking for actual exact matches. + while (MergePotentials.size() > 1) { + unsigned CurHash = (MergePotentials.end()-1)->first; + unsigned PrevHash = (MergePotentials.end()-2)->first; + MachineBasicBlock *CurMBB = (MergePotentials.end()-1)->second; + + // If there is nothing that matches the hash of the current basic block, + // give up. + if (CurHash != PrevHash) { + if (SuccBB && CurMBB != PredBB) + FixTail(CurMBB, SuccBB, TII); + MergePotentials.pop_back(); + continue; + } + + // Look through all the pairs of blocks that have the same hash as this + // one, and find the pair that has the largest number of instructions in + // common. + // Since instructions may get combined later (e.g. single stores into + // store multiple) this measure is not particularly accurate. 
+ MachineBasicBlock::iterator BBI1, BBI2;
+
+ unsigned FoundI = ~0U, FoundJ = ~0U;
+ unsigned maxCommonTailLength = 0U;
+ for (int i = MergePotentials.size()-1;
+ i != -1 && MergePotentials[i].first == CurHash; --i) {
+ for (int j = i-1;
+ j != -1 && MergePotentials[j].first == CurHash; --j) {
+ MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
+ unsigned CommonTailLen = ComputeCommonTailLength(
+ MergePotentials[i].second,
+ MergePotentials[j].second,
+ TrialBBI1, TrialBBI2);
+ if (CommonTailLen >= minCommonTailLength &&
+ CommonTailLen > maxCommonTailLength) {
+ FoundI = i;
+ FoundJ = j;
+ maxCommonTailLength = CommonTailLen;
+ BBI1 = TrialBBI1;
+ BBI2 = TrialBBI2;
+ }
+ }
+ }
+
+ // If we didn't find any pair that has at least minCommonTailLength
+ // instructions in common, bail out. All entries with this
+ // hash code can go away now.
+ if (FoundI == ~0U) {
+ for (int i = MergePotentials.size()-1;
+ i != -1 && MergePotentials[i].first == CurHash; --i) {
+ // Put the unconditional branch back, if we need one.
+ CurMBB = MergePotentials[i].second;
+ if (SuccBB && CurMBB != PredBB)
+ FixTail(CurMBB, SuccBB, TII);
+ MergePotentials.pop_back();
+ }
+ continue;
+ }
+
+ // Otherwise, move the block(s) to the right position(s): for BBI1/BBI2
+ // to stay valid, entry FoundI must end up last and entry FoundJ
+ // next-to-last.
+ if (FoundI != MergePotentials.size()-1)
+ std::swap(MergePotentials[FoundI], *(MergePotentials.end()-1));
+ if (FoundJ != MergePotentials.size()-2)
+ std::swap(MergePotentials[FoundJ], *(MergePotentials.end()-2));
+
+ CurMBB = (MergePotentials.end()-1)->second;
+ MachineBasicBlock *MBB2 = (MergePotentials.end()-2)->second;
+
+ // If neither block is the entire common tail, split the tail of one block
+ // to make it redundant with the other tail. Also, we cannot jump to the
+ // entry block, so if one block is the entry block, split the other one.
+ MachineBasicBlock *Entry = CurMBB->getParent()->begin();
+ if (CurMBB->begin() == BBI1 && CurMBB != Entry)
+ ; // CurMBB is common tail
+ else if (MBB2->begin() == BBI2 && MBB2 != Entry)
+ ; // MBB2 is common tail
+ else {
+ if (0) { // Enable this to disable partial tail merges.
+ MergePotentials.pop_back();
+ continue;
+ }
+
+ // Decide whether we want to split CurMBB or MBB2.
+ if (ShouldSplitFirstBlock(CurMBB, BBI1, MBB2, BBI2, TII, PredBB)) {
+ CurMBB = SplitMBBAt(*CurMBB, BBI1);
+ BBI1 = CurMBB->begin();
+ MergePotentials.back().second = CurMBB;
+ } else {
+ MBB2 = SplitMBBAt(*MBB2, BBI2);
+ BBI2 = MBB2->begin();
+ (MergePotentials.end()-2)->second = MBB2;
+ }
+ }
+
+ if (MBB2->begin() == BBI2 && MBB2 != Entry) {
+ // Hack the end off CurMBB, making it jump to MBB2 instead.
+ ReplaceTailWithBranchTo(BBI1, MBB2);
+ // This modifies CurMBB, so remove it from the worklist.
+ MergePotentials.pop_back();
+ } else {
+ assert(CurMBB->begin() == BBI1 && CurMBB != Entry &&
+ "Didn't split block correctly?");
+ // Hack the end off MBB2, making it jump to CurMBB instead.
+ ReplaceTailWithBranchTo(BBI2, CurMBB);
+ // This modifies MBB2, so remove it from the worklist.
+ MergePotentials.erase(MergePotentials.end()-2);
+ }
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
+
+ if (!EnableTailMerge) return false;
+
+ MadeChange = false;
+
+ // First find blocks with no successors.
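+ // For example, every return block has no successors, so a function with
+ // several identical epilogue/return sequences collects them all here and
+ // can merge them into one block. HashEndOfMBB(I, 2U) produces a cheap key
+ // from the end of each block; only blocks whose keys collide are compared
+ // instruction-by-instruction in TryMergeBlocks.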
+ MergePotentials.clear(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + if (I->succ_empty()) + MergePotentials.push_back(std::make_pair(HashEndOfMBB(I, 2U), I)); + } + // See if we can do any tail merging on those. + if (MergePotentials.size() < TailMergeThreshold) + MadeChange |= TryMergeBlocks(NULL, NULL); + + // Look at blocks (IBB) with multiple predecessors (PBB). + // We change each predecessor to a canonical form, by + // (1) temporarily removing any unconditional branch from the predecessor + // to IBB, and + // (2) alter conditional branches so they branch to the other block + // not IBB; this may require adding back an unconditional branch to IBB + // later, where there wasn't one coming in. E.g. + // Bcc IBB + // fallthrough to QBB + // here becomes + // Bncc QBB + // with a conceptual B to IBB after that, which never actually exists. + // With those changes, we see whether the predecessors' tails match, + // and merge them if so. We change things out of canonical form and + // back to the way they were later in the process. (OptimizeBranches + // would undo some of this, but we can't use it, because we'd get into + // a compile-time infinite loop repeatedly doing and undoing the same + // transformations.) + + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + if (!I->succ_empty() && I->pred_size() >= 2 && + I->pred_size() < TailMergeThreshold) { + MachineBasicBlock *IBB = I; + MachineBasicBlock *PredBB = prior(I); + MergePotentials.clear(); + for (MachineBasicBlock::pred_iterator P = I->pred_begin(), + E2 = I->pred_end(); + P != E2; ++P) { + MachineBasicBlock* PBB = *P; + // Skip blocks that loop to themselves, can't tail merge these. + if (PBB==IBB) + continue; + MachineBasicBlock *TBB = 0, *FBB = 0; + std::vector<MachineOperand> Cond; + if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond)) { + // Failing case: IBB is the target of a cbr, and + // we cannot reverse the branch. + std::vector<MachineOperand> NewCond(Cond); + if (Cond.size() && TBB==IBB) { + if (TII->ReverseBranchCondition(NewCond)) + continue; + // This is the QBB case described above + if (!FBB) + FBB = next(MachineFunction::iterator(PBB)); + } + // Failing case: the only way IBB can be reached from PBB is via + // exception handling. Happens for landing pads. Would be nice + // to have a bit in the edge so we didn't have to do all this. + if (IBB->isLandingPad()) { + MachineFunction::iterator IP = PBB; IP++; + MachineBasicBlock* PredNextBB = NULL; + if (IP!=MF.end()) + PredNextBB = IP; + if (TBB==NULL) { + if (IBB!=PredNextBB) // fallthrough + continue; + } else if (FBB) { + if (TBB!=IBB && FBB!=IBB) // cbr then ubr + continue; + } else if (Cond.size() == 0) { + if (TBB!=IBB) // ubr + continue; + } else { + if (TBB!=IBB && IBB!=PredNextBB) // cbr + continue; + } + } + // Remove the unconditional branch at the end, if any. + if (TBB && (Cond.size()==0 || FBB)) { + TII->RemoveBranch(*PBB); + if (Cond.size()) + // reinsert conditional branch only, for now + TII->InsertBranch(*PBB, (TBB==IBB) ? FBB : TBB, 0, NewCond); + } + MergePotentials.push_back(std::make_pair(HashEndOfMBB(PBB, 1U), *P)); + } + } + if (MergePotentials.size() >= 2) + MadeChange |= TryMergeBlocks(I, PredBB); + // Reinsert an unconditional branch if needed. + // The 1 below can be either an original single predecessor, or a result + // of removing blocks in TryMergeBlocks. 
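+ // E.g. if exactly one predecessor survives unmerged and it is not placed
+ // immediately before IBB, its explicit branch to IBB must be restored;
+ // that is the FixTail call below.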
+ PredBB = prior(I); // this may have been changed in TryMergeBlocks + if (MergePotentials.size()==1 && + (MergePotentials.begin())->second != PredBB) + FixTail((MergePotentials.begin())->second, I, TII); + } + } + return MadeChange; +} + +//===----------------------------------------------------------------------===// +// Branch Optimization +//===----------------------------------------------------------------------===// + +bool BranchFolder::OptimizeBranches(MachineFunction &MF) { + MadeChange = false; + + // Make sure blocks are numbered in order + MF.RenumberBlocks(); + + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { + MachineBasicBlock *MBB = I++; + OptimizeBlock(MBB); + + // If it is dead, remove it. + if (MBB->pred_empty()) { + RemoveDeadBlock(MBB); + MadeChange = true; + ++NumDeadBlocks; + } + } + return MadeChange; +} + + +/// CanFallThrough - Return true if the specified block (with the specified +/// branch condition) can implicitly transfer control to the block after it by +/// falling off the end of it. This should return false if it can reach the +/// block after it, but it uses an explicit branch to do so (e.g. a table jump). +/// +/// True is a conservative answer. +/// +bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB, + bool BranchUnAnalyzable, + MachineBasicBlock *TBB, MachineBasicBlock *FBB, + const std::vector<MachineOperand> &Cond) { + MachineFunction::iterator Fallthrough = CurBB; + ++Fallthrough; + // If FallthroughBlock is off the end of the function, it can't fall through. + if (Fallthrough == CurBB->getParent()->end()) + return false; + + // If FallthroughBlock isn't a successor of CurBB, no fallthrough is possible. + if (!CurBB->isSuccessor(Fallthrough)) + return false; + + // If we couldn't analyze the branch, assume it could fall through. + if (BranchUnAnalyzable) return true; + + // If there is no branch, control always falls through. + if (TBB == 0) return true; + + // If there is some explicit branch to the fallthrough block, it can obviously + // reach, even though the branch should get folded to fall through implicitly. + if (MachineFunction::iterator(TBB) == Fallthrough || + MachineFunction::iterator(FBB) == Fallthrough) + return true; + + // If it's an unconditional branch to some block not the fall through, it + // doesn't fall through. + if (Cond.empty()) return false; + + // Otherwise, if it is conditional and has no explicit false block, it falls + // through. + return FBB == 0; +} + +/// CanFallThrough - Return true if the specified can implicitly transfer +/// control to the block after it by falling off the end of it. This should +/// return false if it can reach the block after it, but it uses an explicit +/// branch to do so (e.g. a table jump). +/// +/// True is a conservative answer. +/// +bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB) { + MachineBasicBlock *TBB = 0, *FBB = 0; + std::vector<MachineOperand> Cond; + bool CurUnAnalyzable = TII->AnalyzeBranch(*CurBB, TBB, FBB, Cond); + return CanFallThrough(CurBB, CurUnAnalyzable, TBB, FBB, Cond); +} + +/// IsBetterFallthrough - Return true if it would be clearly better to +/// fall-through to MBB1 than to fall through into MBB2. This has to return +/// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will +/// result in infinite loops. +static bool IsBetterFallthrough(MachineBasicBlock *MBB1, + MachineBasicBlock *MBB2, + const TargetInstrInfo &TII) { + // Right now, we use a simple heuristic. 
If MBB2 ends with a call, and + // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to + // optimize branches that branch to either a return block or an assert block + // into a fallthrough to the return. + if (MBB1->empty() || MBB2->empty()) return false; + + MachineInstr *MBB1I = --MBB1->end(); + MachineInstr *MBB2I = --MBB2->end(); + return TII.isCall(MBB2I->getOpcode()) && !TII.isCall(MBB1I->getOpcode()); +} + +/// OptimizeBlock - Analyze and optimize control flow related to the specified +/// block. This is never called on the entry block. +void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { + MachineFunction::iterator FallThrough = MBB; + ++FallThrough; + + // If this block is empty, make everyone use its fall-through, not the block + // explicitly. Landing pads should not do this since the landing-pad table + // points to this block. + if (MBB->empty() && !MBB->isLandingPad()) { + // Dead block? Leave for cleanup later. + if (MBB->pred_empty()) return; + + if (FallThrough == MBB->getParent()->end()) { + // TODO: Simplify preds to not branch here if possible! + } else { + // Rewrite all predecessors of the old block to go to the fallthrough + // instead. + while (!MBB->pred_empty()) { + MachineBasicBlock *Pred = *(MBB->pred_end()-1); + Pred->ReplaceUsesOfBlockWith(MBB, FallThrough); + } + + // If MBB was the target of a jump table, update jump tables to go to the + // fallthrough instead. + MBB->getParent()->getJumpTableInfo()-> + ReplaceMBBInJumpTables(MBB, FallThrough); + MadeChange = true; + } + return; + } + + // Check to see if we can simplify the terminator of the block before this + // one. + MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(MBB)); + + MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; + std::vector<MachineOperand> PriorCond; + bool PriorUnAnalyzable = + TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond); + if (!PriorUnAnalyzable) { + // If the CFG for the prior block has extra edges, remove them. + MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB, + !PriorCond.empty()); + + // If the previous branch is conditional and both conditions go to the same + // destination, remove the branch, replacing it with an unconditional one or + // a fall-through. + if (PriorTBB && PriorTBB == PriorFBB) { + TII->RemoveBranch(PrevBB); + PriorCond.clear(); + if (PriorTBB != MBB) + TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond); + MadeChange = true; + ++NumBranchOpts; + return OptimizeBlock(MBB); + } + + // If the previous branch *only* branches to *this* block (conditional or + // not) remove the branch. + if (PriorTBB == MBB && PriorFBB == 0) { + TII->RemoveBranch(PrevBB); + MadeChange = true; + ++NumBranchOpts; + return OptimizeBlock(MBB); + } + + // If the prior block branches somewhere else on the condition and here if + // the condition is false, remove the uncond second branch. + if (PriorFBB == MBB) { + TII->RemoveBranch(PrevBB); + TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond); + MadeChange = true; + ++NumBranchOpts; + return OptimizeBlock(MBB); + } + + // If the prior block branches here on true and somewhere else on false, and + // if the branch condition is reversible, reverse the branch to create a + // fall-through. 
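+ // Concretely (illustrative x86-style assembly, not code emitted by this
+ // pass):
+ // je MBB ; jmp Other ; MBB: ...
+ // becomes
+ // jne Other ; MBB: ...
+ // turning the taken branch into a fall-through.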
+ if (PriorTBB == MBB) { + std::vector<MachineOperand> NewPriorCond(PriorCond); + if (!TII->ReverseBranchCondition(NewPriorCond)) { + TII->RemoveBranch(PrevBB); + TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond); + MadeChange = true; + ++NumBranchOpts; + return OptimizeBlock(MBB); + } + } + + // If this block doesn't fall through (e.g. it ends with an uncond branch or + // has no successors) and if the pred falls through into this block, and if + // it would otherwise fall through into the block after this, move this + // block to the end of the function. + // + // We consider it more likely that execution will stay in the function (e.g. + // due to loops) than it is to exit it. This asserts in loops etc, moving + // the assert condition out of the loop body. + if (!PriorCond.empty() && PriorFBB == 0 && + MachineFunction::iterator(PriorTBB) == FallThrough && + !CanFallThrough(MBB)) { + bool DoTransform = true; + + // We have to be careful that the succs of PredBB aren't both no-successor + // blocks. If neither have successors and if PredBB is the second from + // last block in the function, we'd just keep swapping the two blocks for + // last. Only do the swap if one is clearly better to fall through than + // the other. + if (FallThrough == --MBB->getParent()->end() && + !IsBetterFallthrough(PriorTBB, MBB, *TII)) + DoTransform = false; + + // We don't want to do this transformation if we have control flow like: + // br cond BB2 + // BB1: + // .. + // jmp BBX + // BB2: + // .. + // ret + // + // In this case, we could actually be moving the return block *into* a + // loop! + if (DoTransform && !MBB->succ_empty() && + (!CanFallThrough(PriorTBB) || PriorTBB->empty())) + DoTransform = false; + + + if (DoTransform) { + // Reverse the branch so we will fall through on the previous true cond. + std::vector<MachineOperand> NewPriorCond(PriorCond); + if (!TII->ReverseBranchCondition(NewPriorCond)) { + DOUT << "\nMoving MBB: " << *MBB; + DOUT << "To make fallthrough to: " << *PriorTBB << "\n"; + + TII->RemoveBranch(PrevBB); + TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond); + + // Move this block to the end of the function. + MBB->moveAfter(--MBB->getParent()->end()); + MadeChange = true; + ++NumBranchOpts; + return; + } + } + } + } + + // Analyze the branch in the current block. + MachineBasicBlock *CurTBB = 0, *CurFBB = 0; + std::vector<MachineOperand> CurCond; + bool CurUnAnalyzable = TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond); + if (!CurUnAnalyzable) { + // If the CFG for the prior block has extra edges, remove them. + MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty()); + + // If this is a two-way branch, and the FBB branches to this block, reverse + // the condition so the single-basic-block loop is faster. Instead of: + // Loop: xxx; jcc Out; jmp Loop + // we want: + // Loop: xxx; jncc Loop; jmp Out + if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) { + std::vector<MachineOperand> NewCond(CurCond); + if (!TII->ReverseBranchCondition(NewCond)) { + TII->RemoveBranch(*MBB); + TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond); + MadeChange = true; + ++NumBranchOpts; + return OptimizeBlock(MBB); + } + } + + + // If this branch is the only thing in its block, see if we can forward + // other blocks across it. + if (CurTBB && CurCond.empty() && CurFBB == 0 && + TII->isBranch(MBB->begin()->getOpcode()) && CurTBB != MBB) { + // This block may contain just an unconditional branch. 
Because there can + // be 'non-branch terminators' in the block, try removing the branch and + // then seeing if the block is empty. + TII->RemoveBranch(*MBB); + + // If this block is just an unconditional branch to CurTBB, we can + // usually completely eliminate the block. The only case we cannot + // completely eliminate the block is when the block before this one + // falls through into MBB and we can't understand the prior block's branch + // condition. + if (MBB->empty()) { + bool PredHasNoFallThrough = TII->BlockHasNoFallThrough(PrevBB); + if (PredHasNoFallThrough || !PriorUnAnalyzable || + !PrevBB.isSuccessor(MBB)) { + // If the prior block falls through into us, turn it into an + // explicit branch to us to make updates simpler. + if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) && + PriorTBB != MBB && PriorFBB != MBB) { + if (PriorTBB == 0) { + assert(PriorCond.empty() && PriorFBB == 0 && + "Bad branch analysis"); + PriorTBB = MBB; + } else { + assert(PriorFBB == 0 && "Machine CFG out of date!"); + PriorFBB = MBB; + } + TII->RemoveBranch(PrevBB); + TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond); + } + + // Iterate through all the predecessors, revectoring each in-turn. + size_t PI = 0; + bool DidChange = false; + bool HasBranchToSelf = false; + while(PI != MBB->pred_size()) { + MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI); + if (PMBB == MBB) { + // If this block has an uncond branch to itself, leave it. + ++PI; + HasBranchToSelf = true; + } else { + DidChange = true; + PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB); + } + } + + // Change any jumptables to go to the new MBB. + MBB->getParent()->getJumpTableInfo()-> + ReplaceMBBInJumpTables(MBB, CurTBB); + if (DidChange) { + ++NumBranchOpts; + MadeChange = true; + if (!HasBranchToSelf) return; + } + } + } + + // Add the branch back if the block is more than just an uncond branch. + TII->InsertBranch(*MBB, CurTBB, 0, CurCond); + } + } + + // If the prior block doesn't fall through into this block, and if this + // block doesn't fall through into some other block, see if we can find a + // place to move this block where a fall-through will happen. + if (!CanFallThrough(&PrevBB, PriorUnAnalyzable, + PriorTBB, PriorFBB, PriorCond)) { + // Now we know that there was no fall-through into this block, check to + // see if it has a fall-through into its successor. + bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB, + CurCond); + + if (!MBB->isLandingPad()) { + // Check all the predecessors of this block. If one of them has no fall + // throughs, move this block right after it. + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + E = MBB->pred_end(); PI != E; ++PI) { + // Analyze the branch at the end of the pred. + MachineBasicBlock *PredBB = *PI; + MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough; + if (PredBB != MBB && !CanFallThrough(PredBB) + && (!CurFallsThru || !CurTBB || !CurFBB) + && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) { + // If the current block doesn't fall through, just move it. + // If the current block can fall through and does not end with a + // conditional branch, we need to append an unconditional jump to + // the (current) next block. To avoid a possible compile-time + // infinite loop, move blocks only backward in this case. 
+ // Also, if there are already 2 branches here, we cannot add a third; + // this means we have the case + // Bcc next + // B elsewhere + // next: + if (CurFallsThru) { + MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB)); + CurCond.clear(); + TII->InsertBranch(*MBB, NextBB, 0, CurCond); + } + MBB->moveAfter(PredBB); + MadeChange = true; + return OptimizeBlock(MBB); + } + } + } + + if (!CurFallsThru) { + // Check all successors to see if we can move this block before it. + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + E = MBB->succ_end(); SI != E; ++SI) { + // Analyze the branch at the end of the block before the succ. + MachineBasicBlock *SuccBB = *SI; + MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev; + std::vector<MachineOperand> SuccPrevCond; + + // If this block doesn't already fall-through to that successor, and if + // the succ doesn't already have a block that can fall through into it, + // and if the successor isn't an EH destination, we can arrange for the + // fallthrough to happen. + if (SuccBB != MBB && !CanFallThrough(SuccPrev) && + !SuccBB->isLandingPad()) { + MBB->moveBefore(SuccBB); + MadeChange = true; + return OptimizeBlock(MBB); + } + } + + // Okay, there is no really great place to put this block. If, however, + // the block before this one would be a fall-through if this block were + // removed, move this block to the end of the function. + if (FallThrough != MBB->getParent()->end() && + PrevBB.isSuccessor(FallThrough)) { + MBB->moveAfter(--MBB->getParent()->end()); + MadeChange = true; + return; + } + } + } +} diff --git a/lib/CodeGen/DwarfWriter.cpp b/lib/CodeGen/DwarfWriter.cpp new file mode 100644 index 0000000..257a776 --- /dev/null +++ b/lib/CodeGen/DwarfWriter.cpp @@ -0,0 +1,3616 @@ +//===-- llvm/CodeGen/DwarfWriter.cpp - Dwarf Framework ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by James M. Laskey and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf info into asm files. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/DwarfWriter.h" + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/UniqueVector.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/MRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <ostream> +#include <string> +using namespace llvm; +using namespace llvm::dwarf; + +namespace llvm { + +//===----------------------------------------------------------------------===// + +/// Configuration values for initial hash set sizes (log2). 
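+/// A log2 size of 9 gives each folding set 1 << 9 = 512 initial buckets,
+/// matching the counts noted below.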
+ ///
+static const unsigned InitDiesSetSize = 9; // 512
+static const unsigned InitAbbreviationsSetSize = 9; // 512
+static const unsigned InitValuesSetSize = 9; // 512
+
+//===----------------------------------------------------------------------===//
+/// Forward declarations.
+///
+class DIE;
+class DIEValue;
+
+//===----------------------------------------------------------------------===//
+/// DWLabel - Labels are used to track locations in the assembler file.
+/// Labels appear in the form <prefix><Tag><Number>, where the tag is a
+/// category of label (Ex. location) and number is a value unique in that
+/// category.
+class DWLabel {
+public:
+ /// Tag - Label category tag. Should always be a statically declared C string.
+ ///
+ const char *Tag;
+
+ /// Number - Value to make label unique.
+ ///
+ unsigned Number;
+
+ DWLabel(const char *T, unsigned N) : Tag(T), Number(N) {}
+
+ void Profile(FoldingSetNodeID &ID) const {
+ ID.AddString(std::string(Tag));
+ ID.AddInteger(Number);
+ }
+
+#ifndef NDEBUG
+ void print(std::ostream *O) const {
+ if (O) print(*O);
+ }
+ void print(std::ostream &O) const {
+ O << "." << Tag;
+ if (Number) O << Number;
+ }
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEAbbrevData - Dwarf abbreviation data, describes the one attribute of a
+/// Dwarf abbreviation.
+class DIEAbbrevData {
+private:
+ /// Attribute - Dwarf attribute code.
+ ///
+ unsigned Attribute;
+
+ /// Form - Dwarf form code.
+ ///
+ unsigned Form;
+
+public:
+ DIEAbbrevData(unsigned A, unsigned F)
+ : Attribute(A)
+ , Form(F)
+ {}
+
+ // Accessors.
+ unsigned getAttribute() const { return Attribute; }
+ unsigned getForm() const { return Form; }
+
+ /// Profile - Used to gather unique data for the abbreviation folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(Attribute);
+ ID.AddInteger(Form);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
+/// information object.
+class DIEAbbrev : public FoldingSetNode {
+private:
+ /// Tag - Dwarf tag code.
+ ///
+ unsigned Tag;
+
+ /// Unique number for node.
+ ///
+ unsigned Number;
+
+ /// ChildrenFlag - Dwarf children flag.
+ ///
+ unsigned ChildrenFlag;
+
+ /// Data - Raw data bytes for abbreviation.
+ ///
+ std::vector<DIEAbbrevData> Data;
+
+public:
+
+ DIEAbbrev(unsigned T, unsigned C)
+ : Tag(T)
+ , ChildrenFlag(C)
+ , Data()
+ {}
+ ~DIEAbbrev() {}
+
+ // Accessors.
+ unsigned getTag() const { return Tag; }
+ unsigned getNumber() const { return Number; }
+ unsigned getChildrenFlag() const { return ChildrenFlag; }
+ const std::vector<DIEAbbrevData> &getData() const { return Data; }
+ void setTag(unsigned T) { Tag = T; }
+ void setChildrenFlag(unsigned CF) { ChildrenFlag = CF; }
+ void setNumber(unsigned N) { Number = N; }
+
+ /// AddAttribute - Adds another set of attribute information to the
+ /// abbreviation.
+ void AddAttribute(unsigned Attribute, unsigned Form) {
+ Data.push_back(DIEAbbrevData(Attribute, Form));
+ }
+
+ /// AddFirstAttribute - Adds a set of attribute information to the front
+ /// of the abbreviation.
+ void AddFirstAttribute(unsigned Attribute, unsigned Form) {
+ Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form));
+ }
+
+ /// Profile - Used to gather unique data for the abbreviation folding set.
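+ /// Two abbreviations with the same tag, children flag, and attribute/form
+ /// list therefore profile identically and are emitted only once; see
+ /// DwarfDebug::AssignAbbrevNumber later in this file.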
+ ///
+ void Profile(FoldingSetNodeID &ID) {
+ ID.AddInteger(Tag);
+ ID.AddInteger(ChildrenFlag);
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i)
+ Data[i].Profile(ID);
+ }
+
+ /// Emit - Print the abbreviation using the specified Dwarf writer.
+ ///
+ void Emit(const DwarfDebug &DD) const;
+
+#ifndef NDEBUG
+ void print(std::ostream *O) {
+ if (O) print(*O);
+ }
+ void print(std::ostream &O);
+ void dump();
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIE - A structured debug information entry. Has an abbreviation which
+/// describes its organization.
+class DIE : public FoldingSetNode {
+protected:
+ /// Abbrev - Buffer for constructing abbreviation.
+ ///
+ DIEAbbrev Abbrev;
+
+ /// Offset - Offset in debug info section.
+ ///
+ unsigned Offset;
+
+ /// Size - Size of instance + children.
+ ///
+ unsigned Size;
+
+ /// Children DIEs.
+ ///
+ std::vector<DIE *> Children;
+
+ /// Attribute values.
+ ///
+ std::vector<DIEValue *> Values;
+
+public:
+ DIE(unsigned Tag)
+ : Abbrev(Tag, DW_CHILDREN_no)
+ , Offset(0)
+ , Size(0)
+ , Children()
+ , Values()
+ {}
+ virtual ~DIE();
+
+ // Accessors.
+ DIEAbbrev &getAbbrev() { return Abbrev; }
+ unsigned getAbbrevNumber() const {
+ return Abbrev.getNumber();
+ }
+ unsigned getTag() const { return Abbrev.getTag(); }
+ unsigned getOffset() const { return Offset; }
+ unsigned getSize() const { return Size; }
+ const std::vector<DIE *> &getChildren() const { return Children; }
+ std::vector<DIEValue *> &getValues() { return Values; }
+ void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
+ void setOffset(unsigned O) { Offset = O; }
+ void setSize(unsigned S) { Size = S; }
+
+ /// AddValue - Add a value and attributes to a DIE.
+ ///
+ void AddValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
+ Abbrev.AddAttribute(Attribute, Form);
+ Values.push_back(Value);
+ }
+
+ /// SiblingOffset - Return the offset of the debug information entry's
+ /// sibling.
+ unsigned SiblingOffset() const { return Offset + Size; }
+
+ /// AddSiblingOffset - Add a sibling offset field to the front of the DIE.
+ ///
+ void AddSiblingOffset();
+
+ /// AddChild - Add a child to the DIE.
+ ///
+ void AddChild(DIE *Child) {
+ Abbrev.setChildrenFlag(DW_CHILDREN_yes);
+ Children.push_back(Child);
+ }
+
+ /// Detach - Detaches objects connected to it after copying.
+ ///
+ void Detach() {
+ Children.clear();
+ }
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ void Profile(FoldingSetNodeID &ID);
+
+#ifndef NDEBUG
+ void print(std::ostream *O, unsigned IncIndent = 0) {
+ if (O) print(*O, IncIndent);
+ }
+ void print(std::ostream &O, unsigned IncIndent = 0);
+ void dump();
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEValue - A debug information entry value.
+///
+class DIEValue : public FoldingSetNode {
+public:
+ enum {
+ isInteger,
+ isString,
+ isLabel,
+ isAsIsLabel,
+ isDelta,
+ isEntry,
+ isBlock
+ };
+
+ /// Type - Type of data stored in the value.
+ ///
+ unsigned Type;
+
+ DIEValue(unsigned T)
+ : Type(T)
+ {}
+ virtual ~DIEValue() {}
+
+ // Accessors
+ unsigned getType() const { return Type; }
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEValue *) { return true; }
+
+ /// EmitValue - Emit value via the Dwarf writer.
+ ///
+ virtual void EmitValue(DwarfDebug &DD, unsigned Form) = 0;
+
+ /// SizeOf - Return the size of a value in bytes.
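+ /// For a fixed-width form such as DW_FORM_data4 this is a constant; for
+ /// LEB128-encoded forms it depends on the magnitude of the value itself.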
+ ///
+ virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const = 0;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ virtual void Profile(FoldingSetNodeID &ID) = 0;
+
+#ifndef NDEBUG
+ void print(std::ostream *O) {
+ if (O) print(*O);
+ }
+ virtual void print(std::ostream &O) = 0;
+ void dump();
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEInteger - An integer value DIE.
+///
+class DIEInteger : public DIEValue {
+private:
+ uint64_t Integer;
+
+public:
+ DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {}
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEInteger *) { return true; }
+ static bool classof(const DIEValue *I) { return I->Type == isInteger; }
+
+ /// BestForm - Choose the best form for integer.
+ ///
+ static unsigned BestForm(bool IsSigned, uint64_t Integer) {
+ if (IsSigned) {
+ if ((char)Integer == (signed)Integer) return DW_FORM_data1;
+ if ((short)Integer == (signed)Integer) return DW_FORM_data2;
+ if ((int)Integer == (signed)Integer) return DW_FORM_data4;
+ } else {
+ if ((unsigned char)Integer == Integer) return DW_FORM_data1;
+ if ((unsigned short)Integer == Integer) return DW_FORM_data2;
+ if ((unsigned int)Integer == Integer) return DW_FORM_data4;
+ }
+ return DW_FORM_data8;
+ }
+
+ /// EmitValue - Emit integer of appropriate size.
+ ///
+ virtual void EmitValue(DwarfDebug &DD, unsigned Form);
+
+ /// SizeOf - Determine size of integer value in bytes.
+ ///
+ virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const;
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, unsigned Integer) {
+ ID.AddInteger(isInteger);
+ ID.AddInteger(Integer);
+ }
+ virtual void Profile(FoldingSetNodeID &ID) { Profile(ID, Integer); }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O) {
+ O << "Int: " << (int64_t)Integer
+ << " 0x" << std::hex << Integer << std::dec;
+ }
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEString - A string value DIE.
+///
+class DIEString : public DIEValue {
+public:
+ const std::string String;
+
+ DIEString(const std::string &S) : DIEValue(isString), String(S) {}
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEString *) { return true; }
+ static bool classof(const DIEValue *S) { return S->Type == isString; }
+
+ /// EmitValue - Emit string value.
+ ///
+ virtual void EmitValue(DwarfDebug &DD, unsigned Form);
+
+ /// SizeOf - Determine size of string value in bytes.
+ ///
+ virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const {
+ return String.size() + sizeof(char); // sizeof('\0');
+ }
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, const std::string &String) {
+ ID.AddInteger(isString);
+ ID.AddString(String);
+ }
+ virtual void Profile(FoldingSetNodeID &ID) { Profile(ID, String); }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O) {
+ O << "Str: \"" << String << "\"";
+ }
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEDwarfLabel - A Dwarf internal label expression DIE.
+//
+class DIEDwarfLabel : public DIEValue {
+public:
+
+ const DWLabel Label;
+
+ DIEDwarfLabel(const DWLabel &L) : DIEValue(isLabel), Label(L) {}
+
+ // Implement isa/cast/dyncast.
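+ // These classof overloads are LLVM's hand-rolled RTTI hook: they let
+ // isa<DIEDwarfLabel>(V) and dyn_cast<DIEDwarfLabel>(V) dispatch on the
+ // Type tag set in the constructor, without relying on C++ RTTI.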
+ static bool classof(const DIEDwarfLabel *) { return true; } + static bool classof(const DIEValue *L) { return L->Type == isLabel; } + + /// EmitValue - Emit label value. + /// + virtual void EmitValue(DwarfDebug &DD, unsigned Form); + + /// SizeOf - Determine size of label value in bytes. + /// + virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const; + + /// Profile - Used to gather unique data for the value folding set. + /// + static void Profile(FoldingSetNodeID &ID, const DWLabel &Label) { + ID.AddInteger(isLabel); + Label.Profile(ID); + } + virtual void Profile(FoldingSetNodeID &ID) { Profile(ID, Label); } + +#ifndef NDEBUG + virtual void print(std::ostream &O) { + O << "Lbl: "; + Label.print(O); + } +#endif +}; + + +//===----------------------------------------------------------------------===// +/// DIEObjectLabel - A label to an object in code or data. +// +class DIEObjectLabel : public DIEValue { +public: + const std::string Label; + + DIEObjectLabel(const std::string &L) : DIEValue(isAsIsLabel), Label(L) {} + + // Implement isa/cast/dyncast. + static bool classof(const DIEObjectLabel *) { return true; } + static bool classof(const DIEValue *L) { return L->Type == isAsIsLabel; } + + /// EmitValue - Emit label value. + /// + virtual void EmitValue(DwarfDebug &DD, unsigned Form); + + /// SizeOf - Determine size of label value in bytes. + /// + virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const; + + /// Profile - Used to gather unique data for the value folding set. + /// + static void Profile(FoldingSetNodeID &ID, const std::string &Label) { + ID.AddInteger(isAsIsLabel); + ID.AddString(Label); + } + virtual void Profile(FoldingSetNodeID &ID) { Profile(ID, Label); } + +#ifndef NDEBUG + virtual void print(std::ostream &O) { + O << "Obj: " << Label; + } +#endif +}; + +//===----------------------------------------------------------------------===// +/// DIEDelta - A simple label difference DIE. +/// +class DIEDelta : public DIEValue { +public: + const DWLabel LabelHi; + const DWLabel LabelLo; + + DIEDelta(const DWLabel &Hi, const DWLabel &Lo) + : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {} + + // Implement isa/cast/dyncast. + static bool classof(const DIEDelta *) { return true; } + static bool classof(const DIEValue *D) { return D->Type == isDelta; } + + /// EmitValue - Emit delta value. + /// + virtual void EmitValue(DwarfDebug &DD, unsigned Form); + + /// SizeOf - Determine size of delta value in bytes. + /// + virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const; + + /// Profile - Used to gather unique data for the value folding set. + /// + static void Profile(FoldingSetNodeID &ID, const DWLabel &LabelHi, + const DWLabel &LabelLo) { + ID.AddInteger(isDelta); + LabelHi.Profile(ID); + LabelLo.Profile(ID); + } + virtual void Profile(FoldingSetNodeID &ID) { Profile(ID, LabelHi, LabelLo); } + +#ifndef NDEBUG + virtual void print(std::ostream &O) { + O << "Del: "; + LabelHi.print(O); + O << "-"; + LabelLo.print(O); + } +#endif +}; + +//===----------------------------------------------------------------------===// +/// DIEntry - A pointer to another debug information entry. An instance of this +/// class can also be used as a proxy for a debug information entry not yet +/// defined (ie. types.) +class DIEntry : public DIEValue { +public: + DIE *Entry; + + DIEntry(DIE *E) : DIEValue(isEntry), Entry(E) {} + + // Implement isa/cast/dyncast. 
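+ // Note the asymmetric Profile below: a resolved entry hashes the target
+ // DIE's address, while an unresolved proxy hashes its own address, so a
+ // forward reference never unifies with a resolved entry in the folding set.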
+ static bool classof(const DIEntry *) { return true; }
+ static bool classof(const DIEValue *E) { return E->Type == isEntry; }
+
+ /// EmitValue - Emit debug information entry offset.
+ ///
+ virtual void EmitValue(DwarfDebug &DD, unsigned Form);
+
+ /// SizeOf - Determine size of debug information entry in bytes.
+ ///
+ virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const {
+ return sizeof(int32_t);
+ }
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ static void Profile(FoldingSetNodeID &ID, DIE *Entry) {
+ ID.AddInteger(isEntry);
+ ID.AddPointer(Entry);
+ }
+ virtual void Profile(FoldingSetNodeID &ID) {
+ ID.AddInteger(isEntry);
+
+ if (Entry) {
+ ID.AddPointer(Entry);
+ } else {
+ ID.AddPointer(this);
+ }
+ }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O) {
+ O << "Die: 0x" << std::hex << (intptr_t)Entry << std::dec;
+ }
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// DIEBlock - A block of values. Primarily used for location expressions.
+//
+class DIEBlock : public DIEValue, public DIE {
+public:
+ unsigned Size; // Size in bytes excluding size header.
+
+ DIEBlock()
+ : DIEValue(isBlock)
+ , DIE(0)
+ , Size(0)
+ {}
+ ~DIEBlock() {
+ }
+
+ // Implement isa/cast/dyncast.
+ static bool classof(const DIEBlock *) { return true; }
+ static bool classof(const DIEValue *E) { return E->Type == isBlock; }
+
+ /// ComputeSize - Calculate the size of the block.
+ ///
+ unsigned ComputeSize(DwarfDebug &DD);
+
+ /// BestForm - Choose the best form for data.
+ ///
+ unsigned BestForm() const {
+ if ((unsigned char)Size == Size) return DW_FORM_block1;
+ if ((unsigned short)Size == Size) return DW_FORM_block2;
+ if ((unsigned int)Size == Size) return DW_FORM_block4;
+ return DW_FORM_block;
+ }
+
+ /// EmitValue - Emit block data.
+ ///
+ virtual void EmitValue(DwarfDebug &DD, unsigned Form);
+
+ /// SizeOf - Determine size of block data in bytes.
+ ///
+ virtual unsigned SizeOf(const DwarfDebug &DD, unsigned Form) const;
+
+
+ /// Profile - Used to gather unique data for the value folding set.
+ ///
+ virtual void Profile(FoldingSetNodeID &ID) {
+ ID.AddInteger(isBlock);
+ DIE::Profile(ID);
+ }
+
+#ifndef NDEBUG
+ virtual void print(std::ostream &O) {
+ O << "Blk: ";
+ DIE::print(O, 5);
+ }
+#endif
+};
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information
+/// associated with a source file.
+class CompileUnit {
+private:
+ /// Desc - Compile unit debug descriptor.
+ ///
+ CompileUnitDesc *Desc;
+
+ /// ID - File identifier for source.
+ ///
+ unsigned ID;
+
+ /// Die - Compile unit debug information entry.
+ ///
+ DIE *Die;
+
+ /// DescToDieMap - Tracks the mapping of unit level debug information
+ /// descriptors to debug information entries.
+ std::map<DebugInfoDesc *, DIE *> DescToDieMap;
+
+ /// DescToDIEntryMap - Tracks the mapping of unit level debug information
+ /// descriptors to debug information entries using a DIEntry proxy.
+ std::map<DebugInfoDesc *, DIEntry *> DescToDIEntryMap;
+
+ /// Globals - A map of globally visible named entities for this unit.
+ ///
+ std::map<std::string, DIE *> Globals;
+
+ /// DiesSet - Used to uniquely define dies within the compile unit.
+ ///
+ FoldingSet<DIE> DiesSet;
+
+ /// Dies - List of all dies in the compile unit.
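+ /// Ownership note: the destructor below deletes Die and every entry in
+ /// Dies, so DIEs interned via AddDie must be heap-allocated.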
+ /// + std::vector<DIE *> Dies; + +public: + CompileUnit(CompileUnitDesc *CUD, unsigned I, DIE *D) + : Desc(CUD) + , ID(I) + , Die(D) + , DescToDieMap() + , DescToDIEntryMap() + , Globals() + , DiesSet(InitDiesSetSize) + , Dies() + {} + + ~CompileUnit() { + delete Die; + + for (unsigned i = 0, N = Dies.size(); i < N; ++i) + delete Dies[i]; + } + + // Accessors. + CompileUnitDesc *getDesc() const { return Desc; } + unsigned getID() const { return ID; } + DIE* getDie() const { return Die; } + std::map<std::string, DIE *> &getGlobals() { return Globals; } + + /// hasContent - Return true if this compile unit has something to write out. + /// + bool hasContent() const { + return !Die->getChildren().empty(); + } + + /// AddGlobal - Add a new global entity to the compile unit. + /// + void AddGlobal(const std::string &Name, DIE *Die) { + Globals[Name] = Die; + } + + /// getDieMapSlotFor - Returns the debug information entry map slot for the + /// specified debug descriptor. + DIE *&getDieMapSlotFor(DebugInfoDesc *DID) { + return DescToDieMap[DID]; + } + + /// getDIEntrySlotFor - Returns the debug information entry proxy slot for the + /// specified debug descriptor. + DIEntry *&getDIEntrySlotFor(DebugInfoDesc *DID) { + return DescToDIEntryMap[DID]; + } + + /// AddDie - Adds or interns the DIE to the compile unit. + /// + DIE *AddDie(DIE &Buffer) { + FoldingSetNodeID ID; + Buffer.Profile(ID); + void *Where; + DIE *Die = DiesSet.FindNodeOrInsertPos(ID, Where); + + if (!Die) { + Die = new DIE(Buffer); + DiesSet.InsertNode(Die, Where); + this->Die->AddChild(Die); + Buffer.Detach(); + } + + return Die; + } +}; + +//===----------------------------------------------------------------------===// +/// Dwarf - Emits general Dwarf directives. +/// +class Dwarf { + +protected: + + //===--------------------------------------------------------------------===// + // Core attributes used by the Dwarf writer. + // + + // + /// O - Stream to .s file. + /// + std::ostream &O; + + /// Asm - Target of Dwarf emission. + /// + AsmPrinter *Asm; + + /// TAI - Target Asm Printer. + const TargetAsmInfo *TAI; + + /// TD - Target data. + const TargetData *TD; + + /// RI - Register Information. + const MRegisterInfo *RI; + + /// M - Current module. + /// + Module *M; + + /// MF - Current machine function. + /// + MachineFunction *MF; + + /// MMI - Collected machine module information. + /// + MachineModuleInfo *MMI; + + /// SubprogramCount - The running count of functions being compiled. + /// + unsigned SubprogramCount; + + unsigned SetCounter; + Dwarf(std::ostream &OS, AsmPrinter *A, const TargetAsmInfo *T) + : O(OS) + , Asm(A) + , TAI(T) + , TD(Asm->TM.getTargetData()) + , RI(Asm->TM.getRegisterInfo()) + , M(NULL) + , MF(NULL) + , MMI(NULL) + , SubprogramCount(0) + , SetCounter(1) + { + } + +public: + + //===--------------------------------------------------------------------===// + // Accessors. + // + AsmPrinter *getAsm() const { return Asm; } + MachineModuleInfo *getMMI() const { return MMI; } + const TargetAsmInfo *getTargetAsmInfo() const { return TAI; } + + /// PrintLabelName - Print label name in form used by Dwarf writer. 
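+ /// For example, with a private global prefix of ".L" (an illustrative
+ /// assumption; the real prefix comes from the TargetAsmInfo), the label
+ /// DWLabel("debug_frame", 7) prints as .Ldebug_frame7.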
+ /// + void PrintLabelName(DWLabel Label) const { + PrintLabelName(Label.Tag, Label.Number); + } + void PrintLabelName(const char *Tag, unsigned Number, + bool isInSection = false) const { + if (isInSection && TAI->getDwarfSectionOffsetDirective()) + O << TAI->getDwarfSectionOffsetDirective() << Tag; + else + O << TAI->getPrivateGlobalPrefix() << Tag; + if (Number) O << Number; + } + + /// EmitLabel - Emit location label for internal use by Dwarf. + /// + void EmitLabel(DWLabel Label) const { + EmitLabel(Label.Tag, Label.Number); + } + void EmitLabel(const char *Tag, unsigned Number) const { + PrintLabelName(Tag, Number); + O << ":\n"; + } + + /// EmitReference - Emit a reference to a label. + /// + void EmitReference(DWLabel Label, bool IsPCRelative = false) const { + EmitReference(Label.Tag, Label.Number, IsPCRelative); + } + void EmitReference(const char *Tag, unsigned Number, + bool IsPCRelative = false) const { + if (TAI->getAddressSize() == sizeof(int32_t)) + O << TAI->getData32bitsDirective(); + else + O << TAI->getData64bitsDirective(); + + PrintLabelName(Tag, Number); + + if (IsPCRelative) O << "-" << TAI->getPCSymbol(); + } + void EmitReference(const std::string &Name, bool IsPCRelative = false) const { + if (TAI->getAddressSize() == sizeof(int32_t)) + O << TAI->getData32bitsDirective(); + else + O << TAI->getData64bitsDirective(); + + O << Name; + + if (IsPCRelative) O << "-" << TAI->getPCSymbol(); + } + + /// EmitDifference - Emit the difference between two labels. Some + /// assemblers do not behave with absolute expressions with data directives, + /// so there is an option (needsSet) to use an intermediary set expression. + void EmitDifference(DWLabel LabelHi, DWLabel LabelLo, + bool IsSmall = false) { + EmitDifference(LabelHi.Tag, LabelHi.Number, + LabelLo.Tag, LabelLo.Number, + IsSmall); + } + void EmitDifference(const char *TagHi, unsigned NumberHi, + const char *TagLo, unsigned NumberLo, + bool IsSmall = false) { + if (TAI->needsSet()) { + O << "\t.set\t"; + PrintLabelName("set", SetCounter); + O << ","; + PrintLabelName(TagHi, NumberHi); + O << "-"; + PrintLabelName(TagLo, NumberLo); + O << "\n"; + + if (IsSmall || TAI->getAddressSize() == sizeof(int32_t)) + O << TAI->getData32bitsDirective(); + else + O << TAI->getData64bitsDirective(); + + PrintLabelName("set", SetCounter); + + ++SetCounter; + } else { + if (IsSmall || TAI->getAddressSize() == sizeof(int32_t)) + O << TAI->getData32bitsDirective(); + else + O << TAI->getData64bitsDirective(); + + PrintLabelName(TagHi, NumberHi); + O << "-"; + PrintLabelName(TagLo, NumberLo); + } + } + + void EmitSectionOffset(const char* Label, const char* Section, + unsigned LabelNumber, unsigned SectionNumber, + bool IsSmall = false, bool isEH = false) { + bool printAbsolute = false; + if (TAI->needsSet()) { + O << "\t.set\t"; + PrintLabelName("set", SetCounter); + O << ","; + PrintLabelName(Label, LabelNumber, true); + + if (isEH) + printAbsolute = TAI->isAbsoluteEHSectionOffsets(); + else + printAbsolute = TAI->isAbsoluteDebugSectionOffsets(); + + if (!printAbsolute) { + O << "-"; + PrintLabelName(Section, SectionNumber); + } + O << "\n"; + + if (IsSmall || TAI->getAddressSize() == sizeof(int32_t)) + O << TAI->getData32bitsDirective(); + else + O << TAI->getData64bitsDirective(); + + PrintLabelName("set", SetCounter); + ++SetCounter; + } else { + if (IsSmall || TAI->getAddressSize() == sizeof(int32_t)) + O << TAI->getData32bitsDirective(); + else + O << TAI->getData64bitsDirective(); + + PrintLabelName(Label, LabelNumber, true); + 
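+ // Mirror of the needsSet path above: EH and debug sections choose
+ // absolute vs. section-relative offsets independently via the TAI
+ // flags queried just below.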
+ if (isEH)
+ printAbsolute = TAI->isAbsoluteEHSectionOffsets();
+ else
+ printAbsolute = TAI->isAbsoluteDebugSectionOffsets();
+
+ if (!printAbsolute) {
+ O << "-";
+ PrintLabelName(Section, SectionNumber);
+ }
+ }
+ }
+
+ /// EmitFrameMoves - Emit frame instructions to describe the layout of the
+ /// frame.
+ void EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
+ const std::vector<MachineMove> &Moves) {
+ int stackGrowth =
+ Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
+ TargetFrameInfo::StackGrowsUp ?
+ TAI->getAddressSize() : -TAI->getAddressSize();
+ bool IsLocal = BaseLabel && strcmp(BaseLabel, "label") == 0;
+
+ for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
+ const MachineMove &Move = Moves[i];
+ unsigned LabelID = Move.getLabelID();
+
+ if (LabelID) {
+ LabelID = MMI->MappedLabel(LabelID);
+
+ // Throw out move if the label is invalid.
+ if (!LabelID) continue;
+ }
+
+ const MachineLocation &Dst = Move.getDestination();
+ const MachineLocation &Src = Move.getSource();
+
+ // Advance row if new location.
+ if (BaseLabel && LabelID && (BaseLabelID != LabelID || !IsLocal)) {
+ Asm->EmitInt8(DW_CFA_advance_loc4);
+ Asm->EOL("DW_CFA_advance_loc4");
+ EmitDifference("label", LabelID, BaseLabel, BaseLabelID, true);
+ Asm->EOL();
+
+ BaseLabelID = LabelID;
+ BaseLabel = "label";
+ IsLocal = true;
+ }
+
+ // If advancing cfa.
+ if (Dst.isRegister() && Dst.getRegister() == MachineLocation::VirtualFP) {
+ if (!Src.isRegister()) {
+ if (Src.getRegister() == MachineLocation::VirtualFP) {
+ Asm->EmitInt8(DW_CFA_def_cfa_offset);
+ Asm->EOL("DW_CFA_def_cfa_offset");
+ } else {
+ Asm->EmitInt8(DW_CFA_def_cfa);
+ Asm->EOL("DW_CFA_def_cfa");
+ Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Src.getRegister()));
+ Asm->EOL("Register");
+ }
+
+ int Offset = -Src.getOffset();
+
+ Asm->EmitULEB128Bytes(Offset);
+ Asm->EOL("Offset");
+ } else {
+ assert(0 && "Machine move not supported yet.");
+ }
+ } else if (Src.isRegister() &&
+ Src.getRegister() == MachineLocation::VirtualFP) {
+ if (Dst.isRegister()) {
+ Asm->EmitInt8(DW_CFA_def_cfa_register);
+ Asm->EOL("DW_CFA_def_cfa_register");
+ Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Dst.getRegister()));
+ Asm->EOL("Register");
+ } else {
+ assert(0 && "Machine move not supported yet.");
+ }
+ } else {
+ unsigned Reg = RI->getDwarfRegNum(Src.getRegister());
+ int Offset = Dst.getOffset() / stackGrowth;
+
+ if (Offset < 0) {
+ Asm->EmitInt8(DW_CFA_offset_extended_sf);
+ Asm->EOL("DW_CFA_offset_extended_sf");
+ Asm->EmitULEB128Bytes(Reg);
+ Asm->EOL("Reg");
+ Asm->EmitSLEB128Bytes(Offset);
+ Asm->EOL("Offset");
+ } else if (Reg < 64) {
+ Asm->EmitInt8(DW_CFA_offset + Reg);
+ Asm->EOL("DW_CFA_offset + Reg");
+ Asm->EmitULEB128Bytes(Offset);
+ Asm->EOL("Offset");
+ } else {
+ Asm->EmitInt8(DW_CFA_offset_extended);
+ Asm->EOL("DW_CFA_offset_extended");
+ Asm->EmitULEB128Bytes(Reg);
+ Asm->EOL("Reg");
+ Asm->EmitULEB128Bytes(Offset);
+ Asm->EOL("Offset");
+ }
+ }
+ }
+ }
+
+};
+
+//===----------------------------------------------------------------------===//
+/// DwarfDebug - Emits Dwarf debug directives.
+///
+class DwarfDebug : public Dwarf {
+
+private:
+ //===--------------------------------------------------------------------===//
+ // Attributes used to construct specific Dwarf sections.
+ //
+
+ /// CompileUnits - All the compile units involved in this build. The index
+ /// of each entry in this vector corresponds to the sources in MMI.
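+ /// Lookup by descriptor, rather than by index, presumably goes through
+ /// the DescToUnitMap declared below.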
+ std::vector<CompileUnit *> CompileUnits; + + /// AbbreviationsSet - Used to uniquely define abbreviations. + /// + FoldingSet<DIEAbbrev> AbbreviationsSet; + + /// Abbreviations - A list of all the unique abbreviations in use. + /// + std::vector<DIEAbbrev *> Abbreviations; + + /// ValuesSet - Used to uniquely define values. + /// + FoldingSet<DIEValue> ValuesSet; + + /// Values - A list of all the unique values in use. + /// + std::vector<DIEValue *> Values; + + /// StringPool - A UniqueVector of strings used by indirect references. + /// + UniqueVector<std::string> StringPool; + + /// UnitMap - Map debug information descriptor to compile unit. + /// + std::map<DebugInfoDesc *, CompileUnit *> DescToUnitMap; + + /// SectionMap - Provides a unique id per text section. + /// + UniqueVector<std::string> SectionMap; + + /// SectionSourceLines - Tracks line numbers per text section. + /// + std::vector<std::vector<SourceLineInfo> > SectionSourceLines; + + /// didInitial - Flag to indicate if initial emission has been done. + /// + bool didInitial; + + /// shouldEmit - Flag to indicate if debug information should be emitted. + /// + bool shouldEmit; + + struct FunctionDebugFrameInfo { + unsigned Number; + std::vector<MachineMove> Moves; + + FunctionDebugFrameInfo(unsigned Num, const std::vector<MachineMove> &M): + Number(Num), Moves(M) { }; + }; + + std::vector<FunctionDebugFrameInfo> DebugFrames; + +public: + + /// ShouldEmitDwarf - Returns true if Dwarf declarations should be made. + /// + bool ShouldEmitDwarf() const { return shouldEmit; } + + /// AssignAbbrevNumber - Define a unique number for the abbreviation. + /// + void AssignAbbrevNumber(DIEAbbrev &Abbrev) { + // Profile the node so that we can make it unique. + FoldingSetNodeID ID; + Abbrev.Profile(ID); + + // Check the set for priors. + DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev); + + // If it's newly added. + if (InSet == &Abbrev) { + // Add to abbreviation list. + Abbreviations.push_back(&Abbrev); + // Assign the vector position + 1 as its number. + Abbrev.setNumber(Abbreviations.size()); + } else { + // Assign existing abbreviation number. + Abbrev.setNumber(InSet->getNumber()); + } + } + + /// NewString - Add a string to the constant pool and returns a label. + /// + DWLabel NewString(const std::string &String) { + unsigned StringID = StringPool.insert(String); + return DWLabel("string", StringID); + } + + /// NewDIEntry - Creates a new DIEntry to be a proxy for a debug information + /// entry. + DIEntry *NewDIEntry(DIE *Entry = NULL) { + DIEntry *Value; + + if (Entry) { + FoldingSetNodeID ID; + DIEntry::Profile(ID, Entry); + void *Where; + Value = static_cast<DIEntry *>(ValuesSet.FindNodeOrInsertPos(ID, Where)); + + if (Value) return Value; + + Value = new DIEntry(Entry); + ValuesSet.InsertNode(Value, Where); + } else { + Value = new DIEntry(Entry); + } + + Values.push_back(Value); + return Value; + } + + /// SetDIEntry - Set a DIEntry once the debug information entry is defined. + /// + void SetDIEntry(DIEntry *Value, DIE *Entry) { + Value->Entry = Entry; + // Add to values set if not already there. If it is, we merely have a + // duplicate in the values list (no harm.) + ValuesSet.GetOrInsertNode(Value); + } + + /// AddUInt - Add an unsigned integer attribute data and value. 
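+ /// Like the other Add* helpers that follow, this interns the value in
+ /// ValuesSet, so a common constant (say, a byte size of 4) is allocated
+ /// once and shared by every DIE that references it.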
+ ///
+ void AddUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(false, Integer);
+
+ FoldingSetNodeID ID;
+ DIEInteger::Profile(ID, Integer);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+ if (!Value) {
+ Value = new DIEInteger(Integer);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+ }
+
+ /// AddSInt - Add a signed integer attribute data and value.
+ ///
+ void AddSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer) {
+ if (!Form) Form = DIEInteger::BestForm(true, Integer);
+
+ FoldingSetNodeID ID;
+ DIEInteger::Profile(ID, (uint64_t)Integer);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+ if (!Value) {
+ Value = new DIEInteger(Integer);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+ }
+
+ /// AddString - Add a std::string attribute data and value.
+ ///
+ void AddString(DIE *Die, unsigned Attribute, unsigned Form,
+ const std::string &String) {
+ FoldingSetNodeID ID;
+ DIEString::Profile(ID, String);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+ if (!Value) {
+ Value = new DIEString(String);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+ }
+
+ /// AddLabel - Add a Dwarf label attribute data and value.
+ ///
+ void AddLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Label) {
+ FoldingSetNodeID ID;
+ DIEDwarfLabel::Profile(ID, Label);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+ if (!Value) {
+ Value = new DIEDwarfLabel(Label);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+ }
+
+ /// AddObjectLabel - Add a non-Dwarf label attribute data and value.
+ ///
+ void AddObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const std::string &Label) {
+ FoldingSetNodeID ID;
+ DIEObjectLabel::Profile(ID, Label);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+ if (!Value) {
+ Value = new DIEObjectLabel(Label);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+ }
+
+ /// AddDelta - Add a label delta attribute data and value.
+ ///
+ void AddDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const DWLabel &Hi, const DWLabel &Lo) {
+ FoldingSetNodeID ID;
+ DIEDelta::Profile(ID, Hi, Lo);
+ void *Where;
+ DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
+ if (!Value) {
+ Value = new DIEDelta(Hi, Lo);
+ ValuesSet.InsertNode(Value, Where);
+ Values.push_back(Value);
+ }
+
+ Die->AddValue(Attribute, Form, Value);
+ }
+
+ /// AddDIEntry - Add a DIE attribute data and value.
+ ///
+ void AddDIEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry) {
+ Die->AddValue(Attribute, Form, NewDIEntry(Entry));
+ }
+
+ /// AddBlock - Add block data.
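+ /// Unlike the scalar helpers above, AddBlock takes ownership of Block:
+ /// when an equal block is already interned, the argument is deleted, and
+ /// the form is chosen from the computed size via Block->BestForm().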
+ /// + void AddBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block) { + Block->ComputeSize(*this); + FoldingSetNodeID ID; + Block->Profile(ID); + void *Where; + DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where); + if (!Value) { + Value = Block; + ValuesSet.InsertNode(Value, Where); + Values.push_back(Value); + } else { + delete Block; + } + + Die->AddValue(Attribute, Block->BestForm(), Value); + } + +private: + + /// AddSourceLine - Add location information to specified debug information + /// entry. + void AddSourceLine(DIE *Die, CompileUnitDesc *File, unsigned Line) { + if (File && Line) { + CompileUnit *FileUnit = FindCompileUnit(File); + unsigned FileID = FileUnit->getID(); + AddUInt(Die, DW_AT_decl_file, 0, FileID); + AddUInt(Die, DW_AT_decl_line, 0, Line); + } + } + + /// AddAddress - Add an address attribute to a die based on the location + /// provided. + void AddAddress(DIE *Die, unsigned Attribute, + const MachineLocation &Location) { + unsigned Reg = RI->getDwarfRegNum(Location.getRegister()); + DIEBlock *Block = new DIEBlock(); + + if (Location.isRegister()) { + if (Reg < 32) { + AddUInt(Block, 0, DW_FORM_data1, DW_OP_reg0 + Reg); + } else { + AddUInt(Block, 0, DW_FORM_data1, DW_OP_regx); + AddUInt(Block, 0, DW_FORM_udata, Reg); + } + } else { + if (Reg < 32) { + AddUInt(Block, 0, DW_FORM_data1, DW_OP_breg0 + Reg); + } else { + AddUInt(Block, 0, DW_FORM_data1, DW_OP_bregx); + AddUInt(Block, 0, DW_FORM_udata, Reg); + } + AddUInt(Block, 0, DW_FORM_sdata, Location.getOffset()); + } + + AddBlock(Die, Attribute, 0, Block); + } + + /// AddBasicType - Add a new basic type attribute to the specified entity. + /// + void AddBasicType(DIE *Entity, CompileUnit *Unit, + const std::string &Name, + unsigned Encoding, unsigned Size) { + DIE *Die = ConstructBasicType(Unit, Name, Encoding, Size); + AddDIEntry(Entity, DW_AT_type, DW_FORM_ref4, Die); + } + + /// ConstructBasicType - Construct a new basic type. + /// + DIE *ConstructBasicType(CompileUnit *Unit, + const std::string &Name, + unsigned Encoding, unsigned Size) { + DIE Buffer(DW_TAG_base_type); + AddUInt(&Buffer, DW_AT_byte_size, 0, Size); + AddUInt(&Buffer, DW_AT_encoding, DW_FORM_data1, Encoding); + if (!Name.empty()) AddString(&Buffer, DW_AT_name, DW_FORM_string, Name); + return Unit->AddDie(Buffer); + } + + /// AddPointerType - Add a new pointer type attribute to the specified entity. + /// + void AddPointerType(DIE *Entity, CompileUnit *Unit, const std::string &Name) { + DIE *Die = ConstructPointerType(Unit, Name); + AddDIEntry(Entity, DW_AT_type, DW_FORM_ref4, Die); + } + + /// ConstructPointerType - Construct a new pointer type. + /// + DIE *ConstructPointerType(CompileUnit *Unit, const std::string &Name) { + DIE Buffer(DW_TAG_pointer_type); + AddUInt(&Buffer, DW_AT_byte_size, 0, TAI->getAddressSize()); + if (!Name.empty()) AddString(&Buffer, DW_AT_name, DW_FORM_string, Name); + return Unit->AddDie(Buffer); + } + + /// AddType - Add a new type attribute to the specified entity. + /// + void AddType(DIE *Entity, TypeDesc *TyDesc, CompileUnit *Unit) { + if (!TyDesc) { + AddBasicType(Entity, Unit, "", DW_ATE_signed, sizeof(int32_t)); + } else { + // Check for pre-existence. + DIEntry *&Slot = Unit->getDIEntrySlotFor(TyDesc); + + // If it exists then use the existing value. + if (Slot) { + Entity->AddValue(DW_AT_type, DW_FORM_ref4, Slot); + return; + } + + if (SubprogramDesc *SubprogramTy = dyn_cast<SubprogramDesc>(TyDesc)) { + // FIXME - Not sure why programs and variables are coming through here. 
+ // Short cut for handling subprogram types (not really a TyDesc.) + AddPointerType(Entity, Unit, SubprogramTy->getName()); + } else if (GlobalVariableDesc *GlobalTy = + dyn_cast<GlobalVariableDesc>(TyDesc)) { + // FIXME - Not sure why programs and variables are coming through here. + // Short cut for handling global variable types (not really a TyDesc.) + AddPointerType(Entity, Unit, GlobalTy->getName()); + } else { + // Set up proxy. + Slot = NewDIEntry(); + + // Construct type. + DIE Buffer(DW_TAG_base_type); + ConstructType(Buffer, TyDesc, Unit); + + // Add debug information entry to entity and unit. + DIE *Die = Unit->AddDie(Buffer); + SetDIEntry(Slot, Die); + Entity->AddValue(DW_AT_type, DW_FORM_ref4, Slot); + } + } + } + + /// ConstructType - Adds all the required attributes to the type. + /// + void ConstructType(DIE &Buffer, TypeDesc *TyDesc, CompileUnit *Unit) { + // Get core information. + const std::string &Name = TyDesc->getName(); + uint64_t Size = TyDesc->getSize() >> 3; + + if (BasicTypeDesc *BasicTy = dyn_cast<BasicTypeDesc>(TyDesc)) { + // Fundamental types like int, float, bool + Buffer.setTag(DW_TAG_base_type); + AddUInt(&Buffer, DW_AT_encoding, DW_FORM_data1, BasicTy->getEncoding()); + } else if (DerivedTypeDesc *DerivedTy = dyn_cast<DerivedTypeDesc>(TyDesc)) { + // Fetch tag. + unsigned Tag = DerivedTy->getTag(); + // FIXME - Workaround for templates. + if (Tag == DW_TAG_inheritance) Tag = DW_TAG_reference_type; + // Pointers, typedefs et al. + Buffer.setTag(Tag); + // Map to main type, void will not have a type. + if (TypeDesc *FromTy = DerivedTy->getFromType()) + AddType(&Buffer, FromTy, Unit); + } else if (CompositeTypeDesc *CompTy = dyn_cast<CompositeTypeDesc>(TyDesc)){ + // Fetch tag. + unsigned Tag = CompTy->getTag(); + + // Set tag accordingly. + if (Tag == DW_TAG_vector_type) + Buffer.setTag(DW_TAG_array_type); + else + Buffer.setTag(Tag); + + std::vector<DebugInfoDesc *> &Elements = CompTy->getElements(); + + switch (Tag) { + case DW_TAG_vector_type: + AddUInt(&Buffer, DW_AT_GNU_vector, DW_FORM_flag, 1); + // Fall thru + case DW_TAG_array_type: { + // Add element type. + if (TypeDesc *FromTy = CompTy->getFromType()) + AddType(&Buffer, FromTy, Unit); + + // Don't emit size attribute. + Size = 0; + + // Construct an anonymous type for index type. + DIE *IndexTy = ConstructBasicType(Unit, "", DW_ATE_signed, + sizeof(int32_t)); + + // Add subranges to array type. + for(unsigned i = 0, N = Elements.size(); i < N; ++i) { + SubrangeDesc *SRD = cast<SubrangeDesc>(Elements[i]); + int64_t Lo = SRD->getLo(); + int64_t Hi = SRD->getHi(); + DIE *Subrange = new DIE(DW_TAG_subrange_type); + + // If a range is available. + if (Lo != Hi) { + AddDIEntry(Subrange, DW_AT_type, DW_FORM_ref4, IndexTy); + // Only add low if non-zero. + if (Lo) AddSInt(Subrange, DW_AT_lower_bound, 0, Lo); + AddSInt(Subrange, DW_AT_upper_bound, 0, Hi); + } + + Buffer.AddChild(Subrange); + } + break; + } + case DW_TAG_structure_type: + case DW_TAG_union_type: { + // Add elements to structure type. + for(unsigned i = 0, N = Elements.size(); i < N; ++i) { + DebugInfoDesc *Element = Elements[i]; + + if (DerivedTypeDesc *MemberDesc = dyn_cast<DerivedTypeDesc>(Element)){ + // Add field or base class. + + unsigned Tag = MemberDesc->getTag(); + + // Extract the basic information. + const std::string &Name = MemberDesc->getName(); + uint64_t Size = MemberDesc->getSize(); + uint64_t Align = MemberDesc->getAlign(); + uint64_t Offset = MemberDesc->getOffset(); + + // Construct member debug information entry. 
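+            // For a plain field like 'int i;' at offset 0 this yields,
+            // illustratively, a DW_TAG_member whose
+            // DW_AT_data_member_location block is {DW_OP_plus_uconst, 0}.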
+            DIE *Member = new DIE(Tag);
+
+            // Add name if not "".
+            if (!Name.empty())
+              AddString(Member, DW_AT_name, DW_FORM_string, Name);
+            // Add location if available.
+            AddSourceLine(Member, MemberDesc->getFile(), MemberDesc->getLine());
+
+            // Most of the time the field info is the same as the member's.
+            uint64_t FieldSize = Size;
+            uint64_t FieldAlign = Align;
+            uint64_t FieldOffset = Offset;
+
+            // Set the member type.
+            TypeDesc *FromTy = MemberDesc->getFromType();
+            AddType(Member, FromTy, Unit);
+
+            // Walk up typedefs until a real size is found.
+            while (FromTy) {
+              if (FromTy->getTag() != DW_TAG_typedef) {
+                FieldSize = FromTy->getSize();
+                FieldAlign = FromTy->getAlign();
+                break;
+              }
+
+              FromTy = dyn_cast<DerivedTypeDesc>(FromTy)->getFromType();
+            }
+
+            // Unless we have a bit field.
+            if (Tag == DW_TAG_member && FieldSize != Size) {
+              // Construct the alignment mask.
+              uint64_t AlignMask = ~(FieldAlign - 1);
+              // Determine the high bit + 1 of the declared size.
+              uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+              // Work backwards to determine the base offset of the field.
+              FieldOffset = HiMark - FieldSize;
+              // Now normalize offset to the field.
+              Offset -= FieldOffset;
+
+              // Maybe we need to work from the other end.
+              if (TD->isLittleEndian()) Offset = FieldSize - (Offset + Size);
+
+              // Add size and offset.
+              AddUInt(Member, DW_AT_byte_size, 0, FieldSize >> 3);
+              AddUInt(Member, DW_AT_bit_size, 0, Size);
+              AddUInt(Member, DW_AT_bit_offset, 0, Offset);
+            }
+
+            // Add computation for offset.
+            DIEBlock *Block = new DIEBlock();
+            AddUInt(Block, 0, DW_FORM_data1, DW_OP_plus_uconst);
+            AddUInt(Block, 0, DW_FORM_udata, FieldOffset >> 3);
+            AddBlock(Member, DW_AT_data_member_location, 0, Block);
+
+            // Add accessibility (public by default unless it is a base class).
+            if (MemberDesc->isProtected()) {
+              AddUInt(Member, DW_AT_accessibility, 0, DW_ACCESS_protected);
+            } else if (MemberDesc->isPrivate()) {
+              AddUInt(Member, DW_AT_accessibility, 0, DW_ACCESS_private);
+            } else if (Tag == DW_TAG_inheritance) {
+              AddUInt(Member, DW_AT_accessibility, 0, DW_ACCESS_public);
+            }
+
+            Buffer.AddChild(Member);
+          } else if (GlobalVariableDesc *StaticDesc =
+                     dyn_cast<GlobalVariableDesc>(Element)) {
+            // Add static member.
+
+            // Construct member debug information entry.
+            DIE *Static = new DIE(DW_TAG_variable);
+
+            // Add name and mangled name.
+            const std::string &Name = StaticDesc->getName();
+            const std::string &LinkageName = StaticDesc->getLinkageName();
+            AddString(Static, DW_AT_name, DW_FORM_string, Name);
+            if (!LinkageName.empty()) {
+              AddString(Static, DW_AT_MIPS_linkage_name, DW_FORM_string,
+                        LinkageName);
+            }
+
+            // Add location.
+            AddSourceLine(Static, StaticDesc->getFile(), StaticDesc->getLine());
+
+            // Add type.
+            if (TypeDesc *StaticTy = StaticDesc->getType())
+              AddType(Static, StaticTy, Unit);
+
+            // Add flags.
+            if (!StaticDesc->isStatic())
+              AddUInt(Static, DW_AT_external, DW_FORM_flag, 1);
+            AddUInt(Static, DW_AT_declaration, DW_FORM_flag, 1);
+
+            Buffer.AddChild(Static);
+          } else if (SubprogramDesc *MethodDesc =
+                     dyn_cast<SubprogramDesc>(Element)) {
+            // Add member function.
+
+            // Construct member debug information entry.
+            DIE *Method = new DIE(DW_TAG_subprogram);
+
+            // Add name and mangled name.
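+            // (The linkage name is the mangled symbol; for example a
+            // constructor S::S() would arrive as something like _ZN1SC1Ev -
+            // illustrative, the actual string comes from the front end.)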
+ const std::string &Name = MethodDesc->getName(); + const std::string &LinkageName = MethodDesc->getLinkageName(); + + AddString(Method, DW_AT_name, DW_FORM_string, Name); + bool IsCTor = TyDesc->getName() == Name; + + if (!LinkageName.empty()) { + AddString(Method, DW_AT_MIPS_linkage_name, DW_FORM_string, + LinkageName); + } + + // Add location. + AddSourceLine(Method, MethodDesc->getFile(), MethodDesc->getLine()); + + // Add type. + if (CompositeTypeDesc *MethodTy = + dyn_cast_or_null<CompositeTypeDesc>(MethodDesc->getType())) { + // Get argument information. + std::vector<DebugInfoDesc *> &Args = MethodTy->getElements(); + + // If not a ctor. + if (!IsCTor) { + // Add return type. + AddType(Method, dyn_cast<TypeDesc>(Args[0]), Unit); + } + + // Add arguments. + for(unsigned i = 1, N = Args.size(); i < N; ++i) { + DIE *Arg = new DIE(DW_TAG_formal_parameter); + AddType(Arg, cast<TypeDesc>(Args[i]), Unit); + AddUInt(Arg, DW_AT_artificial, DW_FORM_flag, 1); + Method->AddChild(Arg); + } + } + + // Add flags. + if (!MethodDesc->isStatic()) + AddUInt(Method, DW_AT_external, DW_FORM_flag, 1); + AddUInt(Method, DW_AT_declaration, DW_FORM_flag, 1); + + Buffer.AddChild(Method); + } + } + break; + } + case DW_TAG_enumeration_type: { + // Add enumerators to enumeration type. + for(unsigned i = 0, N = Elements.size(); i < N; ++i) { + EnumeratorDesc *ED = cast<EnumeratorDesc>(Elements[i]); + const std::string &Name = ED->getName(); + int64_t Value = ED->getValue(); + DIE *Enumerator = new DIE(DW_TAG_enumerator); + AddString(Enumerator, DW_AT_name, DW_FORM_string, Name); + AddSInt(Enumerator, DW_AT_const_value, DW_FORM_sdata, Value); + Buffer.AddChild(Enumerator); + } + + break; + } + case DW_TAG_subroutine_type: { + // Add prototype flag. + AddUInt(&Buffer, DW_AT_prototyped, DW_FORM_flag, 1); + // Add return type. + AddType(&Buffer, dyn_cast<TypeDesc>(Elements[0]), Unit); + + // Add arguments. + for(unsigned i = 1, N = Elements.size(); i < N; ++i) { + DIE *Arg = new DIE(DW_TAG_formal_parameter); + AddType(Arg, cast<TypeDesc>(Elements[i]), Unit); + Buffer.AddChild(Arg); + } + + break; + } + default: break; + } + } + + // Add size if non-zero (derived types don't have a size.) + if (Size) AddUInt(&Buffer, DW_AT_byte_size, 0, Size); + // Add name if not anonymous or intermediate type. + if (!Name.empty()) AddString(&Buffer, DW_AT_name, DW_FORM_string, Name); + // Add source line info if available. + AddSourceLine(&Buffer, TyDesc->getFile(), TyDesc->getLine()); + } + + /// NewCompileUnit - Create new compile unit and it's debug information entry. + /// + CompileUnit *NewCompileUnit(CompileUnitDesc *UnitDesc, unsigned ID) { + // Construct debug information entry. + DIE *Die = new DIE(DW_TAG_compile_unit); + if (TAI->isAbsoluteDebugSectionOffsets()) + AddLabel(Die, DW_AT_stmt_list, DW_FORM_data4, DWLabel("section_line", 0)); + else + AddDelta(Die, DW_AT_stmt_list, DW_FORM_data4, DWLabel("section_line", 0), + DWLabel("section_line", 0)); + AddString(Die, DW_AT_producer, DW_FORM_string, UnitDesc->getProducer()); + AddUInt (Die, DW_AT_language, DW_FORM_data1, UnitDesc->getLanguage()); + AddString(Die, DW_AT_name, DW_FORM_string, UnitDesc->getFileName()); + AddString(Die, DW_AT_comp_dir, DW_FORM_string, UnitDesc->getDirectory()); + + // Construct compile unit. + CompileUnit *Unit = new CompileUnit(UnitDesc, ID, Die); + + // Add Unit to compile unit map. + DescToUnitMap[UnitDesc] = Unit; + + return Unit; + } + + /// GetBaseCompileUnit - Get the main compile unit. 
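+  /// Note that the construction routines above currently hang all DIEs off
+  /// this first unit rather than the unit named in a descriptor's context.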
+ /// + CompileUnit *GetBaseCompileUnit() const { + CompileUnit *Unit = CompileUnits[0]; + assert(Unit && "Missing compile unit."); + return Unit; + } + + /// FindCompileUnit - Get the compile unit for the given descriptor. + /// + CompileUnit *FindCompileUnit(CompileUnitDesc *UnitDesc) { + CompileUnit *Unit = DescToUnitMap[UnitDesc]; + assert(Unit && "Missing compile unit."); + return Unit; + } + + /// NewGlobalVariable - Add a new global variable DIE. + /// + DIE *NewGlobalVariable(GlobalVariableDesc *GVD) { + // Get the compile unit context. + CompileUnitDesc *UnitDesc = + static_cast<CompileUnitDesc *>(GVD->getContext()); + CompileUnit *Unit = GetBaseCompileUnit(); + + // Check for pre-existence. + DIE *&Slot = Unit->getDieMapSlotFor(GVD); + if (Slot) return Slot; + + // Get the global variable itself. + GlobalVariable *GV = GVD->getGlobalVariable(); + + const std::string &Name = GVD->getName(); + const std::string &FullName = GVD->getFullName(); + const std::string &LinkageName = GVD->getLinkageName(); + // Create the global's variable DIE. + DIE *VariableDie = new DIE(DW_TAG_variable); + AddString(VariableDie, DW_AT_name, DW_FORM_string, Name); + if (!LinkageName.empty()) { + AddString(VariableDie, DW_AT_MIPS_linkage_name, DW_FORM_string, + LinkageName); + } + AddType(VariableDie, GVD->getType(), Unit); + if (!GVD->isStatic()) + AddUInt(VariableDie, DW_AT_external, DW_FORM_flag, 1); + + // Add source line info if available. + AddSourceLine(VariableDie, UnitDesc, GVD->getLine()); + + // Add address. + DIEBlock *Block = new DIEBlock(); + AddUInt(Block, 0, DW_FORM_data1, DW_OP_addr); + AddObjectLabel(Block, 0, DW_FORM_udata, Asm->getGlobalLinkName(GV)); + AddBlock(VariableDie, DW_AT_location, 0, Block); + + // Add to map. + Slot = VariableDie; + + // Add to context owner. + Unit->getDie()->AddChild(VariableDie); + + // Expose as global. + // FIXME - need to check external flag. + Unit->AddGlobal(FullName, VariableDie); + + return VariableDie; + } + + /// NewSubprogram - Add a new subprogram DIE. + /// + DIE *NewSubprogram(SubprogramDesc *SPD) { + // Get the compile unit context. + CompileUnitDesc *UnitDesc = + static_cast<CompileUnitDesc *>(SPD->getContext()); + CompileUnit *Unit = GetBaseCompileUnit(); + + // Check for pre-existence. + DIE *&Slot = Unit->getDieMapSlotFor(SPD); + if (Slot) return Slot; + + // Gather the details (simplify add attribute code.) + const std::string &Name = SPD->getName(); + const std::string &FullName = SPD->getFullName(); + const std::string &LinkageName = SPD->getLinkageName(); + + DIE *SubprogramDie = new DIE(DW_TAG_subprogram); + AddString(SubprogramDie, DW_AT_name, DW_FORM_string, Name); + if (!LinkageName.empty()) { + AddString(SubprogramDie, DW_AT_MIPS_linkage_name, DW_FORM_string, + LinkageName); + } + if (SPD->getType()) AddType(SubprogramDie, SPD->getType(), Unit); + if (!SPD->isStatic()) + AddUInt(SubprogramDie, DW_AT_external, DW_FORM_flag, 1); + AddUInt(SubprogramDie, DW_AT_prototyped, DW_FORM_flag, 1); + + // Add source line info if available. + AddSourceLine(SubprogramDie, UnitDesc, SPD->getLine()); + + // Add to map. + Slot = SubprogramDie; + + // Add to context owner. + Unit->getDie()->AddChild(SubprogramDie); + + // Expose as global. + Unit->AddGlobal(FullName, SubprogramDie); + + return SubprogramDie; + } + + /// NewScopeVariable - Create a new scope variable. + /// + DIE *NewScopeVariable(DebugVariable *DV, CompileUnit *Unit) { + // Get the descriptor. + VariableDesc *VD = DV->getDesc(); + + // Translate tag to proper Dwarf tag. 
+    // The result variable is dropped for now.
+    unsigned Tag;
+    switch (VD->getTag()) {
+    case DW_TAG_return_variable:  return NULL;
+    case DW_TAG_arg_variable:     Tag = DW_TAG_formal_parameter; break;
+    case DW_TAG_auto_variable:    // fall thru
+    default:                      Tag = DW_TAG_variable; break;
+    }
+
+    // Define variable debug information entry.
+    DIE *VariableDie = new DIE(Tag);
+    AddString(VariableDie, DW_AT_name, DW_FORM_string, VD->getName());
+
+    // Add source line info if available.
+    AddSourceLine(VariableDie, VD->getFile(), VD->getLine());
+
+    // Add variable type.
+    AddType(VariableDie, VD->getType(), Unit);
+
+    // Add variable address.
+    MachineLocation Location;
+    RI->getLocation(*MF, DV->getFrameIndex(), Location);
+    AddAddress(VariableDie, DW_AT_location, Location);
+
+    return VariableDie;
+  }
+
+  /// ConstructScope - Construct the components of a scope.
+  ///
+  void ConstructScope(DebugScope *ParentScope,
+                      unsigned ParentStartID, unsigned ParentEndID,
+                      DIE *ParentDie, CompileUnit *Unit) {
+    // Add variables to scope.
+    std::vector<DebugVariable *> &Variables = ParentScope->getVariables();
+    for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
+      DIE *VariableDie = NewScopeVariable(Variables[i], Unit);
+      if (VariableDie) ParentDie->AddChild(VariableDie);
+    }
+
+    // Add nested scopes.
+    std::vector<DebugScope *> &Scopes = ParentScope->getScopes();
+    for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
+      // Define the Scope debug information entry.
+      DebugScope *Scope = Scopes[j];
+      // FIXME - Ignore inlined functions for the time being.
+      if (!Scope->getParent()) continue;
+
+      unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
+      unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
+
+      // Ignore empty scopes.
+      if (StartID == EndID && StartID != 0) continue;
+      if (Scope->getScopes().empty() && Scope->getVariables().empty()) continue;
+
+      if (StartID == ParentStartID && EndID == ParentEndID) {
+        // Just add stuff to the parent scope.
+        ConstructScope(Scope, ParentStartID, ParentEndID, ParentDie, Unit);
+      } else {
+        DIE *ScopeDie = new DIE(DW_TAG_lexical_block);
+
+        // Add the scope bounds.
+        if (StartID) {
+          AddLabel(ScopeDie, DW_AT_low_pc, DW_FORM_addr,
+                   DWLabel("label", StartID));
+        } else {
+          AddLabel(ScopeDie, DW_AT_low_pc, DW_FORM_addr,
+                   DWLabel("func_begin", SubprogramCount));
+        }
+        if (EndID) {
+          AddLabel(ScopeDie, DW_AT_high_pc, DW_FORM_addr,
+                   DWLabel("label", EndID));
+        } else {
+          AddLabel(ScopeDie, DW_AT_high_pc, DW_FORM_addr,
+                   DWLabel("func_end", SubprogramCount));
+        }
+
+        // Add the scope contents.
+        ConstructScope(Scope, StartID, EndID, ScopeDie, Unit);
+        ParentDie->AddChild(ScopeDie);
+      }
+    }
+  }
+
+  /// ConstructRootScope - Construct the scope for the subprogram.
+  ///
+  void ConstructRootScope(DebugScope *RootScope) {
+    // Exit if there is no root scope.
+    if (!RootScope) return;
+
+    // Get the subprogram debug information entry.
+    SubprogramDesc *SPD = cast<SubprogramDesc>(RootScope->getDesc());
+
+    // Get the compile unit context.
+    CompileUnit *Unit = GetBaseCompileUnit();
+
+    // Get the subprogram die.
+    DIE *SPDie = Unit->getDieMapSlotFor(SPD);
+    assert(SPDie && "Missing subprogram descriptor");
+
+    // Add the function bounds.
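+    // The func_begin/func_end labels are emitted by BeginFunction and
+    // EndFunction below; low_pc/high_pc tie this subprogram DIE to that
+    // machine code range.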
+    AddLabel(SPDie, DW_AT_low_pc, DW_FORM_addr,
+             DWLabel("func_begin", SubprogramCount));
+    AddLabel(SPDie, DW_AT_high_pc, DW_FORM_addr,
+             DWLabel("func_end", SubprogramCount));
+    MachineLocation Location(RI->getFrameRegister(*MF));
+    AddAddress(SPDie, DW_AT_frame_base, Location);
+
+    ConstructScope(RootScope, 0, 0, SPDie, Unit);
+  }
+
+  /// EmitInitial - Emit initial Dwarf declarations. This is necessary for cc
+  /// tools to recognize the object file contains Dwarf information.
+  void EmitInitial() {
+    // Check to see if we already emitted initial headers.
+    if (didInitial) return;
+    didInitial = true;
+
+    // Dwarf sections base addresses.
+    if (TAI->doesDwarfRequireFrameSection()) {
+      Asm->SwitchToDataSection(TAI->getDwarfFrameSection());
+      EmitLabel("section_debug_frame", 0);
+    }
+    Asm->SwitchToDataSection(TAI->getDwarfInfoSection());
+    EmitLabel("section_info", 0);
+    Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection());
+    EmitLabel("section_abbrev", 0);
+    Asm->SwitchToDataSection(TAI->getDwarfARangesSection());
+    EmitLabel("section_aranges", 0);
+    Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+    EmitLabel("section_macinfo", 0);
+    Asm->SwitchToDataSection(TAI->getDwarfLineSection());
+    EmitLabel("section_line", 0);
+    Asm->SwitchToDataSection(TAI->getDwarfLocSection());
+    EmitLabel("section_loc", 0);
+    Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection());
+    EmitLabel("section_pubnames", 0);
+    Asm->SwitchToDataSection(TAI->getDwarfStrSection());
+    EmitLabel("section_str", 0);
+    Asm->SwitchToDataSection(TAI->getDwarfRangesSection());
+    EmitLabel("section_ranges", 0);
+
+    Asm->SwitchToTextSection(TAI->getTextSection());
+    EmitLabel("text_begin", 0);
+    Asm->SwitchToDataSection(TAI->getDataSection());
+    EmitLabel("data_begin", 0);
+  }
+
+  /// EmitDIE - Recursively emits a debug information entry.
+  ///
+  void EmitDIE(DIE *Die) {
+    // Get the abbreviation for this DIE.
+    unsigned AbbrevNumber = Die->getAbbrevNumber();
+    const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+    Asm->EOL();
+
+    // Emit the code (index) for the abbreviation.
+    Asm->EmitULEB128Bytes(AbbrevNumber);
+    Asm->EOL(std::string("Abbrev [" +
+                         utostr(AbbrevNumber) +
+                         "] 0x" + utohexstr(Die->getOffset()) +
+                         ":0x" + utohexstr(Die->getSize()) + " " +
+                         TagString(Abbrev->getTag())));
+
+    std::vector<DIEValue *> &Values = Die->getValues();
+    const std::vector<DIEAbbrevData> &AbbrevData = Abbrev->getData();
+
+    // Emit the DIE attribute values.
+    for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+      unsigned Attr = AbbrevData[i].getAttribute();
+      unsigned Form = AbbrevData[i].getForm();
+      assert(Form && "Too many attributes for DIE (check abbreviation)");
+
+      switch (Attr) {
+      case DW_AT_sibling: {
+        Asm->EmitInt32(Die->SiblingOffset());
+        break;
+      }
+      default: {
+        // Emit an attribute using the defined form.
+        Values[i]->EmitValue(*this, Form);
+        break;
+      }
+      }
+
+      Asm->EOL(AttributeString(Attr));
+    }
+
+    // Emit the DIE children if any.
+    if (Abbrev->getChildrenFlag() == DW_CHILDREN_yes) {
+      const std::vector<DIE *> &Children = Die->getChildren();
+
+      for (unsigned j = 0, M = Children.size(); j < M; ++j) {
+        EmitDIE(Children[j]);
+      }
+
+      Asm->EmitInt8(0); Asm->EOL("End Of Children Mark");
+    }
+  }
+
+  /// SizeAndOffsetDie - Compute the size and offset of a DIE.
+  ///
+  unsigned SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) {
+    // Get the children.
+    const std::vector<DIE *> &Children = Die->getChildren();
+
+    // If not last sibling and has children then add sibling offset attribute.
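+    // (DW_AT_sibling lets a consumer skip this DIE's subtree in one step;
+    // the last sibling can omit it since nothing follows the subtree.)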
+    if (!Last && !Children.empty()) Die->AddSiblingOffset();
+
+    // Record the abbreviation.
+    AssignAbbrevNumber(Die->getAbbrev());
+
+    // Get the abbreviation for this DIE.
+    unsigned AbbrevNumber = Die->getAbbrevNumber();
+    const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+    // Set DIE offset
+    Die->setOffset(Offset);
+
+    // Start the size with the size of abbreviation code.
+    Offset += Asm->SizeULEB128(AbbrevNumber);
+
+    const std::vector<DIEValue *> &Values = Die->getValues();
+    const std::vector<DIEAbbrevData> &AbbrevData = Abbrev->getData();
+
+    // Size the DIE attribute values.
+    for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+      // Size attribute value.
+      Offset += Values[i]->SizeOf(*this, AbbrevData[i].getForm());
+    }
+
+    // Size the DIE children if any.
+    if (!Children.empty()) {
+      assert(Abbrev->getChildrenFlag() == DW_CHILDREN_yes &&
+             "Children flag not set");
+
+      for (unsigned j = 0, M = Children.size(); j < M; ++j) {
+        Offset = SizeAndOffsetDie(Children[j], Offset, (j + 1) == M);
+      }
+
+      // End of children marker.
+      Offset += sizeof(int8_t);
+    }
+
+    Die->setSize(Offset - Die->getOffset());
+    return Offset;
+  }
+
+  /// SizeAndOffsets - Compute the size and offset of all the DIEs.
+  ///
+  void SizeAndOffsets() {
+    // Process base compile unit.
+    CompileUnit *Unit = GetBaseCompileUnit();
+    // Compute size of compile unit header
+    unsigned Offset = sizeof(int32_t) + // Length of Compilation Unit Info
+                      sizeof(int16_t) + // DWARF version number
+                      sizeof(int32_t) + // Offset Into Abbrev. Section
+                      sizeof(int8_t);   // Pointer Size (in bytes)
+    SizeAndOffsetDie(Unit->getDie(), Offset, true);
+  }
+
+  /// EmitDebugInfo - Emit the debug info section.
+  ///
+  void EmitDebugInfo() {
+    // Start debug info section.
+    Asm->SwitchToDataSection(TAI->getDwarfInfoSection());
+
+    CompileUnit *Unit = GetBaseCompileUnit();
+    DIE *Die = Unit->getDie();
+    // Emit the compile units header.
+    EmitLabel("info_begin", Unit->getID());
+    // Emit size of content not including length itself
+    unsigned ContentSize = Die->getSize() +
+                           sizeof(int16_t) + // DWARF version number
+                           sizeof(int32_t) + // Offset Into Abbrev. Section
+                           sizeof(int8_t) +  // Pointer Size (in bytes)
+                           sizeof(int32_t);  // FIXME - extra pad for gdb bug.
+
+    Asm->EmitInt32(ContentSize);  Asm->EOL("Length of Compilation Unit Info");
+    Asm->EmitInt16(DWARF_VERSION); Asm->EOL("DWARF version number");
+    EmitSectionOffset("abbrev_begin", "section_abbrev", 0, 0, true, false);
+    Asm->EOL("Offset Into Abbrev. Section");
+    Asm->EmitInt8(TAI->getAddressSize()); Asm->EOL("Address Size (in bytes)");
+
+    EmitDIE(Die);
+    // FIXME - extra padding for gdb bug.
+    Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+    Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+    Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+    Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
+    EmitLabel("info_end", Unit->getID());
+
+    Asm->EOL();
+  }
+
+  /// EmitAbbreviations - Emit the abbreviation section.
+  ///
+  void EmitAbbreviations() const {
+    // Check to see if it is worth the effort.
+    if (!Abbreviations.empty()) {
+      // Start the debug abbrev section.
+      Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection());
+
+      EmitLabel("abbrev_begin", 0);
+
+      // For each abbreviation.
+      for (unsigned i = 0, N = Abbreviations.size(); i < N; ++i) {
+        // Get abbreviation data
+        const DIEAbbrev *Abbrev = Abbreviations[i];
+
+        // Emit the abbreviation code (base 1 index.)
+        Asm->EmitULEB128Bytes(Abbrev->getNumber());
+        Asm->EOL("Abbreviation Code");
+
+        // Emit the abbreviations data.
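+        // (Each abbreviation is its tag, a children flag, and a list of
+        // (attribute, form) pairs - see DIEAbbrev::Emit later in this file.)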
+ Abbrev->Emit(*this); + + Asm->EOL(); + } + + // Mark end of abbreviations. + Asm->EmitULEB128Bytes(0); Asm->EOL("EOM(3)"); + + EmitLabel("abbrev_end", 0); + + Asm->EOL(); + } + } + + /// EmitDebugLines - Emit source line information. + /// + void EmitDebugLines() { + // Minimum line delta, thus ranging from -10..(255-10). + const int MinLineDelta = -(DW_LNS_fixed_advance_pc + 1); + // Maximum line delta, thus ranging from -10..(255-10). + const int MaxLineDelta = 255 + MinLineDelta; + + // Start the dwarf line section. + Asm->SwitchToDataSection(TAI->getDwarfLineSection()); + + // Construct the section header. + + EmitDifference("line_end", 0, "line_begin", 0, true); + Asm->EOL("Length of Source Line Info"); + EmitLabel("line_begin", 0); + + Asm->EmitInt16(DWARF_VERSION); Asm->EOL("DWARF version number"); + + EmitDifference("line_prolog_end", 0, "line_prolog_begin", 0, true); + Asm->EOL("Prolog Length"); + EmitLabel("line_prolog_begin", 0); + + Asm->EmitInt8(1); Asm->EOL("Minimum Instruction Length"); + + Asm->EmitInt8(1); Asm->EOL("Default is_stmt_start flag"); + + Asm->EmitInt8(MinLineDelta); Asm->EOL("Line Base Value (Special Opcodes)"); + + Asm->EmitInt8(MaxLineDelta); Asm->EOL("Line Range Value (Special Opcodes)"); + + Asm->EmitInt8(-MinLineDelta); Asm->EOL("Special Opcode Base"); + + // Line number standard opcode encodings argument count + Asm->EmitInt8(0); Asm->EOL("DW_LNS_copy arg count"); + Asm->EmitInt8(1); Asm->EOL("DW_LNS_advance_pc arg count"); + Asm->EmitInt8(1); Asm->EOL("DW_LNS_advance_line arg count"); + Asm->EmitInt8(1); Asm->EOL("DW_LNS_set_file arg count"); + Asm->EmitInt8(1); Asm->EOL("DW_LNS_set_column arg count"); + Asm->EmitInt8(0); Asm->EOL("DW_LNS_negate_stmt arg count"); + Asm->EmitInt8(0); Asm->EOL("DW_LNS_set_basic_block arg count"); + Asm->EmitInt8(0); Asm->EOL("DW_LNS_const_add_pc arg count"); + Asm->EmitInt8(1); Asm->EOL("DW_LNS_fixed_advance_pc arg count"); + + const UniqueVector<std::string> &Directories = MMI->getDirectories(); + const UniqueVector<SourceFileInfo> + &SourceFiles = MMI->getSourceFiles(); + + // Emit directories. + for (unsigned DirectoryID = 1, NDID = Directories.size(); + DirectoryID <= NDID; ++DirectoryID) { + Asm->EmitString(Directories[DirectoryID]); Asm->EOL("Directory"); + } + Asm->EmitInt8(0); Asm->EOL("End of directories"); + + // Emit files. + for (unsigned SourceID = 1, NSID = SourceFiles.size(); + SourceID <= NSID; ++SourceID) { + const SourceFileInfo &SourceFile = SourceFiles[SourceID]; + Asm->EmitString(SourceFile.getName()); + Asm->EOL("Source"); + Asm->EmitULEB128Bytes(SourceFile.getDirectoryID()); + Asm->EOL("Directory #"); + Asm->EmitULEB128Bytes(0); + Asm->EOL("Mod date"); + Asm->EmitULEB128Bytes(0); + Asm->EOL("File size"); + } + Asm->EmitInt8(0); Asm->EOL("End of files"); + + EmitLabel("line_prolog_end", 0); + + // A sequence for each text section. + for (unsigned j = 0, M = SectionSourceLines.size(); j < M; ++j) { + // Isolate current sections line info. + const std::vector<SourceLineInfo> &LineInfos = SectionSourceLines[j]; + + Asm->EOL(std::string("Section ") + SectionMap[j + 1]); + + // Dwarf assumes we start with first line of first source file. + unsigned Source = 1; + unsigned Line = 1; + + // Construct rows of the address, source, line, column matrix. 
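+      // Worked example of the special-opcode math in the loop below
+      // (illustrative): with MinLineDelta = -10, a line advance of +3 gives
+      // Delta = 3 - (-10) = 13, which is in range, so the single opcode
+      // byte 13 - (-10) = 23 is emitted instead of DW_LNS_advance_line.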
+ for (unsigned i = 0, N = LineInfos.size(); i < N; ++i) { + const SourceLineInfo &LineInfo = LineInfos[i]; + unsigned LabelID = MMI->MappedLabel(LineInfo.getLabelID()); + if (!LabelID) continue; + + unsigned SourceID = LineInfo.getSourceID(); + const SourceFileInfo &SourceFile = SourceFiles[SourceID]; + unsigned DirectoryID = SourceFile.getDirectoryID(); + Asm->EOL(Directories[DirectoryID] + + SourceFile.getName() + + ":" + + utostr_32(LineInfo.getLine())); + + // Define the line address. + Asm->EmitInt8(0); Asm->EOL("Extended Op"); + Asm->EmitInt8(TAI->getAddressSize() + 1); Asm->EOL("Op size"); + Asm->EmitInt8(DW_LNE_set_address); Asm->EOL("DW_LNE_set_address"); + EmitReference("label", LabelID); Asm->EOL("Location label"); + + // If change of source, then switch to the new source. + if (Source != LineInfo.getSourceID()) { + Source = LineInfo.getSourceID(); + Asm->EmitInt8(DW_LNS_set_file); Asm->EOL("DW_LNS_set_file"); + Asm->EmitULEB128Bytes(Source); Asm->EOL("New Source"); + } + + // If change of line. + if (Line != LineInfo.getLine()) { + // Determine offset. + int Offset = LineInfo.getLine() - Line; + int Delta = Offset - MinLineDelta; + + // Update line. + Line = LineInfo.getLine(); + + // If delta is small enough and in range... + if (Delta >= 0 && Delta < (MaxLineDelta - 1)) { + // ... then use fast opcode. + Asm->EmitInt8(Delta - MinLineDelta); Asm->EOL("Line Delta"); + } else { + // ... otherwise use long hand. + Asm->EmitInt8(DW_LNS_advance_line); Asm->EOL("DW_LNS_advance_line"); + Asm->EmitSLEB128Bytes(Offset); Asm->EOL("Line Offset"); + Asm->EmitInt8(DW_LNS_copy); Asm->EOL("DW_LNS_copy"); + } + } else { + // Copy the previous row (different address or source) + Asm->EmitInt8(DW_LNS_copy); Asm->EOL("DW_LNS_copy"); + } + } + + // Define last address of section. + Asm->EmitInt8(0); Asm->EOL("Extended Op"); + Asm->EmitInt8(TAI->getAddressSize() + 1); Asm->EOL("Op size"); + Asm->EmitInt8(DW_LNE_set_address); Asm->EOL("DW_LNE_set_address"); + EmitReference("section_end", j + 1); Asm->EOL("Section end label"); + + // Mark end of matrix. + Asm->EmitInt8(0); Asm->EOL("DW_LNE_end_sequence"); + Asm->EmitULEB128Bytes(1); Asm->EOL(); + Asm->EmitInt8(1); Asm->EOL(); + } + + EmitLabel("line_end", 0); + + Asm->EOL(); + } + + /// EmitCommonDebugFrame - Emit common frame info into a debug frame section. + /// + void EmitCommonDebugFrame() { + if (!TAI->doesDwarfRequireFrameSection()) + return; + + int stackGrowth = + Asm->TM.getFrameInfo()->getStackGrowthDirection() == + TargetFrameInfo::StackGrowsUp ? + TAI->getAddressSize() : -TAI->getAddressSize(); + + // Start the dwarf frame section. 
+ Asm->SwitchToDataSection(TAI->getDwarfFrameSection()); + + EmitLabel("debug_frame_common", 0); + EmitDifference("debug_frame_common_end", 0, + "debug_frame_common_begin", 0, true); + Asm->EOL("Length of Common Information Entry"); + + EmitLabel("debug_frame_common_begin", 0); + Asm->EmitInt32((int)DW_CIE_ID); + Asm->EOL("CIE Identifier Tag"); + Asm->EmitInt8(DW_CIE_VERSION); + Asm->EOL("CIE Version"); + Asm->EmitString(""); + Asm->EOL("CIE Augmentation"); + Asm->EmitULEB128Bytes(1); + Asm->EOL("CIE Code Alignment Factor"); + Asm->EmitSLEB128Bytes(stackGrowth); + Asm->EOL("CIE Data Alignment Factor"); + Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister())); + Asm->EOL("CIE RA Column"); + + std::vector<MachineMove> Moves; + RI->getInitialFrameState(Moves); + + EmitFrameMoves(NULL, 0, Moves); + + Asm->EmitAlignment(2); + EmitLabel("debug_frame_common_end", 0); + + Asm->EOL(); + } + + /// EmitFunctionDebugFrame - Emit per function frame info into a debug frame + /// section. + void EmitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) { + if (!TAI->doesDwarfRequireFrameSection()) + return; + + // Start the dwarf frame section. + Asm->SwitchToDataSection(TAI->getDwarfFrameSection()); + + EmitDifference("debug_frame_end", DebugFrameInfo.Number, + "debug_frame_begin", DebugFrameInfo.Number, true); + Asm->EOL("Length of Frame Information Entry"); + + EmitLabel("debug_frame_begin", DebugFrameInfo.Number); + + EmitSectionOffset("debug_frame_common", "section_debug_frame", + 0, 0, true, false); + Asm->EOL("FDE CIE offset"); + + EmitReference("func_begin", DebugFrameInfo.Number); + Asm->EOL("FDE initial location"); + EmitDifference("func_end", DebugFrameInfo.Number, + "func_begin", DebugFrameInfo.Number); + Asm->EOL("FDE address range"); + + EmitFrameMoves("func_begin", DebugFrameInfo.Number, DebugFrameInfo.Moves); + + Asm->EmitAlignment(2); + EmitLabel("debug_frame_end", DebugFrameInfo.Number); + + Asm->EOL(); + } + + /// EmitDebugPubNames - Emit visible names into a debug pubnames section. + /// + void EmitDebugPubNames() { + // Start the dwarf pubnames section. + Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection()); + + CompileUnit *Unit = GetBaseCompileUnit(); + + EmitDifference("pubnames_end", Unit->getID(), + "pubnames_begin", Unit->getID(), true); + Asm->EOL("Length of Public Names Info"); + + EmitLabel("pubnames_begin", Unit->getID()); + + Asm->EmitInt16(DWARF_VERSION); Asm->EOL("DWARF Version"); + + EmitSectionOffset("info_begin", "section_info", + Unit->getID(), 0, true, false); + Asm->EOL("Offset of Compilation Unit Info"); + + EmitDifference("info_end", Unit->getID(), "info_begin", Unit->getID(),true); + Asm->EOL("Compilation Unit Length"); + + std::map<std::string, DIE *> &Globals = Unit->getGlobals(); + + for (std::map<std::string, DIE *>::iterator GI = Globals.begin(), + GE = Globals.end(); + GI != GE; ++GI) { + const std::string &Name = GI->first; + DIE * Entity = GI->second; + + Asm->EmitInt32(Entity->getOffset()); Asm->EOL("DIE offset"); + Asm->EmitString(Name); Asm->EOL("External Name"); + } + + Asm->EmitInt32(0); Asm->EOL("End Mark"); + EmitLabel("pubnames_end", Unit->getID()); + + Asm->EOL(); + } + + /// EmitDebugStr - Emit visible names into a debug str section. + /// + void EmitDebugStr() { + // Check to see if it is worth the effort. + if (!StringPool.empty()) { + // Start the dwarf str section. + Asm->SwitchToDataSection(TAI->getDwarfStrSection()); + + // For each of strings in the string pool. 
+ for (unsigned StringID = 1, N = StringPool.size(); + StringID <= N; ++StringID) { + // Emit a label for reference from debug information entries. + EmitLabel("string", StringID); + // Emit the string itself. + const std::string &String = StringPool[StringID]; + Asm->EmitString(String); Asm->EOL(); + } + + Asm->EOL(); + } + } + + /// EmitDebugLoc - Emit visible names into a debug loc section. + /// + void EmitDebugLoc() { + // Start the dwarf loc section. + Asm->SwitchToDataSection(TAI->getDwarfLocSection()); + + Asm->EOL(); + } + + /// EmitDebugARanges - Emit visible names into a debug aranges section. + /// + void EmitDebugARanges() { + // Start the dwarf aranges section. + Asm->SwitchToDataSection(TAI->getDwarfARangesSection()); + + // FIXME - Mock up + #if 0 + CompileUnit *Unit = GetBaseCompileUnit(); + + // Don't include size of length + Asm->EmitInt32(0x1c); Asm->EOL("Length of Address Ranges Info"); + + Asm->EmitInt16(DWARF_VERSION); Asm->EOL("Dwarf Version"); + + EmitReference("info_begin", Unit->getID()); + Asm->EOL("Offset of Compilation Unit Info"); + + Asm->EmitInt8(TAI->getAddressSize()); Asm->EOL("Size of Address"); + + Asm->EmitInt8(0); Asm->EOL("Size of Segment Descriptor"); + + Asm->EmitInt16(0); Asm->EOL("Pad (1)"); + Asm->EmitInt16(0); Asm->EOL("Pad (2)"); + + // Range 1 + EmitReference("text_begin", 0); Asm->EOL("Address"); + EmitDifference("text_end", 0, "text_begin", 0, true); Asm->EOL("Length"); + + Asm->EmitInt32(0); Asm->EOL("EOM (1)"); + Asm->EmitInt32(0); Asm->EOL("EOM (2)"); + + Asm->EOL(); + #endif + } + + /// EmitDebugRanges - Emit visible names into a debug ranges section. + /// + void EmitDebugRanges() { + // Start the dwarf ranges section. + Asm->SwitchToDataSection(TAI->getDwarfRangesSection()); + + Asm->EOL(); + } + + /// EmitDebugMacInfo - Emit visible names into a debug macinfo section. + /// + void EmitDebugMacInfo() { + // Start the dwarf macinfo section. + Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection()); + + Asm->EOL(); + } + + /// ConstructCompileUnitDIEs - Create a compile unit DIE for each source and + /// header file. + void ConstructCompileUnitDIEs() { + const UniqueVector<CompileUnitDesc *> CUW = MMI->getCompileUnits(); + + for (unsigned i = 1, N = CUW.size(); i <= N; ++i) { + unsigned ID = MMI->RecordSource(CUW[i]); + CompileUnit *Unit = NewCompileUnit(CUW[i], ID); + CompileUnits.push_back(Unit); + } + } + + /// ConstructGlobalDIEs - Create DIEs for each of the externally visible + /// global variables. + void ConstructGlobalDIEs() { + std::vector<GlobalVariableDesc *> GlobalVariables = + MMI->getAnchoredDescriptors<GlobalVariableDesc>(*M); + + for (unsigned i = 0, N = GlobalVariables.size(); i < N; ++i) { + GlobalVariableDesc *GVD = GlobalVariables[i]; + NewGlobalVariable(GVD); + } + } + + /// ConstructSubprogramDIEs - Create DIEs for each of the externally visible + /// subprograms. + void ConstructSubprogramDIEs() { + std::vector<SubprogramDesc *> Subprograms = + MMI->getAnchoredDescriptors<SubprogramDesc>(*M); + + for (unsigned i = 0, N = Subprograms.size(); i < N; ++i) { + SubprogramDesc *SPD = Subprograms[i]; + NewSubprogram(SPD); + } + } + +public: + //===--------------------------------------------------------------------===// + // Main entry points. 
+ // + DwarfDebug(std::ostream &OS, AsmPrinter *A, const TargetAsmInfo *T) + : Dwarf(OS, A, T) + , CompileUnits() + , AbbreviationsSet(InitAbbreviationsSetSize) + , Abbreviations() + , ValuesSet(InitValuesSetSize) + , Values() + , StringPool() + , DescToUnitMap() + , SectionMap() + , SectionSourceLines() + , didInitial(false) + , shouldEmit(false) + { + } + virtual ~DwarfDebug() { + for (unsigned i = 0, N = CompileUnits.size(); i < N; ++i) + delete CompileUnits[i]; + for (unsigned j = 0, M = Values.size(); j < M; ++j) + delete Values[j]; + } + + /// SetModuleInfo - Set machine module information when it's known that pass + /// manager has created it. Set by the target AsmPrinter. + void SetModuleInfo(MachineModuleInfo *mmi) { + // Make sure initial declarations are made. + if (!MMI && mmi->hasDebugInfo()) { + MMI = mmi; + shouldEmit = true; + + // Emit initial sections + EmitInitial(); + + // Create all the compile unit DIEs. + ConstructCompileUnitDIEs(); + + // Create DIEs for each of the externally visible global variables. + ConstructGlobalDIEs(); + + // Create DIEs for each of the externally visible subprograms. + ConstructSubprogramDIEs(); + + // Prime section data. + SectionMap.insert(TAI->getTextSection()); + } + } + + /// BeginModule - Emit all Dwarf sections that should come prior to the + /// content. + void BeginModule(Module *M) { + this->M = M; + + if (!ShouldEmitDwarf()) return; + } + + /// EndModule - Emit all Dwarf sections that should come after the content. + /// + void EndModule() { + if (!ShouldEmitDwarf()) return; + + // Standard sections final addresses. + Asm->SwitchToTextSection(TAI->getTextSection()); + EmitLabel("text_end", 0); + Asm->SwitchToDataSection(TAI->getDataSection()); + EmitLabel("data_end", 0); + + // End text sections. + for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) { + Asm->SwitchToTextSection(SectionMap[i].c_str()); + EmitLabel("section_end", i); + } + + // Emit common frame information. + EmitCommonDebugFrame(); + + // Emit function debug frame information + for (std::vector<FunctionDebugFrameInfo>::iterator I = DebugFrames.begin(), + E = DebugFrames.end(); I != E; ++I) + EmitFunctionDebugFrame(*I); + + // Compute DIE offsets and sizes. + SizeAndOffsets(); + + // Emit all the DIEs into a debug info section + EmitDebugInfo(); + + // Corresponding abbreviations into a abbrev section. + EmitAbbreviations(); + + // Emit source line correspondence into a debug line section. + EmitDebugLines(); + + // Emit info into a debug pubnames section. + EmitDebugPubNames(); + + // Emit info into a debug str section. + EmitDebugStr(); + + // Emit info into a debug loc section. + EmitDebugLoc(); + + // Emit info into a debug aranges section. + EmitDebugARanges(); + + // Emit info into a debug ranges section. + EmitDebugRanges(); + + // Emit info into a debug macinfo section. + EmitDebugMacInfo(); + } + + /// BeginFunction - Gather pre-function debug information. Assumes being + /// emitted immediately after the function entry point. + void BeginFunction(MachineFunction *MF) { + this->MF = MF; + + if (!ShouldEmitDwarf()) return; + + // Begin accumulating function debug information. + MMI->BeginFunction(MF); + + // Assumes in correct section after the entry point. + EmitLabel("func_begin", ++SubprogramCount); + } + + /// EndFunction - Gather and emit post-function debug information. + /// + void EndFunction() { + if (!ShouldEmitDwarf()) return; + + // Define end label for subprogram. + EmitLabel("func_end", SubprogramCount); + + // Get function line info. 
+ const std::vector<SourceLineInfo> &LineInfos = MMI->getSourceLines(); + + if (!LineInfos.empty()) { + // Get section line info. + unsigned ID = SectionMap.insert(Asm->CurrentSection); + if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID); + std::vector<SourceLineInfo> &SectionLineInfos = SectionSourceLines[ID-1]; + // Append the function info to section info. + SectionLineInfos.insert(SectionLineInfos.end(), + LineInfos.begin(), LineInfos.end()); + } + + // Construct scopes for subprogram. + ConstructRootScope(MMI->getRootScope()); + + DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount, + MMI->getFrameMoves())); + } +}; + +//===----------------------------------------------------------------------===// +/// DwarfException - Emits Dwarf exception handling directives. +/// +class DwarfException : public Dwarf { + +private: + struct FunctionEHFrameInfo { + std::string FnName; + unsigned Number; + unsigned PersonalityIndex; + bool hasCalls; + bool hasLandingPads; + std::vector<MachineMove> Moves; + + FunctionEHFrameInfo(const std::string &FN, unsigned Num, unsigned P, + bool hC, bool hL, + const std::vector<MachineMove> &M): + FnName(FN), Number(Num), PersonalityIndex(P), + hasCalls(hC), hasLandingPads(hL), Moves(M) { }; + }; + + std::vector<FunctionEHFrameInfo> EHFrames; + + /// shouldEmit - Flag to indicate if debug information should be emitted. + /// + bool shouldEmit; + + /// EmitCommonEHFrame - Emit the common eh unwind frame. + /// + void EmitCommonEHFrame(const Function *Personality, unsigned Index) { + // Size and sign of stack growth. + int stackGrowth = + Asm->TM.getFrameInfo()->getStackGrowthDirection() == + TargetFrameInfo::StackGrowsUp ? + TAI->getAddressSize() : -TAI->getAddressSize(); + + // Begin eh frame section. + Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection()); + O << "EH_frame" << Index << ":\n"; + EmitLabel("section_eh_frame", Index); + + // Define base labels. + EmitLabel("eh_frame_common", Index); + + // Define the eh frame length. + EmitDifference("eh_frame_common_end", Index, + "eh_frame_common_begin", Index, true); + Asm->EOL("Length of Common Information Entry"); + + // EH frame header. + EmitLabel("eh_frame_common_begin", Index); + Asm->EmitInt32((int)0); + Asm->EOL("CIE Identifier Tag"); + Asm->EmitInt8(DW_CIE_VERSION); + Asm->EOL("CIE Version"); + + // The personality presence indicates that language specific information + // will show up in the eh frame. + Asm->EmitString(Personality ? "zPLR" : "zR"); + Asm->EOL("CIE Augmentation"); + + // Round out reader. + Asm->EmitULEB128Bytes(1); + Asm->EOL("CIE Code Alignment Factor"); + Asm->EmitSLEB128Bytes(stackGrowth); + Asm->EOL("CIE Data Alignment Factor"); + Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister())); + Asm->EOL("CIE RA Column"); + + // If there is a personality, we need to indicate the functions location. 
+    if (Personality) {
+      Asm->EmitULEB128Bytes(7);
+      Asm->EOL("Augmentation Size");
+      Asm->EmitInt8(DW_EH_PE_pcrel | DW_EH_PE_sdata4);
+      Asm->EOL("Personality (pcrel sdata4)");
+
+      O << TAI->getData32bitsDirective();
+      Asm->EmitExternalGlobal((const GlobalVariable *)(Personality));
+      O << "-" << TAI->getPCSymbol();
+      Asm->EOL("Personality");
+
+      Asm->EmitULEB128Bytes(DW_EH_PE_pcrel);
+      Asm->EOL("LSDA Encoding (pcrel)");
+      Asm->EmitULEB128Bytes(DW_EH_PE_pcrel);
+      Asm->EOL("FDE Encoding (pcrel)");
+    } else {
+      Asm->EmitULEB128Bytes(1);
+      Asm->EOL("Augmentation Size");
+      Asm->EmitULEB128Bytes(DW_EH_PE_pcrel);
+      Asm->EOL("FDE Encoding (pcrel)");
+    }
+
+    // Indicate locations of general callee saved registers in frame.
+    std::vector<MachineMove> Moves;
+    RI->getInitialFrameState(Moves);
+    EmitFrameMoves(NULL, 0, Moves);
+
+    Asm->EmitAlignment(2);
+    EmitLabel("eh_frame_common_end", Index);
+
+    Asm->EOL();
+  }
+
+  /// EmitEHFrame - Emit function exception frame information.
+  ///
+  void EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) {
+    Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection());
+
+    // Externally visible entry into the function's eh frame info.
+    if (const char *GlobalDirective = TAI->getGlobalDirective())
+      O << GlobalDirective << EHFrameInfo.FnName << ".eh\n";
+
+    // If there are no calls then you can't unwind.
+    if (!EHFrameInfo.hasCalls) {
+      O << EHFrameInfo.FnName << ".eh = 0\n";
+    } else {
+      O << EHFrameInfo.FnName << ".eh:\n";
+
+      // EH frame header.
+      EmitDifference("eh_frame_end", EHFrameInfo.Number,
+                     "eh_frame_begin", EHFrameInfo.Number, true);
+      Asm->EOL("Length of Frame Information Entry");
+
+      EmitLabel("eh_frame_begin", EHFrameInfo.Number);
+
+      EmitSectionOffset("eh_frame_begin", "eh_frame_common",
+                        EHFrameInfo.Number, EHFrameInfo.PersonalityIndex,
+                        true, true);
+      Asm->EOL("FDE CIE offset");
+
+      EmitReference("eh_func_begin", EHFrameInfo.Number, true);
+      Asm->EOL("FDE initial location");
+      EmitDifference("eh_func_end", EHFrameInfo.Number,
+                     "eh_func_begin", EHFrameInfo.Number);
+      Asm->EOL("FDE address range");
+
+      // If there is a personality and landing pads then point to the language
+      // specific data area in the exception table.
+      if (EHFrameInfo.PersonalityIndex) {
+        Asm->EmitULEB128Bytes(4);
+        Asm->EOL("Augmentation size");
+
+        if (EHFrameInfo.hasLandingPads) {
+          EmitReference("exception", EHFrameInfo.Number, true);
+        } else if (TAI->getAddressSize() == 8) {
+          Asm->EmitInt64((int)0);
+        } else {
+          Asm->EmitInt32((int)0);
+        }
+        Asm->EOL("Language Specific Data Area");
+      } else {
+        Asm->EmitULEB128Bytes(0);
+        Asm->EOL("Augmentation size");
+      }
+
+      // Indicate locations of function specific callee saved registers in
+      // frame.
+      EmitFrameMoves("eh_func_begin", EHFrameInfo.Number, EHFrameInfo.Moves);
+
+      Asm->EmitAlignment(2);
+      EmitLabel("eh_frame_end", EHFrameInfo.Number);
+    }
+
+    if (const char *UsedDirective = TAI->getUsedDirective())
+      O << UsedDirective << EHFrameInfo.FnName << ".eh\n\n";
+  }
+
+  /// EmitExceptionTable - Emit landing pads and actions.
+  ///
+  /// The general organization of the table is complex, but the basic concepts
+  /// are easy. First there is a header which describes the location and
+  /// organization of the three components that follow.
+  ///  1. The landing pad site information describes the range of code covered
+  ///     by the try. In our case it's an accumulation of the ranges covered
+  ///     by the invokes in the try. There is also a reference to the landing
+  ///     pad that handles the exception once processed, and finally an index
+  ///     into the actions table.
+  ///  2. The action table, in our case, is composed of pairs of type ids
+  ///     and next action offset. Starting with the action index from the
+  ///     landing pad site, each type id is checked for a match to the current
+  ///     exception. If it matches then the exception and type id are passed
+  ///     on to the landing pad. Otherwise the next action is looked up. This
+  ///     chain is terminated with a next action of zero. If no type id is
+  ///     found then the frame is unwound and handling continues.
+  ///  3. The type id table contains references to all the C++ typeinfo for all
+  ///     catches in the function. This table is reverse indexed, base 1.
+
+  /// SharedTypeIds - How many leading type ids two landing pads have in common.
+  static unsigned SharedTypeIds(const LandingPadInfo *L,
+                                const LandingPadInfo *R) {
+    const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+    unsigned LSize = LIds.size(), RSize = RIds.size();
+    unsigned MinSize = LSize < RSize ? LSize : RSize;
+    unsigned Count = 0;
+
+    for (; Count != MinSize; ++Count)
+      if (LIds[Count] != RIds[Count])
+        return Count;
+
+    return Count;
+  }
+
+  /// PadLT - Order landing pads lexicographically by type id.
+  static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+    const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+    unsigned LSize = LIds.size(), RSize = RIds.size();
+    unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+    for (unsigned i = 0; i != MinSize; ++i)
+      if (LIds[i] != RIds[i])
+        return LIds[i] < RIds[i];
+
+    return LSize < RSize;
+  }
+
+  struct KeyInfo {
+    static inline unsigned getEmptyKey() { return -1U; }
+    static inline unsigned getTombstoneKey() { return -2U; }
+    static unsigned getHashValue(const unsigned &Key) { return Key; }
+    static bool isPod() { return true; }
+  };
+
+  struct PadSite {
+    unsigned PadIndex;
+    unsigned SiteIndex;
+  };
+
+  typedef DenseMap<unsigned, PadSite, KeyInfo> PadMapType;
+
+  struct ActionEntry {
+    int ValueForTypeID; // The value to write - may not be equal to the type id.
+    int NextAction;
+    struct ActionEntry *Previous;
+  };
+
+  void EmitExceptionTable() {
+    // Map all labels and get rid of any dead landing pads.
+    MMI->TidyLandingPads();
+
+    const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+    const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+    const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+    if (PadInfos.empty()) return;
+
+    // Sort the landing pads in order of their type ids. This is used to fold
+    // duplicate actions.
+    SmallVector<const LandingPadInfo *, 64> LandingPads;
+    LandingPads.reserve(PadInfos.size());
+    for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+      LandingPads.push_back(&PadInfos[i]);
+    std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+    // Gather first action index for each landing pad site.
+    SmallVector<unsigned, 64> FirstActions;
+    FirstActions.reserve(PadInfos.size());
+
+    // The actions table.
+    SmallVector<ActionEntry, 32> Actions;
+
+    // Negative type ids index into FilterIds, positive type ids index into
+    // TypeInfos. The value written for a positive type id is just the type
+    // id itself. For a negative type id, however, the value written is the
+    // (negative) byte offset of the corresponding FilterIds entry.
+    // The byte offset is usually equal to the type id, because the FilterIds
+    // entries are written using a variable width encoding which outputs one
+    // byte per entry as long as the value written is not too large, but can
+    // differ. This kind of complication does not occur for positive type ids
+    // because type infos are output using a fixed width encoding.
+    // FilterOffsets[i] holds the byte offset corresponding to FilterIds[i].
+    SmallVector<int, 16> FilterOffsets;
+    FilterOffsets.reserve(FilterIds.size());
+    int Offset = -1;
+    for (std::vector<unsigned>::const_iterator I = FilterIds.begin(),
+         E = FilterIds.end(); I != E; ++I) {
+      FilterOffsets.push_back(Offset);
+      Offset -= Asm->SizeULEB128(*I);
+    }
+
+    // Compute sizes for exception table.
+    unsigned SizeSites = 0;
+    unsigned SizeActions = 0;
+
+    // Look at each landing pad site to compute size. We need the size of each
+    // landing pad site info and the size of the landing pad's actions.
+    int FirstAction = 0;
+
+    for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+      const LandingPadInfo *LP = LandingPads[i];
+      const std::vector<int> &TypeIds = LP->TypeIds;
+      const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0;
+      unsigned SizeSiteActions = 0;
+
+      if (NumShared < TypeIds.size()) {
+        unsigned SizeAction = 0;
+        ActionEntry *PrevAction = 0;
+
+        if (NumShared) {
+          const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size();
+          assert(Actions.size());
+          PrevAction = &Actions.back();
+          SizeAction = Asm->SizeSLEB128(PrevAction->NextAction) +
+                       Asm->SizeSLEB128(PrevAction->ValueForTypeID);
+          for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+            SizeAction -= Asm->SizeSLEB128(PrevAction->ValueForTypeID);
+            SizeAction += -PrevAction->NextAction;
+            PrevAction = PrevAction->Previous;
+          }
+        }
+
+        // Compute the actions.
+        for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) {
+          int TypeID = TypeIds[I];
+          assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+          int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+          unsigned SizeTypeID = Asm->SizeSLEB128(ValueForTypeID);
+
+          int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+          SizeAction = SizeTypeID + Asm->SizeSLEB128(NextAction);
+          SizeSiteActions += SizeAction;
+
+          ActionEntry Action = {ValueForTypeID, NextAction, PrevAction};
+          Actions.push_back(Action);
+
+          PrevAction = &Actions.back();
+        }
+
+        // Record the first action of the landing pad site.
+        FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+      } // else identical - re-use previous FirstAction
+
+      FirstActions.push_back(FirstAction);
+
+      // Compute this site's contribution to size.
+      SizeActions += SizeSiteActions;
+      unsigned M = LP->BeginLabels.size();
+      SizeSites += M*(sizeof(int32_t) +               // Site start.
+                      sizeof(int32_t) +               // Site length.
+                      sizeof(int32_t) +               // Landing pad.
+                      Asm->SizeULEB128(FirstAction)); // Action.
+    }
+
+    // Final tallies.
+    unsigned SizeTypes = TypeInfos.size() * TAI->getAddressSize();
+
+    unsigned TypeOffset = sizeof(int8_t) + // Call site format
+                          Asm->SizeULEB128(SizeSites) + // Call-site table length
+                          SizeSites + SizeActions + SizeTypes;
+
+    unsigned TotalSize = sizeof(int8_t) + // LPStart format
+                         sizeof(int8_t) + // TType format
+                         Asm->SizeULEB128(TypeOffset) + // TType base offset
+                         TypeOffset;
+
+    unsigned SizeAlign = (4 - TotalSize) & 3;
+
+    // Begin the exception table.
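+    // (Illustrative check of the padding math above: if TotalSize were 46,
+    // SizeAlign = (4 - 46) & 3 = 2, i.e. two pad bytes keep the 4-byte
+    // call-site entries that follow aligned.)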
+ Asm->SwitchToDataSection(TAI->getDwarfExceptionSection()); + O << "GCC_except_table" << SubprogramCount << ":\n"; + Asm->EmitAlignment(2); + for (unsigned i = 0; i != SizeAlign; ++i) { + Asm->EmitInt8(0); + Asm->EOL("Padding"); + } + EmitLabel("exception", SubprogramCount); + + // Emit the header. + Asm->EmitInt8(DW_EH_PE_omit); + Asm->EOL("LPStart format (DW_EH_PE_omit)"); + Asm->EmitInt8(DW_EH_PE_absptr); + Asm->EOL("TType format (DW_EH_PE_absptr)"); + Asm->EmitULEB128Bytes(TypeOffset); + Asm->EOL("TType base offset"); + Asm->EmitInt8(DW_EH_PE_udata4); + Asm->EOL("Call site format (DW_EH_PE_udata4)"); + Asm->EmitULEB128Bytes(SizeSites); + Asm->EOL("Call-site table length"); + + // Emit the landing pad site information in order of address. + PadMapType PadMap; + + for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { + const LandingPadInfo *LandingPad = LandingPads[i]; + for (unsigned j=0, E = LandingPad->BeginLabels.size(); j != E; ++j) { + unsigned BeginLabel = LandingPad->BeginLabels[j]; + assert(!PadMap.count(BeginLabel) && "duplicate landing pad labels!"); + PadSite P = { i, j }; + PadMap[BeginLabel] = P; + } + } + + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end(); + MI != E; ++MI) { + if (MI->getOpcode() != TargetInstrInfo::LABEL) + continue; + + unsigned BeginLabel = MI->getOperand(0).getImmedValue(); + PadMapType::iterator L = PadMap.find(BeginLabel); + + if (L == PadMap.end()) + continue; + + PadSite P = L->second; + const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; + + assert(BeginLabel == LandingPad->BeginLabels[P.SiteIndex] && + "Inconsistent landing pad map!"); + + EmitSectionOffset("label", "eh_func_begin", BeginLabel, SubprogramCount, + false, true); + Asm->EOL("Region start"); + + EmitDifference("label", LandingPad->EndLabels[P.SiteIndex], + "label", BeginLabel); + Asm->EOL("Region length"); + + if (LandingPad->TypeIds.empty()) { + if (TAI->getAddressSize() == sizeof(int32_t)) + Asm->EmitInt32(0); + else + Asm->EmitInt64(0); + } else { + EmitSectionOffset("label", "eh_func_begin", + LandingPad->LandingPadLabel, SubprogramCount, + false, true); + } + Asm->EOL("Landing pad"); + + Asm->EmitULEB128Bytes(FirstActions[P.PadIndex]); + Asm->EOL("Action"); + } + } + + // Emit the actions. + for (unsigned I = 0, N = Actions.size(); I != N; ++I) { + ActionEntry &Action = Actions[I]; + + Asm->EmitSLEB128Bytes(Action.ValueForTypeID); + Asm->EOL("TypeInfo index"); + Asm->EmitSLEB128Bytes(Action.NextAction); + Asm->EOL("Next action"); + } + + // Emit the type ids. + for (unsigned M = TypeInfos.size(); M; --M) { + GlobalVariable *GV = TypeInfos[M - 1]; + + if (TAI->getAddressSize() == sizeof(int32_t)) + O << TAI->getData32bitsDirective(); + else + O << TAI->getData64bitsDirective(); + + if (GV) + O << Asm->getGlobalLinkName(GV); + else + O << "0"; + + Asm->EOL("TypeInfo"); + } + + // Emit the filter typeids. + for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) { + unsigned TypeID = FilterIds[j]; + Asm->EmitULEB128Bytes(TypeID); + Asm->EOL("Filter TypeInfo index"); + } + + Asm->EmitAlignment(2); + } + +public: + //===--------------------------------------------------------------------===// + // Main entry points. 
+ // + DwarfException(std::ostream &OS, AsmPrinter *A, const TargetAsmInfo *T) + : Dwarf(OS, A, T) + , shouldEmit(false) + {} + + virtual ~DwarfException() {} + + /// SetModuleInfo - Set machine module information when it's known that pass + /// manager has created it. Set by the target AsmPrinter. + void SetModuleInfo(MachineModuleInfo *mmi) { + MMI = mmi; + } + + /// BeginModule - Emit all exception information that should come prior to the + /// content. + void BeginModule(Module *M) { + this->M = M; + } + + /// EndModule - Emit all exception information that should come after the + /// content. + void EndModule() { + if (!shouldEmit) return; + + const std::vector<Function *> Personalities = MMI->getPersonalities(); + for (unsigned i =0; i < Personalities.size(); ++i) + EmitCommonEHFrame(Personalities[i], i); + + for (std::vector<FunctionEHFrameInfo>::iterator I = EHFrames.begin(), + E = EHFrames.end(); I != E; ++I) + EmitEHFrame(*I); + } + + /// BeginFunction - Gather pre-function exception information. Assumes being + /// emitted immediately after the function entry point. + void BeginFunction(MachineFunction *MF) { + this->MF = MF; + + if (MMI && + ExceptionHandling && + TAI->doesSupportExceptionHandling()) { + shouldEmit = true; + // Assumes in correct section after the entry point. + EmitLabel("eh_func_begin", ++SubprogramCount); + } + } + + /// EndFunction - Gather and emit post-function exception information. + /// + void EndFunction() { + if (!shouldEmit) return; + + EmitLabel("eh_func_end", SubprogramCount); + EmitExceptionTable(); + + // Save EH frame information + EHFrames.push_back(FunctionEHFrameInfo(getAsm()->CurrentFnName, + SubprogramCount, + MMI->getPersonalityIndex(), + MF->getFrameInfo()->hasCalls(), + !MMI->getLandingPads().empty(), + MMI->getFrameMoves())); + } +}; + +} // End of namespace llvm + +//===----------------------------------------------------------------------===// + +/// Emit - Print the abbreviation using the specified Dwarf writer. +/// +void DIEAbbrev::Emit(const DwarfDebug &DD) const { + // Emit its Dwarf tag type. + DD.getAsm()->EmitULEB128Bytes(Tag); + DD.getAsm()->EOL(TagString(Tag)); + + // Emit whether it has children DIEs. + DD.getAsm()->EmitULEB128Bytes(ChildrenFlag); + DD.getAsm()->EOL(ChildrenString(ChildrenFlag)); + + // For each attribute description. + for (unsigned i = 0, N = Data.size(); i < N; ++i) { + const DIEAbbrevData &AttrData = Data[i]; + + // Emit attribute type. + DD.getAsm()->EmitULEB128Bytes(AttrData.getAttribute()); + DD.getAsm()->EOL(AttributeString(AttrData.getAttribute())); + + // Emit form type. + DD.getAsm()->EmitULEB128Bytes(AttrData.getForm()); + DD.getAsm()->EOL(FormEncodingString(AttrData.getForm())); + } + + // Mark end of abbreviation. 
+ DD.getAsm()->EmitULEB128Bytes(0); DD.getAsm()->EOL("EOM(1)"); + DD.getAsm()->EmitULEB128Bytes(0); DD.getAsm()->EOL("EOM(2)"); +} + +#ifndef NDEBUG +void DIEAbbrev::print(std::ostream &O) { + O << "Abbreviation @" + << std::hex << (intptr_t)this << std::dec + << " " + << TagString(Tag) + << " " + << ChildrenString(ChildrenFlag) + << "\n"; + + for (unsigned i = 0, N = Data.size(); i < N; ++i) { + O << " " + << AttributeString(Data[i].getAttribute()) + << " " + << FormEncodingString(Data[i].getForm()) + << "\n"; + } +} +void DIEAbbrev::dump() { print(cerr); } +#endif + +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +void DIEValue::dump() { + print(cerr); +} +#endif + +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit integer of appropriate size. +/// +void DIEInteger::EmitValue(DwarfDebug &DD, unsigned Form) { + switch (Form) { + case DW_FORM_flag: // Fall thru + case DW_FORM_ref1: // Fall thru + case DW_FORM_data1: DD.getAsm()->EmitInt8(Integer); break; + case DW_FORM_ref2: // Fall thru + case DW_FORM_data2: DD.getAsm()->EmitInt16(Integer); break; + case DW_FORM_ref4: // Fall thru + case DW_FORM_data4: DD.getAsm()->EmitInt32(Integer); break; + case DW_FORM_ref8: // Fall thru + case DW_FORM_data8: DD.getAsm()->EmitInt64(Integer); break; + case DW_FORM_udata: DD.getAsm()->EmitULEB128Bytes(Integer); break; + case DW_FORM_sdata: DD.getAsm()->EmitSLEB128Bytes(Integer); break; + default: assert(0 && "DIE Value form not supported yet"); break; + } +} + +/// SizeOf - Determine size of integer value in bytes. +/// +unsigned DIEInteger::SizeOf(const DwarfDebug &DD, unsigned Form) const { + switch (Form) { + case DW_FORM_flag: // Fall thru + case DW_FORM_ref1: // Fall thru + case DW_FORM_data1: return sizeof(int8_t); + case DW_FORM_ref2: // Fall thru + case DW_FORM_data2: return sizeof(int16_t); + case DW_FORM_ref4: // Fall thru + case DW_FORM_data4: return sizeof(int32_t); + case DW_FORM_ref8: // Fall thru + case DW_FORM_data8: return sizeof(int64_t); + case DW_FORM_udata: return DD.getAsm()->SizeULEB128(Integer); + case DW_FORM_sdata: return DD.getAsm()->SizeSLEB128(Integer); + default: assert(0 && "DIE Value form not supported yet"); break; + } + return 0; +} + +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit string value. +/// +void DIEString::EmitValue(DwarfDebug &DD, unsigned Form) { + DD.getAsm()->EmitString(String); +} + +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit label value. +/// +void DIEDwarfLabel::EmitValue(DwarfDebug &DD, unsigned Form) { + DD.EmitReference(Label); +} + +/// SizeOf - Determine size of label value in bytes. +/// +unsigned DIEDwarfLabel::SizeOf(const DwarfDebug &DD, unsigned Form) const { + return DD.getTargetAsmInfo()->getAddressSize(); +} + +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit label value. +/// +void DIEObjectLabel::EmitValue(DwarfDebug &DD, unsigned Form) { + DD.EmitReference(Label); +} + +/// SizeOf - Determine size of label value in bytes. +/// +unsigned DIEObjectLabel::SizeOf(const DwarfDebug &DD, unsigned Form) const { + return DD.getTargetAsmInfo()->getAddressSize(); +} + +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit delta value. 
+/// +void DIEDelta::EmitValue(DwarfDebug &DD, unsigned Form) { + bool IsSmall = Form == DW_FORM_data4; + DD.EmitDifference(LabelHi, LabelLo, IsSmall); +} + +/// SizeOf - Determine size of delta value in bytes. +/// +unsigned DIEDelta::SizeOf(const DwarfDebug &DD, unsigned Form) const { + if (Form == DW_FORM_data4) return 4; + return DD.getTargetAsmInfo()->getAddressSize(); +} + +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit debug information entry offset. +/// +void DIEntry::EmitValue(DwarfDebug &DD, unsigned Form) { + DD.getAsm()->EmitInt32(Entry->getOffset()); +} + +//===----------------------------------------------------------------------===// + +/// ComputeSize - calculate the size of the block. +/// +unsigned DIEBlock::ComputeSize(DwarfDebug &DD) { + if (!Size) { + const std::vector<DIEAbbrevData> &AbbrevData = Abbrev.getData(); + + for (unsigned i = 0, N = Values.size(); i < N; ++i) { + Size += Values[i]->SizeOf(DD, AbbrevData[i].getForm()); + } + } + return Size; +} + +/// EmitValue - Emit block data. +/// +void DIEBlock::EmitValue(DwarfDebug &DD, unsigned Form) { + switch (Form) { + case DW_FORM_block1: DD.getAsm()->EmitInt8(Size); break; + case DW_FORM_block2: DD.getAsm()->EmitInt16(Size); break; + case DW_FORM_block4: DD.getAsm()->EmitInt32(Size); break; + case DW_FORM_block: DD.getAsm()->EmitULEB128Bytes(Size); break; + default: assert(0 && "Improper form for block"); break; + } + + const std::vector<DIEAbbrevData> &AbbrevData = Abbrev.getData(); + + for (unsigned i = 0, N = Values.size(); i < N; ++i) { + DD.getAsm()->EOL(); + Values[i]->EmitValue(DD, AbbrevData[i].getForm()); + } +} + +/// SizeOf - Determine size of block data in bytes. +/// +unsigned DIEBlock::SizeOf(const DwarfDebug &DD, unsigned Form) const { + switch (Form) { + case DW_FORM_block1: return Size + sizeof(int8_t); + case DW_FORM_block2: return Size + sizeof(int16_t); + case DW_FORM_block4: return Size + sizeof(int32_t); + case DW_FORM_block: return Size + DD.getAsm()->SizeULEB128(Size); + default: assert(0 && "Improper form for block"); break; + } + return 0; +} + +//===----------------------------------------------------------------------===// +/// DIE Implementation + +DIE::~DIE() { + for (unsigned i = 0, N = Children.size(); i < N; ++i) + delete Children[i]; +} + +/// AddSiblingOffset - Add a sibling offset field to the front of the DIE. +/// +void DIE::AddSiblingOffset() { + DIEInteger *DI = new DIEInteger(0); + Values.insert(Values.begin(), DI); + Abbrev.AddFirstAttribute(DW_AT_sibling, DW_FORM_ref4); +} + +/// Profile - Used to gather unique data for the value folding set. 
+/// +void DIE::Profile(FoldingSetNodeID &ID) { + Abbrev.Profile(ID); + + for (unsigned i = 0, N = Children.size(); i < N; ++i) + ID.AddPointer(Children[i]); + + for (unsigned j = 0, M = Values.size(); j < M; ++j) + ID.AddPointer(Values[j]); +} + +#ifndef NDEBUG +void DIE::print(std::ostream &O, unsigned IncIndent) { + static unsigned IndentCount = 0; + IndentCount += IncIndent; + const std::string Indent(IndentCount, ' '); + bool isBlock = Abbrev.getTag() == 0; + + if (!isBlock) { + O << Indent + << "Die: " + << "0x" << std::hex << (intptr_t)this << std::dec + << ", Offset: " << Offset + << ", Size: " << Size + << "\n"; + + O << Indent + << TagString(Abbrev.getTag()) + << " " + << ChildrenString(Abbrev.getChildrenFlag()); + } else { + O << "Size: " << Size; + } + O << "\n"; + + const std::vector<DIEAbbrevData> &Data = Abbrev.getData(); + + IndentCount += 2; + for (unsigned i = 0, N = Data.size(); i < N; ++i) { + O << Indent; + if (!isBlock) { + O << AttributeString(Data[i].getAttribute()); + } else { + O << "Blk[" << i << "]"; + } + O << " " + << FormEncodingString(Data[i].getForm()) + << " "; + Values[i]->print(O); + O << "\n"; + } + IndentCount -= 2; + + for (unsigned j = 0, M = Children.size(); j < M; ++j) { + Children[j]->print(O, 4); + } + + if (!isBlock) O << "\n"; + IndentCount -= IncIndent; +} + +void DIE::dump() { + print(cerr); +} +#endif + +//===----------------------------------------------------------------------===// +/// DwarfWriter Implementation +/// + +DwarfWriter::DwarfWriter(std::ostream &OS, AsmPrinter *A, + const TargetAsmInfo *T) { + DE = new DwarfException(OS, A, T); + DD = new DwarfDebug(OS, A, T); +} + +DwarfWriter::~DwarfWriter() { + delete DE; + delete DD; +} + +/// SetModuleInfo - Set machine module info when it's known that pass manager +/// has created it. Set by the target AsmPrinter. +void DwarfWriter::SetModuleInfo(MachineModuleInfo *MMI) { + DD->SetModuleInfo(MMI); + DE->SetModuleInfo(MMI); +} + +/// BeginModule - Emit all Dwarf sections that should come prior to the +/// content. +void DwarfWriter::BeginModule(Module *M) { + DE->BeginModule(M); + DD->BeginModule(M); +} + +/// EndModule - Emit all Dwarf sections that should come after the content. +/// +void DwarfWriter::EndModule() { + DE->EndModule(); + DD->EndModule(); +} + +/// BeginFunction - Gather pre-function debug information. Assumes being +/// emitted immediately after the function entry point. +void DwarfWriter::BeginFunction(MachineFunction *MF) { + DE->BeginFunction(MF); + DD->BeginFunction(MF); +} + +/// EndFunction - Gather and emit post-function debug information. +/// +void DwarfWriter::EndFunction() { + DD->EndFunction(); + DE->EndFunction(); + + if (MachineModuleInfo *MMI = DD->getMMI() ? DD->getMMI() : DE->getMMI()) { + // Clear function debug information. + MMI->EndFunction(); + } +} diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp new file mode 100644 index 0000000..8ecddb8 --- /dev/null +++ b/lib/CodeGen/ELFWriter.cpp @@ -0,0 +1,547 @@ +//===-- ELFWriter.cpp - Target-independent ELF Writer code ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the target-independent ELF writer. This file writes out +// the ELF file in the following order: +// +// #1. ELF Header +// #2. 
'.text' section +// #3. '.data' section +// #4. '.bss' section (conceptual position in file) +// ... +// #X. '.shstrtab' section +// #Y. Section Table +// +// The entries in the section table are laid out as: +// #0. Null entry [required] +// #1. ".text" entry - the program code +// #2. ".data" entry - global variables with initializers. [ if needed ] +// #3. ".bss" entry - global variables without initializers. [ if needed ] +// ... +// #N. ".shstrtab" entry - String table for the section names. +// +// NOTE: This code should eventually be extended to support 64-bit ELF (this +// won't be hard), but we haven't done so yet! +// +//===----------------------------------------------------------------------===// + +#include "ELFWriter.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/FileWriters.h" +#include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetELFWriterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/OutputBuffer.h" +#include "llvm/Support/Streams.h" +#include <list> +using namespace llvm; + +char ELFWriter::ID = 0; +/// AddELFWriter - Concrete function to add the ELF writer to the function pass +/// manager. +MachineCodeEmitter *llvm::AddELFWriter(FunctionPassManager &FPM, + std::ostream &O, + TargetMachine &TM) { + ELFWriter *EW = new ELFWriter(O, TM); + FPM.add(EW); + return &EW->getMachineCodeEmitter(); +} + +//===----------------------------------------------------------------------===// +// ELFCodeEmitter Implementation +//===----------------------------------------------------------------------===// + +namespace llvm { + /// ELFCodeEmitter - This class is used by the ELFWriter to emit the code for + /// functions to the ELF file. + class ELFCodeEmitter : public MachineCodeEmitter { + ELFWriter &EW; + TargetMachine &TM; + ELFWriter::ELFSection *ES; // Section to write to. + std::vector<unsigned char> *OutBuffer; + size_t FnStart; + public: + ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM), OutBuffer(0) {} + + void startFunction(MachineFunction &F); + bool finishFunction(MachineFunction &F); + + void addRelocation(const MachineRelocation &MR) { + assert(0 && "relo not handled yet!"); + } + + virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) { + } + + virtual intptr_t getConstantPoolEntryAddress(unsigned Index) const { + assert(0 && "CP not implementated yet!"); + return 0; + } + virtual intptr_t getJumpTableEntryAddress(unsigned Index) const { + assert(0 && "JT not implementated yet!"); + return 0; + } + + virtual intptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { + assert(0 && "JT not implementated yet!"); + return 0; + } + + /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE! + void startFunctionStub(unsigned StubSize, unsigned Alignment = 1) { + assert(0 && "JIT specific function called!"); + abort(); + } + void *finishFunctionStub(const Function *F) { + assert(0 && "JIT specific function called!"); + abort(); + return 0; + } + }; +} + +/// startFunction - This callback is invoked when a new machine function is +/// about to be emitted. +void ELFCodeEmitter::startFunction(MachineFunction &F) { + // Align the output buffer to the appropriate alignment. + unsigned Align = 16; // FIXME: GENERICIZE!! + // Get the ELF Section that this function belongs in. 
+ ES = &EW.getSection(".text", ELFWriter::ELFSection::SHT_PROGBITS, + ELFWriter::ELFSection::SHF_EXECINSTR | + ELFWriter::ELFSection::SHF_ALLOC); + OutBuffer = &ES->SectionData; + cerr << "FIXME: This code needs to be updated for changes in the " + << "CodeEmitter interfaces. In particular, this should set " + << "BufferBegin/BufferEnd/CurBufferPtr, not deal with OutBuffer!"; + abort(); + + // Upgrade the section alignment if required. + if (ES->Align < Align) ES->Align = Align; + + // Add padding zeros to the end of the buffer to make sure that the + // function will start on the correct byte alignment within the section. + OutputBuffer OB(*OutBuffer, + TM.getTargetData()->getPointerSizeInBits() == 64, + TM.getTargetData()->isLittleEndian()); + OB.align(Align); + FnStart = OutBuffer->size(); +} + +/// finishFunction - This callback is invoked after the function is completely +/// finished. +bool ELFCodeEmitter::finishFunction(MachineFunction &F) { + // We now know the size of the function, add a symbol to represent it. + ELFWriter::ELFSym FnSym(F.getFunction()); + + // Figure out the binding (linkage) of the symbol. + switch (F.getFunction()->getLinkage()) { + default: + // appending linkage is illegal for functions. + assert(0 && "Unknown linkage type!"); + case GlobalValue::ExternalLinkage: + FnSym.SetBind(ELFWriter::ELFSym::STB_GLOBAL); + break; + case GlobalValue::LinkOnceLinkage: + case GlobalValue::WeakLinkage: + FnSym.SetBind(ELFWriter::ELFSym::STB_WEAK); + break; + case GlobalValue::InternalLinkage: + FnSym.SetBind(ELFWriter::ELFSym::STB_LOCAL); + break; + } + + ES->Size = OutBuffer->size(); + + FnSym.SetType(ELFWriter::ELFSym::STT_FUNC); + FnSym.SectionIdx = ES->SectionIdx; + FnSym.Value = FnStart; // Value = Offset from start of Section. + FnSym.Size = OutBuffer->size()-FnStart; + + // Finally, add it to the symtab. + EW.SymbolTable.push_back(FnSym); + return false; +} + +//===----------------------------------------------------------------------===// +// ELFWriter Implementation +//===----------------------------------------------------------------------===// + +ELFWriter::ELFWriter(std::ostream &o, TargetMachine &tm) + : MachineFunctionPass((intptr_t)&ID), O(o), TM(tm) { + e_flags = 0; // e_flags defaults to 0, no flags. + + is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; + isLittleEndian = TM.getTargetData()->isLittleEndian(); + + // Create the machine code emitter object for this target. + MCE = new ELFCodeEmitter(*this); + NumSections = 0; +} + +ELFWriter::~ELFWriter() { + delete MCE; +} + +// doInitialization - Emit the file header and all of the global variables for +// the module to the ELF file. +bool ELFWriter::doInitialization(Module &M) { + Mang = new Mangler(M); + + // Local alias to shortenify coming code. + std::vector<unsigned char> &FH = FileHeader; + OutputBuffer FHOut(FH, is64Bit, isLittleEndian); + + FHOut.outbyte(0x7F); // EI_MAG0 + FHOut.outbyte('E'); // EI_MAG1 + FHOut.outbyte('L'); // EI_MAG2 + FHOut.outbyte('F'); // EI_MAG3 + FHOut.outbyte(is64Bit ? 2 : 1); // EI_CLASS + FHOut.outbyte(isLittleEndian ? 1 : 2); // EI_DATA + FHOut.outbyte(1); // EI_VERSION + FH.resize(16); // EI_PAD up to 16 bytes. + + // This should change for shared objects. 
+ FHOut.outhalf(1); // e_type = ET_REL + FHOut.outword(TM.getELFWriterInfo()->getEMachine()); // target-defined + FHOut.outword(1); // e_version = 1 + FHOut.outaddr(0); // e_entry = 0 -> no entry point in .o file + FHOut.outaddr(0); // e_phoff = 0 -> no program header for .o + + ELFHeader_e_shoff_Offset = FH.size(); + FHOut.outaddr(0); // e_shoff + FHOut.outword(e_flags); // e_flags = whatever the target wants + + FHOut.outhalf(is64Bit ? 64 : 52); // e_ehsize = ELF header size + FHOut.outhalf(0); // e_phentsize = prog header entry size + FHOut.outhalf(0); // e_phnum = # prog header entries = 0 + FHOut.outhalf(is64Bit ? 64 : 40); // e_shentsize = sect hdr entry size + + + ELFHeader_e_shnum_Offset = FH.size(); + FHOut.outhalf(0); // e_shnum = # of section header ents + ELFHeader_e_shstrndx_Offset = FH.size(); + FHOut.outhalf(0); // e_shstrndx = Section # of '.shstrtab' + + // Add the null section, which is required to be first in the file. + getSection("", 0, 0); + + // Start up the symbol table. The first entry in the symtab is the null + // entry. + SymbolTable.push_back(ELFSym(0)); + + return false; +} + +void ELFWriter::EmitGlobal(GlobalVariable *GV) { + // If this is an external global, emit it now. TODO: Note that it would be + // better to ignore the symbol here and only add it to the symbol table if + // referenced. + if (!GV->hasInitializer()) { + ELFSym ExternalSym(GV); + ExternalSym.SetBind(ELFSym::STB_GLOBAL); + ExternalSym.SetType(ELFSym::STT_NOTYPE); + ExternalSym.SectionIdx = ELFSection::SHN_UNDEF; + SymbolTable.push_back(ExternalSym); + return; + } + + const Type *GVType = (const Type*)GV->getType(); + unsigned Align = TM.getTargetData()->getPrefTypeAlignment(GVType); + unsigned Size = TM.getTargetData()->getTypeSize(GVType); + + // If this global has a zero initializer, it is part of the .bss or common + // section. + if (GV->getInitializer()->isNullValue()) { + // If this global is part of the common block, add it now. Variables are + // part of the common block if they are zero initialized and allowed to be + // merged with other symbols. + if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage()) { + ELFSym CommonSym(GV); + // Value for common symbols is the alignment required. + CommonSym.Value = Align; + CommonSym.Size = Size; + CommonSym.SetBind(ELFSym::STB_GLOBAL); + CommonSym.SetType(ELFSym::STT_OBJECT); + // TODO SOMEDAY: add ELF visibility. + CommonSym.SectionIdx = ELFSection::SHN_COMMON; + SymbolTable.push_back(CommonSym); + return; + } + + // Otherwise, this symbol is part of the .bss section. Emit it now. + + // Handle alignment. Ensure section is aligned at least as much as required + // by this symbol. + ELFSection &BSSSection = getBSSSection(); + BSSSection.Align = std::max(BSSSection.Align, Align); + + // Within the section, emit enough virtual padding to get us to an alignment + // boundary. + if (Align) + BSSSection.Size = (BSSSection.Size + Align - 1) & ~(Align-1); + + ELFSym BSSSym(GV); + BSSSym.Value = BSSSection.Size; + BSSSym.Size = Size; + BSSSym.SetType(ELFSym::STT_OBJECT); + + switch (GV->getLinkage()) { + default: // weak/linkonce handled above + assert(0 && "Unexpected linkage type!"); + case GlobalValue::AppendingLinkage: // FIXME: This should be improved! 
+ case GlobalValue::ExternalLinkage: + BSSSym.SetBind(ELFSym::STB_GLOBAL); + break; + case GlobalValue::InternalLinkage: + BSSSym.SetBind(ELFSym::STB_LOCAL); + break; + } + + // Set the idx of the .bss section + BSSSym.SectionIdx = BSSSection.SectionIdx; + SymbolTable.push_back(BSSSym); + + // Reserve space in the .bss section for this symbol. + BSSSection.Size += Size; + return; + } + + // FIXME: handle .rodata + //assert(!GV->isConstant() && "unimp"); + + // FIXME: handle .data + //assert(0 && "unimp"); +} + + +bool ELFWriter::runOnMachineFunction(MachineFunction &MF) { + // Nothing to do here, this is all done through the MCE object above. + return false; +} + +/// doFinalization - Now that the module has been completely processed, emit +/// the ELF file to 'O'. +bool ELFWriter::doFinalization(Module &M) { + // Okay, the ELF header and .text sections have been completed, build the + // .data, .bss, and "common" sections next. + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + EmitGlobal(I); + + // Emit the symbol table now, if non-empty. + EmitSymbolTable(); + + // FIXME: Emit the relocations now. + + // Emit the string table for the sections in the ELF file we have. + EmitSectionTableStringTable(); + + // Emit the sections to the .o file, and emit the section table for the file. + OutputSectionsAndSectionTable(); + + // We are done with the abstract symbols. + SectionList.clear(); + NumSections = 0; + + // Release the name mangler object. + delete Mang; Mang = 0; + return false; +} + +/// EmitSymbolTable - If the current symbol table is non-empty, emit the string +/// table for it and then the symbol table itself. +void ELFWriter::EmitSymbolTable() { + if (SymbolTable.size() == 1) return; // Only the null entry. + + // FIXME: compact all local symbols to the start of the symtab. + unsigned FirstNonLocalSymbol = 1; + + ELFSection &StrTab = getSection(".strtab", ELFSection::SHT_STRTAB, 0); + StrTab.Align = 1; + + DataBuffer &StrTabBuf = StrTab.SectionData; + OutputBuffer StrTabOut(StrTabBuf, is64Bit, isLittleEndian); + + // Set the zero'th symbol to a null byte, as required. + StrTabOut.outbyte(0); + SymbolTable[0].NameIdx = 0; + unsigned Index = 1; + for (unsigned i = 1, e = SymbolTable.size(); i != e; ++i) { + // Use the name mangler to uniquify the LLVM symbol. + std::string Name = Mang->getValueName(SymbolTable[i].GV); + + if (Name.empty()) { + SymbolTable[i].NameIdx = 0; + } else { + SymbolTable[i].NameIdx = Index; + + // Add the name to the output buffer, including the null terminator. + StrTabBuf.insert(StrTabBuf.end(), Name.begin(), Name.end()); + + // Add a null terminator. + StrTabBuf.push_back(0); + + // Keep track of the number of bytes emitted to this section. + Index += Name.size()+1; + } + } + assert(Index == StrTabBuf.size()); + StrTab.Size = Index; + + // Now that we have emitted the string table and know the offset into the + // string table of each symbol, emit the symbol table itself. + ELFSection &SymTab = getSection(".symtab", ELFSection::SHT_SYMTAB, 0); + SymTab.Align = is64Bit ? 8 : 4; + SymTab.Link = SymTab.SectionIdx; // Section Index of .strtab. + SymTab.Info = FirstNonLocalSymbol; // First non-STB_LOCAL symbol. + SymTab.EntSize = 16; // Size of each symtab entry. FIXME: wrong for ELF64 + DataBuffer &SymTabBuf = SymTab.SectionData; + OutputBuffer SymTabOut(SymTabBuf, is64Bit, isLittleEndian); + + if (!is64Bit) { // 32-bit and 64-bit formats are shuffled a bit. 
+ for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) { + ELFSym &Sym = SymbolTable[i]; + SymTabOut.outword(Sym.NameIdx); + SymTabOut.outaddr32(Sym.Value); + SymTabOut.outword(Sym.Size); + SymTabOut.outbyte(Sym.Info); + SymTabOut.outbyte(Sym.Other); + SymTabOut.outhalf(Sym.SectionIdx); + } + } else { + for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) { + ELFSym &Sym = SymbolTable[i]; + SymTabOut.outword(Sym.NameIdx); + SymTabOut.outbyte(Sym.Info); + SymTabOut.outbyte(Sym.Other); + SymTabOut.outhalf(Sym.SectionIdx); + SymTabOut.outaddr64(Sym.Value); + SymTabOut.outxword(Sym.Size); + } + } + + SymTab.Size = SymTabBuf.size(); +} + +/// EmitSectionTableStringTable - This method adds and emits a section for the +/// ELF Section Table string table: the string table that holds all of the +/// section names. +void ELFWriter::EmitSectionTableStringTable() { + // First step: add the section for the string table to the list of sections: + ELFSection &SHStrTab = getSection(".shstrtab", ELFSection::SHT_STRTAB, 0); + + // Now that we know which section number is the .shstrtab section, update the + // e_shstrndx entry in the ELF header. + OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian); + FHOut.fixhalf(SHStrTab.SectionIdx, ELFHeader_e_shstrndx_Offset); + + // Set the NameIdx of each section in the string table and emit the bytes for + // the string table. + unsigned Index = 0; + DataBuffer &Buf = SHStrTab.SectionData; + + for (std::list<ELFSection>::iterator I = SectionList.begin(), + E = SectionList.end(); I != E; ++I) { + // Set the index into the table. Note if we have lots of entries with + // common suffixes, we could memoize them here if we cared. + I->NameIdx = Index; + + // Add the name to the output buffer, including the null terminator. + Buf.insert(Buf.end(), I->Name.begin(), I->Name.end()); + + // Add a null terminator. + Buf.push_back(0); + + // Keep track of the number of bytes emitted to this section. + Index += I->Name.size()+1; + } + + // Set the size of .shstrtab now that we know what it is. + assert(Index == Buf.size()); + SHStrTab.Size = Index; +} + +/// OutputSectionsAndSectionTable - Now that we have constructed the file header +/// and all of the sections, emit these to the ostream destination and emit the +/// SectionTable. +void ELFWriter::OutputSectionsAndSectionTable() { + // Pass #1: Compute the file offset for each section. + size_t FileOff = FileHeader.size(); // File header first. + + // Emit all of the section data in order. + for (std::list<ELFSection>::iterator I = SectionList.begin(), + E = SectionList.end(); I != E; ++I) { + // Align FileOff to whatever the alignment restrictions of the section are. + if (I->Align) + FileOff = (FileOff+I->Align-1) & ~(I->Align-1); + I->Offset = FileOff; + FileOff += I->SectionData.size(); + } + + // Align Section Header. + unsigned TableAlign = is64Bit ? 8 : 4; + FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1); + + // Now that we know where all of the sections will be emitted, set the e_shnum + // entry in the ELF header. + OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian); + FHOut.fixhalf(NumSections, ELFHeader_e_shnum_Offset); + + // Now that we know the offset in the file of the section table, update the + // e_shoff address in the ELF header. + FHOut.fixaddr(FileOff, ELFHeader_e_shoff_Offset); + + // Now that we know all of the data in the file header, emit it and all of the + // sections! 
+ O.write((char*)&FileHeader[0], FileHeader.size()); + FileOff = FileHeader.size(); + DataBuffer().swap(FileHeader); + + DataBuffer Table; + OutputBuffer TableOut(Table, is64Bit, isLittleEndian); + + // Emit all of the section data and build the section table itself. + while (!SectionList.empty()) { + const ELFSection &S = *SectionList.begin(); + + // Align FileOff to whatever the alignment restrictions of the section are. + if (S.Align) + for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1); + FileOff != NewFileOff; ++FileOff) + O.put((char)0xAB); + O.write((char*)&S.SectionData[0], S.SectionData.size()); + FileOff += S.SectionData.size(); + + TableOut.outword(S.NameIdx); // sh_name - Symbol table name idx + TableOut.outword(S.Type); // sh_type - Section contents & semantics + TableOut.outword(S.Flags); // sh_flags - Section flags. + TableOut.outaddr(S.Addr); // sh_addr - The mem addr this section is in. + TableOut.outaddr(S.Offset); // sh_offset - Offset from the file start. + TableOut.outword(S.Size); // sh_size - The section size. + TableOut.outword(S.Link); // sh_link - Section header table index link. + TableOut.outword(S.Info); // sh_info - Auxillary information. + TableOut.outword(S.Align); // sh_addralign - Alignment of section. + TableOut.outword(S.EntSize); // sh_entsize - Size of entries in the section + + SectionList.pop_front(); + } + + // Align output for the section table. + for (size_t NewFileOff = (FileOff+TableAlign-1) & ~(TableAlign-1); + FileOff != NewFileOff; ++FileOff) + O.put((char)0xAB); + + // Emit the section table itself. + O.write((char*)&Table[0], Table.size()); +} diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h new file mode 100644 index 0000000..f27d78f --- /dev/null +++ b/lib/CodeGen/ELFWriter.h @@ -0,0 +1,228 @@ +//===-- ELFWriter.h - Target-independent ELF writer support -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ELFWriter class. +// +//===----------------------------------------------------------------------===// + +#ifndef ELFWRITER_H +#define ELFWRITER_H + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include <list> + +namespace llvm { + class GlobalVariable; + class Mangler; + class MachineCodeEmitter; + class ELFCodeEmitter; + + /// ELFWriter - This class implements the common target-independent code for + /// writing ELF files. Targets should derive a class from this to + /// parameterize the output format. + /// + class ELFWriter : public MachineFunctionPass { + friend class ELFCodeEmitter; + public: + static char ID; + + MachineCodeEmitter &getMachineCodeEmitter() const { + return *(MachineCodeEmitter*)MCE; + } + + ELFWriter(std::ostream &O, TargetMachine &TM); + ~ELFWriter(); + + typedef std::vector<unsigned char> DataBuffer; + + protected: + /// Output stream to send the resultant object file to. + /// + std::ostream &O; + + /// Target machine description. + /// + TargetMachine &TM; + + /// Mang - The object used to perform name mangling for this module. + /// + Mangler *Mang; + + /// MCE - The MachineCodeEmitter object that we are exposing to emit machine + /// code for functions to the .o file. 
+ ELFCodeEmitter *MCE; + + //===------------------------------------------------------------------===// + // Properties to be set by the derived class ctor, used to configure the + // ELFWriter. + + // e_machine - This field is the target specific value to emit as the + // e_machine member of the ELF header. + unsigned short e_machine; + + // e_flags - The machine flags for the target. This defaults to zero. + unsigned e_flags; + + //===------------------------------------------------------------------===// + // Properties inferred automatically from the target machine. + // + + /// is64Bit/isLittleEndian - This information is inferred from the target + /// machine directly, indicating whether to emit a 32- or 64-bit ELF file. + bool is64Bit, isLittleEndian; + + /// doInitialization - Emit the file header and all of the global variables + /// for the module to the ELF file. + bool doInitialization(Module &M); + + bool runOnMachineFunction(MachineFunction &MF); + + + /// doFinalization - Now that the module has been completely processed, emit + /// the ELF file to 'O'. + bool doFinalization(Module &M); + + private: + // The buffer we accumulate the file header into. Note that this should be + // changed into something much more efficient later (and the bitcode writer + // as well!). + DataBuffer FileHeader; + + /// ELFSection - This struct contains information about each section that is + /// emitted to the file. This is eventually turned into the section header + /// table at the end of the file. + struct ELFSection { + std::string Name; // Name of the section. + unsigned NameIdx; // Index in .shstrtab of name, once emitted. + unsigned Type; + unsigned Flags; + uint64_t Addr; + unsigned Offset; + unsigned Size; + unsigned Link; + unsigned Info; + unsigned Align; + unsigned EntSize; + + /// SectionIdx - The number of the section in the Section Table. + /// + unsigned short SectionIdx; + + /// SectionData - The actual data for this section which we are building + /// up for emission to the file. + DataBuffer SectionData; + + enum { SHT_NULL = 0, SHT_PROGBITS = 1, SHT_SYMTAB = 2, SHT_STRTAB = 3, + SHT_RELA = 4, SHT_HASH = 5, SHT_DYNAMIC = 6, SHT_NOTE = 7, + SHT_NOBITS = 8, SHT_REL = 9, SHT_SHLIB = 10, SHT_DYNSYM = 11 }; + enum { SHN_UNDEF = 0, SHN_ABS = 0xFFF1, SHN_COMMON = 0xFFF2 }; + enum { // SHF - ELF Section Header Flags + SHF_WRITE = 1 << 0, // Writable + SHF_ALLOC = 1 << 1, // Mapped into the process addr space + SHF_EXECINSTR = 1 << 2, // Executable + SHF_MERGE = 1 << 4, // Might be merged if equal + SHF_STRINGS = 1 << 5, // Contains null-terminated strings + SHF_INFO_LINK = 1 << 6, // 'sh_info' contains SHT index + SHF_LINK_ORDER = 1 << 7, // Preserve order after combining + SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required + SHF_GROUP = 1 << 9, // Section is a member of a group + SHF_TLS = 1 << 10 // Section holds thread-local data + }; + + ELFSection(const std::string &name) + : Name(name), Type(0), Flags(0), Addr(0), Offset(0), Size(0), + Link(0), Info(0), Align(0), EntSize(0) { + } + }; + + /// SectionList - This is the list of sections that we have emitted to the + /// file. Once the file has been completely built, the section header table + /// is constructed from this info. + std::list<ELFSection> SectionList; + unsigned NumSections; // Always = SectionList.size() + + /// SectionLookup - This is a mapping from section name to section number in + /// the SectionList. 
+ std::map<std::string, ELFSection*> SectionLookup; + + /// getSection - Return the section with the specified name, creating a new + /// section if one does not already exist. + ELFSection &getSection(const std::string &Name, + unsigned Type, unsigned Flags = 0) { + ELFSection *&SN = SectionLookup[Name]; + if (SN) return *SN; + + SectionList.push_back(Name); + SN = &SectionList.back(); + SN->SectionIdx = NumSections++; + SN->Type = Type; + SN->Flags = Flags; + return *SN; + } + + ELFSection &getDataSection() { + return getSection(".data", ELFSection::SHT_PROGBITS, + ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC); + } + ELFSection &getBSSSection() { + return getSection(".bss", ELFSection::SHT_NOBITS, + ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC); + } + + /// ELFSym - This struct contains information about each symbol that is + /// added to logical symbol table for the module. This is eventually + /// turned into a real symbol table in the file. + struct ELFSym { + const GlobalValue *GV; // The global value this corresponds to. + unsigned NameIdx; // Index in .strtab of name, once emitted. + uint64_t Value; + unsigned Size; + unsigned char Info; + unsigned char Other; + unsigned short SectionIdx; + + enum { STB_LOCAL = 0, STB_GLOBAL = 1, STB_WEAK = 2 }; + enum { STT_NOTYPE = 0, STT_OBJECT = 1, STT_FUNC = 2, STT_SECTION = 3, + STT_FILE = 4 }; + ELFSym(const GlobalValue *gv) : GV(gv), Value(0), Size(0), Info(0), + Other(0), SectionIdx(0) {} + + void SetBind(unsigned X) { + assert(X == (X & 0xF) && "Bind value out of range!"); + Info = (Info & 0x0F) | (X << 4); + } + void SetType(unsigned X) { + assert(X == (X & 0xF) && "Type value out of range!"); + Info = (Info & 0xF0) | X; + } + }; + + /// SymbolTable - This is the list of symbols we have emitted to the file. + /// This actually gets rearranged before emission to the file (to put the + /// local symbols first in the list). + std::vector<ELFSym> SymbolTable; + + // As we complete the ELF file, we need to update fields in the ELF header + // (e.g. the location of the section table). These members keep track of + // the offset in ELFHeader of these various pieces to update and other + // locations in the file. + unsigned ELFHeader_e_shoff_Offset; // e_shoff in ELF header. + unsigned ELFHeader_e_shstrndx_Offset; // e_shstrndx in ELF header. + unsigned ELFHeader_e_shnum_Offset; // e_shnum in ELF header. + private: + void EmitGlobal(GlobalVariable *GV); + + void EmitSymbolTable(); + + void EmitSectionTableStringTable(); + void OutputSectionsAndSectionTable(); + }; +} + +#endif diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp new file mode 100644 index 0000000..3bddc77 --- /dev/null +++ b/lib/CodeGen/IfConversion.cpp @@ -0,0 +1,1226 @@ +//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the Evan Cheng and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the machine instruction level if-conversion pass. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ifcvt" +#include "llvm/Function.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +namespace { + // Hidden options for help debugging. + cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden); + cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden); + cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden); + cl::opt<bool> DisableSimple("disable-ifcvt-simple", + cl::init(false), cl::Hidden); + cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false", + cl::init(false), cl::Hidden); + cl::opt<bool> DisableTriangle("disable-ifcvt-triangle", + cl::init(false), cl::Hidden); + cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev", + cl::init(false), cl::Hidden); + cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false", + cl::init(false), cl::Hidden); + cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev", + cl::init(false), cl::Hidden); + cl::opt<bool> DisableDiamond("disable-ifcvt-diamond", + cl::init(false), cl::Hidden); +} + +STATISTIC(NumSimple, "Number of simple if-conversions performed"); +STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed"); +STATISTIC(NumTriangle, "Number of triangle if-conversions performed"); +STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed"); +STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed"); +STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed"); +STATISTIC(NumDiamonds, "Number of diamond if-conversions performed"); +STATISTIC(NumIfConvBBs, "Number of if-converted blocks"); +STATISTIC(NumDupBBs, "Number of duplicated blocks"); + +namespace { + class IfConverter : public MachineFunctionPass { + enum IfcvtKind { + ICNotClassfied, // BB data valid, but not classified. + ICSimpleFalse, // Same as ICSimple, but on the false path. + ICSimple, // BB is entry of an one split, no rejoin sub-CFG. + ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition. + ICTriangleRev, // Same as ICTriangle, but true path rev condition. + ICTriangleFalse, // Same as ICTriangle, but on the false path. + ICTriangle, // BB is entry of a triangle sub-CFG. + ICDiamond // BB is entry of a diamond sub-CFG. + }; + + /// BBInfo - One per MachineBasicBlock, this is used to cache the result + /// if-conversion feasibility analysis. This includes results from + /// TargetInstrInfo::AnalyzeBranch() (i.e. TBB, FBB, and Cond), and its + /// classification, and common tail block of its successors (if it's a + /// diamond shape), its size, whether it's predicable, and whether any + /// instruction can clobber the 'would-be' predicate. + /// + /// IsDone - True if BB is not to be considered for ifcvt. + /// IsBeingAnalyzed - True if BB is currently being analyzed. + /// IsAnalyzed - True if BB has been analyzed (info is still valid). + /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed. + /// IsBrAnalyzable - True if AnalyzeBranch() returns false. 
+ /// HasFallThrough - True if BB may fallthrough to the following BB. + /// IsUnpredicable - True if BB is known to be unpredicable. + /// ClobbersPred - True if BB could modify predicates (e.g. has + /// cmp, call, etc.) + /// NonPredSize - Number of non-predicated instructions. + /// BB - Corresponding MachineBasicBlock. + /// TrueBB / FalseBB- See AnalyzeBranch(). + /// BrCond - Conditions for end of block conditional branches. + /// Predicate - Predicate used in the BB. + struct BBInfo { + bool IsDone : 1; + bool IsBeingAnalyzed : 1; + bool IsAnalyzed : 1; + bool IsEnqueued : 1; + bool IsBrAnalyzable : 1; + bool HasFallThrough : 1; + bool IsUnpredicable : 1; + bool CannotBeCopied : 1; + bool ClobbersPred : 1; + unsigned NonPredSize; + MachineBasicBlock *BB; + MachineBasicBlock *TrueBB; + MachineBasicBlock *FalseBB; + std::vector<MachineOperand> BrCond; + std::vector<MachineOperand> Predicate; + BBInfo() : IsDone(false), IsBeingAnalyzed(false), + IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), + HasFallThrough(false), IsUnpredicable(false), + CannotBeCopied(false), ClobbersPred(false), NonPredSize(0), + BB(0), TrueBB(0), FalseBB(0) {} + }; + + /// IfcvtToken - Record information about pending if-conversions to attemp: + /// BBI - Corresponding BBInfo. + /// Kind - Type of block. See IfcvtKind. + /// NeedSubsumsion - True if the to be predicated BB has already been + /// predicated. + /// NumDups - Number of instructions that would be duplicated due + /// to this if-conversion. (For diamonds, the number of + /// identical instructions at the beginnings of both + /// paths). + /// NumDups2 - For diamonds, the number of identical instructions + /// at the ends of both paths. + struct IfcvtToken { + BBInfo &BBI; + IfcvtKind Kind; + bool NeedSubsumsion; + unsigned NumDups; + unsigned NumDups2; + IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0) + : BBI(b), Kind(k), NeedSubsumsion(s), NumDups(d), NumDups2(d2) {} + }; + + /// Roots - Basic blocks that do not have successors. These are the starting + /// points of Graph traversal. + std::vector<MachineBasicBlock*> Roots; + + /// BBAnalysis - Results of if-conversion feasibility analysis indexed by + /// basic block number. 
+ std::vector<BBInfo> BBAnalysis; + + const TargetLowering *TLI; + const TargetInstrInfo *TII; + bool MadeChange; + public: + static char ID; + IfConverter() : MachineFunctionPass((intptr_t)&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + virtual const char *getPassName() const { return "If converter"; } + + private: + bool ReverseBranchCondition(BBInfo &BBI); + bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const; + bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, + bool FalseBranch, unsigned &Dups) const; + bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2) const; + void ScanInstructions(BBInfo &BBI); + BBInfo &AnalyzeBlock(MachineBasicBlock *BB, + std::vector<IfcvtToken*> &Tokens); + bool FeasibilityAnalysis(BBInfo &BBI, std::vector<MachineOperand> &Cond, + bool isTriangle = false, bool RevBranch = false); + bool AnalyzeBlocks(MachineFunction &MF, + std::vector<IfcvtToken*> &Tokens); + void InvalidatePreds(MachineBasicBlock *BB); + void RemoveExtraEdges(BBInfo &BBI); + bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind); + bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind); + bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2); + void PredicateBlock(BBInfo &BBI, + MachineBasicBlock::iterator E, + std::vector<MachineOperand> &Cond); + void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, + std::vector<MachineOperand> &Cond, + bool IgnoreBr = false); + void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI); + + bool MeetIfcvtSizeLimit(unsigned Size) const { + return Size > 0 && Size <= TLI->getIfCvtBlockSizeLimit(); + } + + // blockAlwaysFallThrough - Block ends without a terminator. + bool blockAlwaysFallThrough(BBInfo &BBI) const { + return BBI.IsBrAnalyzable && BBI.TrueBB == NULL; + } + + // IfcvtTokenCmp - Used to sort if-conversion candidates. + static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) { + int Incr1 = (C1->Kind == ICDiamond) + ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups; + int Incr2 = (C2->Kind == ICDiamond) + ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups; + if (Incr1 > Incr2) + return true; + else if (Incr1 == Incr2) { + // Favors subsumsion. + if (C1->NeedSubsumsion == false && C2->NeedSubsumsion == true) + return true; + else if (C1->NeedSubsumsion == C2->NeedSubsumsion) { + // Favors diamond over triangle, etc. + if ((unsigned)C1->Kind < (unsigned)C2->Kind) + return true; + else if (C1->Kind == C2->Kind) + return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber(); + } + } + return false; + } + }; + + char IfConverter::ID = 0; +} + +FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); } + +bool IfConverter::runOnMachineFunction(MachineFunction &MF) { + TLI = MF.getTarget().getTargetLowering(); + TII = MF.getTarget().getInstrInfo(); + if (!TII) return false; + + static int FnNum = -1; + DOUT << "\nIfcvt: function (" << ++FnNum << ") \'" + << MF.getFunction()->getName() << "\'"; + + if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) { + DOUT << " skipped\n"; + return false; + } + DOUT << "\n"; + + MF.RenumberBlocks(); + BBAnalysis.resize(MF.getNumBlockIDs()); + + // Look for root nodes, i.e. blocks without successors. 
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + if (I->succ_size() == 0) + Roots.push_back(I); + + std::vector<IfcvtToken*> Tokens; + MadeChange = false; + unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + + NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds; + while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) { + // Do an intial analysis for each basic block and finding all the potential + // candidates to perform if-convesion. + bool Change = AnalyzeBlocks(MF, Tokens); + while (!Tokens.empty()) { + IfcvtToken *Token = Tokens.back(); + Tokens.pop_back(); + BBInfo &BBI = Token->BBI; + IfcvtKind Kind = Token->Kind; + + // If the block has been evicted out of the queue or it has already been + // marked dead (due to it being predicated), then skip it. + if (BBI.IsDone) + BBI.IsEnqueued = false; + if (!BBI.IsEnqueued) + continue; + + BBI.IsEnqueued = false; + + bool RetVal = false; + switch (Kind) { + default: assert(false && "Unexpected!"); + break; + case ICSimple: + case ICSimpleFalse: { + bool isFalse = Kind == ICSimpleFalse; + if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break; + DOUT << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"") + << "): BB#" << BBI.BB->getNumber() << " (" + << ((Kind == ICSimpleFalse) + ? BBI.FalseBB->getNumber() + : BBI.TrueBB->getNumber()) << ") "; + RetVal = IfConvertSimple(BBI, Kind); + DOUT << (RetVal ? "succeeded!" : "failed!") << "\n"; + if (RetVal) + if (isFalse) NumSimpleFalse++; + else NumSimple++; + break; + } + case ICTriangle: + case ICTriangleRev: + case ICTriangleFalse: + case ICTriangleFRev: { + bool isFalse = Kind == ICTriangleFalse; + bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev); + if (DisableTriangle && !isFalse && !isRev) break; + if (DisableTriangleR && !isFalse && isRev) break; + if (DisableTriangleF && isFalse && !isRev) break; + if (DisableTriangleFR && isFalse && isRev) break; + DOUT << "Ifcvt (Triangle"; + if (isFalse) + DOUT << " false"; + if (isRev) + DOUT << " rev"; + DOUT << "): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "; + RetVal = IfConvertTriangle(BBI, Kind); + DOUT << (RetVal ? "succeeded!" : "failed!") << "\n"; + if (RetVal) { + if (isFalse) { + if (isRev) NumTriangleFRev++; + else NumTriangleFalse++; + } else { + if (isRev) NumTriangleRev++; + else NumTriangle++; + } + } + break; + } + case ICDiamond: { + if (DisableDiamond) break; + DOUT << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "; + RetVal = IfConvertDiamond(BBI, Kind, Token->NumDups, Token->NumDups2); + DOUT << (RetVal ? "succeeded!" : "failed!") << "\n"; + if (RetVal) NumDiamonds++; + break; + } + } + + Change |= RetVal; + + NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev + + NumTriangleFalse + NumTriangleFRev + NumDiamonds; + if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit) + break; + } + + if (!Change) + break; + MadeChange |= Change; + } + + // Delete tokens in case of early exit. + while (!Tokens.empty()) { + IfcvtToken *Token = Tokens.back(); + Tokens.pop_back(); + delete Token; + } + + Tokens.clear(); + Roots.clear(); + BBAnalysis.clear(); + + return MadeChange; +} + +/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given +/// its 'true' successor. 
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, + MachineBasicBlock *TrueBB) { + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + E = BB->succ_end(); SI != E; ++SI) { + MachineBasicBlock *SuccBB = *SI; + if (SuccBB != TrueBB) + return SuccBB; + } + return NULL; +} + +/// ReverseBranchCondition - Reverse the condition of the end of the block +/// branchs. Swap block's 'true' and 'false' successors. +bool IfConverter::ReverseBranchCondition(BBInfo &BBI) { + if (!TII->ReverseBranchCondition(BBI.BrCond)) { + TII->RemoveBranch(*BBI.BB); + TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond); + std::swap(BBI.TrueBB, BBI.FalseBB); + return true; + } + return false; +} + +/// getNextBlock - Returns the next block in the function blocks ordering. If +/// it is the end, returns NULL. +static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { + MachineFunction::iterator I = BB; + MachineFunction::iterator E = BB->getParent()->end(); + if (++I == E) + return NULL; + return I; +} + +/// ValidSimple - Returns true if the 'true' block (along with its +/// predecessor) forms a valid simple shape for ifcvt. It also returns the +/// number of instructions that the ifcvt would need to duplicate if performed +/// in Dups. +bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const { + Dups = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) + return false; + + if (TrueBBI.IsBrAnalyzable) + return false; + + if (TrueBBI.BB->pred_size() > 1) { + if (TrueBBI.CannotBeCopied || + TrueBBI.NonPredSize > TLI->getIfCvtDupBlockSizeLimit()) + return false; + Dups = TrueBBI.NonPredSize; + } + + return true; +} + +/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) forms a valid triangle shape for ifcvt. +/// If 'FalseBranch' is true, it checks if 'true' block's false branch +/// branches to the false branch rather than the other way around. It also +/// returns the number of instructions that the ifcvt would need to duplicate +/// if performed in 'Dups'. +bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, + bool FalseBranch, unsigned &Dups) const { + Dups = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) + return false; + + if (TrueBBI.BB->pred_size() > 1) { + if (TrueBBI.CannotBeCopied) + return false; + + unsigned Size = TrueBBI.NonPredSize; + if (TrueBBI.IsBrAnalyzable) { + if (TrueBBI.TrueBB && TrueBBI.BrCond.size() == 0) + // End with an unconditional branch. It will be removed. + --Size; + else { + MachineBasicBlock *FExit = FalseBranch + ? TrueBBI.TrueBB : TrueBBI.FalseBB; + if (FExit) + // Require a conditional branch + ++Size; + } + } + if (Size > TLI->getIfCvtDupBlockSizeLimit()) + return false; + Dups = Size; + } + + MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB; + if (!TExit && blockAlwaysFallThrough(TrueBBI)) { + MachineFunction::iterator I = TrueBBI.BB; + if (++I == TrueBBI.BB->getParent()->end()) + return false; + TExit = I; + } + return TExit && TExit == FalseBBI.BB; +} + +static +MachineBasicBlock::iterator firstNonBranchInst(MachineBasicBlock *BB, + const TargetInstrInfo *TII) { + MachineBasicBlock::iterator I = BB->end(); + while (I != BB->begin()) { + --I; + const TargetInstrDescriptor *TID = I->getInstrDescriptor(); + if ((TID->Flags & M_BRANCH_FLAG) == 0) + break; + } + return I; +} + +/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) forms a valid diamond shape for ifcvt. 
+bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const {
+ Dups1 = Dups2 = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
+ FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
+ return false;
+
+ MachineBasicBlock *TT = TrueBBI.TrueBB;
+ MachineBasicBlock *FT = FalseBBI.TrueBB;
+
+ if (!TT && blockAlwaysFallThrough(TrueBBI))
+ TT = getNextBlock(TrueBBI.BB);
+ if (!FT && blockAlwaysFallThrough(FalseBBI))
+ FT = getNextBlock(FalseBBI.BB);
+ if (TT != FT)
+ return false;
+ if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
+ return false;
+ if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
+ return false;
+
+ // FIXME: Allow true block to have an early exit?
+ if (TrueBBI.FalseBB || FalseBBI.FalseBB ||
+ (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
+ return false;
+
+ MachineBasicBlock::iterator TI = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FI = FalseBBI.BB->begin();
+ while (TI != TrueBBI.BB->end() && FI != FalseBBI.BB->end()) {
+ if (!TI->isIdenticalTo(FI))
+ break;
+ ++Dups1;
+ ++TI;
+ ++FI;
+ }
+
+ TI = firstNonBranchInst(TrueBBI.BB, TII);
+ FI = firstNonBranchInst(FalseBBI.BB, TII);
+ while (TI != TrueBBI.BB->begin() && FI != FalseBBI.BB->begin()) {
+ if (!TI->isIdenticalTo(FI))
+ break;
+ ++Dups2;
+ --TI;
+ --FI;
+ }
+
+ return true;
+}
+
+/// ScanInstructions - Scan all the instructions in the block to determine if
+/// the block is predicable. In most cases, that means all the instructions
+/// in the block have the M_PREDICABLE flag. It also checks if the block
+/// contains any instruction which can clobber a predicate (e.g. a condition
+/// code register); if so, the block is not predicable unless the clobbering
+/// instruction is the last one in the block.
+void IfConverter::ScanInstructions(BBInfo &BBI) {
+ if (BBI.IsDone)
+ return;
+
+ bool AlreadyPredicated = BBI.Predicate.size() > 0;
+ // First analyze the end of BB branches.
+ BBI.TrueBB = BBI.FalseBB = NULL;
+ BBI.BrCond.clear();
+ BBI.IsBrAnalyzable =
+ !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
+ BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == NULL;
+
+ if (BBI.BrCond.size()) {
+ // No false branch. This BB must end with a conditional branch and a
+ // fallthrough.
+ if (!BBI.FalseBB)
+ BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
+ assert(BBI.FalseBB && "Expected to find the fallthrough block!");
+ }
+
+ // Then scan all the instructions.
+ BBI.NonPredSize = 0;
+ BBI.ClobbersPred = false;
+ bool SeenCondBr = false;
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
+ I != E; ++I) {
+ const TargetInstrDescriptor *TID = I->getInstrDescriptor();
+ if ((TID->Flags & M_NOT_DUPLICABLE) != 0)
+ BBI.CannotBeCopied = true;
+
+ bool isPredicated = TII->isPredicated(I);
+ bool isCondBr = BBI.IsBrAnalyzable &&
+ (TID->Flags & M_BRANCH_FLAG) != 0 && (TID->Flags & M_BARRIER_FLAG) == 0;
+
+ if (!isCondBr) {
+ if (!isPredicated)
+ BBI.NonPredSize++;
+ else if (!AlreadyPredicated) {
+ // FIXME: This instruction is already predicated before the
+ // if-conversion pass. It's probably something like a conditional move.
+ // Mark this block unpredicable for now.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+
+ if (BBI.ClobbersPred && !isPredicated) {
+ // A predicate-modifying instruction should end the block (except for
+ // already predicated instructions and end-of-block branches).
+ if (isCondBr) {
+ SeenCondBr = true;
+
+ // A conditional branch is not predicable, but it may be eliminated.
+ continue;
+ }
+
+ // The predicate may have been modified; the subsequent (currently)
+ // unpredicated instructions cannot be correctly predicated.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+
+ // FIXME: Make use of PredDefs? e.g. ADDC, SUBC set predicates but are
+ // still potentially predicable.
+ std::vector<MachineOperand> PredDefs;
+ if (TII->DefinesPredicate(I, PredDefs))
+ BBI.ClobbersPred = true;
+
+ if ((TID->Flags & M_PREDICABLE) == 0) {
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+}
+
+/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be
+/// predicated by the specified predicate.
+bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
+ std::vector<MachineOperand> &Pred,
+ bool isTriangle, bool RevBranch) {
+ // If the block is dead or unpredicable, then it cannot be predicated.
+ if (BBI.IsDone || BBI.IsUnpredicable)
+ return false;
+
+ // If it is already predicated, check if its predicate subsumes the new
+ // predicate.
+ if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred))
+ return false;
+
+ if (BBI.BrCond.size()) {
+ if (!isTriangle)
+ return false;
+
+ // Test predicate subsumption.
+ std::vector<MachineOperand> RevPred(Pred);
+ std::vector<MachineOperand> Cond(BBI.BrCond);
+ if (RevBranch) {
+ if (TII->ReverseBranchCondition(Cond))
+ return false;
+ }
+ if (TII->ReverseBranchCondition(RevPred) ||
+ !TII->SubsumesPredicate(Cond, RevPred))
+ return false;
+ }
+
+ return true;
+}
+
+/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from
+/// the specified block. Record its successors and whether it looks like an
+/// if-conversion candidate.
+IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
+ std::vector<IfcvtToken*> &Tokens) {
+ BBInfo &BBI = BBAnalysis[BB->getNumber()];
+
+ if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed)
+ return BBI;
+
+ BBI.BB = BB;
+ BBI.IsBeingAnalyzed = true;
+
+ ScanInstructions(BBI);
+
+ // Unanalyzable, or ends with a fallthrough or an unconditional branch.
+ if (!BBI.IsBrAnalyzable || BBI.BrCond.size() == 0) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ // Do not ifcvt if either path is a back edge to the entry block.
+ if (BBI.TrueBB == BB || BBI.FalseBB == BB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ BBInfo &TrueBBI = AnalyzeBlock(BBI.TrueBB, Tokens);
+ BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens);
+
+ if (TrueBBI.IsDone && FalseBBI.IsDone) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
+ std::vector<MachineOperand> RevCond(BBI.BrCond);
+ bool CanRevCond = !TII->ReverseBranchCondition(RevCond);
+
+ unsigned Dups = 0;
+ unsigned Dups2 = 0;
+ bool TNeedSub = TrueBBI.Predicate.size() > 0;
+ bool FNeedSub = FalseBBI.Predicate.size() > 0;
+ bool Enqueued = false;
+ if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize - (Dups + Dups2)) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize - (Dups + Dups2)) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ // Diamond:
+ // EBB
+ // / \_
+ // | |
+ // TBB FBB
+ // \ /
+ // TailBB
+ // Note TailBB can be empty.
+ Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups,
+ Dups2));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
+ // Triangle:
+ // EBB
+ // | \_
+ // | |
+ // | TBB
+ // | /
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(TrueBBI, Dups) &&
+ MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
+ // Simple (split, no rejoin):
+ // EBB
+ // | \_
+ // | |
+ // | TBB---> exit
+ // |
+ // FBB
+ Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (CanRevCond) {
+ // Try the other path...
+ if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(FalseBBI, Dups) &&
+ MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+ }
+
+ BBI.IsEnqueued = Enqueued;
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+}
+
+/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
+/// candidates. It returns true if any CFG restructuring is done to expose more
+/// if-conversion opportunities.
+bool IfConverter::AnalyzeBlocks(MachineFunction &MF,
+ std::vector<IfcvtToken*> &Tokens) {
+ bool Change = false;
+ std::set<MachineBasicBlock*> Visited;
+ for (unsigned i = 0, e = Roots.size(); i != e; ++i) {
+ for (idf_ext_iterator<MachineBasicBlock*> I=idf_ext_begin(Roots[i],Visited),
+ E = idf_ext_end(Roots[i], Visited); I != E; ++I) {
+ MachineBasicBlock *BB = *I;
+ AnalyzeBlock(BB, Tokens);
+ }
+ }
+
+ // Sort to favor more complex ifcvt schemes.
+ std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
+
+ return Change;
+}
+
+/// canFallThroughTo - Returns true if ToBB is the next block after BB, or if
+/// all the intervening blocks are empty (given that BB can fall through to
+/// its next block).
+static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
+ MachineFunction::iterator I = BB;
+ MachineFunction::iterator TI = ToBB;
+ MachineFunction::iterator E = BB->getParent()->end();
+ while (++I != TI)
+ if (I == E || !I->empty())
+ return false;
+ return true;
+}
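For intuition, the shapes enqueued above map onto familiar source-level patterns. The following is a hypothetical C++ illustration of where each shape comes from; it is not code from this pass, and the function names are invented:

#include <iostream>

// 'Simple' (split, no rejoin): the converted path leaves the region.
static int simple(bool c, int x) {
  if (c) return -1;   // TBB exits
  return x + 1;       // FBB
}

// 'Triangle': one conditional side, then the paths rejoin.
static int triangle(bool c, int x) {
  if (c) x *= 2;      // TBB
  return x + 1;       // FBB is also the join block
}

// 'Diamond': two conditional sides rejoining at a (possibly empty) tail.
static int diamond(bool c, int x) {
  if (c) x *= 2;      // TBB
  else   x += 3;      // FBB
  return x;           // TailBB
}

int main() {
  std::cout << simple(true, 4) << ' ' << triangle(true, 4) << ' '
            << diamond(false, 4) << '\n'; // prints "-1 9 7"
  return 0;
}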
+/// InvalidatePreds - Invalidate predecessor BB info so it will be re-analyzed
+/// to determine if it can be if-converted. If a predecessor is already
+/// enqueued, dequeue it!
+void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI) {
+ BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()];
+ if (PBBI.IsDone || PBBI.BB == BB)
+ continue;
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+}
+
+/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB.
+///
+static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
+ const TargetInstrInfo *TII) {
+ std::vector<MachineOperand> NoCond;
+ TII->InsertBranch(*BB, ToBB, NULL, NoCond);
+}
+
+/// RemoveExtraEdges - Remove true / false edges if either or both are no
+/// longer successors.
+void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
+ MachineBasicBlock *TBB = NULL, *FBB = NULL;
+ std::vector<MachineOperand> Cond;
+ if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond))
+ BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+}
+
+/// IfConvertSimple - If-convert a simple (split, no rejoin) sub-CFG.
+///
+bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+
+ std::vector<MachineOperand> Cond(BBI.BrCond);
+ if (Kind == ICSimpleFalse)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICSimpleFalse)
+ TII->ReverseBranchCondition(Cond);
+
+ if (CvtBBI->BB->pred_size() > 1) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
+ } else {
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+
+ // Merge converted block into entry block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI);
+ }
+
+ bool IterIfcvt = true;
+ if (!canFallThroughTo(BBI.BB, NextBBI->BB)) {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ // Now the ifcvt'd block will look like this:
+ // BB:
+ // ...
+ // t, f = cmp
+ // if t op
+ // b BBf
+ //
+ // We cannot further ifcvt this block because the unconditional branch
+ // will have to be predicated on the new condition, which will not be
+ // available if the cmp executes.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertTriangle - If-convert a triangle sub-CFG.
+///
+bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+
+ std::vector<MachineOperand> Cond(BBI.BrCond);
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ TII->ReverseBranchCondition(Cond);
+
+ if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
+ ReverseBranchCondition(*CvtBBI);
+ // BB has been changed, modify its predecessors (except for this
+ // one) so they don't get ifcvt'ed based on stale information.
+ for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(),
+ E = CvtBBI->BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PBB = *PI;
+ if (PBB == BBI.BB)
+ continue;
+ BBInfo &PBBI = BBAnalysis[PBB->getNumber()];
+ if (PBBI.IsEnqueued) {
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+ }
+ }
+
+ bool HasEarlyExit = CvtBBI->FalseBB != NULL;
+ bool DupBB = CvtBBI->BB->pred_size() > 1;
+ if (DupBB) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
+ } else {
+ // Predicate the 'true' block after removing its branch.
+ CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+ }
+
+ if (!DupBB) {
+ // Now merge the entry of the triangle with the true block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI);
+ }
+
+ // If the 'true' block has a 'false' successor, add an exit branch to it.
+ if (HasEarlyExit) {
+ std::vector<MachineOperand> RevCond(CvtBBI->BrCond);
+ if (TII->ReverseBranchCondition(RevCond))
+ assert(false && "Unable to reverse branch condition!");
+ TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond);
+ BBI.BB->addSuccessor(CvtBBI->FalseBB);
+ }
+
+ // Merge in the 'false' block if the 'false' block has no other
+ // predecessors. Otherwise, add an unconditional branch to the 'false'
+ // block.
+ bool FalseBBDead = false;
+ bool IterIfcvt = true;
+ bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB);
+ if (!isFallThrough) {
+ // Only merge them if the true block does not fall through to the false
+ // block. By not merging them, we make it possible to iteratively
+ // ifcvt the blocks.
+ if (!HasEarlyExit &&
+ NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) {
+ MergeBlocks(BBI, *NextBBI);
+ FalseBBDead = true;
+ } else {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ }
+ // Mixed predicated and unpredicated code. This cannot be iteratively
+ // predicated.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+ if (FalseBBDead)
+ NextBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertDiamond - If-convert a diamond sub-CFG.
+///
+bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ MachineBasicBlock *TailBB = TrueBBI.TrueBB;
+ // The true block must fall through or end with an unanalyzable terminator.
+ if (!TailBB) {
+ if (blockAlwaysFallThrough(TrueBBI))
+ TailBB = FalseBBI.TrueBB;
+ assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
+ }
+
+ if (TrueBBI.IsDone || FalseBBI.IsDone ||
+ TrueBBI.BB->pred_size() > 1 ||
+ FalseBBI.BB->pred_size() > 1) {
+ // Something has changed. It's no longer safe to predicate these blocks.
+ BBI.IsAnalyzed = false;
+ TrueBBI.IsAnalyzed = false;
+ FalseBBI.IsAnalyzed = false;
+ return false;
+ }
+
+ // Merge the 'true' and 'false' blocks by copying the instructions
+ // from the 'false' block to the 'true' block. That is, unless the true
+ // block would clobber the predicate, in which case, do the opposite.
+ BBInfo *BBI1 = &TrueBBI;
+ BBInfo *BBI2 = &FalseBBI;
+ std::vector<MachineOperand> RevCond(BBI.BrCond);
+ TII->ReverseBranchCondition(RevCond);
+ std::vector<MachineOperand> *Cond1 = &BBI.BrCond;
+ std::vector<MachineOperand> *Cond2 = &RevCond;
+
+ // Figure out the more profitable ordering.
+ bool DoSwap = false;
+ if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)
+ DoSwap = true;
+ else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {
+ if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
+ DoSwap = true;
+ }
+ if (DoSwap) {
+ std::swap(BBI1, BBI2);
+ std::swap(Cond1, Cond2);
+ }
+
+ // Remove the conditional branch from entry to the blocks.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+
+ // Remove the duplicated instructions at the beginnings of both paths.
+ MachineBasicBlock::iterator DI1 = BBI1->BB->begin();
+ MachineBasicBlock::iterator DI2 = BBI2->BB->begin();
+ BBI1->NonPredSize -= NumDups1;
+ BBI2->NonPredSize -= NumDups1;
+ while (NumDups1 != 0) {
+ ++DI1;
+ ++DI2;
+ --NumDups1;
+ }
+ BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
+ BBI2->BB->erase(BBI2->BB->begin(), DI2);
+
+ // Predicate the 'true' block after removing its branch.
+ BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
+ DI1 = BBI1->BB->end();
+ for (unsigned i = 0; i != NumDups2; ++i)
+ --DI1;
+ BBI1->BB->erase(DI1, BBI1->BB->end());
+ PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1);
+
+ // Predicate the 'false' block.
+ BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
+ DI2 = BBI2->BB->end();
+ while (NumDups2 != 0) {
+ --DI2;
+ --NumDups2;
+ }
+ PredicateBlock(*BBI2, DI2, *Cond2);
+
+ // Merge the true block into the entry of the diamond.
+ MergeBlocks(BBI, *BBI1);
+ MergeBlocks(BBI, *BBI2);
+
+ // If the if-converted block falls through or unconditionally branches into
+ // the tail block, and the tail block does not have other predecessors, then
+ // fold the tail block in as well. Otherwise, unless it falls through to the
+ // tail, add an unconditional branch to it.
+ if (TailBB) {
+ BBInfo TailBBI = BBAnalysis[TailBB->getNumber()];
+ if (TailBB->pred_size() == 1 && !TailBBI.HasFallThrough) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, TailBBI);
+ TailBBI.IsDone = true;
+ } else {
+ InsertUncondBranch(BBI.BB, TailBB, TII);
+ BBI.HasFallThrough = false;
+ }
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info.
+ BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
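Semantically, the diamond conversion ends with both arms executing under complementary predicates. Here is a toy standalone C++ model of that effect; it is illustrative only, and pmov is an invented stand-in for a predicated machine instruction:

#include <iostream>

// Toy predicated move: writes dst only when the predicate holds; this is
// the effect PredicateBlock arranges at the machine-instruction level.
static void pmov(bool pred, int &dst, int src) {
  if (pred) dst = src;
}

int main() {
  int x = 5, t = 0;
  bool p = true;          // the branch condition (Cond1)
  // Before: if (p) t = x * 2; else t = x + 3;
  // After if-conversion, both arms execute, guarded by p and !p:
  pmov(p,  t, x * 2);     // 'true' arm under Cond1
  pmov(!p, t, x + 3);     // 'false' arm under Cond2 (reversed condition)
  std::cout << t << "\n"; // prints 10
  return 0;
}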
+/// PredicateBlock - Predicate instructions from the start of the block to the
+/// specified end with the specified condition.
+void IfConverter::PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ std::vector<MachineOperand> &Cond) {
+ for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
+ if (TII->isPredicated(I))
+ continue;
+ if (!TII->PredicateInstruction(I, Cond)) {
+ cerr << "Unable to predicate " << *I << "!\n";
+ abort();
+ }
+ }
+
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
+
+ BBI.IsAnalyzed = false;
+ BBI.NonPredSize = 0;
+
+ NumIfConvBBs++;
+}
+
+/// CopyAndPredicateBlock - Copy and predicate instructions from the source BB
+/// to the destination block. Skip end-of-block branches if IgnoreBr is true.
+void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ std::vector<MachineOperand> &Cond,
+ bool IgnoreBr) {
+ for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
+ E = FromBBI.BB->end(); I != E; ++I) {
+ const TargetInstrDescriptor *TID = I->getInstrDescriptor();
+ bool isPredicated = TII->isPredicated(I);
+ // Do not copy the end-of-block branches.
+ if (IgnoreBr && !isPredicated && (TID->Flags & M_BRANCH_FLAG) != 0)
+ break;
+
+ MachineInstr *MI = I->clone();
+ ToBBI.BB->insert(ToBBI.BB->end(), MI);
+ ToBBI.NonPredSize++;
+
+ if (!isPredicated)
+ if (!TII->PredicateInstruction(MI, Cond)) {
+ cerr << "Unable to predicate " << *MI << "!\n";
+ abort();
+ }
+ }
+
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // A fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ if (!ToBBI.BB->isSuccessor(Succ))
+ ToBBI.BB->addSuccessor(Succ);
+ }
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate));
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.IsAnalyzed = false;
+
+ NumDupBBs++;
+}
+
+/// MergeBlocks - Move all instructions from FromBB to the end of ToBB.
+///
+void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI) {
+ ToBBI.BB->splice(ToBBI.BB->end(),
+ FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
+
+ // Redirect all branches to FromBB to ToBB.
+ std::vector<MachineBasicBlock *> Preds(FromBBI.BB->pred_begin(),
+ FromBBI.BB->pred_end());
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ MachineBasicBlock *Pred = Preds[i];
+ if (Pred == ToBBI.BB)
+ continue;
+ Pred->ReplaceUsesOfBlockWith(FromBBI.BB, ToBBI.BB);
+ }
+
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // A fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ FromBBI.BB->removeSuccessor(Succ);
+ if (!ToBBI.BB->isSuccessor(Succ))
+ ToBBI.BB->addSuccessor(Succ);
+ }
+
+ // Now FromBBI always falls through to the next block!
+ if (NBB && !FromBBI.BB->isSuccessor(NBB))
+ FromBBI.BB->addSuccessor(NBB);
+
+ std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+ std::back_inserter(ToBBI.Predicate));
+ FromBBI.Predicate.clear();
+
+ ToBBI.NonPredSize += FromBBI.NonPredSize;
+ FromBBI.NonPredSize = 0;
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.HasFallThrough = FromBBI.HasFallThrough;
+ ToBBI.IsAnalyzed = false;
+ FromBBI.IsAnalyzed = false;
+}
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
new file mode 100644
index 0000000..8ae4df6
--- /dev/null
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -0,0 +1,799 @@
+//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IntrinsicLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+template <class ArgIt>
+static void EnsureFunctionExists(Module &M, const char *Name,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ const Type *RetTy) {
+ // Insert a correctly-typed definition now.
+ std::vector<const Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back(I->getType());
+ M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
+}
+
+/// ReplaceCallWith - This function is used when we want to lower an intrinsic
+/// call to a call of an external function. This handles hard cases such as
+/// when there was already a prototype for the external function, but that
+/// prototype doesn't match the arguments we expect to pass in.
+template <class ArgIt>
+static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ const Type *RetTy, Constant *&FCache) {
+ if (!FCache) {
+ // If we haven't already looked up this function, check to see if the
+ // program already contains a function with this name.
+ Module *M = CI->getParent()->getParent()->getParent();
+ // Get or insert the definition now.
+ std::vector<const Type *> ParamTys; + for (ArgIt I = ArgBegin; I != ArgEnd; ++I) + ParamTys.push_back((*I)->getType()); + FCache = M->getOrInsertFunction(NewFn, + FunctionType::get(RetTy, ParamTys, false)); + } + + SmallVector<Value*, 8> Operands(ArgBegin, ArgEnd); + CallInst *NewCI = new CallInst(FCache, &Operands[0], Operands.size(), + CI->getName(), CI); + if (!CI->use_empty()) + CI->replaceAllUsesWith(NewCI); + return NewCI; +} + +void IntrinsicLowering::AddPrototypes(Module &M) { + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (I->isDeclaration() && !I->use_empty()) + switch (I->getIntrinsicID()) { + default: break; + case Intrinsic::setjmp: + EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(), + Type::Int32Ty); + break; + case Intrinsic::longjmp: + EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(), + Type::VoidTy); + break; + case Intrinsic::siglongjmp: + EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(), + Type::VoidTy); + break; + case Intrinsic::memcpy_i32: + case Intrinsic::memcpy_i64: + M.getOrInsertFunction("memcpy", PointerType::get(Type::Int8Ty), + PointerType::get(Type::Int8Ty), + PointerType::get(Type::Int8Ty), + TD.getIntPtrType(), (Type *)0); + break; + case Intrinsic::memmove_i32: + case Intrinsic::memmove_i64: + M.getOrInsertFunction("memmove", PointerType::get(Type::Int8Ty), + PointerType::get(Type::Int8Ty), + PointerType::get(Type::Int8Ty), + TD.getIntPtrType(), (Type *)0); + break; + case Intrinsic::memset_i32: + case Intrinsic::memset_i64: + M.getOrInsertFunction("memset", PointerType::get(Type::Int8Ty), + PointerType::get(Type::Int8Ty), Type::Int32Ty, + TD.getIntPtrType(), (Type *)0); + break; + case Intrinsic::sqrt_f32: + case Intrinsic::sqrt_f64: + if(I->arg_begin()->getType() == Type::FloatTy) + EnsureFunctionExists(M, "sqrtf", I->arg_begin(), I->arg_end(), + Type::FloatTy); + else + EnsureFunctionExists(M, "sqrt", I->arg_begin(), I->arg_end(), + Type::DoubleTy); + break; + } +} + +/// LowerBSWAP - Emit the code to lower bswap of V before the specified +/// instruction IP. 
+static Value *LowerBSWAP(Value *V, Instruction *IP) { + assert(V->getType()->isInteger() && "Can't bswap a non-integer type!"); + + unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); + + switch(BitSize) { + default: assert(0 && "Unhandled type size of value to byteswap!"); + case 16: { + Value *Tmp1 = BinaryOperator::createShl(V, + ConstantInt::get(V->getType(),8),"bswap.2",IP); + Value *Tmp2 = BinaryOperator::createLShr(V, + ConstantInt::get(V->getType(),8),"bswap.1",IP); + V = BinaryOperator::createOr(Tmp1, Tmp2, "bswap.i16", IP); + break; + } + case 32: { + Value *Tmp4 = BinaryOperator::createShl(V, + ConstantInt::get(V->getType(),24),"bswap.4", IP); + Value *Tmp3 = BinaryOperator::createShl(V, + ConstantInt::get(V->getType(),8),"bswap.3",IP); + Value *Tmp2 = BinaryOperator::createLShr(V, + ConstantInt::get(V->getType(),8),"bswap.2",IP); + Value *Tmp1 = BinaryOperator::createLShr(V, + ConstantInt::get(V->getType(),24),"bswap.1", IP); + Tmp3 = BinaryOperator::createAnd(Tmp3, + ConstantInt::get(Type::Int32Ty, 0xFF0000), + "bswap.and3", IP); + Tmp2 = BinaryOperator::createAnd(Tmp2, + ConstantInt::get(Type::Int32Ty, 0xFF00), + "bswap.and2", IP); + Tmp4 = BinaryOperator::createOr(Tmp4, Tmp3, "bswap.or1", IP); + Tmp2 = BinaryOperator::createOr(Tmp2, Tmp1, "bswap.or2", IP); + V = BinaryOperator::createOr(Tmp4, Tmp2, "bswap.i32", IP); + break; + } + case 64: { + Value *Tmp8 = BinaryOperator::createShl(V, + ConstantInt::get(V->getType(),56),"bswap.8", IP); + Value *Tmp7 = BinaryOperator::createShl(V, + ConstantInt::get(V->getType(),40),"bswap.7", IP); + Value *Tmp6 = BinaryOperator::createShl(V, + ConstantInt::get(V->getType(),24),"bswap.6", IP); + Value *Tmp5 = BinaryOperator::createShl(V, + ConstantInt::get(V->getType(),8),"bswap.5", IP); + Value* Tmp4 = BinaryOperator::createLShr(V, + ConstantInt::get(V->getType(),8),"bswap.4", IP); + Value* Tmp3 = BinaryOperator::createLShr(V, + ConstantInt::get(V->getType(),24),"bswap.3", IP); + Value* Tmp2 = BinaryOperator::createLShr(V, + ConstantInt::get(V->getType(),40),"bswap.2", IP); + Value* Tmp1 = BinaryOperator::createLShr(V, + ConstantInt::get(V->getType(),56),"bswap.1", IP); + Tmp7 = BinaryOperator::createAnd(Tmp7, + ConstantInt::get(Type::Int64Ty, + 0xFF000000000000ULL), + "bswap.and7", IP); + Tmp6 = BinaryOperator::createAnd(Tmp6, + ConstantInt::get(Type::Int64Ty, 0xFF0000000000ULL), + "bswap.and6", IP); + Tmp5 = BinaryOperator::createAnd(Tmp5, + ConstantInt::get(Type::Int64Ty, 0xFF00000000ULL), + "bswap.and5", IP); + Tmp4 = BinaryOperator::createAnd(Tmp4, + ConstantInt::get(Type::Int64Ty, 0xFF000000ULL), + "bswap.and4", IP); + Tmp3 = BinaryOperator::createAnd(Tmp3, + ConstantInt::get(Type::Int64Ty, 0xFF0000ULL), + "bswap.and3", IP); + Tmp2 = BinaryOperator::createAnd(Tmp2, + ConstantInt::get(Type::Int64Ty, 0xFF00ULL), + "bswap.and2", IP); + Tmp8 = BinaryOperator::createOr(Tmp8, Tmp7, "bswap.or1", IP); + Tmp6 = BinaryOperator::createOr(Tmp6, Tmp5, "bswap.or2", IP); + Tmp4 = BinaryOperator::createOr(Tmp4, Tmp3, "bswap.or3", IP); + Tmp2 = BinaryOperator::createOr(Tmp2, Tmp1, "bswap.or4", IP); + Tmp8 = BinaryOperator::createOr(Tmp8, Tmp6, "bswap.or5", IP); + Tmp4 = BinaryOperator::createOr(Tmp4, Tmp2, "bswap.or6", IP); + V = BinaryOperator::createOr(Tmp8, Tmp4, "bswap.i64", IP); + break; + } + } + return V; +} + +/// LowerCTPOP - Emit the code to lower ctpop of V before the specified +/// instruction IP. 
+static Value *LowerCTPOP(Value *V, Instruction *IP) {
+ assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!");
+
+ static const uint64_t MaskValues[6] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+ };
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ unsigned WordSize = (BitSize + 63) / 64;
+ Value *Count = ConstantInt::get(V->getType(), 0);
+
+ for (unsigned n = 0; n < WordSize; ++n) {
+ Value *PartValue = V;
+ for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
+ i <<= 1, ++ct) {
+ Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
+ Value *LHS = BinaryOperator::createAnd(
+ PartValue, MaskCst, "ctpop.and1", IP);
+ Value *VShift = BinaryOperator::createLShr(PartValue,
+ ConstantInt::get(V->getType(), i), "ctpop.sh", IP);
+ Value *RHS = BinaryOperator::createAnd(VShift, MaskCst, "ctpop.and2", IP);
+ PartValue = BinaryOperator::createAdd(LHS, RHS, "ctpop.step", IP);
+ }
+ Count = BinaryOperator::createAdd(PartValue, Count, "ctpop.part", IP);
+ if (BitSize > 64) {
+ V = BinaryOperator::createLShr(V, ConstantInt::get(V->getType(), 64),
+ "ctpop.part.sh", IP);
+ BitSize -= 64;
+ }
+ }
+
+ return CastInst::createIntegerCast(Count, Type::Int32Ty, false, "ctpop", IP);
+}
+
+/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
+/// instruction IP.
+static Value *LowerCTLZ(Value *V, Instruction *IP) {
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ for (unsigned i = 1; i < BitSize; i <<= 1) {
+ Value *ShVal = ConstantInt::get(V->getType(), i);
+ ShVal = BinaryOperator::createLShr(V, ShVal, "ctlz.sh", IP);
+ V = BinaryOperator::createOr(V, ShVal, "ctlz.step", IP);
+ }
+
+ V = BinaryOperator::createNot(V, "", IP);
+ return LowerCTPOP(V, IP);
+}
+
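The bswap, ctpop, and ctlz expansions above are easy to sanity-check in isolation. Below is a standalone C++ sketch of the same dataflow on fixed widths, plus the cttz identity ctpop(~x & (x - 1)) that LowerIntrinsicCall uses later in this file; none of this code is part of the file itself, and the function names are invented:

#include <cstdint>
#include <cstdio>

// Mirrors the Tmp1..Tmp4 dataflow emitted by LowerBSWAP for i32.
static uint32_t bswap32(uint32_t V) {
  uint32_t Tmp4 = V << 24;               // bswap.4
  uint32_t Tmp3 = (V << 8) & 0xFF0000u;  // bswap.3 + bswap.and3
  uint32_t Tmp2 = (V >> 8) & 0xFF00u;    // bswap.2 + bswap.and2
  uint32_t Tmp1 = V >> 24;               // bswap.1
  return (Tmp4 | Tmp3) | (Tmp2 | Tmp1);  // bswap.or1/or2/i32
}

static const uint64_t MaskValues[6] = {
  0x5555555555555555ULL, 0x3333333333333333ULL,
  0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
  0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
};

// ctpop: at step ct, adjacent 2^ct-bit fields are added in parallel.
static unsigned ctpop64(uint64_t V) {
  for (unsigned i = 1, ct = 0; i < 64; i <<= 1, ++ct)
    V = (V & MaskValues[ct]) + ((V >> i) & MaskValues[ct]);
  return (unsigned)V;
}

// ctlz: OR in right-shifted copies so every bit below the leading one
// becomes set, then count the remaining zeros as ctpop of the complement.
static unsigned ctlz64(uint64_t V) {
  for (unsigned i = 1; i < 64; i <<= 1)
    V |= V >> i;
  return ctpop64(~V);
}

// cttz(x) -> ctpop(~x & (x - 1)), the identity used by LowerIntrinsicCall.
static unsigned cttz64(uint64_t V) {
  return ctpop64(~V & (V - 1));
}

int main() {
  std::printf("%08x %u %u %u\n", bswap32(0x12345678u), // 78563412
              ctpop64(0xF0F0ULL),                      // 8
              ctlz64(1ULL << 40),                      // 23
              cttz64(1ULL << 40));                     // 40
  return 0;
}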
+/// Convert the llvm.part.select.iX.iY intrinsic. This intrinsic takes
+/// three integer arguments. The first argument is the Value from which the
+/// bits will be selected. It may be of any bit width. The second and third
+/// arguments specify a range of bits to select with the second argument
+/// specifying the low bit and the third argument specifying the high bit. Both
+/// must be type i32. The result is the corresponding selected bits from the
+/// Value in the same width as the Value (first argument). If the low bit index
+/// is higher than the high bit index then the inverse selection is done and
+/// the bits are returned in inverse order.
+/// @brief Lowering of llvm.part.select intrinsic.
+static Instruction *LowerPartSelect(CallInst *CI) {
+ // Make sure we're dealing with a part select intrinsic here
+ Function *F = CI->getCalledFunction();
+ const FunctionType *FT = F->getFunctionType();
+ if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
+ FT->getNumParams() != 3 || !FT->getParamType(0)->isInteger() ||
+ !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger())
+ return CI;
+
+ // Get the intrinsic implementation function by converting all the . to _
+ // in the intrinsic's function name and then reconstructing the function
+ // declaration.
+ std::string Name(F->getName());
+ for (unsigned i = 4; i < Name.length(); ++i)
+ if (Name[i] == '.')
+ Name[i] = '_';
+ Module* M = F->getParent();
+ F = cast<Function>(M->getOrInsertFunction(Name, FT));
+ F->setLinkage(GlobalValue::WeakLinkage);
+
+ // If we haven't defined the impl function yet, do so now
+ if (F->isDeclaration()) {
+
+ // Get the arguments to the function
+ Function::arg_iterator args = F->arg_begin();
+ Value* Val = args++; Val->setName("Val");
+ Value* Lo = args++; Lo->setName("Lo");
+ Value* Hi = args++; Hi->setName("High");
+
+ // We want to select a range of bits here such that [Hi, Lo] is shifted
+ // down to the low bits. However, it is quite possible that Hi is smaller
+ // than Lo in which case the bits have to be reversed.
+
+ // Create the blocks we will need for the two cases (forward, reverse)
+ BasicBlock* CurBB = new BasicBlock("entry", F);
+ BasicBlock *RevSize = new BasicBlock("revsize", CurBB->getParent());
+ BasicBlock *FwdSize = new BasicBlock("fwdsize", CurBB->getParent());
+ BasicBlock *Compute = new BasicBlock("compute", CurBB->getParent());
+ BasicBlock *Reverse = new BasicBlock("reverse", CurBB->getParent());
+ BasicBlock *RsltBlk = new BasicBlock("result", CurBB->getParent());
+
+ // Cast Hi and Lo to the size of Val so the widths are all the same
+ if (Hi->getType() != Val->getType())
+ Hi = CastInst::createIntegerCast(Hi, Val->getType(), false,
+ "tmp", CurBB);
+ if (Lo->getType() != Val->getType())
+ Lo = CastInst::createIntegerCast(Lo, Val->getType(), false,
+ "tmp", CurBB);
+
+ // Compute a few things that both cases will need, up front.
+ Constant* Zero = ConstantInt::get(Val->getType(), 0);
+ Constant* One = ConstantInt::get(Val->getType(), 1);
+ Constant* AllOnes = ConstantInt::getAllOnesValue(Val->getType());
+
+ // Compare the Hi and Lo bit positions. This is used to determine
+ // which case we have (forward or reverse)
+ ICmpInst *Cmp = new ICmpInst(ICmpInst::ICMP_ULT, Hi, Lo, "less",CurBB);
+ new BranchInst(RevSize, FwdSize, Cmp, CurBB);
+
+ // First, compute the number of bits in the forward case.
+ Instruction* FBitSize =
+ BinaryOperator::createSub(Hi, Lo,"fbits", FwdSize);
+ new BranchInst(Compute, FwdSize);
+
+ // Second, compute the number of bits in the reverse case.
+ Instruction* RBitSize =
+ BinaryOperator::createSub(Lo, Hi, "rbits", RevSize);
+ new BranchInst(Compute, RevSize);
+
+ // Now, compute the bit range. Start by getting the bitsize and the shift
+ // amount (either Hi or Lo) from PHI nodes. Then we compute a mask for
+ // the number of bits we want in the range. We shift the bits down to the
+ // least significant bits, apply the mask to zero out unwanted high bits,
+ // and we have computed the "forward" result. It may still need to be
+ // reversed.
+
+ // Get the BitSize from one of the two subtractions
+ PHINode *BitSize = new PHINode(Val->getType(), "bits", Compute);
+ BitSize->reserveOperandSpace(2);
+ BitSize->addIncoming(FBitSize, FwdSize);
+ BitSize->addIncoming(RBitSize, RevSize);
+
+ // Get the ShiftAmount as the smaller of Hi/Lo
+ PHINode *ShiftAmt = new PHINode(Val->getType(), "shiftamt", Compute);
+ ShiftAmt->reserveOperandSpace(2);
+ ShiftAmt->addIncoming(Lo, FwdSize);
+ ShiftAmt->addIncoming(Hi, RevSize);
+
+ // Increment the bit size
+ Instruction *BitSizePlusOne =
+ BinaryOperator::createAdd(BitSize, One, "bits", Compute);
+
+ // Create a Mask to zero out the high order bits.
+ Instruction* Mask =
+ BinaryOperator::createShl(AllOnes, BitSizePlusOne, "mask", Compute);
+ Mask = BinaryOperator::createNot(Mask, "mask", Compute);
+
+ // Shift the bits down and apply the mask
+ Instruction* FRes =
+ BinaryOperator::createLShr(Val, ShiftAmt, "fres", Compute);
+ FRes = BinaryOperator::createAnd(FRes, Mask, "fres", Compute);
+ new BranchInst(Reverse, RsltBlk, Cmp, Compute);
+
+ // In the Reverse block we already have the forward result in FRes, but we
+ // must reverse it by shifting the bits out of FRes on the right and
+ // shifting them into RRes from the left.
+
+ // First set up our loop counter
+ PHINode *Count = new PHINode(Val->getType(), "count", Reverse);
+ Count->reserveOperandSpace(2);
+ Count->addIncoming(BitSizePlusOne, Compute);
+
+ // Next, get the value that we are shifting.
+ PHINode *BitsToShift = new PHINode(Val->getType(), "val", Reverse);
+ BitsToShift->reserveOperandSpace(2);
+ BitsToShift->addIncoming(FRes, Compute);
+
+ // Finally, get the result of the last computation
+ PHINode *RRes = new PHINode(Val->getType(), "rres", Reverse);
+ RRes->reserveOperandSpace(2);
+ RRes->addIncoming(Zero, Compute);
+
+ // Decrement the counter
+ Instruction *Decr = BinaryOperator::createSub(Count, One, "decr", Reverse);
+ Count->addIncoming(Decr, Reverse);
+
+ // Compute the Bit that we want to move
+ Instruction *Bit =
+ BinaryOperator::createAnd(BitsToShift, One, "bit", Reverse);
+
+ // Compute the new value for next iteration.
+ Instruction *NewVal =
+ BinaryOperator::createLShr(BitsToShift, One, "rshift", Reverse);
+ BitsToShift->addIncoming(NewVal, Reverse);
+
+ // Shift the bit into the low bits of the result.
+ Instruction *NewRes =
+ BinaryOperator::createShl(RRes, One, "lshift", Reverse);
+ NewRes = BinaryOperator::createOr(NewRes, Bit, "addbit", Reverse);
+ RRes->addIncoming(NewRes, Reverse);
+
+ // Terminate the loop if we've moved all the bits.
+ ICmpInst *Cond =
+ new ICmpInst(ICmpInst::ICMP_EQ, Decr, Zero, "cond", Reverse);
+ new BranchInst(RsltBlk, Reverse, Cond, Reverse);
+
+ // Finally, in the result block, select one of the two results with a PHI
+ // node and return the result.
+ CurBB = RsltBlk;
+ PHINode *BitSelect = new PHINode(Val->getType(), "part_select", CurBB);
+ BitSelect->reserveOperandSpace(2);
+ BitSelect->addIncoming(FRes, Compute);
+ BitSelect->addIncoming(NewRes, Reverse);
+ new ReturnInst(BitSelect, CurBB);
+ }
+
+ // Return a call to the implementation function
+ Value *Args[] = {
+ CI->getOperand(1),
+ CI->getOperand(2),
+ CI->getOperand(3)
+ };
+ return new CallInst(F, Args, sizeof(Args)/sizeof(Args[0]), CI->getName(), CI);
+}
+
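Written directly over a fixed 32-bit width, the semantics of both of these intrinsics are compact. A hypothetical standalone C++ sketch follows (invented names; it assumes the selected/replaced bit counts stay below 32 so no shift amount reaches the word size):

#include <cstdint>
#include <cstdio>

// part.select: select bits [Lo, Hi] of Val shifted down to bit 0; if
// Lo > Hi the same bits are returned in reversed order, as in the
// 'reverse' loop of LowerPartSelect.
static uint32_t part_select32(uint32_t Val, uint32_t Lo, uint32_t Hi) {
  bool Forward = Lo <= Hi;
  uint32_t Shift = Forward ? Lo : Hi;
  uint32_t Bits = (Forward ? Hi - Lo : Lo - Hi) + 1; // must stay < 32 here
  uint32_t FRes = (Val >> Shift) & ~(~0u << Bits);
  if (Forward)
    return FRes;
  uint32_t RRes = 0;
  for (uint32_t i = 0; i < Bits; ++i, FRes >>= 1)
    RRes = (RRes << 1) | (FRes & 1); // shift bits in from the left
  return RRes;
}

// part.set (forward case): replace bits [Lo, Lo+NumBits) of Val with the
// low bits of Rep, mirroring the t1..t6 mask algebra in LowerPartSet.
static uint32_t part_set32(uint32_t Val, uint32_t Rep,
                           uint32_t Lo, uint32_t NumBits) {
  uint32_t t1 = ~0u << Lo;                           // ones at Lo and above
  uint32_t t4 = ~t1 | (t1 << NumBits);               // outside the window
  uint32_t Placed = (Rep & ~(~0u << NumBits)) << Lo; // truncated Rep, placed
  return (Val & t4) | Placed;
}

int main() {
  std::printf("%x %x %08x\n",
              part_select32(0xABCD, 4, 11),          // bc
              part_select32(0xABCD, 11, 4),          // 3d (same bits, reversed)
              part_set32(0xAABBCCDD, 0xEE, 8, 8));   // aabbeedd
  return 0;
}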
+/// Convert the llvm.part.set.iX.iY.iZ intrinsic. This intrinsic takes
+/// four integer arguments (iAny %Value, iAny %Replacement, i32 %Low,
+/// i32 %High). The first two arguments can be any bit width. The result is
+/// the same width as %Value. The operation replaces bits between %Low and
+/// %High with the value in %Replacement. If %Replacement is not the same
+/// width, it is truncated or zero extended as appropriate to fit the bits
+/// being replaced. If %Low is greater than %High then the inverse set of
+/// bits is replaced.
+/// @brief Lowering of llvm.bit.part.set intrinsic.
+static Instruction *LowerPartSet(CallInst *CI) {
+ // Make sure we're dealing with a part set intrinsic here
+ Function *F = CI->getCalledFunction();
+ const FunctionType *FT = F->getFunctionType();
+ if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
+ FT->getNumParams() != 4 || !FT->getParamType(0)->isInteger() ||
+ !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger() ||
+ !FT->getParamType(3)->isInteger())
+ return CI;
+
+ // Get the intrinsic implementation function by converting all the . to _
+ // in the intrinsic's function name and then reconstructing the function
+ // declaration.
+ std::string Name(F->getName());
+ for (unsigned i = 4; i < Name.length(); ++i)
+ if (Name[i] == '.')
+ Name[i] = '_';
+ Module* M = F->getParent();
+ F = cast<Function>(M->getOrInsertFunction(Name, FT));
+ F->setLinkage(GlobalValue::WeakLinkage);
+
+ // If we haven't defined the impl function yet, do so now
+ if (F->isDeclaration()) {
+ // Get the arguments for the function.
+ Function::arg_iterator args = F->arg_begin();
+ Value* Val = args++; Val->setName("Val");
+ Value* Rep = args++; Rep->setName("Rep");
+ Value* Lo = args++; Lo->setName("Lo");
+ Value* Hi = args++; Hi->setName("Hi");
+
+ // Get some types we need
+ const IntegerType* ValTy = cast<IntegerType>(Val->getType());
+ const IntegerType* RepTy = cast<IntegerType>(Rep->getType());
+ uint32_t ValBits = ValTy->getBitWidth();
+ uint32_t RepBits = RepTy->getBitWidth();
+
+ // Constant Definitions
+ ConstantInt* RepBitWidth = ConstantInt::get(Type::Int32Ty, RepBits);
+ ConstantInt* RepMask = ConstantInt::getAllOnesValue(RepTy);
+ ConstantInt* ValMask = ConstantInt::getAllOnesValue(ValTy);
+ ConstantInt* One = ConstantInt::get(Type::Int32Ty, 1);
+ ConstantInt* ValOne = ConstantInt::get(ValTy, 1);
+ ConstantInt* Zero = ConstantInt::get(Type::Int32Ty, 0);
+ ConstantInt* ValZero = ConstantInt::get(ValTy, 0);
+
+ // Basic blocks we fill in below.
+ BasicBlock* entry = new BasicBlock("entry", F, 0);
+ BasicBlock* large = new BasicBlock("large", F, 0);
+ BasicBlock* small = new BasicBlock("small", F, 0);
+ BasicBlock* reverse = new BasicBlock("reverse", F, 0);
+ BasicBlock* result = new BasicBlock("result", F, 0);
+
+ // BASIC BLOCK: entry
+ // First, get the number of bits that we're placing as an i32
+ ICmpInst* is_forward =
+ new ICmpInst(ICmpInst::ICMP_ULT, Lo, Hi, "", entry);
+ SelectInst* Hi_pn = new SelectInst(is_forward, Hi, Lo, "", entry);
+ SelectInst* Lo_pn = new SelectInst(is_forward, Lo, Hi, "", entry);
+ BinaryOperator* NumBits = BinaryOperator::createSub(Hi_pn, Lo_pn, "",entry);
+ NumBits = BinaryOperator::createAdd(NumBits, One, "", entry);
+ // Now, convert Lo and Hi to ValTy bit width
+ if (ValBits > 32) {
+ Lo = new ZExtInst(Lo_pn, ValTy, "", entry);
+ } else if (ValBits < 32) {
+ Lo = new TruncInst(Lo_pn, ValTy, "", entry);
+ }
+ // Determine if the replacement bits are larger than the number of bits we
+ // are replacing and deal with it.
+ ICmpInst* is_large = + new ICmpInst(ICmpInst::ICMP_ULT, NumBits, RepBitWidth, "", entry); + new BranchInst(large, small, is_large, entry); + + // BASIC BLOCK: large + Instruction* MaskBits = + BinaryOperator::createSub(RepBitWidth, NumBits, "", large); + MaskBits = CastInst::createIntegerCast(MaskBits, RepMask->getType(), + false, "", large); + BinaryOperator* Mask1 = + BinaryOperator::createLShr(RepMask, MaskBits, "", large); + BinaryOperator* Rep2 = BinaryOperator::createAnd(Mask1, Rep, "", large); + new BranchInst(small, large); + + // BASIC BLOCK: small + PHINode* Rep3 = new PHINode(RepTy, "", small); + Rep3->reserveOperandSpace(2); + Rep3->addIncoming(Rep2, large); + Rep3->addIncoming(Rep, entry); + Value* Rep4 = Rep3; + if (ValBits > RepBits) + Rep4 = new ZExtInst(Rep3, ValTy, "", small); + else if (ValBits < RepBits) + Rep4 = new TruncInst(Rep3, ValTy, "", small); + new BranchInst(result, reverse, is_forward, small); + + // BASIC BLOCK: reverse (reverses the bits of the replacement) + // Set up our loop counter as a PHI so we can decrement on each iteration. + // We will loop for the number of bits in the replacement value. + PHINode *Count = new PHINode(Type::Int32Ty, "count", reverse); + Count->reserveOperandSpace(2); + Count->addIncoming(NumBits, small); + + // Get the value that we are shifting bits out of as a PHI because + // we'll change this with each iteration. + PHINode *BitsToShift = new PHINode(Val->getType(), "val", reverse); + BitsToShift->reserveOperandSpace(2); + BitsToShift->addIncoming(Rep4, small); + + // Get the result of the last computation or zero on first iteration + PHINode *RRes = new PHINode(Val->getType(), "rres", reverse); + RRes->reserveOperandSpace(2); + RRes->addIncoming(ValZero, small); + + // Decrement the loop counter by one + Instruction *Decr = BinaryOperator::createSub(Count, One, "", reverse); + Count->addIncoming(Decr, reverse); + + // Get the bit that we want to move into the result + Value *Bit = BinaryOperator::createAnd(BitsToShift, ValOne, "", reverse); + + // Compute the new value of the bits to shift for the next iteration. + Value *NewVal = BinaryOperator::createLShr(BitsToShift, ValOne,"", reverse); + BitsToShift->addIncoming(NewVal, reverse); + + // Shift the bit we extracted into the low bit of the result. + Instruction *NewRes = BinaryOperator::createShl(RRes, ValOne, "", reverse); + NewRes = BinaryOperator::createOr(NewRes, Bit, "", reverse); + RRes->addIncoming(NewRes, reverse); + + // Terminate loop if we've moved all the bits. 
+ ICmpInst *Cond = new ICmpInst(ICmpInst::ICMP_EQ, Decr, Zero, "", reverse); + new BranchInst(result, reverse, Cond, reverse); + + // BASIC BLOCK: result + PHINode *Rplcmnt = new PHINode(Val->getType(), "", result); + Rplcmnt->reserveOperandSpace(2); + Rplcmnt->addIncoming(NewRes, reverse); + Rplcmnt->addIncoming(Rep4, small); + Value* t0 = CastInst::createIntegerCast(NumBits,ValTy,false,"",result); + Value* t1 = BinaryOperator::createShl(ValMask, Lo, "", result); + Value* t2 = BinaryOperator::createNot(t1, "", result); + Value* t3 = BinaryOperator::createShl(t1, t0, "", result); + Value* t4 = BinaryOperator::createOr(t2, t3, "", result); + Value* t5 = BinaryOperator::createAnd(t4, Val, "", result); + Value* t6 = BinaryOperator::createShl(Rplcmnt, Lo, "", result); + Value* Rslt = BinaryOperator::createOr(t5, t6, "part_set", result); + new ReturnInst(Rslt, result); + } + + // Return a call to the implementation function + Value *Args[] = { + CI->getOperand(1), + CI->getOperand(2), + CI->getOperand(3), + CI->getOperand(4) + }; + return new CallInst(F, Args, sizeof(Args)/sizeof(Args[0]), CI->getName(), CI); +} + + +void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { + Function *Callee = CI->getCalledFunction(); + assert(Callee && "Cannot lower an indirect call!"); + + switch (Callee->getIntrinsicID()) { + case Intrinsic::not_intrinsic: + cerr << "Cannot lower a call to a non-intrinsic function '" + << Callee->getName() << "'!\n"; + abort(); + default: + cerr << "Error: Code generator does not support intrinsic function '" + << Callee->getName() << "'!\n"; + abort(); + + // The setjmp/longjmp intrinsics should only exist in the code if it was + // never optimized (ie, right out of the CFE), or if it has been hacked on + // by the lowerinvoke pass. In both cases, the right thing to do is to + // convert the call to an explicit setjmp or longjmp call. 
+ case Intrinsic::setjmp: { + static Constant *SetjmpFCache = 0; + Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin()+1, CI->op_end(), + Type::Int32Ty, SetjmpFCache); + if (CI->getType() != Type::VoidTy) + CI->replaceAllUsesWith(V); + break; + } + case Intrinsic::sigsetjmp: + if (CI->getType() != Type::VoidTy) + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + break; + + case Intrinsic::longjmp: { + static Constant *LongjmpFCache = 0; + ReplaceCallWith("longjmp", CI, CI->op_begin()+1, CI->op_end(), + Type::VoidTy, LongjmpFCache); + break; + } + + case Intrinsic::siglongjmp: { + // Insert the call to abort + static Constant *AbortFCache = 0; + ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(), + Type::VoidTy, AbortFCache); + break; + } + case Intrinsic::ctpop: + CI->replaceAllUsesWith(LowerCTPOP(CI->getOperand(1), CI)); + break; + + case Intrinsic::bswap: + CI->replaceAllUsesWith(LowerBSWAP(CI->getOperand(1), CI)); + break; + + case Intrinsic::ctlz: + CI->replaceAllUsesWith(LowerCTLZ(CI->getOperand(1), CI)); + break; + + case Intrinsic::cttz: { + // cttz(x) -> ctpop(~X & (X-1)) + Value *Src = CI->getOperand(1); + Value *NotSrc = BinaryOperator::createNot(Src, Src->getName()+".not", CI); + Value *SrcM1 = ConstantInt::get(Src->getType(), 1); + SrcM1 = BinaryOperator::createSub(Src, SrcM1, "", CI); + Src = LowerCTPOP(BinaryOperator::createAnd(NotSrc, SrcM1, "", CI), CI); + CI->replaceAllUsesWith(Src); + break; + } + + case Intrinsic::part_select: + CI->replaceAllUsesWith(LowerPartSelect(CI)); + break; + + case Intrinsic::part_set: + CI->replaceAllUsesWith(LowerPartSet(CI)); + break; + + case Intrinsic::stacksave: + case Intrinsic::stackrestore: { + static bool Warned = false; + if (!Warned) + cerr << "WARNING: this target does not support the llvm.stack" + << (Callee->getIntrinsicID() == Intrinsic::stacksave ? + "save" : "restore") << " intrinsic.\n"; + Warned = true; + if (Callee->getIntrinsicID() == Intrinsic::stacksave) + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + break; + } + + case Intrinsic::returnaddress: + case Intrinsic::frameaddress: + cerr << "WARNING: this target does not support the llvm." + << (Callee->getIntrinsicID() == Intrinsic::returnaddress ? + "return" : "frame") << "address intrinsic.\n"; + CI->replaceAllUsesWith(ConstantPointerNull::get( + cast<PointerType>(CI->getType()))); + break; + + case Intrinsic::prefetch: + break; // Simply strip out prefetches on unsupported architectures + + case Intrinsic::pcmarker: + break; // Simply strip out pcmarker on unsupported architectures + case Intrinsic::readcyclecounter: { + cerr << "WARNING: this target does not support the llvm.readcyclecoun" + << "ter intrinsic. It is being lowered to a constant 0\n"; + CI->replaceAllUsesWith(ConstantInt::get(Type::Int64Ty, 0)); + break; + } + + case Intrinsic::dbg_stoppoint: + case Intrinsic::dbg_region_start: + case Intrinsic::dbg_region_end: + case Intrinsic::dbg_func_start: + case Intrinsic::dbg_declare: + break; // Simply strip out debugging intrinsics + + case Intrinsic::eh_exception: + case Intrinsic::eh_selector: + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + break; + + case Intrinsic::eh_typeid_for: + // Return something different to eh_selector. 
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); + break; + + case Intrinsic::var_annotation: + break; // Strip out annotate intrinsic + + case Intrinsic::memcpy_i32: + case Intrinsic::memcpy_i64: { + static Constant *MemcpyFCache = 0; + Value *Size = CI->getOperand(3); + const Type *IntPtr = TD.getIntPtrType(); + if (Size->getType()->getPrimitiveSizeInBits() < + IntPtr->getPrimitiveSizeInBits()) + Size = new ZExtInst(Size, IntPtr, "", CI); + else if (Size->getType()->getPrimitiveSizeInBits() > + IntPtr->getPrimitiveSizeInBits()) + Size = new TruncInst(Size, IntPtr, "", CI); + Value *Ops[3]; + Ops[0] = CI->getOperand(1); + Ops[1] = CI->getOperand(2); + Ops[2] = Size; + ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType(), + MemcpyFCache); + break; + } + case Intrinsic::memmove_i32: + case Intrinsic::memmove_i64: { + static Constant *MemmoveFCache = 0; + Value *Size = CI->getOperand(3); + const Type *IntPtr = TD.getIntPtrType(); + if (Size->getType()->getPrimitiveSizeInBits() < + IntPtr->getPrimitiveSizeInBits()) + Size = new ZExtInst(Size, IntPtr, "", CI); + else if (Size->getType()->getPrimitiveSizeInBits() > + IntPtr->getPrimitiveSizeInBits()) + Size = new TruncInst(Size, IntPtr, "", CI); + Value *Ops[3]; + Ops[0] = CI->getOperand(1); + Ops[1] = CI->getOperand(2); + Ops[2] = Size; + ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType(), + MemmoveFCache); + break; + } + case Intrinsic::memset_i32: + case Intrinsic::memset_i64: { + static Constant *MemsetFCache = 0; + Value *Size = CI->getOperand(3); + const Type *IntPtr = TD.getIntPtrType(); + if (Size->getType()->getPrimitiveSizeInBits() < + IntPtr->getPrimitiveSizeInBits()) + Size = new ZExtInst(Size, IntPtr, "", CI); + else if (Size->getType()->getPrimitiveSizeInBits() > + IntPtr->getPrimitiveSizeInBits()) + Size = new TruncInst(Size, IntPtr, "", CI); + Value *Ops[3]; + Ops[0] = CI->getOperand(1); + // Extend the amount to i32. + Ops[1] = new ZExtInst(CI->getOperand(2), Type::Int32Ty, "", CI); + Ops[2] = Size; + ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType(), + MemsetFCache); + break; + } + case Intrinsic::sqrt_f32: { + static Constant *sqrtfFCache = 0; + ReplaceCallWith("sqrtf", CI, CI->op_begin()+1, CI->op_end(), + Type::FloatTy, sqrtfFCache); + break; + } + case Intrinsic::sqrt_f64: { + static Constant *sqrtFCache = 0; + ReplaceCallWith("sqrt", CI, CI->op_begin()+1, CI->op_end(), + Type::DoubleTy, sqrtFCache); + break; + } + } + + assert(CI->use_empty() && + "Lowering should have eliminated any uses of the intrinsic call!"); + CI->eraseFromParent(); +} diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp new file mode 100644 index 0000000..b72704b --- /dev/null +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -0,0 +1,205 @@ +//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LLVMTargetMachine class. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
+ cl::desc("Print LLVM IR produced by the loop-reduce pass"));
+static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
+ cl::desc("Print LLVM IR input to isel pass"));
+
+FileModel::Model
+LLVMTargetMachine::addPassesToEmitFile(FunctionPassManager &PM,
+ std::ostream &Out,
+ CodeGenFileType FileType,
+ bool Fast) {
+ // Standard LLVM-Level Passes.
+
+ // Run loop strength reduction before anything else.
+ if (!Fast) {
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ if (PrintLSR)
+ PM.add(new PrintFunctionPass("\n\n*** Code after LSR *** \n", &cerr));
+ }
+
+ // FIXME: Implement efficient support for garbage collection intrinsics.
+ PM.add(createLowerGCPass());
+
+ if (!ExceptionHandling)
+ PM.add(createLowerInvokePass(getTargetLowering()));
+
+ // Make sure that no unreachable blocks are instruction selected.
+ PM.add(createUnreachableBlockEliminationPass());
+
+ if (!Fast)
+ PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+ if (PrintISelInput)
+ PM.add(new PrintFunctionPass("\n\n*** Final LLVM Code input to ISel *** \n",
+ &cerr));
+
+ // Ask the target for an isel.
+ if (addInstSelector(PM, Fast))
+ return FileModel::Error;
+
+ // Print the instruction selected machine code...
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ // Perform register allocation to convert to a concrete machine
+ // representation.
+ PM.add(createRegisterAllocator());
+
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ // Run post-ra passes.
+ if (addPostRegAlloc(PM, Fast) && PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ PM.add(createPrologEpilogCodeInserter());
+
+ // Second pass scheduler.
+ if (!Fast)
+ PM.add(createPostRAScheduler());
+
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ if (!Fast)
+ PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
+
+ // Fold redundant debug labels.
+ PM.add(createDebugLabelFoldingPass());
+
+ if (PrintMachineCode) // Print the register-allocated code
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ if (addPreEmitPass(PM, Fast) && PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(cerr));
+
+ switch (FileType) {
+ default:
+ break;
+ case TargetMachine::AssemblyFile:
+ if (addAssemblyEmitter(PM, Fast, Out))
+ return FileModel::Error;
+ return FileModel::AsmFile;
+ case TargetMachine::ObjectFile:
+ if (getMachOWriterInfo())
+ return FileModel::MachOFile;
+ else if (getELFWriterInfo())
+ return FileModel::ElfFile;
+ }
+
+ return FileModel::Error;
+}
+
+/// addPassesToEmitFileFinish - If the passes to emit the specified file had to
+/// be split up (e.g., to add an object writer pass), this method can be used
+/// to finish up adding passes to emit the file, if necessary.
+bool LLVMTargetMachine::addPassesToEmitFileFinish(FunctionPassManager &PM,
+                                                  MachineCodeEmitter *MCE,
+                                                  bool Fast) {
+  if (MCE)
+    addSimpleCodeEmitter(PM, Fast, *MCE);
+
+  // Delete machine code for this function
+  PM.add(createMachineCodeDeleter());
+
+  return false; // success!
+}
+
+/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
+/// get machine code emitted. This uses a MachineCodeEmitter object to handle
+/// actually outputting the machine code and resolving things like the address
+/// of functions. This method should return true if machine code emission is
+/// not supported.
+///
+bool LLVMTargetMachine::addPassesToEmitMachineCode(FunctionPassManager &PM,
+                                                   MachineCodeEmitter &MCE,
+                                                   bool Fast) {
+  // Standard LLVM-Level Passes.
+
+  // Run loop strength reduction before anything else.
+  if (!Fast) {
+    PM.add(createLoopStrengthReducePass(getTargetLowering()));
+    if (PrintLSR)
+      PM.add(new PrintFunctionPass("\n\n*** Code after LSR *** \n", &cerr));
+  }
+
+  // FIXME: Implement efficient support for garbage collection intrinsics.
+  PM.add(createLowerGCPass());
+
+  // FIXME: Implement the invoke/unwind instructions!
+  PM.add(createLowerInvokePass(getTargetLowering()));
+
+  // Make sure that no unreachable blocks are instruction selected.
+  PM.add(createUnreachableBlockEliminationPass());
+
+  if (!Fast)
+    PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+  if (PrintISelInput)
+    PM.add(new PrintFunctionPass("\n\n*** Final LLVM Code input to ISel *** \n",
+                                 &cerr));
+
+  // Ask the target for an isel.
+  if (addInstSelector(PM, Fast))
+    return true;
+
+  // Print the instruction selected machine code...
+  if (PrintMachineCode)
+    PM.add(createMachineFunctionPrinterPass(cerr));
+
+  // Perform register allocation to convert to a concrete x86 representation
+  PM.add(createRegisterAllocator());
+
+  if (PrintMachineCode)
+    PM.add(createMachineFunctionPrinterPass(cerr));
+
+  // Run post-ra passes.
+  if (addPostRegAlloc(PM, Fast) && PrintMachineCode)
+    PM.add(createMachineFunctionPrinterPass(cerr));
+
+  // Insert prolog/epilog code. Eliminate abstract frame index references...
+  PM.add(createPrologEpilogCodeInserter());
+
+  if (PrintMachineCode)  // Print the register-allocated code
+    PM.add(createMachineFunctionPrinterPass(cerr));
+
+  // Second pass scheduler.
+  if (!Fast)
+    PM.add(createPostRAScheduler());
+
+  // Branch folding must be run after regalloc and prolog/epilog insertion.
+  if (!Fast)
+    PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
+
+  if (addPreEmitPass(PM, Fast) && PrintMachineCode)
+    PM.add(createMachineFunctionPrinterPass(cerr));
+
+  addCodeEmitter(PM, Fast, MCE);
+
+  // Delete machine code for this function
+  PM.add(createMachineCodeDeleter());
+
+  return false; // success!
+}
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
new file mode 100644
index 0000000..45c1dd0
--- /dev/null
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -0,0 +1,526 @@
+//===-- LiveInterval.cpp - Live Interval Representation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveRange and LiveInterval classes.
Given some
+// numbering of each of the machine instructions an interval [i, j) is said to
+// be a live interval for register v if there is no instruction with number
+// j' > j such that v is live at j' and there is no instruction with number
+// i' < i such that v is live at i'. In this implementation intervals can have
+// holes, i.e. an interval might look like [1,20), [50,65), [1000,1001). Each
+// individual range is represented as an instance of LiveRange, and the whole
+// interval is represented as an instance of LiveInterval.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include <algorithm>
+#include <map>
+#include <ostream>
+using namespace llvm;
+
+// An example for liveAt():
+//
+// this = [1,4), liveAt(0) will return false. The instruction defining this
+// spans slots [0,3]. The interval belongs to a spilled definition of the
+// variable it represents. This is because slot 1 is used (def slot) and spans
+// up to slot 3 (store slot).
+//
+bool LiveInterval::liveAt(unsigned I) const {
+  Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
+
+  if (r == ranges.begin())
+    return false;
+
+  --r;
+  return r->contains(I);
+}
+
+// overlaps - Return true if the intersection of the two live intervals is
+// not empty.
+//
+// An example for overlaps():
+//
+// 0: A = ...
+// 4: B = ...
+// 8: C = A + B ;; last use of A
+//
+// The live intervals should look like:
+//
+// A = [3, 11)
+// B = [7, x)
+// C = [11, y)
+//
+// A->overlaps(C) should return false since we want to be able to join
+// A and C.
+//
+bool LiveInterval::overlapsFrom(const LiveInterval& other,
+                                const_iterator StartPos) const {
+  const_iterator i = begin();
+  const_iterator ie = end();
+  const_iterator j = StartPos;
+  const_iterator je = other.end();
+
+  assert((StartPos->start <= i->start || StartPos == other.begin()) &&
+         StartPos != other.end() && "Bogus start position hint!");
+
+  if (i->start < j->start) {
+    i = std::upper_bound(i, ie, j->start);
+    if (i != ranges.begin()) --i;
+  } else if (j->start < i->start) {
+    ++StartPos;
+    if (StartPos != other.end() && StartPos->start <= i->start) {
+      assert(StartPos < other.end() && i < end());
+      j = std::upper_bound(j, je, i->start);
+      if (j != other.ranges.begin()) --j;
+    }
+  } else {
+    return true;
+  }
+
+  if (j == je) return false;
+
+  while (i != ie) {
+    if (i->start > j->start) {
+      std::swap(i, j);
+      std::swap(ie, je);
+    }
+
+    if (i->end > j->start)
+      return true;
+    ++i;
+  }
+
+  return false;
+}
+
+/// extendIntervalEndTo - This method is used when we want to extend the range
+/// specified by I to end at the specified endpoint. To do this, we should
+/// merge and eliminate all ranges that this will overlap with. The iterator is
+/// not invalidated.
+void LiveInterval::extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd) {
+  assert(I != ranges.end() && "Not a valid interval!");
+  unsigned ValId = I->ValId;
+
+  // Search for the first interval that we can't merge with.
+  Ranges::iterator MergeTo = next(I);
+  for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) {
+    assert(MergeTo->ValId == ValId && "Cannot merge with differing values!");
+  }
+
+  // If NewEnd was in the middle of an interval, make sure to get its endpoint.
+  I->end = std::max(NewEnd, prior(MergeTo)->end);
+
+  // Erase any dead ranges.
+ ranges.erase(next(I), MergeTo); + + // If the newly formed range now touches the range after it and if they have + // the same value number, merge the two ranges into one range. + Ranges::iterator Next = next(I); + if (Next != ranges.end() && Next->start <= I->end && Next->ValId == ValId) { + I->end = Next->end; + ranges.erase(Next); + } +} + + +/// extendIntervalStartTo - This method is used when we want to extend the range +/// specified by I to start at the specified endpoint. To do this, we should +/// merge and eliminate all ranges that this will overlap with. +LiveInterval::Ranges::iterator +LiveInterval::extendIntervalStartTo(Ranges::iterator I, unsigned NewStart) { + assert(I != ranges.end() && "Not a valid interval!"); + unsigned ValId = I->ValId; + + // Search for the first interval that we can't merge with. + Ranges::iterator MergeTo = I; + do { + if (MergeTo == ranges.begin()) { + I->start = NewStart; + ranges.erase(MergeTo, I); + return I; + } + assert(MergeTo->ValId == ValId && "Cannot merge with differing values!"); + --MergeTo; + } while (NewStart <= MergeTo->start); + + // If we start in the middle of another interval, just delete a range and + // extend that interval. + if (MergeTo->end >= NewStart && MergeTo->ValId == ValId) { + MergeTo->end = I->end; + } else { + // Otherwise, extend the interval right after. + ++MergeTo; + MergeTo->start = NewStart; + MergeTo->end = I->end; + } + + ranges.erase(next(MergeTo), next(I)); + return MergeTo; +} + +LiveInterval::iterator +LiveInterval::addRangeFrom(LiveRange LR, iterator From) { + unsigned Start = LR.start, End = LR.end; + iterator it = std::upper_bound(From, ranges.end(), Start); + + // If the inserted interval starts in the middle or right at the end of + // another interval, just extend that interval to contain the range of LR. + if (it != ranges.begin()) { + iterator B = prior(it); + if (LR.ValId == B->ValId) { + if (B->start <= Start && B->end >= Start) { + extendIntervalEndTo(B, End); + return B; + } + } else { + // Check to make sure that we are not overlapping two live ranges with + // different ValId's. + assert(B->end <= Start && + "Cannot overlap two LiveRanges with differing ValID's" + " (did you def the same reg twice in a MachineInstr?)"); + } + } + + // Otherwise, if this range ends in the middle of, or right next to, another + // interval, merge it into that interval. + if (it != ranges.end()) + if (LR.ValId == it->ValId) { + if (it->start <= End) { + it = extendIntervalStartTo(it, Start); + + // If LR is a complete superset of an interval, we may need to grow its + // endpoint as well. + if (End > it->end) + extendIntervalEndTo(it, End); + return it; + } + } else { + // Check to make sure that we are not overlapping two live ranges with + // different ValId's. + assert(it->start >= End && + "Cannot overlap two LiveRanges with differing ValID's"); + } + + // Otherwise, this is just a new range that doesn't interact with anything. + // Insert it. + return ranges.insert(it, LR); +} + + +/// removeRange - Remove the specified range from this interval. Note that +/// the range must already be in this interval in its entirety. +void LiveInterval::removeRange(unsigned Start, unsigned End) { + // Find the LiveRange containing this span. 
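+  // The ranges are sorted by start point, so upper_bound returns the first
+  // range starting after Start; the range just before it (if any) is the
+  // only one that can contain the span.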
+  Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
+  assert(I != ranges.begin() && "Range is not in interval!");
+  --I;
+  assert(I->contains(Start) && I->contains(End-1) &&
+         "Range is not entirely in interval!");
+
+  // If the span we are removing is at the start of the LiveRange, adjust it.
+  if (I->start == Start) {
+    if (I->end == End)
+      ranges.erase(I);  // Removed the whole LiveRange.
+    else
+      I->start = End;
+    return;
+  }
+
+  // Otherwise if the span we are removing is at the end of the LiveRange,
+  // adjust the other way.
+  if (I->end == End) {
+    I->end = Start;
+    return;
+  }
+
+  // Otherwise, we are splitting the LiveRange into two pieces.
+  unsigned OldEnd = I->end;
+  I->end = Start;   // Trim the old interval.
+
+  // Insert the new one.
+  ranges.insert(next(I), LiveRange(End, OldEnd, I->ValId));
+}
+
+/// FindLiveRangeContaining - Return the live range that contains the
+/// specified index, or end() if there is none.
+LiveInterval::const_iterator
+LiveInterval::FindLiveRangeContaining(unsigned Idx) const {
+  const_iterator It = std::upper_bound(begin(), end(), Idx);
+  if (It != ranges.begin()) {
+    --It;
+    if (It->contains(Idx))
+      return It;
+  }
+
+  return end();
+}
+
+LiveInterval::iterator
+LiveInterval::FindLiveRangeContaining(unsigned Idx) {
+  iterator It = std::upper_bound(begin(), end(), Idx);
+  if (It != begin()) {
+    --It;
+    if (It->contains(Idx))
+      return It;
+  }
+
+  return end();
+}
+
+/// join - Join two live intervals (this, and other) together. This applies
+/// mappings to the value numbers in the LHS/RHS intervals as specified. If
+/// the intervals are not joinable, this aborts.
+void LiveInterval::join(LiveInterval &Other, int *LHSValNoAssignments,
+                        int *RHSValNoAssignments,
+                        SmallVector<std::pair<unsigned,
+                                              unsigned>, 16> &NewValueNumberInfo) {
+
+  // Try to do the least amount of work possible. In particular, if there are
+  // more liverange chunks in the other set than there are in the 'this' set,
+  // swap sets so that we merge in the fewest chunks possible.
+  //
+  // Also, if one range is a physreg and one is a vreg, we always merge from the
+  // vreg into the physreg, which leaves the vreg intervals pristine.
+  if ((Other.ranges.size() > ranges.size() &&
+       MRegisterInfo::isVirtualRegister(reg)) ||
+      MRegisterInfo::isPhysicalRegister(Other.reg)) {
+    swap(Other);
+    std::swap(LHSValNoAssignments, RHSValNoAssignments);
+  }
+
+  // Determine if any of our live range values are mapped. This is uncommon, so
+  // we want to avoid the interval scan if not.
+  bool MustMapCurValNos = false;
+  for (unsigned i = 0, e = getNumValNums(); i != e; ++i) {
+    if (ValueNumberInfo[i].first == ~2U) continue;  // tombstone value #
+    if (i != (unsigned)LHSValNoAssignments[i]) {
+      MustMapCurValNos = true;
+      break;
+    }
+  }
+
+  // If we have to apply a mapping to our base interval assignment, rewrite it
+  // now.
+  if (MustMapCurValNos) {
+    // Map the first live range.
+    iterator OutIt = begin();
+    OutIt->ValId = LHSValNoAssignments[OutIt->ValId];
+    ++OutIt;
+    for (iterator I = OutIt, E = end(); I != E; ++I) {
+      OutIt->ValId = LHSValNoAssignments[I->ValId];
+
+      // If this live range has the same value # as its immediate predecessor,
+      // and if they are neighbors, remove one LiveRange. This happens when we
+      // have [0,3:0)[4,7:1) and map 0/1 onto the same value #.
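+      // Once both carry the same value # and the first range ends exactly
+      // where the second begins, the two are folded into one below.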
+      if (OutIt->ValId == (OutIt-1)->ValId && (OutIt-1)->end == OutIt->start) {
+        (OutIt-1)->end = OutIt->end;
+      } else {
+        if (I != OutIt) {
+          OutIt->start = I->start;
+          OutIt->end = I->end;
+        }
+
+        // Didn't merge, on to the next one.
+        ++OutIt;
+      }
+    }
+
+    // If we merge some live ranges, chop off the end.
+    ranges.erase(OutIt, end());
+  }
+
+  // Okay, now insert the RHS live ranges into the LHS.
+  iterator InsertPos = begin();
+  for (iterator I = Other.begin(), E = Other.end(); I != E; ++I) {
+    // Map the ValId in the other live range to the current live range.
+    I->ValId = RHSValNoAssignments[I->ValId];
+    InsertPos = addRangeFrom(*I, InsertPos);
+  }
+
+  ValueNumberInfo.clear();
+  ValueNumberInfo.append(NewValueNumberInfo.begin(), NewValueNumberInfo.end());
+  weight += Other.weight;
+  if (Other.preference && !preference)
+    preference = Other.preference;
+}
+
+/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
+/// interval as the specified value number. The LiveRanges in RHS are
+/// allowed to overlap with LiveRanges in the current interval, but only if
+/// the overlapping LiveRanges have the specified value number.
+void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
+                                        unsigned LHSValNo) {
+  // TODO: Make this more efficient.
+  iterator InsertPos = begin();
+  for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+    // Map the ValId in the other live range to the current live range.
+    LiveRange Tmp = *I;
+    Tmp.ValId = LHSValNo;
+    InsertPos = addRangeFrom(Tmp, InsertPos);
+  }
+}
+
+
+/// MergeInClobberRanges - For any live ranges that are not defined in the
+/// current interval, but are defined in the Clobbers interval, mark them
+/// used with an unknown definition value.
+void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers) {
+  if (Clobbers.begin() == Clobbers.end()) return;
+
+  // Find a value # to use for the clobber ranges. If there is already a value#
+  // for unknown values, use it.
+  // FIXME: Use a single sentinel number for these!
+  unsigned ClobberValNo = getNextValue(~0U, 0);
+
+  iterator IP = begin();
+  for (const_iterator I = Clobbers.begin(), E = Clobbers.end(); I != E; ++I) {
+    unsigned Start = I->start, End = I->end;
+    IP = std::upper_bound(IP, end(), Start);
+
+    // If the start of this range overlaps with an existing liverange, trim it.
+    if (IP != begin() && IP[-1].end > Start) {
+      Start = IP[-1].end;
+      // Trimmed away the whole range?
+      if (Start >= End) continue;
+    }
+    // If the end of this range overlaps with an existing liverange, trim it.
+    if (IP != end() && End > IP->start) {
+      End = IP->start;
+      // If this trimmed away the whole range, ignore it.
+      if (Start == End) continue;
+    }
+
+    // Insert the clobber interval.
+    IP = addRangeFrom(LiveRange(Start, End, ClobberValNo), IP);
+  }
+}
+
+/// MergeValueNumberInto - This method is called when two value numbers
+/// are found to be equivalent. This eliminates V1, replacing all
+/// LiveRanges with the V1 value number with the V2 value number. This can
+/// cause merging of V1/V2 value numbers and compaction of the value space.
+void LiveInterval::MergeValueNumberInto(unsigned V1, unsigned V2) {
+  assert(V1 != V2 && "Identical value#'s are always equivalent!");
+
+  // This code actually merges the (numerically) larger value number into the
+  // smaller value number, which is likely to allow us to compactify the value
+  // space. The only thing we have to be careful of is to preserve the
+  // instruction that defines the result value.
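+  //
+  // For example, merging value #2 into value #1 in [0,4:1)[4,8:2) relabels
+  // the second range to #1; the two ranges then touch and fold into the
+  // single range [0,8:1).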
+ + // Make sure V2 is smaller than V1. + if (V1 < V2) { + setValueNumberInfo(V1, getValNumInfo(V2)); + std::swap(V1, V2); + } + + // Merge V1 live ranges into V2. + for (iterator I = begin(); I != end(); ) { + iterator LR = I++; + if (LR->ValId != V1) continue; // Not a V1 LiveRange. + + // Okay, we found a V1 live range. If it had a previous, touching, V2 live + // range, extend it. + if (LR != begin()) { + iterator Prev = LR-1; + if (Prev->ValId == V2 && Prev->end == LR->start) { + Prev->end = LR->end; + + // Erase this live-range. + ranges.erase(LR); + I = Prev+1; + LR = Prev; + } + } + + // Okay, now we have a V1 or V2 live range that is maximally merged forward. + // Ensure that it is a V2 live-range. + LR->ValId = V2; + + // If we can merge it into later V2 live ranges, do so now. We ignore any + // following V1 live ranges, as they will be merged in subsequent iterations + // of the loop. + if (I != end()) { + if (I->start == LR->end && I->ValId == V2) { + LR->end = I->end; + ranges.erase(I); + I = LR+1; + } + } + } + + // Now that V1 is dead, remove it. If it is the largest value number, just + // nuke it (and any other deleted values neighboring it), otherwise mark it as + // ~1U so it can be nuked later. + if (V1 == getNumValNums()-1) { + do { + ValueNumberInfo.pop_back(); + } while (ValueNumberInfo.back().first == ~1U); + } else { + ValueNumberInfo[V1].first = ~1U; + } +} + +unsigned LiveInterval::getSize() const { + unsigned Sum = 0; + for (const_iterator I = begin(), E = end(); I != E; ++I) + Sum += I->end - I->start; + return Sum; +} + +std::ostream& llvm::operator<<(std::ostream& os, const LiveRange &LR) { + return os << '[' << LR.start << ',' << LR.end << ':' << LR.ValId << ")"; +} + +void LiveRange::dump() const { + cerr << *this << "\n"; +} + +void LiveInterval::print(std::ostream &OS, const MRegisterInfo *MRI) const { + if (MRI && MRegisterInfo::isPhysicalRegister(reg)) + OS << MRI->getName(reg); + else + OS << "%reg" << reg; + + OS << ',' << weight; + + if (empty()) + OS << "EMPTY"; + else { + OS << " = "; + for (LiveInterval::Ranges::const_iterator I = ranges.begin(), + E = ranges.end(); I != E; ++I) + OS << *I; + } + + // Print value number info. + if (getNumValNums()) { + OS << " "; + for (unsigned i = 0; i != getNumValNums(); ++i) { + if (i) OS << " "; + OS << i << "@"; + if (ValueNumberInfo[i].first == ~0U) { + OS << "?"; + } else { + OS << ValueNumberInfo[i].first; + } + } + } +} + +void LiveInterval::dump() const { + cerr << *this << "\n"; +} + + +void LiveRange::print(std::ostream &os) const { + os << *this; +} diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp new file mode 100644 index 0000000..369493f --- /dev/null +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -0,0 +1,692 @@ +//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LiveInterval analysis pass which is used +// by the Linear Scan Register allocator. This pass linearizes the +// basic blocks of the function in DFS order and uses the +// LiveVariables pass to conservatively compute live intervals for +// each virtual and physical register. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "liveintervals"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "VirtRegMap.h"
+#include "llvm/Value.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+STATISTIC(numIntervals, "Number of original intervals");
+STATISTIC(numIntervalsAfter, "Number of intervals after coalescing");
+STATISTIC(numFolded   , "Number of loads/stores folded into instructions");
+
+char LiveIntervals::ID = 0;
+namespace {
+  RegisterPass<LiveIntervals> X("liveintervals", "Live Interval Analysis");
+}
+
+void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addPreserved<LiveVariables>();
+  AU.addRequired<LiveVariables>();
+  AU.addPreservedID(PHIEliminationID);
+  AU.addRequiredID(PHIEliminationID);
+  AU.addRequiredID(TwoAddressInstructionPassID);
+  AU.addRequired<LoopInfo>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveIntervals::releaseMemory() {
+  mi2iMap_.clear();
+  i2miMap_.clear();
+  r2iMap_.clear();
+}
+
+/// runOnMachineFunction - Compute live intervals for the whole function.
+///
+bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
+  mf_ = &fn;
+  tm_ = &fn.getTarget();
+  mri_ = tm_->getRegisterInfo();
+  tii_ = tm_->getInstrInfo();
+  lv_ = &getAnalysis<LiveVariables>();
+  allocatableRegs_ = mri_->getAllocatableSet(fn);
+
+  // Number MachineInstrs and MachineBasicBlocks.
+  // Initialize MBB indexes to a sentinel.
+  MBB2IdxMap.resize(mf_->getNumBlockIDs(), ~0U);
+
+  unsigned MIIndex = 0;
+  for (MachineFunction::iterator MBB = mf_->begin(), E = mf_->end();
+       MBB != E; ++MBB) {
+    // Set the MBB2IdxMap entry for this MBB.
+    MBB2IdxMap[MBB->getNumber()] = MIIndex;
+
+    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+         I != E; ++I) {
+      bool inserted = mi2iMap_.insert(std::make_pair(I, MIIndex)).second;
+      assert(inserted && "multiple MachineInstr -> index mappings");
+      i2miMap_.push_back(I);
+      MIIndex += InstrSlots::NUM;
+    }
+  }
+
+  computeIntervals();
+
+  numIntervals += getNumIntervals();
+
+  DOUT << "********** INTERVALS **********\n";
+  for (iterator I = begin(), E = end(); I != E; ++I) {
+    I->second.print(DOUT, mri_);
+    DOUT << "\n";
+  }
+
+  numIntervalsAfter += getNumIntervals();
+  DEBUG(dump());
+  return true;
+}
+
+/// print - Implement the dump method.
+void LiveIntervals::print(std::ostream &O, const Module* ) const {
+  O << "********** INTERVALS **********\n";
+  for (const_iterator I = begin(), E = end(); I != E; ++I) {
+    I->second.print(O, mri_);
+    O << "\n";
+  }
+
+  O << "********** MACHINEINSTRS **********\n";
+  for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+       mbbi != mbbe; ++mbbi) {
+    O << ((Value*)mbbi->getBasicBlock())->getName() << ":\n";
+    for (MachineBasicBlock::iterator mii = mbbi->begin(),
+           mie = mbbi->end(); mii != mie; ++mii) {
+      O << getInstructionIndex(mii) << '\t' << *mii;
+    }
+  }
+}
+
+// Not called?
+/// CreateNewLiveInterval - Create a new live interval with the given live +/// ranges. The new live interval will have an infinite spill weight. +LiveInterval& +LiveIntervals::CreateNewLiveInterval(const LiveInterval *LI, + const std::vector<LiveRange> &LRs) { + const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(LI->reg); + + // Create a new virtual register for the spill interval. + unsigned NewVReg = mf_->getSSARegMap()->createVirtualRegister(RC); + + // Replace the old virtual registers in the machine operands with the shiny + // new one. + for (std::vector<LiveRange>::const_iterator + I = LRs.begin(), E = LRs.end(); I != E; ++I) { + unsigned Index = getBaseIndex(I->start); + unsigned End = getBaseIndex(I->end - 1) + InstrSlots::NUM; + + for (; Index != End; Index += InstrSlots::NUM) { + // Skip deleted instructions + while (Index != End && !getInstructionFromIndex(Index)) + Index += InstrSlots::NUM; + + if (Index == End) break; + + MachineInstr *MI = getInstructionFromIndex(Index); + + for (unsigned J = 0, e = MI->getNumOperands(); J != e; ++J) { + MachineOperand &MOp = MI->getOperand(J); + if (MOp.isRegister() && MOp.getReg() == LI->reg) + MOp.setReg(NewVReg); + } + } + } + + LiveInterval &NewLI = getOrCreateInterval(NewVReg); + + // The spill weight is now infinity as it cannot be spilled again + NewLI.weight = float(HUGE_VAL); + + for (std::vector<LiveRange>::const_iterator + I = LRs.begin(), E = LRs.end(); I != E; ++I) { + DOUT << " Adding live range " << *I << " to new interval\n"; + NewLI.addRange(*I); + } + + DOUT << "Created new live interval " << NewLI << "\n"; + return NewLI; +} + +std::vector<LiveInterval*> LiveIntervals:: +addIntervalsForSpills(const LiveInterval &li, VirtRegMap &vrm, int slot) { + // since this is called after the analysis is done we don't know if + // LiveVariables is available + lv_ = getAnalysisToUpdate<LiveVariables>(); + + std::vector<LiveInterval*> added; + + assert(li.weight != HUGE_VALF && + "attempt to spill already spilled interval!"); + + DOUT << "\t\t\t\tadding intervals for spills for interval: "; + li.print(DOUT, mri_); + DOUT << '\n'; + + const TargetRegisterClass* rc = mf_->getSSARegMap()->getRegClass(li.reg); + + for (LiveInterval::Ranges::const_iterator + i = li.ranges.begin(), e = li.ranges.end(); i != e; ++i) { + unsigned index = getBaseIndex(i->start); + unsigned end = getBaseIndex(i->end-1) + InstrSlots::NUM; + for (; index != end; index += InstrSlots::NUM) { + // skip deleted instructions + while (index != end && !getInstructionFromIndex(index)) + index += InstrSlots::NUM; + if (index == end) break; + + MachineInstr *MI = getInstructionFromIndex(index); + + RestartInstruction: + for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + MachineOperand& mop = MI->getOperand(i); + if (mop.isRegister() && mop.getReg() == li.reg) { + MachineInstr *fmi = li.remat ? NULL + : mri_->foldMemoryOperand(MI, i, slot); + if (fmi) { + // Attempt to fold the memory reference into the instruction. If we + // can do this, we don't need to insert spill code. + if (lv_) + lv_->instructionChanged(MI, fmi); + MachineBasicBlock &MBB = *MI->getParent(); + vrm.virtFolded(li.reg, MI, i, fmi); + mi2iMap_.erase(MI); + i2miMap_[index/InstrSlots::NUM] = fmi; + mi2iMap_[fmi] = index; + MI = MBB.insert(MBB.erase(MI), fmi); + ++numFolded; + // Folding the load/store can completely change the instruction in + // unpredictable ways, rescan it from the beginning. 
+ goto RestartInstruction; + } else { + // Create a new virtual register for the spill interval. + unsigned NewVReg = mf_->getSSARegMap()->createVirtualRegister(rc); + + // Scan all of the operands of this instruction rewriting operands + // to use NewVReg instead of li.reg as appropriate. We do this for + // two reasons: + // + // 1. If the instr reads the same spilled vreg multiple times, we + // want to reuse the NewVReg. + // 2. If the instr is a two-addr instruction, we are required to + // keep the src/dst regs pinned. + // + // Keep track of whether we replace a use and/or def so that we can + // create the spill interval with the appropriate range. + mop.setReg(NewVReg); + + bool HasUse = mop.isUse(); + bool HasDef = mop.isDef(); + for (unsigned j = i+1, e = MI->getNumOperands(); j != e; ++j) { + if (MI->getOperand(j).isReg() && + MI->getOperand(j).getReg() == li.reg) { + MI->getOperand(j).setReg(NewVReg); + HasUse |= MI->getOperand(j).isUse(); + HasDef |= MI->getOperand(j).isDef(); + } + } + + // create a new register for this spill + vrm.grow(); + if (li.remat) + vrm.setVirtIsReMaterialized(NewVReg, li.remat); + vrm.assignVirt2StackSlot(NewVReg, slot); + LiveInterval &nI = getOrCreateInterval(NewVReg); + nI.remat = li.remat; + assert(nI.empty()); + + // the spill weight is now infinity as it + // cannot be spilled again + nI.weight = HUGE_VALF; + + if (HasUse) { + LiveRange LR(getLoadIndex(index), getUseIndex(index), + nI.getNextValue(~0U, 0)); + DOUT << " +" << LR; + nI.addRange(LR); + } + if (HasDef) { + LiveRange LR(getDefIndex(index), getStoreIndex(index), + nI.getNextValue(~0U, 0)); + DOUT << " +" << LR; + nI.addRange(LR); + } + + added.push_back(&nI); + + // update live variables if it is available + if (lv_) + lv_->addVirtualRegisterKilled(NewVReg, MI); + + DOUT << "\t\t\t\tadded new interval: "; + nI.print(DOUT, mri_); + DOUT << '\n'; + } + } + } + } + } + + return added; +} + +void LiveIntervals::printRegName(unsigned reg) const { + if (MRegisterInfo::isPhysicalRegister(reg)) + cerr << mri_->getName(reg); + else + cerr << "%reg" << reg; +} + +/// isReDefinedByTwoAddr - Returns true if the Reg re-definition is due to +/// two addr elimination. +static bool isReDefinedByTwoAddr(MachineInstr *MI, unsigned Reg, + const TargetInstrInfo *TII) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO1 = MI->getOperand(i); + if (MO1.isRegister() && MO1.isDef() && MO1.getReg() == Reg) { + for (unsigned j = i+1; j < e; ++j) { + MachineOperand &MO2 = MI->getOperand(j); + if (MO2.isRegister() && MO2.isUse() && MO2.getReg() == Reg && + MI->getInstrDescriptor()-> + getOperandConstraint(j, TOI::TIED_TO) == (int)i) + return true; + } + } + } + return false; +} + +void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, + MachineBasicBlock::iterator mi, + unsigned MIIdx, + LiveInterval &interval) { + DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg)); + LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg); + + // Virtual registers may be defined multiple times (due to phi + // elimination and 2-addr elimination). Much of what we do only has to be + // done once for the vreg. We use an empty interval to detect the first + // time we see a vreg. + if (interval.empty()) { + // Remember if the definition can be rematerialized. All load's from fixed + // stack slots are re-materializable. The target may permit other + // instructions to be re-materialized as well. 
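+    // For example, a reload from a fixed stack slot (such as an incoming
+    // argument's home slot) produces the same value every time, so the
+    // definition can simply be re-executed at a use instead of being
+    // spilled and reloaded.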
+    int FrameIdx = 0;
+    if (vi.DefInst &&
+        (tii_->isTriviallyReMaterializable(vi.DefInst) ||
+         (tii_->isLoadFromStackSlot(vi.DefInst, FrameIdx) &&
+          mf_->getFrameInfo()->isFixedObjectIndex(FrameIdx))))
+      interval.remat = vi.DefInst;
+
+    // Get the Idx of the defining instruction.
+    unsigned defIndex = getDefIndex(MIIdx);
+
+    unsigned ValNum;
+    unsigned SrcReg, DstReg;
+    if (!tii_->isMoveInstr(*mi, SrcReg, DstReg))
+      ValNum = interval.getNextValue(~0U, 0);
+    else
+      ValNum = interval.getNextValue(defIndex, SrcReg);
+
+    assert(ValNum == 0 && "First value in interval is not 0?");
+    ValNum = 0;  // Clue in the optimizer.
+
+    // Loop over all of the blocks that the vreg is defined in. There are
+    // two cases we have to handle here. The most common case is a vreg
+    // whose lifetime is contained within a basic block. In this case there
+    // will be a single kill, in MBB, which comes after the definition.
+    if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) {
+      // FIXME: what about dead vars?
+      unsigned killIdx;
+      if (vi.Kills[0] != mi)
+        killIdx = getUseIndex(getInstructionIndex(vi.Kills[0]))+1;
+      else
+        killIdx = defIndex+1;
+
+      // If the kill happens after the definition, we have an intra-block
+      // live range.
+      if (killIdx > defIndex) {
+        assert(vi.AliveBlocks.none() &&
+               "Shouldn't be alive across any blocks!");
+        LiveRange LR(defIndex, killIdx, ValNum);
+        interval.addRange(LR);
+        DOUT << " +" << LR << "\n";
+        return;
+      }
+    }
+
+    // The other case we handle is when a virtual register lives to the end
+    // of the defining block, potentially live across some blocks, then is
+    // live into some number of blocks, but gets killed. Start by adding a
+    // range that goes from this definition to the end of the defining block.
+    LiveRange NewLR(defIndex,
+                    getInstructionIndex(&mbb->back()) + InstrSlots::NUM,
+                    ValNum);
+    DOUT << " +" << NewLR;
+    interval.addRange(NewLR);
+
+    // Iterate over all of the blocks that the variable is completely
+    // live in, adding [instrIndex(begin), instrIndex(end)+4) to the
+    // live interval.
+    for (unsigned i = 0, e = vi.AliveBlocks.size(); i != e; ++i) {
+      if (vi.AliveBlocks[i]) {
+        MachineBasicBlock *MBB = mf_->getBlockNumbered(i);
+        if (!MBB->empty()) {
+          LiveRange LR(getMBBStartIdx(i),
+                       getInstructionIndex(&MBB->back()) + InstrSlots::NUM,
+                       ValNum);
+          interval.addRange(LR);
+          DOUT << " +" << LR;
+        }
+      }
+    }
+
+    // Finally, this virtual register is live from the start of any killing
+    // block to the 'use' slot of the killing instruction.
+    for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
+      MachineInstr *Kill = vi.Kills[i];
+      LiveRange LR(getMBBStartIdx(Kill->getParent()),
+                   getUseIndex(getInstructionIndex(Kill))+1,
+                   ValNum);
+      interval.addRange(LR);
+      DOUT << " +" << LR;
+    }
+
+  } else {
+    // Can no longer safely assume definition is rematerializable.
+    interval.remat = NULL;
+
+    // If this is the second time we see a virtual register definition, it
+    // must be due to phi elimination or two addr elimination. If this is
+    // the result of two address elimination, then the vreg is one of the
+    // def-and-use register operands.
+    if (isReDefinedByTwoAddr(mi, interval.reg, tii_)) {
+      // If this is a two-address definition, then we have already processed
+      // the live range. The only problem is that we didn't realize there
+      // are actually two values in the live interval. Because of this we
+      // need to take the LiveRange that defines this register and split it
+      // into two values.
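+      //
+      // For example, for 'vreg = copy ...; vreg = add vreg, ...' as produced
+      // by two-address conversion, the copy and the add each define a
+      // distinct value of vreg, so the single live range must be split in
+      // two.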
+      unsigned DefIndex = getDefIndex(getInstructionIndex(vi.DefInst));
+      unsigned RedefIndex = getDefIndex(MIIdx);
+
+      // Delete the initial value, which should be short and continuous,
+      // because the 2-addr copy must be in the same MBB as the redef.
+      interval.removeRange(DefIndex, RedefIndex);
+
+      // Two-address vregs should only be redefined once. This means
+      // that at this point, there should be exactly one value number in it.
+      assert(interval.containsOneValue() && "Unexpected 2-addr liveint!");
+
+      // The new value number (#1) is defined by the instruction we claimed
+      // defined value #0.
+      unsigned ValNo = interval.getNextValue(0, 0);
+      interval.setValueNumberInfo(1, interval.getValNumInfo(0));
+
+      // Value#0 is now defined by the 2-addr instruction.
+      interval.setValueNumberInfo(0, std::make_pair(~0U, 0U));
+
+      // Add the new live interval which replaces the range for the input copy.
+      LiveRange LR(DefIndex, RedefIndex, ValNo);
+      DOUT << " replace range with " << LR;
+      interval.addRange(LR);
+
+      // If this redefinition is dead, we need to add a dummy unit live
+      // range covering the def slot.
+      if (lv_->RegisterDefIsDead(mi, interval.reg))
+        interval.addRange(LiveRange(RedefIndex, RedefIndex+1, 0));
+
+      DOUT << " RESULT: ";
+      interval.print(DOUT, mri_);
+
+    } else {
+      // Otherwise, this must be because of phi elimination. If this is the
+      // first redefinition of the vreg that we have seen, go back and change
+      // the live range in the PHI block to be a different value number.
+      if (interval.containsOneValue()) {
+        assert(vi.Kills.size() == 1 &&
+               "PHI elimination vreg should have one kill, the PHI itself!");
+
+        // Remove the old range that we now know has an incorrect number.
+        MachineInstr *Killer = vi.Kills[0];
+        unsigned Start = getMBBStartIdx(Killer->getParent());
+        unsigned End = getUseIndex(getInstructionIndex(Killer))+1;
+        DOUT << " Removing [" << Start << "," << End << "] from: ";
+        interval.print(DOUT, mri_); DOUT << "\n";
+        interval.removeRange(Start, End);
+        DOUT << " RESULT: "; interval.print(DOUT, mri_);
+
+        // Replace the interval with one of a NEW value number. Note that this
+        // value number isn't actually defined by an instruction, weird huh? :)
+        LiveRange LR(Start, End, interval.getNextValue(~0U, 0));
+        DOUT << " replace range with " << LR;
+        interval.addRange(LR);
+        DOUT << " RESULT: "; interval.print(DOUT, mri_);
+      }
+
+      // In the case of PHI elimination, each variable definition is only
+      // live until the end of the block. We've already taken care of the
+      // rest of the live range.
+      unsigned defIndex = getDefIndex(MIIdx);
+
+      unsigned ValNum;
+      unsigned SrcReg, DstReg;
+      if (!tii_->isMoveInstr(*mi, SrcReg, DstReg))
+        ValNum = interval.getNextValue(~0U, 0);
+      else
+        ValNum = interval.getNextValue(defIndex, SrcReg);
+
+      LiveRange LR(defIndex,
+                   getInstructionIndex(&mbb->back()) + InstrSlots::NUM, ValNum);
+      interval.addRange(LR);
+      DOUT << " +" << LR;
+    }
+  }
+
+  DOUT << '\n';
+}
+
+void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
+                                              MachineBasicBlock::iterator mi,
+                                              unsigned MIIdx,
+                                              LiveInterval &interval,
+                                              unsigned SrcReg) {
+  // A physical register cannot be live across basic blocks, so its
+  // lifetime must end somewhere in its defining basic block.
+  DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg));
+
+  unsigned baseIndex = MIIdx;
+  unsigned start = getDefIndex(baseIndex);
+  unsigned end = start;
+
+  // If it is not used after definition, it is considered dead at
+  // the instruction defining it.
Hence its interval is:
+  // [defSlot(def), defSlot(def)+1)
+  if (lv_->RegisterDefIsDead(mi, interval.reg)) {
+    DOUT << " dead";
+    end = getDefIndex(start) + 1;
+    goto exit;
+  }
+
+  // If it is not dead on definition, it must be killed by a
+  // subsequent instruction. Hence its interval is:
+  // [defSlot(def), useSlot(kill)+1)
+  while (++mi != MBB->end()) {
+    baseIndex += InstrSlots::NUM;
+    if (lv_->KillsRegister(mi, interval.reg)) {
+      DOUT << " killed";
+      end = getUseIndex(baseIndex) + 1;
+      goto exit;
+    } else if (lv_->ModifiesRegister(mi, interval.reg)) {
+      // Another instruction redefines the register before it is ever read.
+      // Then the register is essentially dead at the instruction that defines
+      // it. Hence its interval is:
+      // [defSlot(def), defSlot(def)+1)
+      DOUT << " dead";
+      end = getDefIndex(start) + 1;
+      goto exit;
+    }
+  }
+
+  // The only case we should have a dead physreg here without a killing
+  // instruction where we know it's dead is if it is live-in to the function
+  // and never used.
+  assert(!SrcReg && "physreg was not killed in defining block!");
+  end = getDefIndex(start) + 1;  // It's dead.
+
+exit:
+  assert(start < end && "did not find end of interval?");
+
+  // Already exists? Extend old live interval.
+  LiveInterval::iterator OldLR = interval.FindLiveRangeContaining(start);
+  unsigned Id = (OldLR != interval.end())
+    ? OldLR->ValId
+    : interval.getNextValue(SrcReg != 0 ? start : ~0U, SrcReg);
+  LiveRange LR(start, end, Id);
+  interval.addRange(LR);
+  DOUT << " +" << LR << '\n';
+}
+
+void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
+                                      MachineBasicBlock::iterator MI,
+                                      unsigned MIIdx,
+                                      unsigned reg) {
+  if (MRegisterInfo::isVirtualRegister(reg))
+    handleVirtualRegisterDef(MBB, MI, MIIdx, getOrCreateInterval(reg));
+  else if (allocatableRegs_[reg]) {
+    unsigned SrcReg, DstReg;
+    if (!tii_->isMoveInstr(*MI, SrcReg, DstReg))
+      SrcReg = 0;
+    handlePhysicalRegisterDef(MBB, MI, MIIdx, getOrCreateInterval(reg), SrcReg);
+    // Def of a register also defines its sub-registers.
+    for (const unsigned* AS = mri_->getSubRegisters(reg); *AS; ++AS)
+      // Avoid processing some defs more than once.
+      if (!MI->findRegisterDefOperand(*AS))
+        handlePhysicalRegisterDef(MBB, MI, MIIdx, getOrCreateInterval(*AS), 0);
+  }
+}
+
+void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
+                                         unsigned MIIdx,
+                                         LiveInterval &interval, bool isAlias) {
+  DOUT << "\t\tlivein register: "; DEBUG(printRegName(interval.reg));
+
+  // Look for kills; if the register reaches a def before it's killed, then it
+  // shouldn't be considered a live-in.
+  MachineBasicBlock::iterator mi = MBB->begin();
+  unsigned baseIndex = MIIdx;
+  unsigned start = baseIndex;
+  unsigned end = start;
+  while (mi != MBB->end()) {
+    if (lv_->KillsRegister(mi, interval.reg)) {
+      DOUT << " killed";
+      end = getUseIndex(baseIndex) + 1;
+      goto exit;
+    } else if (lv_->ModifiesRegister(mi, interval.reg)) {
+      // Another instruction redefines the register before it is ever read.
+      // Then the register is essentially dead at the instruction that defines
+      // it. Hence its interval is:
+      // [defSlot(def), defSlot(def)+1)
+      DOUT << " dead";
+      end = getDefIndex(start) + 1;
+      goto exit;
+    }
+
+    baseIndex += InstrSlots::NUM;
+    ++mi;
+  }
+
+exit:
+  // Live-in register might not be used at all.
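+  // If we saw neither a kill nor a redefinition, the value is either dead on
+  // entry (only possible for an alias of another live-in) or live through
+  // the whole block; the code below picks the end point accordingly.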
+  if (end == MIIdx) {
+    if (isAlias) {
+      DOUT << " dead";
+      end = getDefIndex(MIIdx) + 1;
+    } else {
+      DOUT << " live through";
+      end = baseIndex;
+    }
+  }
+
+  LiveRange LR(start, end, interval.getNextValue(~0U, 0));
+  DOUT << " +" << LR << '\n';
+  interval.addRange(LR);
+}
+
+/// computeIntervals - computes the live intervals for virtual
+/// registers. For some ordering of the machine instructions [1,N] a
+/// live interval is an interval [i, j) where 1 <= i <= j < N for
+/// which a variable is live.
+void LiveIntervals::computeIntervals() {
+  DOUT << "********** COMPUTING LIVE INTERVALS **********\n"
+       << "********** Function: "
+       << ((Value*)mf_->getFunction())->getName() << '\n';
+  // Track the index of the current machine instr.
+  unsigned MIIndex = 0;
+  for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end();
+       MBBI != E; ++MBBI) {
+    MachineBasicBlock *MBB = MBBI;
+    DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n";
+
+    MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
+
+    if (MBB->livein_begin() != MBB->livein_end()) {
+      // Create intervals for live-ins to this BB first.
+      for (MachineBasicBlock::const_livein_iterator LI = MBB->livein_begin(),
+             LE = MBB->livein_end(); LI != LE; ++LI) {
+        handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI));
+        // Multiple live-ins can alias the same register.
+        for (const unsigned* AS = mri_->getSubRegisters(*LI); *AS; ++AS)
+          if (!hasInterval(*AS))
+            handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS),
+                                 true);
+      }
+    }
+
+    for (; MI != miEnd; ++MI) {
+      DOUT << MIIndex << "\t" << *MI;
+
+      // Handle defs.
+      for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
+        MachineOperand &MO = MI->getOperand(i);
+        // handle register defs - build intervals
+        if (MO.isRegister() && MO.getReg() && MO.isDef())
+          handleRegisterDef(MBB, MI, MIIndex, MO.getReg());
+      }
+
+      MIIndex += InstrSlots::NUM;
+    }
+  }
+}
+
+LiveInterval LiveIntervals::createInterval(unsigned reg) {
+  float Weight = MRegisterInfo::isPhysicalRegister(reg) ?
+                       HUGE_VALF : 0.0F;
+  return LiveInterval(reg, Weight);
+}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
new file mode 100644
index 0000000..504b607
--- /dev/null
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -0,0 +1,643 @@
+//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveVariable analysis pass. For each machine
+// instruction in the function, this pass calculates the set of registers that
+// are immediately dead after the instruction (i.e., the instruction calculates
+// the value, but it is never used) and the set of registers that are used by
+// the instruction, but are never used after the instruction (i.e., they are
+// killed).
+//
+// This class computes live variables using a sparse implementation based on
+// the machine code SSA form. This class computes live variable information for
+// each virtual and _register allocatable_ physical register in a function.
It +// uses the dominance properties of SSA form to efficiently compute live +// variables for virtual registers, and assumes that physical registers are only +// live within a single basic block (allowing it to do a single local analysis +// to resolve physical register lifetimes in each basic block). If a physical +// register is not register allocatable, it is not tracked. This is useful for +// things like the stack pointer and condition codes. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Target/MRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Config/alloca.h" +#include <algorithm> +using namespace llvm; + +char LiveVariables::ID = 0; +static RegisterPass<LiveVariables> X("livevars", "Live Variable Analysis"); + +void LiveVariables::VarInfo::dump() const { + cerr << "Register Defined by: "; + if (DefInst) + cerr << *DefInst; + else + cerr << "<null>\n"; + cerr << " Alive in blocks: "; + for (unsigned i = 0, e = AliveBlocks.size(); i != e; ++i) + if (AliveBlocks[i]) cerr << i << ", "; + cerr << "\n Killed by:"; + if (Kills.empty()) + cerr << " No instructions.\n"; + else { + for (unsigned i = 0, e = Kills.size(); i != e; ++i) + cerr << "\n #" << i << ": " << *Kills[i]; + cerr << "\n"; + } +} + +LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) { + assert(MRegisterInfo::isVirtualRegister(RegIdx) && + "getVarInfo: not a virtual register!"); + RegIdx -= MRegisterInfo::FirstVirtualRegister; + if (RegIdx >= VirtRegInfo.size()) { + if (RegIdx >= 2*VirtRegInfo.size()) + VirtRegInfo.resize(RegIdx*2); + else + VirtRegInfo.resize(2*VirtRegInfo.size()); + } + VarInfo &VI = VirtRegInfo[RegIdx]; + VI.AliveBlocks.resize(MF->getNumBlockIDs()); + return VI; +} + +bool LiveVariables::KillsRegister(MachineInstr *MI, unsigned Reg) const { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isKill()) { + if ((MO.getReg() == Reg) || + (MRegisterInfo::isPhysicalRegister(MO.getReg()) && + MRegisterInfo::isPhysicalRegister(Reg) && + RegInfo->isSubRegister(MO.getReg(), Reg))) + return true; + } + } + return false; +} + +bool LiveVariables::RegisterDefIsDead(MachineInstr *MI, unsigned Reg) const { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDead()) { + if ((MO.getReg() == Reg) || + (MRegisterInfo::isPhysicalRegister(MO.getReg()) && + MRegisterInfo::isPhysicalRegister(Reg) && + RegInfo->isSubRegister(MO.getReg(), Reg))) + return true; + } + } + return false; +} + +bool LiveVariables::ModifiesRegister(MachineInstr *MI, unsigned Reg) const { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) + return true; + } + return false; +} + +void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo, + MachineBasicBlock *MBB, + std::vector<MachineBasicBlock*> &WorkList) { + unsigned BBNum = MBB->getNumber(); + + // Check to see if this basic block is one of the killing blocks. If so, + // remove it... 
+  for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+    if (VRInfo.Kills[i]->getParent() == MBB) {
+      VRInfo.Kills.erase(VRInfo.Kills.begin()+i);  // Erase entry
+      break;
+    }
+
+  if (MBB == VRInfo.DefInst->getParent()) return;  // Terminate recursion
+
+  if (VRInfo.AliveBlocks[BBNum])
+    return;  // We already know the block is live
+
+  // Mark the variable known alive in this bb
+  VRInfo.AliveBlocks[BBNum] = true;
+
+  for (MachineBasicBlock::const_pred_reverse_iterator PI = MBB->pred_rbegin(),
+         E = MBB->pred_rend(); PI != E; ++PI)
+    WorkList.push_back(*PI);
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
+                                            MachineBasicBlock *MBB) {
+  std::vector<MachineBasicBlock*> WorkList;
+  MarkVirtRegAliveInBlock(VRInfo, MBB, WorkList);
+  while (!WorkList.empty()) {
+    MachineBasicBlock *Pred = WorkList.back();
+    WorkList.pop_back();
+    MarkVirtRegAliveInBlock(VRInfo, Pred, WorkList);
+  }
+}
+
+
+void LiveVariables::HandleVirtRegUse(VarInfo &VRInfo, MachineBasicBlock *MBB,
+                                     MachineInstr *MI) {
+  assert(VRInfo.DefInst && "Register use before def!");
+
+  VRInfo.NumUses++;
+
+  // Check to see if this basic block is already a kill block...
+  if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
+    // Yes, this register is killed in this basic block already. Increase the
+    // live range by updating the kill instruction.
+    VRInfo.Kills.back() = MI;
+    return;
+  }
+
+#ifndef NDEBUG
+  for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+    assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!");
+#endif
+
+  assert(MBB != VRInfo.DefInst->getParent() &&
+         "Should have kill for defblock!");
+
+  // Add a new kill entry for this basic block.
+  // If this virtual register is already marked as alive in this basic block,
+  // that means it is alive in at least one of the successor blocks, so it's
+  // not a kill.
+  if (!VRInfo.AliveBlocks[MBB->getNumber()])
+    VRInfo.Kills.push_back(MI);
+
+  // Update all dominating blocks to mark them known live.
+  for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+         E = MBB->pred_end(); PI != E; ++PI)
+    MarkVirtRegAliveInBlock(VRInfo, *PI);
+}
+
+bool LiveVariables::addRegisterKilled(unsigned IncomingReg, MachineInstr *MI,
+                                      bool AddIfNotFound) {
+  bool Found = false;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isUse()) {
+      unsigned Reg = MO.getReg();
+      if (!Reg)
+        continue;
+      if (Reg == IncomingReg) {
+        MO.setIsKill();
+        Found = true;
+        break;
+      } else if (MRegisterInfo::isPhysicalRegister(Reg) &&
+                 MRegisterInfo::isPhysicalRegister(IncomingReg) &&
+                 RegInfo->isSuperRegister(IncomingReg, Reg) &&
+                 MO.isKill())
+        // A super-register kill already exists.
+        return true;
+    }
+  }
+
+  // If not found, this means an alias of one of the operands is killed. Add a
+  // new implicit operand if required.
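+  // e.g. if MI's operands only mention a kill of AX but all of EAX dies
+  // here, an implicit kill operand for EAX is attached to MI.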
+  if (!Found && AddIfNotFound) {
+    MI->addRegOperand(IncomingReg, false/*IsDef*/,true/*IsImp*/,true/*IsKill*/);
+    return true;
+  }
+  return Found;
+}
+
+bool LiveVariables::addRegisterDead(unsigned IncomingReg, MachineInstr *MI,
+                                    bool AddIfNotFound) {
+  bool Found = false;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isDef()) {
+      unsigned Reg = MO.getReg();
+      if (!Reg)
+        continue;
+      if (Reg == IncomingReg) {
+        MO.setIsDead();
+        Found = true;
+        break;
+      } else if (MRegisterInfo::isPhysicalRegister(Reg) &&
+                 MRegisterInfo::isPhysicalRegister(IncomingReg) &&
+                 RegInfo->isSuperRegister(IncomingReg, Reg) &&
+                 MO.isDead())
+        // There exists a super-register that's marked dead.
+        return true;
+    }
+  }
+
+  // If not found, this means an alias of one of the operands is dead. Add a
+  // new implicit operand.
+  if (!Found && AddIfNotFound) {
+    MI->addRegOperand(IncomingReg, true/*IsDef*/,true/*IsImp*/,false/*IsKill*/,
+                      true/*IsDead*/);
+    return true;
+  }
+  return Found;
+}
+
+void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
+  // There is now a proper use, forget about the last partial use.
+  PhysRegPartUse[Reg] = NULL;
+
+  // Turn previous partial defs into read/mod/write.
+  for (unsigned i = 0, e = PhysRegPartDef[Reg].size(); i != e; ++i) {
+    MachineInstr *Def = PhysRegPartDef[Reg][i];
+    // First one is just a def. This means the use is reading some undef bits.
+    if (i != 0)
+      Def->addRegOperand(Reg, false/*IsDef*/,true/*IsImp*/,true/*IsKill*/);
+    Def->addRegOperand(Reg, true/*IsDef*/,true/*IsImp*/);
+  }
+  PhysRegPartDef[Reg].clear();
+
+  // There was an earlier def of a super-register. Add implicit def to that MI.
+  // A: EAX = ...
+  // B:     = AX
+  // Add implicit def to A.
+  if (PhysRegInfo[Reg] && !PhysRegUsed[Reg]) {
+    MachineInstr *Def = PhysRegInfo[Reg];
+    if (!Def->findRegisterDefOperand(Reg))
+      Def->addRegOperand(Reg, true/*IsDef*/,true/*IsImp*/);
+  }
+
+  PhysRegInfo[Reg] = MI;
+  PhysRegUsed[Reg] = true;
+
+  for (const unsigned *SubRegs = RegInfo->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs) {
+    PhysRegInfo[SubReg] = MI;
+    PhysRegUsed[SubReg] = true;
+  }
+
+  // Remember the partial uses.
+  for (const unsigned *SuperRegs = RegInfo->getSuperRegisters(Reg);
+       unsigned SuperReg = *SuperRegs; ++SuperRegs)
+    PhysRegPartUse[SuperReg] = MI;
+}
+
+bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *RefMI,
+                                      SmallSet<unsigned, 4> &SubKills) {
+  for (const unsigned *SubRegs = RegInfo->getImmediateSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs) {
+    MachineInstr *LastRef = PhysRegInfo[SubReg];
+    if (LastRef != RefMI)
+      SubKills.insert(SubReg);
+    else if (!HandlePhysRegKill(SubReg, RefMI, SubKills))
+      SubKills.insert(SubReg);
+  }
+
+  if (*RegInfo->getImmediateSubRegisters(Reg) == 0) {
+    // No sub-registers, just check if reg is killed by RefMI.
+    if (PhysRegInfo[Reg] == RefMI)
+      return true;
+  } else if (SubKills.empty())
+    // None of the sub-registers are killed elsewhere...
+ return true; + return false; +} + +void LiveVariables::addRegisterKills(unsigned Reg, MachineInstr *MI, + SmallSet<unsigned, 4> &SubKills) { + if (SubKills.count(Reg) == 0) + addRegisterKilled(Reg, MI, true); + else { + for (const unsigned *SubRegs = RegInfo->getImmediateSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) + addRegisterKills(SubReg, MI, SubKills); + } +} + +bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *RefMI) { + SmallSet<unsigned, 4> SubKills; + if (HandlePhysRegKill(Reg, RefMI, SubKills)) { + addRegisterKilled(Reg, RefMI); + return true; + } else { + // Some sub-registers are killed by another MI. + for (const unsigned *SubRegs = RegInfo->getImmediateSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) + addRegisterKills(SubReg, RefMI, SubKills); + return false; + } +} + +void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) { + // Does this kill a previous version of this register? + if (MachineInstr *LastRef = PhysRegInfo[Reg]) { + if (PhysRegUsed[Reg]) { + if (!HandlePhysRegKill(Reg, LastRef)) { + if (PhysRegPartUse[Reg]) + addRegisterKilled(Reg, PhysRegPartUse[Reg], true); + } + } else if (PhysRegPartUse[Reg]) + // Add implicit use / kill to last use of a sub-register. + addRegisterKilled(Reg, PhysRegPartUse[Reg], true); + else + addRegisterDead(Reg, LastRef); + } + + for (const unsigned *SubRegs = RegInfo->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) { + if (MachineInstr *LastRef = PhysRegInfo[SubReg]) { + if (PhysRegUsed[SubReg]) { + if (!HandlePhysRegKill(SubReg, LastRef)) { + if (PhysRegPartUse[SubReg]) + addRegisterKilled(SubReg, PhysRegPartUse[SubReg], true); + } + } else if (PhysRegPartUse[SubReg]) + // Add implicit use / kill to last use of a sub-register. + addRegisterKilled(SubReg, PhysRegPartUse[SubReg], true); + else + addRegisterDead(SubReg, LastRef); + } + } + + if (MI) { + for (const unsigned *SuperRegs = RegInfo->getSuperRegisters(Reg); + unsigned SuperReg = *SuperRegs; ++SuperRegs) { + if (PhysRegInfo[SuperReg]) { + // The larger register is previously defined. Now a smaller part is + // being re-defined. Treat it as read/mod/write. + // EAX = + // AX = EAX<imp-use,kill>, EAX<imp-def> + MI->addRegOperand(SuperReg, false/*IsDef*/,true/*IsImp*/,true/*IsKill*/); + MI->addRegOperand(SuperReg, true/*IsDef*/,true/*IsImp*/); + PhysRegInfo[SuperReg] = MI; + PhysRegUsed[SuperReg] = false; + PhysRegPartUse[SuperReg] = NULL; + } else { + // Remember this partial def. 
+ PhysRegPartDef[SuperReg].push_back(MI); + } + } + + PhysRegInfo[Reg] = MI; + PhysRegUsed[Reg] = false; + PhysRegPartUse[Reg] = NULL; + for (const unsigned *SubRegs = RegInfo->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) { + PhysRegInfo[SubReg] = MI; + PhysRegUsed[SubReg] = false; + PhysRegPartUse[SubReg] = NULL; + } + } +} + +bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); + RegInfo = MF->getTarget().getRegisterInfo(); + assert(RegInfo && "Target doesn't have register information?"); + + ReservedRegisters = RegInfo->getReservedRegs(mf); + + unsigned NumRegs = RegInfo->getNumRegs(); + PhysRegInfo = new MachineInstr*[NumRegs]; + PhysRegUsed = new bool[NumRegs]; + PhysRegPartUse = new MachineInstr*[NumRegs]; + PhysRegPartDef = new SmallVector<MachineInstr*,4>[NumRegs]; + PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()]; + std::fill(PhysRegInfo, PhysRegInfo + NumRegs, (MachineInstr*)0); + std::fill(PhysRegUsed, PhysRegUsed + NumRegs, false); + std::fill(PhysRegPartUse, PhysRegPartUse + NumRegs, (MachineInstr*)0); + + /// Get some space for a respectable number of registers... + VirtRegInfo.resize(64); + + analyzePHINodes(mf); + + // Calculate live variable information in depth first order on the CFG of the + // function. This guarantees that we will see the definition of a virtual + // register before its uses due to dominance properties of SSA (except for PHI + // nodes, which are treated as a special case). + // + MachineBasicBlock *Entry = MF->begin(); + SmallPtrSet<MachineBasicBlock*,16> Visited; + for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> > + DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); + DFI != E; ++DFI) { + MachineBasicBlock *MBB = *DFI; + + // Mark live-in registers as live-in. + for (MachineBasicBlock::const_livein_iterator II = MBB->livein_begin(), + EE = MBB->livein_end(); II != EE; ++II) { + assert(MRegisterInfo::isPhysicalRegister(*II) && + "Cannot have a live-in virtual register!"); + HandlePhysRegDef(*II, 0); + } + + // Loop over all of the instructions, processing them. + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + MachineInstr *MI = I; + + // Process all of the operands of the instruction... + unsigned NumOperandsToProcess = MI->getNumOperands(); + + // Unless it is a PHI node. In this case, ONLY process the DEF, not any + // of the uses. They will be handled in other basic blocks. + if (MI->getOpcode() == TargetInstrInfo::PHI) + NumOperandsToProcess = 1; + + // Process all uses... + for (unsigned i = 0; i != NumOperandsToProcess; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isRegister() && MO.isUse() && MO.getReg()) { + if (MRegisterInfo::isVirtualRegister(MO.getReg())){ + HandleVirtRegUse(getVarInfo(MO.getReg()), MBB, MI); + } else if (MRegisterInfo::isPhysicalRegister(MO.getReg()) && + !ReservedRegisters[MO.getReg()]) { + HandlePhysRegUse(MO.getReg(), MI); + } + } + } + + // Process all defs... 
+    for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (MO.isRegister() && MO.isDef() && MO.getReg()) {
+        if (MRegisterInfo::isVirtualRegister(MO.getReg())) {
+          VarInfo &VRInfo = getVarInfo(MO.getReg());
+
+          assert(VRInfo.DefInst == 0 && "Variable multiply defined!");
+          VRInfo.DefInst = MI;
+          // Defaults to dead
+          VRInfo.Kills.push_back(MI);
+        } else if (MRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+                   !ReservedRegisters[MO.getReg()]) {
+          HandlePhysRegDef(MO.getReg(), MI);
+        }
+      }
+    }
+  }
+
+  // Handle any virtual assignments from PHI nodes which might be at the
+  // bottom of this basic block. We check all of our successor blocks to see
+  // if they have PHI nodes, and if so, we simulate an assignment at the end
+  // of the current block.
+  if (!PHIVarInfo[MBB->getNumber()].empty()) {
+    SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()];
+
+    for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(),
+           E = VarInfoVec.end(); I != E; ++I) {
+      VarInfo& VRInfo = getVarInfo(*I);
+      assert(VRInfo.DefInst && "Register use before def (or no def)!");
+
+      // Only mark it alive in the block we are representing.
+      MarkVirtRegAliveInBlock(VRInfo, MBB);
+    }
+  }
+
+  // Finally, if the last instruction in the block is a return, make sure to
+  // mark it as using all of the live-out values in the function.
+  if (!MBB->empty() && TII.isReturn(MBB->back().getOpcode())) {
+    MachineInstr *Ret = &MBB->back();
+    for (MachineFunction::liveout_iterator I = MF->liveout_begin(),
+           E = MF->liveout_end(); I != E; ++I) {
+      assert(MRegisterInfo::isPhysicalRegister(*I) &&
+             "Cannot have a live-out virtual register!");
+      HandlePhysRegUse(*I, Ret);
+      // Add live-out registers as implicit uses.
+      if (Ret->findRegisterUseOperandIdx(*I) == -1)
+        Ret->addRegOperand(*I, false, true);
+    }
+  }
+
+  // Loop over PhysRegInfo, killing any registers that are available at the
+  // end of the basic block. This also resets the PhysRegInfo map.
+  for (unsigned i = 0; i != NumRegs; ++i)
+    if (PhysRegInfo[i])
+      HandlePhysRegDef(i, 0);
+
+  // Clear some state between basic blocks. This is purely local information.
+  for (unsigned i = 0; i != NumRegs; ++i)
+    PhysRegPartDef[i].clear();
+  std::fill(PhysRegInfo, PhysRegInfo + NumRegs, (MachineInstr*)0);
+  std::fill(PhysRegUsed, PhysRegUsed + NumRegs, false);
+  std::fill(PhysRegPartUse, PhysRegPartUse + NumRegs, (MachineInstr*)0);
+  }
+
+  // Convert and transfer the dead / killed information we have gathered into
+  // VirtRegInfo onto MI's.
+  //
+  for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i)
+    for (unsigned j = 0, e2 = VirtRegInfo[i].Kills.size(); j != e2; ++j) {
+      if (VirtRegInfo[i].Kills[j] == VirtRegInfo[i].DefInst)
+        addRegisterDead(i + MRegisterInfo::FirstVirtualRegister,
+                        VirtRegInfo[i].Kills[j]);
+      else
+        addRegisterKilled(i + MRegisterInfo::FirstVirtualRegister,
+                          VirtRegInfo[i].Kills[j]);
+    }
+
+  // Check to make sure there are no unreachable blocks in the MC CFG for the
+  // function. If there are, it is due to a bug in the instruction selector
+  // or some other part of the code generator.
+#ifndef NDEBUG
+  for (MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
+    assert(Visited.count(&*i) != 0 && "unreachable basic block found");
+#endif
+
+  delete[] PhysRegInfo;
+  delete[] PhysRegUsed;
+  delete[] PhysRegPartUse;
+  delete[] PhysRegPartDef;
+  delete[] PHIVarInfo;
+
+  return false;
+}
+
+/// instructionChanged - When the address of an instruction changes, this
+/// method should be called so that the live variable analysis can update its
+/// internal data structures. This removes the records for OldMI, transferring
+/// them to the records for NewMI.
+void LiveVariables::instructionChanged(MachineInstr *OldMI,
+                                       MachineInstr *NewMI) {
+  // If the instruction defines any virtual registers, update the VarInfo,
+  // kill and dead information for the instruction.
+  for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = OldMI->getOperand(i);
+    if (MO.isRegister() && MO.getReg() &&
+        MRegisterInfo::isVirtualRegister(MO.getReg())) {
+      unsigned Reg = MO.getReg();
+      VarInfo &VI = getVarInfo(Reg);
+      if (MO.isDef()) {
+        if (MO.isDead()) {
+          MO.unsetIsDead();
+          addVirtualRegisterDead(Reg, NewMI);
+        }
+        // Update the defining instruction.
+        if (VI.DefInst == OldMI)
+          VI.DefInst = NewMI;
+      }
+      if (MO.isUse()) {
+        if (MO.isKill()) {
+          MO.unsetIsKill();
+          addVirtualRegisterKilled(Reg, NewMI);
+        }
+        // If this is a kill of the value, update the VI kills list.
+        if (VI.removeKill(OldMI))
+          VI.Kills.push_back(NewMI);   // Yes, there was a kill of it
+      }
+    }
+  }
+}
+
+/// removeVirtualRegistersKilled - Remove all killed info for the specified
+/// instruction.
+void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isKill()) {
+      MO.unsetIsKill();
+      unsigned Reg = MO.getReg();
+      if (MRegisterInfo::isVirtualRegister(Reg)) {
+        bool removed = getVarInfo(Reg).removeKill(MI);
+        assert(removed && "kill not in register's VarInfo?");
+      }
+    }
+  }
+}
+
+/// removeVirtualRegistersDead - Remove all of the dead registers for the
+/// specified instruction from the live variable information.
+void LiveVariables::removeVirtualRegistersDead(MachineInstr *MI) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isDead()) {
+      MO.unsetIsDead();
+      unsigned Reg = MO.getReg();
+      if (MRegisterInfo::isVirtualRegister(Reg)) {
+        bool removed = getVarInfo(Reg).removeKill(MI);
+        assert(removed && "dead record not in register's VarInfo?");
+      }
+    }
+  }
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in the function.
+/// In particular, we want to map the variable information of a virtual
+/// register which is used in a PHI node. We map that to the BB the vreg is
+/// coming from.
+///
+void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
+  for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+       I != E; ++I)
+    for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+         BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI)
+      for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+        PHIVarInfo[BBI->getOperand(i + 1).getMachineBasicBlock()->getNumber()].
+ push_back(BBI->getOperand(i).getReg()); +} diff --git a/lib/CodeGen/MachOWriter.cpp b/lib/CodeGen/MachOWriter.cpp new file mode 100644 index 0000000..36060e1 --- /dev/null +++ b/lib/CodeGen/MachOWriter.cpp @@ -0,0 +1,945 @@ +//===-- MachOWriter.cpp - Target-independent Mach-O Writer code -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Nate Begeman and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the target-independent Mach-O writer. This file writes +// out the Mach-O file in the following order: +// +// #1 FatHeader (universal-only) +// #2 FatArch (universal-only, 1 per universal arch) +// Per arch: +// #3 Header +// #4 Load Commands +// #5 Sections +// #6 Relocations +// #7 Symbols +// #8 Strings +// +//===----------------------------------------------------------------------===// + +#include "MachOWriter.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/FileWriters.h" +#include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetJITInfo.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/OutputBuffer.h" +#include "llvm/Support/Streams.h" +#include <algorithm> +using namespace llvm; + +/// AddMachOWriter - Concrete function to add the Mach-O writer to the function +/// pass manager. +MachineCodeEmitter *llvm::AddMachOWriter(FunctionPassManager &FPM, + std::ostream &O, + TargetMachine &TM) { + MachOWriter *MOW = new MachOWriter(O, TM); + FPM.add(MOW); + return &MOW->getMachineCodeEmitter(); +} + +//===----------------------------------------------------------------------===// +// MachOCodeEmitter Implementation +//===----------------------------------------------------------------------===// + +namespace llvm { + /// MachOCodeEmitter - This class is used by the MachOWriter to emit the code + /// for functions to the Mach-O file. + class MachOCodeEmitter : public MachineCodeEmitter { + MachOWriter &MOW; + + /// Target machine description. + TargetMachine &TM; + + /// is64Bit/isLittleEndian - This information is inferred from the target + /// machine directly, indicating what header values and flags to set. + bool is64Bit, isLittleEndian; + + /// Relocations - These are the relocations that the function needs, as + /// emitted. + std::vector<MachineRelocation> Relocations; + + /// CPLocations - This is a map of constant pool indices to offsets from the + /// start of the section for that constant pool index. + std::vector<intptr_t> CPLocations; + + /// CPSections - This is a map of constant pool indices to the MachOSection + /// containing the constant pool entry for that index. + std::vector<unsigned> CPSections; + + /// JTLocations - This is a map of jump table indices to offsets from the + /// start of the section for that jump table index. + std::vector<intptr_t> JTLocations; + + /// MBBLocations - This vector is a mapping from MBB ID's to their address. + /// It is filled in by the StartMachineBasicBlock callback and queried by + /// the getMachineBasicBlockAddress callback. 
+    std::vector<intptr_t> MBBLocations;
+
+  public:
+    MachOCodeEmitter(MachOWriter &mow) : MOW(mow), TM(MOW.TM) {
+      is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+      isLittleEndian = TM.getTargetData()->isLittleEndian();
+    }
+
+    virtual void startFunction(MachineFunction &MF);
+    virtual bool finishFunction(MachineFunction &MF);
+
+    virtual void addRelocation(const MachineRelocation &MR) {
+      Relocations.push_back(MR);
+    }
+
+    void emitConstantPool(MachineConstantPool *MCP);
+    void emitJumpTables(MachineJumpTableInfo *MJTI);
+
+    virtual intptr_t getConstantPoolEntryAddress(unsigned Index) const {
+      assert(CPLocations.size() > Index && "CP not emitted!");
+      return CPLocations[Index];
+    }
+    virtual intptr_t getJumpTableEntryAddress(unsigned Index) const {
+      assert(JTLocations.size() > Index && "JT not emitted!");
+      return JTLocations[Index];
+    }
+
+    virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+      if (MBBLocations.size() <= (unsigned)MBB->getNumber())
+        MBBLocations.resize((MBB->getNumber()+1)*2);
+      MBBLocations[MBB->getNumber()] = getCurrentPCOffset();
+    }
+
+    virtual intptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+      assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
+             MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+      return MBBLocations[MBB->getNumber()];
+    }
+
+    /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
+    virtual void startFunctionStub(unsigned StubSize, unsigned Alignment = 1) {
+      assert(0 && "JIT specific function called!");
+      abort();
+    }
+    virtual void *finishFunctionStub(const Function *F) {
+      assert(0 && "JIT specific function called!");
+      abort();
+      return 0;
+    }
+  };
+}
+
+/// startFunction - This callback is invoked when a new machine function is
+/// about to be emitted.
+void MachOCodeEmitter::startFunction(MachineFunction &MF) {
+  const TargetData *TD = TM.getTargetData();
+  const Function *F = MF.getFunction();
+
+  // Align the output buffer to the appropriate alignment, power of 2.
+  unsigned FnAlign = F->getAlignment();
+  unsigned TDAlign = TD->getPrefTypeAlignment(F->getType());
+  unsigned Align = Log2_32(std::max(FnAlign, TDAlign));
+  assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
+
+  // Get the Mach-O Section that this function belongs in.
+  MachOWriter::MachOSection *MOS = MOW.getTextSection();
+
+  // FIXME: better memory management
+  MOS->SectionData.reserve(4096);
+  BufferBegin = &MOS->SectionData[0];
+  BufferEnd = BufferBegin + MOS->SectionData.capacity();
+
+  // Upgrade the section alignment if required.
+  if (MOS->align < Align) MOS->align = Align;
+
+  // Round the size up to the correct alignment for starting the new function.
+  if ((MOS->size & ((1 << Align) - 1)) != 0) {
+    MOS->size += (1 << Align);
+    MOS->size &= ~((1 << Align) - 1);
+  }
+
+  // FIXME: Using MOS->size directly here instead of calculating it from the
+  // output buffer size (impossible because the code emitter deals only in raw
+  // bytes) forces us to manually synchronize size and write padding zero bytes
+  // to the output buffer for all non-text sections. For text sections, we do
+  // not synchronize the output buffer, and we just blow up if anyone tries to
+  // write non-code to it. An assert should probably be added to
+  // AddSymbolToSection to prevent calling it on the text section.
+  CurBufferPtr = BufferBegin + MOS->size;
+
+  // Clear per-function data structures.
+  CPLocations.clear();
+  CPSections.clear();
+  JTLocations.clear();
+  MBBLocations.clear();
+}
+
+/// finishFunction - This callback is invoked after the function is completely
+/// finished.
+bool MachOCodeEmitter::finishFunction(MachineFunction &MF) {
+  // Get the Mach-O Section that this function belongs in.
+  MachOWriter::MachOSection *MOS = MOW.getTextSection();
+
+  // Get a symbol for the function to add to the symbol table.
+  // FIXME: it seems like we should call something like AddSymbolToSection
+  // in startFunction rather than changing the section size and symbol n_value
+  // here.
+  const GlobalValue *FuncV = MF.getFunction();
+  MachOSym FnSym(FuncV, MOW.Mang->getValueName(FuncV), MOS->Index, TM);
+  FnSym.n_value = MOS->size;
+  MOS->size = CurBufferPtr - BufferBegin;
+
+  // Emit the constant pool to the appropriate section(s).
+  emitConstantPool(MF.getConstantPool());
+
+  // Emit jump tables to the appropriate section.
+  emitJumpTables(MF.getJumpTableInfo());
+
+  // If we have emitted any relocations to function-specific objects such as
+  // basic blocks, constant pool entries, or jump tables, record their
+  // addresses now so that we can rewrite them with the correct addresses
+  // later.
+  for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
+    MachineRelocation &MR = Relocations[i];
+    intptr_t Addr;
+
+    if (MR.isBasicBlock()) {
+      Addr = getMachineBasicBlockAddress(MR.getBasicBlock());
+      MR.setConstantVal(MOS->Index);
+      MR.setResultPointer((void*)Addr);
+    } else if (MR.isJumpTableIndex()) {
+      Addr = getJumpTableEntryAddress(MR.getJumpTableIndex());
+      MR.setConstantVal(MOW.getJumpTableSection()->Index);
+      MR.setResultPointer((void*)Addr);
+    } else if (MR.isConstantPoolIndex()) {
+      Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex());
+      MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]);
+      MR.setResultPointer((void*)Addr);
+    } else if (MR.isGlobalValue()) {
+      // FIXME: This should be a set or something that uniques
+      MOW.PendingGlobals.push_back(MR.getGlobalValue());
+    } else {
+      assert(0 && "Unhandled relocation type");
+    }
+    MOS->Relocations.push_back(MR);
+  }
+  Relocations.clear();
+
+  // Finally, add it to the symtab.
+  MOW.SymbolTable.push_back(FnSym);
+  return false;
+}
+
+/// emitConstantPool - For each constant pool entry, figure out which section
+/// the constant should live in, allocate space for it, and emit it to the
+/// Section data buffer.
+void MachOCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
+  const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+  if (CP.empty()) return;
+
+  // FIXME: handle PIC codegen
+  bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
+  assert(!isPIC && "PIC codegen not yet handled for mach-o constant pools!");
+
+  // Although there is no strict necessity that I am aware of, we will do what
+  // gcc for OS X does and put each constant pool entry in a section of
+  // constant objects of a certain size. That means that float constants go in
+  // the literal4 section, and double objects go in literal8, etc.
+  //
+  // FIXME: revisit this decision if we ever do the "stick everything into one
+  // giant object for PIC" optimization.
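+  // As a hedged illustration of the scheme just described (the authoritative
+  // mapping is MachOWriter::getConstSection, declared in MachOWriter.h): for
+  // primitive scalar constants, the byte size alone picks the literal
+  // section. A hypothetical helper, not part of this commit, would look like:
+  //
+  //   static const char *literalSectionFor(unsigned Size) {
+  //     switch (Size) {
+  //     case 4:  return "__literal4";   // e.g. float
+  //     case 8:  return "__literal8";   // e.g. double
+  //     case 16: return "__literal16";  // e.g. 16-byte vector
+  //     default: return "__const";      // everything else
+  //     }
+  //   }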
+  for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+    const Type *Ty = CP[i].getType();
+    unsigned Size = TM.getTargetData()->getTypeSize(Ty);
+
+    MachOWriter::MachOSection *Sec = MOW.getConstSection(CP[i].Val.ConstVal);
+    OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+
+    CPLocations.push_back(Sec->SectionData.size());
+    CPSections.push_back(Sec->Index);
+
+    // FIXME: remove when we have unified size + output buffer
+    Sec->size += Size;
+
+    // Allocate space in the section for the constant.
+    // FIXME: need alignment?
+    // FIXME: share between here and AddSymbolToSection?
+    for (unsigned j = 0; j < Size; ++j)
+      SecDataOut.outbyte(0);
+
+    MOW.InitMem(CP[i].Val.ConstVal, &Sec->SectionData[0], CPLocations[i],
+                TM.getTargetData(), Sec->Relocations);
+  }
+}
+
+/// emitJumpTables - Emit all the jump tables for a given jump table info
+/// record to the appropriate section.
+void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  if (JT.empty()) return;
+
+  // FIXME: handle PIC codegen
+  bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
+  assert(!isPIC && "PIC codegen not yet handled for mach-o jump tables!");
+
+  MachOWriter::MachOSection *Sec = MOW.getJumpTableSection();
+  unsigned TextSecIndex = MOW.getTextSection()->Index;
+  OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+
+  for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+    // For each jump table, record its offset from the start of the section,
+    // reserve space for the relocations to the MBBs, and add the relocations.
+    const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+    JTLocations.push_back(Sec->SectionData.size());
+    for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
+      MachineRelocation MR(MOW.GetJTRelocation(Sec->SectionData.size(),
+                                               MBBs[mi]));
+      MR.setResultPointer((void *)JTLocations[i]);
+      MR.setConstantVal(TextSecIndex);
+      Sec->Relocations.push_back(MR);
+      SecDataOut.outaddr(0);
+    }
+  }
+  // FIXME: remove when we have unified size + output buffer
+  Sec->size = Sec->SectionData.size();
+}
+
+//===----------------------------------------------------------------------===//
+// MachOWriter Implementation
+//===----------------------------------------------------------------------===//
+
+char MachOWriter::ID = 0;
+MachOWriter::MachOWriter(std::ostream &o, TargetMachine &tm)
+  : MachineFunctionPass((intptr_t)&ID), O(o), TM(tm) {
+  is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+  isLittleEndian = TM.getTargetData()->isLittleEndian();
+
+  // Create the machine code emitter object for this target.
+  MCE = new MachOCodeEmitter(*this);
+}
+
+MachOWriter::~MachOWriter() {
+  delete MCE;
+}
+
+void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) {
+  const Type *Ty = GV->getType()->getElementType();
+  unsigned Size = TM.getTargetData()->getTypeSize(Ty);
+  unsigned Align = GV->getAlignment();
+  if (Align == 0)
+    Align = TM.getTargetData()->getPrefTypeAlignment(Ty);
+
+  // Reserve space in the section for this symbol while maintaining the
+  // desired section alignment, which must be at least as much as required by
+  // this symbol.
+  OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+
+  if (Align) {
+    uint64_t OrigSize = Sec->size;
+    Align = Log2_32(Align);
+    Sec->align = std::max(unsigned(Sec->align), Align);
+    // Round the section size up to a multiple of the symbol's alignment.
+    Sec->size = (Sec->size + (1 << Align) - 1) & ~((1 << Align) - 1);
+
+    // Add alignment padding to buffer as well.
+    // FIXME: remove when we have unified size + output buffer
+    unsigned AlignedSize = Sec->size - OrigSize;
+    for (unsigned i = 0; i < AlignedSize; ++i)
+      SecDataOut.outbyte(0);
+  }
+  // Globals without external linkage apparently do not go in the symbol table.
+  if (GV->getLinkage() != GlobalValue::InternalLinkage) {
+    MachOSym Sym(GV, Mang->getValueName(GV), Sec->Index, TM);
+    Sym.n_value = Sec->size;
+    SymbolTable.push_back(Sym);
+  }
+
+  // Record the offset of the symbol, and then allocate space for it.
+  // FIXME: remove when we have unified size + output buffer
+  Sec->size += Size;
+
+  // Now that we know what section the GlobalVariable is going to be emitted
+  // into, update our mappings.
+  // FIXME: We may also need to update this when outputting non-GlobalVariable
+  // GlobalValues such as functions.
+  GVSection[GV] = Sec;
+  GVOffset[GV] = Sec->SectionData.size();
+
+  // Allocate space in the section for the global.
+  for (unsigned i = 0; i < Size; ++i)
+    SecDataOut.outbyte(0);
+}
+
+void MachOWriter::EmitGlobal(GlobalVariable *GV) {
+  const Type *Ty = GV->getType()->getElementType();
+  unsigned Size = TM.getTargetData()->getTypeSize(Ty);
+  bool NoInit = !GV->hasInitializer();
+
+  // If this global has a zero initializer, it is part of the .bss or common
+  // section.
+  if (NoInit || GV->getInitializer()->isNullValue()) {
+    // If this global is part of the common block, add it now. Variables are
+    // part of the common block if they are zero initialized and allowed to be
+    // merged with other symbols.
+    if (NoInit || GV->hasLinkOnceLinkage() || GV->hasWeakLinkage()) {
+      MachOSym ExtOrCommonSym(GV, Mang->getValueName(GV), MachOSym::NO_SECT,TM);
+      // For undefined (N_UNDF) external (N_EXT) types, n_value is the size in
+      // bytes of the symbol.
+      ExtOrCommonSym.n_value = Size;
+      SymbolTable.push_back(ExtOrCommonSym);
+      // Remember that we've seen this symbol.
+      GVOffset[GV] = Size;
+      return;
+    }
+    // Otherwise, this symbol is part of the .bss section.
+    MachOSection *BSS = getBSSSection();
+    AddSymbolToSection(BSS, GV);
+    return;
+  }
+
+  // Scalar read-only data goes in a literal section if the scalar is 4, 8, or
+  // 16 bytes, or a cstring. Other read only data goes into a regular const
+  // section. Read-write data goes in the data section.
+  MachOSection *Sec = GV->isConstant() ? getConstSection(GV->getInitializer()) :
+                                         getDataSection();
+  AddSymbolToSection(Sec, GV);
+  InitMem(GV->getInitializer(), &Sec->SectionData[0], GVOffset[GV],
+          TM.getTargetData(), Sec->Relocations);
+}
+
+
+bool MachOWriter::runOnMachineFunction(MachineFunction &MF) {
+  // Nothing to do here, this is all done through the MCE object.
+  return false;
+}
+
+bool MachOWriter::doInitialization(Module &M) {
+  // Set the magic value, now that we know the pointer size and endianness.
+  Header.setMagic(isLittleEndian, is64Bit);
+
+  // Set the file type.
+  // FIXME: this only works for object files; we do not support the creation
+  // of dynamic libraries or executables at this time.
+  Header.filetype = MachOHeader::MH_OBJECT;
+
+  Mang = new Mangler(M);
+  return false;
+}
+
+/// doFinalization - Now that the module has been completely processed, emit
+/// the Mach-O file to 'O'.
+bool MachOWriter::doFinalization(Module &M) {
+  // FIXME: we don't handle debug info yet, we should probably do that.
+
+  // Okay, the .text section has been completed, build the .data, .bss, and
+  // "common" sections next.
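+  // Illustrative summary (hypothetical cases, not from this commit) of where
+  // EmitGlobal, above, will send each global that the loop below visits:
+  //   no initializer, or weak/linkonce with a zero initializer -> common
+  //     (an N_UNDF | N_EXT symbol whose n_value is the size in bytes)
+  //   other zero-initialized globals                     -> __DATA,__bss
+  //   constant 4/8/16-byte scalars and C strings         -> literal sections
+  //   other constant data                                -> __TEXT,__const
+  //   remaining initialized, writable globals            -> __DATA,__data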
+  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+       I != E; ++I)
+    EmitGlobal(I);
+
+  // Emit the header and load commands.
+  EmitHeaderAndLoadCommands();
+
+  // Emit the various sections and their relocation info.
+  EmitSections();
+
+  // Write the symbol table and the string table to the end of the file.
+  O.write((char*)&SymT[0], SymT.size());
+  O.write((char*)&StrT[0], StrT.size());
+
+  // We are done with the abstract symbols.
+  SectionList.clear();
+  SymbolTable.clear();
+  DynamicSymbolTable.clear();
+
+  // Release the name mangler object.
+  delete Mang; Mang = 0;
+  return false;
+}
+
+void MachOWriter::EmitHeaderAndLoadCommands() {
+  // Step #0: Fill in the segment load command size, since we need it to figure
+  //          out the rest of the header fields.
+  MachOSegment SEG("", is64Bit);
+  SEG.nsects  = SectionList.size();
+  SEG.cmdsize = SEG.cmdSize(is64Bit) +
+                SEG.nsects * SectionList[0]->cmdSize(is64Bit);
+
+  // Step #1: calculate the number of load commands. We always have at least
+  //          one, for the LC_SEGMENT load command, plus two for the normal
+  //          and dynamic symbol tables, if there are any symbols.
+  Header.ncmds = SymbolTable.empty() ? 1 : 3;
+
+  // Step #2: calculate the size of the load commands.
+  Header.sizeofcmds = SEG.cmdsize;
+  if (!SymbolTable.empty())
+    Header.sizeofcmds += SymTab.cmdsize + DySymTab.cmdsize;
+
+  // Step #3: write the header to the file.
+  // Local alias to shortenify the coming code.
+  DataBuffer &FH = Header.HeaderData;
+  OutputBuffer FHOut(FH, is64Bit, isLittleEndian);
+
+  FHOut.outword(Header.magic);
+  FHOut.outword(TM.getMachOWriterInfo()->getCPUType());
+  FHOut.outword(TM.getMachOWriterInfo()->getCPUSubType());
+  FHOut.outword(Header.filetype);
+  FHOut.outword(Header.ncmds);
+  FHOut.outword(Header.sizeofcmds);
+  FHOut.outword(Header.flags);
+  if (is64Bit)
+    FHOut.outword(Header.reserved);
+
+  // Step #4: Finish filling in the segment load command and write it out.
+  for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+         E = SectionList.end(); I != E; ++I)
+    SEG.filesize += (*I)->size;
+
+  SEG.vmsize  = SEG.filesize;
+  SEG.fileoff = Header.cmdSize(is64Bit) + Header.sizeofcmds;
+
+  FHOut.outword(SEG.cmd);
+  FHOut.outword(SEG.cmdsize);
+  FHOut.outstring(SEG.segname, 16);
+  FHOut.outaddr(SEG.vmaddr);
+  FHOut.outaddr(SEG.vmsize);
+  FHOut.outaddr(SEG.fileoff);
+  FHOut.outaddr(SEG.filesize);
+  FHOut.outword(SEG.maxprot);
+  FHOut.outword(SEG.initprot);
+  FHOut.outword(SEG.nsects);
+  FHOut.outword(SEG.flags);
+
+  // Step #5: Finish filling in the fields of the MachOSections.
+  uint64_t currentAddr = 0;
+  for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+         E = SectionList.end(); I != E; ++I) {
+    MachOSection *MOS = *I;
+    MOS->addr   = currentAddr;
+    MOS->offset = currentAddr + SEG.fileoff;
+
+    // FIXME: do we need to do something with alignment here?
+    currentAddr += MOS->size;
+  }
+
+  // Step #6: Emit the symbol table to temporary buffers, so that we know the
+  //          size of the string table when we write the next load command.
+  //          This also sorts and assigns indices to each of the symbols, which
+  //          is necessary for emitting relocations to externally-defined
+  //          objects.
+  BufferSymbolAndStringTable();
+
+  // Step #7: Calculate the number of relocations for each section, and write
+  //          out the section command for each section.
+  currentAddr += SEG.fileoff;
+  for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+         E = SectionList.end(); I != E; ++I) {
+    MachOSection *MOS = *I;
+    // Convert the relocations to target-specific relocations, and fill in the
+    // relocation offset for this section.
+    CalculateRelocations(*MOS);
+    MOS->reloff = MOS->nreloc ? currentAddr : 0;
+    currentAddr += MOS->nreloc * 8;
+
+    // Write the finalized section command to the output buffer.
+    FHOut.outstring(MOS->sectname, 16);
+    FHOut.outstring(MOS->segname, 16);
+    FHOut.outaddr(MOS->addr);
+    FHOut.outaddr(MOS->size);
+    FHOut.outword(MOS->offset);
+    FHOut.outword(MOS->align);
+    FHOut.outword(MOS->reloff);
+    FHOut.outword(MOS->nreloc);
+    FHOut.outword(MOS->flags);
+    FHOut.outword(MOS->reserved1);
+    FHOut.outword(MOS->reserved2);
+    if (is64Bit)
+      FHOut.outword(MOS->reserved3);
+  }
+
+  // Step #8: Emit LC_SYMTAB/LC_DYSYMTAB load commands.
+  SymTab.symoff  = currentAddr;
+  SymTab.nsyms   = SymbolTable.size();
+  SymTab.stroff  = SymTab.symoff + SymT.size();
+  SymTab.strsize = StrT.size();
+  FHOut.outword(SymTab.cmd);
+  FHOut.outword(SymTab.cmdsize);
+  FHOut.outword(SymTab.symoff);
+  FHOut.outword(SymTab.nsyms);
+  FHOut.outword(SymTab.stroff);
+  FHOut.outword(SymTab.strsize);
+
+  // FIXME: set DySymTab fields appropriately.
+  // We should probably just update these in BufferSymbolAndStringTable since
+  // that's where we're partitioning up the different kinds of symbols.
+  FHOut.outword(DySymTab.cmd);
+  FHOut.outword(DySymTab.cmdsize);
+  FHOut.outword(DySymTab.ilocalsym);
+  FHOut.outword(DySymTab.nlocalsym);
+  FHOut.outword(DySymTab.iextdefsym);
+  FHOut.outword(DySymTab.nextdefsym);
+  FHOut.outword(DySymTab.iundefsym);
+  FHOut.outword(DySymTab.nundefsym);
+  FHOut.outword(DySymTab.tocoff);
+  FHOut.outword(DySymTab.ntoc);
+  FHOut.outword(DySymTab.modtaboff);
+  FHOut.outword(DySymTab.nmodtab);
+  FHOut.outword(DySymTab.extrefsymoff);
+  FHOut.outword(DySymTab.nextrefsyms);
+  FHOut.outword(DySymTab.indirectsymoff);
+  FHOut.outword(DySymTab.nindirectsyms);
+  FHOut.outword(DySymTab.extreloff);
+  FHOut.outword(DySymTab.nextrel);
+  FHOut.outword(DySymTab.locreloff);
+  FHOut.outword(DySymTab.nlocrel);
+
+  O.write((char*)&FH[0], FH.size());
+}
+
+/// EmitSections - Now that we have constructed the file header and load
+/// commands, emit the data for each section to the file.
+void MachOWriter::EmitSections() {
+  for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+         E = SectionList.end(); I != E; ++I)
+    // Emit the contents of each section.
+    O.write((char*)&(*I)->SectionData[0], (*I)->size);
+  for (std::vector<MachOSection*>::iterator I = SectionList.begin(),
+         E = SectionList.end(); I != E; ++I)
+    // Emit the relocation entry data for each section.
+    O.write((char*)&(*I)->RelocBuffer[0], (*I)->RelocBuffer.size());
+}
+
+/// PartitionByLocal - Simple boolean predicate that returns true if Sym is
+/// a local symbol rather than an external symbol.
+bool MachOWriter::PartitionByLocal(const MachOSym &Sym) {
+  return (Sym.n_type & (MachOSym::N_EXT | MachOSym::N_PEXT)) == 0;
+}
+
+/// PartitionByDefined - Simple boolean predicate that returns true if Sym is
+/// defined in this module.
+bool MachOWriter::PartitionByDefined(const MachOSym &Sym) {
+  // FIXME: Do N_ABS or N_INDR count as defined?
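+  // For example, with the n_type constants defined in MachOWriter.h: a symbol
+  // with n_type 0x0f (N_SECT | N_EXT) is defined here, since
+  // 0x0f & N_SECT == 0x0e, while an undefined external with n_type 0x01
+  // (N_UNDF | N_EXT) is not, since 0x01 & N_SECT == 0.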
+  return (Sym.n_type & MachOSym::N_SECT) == MachOSym::N_SECT;
+}
+
+/// BufferSymbolAndStringTable - Sort the symbols we encountered and assign
+/// them each a string table index so that they appear in the correct order in
+/// the output file.
+void MachOWriter::BufferSymbolAndStringTable() {
+  // The order of the symbol table is:
+  // 1. local symbols
+  // 2. defined external symbols (sorted by name)
+  // 3. undefined external symbols (sorted by name)
+
+  // Before sorting the symbols, check the PendingGlobals for any undefined
+  // globals that need to be put in the symbol table.
+  for (std::vector<GlobalValue*>::iterator I = PendingGlobals.begin(),
+         E = PendingGlobals.end(); I != E; ++I) {
+    if (GVOffset[*I] == 0 && GVSection[*I] == 0) {
+      MachOSym UndfSym(*I, Mang->getValueName(*I), MachOSym::NO_SECT, TM);
+      SymbolTable.push_back(UndfSym);
+      GVOffset[*I] = -1;
+    }
+  }
+
+  // Sort the symbols by name, so that when we partition the symbols by scope
+  // of definition, we won't have to sort by name within each partition.
+  std::sort(SymbolTable.begin(), SymbolTable.end(), MachOSymCmp());
+
+  // Partition the symbol table entries so that all local symbols come before
+  // all symbols with external linkage. { 1 | 2 3 }
+  std::partition(SymbolTable.begin(), SymbolTable.end(), PartitionByLocal);
+
+  // Advance iterator to beginning of external symbols and partition so that
+  // all external symbols defined in this module come before all external
+  // symbols defined elsewhere. { 1 | 2 | 3 }
+  for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+         E = SymbolTable.end(); I != E; ++I) {
+    if (!PartitionByLocal(*I)) {
+      std::partition(I, E, PartitionByDefined);
+      break;
+    }
+  }
+
+  // Calculate the starting index for each of the local, extern defined, and
+  // undefined symbols, as well as the number of each to put in the LC_DYSYMTAB
+  // load command.
+  for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+         E = SymbolTable.end(); I != E; ++I) {
+    if (PartitionByLocal(*I)) {
+      ++DySymTab.nlocalsym;
+      ++DySymTab.iextdefsym;
+      ++DySymTab.iundefsym;
+    } else if (PartitionByDefined(*I)) {
+      ++DySymTab.nextdefsym;
+      ++DySymTab.iundefsym;
+    } else {
+      ++DySymTab.nundefsym;
+    }
+  }
+
+  // Write out a leading zero byte when emitting the string table, for
+  // n_strx == 0, which means an empty string.
+  OutputBuffer StrTOut(StrT, is64Bit, isLittleEndian);
+  StrTOut.outbyte(0);
+
+  // The order of the string table is:
+  // 1. strings for external symbols
+  // 2. strings for local symbols
+  // Since this is the opposite order from the symbol table, which we have just
+  // sorted, we can walk the symbol table backwards to output the string table.
+  for (std::vector<MachOSym>::reverse_iterator I = SymbolTable.rbegin(),
+         E = SymbolTable.rend(); I != E; ++I) {
+    if (I->GVName == "") {
+      I->n_strx = 0;
+    } else {
+      I->n_strx = StrT.size();
+      StrTOut.outstring(I->GVName, I->GVName.length()+1);
+    }
+  }
+
+  OutputBuffer SymTOut(SymT, is64Bit, isLittleEndian);
+
+  unsigned index = 0;
+  for (std::vector<MachOSym>::iterator I = SymbolTable.begin(),
+         E = SymbolTable.end(); I != E; ++I, ++index) {
+    // Add the section base address to the section offset in the n_value field
+    // to calculate the full address.
+    // FIXME: handle symbols where the n_value field is not the address
+    GlobalValue *GV = const_cast<GlobalValue*>(I->GV);
+    if (GV && GVSection[GV])
+      I->n_value += GVSection[GV]->addr;
+    if (GV && (GVOffset[GV] == -1))
+      GVOffset[GV] = index;
+
+    // Emit nlist to buffer.
+    SymTOut.outword(I->n_strx);
+    SymTOut.outbyte(I->n_type);
+    SymTOut.outbyte(I->n_sect);
+    SymTOut.outhalf(I->n_desc);
+    SymTOut.outaddr(I->n_value);
+  }
+}
+
+/// CalculateRelocations - For each MachineRelocation in the current section,
+/// calculate the index of the section containing the object to be relocated,
+/// and the offset into that section. From this information, create the
+/// appropriate target-specific MachORelocation type and buffer it to be
+/// written out after we are finished writing out sections.
+void MachOWriter::CalculateRelocations(MachOSection &MOS) {
+  for (unsigned i = 0, e = MOS.Relocations.size(); i != e; ++i) {
+    MachineRelocation &MR = MOS.Relocations[i];
+    unsigned TargetSection = MR.getConstantVal();
+    unsigned TargetAddr = 0;
+    unsigned TargetIndex = 0;
+
+    // This is a scattered relocation entry if it points to a global value with
+    // a non-zero offset.
+    bool Scattered = false;
+    bool Extern = false;
+
+    // Since we may not have seen the GlobalValue we were interested in yet at
+    // the time we emitted the relocation for it, fix it up now so that it
+    // points to the offset into the correct section.
+    if (MR.isGlobalValue()) {
+      GlobalValue *GV = MR.getGlobalValue();
+      MachOSection *MOSPtr = GVSection[GV];
+      intptr_t Offset = GVOffset[GV];
+
+      // If we have never seen the global before, it must be to a symbol
+      // defined in another module (N_UNDF).
+      if (!MOSPtr) {
+        // FIXME: need to append stub suffix
+        Extern = true;
+        TargetAddr = 0;
+        TargetIndex = GVOffset[GV];
+      } else {
+        Scattered = TargetSection != 0;
+        TargetSection = MOSPtr->Index;
+      }
+      MR.setResultPointer((void*)Offset);
+    }
+
+    // If the symbol is locally defined, pass in the address of the section and
+    // the section index to the code which will generate the target relocation.
+    if (!Extern) {
+      MachOSection &To = *SectionList[TargetSection - 1];
+      TargetAddr = To.addr;
+      TargetIndex = To.Index;
+    }
+
+    OutputBuffer RelocOut(MOS.RelocBuffer, is64Bit, isLittleEndian);
+    OutputBuffer SecOut(MOS.SectionData, is64Bit, isLittleEndian);
+
+    MOS.nreloc += GetTargetRelocation(MR, MOS.Index, TargetAddr, TargetIndex,
+                                      RelocOut, SecOut, Scattered, Extern);
+  }
+}
+
+// InitMem - Write the value of a Constant to the specified memory location,
+// converting it into bytes and relocations.
+void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset,
+                          const TargetData *TD,
+                          std::vector<MachineRelocation> &MRs) {
+  typedef std::pair<const Constant*, intptr_t> CPair;
+  std::vector<CPair> WorkList;
+
+  WorkList.push_back(CPair(C,(intptr_t)Addr + Offset));
+
+  intptr_t ScatteredOffset = 0;
+
+  while (!WorkList.empty()) {
+    const Constant *PC = WorkList.back().first;
+    intptr_t PA = WorkList.back().second;
+    WorkList.pop_back();
+
+    if (isa<UndefValue>(PC)) {
+      continue;
+    } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(PC)) {
+      unsigned ElementSize = TD->getTypeSize(CP->getType()->getElementType());
+      for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
+        WorkList.push_back(CPair(CP->getOperand(i), PA+i*ElementSize));
+    } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(PC)) {
+      //
+      // FIXME: Handle ConstantExpression. See EE::getConstantValue()
+      //
+      switch (CE->getOpcode()) {
+      case Instruction::GetElementPtr: {
+        SmallVector<Value*, 8> Indices(CE->op_begin()+1, CE->op_end());
+        ScatteredOffset = TD->getIndexedOffset(CE->getOperand(0)->getType(),
+                                               &Indices[0], Indices.size());
+        WorkList.push_back(CPair(CE->getOperand(0), PA));
+        break;
+      }
+      case Instruction::Add:
+      default:
+        cerr << "ConstantExpr not handled as global var init: " << *CE << "\n";
+        abort();
+        break;
+      }
+    } else if (PC->getType()->isFirstClassType()) {
+      unsigned char *ptr = (unsigned char *)PA;
+      switch (PC->getType()->getTypeID()) {
+      case Type::IntegerTyID: {
+        unsigned NumBits = cast<IntegerType>(PC->getType())->getBitWidth();
+        uint64_t val = cast<ConstantInt>(PC)->getZExtValue();
+        if (NumBits <= 8)
+          ptr[0] = val;
+        else if (NumBits <= 16) {
+          if (TD->isBigEndian())
+            val = ByteSwap_16(val);
+          ptr[0] = val;
+          ptr[1] = val >> 8;
+        } else if (NumBits <= 32) {
+          if (TD->isBigEndian())
+            val = ByteSwap_32(val);
+          ptr[0] = val;
+          ptr[1] = val >> 8;
+          ptr[2] = val >> 16;
+          ptr[3] = val >> 24;
+        } else if (NumBits <= 64) {
+          if (TD->isBigEndian())
+            val = ByteSwap_64(val);
+          ptr[0] = val;
+          ptr[1] = val >> 8;
+          ptr[2] = val >> 16;
+          ptr[3] = val >> 24;
+          ptr[4] = val >> 32;
+          ptr[5] = val >> 40;
+          ptr[6] = val >> 48;
+          ptr[7] = val >> 56;
+        } else {
+          assert(0 && "Not implemented: bit widths > 64");
+        }
+        break;
+      }
+      case Type::FloatTyID: {
+        uint64_t val = FloatToBits(cast<ConstantFP>(PC)->getValue());
+        if (TD->isBigEndian())
+          val = ByteSwap_32(val);
+        ptr[0] = val;
+        ptr[1] = val >> 8;
+        ptr[2] = val >> 16;
+        ptr[3] = val >> 24;
+        break;
+      }
+      case Type::DoubleTyID: {
+        uint64_t val = DoubleToBits(cast<ConstantFP>(PC)->getValue());
+        if (TD->isBigEndian())
+          val = ByteSwap_64(val);
+        ptr[0] = val;
+        ptr[1] = val >> 8;
+        ptr[2] = val >> 16;
+        ptr[3] = val >> 24;
+        ptr[4] = val >> 32;
+        ptr[5] = val >> 40;
+        ptr[6] = val >> 48;
+        ptr[7] = val >> 56;
+        break;
+      }
+      case Type::PointerTyID:
+        if (isa<ConstantPointerNull>(PC))
+          memset(ptr, 0, TD->getPointerSize());
+        else if (const GlobalValue* GV = dyn_cast<GlobalValue>(PC)) {
+          // FIXME: what about function stubs?
+          MRs.push_back(MachineRelocation::getGV(PA-(intptr_t)Addr,
+                                                 MachineRelocation::VANILLA,
+                                                 const_cast<GlobalValue*>(GV),
+                                                 ScatteredOffset));
+          ScatteredOffset = 0;
+        } else
+          assert(0 && "Unknown constant pointer type!");
+        break;
+      default:
+        cerr << "ERROR: Constant unimp for type: " << *PC->getType() << "\n";
+        abort();
+      }
+    } else if (isa<ConstantAggregateZero>(PC)) {
+      memset((void*)PA, 0, (size_t)TD->getTypeSize(PC->getType()));
+    } else if (const ConstantArray *CPA = dyn_cast<ConstantArray>(PC)) {
+      unsigned ElementSize = TD->getTypeSize(CPA->getType()->getElementType());
+      for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
+        WorkList.push_back(CPair(CPA->getOperand(i), PA+i*ElementSize));
+    } else if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(PC)) {
+      const StructLayout *SL =
+        TD->getStructLayout(cast<StructType>(CPS->getType()));
+      for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
+        WorkList.push_back(CPair(CPS->getOperand(i),
+                                 PA+SL->getElementOffset(i)));
+    } else {
+      cerr << "Bad Type: " << *PC->getType() << "\n";
+      assert(0 && "Unknown constant type to initialize memory with!");
+    }
+  }
+}
+
+MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect,
+                   TargetMachine &TM) :
+  GV(gv), n_strx(0), n_type(sect == NO_SECT ? N_UNDF : N_SECT),
+  n_sect(sect), n_desc(0), n_value(0) {
+
+  const TargetAsmInfo *TAI = TM.getTargetAsmInfo();
+
+  switch (GV->getLinkage()) {
+  default:
+    assert(0 && "Unexpected linkage type!");
+    break;
+  case GlobalValue::WeakLinkage:
+  case GlobalValue::LinkOnceLinkage:
+    assert(!isa<Function>(gv) && "Unexpected linkage type for Function!");
+    // FALL THROUGH: weak and linkonce globals are externally visible.
+  case GlobalValue::ExternalLinkage:
+    GVName = TAI->getGlobalPrefix() + name;
+    n_type |= GV->hasHiddenVisibility() ? N_PEXT : N_EXT;
+    break;
+  case GlobalValue::InternalLinkage:
+    GVName = TAI->getGlobalPrefix() + name;
+    break;
+  }
+}
diff --git a/lib/CodeGen/MachOWriter.h b/lib/CodeGen/MachOWriter.h
new file mode 100644
index 0000000..6d88832
--- /dev/null
+++ b/lib/CodeGen/MachOWriter.h
@@ -0,0 +1,627 @@
+//=== MachOWriter.h - Target-independent Mach-O writer support --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Nate Begeman and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachOWriter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MACHOWRITER_H
+#define MACHOWRITER_H
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetMachOWriterInfo.h"
+
+namespace llvm {
+  class GlobalVariable;
+  class Mangler;
+  class MachineCodeEmitter;
+  class MachOCodeEmitter;
+  class OutputBuffer;
+
+  /// MachOSym - This struct contains information about each symbol that is
+  /// added to the logical symbol table for the module. This is eventually
+  /// turned into a real symbol table in the file.
+  struct MachOSym {
+    const GlobalValue *GV;  // The global value this corresponds to.
+    std::string GVName;     // The mangled name of the global value.
+    uint32_t n_strx;        // index into the string table
+    uint8_t  n_type;        // type flag
+    uint8_t  n_sect;        // section number or NO_SECT
+    int16_t  n_desc;        // see <mach-o/stab.h>
+    uint64_t n_value;       // value for this symbol (or stab offset)
+
+    // Constants for the n_sect field
+    // see <mach-o/nlist.h>
+    enum { NO_SECT = 0 };   // symbol is not in any section
+
+    // Constants for the n_type field
+    // see <mach-o/nlist.h>
+    enum { N_UNDF = 0x0,    // undefined, n_sect == NO_SECT
+           N_ABS  = 0x2,    // absolute, n_sect == NO_SECT
+           N_SECT = 0xe,    // defined in section number n_sect
+           N_PBUD = 0xc,    // prebound undefined (defined in a dylib)
+           N_INDR = 0xa     // indirect
+    };
+    // The following bits are OR'd into the types above. For example, a type
+    // of 0x0f would be an external N_SECT symbol (0x0e | 0x01).
+ enum { N_EXT = 0x01, // external symbol bit + N_PEXT = 0x10 // private external symbol bit + }; + + // Constants for the n_desc field + // see <mach-o/loader.h> + enum { REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0, + REFERENCE_FLAG_UNDEFINED_LAZY = 1, + REFERENCE_FLAG_DEFINED = 2, + REFERENCE_FLAG_PRIVATE_DEFINED = 3, + REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4, + REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5 + }; + enum { N_NO_DEAD_STRIP = 0x0020, // symbol is not to be dead stripped + N_WEAK_REF = 0x0040, // symbol is weak referenced + N_WEAK_DEF = 0x0080 // coalesced symbol is a weak definition + }; + + MachOSym(const GlobalValue *gv, std::string name, uint8_t sect, + TargetMachine &TM); + }; + + /// MachOWriter - This class implements the common target-independent code for + /// writing Mach-O files. Targets should derive a class from this to + /// parameterize the output format. + /// + class MachOWriter : public MachineFunctionPass { + friend class MachOCodeEmitter; + public: + static char ID; + MachineCodeEmitter &getMachineCodeEmitter() const { + return *(MachineCodeEmitter*)MCE; + } + + MachOWriter(std::ostream &O, TargetMachine &TM); + virtual ~MachOWriter(); + + virtual const char *getPassName() const { + return "Mach-O Writer"; + } + + typedef std::vector<unsigned char> DataBuffer; + protected: + /// Output stream to send the resultant object file to. + /// + std::ostream &O; + + /// Target machine description. + /// + TargetMachine &TM; + + /// Mang - The object used to perform name mangling for this module. + /// + Mangler *Mang; + + /// MCE - The MachineCodeEmitter object that we are exposing to emit machine + /// code for functions to the .o file. + MachOCodeEmitter *MCE; + + /// is64Bit/isLittleEndian - This information is inferred from the target + /// machine directly, indicating what header values and flags to set. + bool is64Bit, isLittleEndian; + + /// doInitialization - Emit the file header and all of the global variables + /// for the module to the Mach-O file. + bool doInitialization(Module &M); + + bool runOnMachineFunction(MachineFunction &MF); + + /// doFinalization - Now that the module has been completely processed, emit + /// the Mach-O file to 'O'. + bool doFinalization(Module &M); + + /// MachOHeader - This struct contains the header information about a + /// specific architecture type/subtype pair that is emitted to the file. + struct MachOHeader { + uint32_t magic; // mach magic number identifier + uint32_t filetype; // type of file + uint32_t ncmds; // number of load commands + uint32_t sizeofcmds; // the size of all the load commands + uint32_t flags; // flags + uint32_t reserved; // 64-bit only + + /// HeaderData - The actual data for the header which we are building + /// up for emission to the file. 
+      DataBuffer HeaderData;
+
+      // Constants for the filetype field
+      // see <mach-o/loader.h> for additional info on the various types
+      enum { MH_OBJECT     = 1,  // relocatable object file
+             MH_EXECUTE    = 2,  // demand paged executable file
+             MH_FVMLIB     = 3,  // fixed VM shared library file
+             MH_CORE       = 4,  // core file
+             MH_PRELOAD    = 5,  // preloaded executable file
+             MH_DYLIB      = 6,  // dynamically bound shared library
+             MH_DYLINKER   = 7,  // dynamic link editor
+             MH_BUNDLE     = 8,  // dynamically bound bundle file
+             MH_DYLIB_STUB = 9,  // shared library stub for static linking only
+             MH_DSYM       = 10  // companion file with only debug sections
+      };
+
+      // Constants for the flags field
+      enum { MH_NOUNDEFS = 1 << 0,
+             // the object file has no undefined references
+             MH_INCRLINK = 1 << 1,
+             // the object file is the output of an incremental link against
+             // a base file and cannot be link edited again
+             MH_DYLDLINK = 1 << 2,
+             // the object file is input for the dynamic linker and cannot be
+             // statically link edited again.
+             MH_BINDATLOAD = 1 << 3,
+             // the object file's undefined references are bound by the
+             // dynamic linker when loaded.
+             MH_PREBOUND = 1 << 4,
+             // the file has its dynamic undefined references prebound
+             MH_SPLIT_SEGS = 1 << 5,
+             // the file has its read-only and read-write segments split
+             // see <mach/shared_memory_server.h>
+             MH_LAZY_INIT = 1 << 6,
+             // the shared library init routine is to be run lazily via
+             // catching memory faults to its writable segments (obsolete)
+             MH_TWOLEVEL = 1 << 7,
+             // the image is using two-level namespace bindings
+             MH_FORCE_FLAT = 1 << 8,
+             // the executable is forcing all images to use flat namespace
+             // bindings.
+             MH_NOMULTIDEFS = 1 << 9,
+             // this umbrella guarantees no multiple definitions of symbols
+             // in its sub-images so the two-level namespace hints can
+             // always be used.
+             MH_NOFIXPREBINDING = 1 << 10,
+             // do not have dyld notify the prebinding agent about this
+             // executable.
+             MH_PREBINDABLE = 1 << 11,
+             // the binary is not prebound but can have its prebinding
+             // redone. only used when MH_PREBOUND is not set.
+             MH_ALLMODSBOUND = 1 << 12,
+             // indicates that this binary binds to all two-level namespace
+             // modules of its dependent libraries. Only used when
+             // MH_PREBINDABLE and MH_TWOLEVEL are both set.
+             MH_SUBSECTIONS_VIA_SYMBOLS = 1 << 13,
+             // safe to divide up the sections into sub-sections via symbols
+             // for dead code stripping.
+             MH_CANONICAL = 1 << 14,
+             // the binary has been canonicalized via the unprebind operation
+             MH_WEAK_DEFINES = 1 << 15,
+             // the final linked image contains external weak symbols
+             MH_BINDS_TO_WEAK = 1 << 16,
+             // the final linked image uses weak symbols
+             MH_ALLOW_STACK_EXECUTION = 1 << 17
+             // When this bit is set, all stacks in the task will be given
+             // stack execution privilege. Only used in MH_EXECUTE filetype
+      };
+
+      MachOHeader() : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0),
+                      reserved(0) { }
+
+      /// cmdSize - This routine returns the size of the MachOHeader as written
+      /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+      unsigned cmdSize(bool is64Bit) const {
+        if (is64Bit)
+          return 8 * sizeof(uint32_t);
+        else
+          return 7 * sizeof(uint32_t);
+      }
+
+      /// setMagic - This routine sets the appropriate value for the 'magic'
+      /// field based on pointer size and endianness.
+      void setMagic(bool isLittleEndian, bool is64Bit) {
+        if (isLittleEndian)
+          if (is64Bit) magic = 0xcffaedfe;
+          else         magic = 0xcefaedfe;
+        else
+          if (is64Bit) magic = 0xfeedfacf;
+          else         magic = 0xfeedface;
+      }
+    };
+
+    /// Header - An instance of MachOHeader that we will update while we build
+    /// the file, and then emit during finalization.
+    MachOHeader Header;
+
+    /// MachOSegment - This struct contains the necessary information to
+    /// emit the load commands for each section in the file.
+    struct MachOSegment {
+      uint32_t    cmd;      // LC_SEGMENT or LC_SEGMENT_64
+      uint32_t    cmdsize;  // Total size of this struct and section commands
+      std::string segname;  // segment name
+      uint64_t    vmaddr;   // address of this segment
+      uint64_t    vmsize;   // size of this segment, may be larger than filesize
+      uint64_t    fileoff;  // offset in file
+      uint64_t    filesize; // amount to read from file
+      uint32_t    maxprot;  // maximum VM protection
+      uint32_t    initprot; // initial VM protection
+      uint32_t    nsects;   // number of sections in this segment
+      uint32_t    flags;    // flags
+
+      // The following constants are getting pulled in by one of the
+      // system headers, which creates a neat clash with the enum.
+#if !defined(VM_PROT_NONE)
+#define VM_PROT_NONE    0x00
+#endif
+#if !defined(VM_PROT_READ)
+#define VM_PROT_READ    0x01
+#endif
+#if !defined(VM_PROT_WRITE)
+#define VM_PROT_WRITE   0x02
+#endif
+#if !defined(VM_PROT_EXECUTE)
+#define VM_PROT_EXECUTE 0x04
+#endif
+#if !defined(VM_PROT_ALL)
+#define VM_PROT_ALL     0x07
+#endif
+
+      // Constants for the vm protection fields
+      // see <mach-o/vm_prot.h>
+      enum { SEG_VM_PROT_NONE    = VM_PROT_NONE,
+             SEG_VM_PROT_READ    = VM_PROT_READ,    // read permission
+             SEG_VM_PROT_WRITE   = VM_PROT_WRITE,   // write permission
+             SEG_VM_PROT_EXECUTE = VM_PROT_EXECUTE,
+             SEG_VM_PROT_ALL     = VM_PROT_ALL
+      };
+
+      // Constants for the cmd field
+      // see <mach-o/loader.h>
+      enum { LC_SEGMENT    = 0x01,  // segment of this file to be mapped
+             LC_SEGMENT_64 = 0x19   // 64-bit segment of this file to be mapped
+      };
+
+      /// cmdSize - This routine returns the size of the MachOSegment as written
+      /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+      unsigned cmdSize(bool is64Bit) const {
+        if (is64Bit)
+          return 6 * sizeof(uint32_t) + 4 * sizeof(uint64_t) + 16;
+        else
+          return 10 * sizeof(uint32_t) + 16;   // addresses only 32 bits
+      }
+
+      MachOSegment(const std::string &seg, bool is64Bit)
+        : cmd(is64Bit ? LC_SEGMENT_64 : LC_SEGMENT), cmdsize(0), segname(seg),
+          vmaddr(0), vmsize(0), fileoff(0), filesize(0), maxprot(VM_PROT_ALL),
+          initprot(VM_PROT_ALL), nsects(0), flags(0) { }
+    };
+
+    /// MachOSection - This struct contains information about each section in a
+    /// particular segment that is emitted to the file. This is eventually
+    /// turned into the SectionCommand in the load command for a particular
+    /// segment.
+    struct MachOSection {
+      std::string sectname; // name of this section
+      std::string segname;  // segment this section goes in
+      uint64_t addr;        // memory address of this section
+      uint64_t size;        // size in bytes of this section
+      uint32_t offset;      // file offset of this section
+      uint32_t align;       // section alignment (power of 2)
+      uint32_t reloff;      // file offset of relocation entries
+      uint32_t nreloc;      // number of relocation entries
+      uint32_t flags;       // flags (section type and attributes)
+      uint32_t reserved1;   // reserved (for offset or index)
+      uint32_t reserved2;   // reserved (for count or sizeof)
+      uint32_t reserved3;   // reserved (64 bit only)
+
+      /// A unique number for this section, which will be used to match symbols
+      /// to the correct section.
+      uint32_t Index;
+
+      /// SectionData - The actual data for this section which we are building
+      /// up for emission to the file.
+      DataBuffer SectionData;
+
+      /// RelocBuffer - A buffer to hold the mach-o relocations before we write
+      /// them out at the appropriate location in the file.
+      DataBuffer RelocBuffer;
+
+      /// Relocations - The relocations that we have encountered so far in this
+      /// section that we will need to convert to MachORelocation entries when
+      /// the file is written.
+      std::vector<MachineRelocation> Relocations;
+
+      // Constants for the section types (low 8 bits of flags field)
+      // see <mach-o/loader.h>
+      enum { S_REGULAR = 0,
+             // regular section
+             S_ZEROFILL = 1,
+             // zero fill on demand section
+             S_CSTRING_LITERALS = 2,
+             // section with only literal C strings
+             S_4BYTE_LITERALS = 3,
+             // section with only 4 byte literals
+             S_8BYTE_LITERALS = 4,
+             // section with only 8 byte literals
+             S_LITERAL_POINTERS = 5,
+             // section with only pointers to literals
+             S_NON_LAZY_SYMBOL_POINTERS = 6,
+             // section with only non-lazy symbol pointers
+             S_LAZY_SYMBOL_POINTERS = 7,
+             // section with only lazy symbol pointers
+             S_SYMBOL_STUBS = 8,
+             // section with only symbol stubs
+             // byte size of stub in the reserved2 field
+             S_MOD_INIT_FUNC_POINTERS = 9,
+             // section with only function pointers for initialization
+             S_MOD_TERM_FUNC_POINTERS = 10,
+             // section with only function pointers for termination
+             S_COALESCED = 11,
+             // section contains symbols that are coalesced
+             S_GB_ZEROFILL = 12,
+             // zero fill on demand section (that can be larger than 4GB)
+             S_INTERPOSING = 13,
+             // section with only pairs of function pointers for interposing
+             S_16BYTE_LITERALS = 14
+             // section with only 16 byte literals
+      };
+
+      // Constants for the section flags (high 24 bits of flags field)
+      // see <mach-o/loader.h>
+      enum { S_ATTR_PURE_INSTRUCTIONS = 1 << 31,
+             // section contains only true machine instructions
+             S_ATTR_NO_TOC = 1 << 30,
+             // section contains coalesced symbols that are not to be in a
+             // ranlib table of contents
+             S_ATTR_STRIP_STATIC_SYMS = 1 << 29,
+             // ok to strip static symbols in this section in files with the
+             // MH_DYLDLINK flag
+             S_ATTR_NO_DEAD_STRIP = 1 << 28,
+             // no dead stripping
+             S_ATTR_LIVE_SUPPORT = 1 << 27,
+             // blocks are live if they reference live blocks
+             S_ATTR_SELF_MODIFYING_CODE = 1 << 26,
+             // used with i386 code stubs written on by dyld
+             S_ATTR_DEBUG = 1 << 25,
+             // a debug section
+             S_ATTR_SOME_INSTRUCTIONS = 1 << 10,
+             // section contains some machine instructions
+             S_ATTR_EXT_RELOC = 1 << 9,
+             // section has external relocation entries
+             S_ATTR_LOC_RELOC = 1 << 8
+             // section has local relocation entries
+      };
+
+      /// cmdSize - This routine returns the size of the MachOSection as written
+      /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+      unsigned cmdSize(bool is64Bit) const {
+        if (is64Bit)
+          return 7 * sizeof(uint32_t) + 2 * sizeof(uint64_t) + 32;
+        else
+          return 9 * sizeof(uint32_t) + 32;   // addresses only 32 bits
+      }
+
+      MachOSection(const std::string &seg, const std::string &sect)
+        : sectname(sect), segname(seg), addr(0), size(0), offset(0), align(2),
+          reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0),
+          reserved3(0) { }
+    };
+
+  private:
+
+    /// SectionList - This is the list of sections that we have emitted to the
+    /// file. Once the file has been completely built, the segment load command
+    /// SectionCommands are constructed from this info.
+    std::vector<MachOSection*> SectionList;
+
+    /// SectionLookup - This is a mapping from section name to SectionList
+    /// entry.
+    std::map<std::string, MachOSection*> SectionLookup;
+
+    /// GVSection - This is a mapping from a GlobalValue to a MachOSection,
+    /// to aid in emitting relocations.
+    std::map<GlobalValue*, MachOSection*> GVSection;
+
+    /// GVOffset - This is a mapping from a GlobalValue to an offset from the
+    /// start of the section in which the GV resides, to aid in emitting
+    /// relocations.
+    std::map<GlobalValue*, intptr_t> GVOffset;
+
+    /// getSection - Return the section with the specified name, creating a new
+    /// section if one does not already exist.
+    MachOSection *getSection(const std::string &seg, const std::string &sect,
+                             unsigned Flags = 0) {
+      MachOSection *MOS = SectionLookup[seg+sect];
+      if (MOS) return MOS;
+
+      MOS = new MachOSection(seg, sect);
+      SectionList.push_back(MOS);
+      MOS->Index = SectionList.size();
+      MOS->flags = MachOSection::S_REGULAR | Flags;
+      SectionLookup[seg+sect] = MOS;
+      return MOS;
+    }
+    MachOSection *getTextSection(bool isCode = true) {
+      if (isCode)
+        return getSection("__TEXT", "__text",
+                          MachOSection::S_ATTR_PURE_INSTRUCTIONS |
+                          MachOSection::S_ATTR_SOME_INSTRUCTIONS);
+      else
+        return getSection("__TEXT", "__text");
+    }
+    MachOSection *getBSSSection() {
+      return getSection("__DATA", "__bss", MachOSection::S_ZEROFILL);
+    }
+    MachOSection *getDataSection() {
+      return getSection("__DATA", "__data");
+    }
+    MachOSection *getConstSection(Constant *C) {
+      const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+      if (CVA && CVA->isCString())
+        return getSection("__TEXT", "__cstring",
+                          MachOSection::S_CSTRING_LITERALS);
+
+      const Type *Ty = C->getType();
+      if (Ty->isPrimitiveType() || Ty->isInteger()) {
+        unsigned Size = TM.getTargetData()->getTypeSize(Ty);
+        switch(Size) {
+        default: break; // Fall through to __TEXT,__const
+        case 4:
+          return getSection("__TEXT", "__literal4",
+                            MachOSection::S_4BYTE_LITERALS);
+        case 8:
+          return getSection("__TEXT", "__literal8",
+                            MachOSection::S_8BYTE_LITERALS);
+        case 16:
+          return getSection("__TEXT", "__literal16",
+                            MachOSection::S_16BYTE_LITERALS);
+        }
+      }
+      return getSection("__TEXT", "__const");
+    }
+    MachOSection *getJumpTableSection() {
+      if (TM.getRelocationModel() == Reloc::PIC_)
+        return getTextSection(false);
+      else
+        return getSection("__TEXT", "__const");
+    }
+
+    /// MachOSymTab - This struct contains information about the offsets and
+    /// size of symbol table information.
+  /// MachOSymTab - This struct contains information about the offsets and
+  /// size of the symbol table information, for the LC_SYMTAB load command.
+  struct MachOSymTab {
+    uint32_t cmd;     // LC_SYMTAB
+    uint32_t cmdsize; // sizeof( MachOSymTab )
+    uint32_t symoff;  // symbol table offset
+    uint32_t nsyms;   // number of symbol table entries
+    uint32_t stroff;  // string table offset
+    uint32_t strsize; // string table size in bytes
+
+    // Constants for the cmd field
+    // see <mach-o/loader.h>
+    enum { LC_SYMTAB = 0x02  // link-edit stab symbol table info
+    };
+
+    MachOSymTab() : cmd(LC_SYMTAB), cmdsize(6 * sizeof(uint32_t)), symoff(0),
+      nsyms(0), stroff(0), strsize(0) { }
+  };
+
+  /// MachODySymTab - This struct contains information about the offsets and
+  /// sizes of the dynamic link-edit symbol table information, for the
+  /// LC_DYSYMTAB load command.
+  struct MachODySymTab {
+    uint32_t cmd;            // LC_DYSYMTAB
+    uint32_t cmdsize;        // sizeof( MachODySymTab )
+    uint32_t ilocalsym;      // index to local symbols
+    uint32_t nlocalsym;      // number of local symbols
+    uint32_t iextdefsym;     // index to externally defined symbols
+    uint32_t nextdefsym;     // number of externally defined symbols
+    uint32_t iundefsym;      // index to undefined symbols
+    uint32_t nundefsym;      // number of undefined symbols
+    uint32_t tocoff;         // file offset to table of contents
+    uint32_t ntoc;           // number of entries in table of contents
+    uint32_t modtaboff;      // file offset to module table
+    uint32_t nmodtab;        // number of module table entries
+    uint32_t extrefsymoff;   // offset to referenced symbol table
+    uint32_t nextrefsyms;    // number of referenced symbol table entries
+    uint32_t indirectsymoff; // file offset to the indirect symbol table
+    uint32_t nindirectsyms;  // number of indirect symbol table entries
+    uint32_t extreloff;      // offset to external relocation entries
+    uint32_t nextrel;        // number of external relocation entries
+    uint32_t locreloff;      // offset to local relocation entries
+    uint32_t nlocrel;        // number of local relocation entries
+
+    // Constants for the cmd field
+    // see <mach-o/loader.h>
+    enum { LC_DYSYMTAB = 0x0B  // dynamic link-edit symbol table info
+    };
+
+    MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)),
+      ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0),
+      iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0),
+      nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0),
+      nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) { }
+  };
+
+  /// SymTab - The "stab" style symbol table information.
+  MachOSymTab   SymTab;
+  /// DySymTab - symbol table info for the dynamic link editor.
+  MachODySymTab DySymTab;
+
+  struct MachOSymCmp {
+    // FIXME: this does not appear to be sorting 'f' after 'F'
+    bool operator()(const MachOSym &LHS, const MachOSym &RHS) {
+      return LHS.GVName < RHS.GVName;
+    }
+  };
+
+  /// PartitionByLocal - Simple boolean predicate that returns true if Sym is
+  /// a local symbol rather than an external symbol.
+  static bool PartitionByLocal(const MachOSym &Sym);
+
+  /// PartitionByDefined - Simple boolean predicate that returns true if Sym
+  /// is defined in this module.
+  static bool PartitionByDefined(const MachOSym &Sym);
+
+ protected:
+
+  /// SymbolTable - This is the list of symbols we have emitted to the file.
+  /// This actually gets rearranged before emission to the file (to put the
+  /// local symbols first in the list).
+  std::vector<MachOSym> SymbolTable;
+
+  /// SymT - A buffer to hold the symbol table before we write it out at the
+  /// appropriate location in the file.
+  DataBuffer SymT;
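Mach-O symbol tables are emitted group by group: local symbols first, then externally defined ones, then undefined ones, and the LC_DYSYMTAB indices (ilocalsym, iextdefsym, iundefsym) fall out of the group boundaries. The two Partition predicates above support exactly that reordering. A standalone sketch of the arrangement using std::stable_partition; Sym and layoutSymbols are illustrative stand-ins for MachOSym and the writer's real logic:

    #include <algorithm>
    #include <string>
    #include <vector>

    struct Sym { std::string Name; bool Local; bool Defined; };

    static bool isLocal(const Sym &S)   { return S.Local; }
    static bool isDefined(const Sym &S) { return S.Defined; }

    // Arrange: [locals][defined externals][undefined externals], preserving
    // the relative order inside each group, then record the group sizes.
    void layoutSymbols(std::vector<Sym> &Syms, unsigned &NumLocal,
                       unsigned &NumExtDef, unsigned &NumUndef) {
      std::vector<Sym>::iterator ExtBegin =
          std::stable_partition(Syms.begin(), Syms.end(), isLocal);
      std::vector<Sym>::iterator UndefBegin =
          std::stable_partition(ExtBegin, Syms.end(), isDefined);
      NumLocal  = ExtBegin - Syms.begin();
      NumExtDef = UndefBegin - ExtBegin;
      NumUndef  = Syms.end() - UndefBegin;
    }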
+  /// StrT - A buffer to hold the string table before we write it out at the
+  /// appropriate location in the file.
+  DataBuffer StrT;
+
+  /// PendingGlobals - This is a list of externally defined symbols that we
+  /// have been asked to emit, but have not seen a reference to.  When a
+  /// reference is seen, the symbol will move from this list to the
+  /// SymbolTable.
+  std::vector<GlobalValue*> PendingGlobals;
+
+  /// DynamicSymbolTable - This is just a vector of indices into
+  /// SymbolTable to aid in emitting the DYSYMTAB load command.
+  std::vector<unsigned> DynamicSymbolTable;
+
+  static void InitMem(const Constant *C, void *Addr, intptr_t Offset,
+                      const TargetData *TD,
+                      std::vector<MachineRelocation> &MRs);
+
+ private:
+  void AddSymbolToSection(MachOSection *MOS, GlobalVariable *GV);
+  void EmitGlobal(GlobalVariable *GV);
+  void EmitHeaderAndLoadCommands();
+  void EmitSections();
+  void BufferSymbolAndStringTable();
+  void CalculateRelocations(MachOSection &MOS);
+
+  MachineRelocation GetJTRelocation(unsigned Offset,
+                                    MachineBasicBlock *MBB) const {
+    return TM.getMachOWriterInfo()->GetJTRelocation(Offset, MBB);
+  }
+
+  /// GetTargetRelocation - Emit the target-specific relocation entries for
+  /// MR, returning the number of relocation entries emitted.
+  unsigned GetTargetRelocation(MachineRelocation &MR,
+                               unsigned FromIdx,
+                               unsigned ToAddr,
+                               unsigned ToIndex,
+                               OutputBuffer &RelocOut,
+                               OutputBuffer &SecOut,
+                               bool Scattered,
+                               bool Extern) {
+    return TM.getMachOWriterInfo()->GetTargetRelocation(MR, FromIdx, ToAddr,
+                                                        ToIndex, RelocOut,
+                                                        SecOut, Scattered,
+                                                        Extern);
+  }
+  };
+}
+
+#endif
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
new file mode 100644
index 0000000..ba428c5
--- /dev/null
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -0,0 +1,287 @@
+//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect the sequence of machine instructions for a basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/LeakDetector.h"
+#include <algorithm>
+using namespace llvm;
+
+MachineBasicBlock::~MachineBasicBlock() {
+  LeakDetector::removeGarbageObject(this);
+}
+
+std::ostream& llvm::operator<<(std::ostream &OS, const MachineBasicBlock &MBB) {
+  MBB.print(OS);
+  return OS;
+}
+
+// MBBs start out as #-1. When a MBB is added to a MachineFunction, it
+// gets the next available unique MBB number. If it is removed from a
+// MachineFunction, it goes back to being #-1.
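The comment above describes a small numbering protocol that the ilist callbacks below enforce: a block holds -1 while unregistered and a dense index while it belongs to a function. A toy standalone version of that registry, with hypothetical names (Registry, add, remove):

    #include <cassert>
    #include <vector>

    // Toy version of the #-1 numbering protocol: objects get a dense index
    // while registered and revert to -1 when removed.
    struct Registry {
      std::vector<void*> Slots;
      int add(void *Obj) {                  // cf. addToMBBNumbering
        Slots.push_back(Obj);
        return (int)Slots.size() - 1;       // next available unique number
      }
      void remove(int N) {                  // cf. removeFromMBBNumbering
        assert(N >= 0 && Slots[(unsigned)N] && "object was not numbered");
        Slots[(unsigned)N] = 0;             // hole stays until renumbering
      }
    };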
+void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock* N) { + assert(N->Parent == 0 && "machine instruction already in a basic block"); + N->Parent = Parent; + N->Number = Parent->addToMBBNumbering(N); + LeakDetector::removeGarbageObject(N); +} + +void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock* N) { + assert(N->Parent != 0 && "machine instruction not in a basic block"); + N->Parent->removeFromMBBNumbering(N->Number); + N->Number = -1; + N->Parent = 0; + LeakDetector::addGarbageObject(N); +} + + +MachineInstr* ilist_traits<MachineInstr>::createSentinel() { + MachineInstr* dummy = new MachineInstr(); + LeakDetector::removeGarbageObject(dummy); + return dummy; +} + +void ilist_traits<MachineInstr>::addNodeToList(MachineInstr* N) { + assert(N->parent == 0 && "machine instruction already in a basic block"); + N->parent = parent; + LeakDetector::removeGarbageObject(N); +} + +void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr* N) { + assert(N->parent != 0 && "machine instruction not in a basic block"); + N->parent = 0; + LeakDetector::addGarbageObject(N); +} + +void ilist_traits<MachineInstr>::transferNodesFromList( + iplist<MachineInstr, ilist_traits<MachineInstr> >& fromList, + ilist_iterator<MachineInstr> first, + ilist_iterator<MachineInstr> last) { + if (parent != fromList.parent) + for (; first != last; ++first) + first->parent = parent; +} + +MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { + const TargetInstrInfo& TII = *getParent()->getTarget().getInstrInfo(); + iterator I = end(); + while (I != begin() && TII.isTerminatorInstr((--I)->getOpcode())); + if (I != end() && !TII.isTerminatorInstr(I->getOpcode())) ++I; + return I; +} + +void MachineBasicBlock::dump() const { + print(*cerr.stream()); +} + +static inline void OutputReg(std::ostream &os, unsigned RegNo, + const MRegisterInfo *MRI = 0) { + if (!RegNo || MRegisterInfo::isPhysicalRegister(RegNo)) { + if (MRI) + os << " %" << MRI->get(RegNo).Name; + else + os << " %mreg(" << RegNo << ")"; + } else + os << " %reg" << RegNo; +} + +void MachineBasicBlock::print(std::ostream &OS) const { + const MachineFunction *MF = getParent(); + if(!MF) { + OS << "Can't print out MachineBasicBlock because parent MachineFunction" + << " is null\n"; + return; + } + + const BasicBlock *LBB = getBasicBlock(); + OS << "\n"; + if (LBB) OS << LBB->getName() << ": "; + OS << (const void*)this + << ", LLVM BB @" << (const void*) LBB << ", ID#" << getNumber(); + if (isLandingPad()) OS << ", EH LANDING PAD"; + OS << ":\n"; + + const MRegisterInfo *MRI = MF->getTarget().getRegisterInfo(); + if (livein_begin() != livein_end()) { + OS << "Live Ins:"; + for (const_livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I) + OutputReg(OS, *I, MRI); + OS << "\n"; + } + // Print the preds of this block according to the CFG. + if (!pred_empty()) { + OS << " Predecessors according to CFG:"; + for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI) + OS << " " << *PI << " (#" << (*PI)->getNumber() << ")"; + OS << "\n"; + } + + for (const_iterator I = begin(); I != end(); ++I) { + OS << "\t"; + I->print(OS, &getParent()->getTarget()); + } + + // Print the successors of this block according to the CFG. 
+ if (!succ_empty()) { + OS << " Successors according to CFG:"; + for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) + OS << " " << *SI << " (#" << (*SI)->getNumber() << ")"; + OS << "\n"; + } +} + +void MachineBasicBlock::removeLiveIn(unsigned Reg) { + livein_iterator I = std::find(livein_begin(), livein_end(), Reg); + assert(I != livein_end() && "Not a live in!"); + LiveIns.erase(I); +} + +void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) { + MachineFunction::BasicBlockListType &BBList =getParent()->getBasicBlockList(); + getParent()->getBasicBlockList().splice(NewAfter, BBList, this); +} + +void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) { + MachineFunction::BasicBlockListType &BBList =getParent()->getBasicBlockList(); + MachineFunction::iterator BBI = NewBefore; + getParent()->getBasicBlockList().splice(++BBI, BBList, this); +} + + +void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) { + Successors.push_back(succ); + succ->addPredecessor(this); +} + +void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) { + succ->removePredecessor(this); + succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); + assert(I != Successors.end() && "Not a current successor!"); + Successors.erase(I); +} + +MachineBasicBlock::succ_iterator MachineBasicBlock::removeSuccessor(succ_iterator I) { + assert(I != Successors.end() && "Not a current successor!"); + (*I)->removePredecessor(this); + return(Successors.erase(I)); +} + +void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) { + Predecessors.push_back(pred); +} + +void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) { + std::vector<MachineBasicBlock *>::iterator I = + std::find(Predecessors.begin(), Predecessors.end(), pred); + assert(I != Predecessors.end() && "Pred is not a predecessor of this block!"); + Predecessors.erase(I); +} + +bool MachineBasicBlock::isSuccessor(MachineBasicBlock *MBB) const { + std::vector<MachineBasicBlock *>::const_iterator I = + std::find(Successors.begin(), Successors.end(), MBB); + return I != Successors.end(); +} + +/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to +/// 'Old', change the code and CFG so that it branches to 'New' instead. +void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, + MachineBasicBlock *New) { + assert(Old != New && "Cannot replace self with self!"); + + MachineBasicBlock::iterator I = end(); + while (I != begin()) { + --I; + if (!(I->getInstrDescriptor()->Flags & M_TERMINATOR_FLAG)) break; + + // Scan the operands of this machine instruction, replacing any uses of Old + // with New. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (I->getOperand(i).isMachineBasicBlock() && + I->getOperand(i).getMachineBasicBlock() == Old) + I->getOperand(i).setMachineBasicBlock(New); + } + + // Update the successor information. If New was already a successor, just + // remove the link to Old instead of creating another one. PR 1444. + removeSuccessor(Old); + if (!isSuccessor(New)) + addSuccessor(New); +} + +/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the +/// CFG to be inserted. If we have proven that MBB can only branch to DestA and +/// DestB, remove any other MBB successors from the CFG. DestA and DestB can +/// be null. +/// Besides DestA and DestB, retain other edges leading to LandingPads (currently +/// there can be only one; we don't check or require that here). 
+/// Note it is possible that DestA and/or DestB are LandingPads. +bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, + MachineBasicBlock *DestB, + bool isCond) { + bool MadeChange = false; + bool AddedFallThrough = false; + + MachineBasicBlock *FallThru = getNext(); + + // If this block ends with a conditional branch that falls through to its + // successor, set DestB as the successor. + if (isCond) { + if (DestB == 0 && FallThru != getParent()->end()) { + DestB = FallThru; + AddedFallThrough = true; + } + } else { + // If this is an unconditional branch with no explicit dest, it must just be + // a fallthrough into DestB. + if (DestA == 0 && FallThru != getParent()->end()) { + DestA = FallThru; + AddedFallThrough = true; + } + } + + MachineBasicBlock::succ_iterator SI = succ_begin(); + MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB; + while (SI != succ_end()) { + if (*SI == DestA && DestA == DestB) { + DestA = DestB = 0; + ++SI; + } else if (*SI == DestA) { + DestA = 0; + ++SI; + } else if (*SI == DestB) { + DestB = 0; + ++SI; + } else if ((*SI)->isLandingPad() && + *SI!=OrigDestA && *SI!=OrigDestB) { + ++SI; + } else { + // Otherwise, this is a superfluous edge, remove it. + SI = removeSuccessor(SI); + MadeChange = true; + } + } + if (!AddedFallThrough) { + assert(DestA == 0 && DestB == 0 && + "MachineCFG is missing edges!"); + } else if (isCond) { + assert(DestA == 0 && "MachineCFG is missing edges!"); + } + return MadeChange; +} diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp new file mode 100644 index 0000000..c762ae5 --- /dev/null +++ b/lib/CodeGen/MachineFunction.cpp @@ -0,0 +1,483 @@ +//===-- MachineFunction.cpp -----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Collect native machine code information for a function. This allows +// target-specific information about the generated code to be stored with each +// function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/SSARegMap.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/LeakDetector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Config/config.h" +#include <fstream> +#include <sstream> +using namespace llvm; + +static AnnotationID MF_AID( + AnnotationManager::getID("CodeGen::MachineCodeForFunction")); + +// Out of line virtual function to home classes. 
+void MachineFunctionPass::virtfn() {} + +namespace { + struct VISIBILITY_HIDDEN Printer : public MachineFunctionPass { + static char ID; + + std::ostream *OS; + const std::string Banner; + + Printer (std::ostream *_OS, const std::string &_Banner) + : MachineFunctionPass((intptr_t)&ID), OS (_OS), Banner (_Banner) { } + + const char *getPassName() const { return "MachineFunction Printer"; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + bool runOnMachineFunction(MachineFunction &MF) { + (*OS) << Banner; + MF.print (*OS); + return false; + } + }; + char Printer::ID = 0; +} + +/// Returns a newly-created MachineFunction Printer pass. The default output +/// stream is std::cerr; the default banner is empty. +/// +FunctionPass *llvm::createMachineFunctionPrinterPass(std::ostream *OS, + const std::string &Banner){ + return new Printer(OS, Banner); +} + +namespace { + struct VISIBILITY_HIDDEN Deleter : public MachineFunctionPass { + static char ID; + Deleter() : MachineFunctionPass((intptr_t)&ID) {} + + const char *getPassName() const { return "Machine Code Deleter"; } + + bool runOnMachineFunction(MachineFunction &MF) { + // Delete the annotation from the function now. + MachineFunction::destruct(MF.getFunction()); + return true; + } + }; + char Deleter::ID = 0; +} + +/// MachineCodeDeletion Pass - This pass deletes all of the machine code for +/// the current function, which should happen after the function has been +/// emitted to a .s file or to memory. +FunctionPass *llvm::createMachineCodeDeleter() { + return new Deleter(); +} + + + +//===---------------------------------------------------------------------===// +// MachineFunction implementation +//===---------------------------------------------------------------------===// + +MachineBasicBlock* ilist_traits<MachineBasicBlock>::createSentinel() { + MachineBasicBlock* dummy = new MachineBasicBlock(); + LeakDetector::removeGarbageObject(dummy); + return dummy; +} + +void ilist_traits<MachineBasicBlock>::transferNodesFromList( + iplist<MachineBasicBlock, ilist_traits<MachineBasicBlock> >& toList, + ilist_iterator<MachineBasicBlock> first, + ilist_iterator<MachineBasicBlock> last) { + if (Parent != toList.Parent) + for (; first != last; ++first) + first->Parent = toList.Parent; +} + +MachineFunction::MachineFunction(const Function *F, + const TargetMachine &TM) + : Annotation(MF_AID), Fn(F), Target(TM) { + SSARegMapping = new SSARegMap(); + MFInfo = 0; + FrameInfo = new MachineFrameInfo(); + ConstantPool = new MachineConstantPool(TM.getTargetData()); + UsedPhysRegs.resize(TM.getRegisterInfo()->getNumRegs()); + + // Set up jump table. + const TargetData &TD = *TM.getTargetData(); + bool IsPic = TM.getRelocationModel() == Reloc::PIC_; + unsigned EntrySize = IsPic ? 4 : TD.getPointerSize(); + unsigned Alignment = IsPic ? TD.getABITypeAlignment(Type::Int32Ty) + : TD.getPointerABIAlignment(); + JumpTableInfo = new MachineJumpTableInfo(EntrySize, Alignment); + + BasicBlocks.Parent = this; +} + +MachineFunction::~MachineFunction() { + BasicBlocks.clear(); + delete SSARegMapping; + delete MFInfo; + delete FrameInfo; + delete ConstantPool; + delete JumpTableInfo; +} + + +/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and +/// recomputes them. This guarantees that the MBB numbers are sequential, +/// dense, and match the ordering of the blocks within the function. If a +/// specific MachineBasicBlock is specified, only that block and those after +/// it are renumbered. 
+void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { + if (empty()) { MBBNumbering.clear(); return; } + MachineFunction::iterator MBBI, E = end(); + if (MBB == 0) + MBBI = begin(); + else + MBBI = MBB; + + // Figure out the block number this should have. + unsigned BlockNo = 0; + if (MBBI != begin()) + BlockNo = prior(MBBI)->getNumber()+1; + + for (; MBBI != E; ++MBBI, ++BlockNo) { + if (MBBI->getNumber() != (int)BlockNo) { + // Remove use of the old number. + if (MBBI->getNumber() != -1) { + assert(MBBNumbering[MBBI->getNumber()] == &*MBBI && + "MBB number mismatch!"); + MBBNumbering[MBBI->getNumber()] = 0; + } + + // If BlockNo is already taken, set that block's number to -1. + if (MBBNumbering[BlockNo]) + MBBNumbering[BlockNo]->setNumber(-1); + + MBBNumbering[BlockNo] = MBBI; + MBBI->setNumber(BlockNo); + } + } + + // Okay, all the blocks are renumbered. If we have compactified the block + // numbering, shrink MBBNumbering now. + assert(BlockNo <= MBBNumbering.size() && "Mismatch!"); + MBBNumbering.resize(BlockNo); +} + + +void MachineFunction::dump() const { print(*cerr.stream()); } + +void MachineFunction::print(std::ostream &OS) const { + OS << "# Machine code for " << Fn->getName () << "():\n"; + + // Print Frame Information + getFrameInfo()->print(*this, OS); + + // Print JumpTable Information + getJumpTableInfo()->print(OS); + + // Print Constant Pool + getConstantPool()->print(OS); + + const MRegisterInfo *MRI = getTarget().getRegisterInfo(); + + if (livein_begin() != livein_end()) { + OS << "Live Ins:"; + for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I) { + if (MRI) + OS << " " << MRI->getName(I->first); + else + OS << " Reg #" << I->first; + + if (I->second) + OS << " in VR#" << I->second << " "; + } + OS << "\n"; + } + if (liveout_begin() != liveout_end()) { + OS << "Live Outs:"; + for (liveout_iterator I = liveout_begin(), E = liveout_end(); I != E; ++I) + if (MRI) + OS << " " << MRI->getName(*I); + else + OS << " Reg #" << *I; + OS << "\n"; + } + + for (const_iterator BB = begin(); BB != end(); ++BB) + BB->print(OS); + + OS << "\n# End machine code for " << Fn->getName () << "().\n\n"; +} + +/// CFGOnly flag - This is used to control whether or not the CFG graph printer +/// prints out the contents of basic blocks or not. This is acceptable because +/// this code is only really used for debugging purposes. +/// +static bool CFGOnly = false; + +namespace llvm { + template<> + struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits { + static std::string getGraphName(const MachineFunction *F) { + return "CFG for '" + F->getFunction()->getName() + "' function"; + } + + static std::string getNodeLabel(const MachineBasicBlock *Node, + const MachineFunction *Graph) { + if (CFGOnly && Node->getBasicBlock() && + !Node->getBasicBlock()->getName().empty()) + return Node->getBasicBlock()->getName() + ":"; + + std::ostringstream Out; + if (CFGOnly) { + Out << Node->getNumber() << ':'; + return Out.str(); + } + + Node->print(Out); + + std::string OutStr = Out.str(); + if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); + + // Process string output to make it nicer... 
+      for (unsigned i = 0; i != OutStr.length(); ++i)
+        if (OutStr[i] == '\n') {  // Left justify
+          OutStr[i] = '\\';
+          OutStr.insert(OutStr.begin()+i+1, 'l');
+        }
+      return OutStr;
+    }
+  };
+}
+
+void MachineFunction::viewCFG() const
+{
+#ifndef NDEBUG
+  ViewGraph(this, "mf" + getFunction()->getName());
+#else
+  cerr << "MachineFunction::viewCFG is only available in debug builds on "
+       << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+void MachineFunction::viewCFGOnly() const
+{
+  CFGOnly = true;
+  viewCFG();
+  CFGOnly = false;
+}
+
+// The next two methods are used to construct and to retrieve
+// the MachineCodeForFunction object for the given function.
+// construct() -- Allocates and initializes for a given function and target
+// get()       -- Returns a handle to the object.
+//                This should not be called before "construct()"
+//                for a given Function.
+//
+MachineFunction&
+MachineFunction::construct(const Function *Fn, const TargetMachine &Tar)
+{
+  assert(Fn->getAnnotation(MF_AID) == 0 &&
+         "Object already exists for this function!");
+  MachineFunction* mcInfo = new MachineFunction(Fn, Tar);
+  Fn->addAnnotation(mcInfo);
+  return *mcInfo;
+}
+
+void MachineFunction::destruct(const Function *Fn) {
+  bool Deleted = Fn->deleteAnnotation(MF_AID);
+  assert(Deleted && "Machine code did not exist for function!");
+}
+
+MachineFunction& MachineFunction::get(const Function *F)
+{
+  MachineFunction *mc = (MachineFunction*)F->getAnnotation(MF_AID);
+  assert(mc && "Call construct() method first to allocate the object");
+  return *mc;
+}
+
+void MachineFunction::clearSSARegMap() {
+  delete SSARegMapping;
+  SSARegMapping = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// MachineFrameInfo implementation
+//===----------------------------------------------------------------------===//
+
+void MachineFrameInfo::print(const MachineFunction &MF, std::ostream &OS) const {
+  int ValOffset = MF.getTarget().getFrameInfo()->getOffsetOfLocalArea();
+
+  for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
+    const StackObject &SO = Objects[i];
+    OS << "  <fi #" << (int)(i-NumFixedObjects) << ">: ";
+    if (SO.Size == 0)
+      OS << "variable sized";
+    else
+      OS << "size is " << SO.Size << " byte" << (SO.Size != 1 ? "s," : ",");
+    OS << " alignment is " << SO.Alignment << " byte"
+       << (SO.Alignment != 1 ? "s," : ",");
+
+    if (i < NumFixedObjects)
+      OS << " fixed";
+    if (i < NumFixedObjects || SO.SPOffset != -1) {
+      int64_t Off = SO.SPOffset - ValOffset;
+      OS << " at location [SP";
+      if (Off > 0)
+        OS << "+" << Off;
+      else if (Off < 0)
+        OS << Off;
+      OS << "]";
+    }
+    OS << "\n";
+  }
+
+  if (HasVarSizedObjects)
+    OS << "  Stack frame contains variable sized objects\n";
+}
+
+void MachineFrameInfo::dump(const MachineFunction &MF) const {
+  print(MF, *cerr.stream());
+}
+
+
+//===----------------------------------------------------------------------===//
+// MachineJumpTableInfo implementation
+//===----------------------------------------------------------------------===//
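getJumpTableIndex() below deduplicates jump tables by destination list: a table whose destinations match an existing entry reuses that entry's index, otherwise a new entry is appended. A minimal standalone sketch of the same pattern; JumpTableEntry and getOrAddJumpTable are illustrative stand-ins for the real types:

    #include <vector>

    struct JumpTableEntry { std::vector<int> Dests; };  // stand-in destinations

    // Return the index of an existing identical table, or append a new one.
    // A linear scan is fine while the number of distinct jump tables is small.
    unsigned getOrAddJumpTable(std::vector<JumpTableEntry> &Tables,
                               const std::vector<int> &Dests) {
      for (unsigned i = 0, e = Tables.size(); i != e; ++i)
        if (Tables[i].Dests == Dests)
          return i;
      JumpTableEntry E;
      E.Dests = Dests;
      Tables.push_back(E);
      return Tables.size() - 1;
    }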
+/// getJumpTableIndex - Create a new jump table entry in the jump table info
+/// or return an existing one.
+///
+unsigned MachineJumpTableInfo::getJumpTableIndex(
+                               const std::vector<MachineBasicBlock*> &DestBBs) {
+  assert(!DestBBs.empty() && "Cannot create an empty jump table!");
+  for (unsigned i = 0, e = JumpTables.size(); i != e; ++i)
+    if (JumpTables[i].MBBs == DestBBs)
+      return i;
+
+  JumpTables.push_back(MachineJumpTableEntry(DestBBs));
+  return JumpTables.size()-1;
+}
+
+
+void MachineJumpTableInfo::print(std::ostream &OS) const {
+  // FIXME: this is lame, maybe we could print out the MBB numbers or something
+  // like {1, 2, 4, 5, 3, 0}
+  for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
+    OS << "  <jt #" << i << "> has " << JumpTables[i].MBBs.size()
+       << " entries\n";
+  }
+}
+
+void MachineJumpTableInfo::dump() const { print(*cerr.stream()); }
+
+
+//===----------------------------------------------------------------------===//
+// MachineConstantPool implementation
+//===----------------------------------------------------------------------===//
+
+const Type *MachineConstantPoolEntry::getType() const {
+  if (isMachineConstantPoolEntry())
+    return Val.MachineCPVal->getType();
+  return Val.ConstVal->getType();
+}
+
+MachineConstantPool::~MachineConstantPool() {
+  for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+    if (Constants[i].isMachineConstantPoolEntry())
+      delete Constants[i].Val.MachineCPVal;
+}
+
+/// getConstantPoolIndex - Create a new entry in the constant pool or return
+/// an existing one.  The user must specify the alignment for the object as
+/// the log2 of the byte alignment (note the "1 << Alignment" mask below).
+///
+unsigned MachineConstantPool::getConstantPoolIndex(Constant *C,
+                                                   unsigned Alignment) {
+  assert(Alignment && "Alignment must be specified!");
+  if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+  // Check to see if we already have this constant.
+  //
+  // FIXME, this could be made much more efficient for large constant pools.
+  unsigned AlignMask = (1 << Alignment)-1;
+  for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+    if (Constants[i].Val.ConstVal == C && (Constants[i].Offset & AlignMask) == 0)
+      return i;
+
+  unsigned Offset = 0;
+  if (!Constants.empty()) {
+    Offset = Constants.back().getOffset();
+    Offset += TD->getTypeSize(Constants.back().getType());
+    Offset = (Offset+AlignMask)&~AlignMask;
+  }
+
+  Constants.push_back(MachineConstantPoolEntry(C, Offset));
+  return Constants.size()-1;
+}
+
+unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
+                                                   unsigned Alignment) {
+  assert(Alignment && "Alignment must be specified!");
+  if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+  // Check to see if we already have this constant.
+  //
+  // FIXME, this could be made much more efficient for large constant pools.
+ unsigned AlignMask = (1 << Alignment)-1; + int Idx = V->getExistingMachineCPValue(this, Alignment); + if (Idx != -1) + return (unsigned)Idx; + + unsigned Offset = 0; + if (!Constants.empty()) { + Offset = Constants.back().getOffset(); + Offset += TD->getTypeSize(Constants.back().getType()); + Offset = (Offset+AlignMask)&~AlignMask; + } + + Constants.push_back(MachineConstantPoolEntry(V, Offset)); + return Constants.size()-1; +} + + +void MachineConstantPool::print(std::ostream &OS) const { + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + OS << " <cp #" << i << "> is"; + if (Constants[i].isMachineConstantPoolEntry()) + Constants[i].Val.MachineCPVal->print(OS); + else + OS << *(Value*)Constants[i].Val.ConstVal; + OS << " , offset=" << Constants[i].getOffset(); + OS << "\n"; + } +} + +void MachineConstantPool::dump() const { print(*cerr.stream()); } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp new file mode 100644 index 0000000..c5e2ba8 --- /dev/null +++ b/lib/CodeGen/MachineInstr.cpp @@ -0,0 +1,426 @@ +//===-- MachineInstr.cpp --------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Methods common to all machine instructions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/MRegisterInfo.h" +#include "llvm/Support/LeakDetector.h" +#include "llvm/Support/Streams.h" +#include <ostream> +using namespace llvm; + +/// MachineInstr ctor - This constructor creates a dummy MachineInstr with +/// TID NULL and no operands. +MachineInstr::MachineInstr() + : TID(0), NumImplicitOps(0), parent(0) { + // Make sure that we get added to a machine basicblock + LeakDetector::addGarbageObject(this); +} + +void MachineInstr::addImplicitDefUseOperands() { + if (TID->ImplicitDefs) + for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs) { + MachineOperand Op; + Op.opType = MachineOperand::MO_Register; + Op.IsDef = true; + Op.IsImp = true; + Op.IsKill = false; + Op.IsDead = false; + Op.contents.RegNo = *ImpDefs; + Op.auxInfo.subReg = 0; + Operands.push_back(Op); + } + if (TID->ImplicitUses) + for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses) { + MachineOperand Op; + Op.opType = MachineOperand::MO_Register; + Op.IsDef = false; + Op.IsImp = true; + Op.IsKill = false; + Op.IsDead = false; + Op.contents.RegNo = *ImpUses; + Op.auxInfo.subReg = 0; + Operands.push_back(Op); + } +} + +/// MachineInstr ctor - This constructor create a MachineInstr and add the +/// implicit operands. It reserves space for number of operands specified by +/// TargetInstrDescriptor or the numOperands if it is not zero. (for +/// instructions with variable number of operands). 
+MachineInstr::MachineInstr(const TargetInstrDescriptor &tid)
+  : TID(&tid), NumImplicitOps(0), parent(0) {
+  if (TID->ImplicitDefs)
+    for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+      NumImplicitOps++;
+  if (TID->ImplicitUses)
+    for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses)
+      NumImplicitOps++;
+  Operands.reserve(NumImplicitOps + TID->numOperands);
+  addImplicitDefUseOperands();
+  // Make sure that we get added to a machine basic block
+  LeakDetector::addGarbageObject(this);
+}
+
+/// MachineInstr ctor - Works exactly the same as the ctor above, except that
+/// the MachineInstr is created and added to the end of the specified basic
+/// block.
+///
+MachineInstr::MachineInstr(MachineBasicBlock *MBB,
+                           const TargetInstrDescriptor &tid)
+  : TID(&tid), NumImplicitOps(0), parent(0) {
+  assert(MBB && "Cannot use inserting ctor with null basic block!");
+  if (TID->ImplicitDefs)
+    for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+      NumImplicitOps++;
+  if (TID->ImplicitUses)
+    for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses)
+      NumImplicitOps++;
+  Operands.reserve(NumImplicitOps + TID->numOperands);
+  addImplicitDefUseOperands();
+  // Make sure that we get added to a machine basic block
+  LeakDetector::addGarbageObject(this);
+  MBB->push_back(this);  // Add instruction to end of basic block!
+}
+
+/// MachineInstr ctor - Copies MachineInstr arg exactly.
+///
+MachineInstr::MachineInstr(const MachineInstr &MI) {
+  TID = MI.getInstrDescriptor();
+  NumImplicitOps = MI.NumImplicitOps;
+  Operands.reserve(MI.getNumOperands());
+
+  // Add operands
+  for (unsigned i = 0; i != MI.getNumOperands(); ++i)
+    Operands.push_back(MI.getOperand(i));
+
+  // Set parent, next, and prev to null
+  parent = 0;
+  prev = 0;
+  next = 0;
+}
+
+
+MachineInstr::~MachineInstr() {
+  LeakDetector::removeGarbageObject(this);
+}
+
+/// getOpcode - Returns the opcode of this MachineInstr.
+///
+const int MachineInstr::getOpcode() const {
+  return TID->Opcode;
+}
+
+/// removeFromParent - This method unlinks 'this' from the containing basic
+/// block, and returns it, but does not delete it.
+MachineInstr *MachineInstr::removeFromParent() {
+  assert(getParent() && "Not embedded in a basic block!");
+  getParent()->remove(this);
+  return this;
+}
+
+
+/// OperandsComplete - Return true if it's illegal to add a new operand.
+///
+bool MachineInstr::OperandsComplete() const {
+  unsigned short NumOperands = TID->numOperands;
+  if ((TID->Flags & M_VARIABLE_OPS) == 0 &&
+      getNumOperands()-NumImplicitOps >= NumOperands)
+    return true;  // Broken: we have all the operands of this instruction!
+  return false;
+}
+
+/// getNumExplicitOperands - Returns the number of non-implicit operands.
+///
+unsigned MachineInstr::getNumExplicitOperands() const {
+  unsigned NumOperands = TID->numOperands;
+  if ((TID->Flags & M_VARIABLE_OPS) == 0)
+    return NumOperands;
+
+  for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isRegister() || !MO.isImplicit())
+      NumOperands++;
+  }
+  return NumOperands;
+}
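Both constructors above count implicit operands by walking TID->ImplicitDefs and TID->ImplicitUses, which are zero-terminated arrays of register numbers (register 0 is never a valid physical register, so it serves as the sentinel). A standalone sketch of that convention; the table contents here are made up for illustration:

    // Null-terminated register lists, as used by TargetInstrDescriptor.
    // Register 0 is not a valid machine register, so it terminates the array.
    static unsigned countRegs(const unsigned *List) {
      unsigned N = 0;
      if (List)
        while (*List++)  // stop at the 0 sentinel
          ++N;
      return N;
    }

    // e.g. an instruction that implicitly defines two flag registers:
    static const unsigned ImpDefs[] = { 17, 18, 0 };  // made-up reg numbers

    // countRegs(ImpDefs) == 2; countRegs(0) == 0 for instructions with none.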
+/// isIdenticalTo - Return true if this operand is identical to the specified
+/// operand.
+bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
+  if (getType() != Other.getType()) return false;
+
+  switch (getType()) {
+  default: assert(0 && "Unrecognized operand type");
+  case MachineOperand::MO_Register:
+    return getReg() == Other.getReg() && isDef() == Other.isDef();
+  case MachineOperand::MO_Immediate:
+    return getImm() == Other.getImm();
+  case MachineOperand::MO_MachineBasicBlock:
+    return getMBB() == Other.getMBB();
+  case MachineOperand::MO_FrameIndex:
+    return getFrameIndex() == Other.getFrameIndex();
+  case MachineOperand::MO_ConstantPoolIndex:
+    return getConstantPoolIndex() == Other.getConstantPoolIndex() &&
+           getOffset() == Other.getOffset();
+  case MachineOperand::MO_JumpTableIndex:
+    return getJumpTableIndex() == Other.getJumpTableIndex();
+  case MachineOperand::MO_GlobalAddress:
+    return getGlobal() == Other.getGlobal() &&
+           getOffset() == Other.getOffset();
+  case MachineOperand::MO_ExternalSymbol:
+    return !strcmp(getSymbolName(), Other.getSymbolName()) &&
+           getOffset() == Other.getOffset();
+  }
+}
+
+/// findRegisterUseOperandIdx() - Returns the index of the operand that is a
+/// use of the specified register, or -1 if it is not found.  If isKill is
+/// true, the search is further restricted to uses that kill the register.
+int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill) const {
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (MO.isReg() && MO.isUse() && MO.getReg() == Reg)
+      if (!isKill || MO.isKill())
+        return i;
+  }
+  return -1;
+}
+
+/// findRegisterDefOperand() - Returns the MachineOperand that is a def of
+/// the specified register, or NULL if it is not found.
+MachineOperand *MachineInstr::findRegisterDefOperand(unsigned Reg) {
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = getOperand(i);
+    if (MO.isReg() && MO.isDef() && MO.getReg() == Reg)
+      return &MO;
+  }
+  return NULL;
+}
+
+/// findFirstPredOperandIdx() - Find the index of the first operand in the
+/// operand list that is used to represent the predicate.  It returns -1 if
+/// none is found.
+int MachineInstr::findFirstPredOperandIdx() const {
+  const TargetInstrDescriptor *TID = getInstrDescriptor();
+  if (TID->Flags & M_PREDICABLE) {
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+      if ((TID->OpInfo[i].Flags & M_PREDICATE_OPERAND))
+        return i;
+  }
+
+  return -1;
+}
+
+/// copyKillDeadInfo - Copies kill / dead operand properties from MI.
+///
+void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || (!MO.isKill() && !MO.isDead()))
+      continue;
+    for (unsigned j = 0, ee = getNumOperands(); j != ee; ++j) {
+      MachineOperand &MOp = getOperand(j);
+      if (!MOp.isIdenticalTo(MO))
+        continue;
+      if (MO.isKill())
+        MOp.setIsKill();
+      else
+        MOp.setIsDead();
+      break;
+    }
+  }
+}
+
+/// copyPredicates - Copies predicate operand(s) from MI.
+void MachineInstr::copyPredicates(const MachineInstr *MI) {
+  const TargetInstrDescriptor *TID = MI->getInstrDescriptor();
+  if (TID->Flags & M_PREDICABLE) {
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      if ((TID->OpInfo[i].Flags & M_PREDICATE_OPERAND)) {
+        const MachineOperand &MO = MI->getOperand(i);
+        // Predicate operands must be the last operands.
+ if (MO.isReg()) + addRegOperand(MO.getReg(), false); + else { + addImmOperand(MO.getImm()); + } + } + } + } +} + +void MachineInstr::dump() const { + cerr << " " << *this; +} + +static inline void OutputReg(std::ostream &os, unsigned RegNo, + const MRegisterInfo *MRI = 0) { + if (!RegNo || MRegisterInfo::isPhysicalRegister(RegNo)) { + if (MRI) + os << "%" << MRI->get(RegNo).Name; + else + os << "%mreg(" << RegNo << ")"; + } else + os << "%reg" << RegNo; +} + +static void print(const MachineOperand &MO, std::ostream &OS, + const TargetMachine *TM) { + const MRegisterInfo *MRI = 0; + + if (TM) MRI = TM->getRegisterInfo(); + + switch (MO.getType()) { + case MachineOperand::MO_Register: + OutputReg(OS, MO.getReg(), MRI); + break; + case MachineOperand::MO_Immediate: + OS << MO.getImmedValue(); + break; + case MachineOperand::MO_MachineBasicBlock: + OS << "mbb<" + << ((Value*)MO.getMachineBasicBlock()->getBasicBlock())->getName() + << "," << (void*)MO.getMachineBasicBlock() << ">"; + break; + case MachineOperand::MO_FrameIndex: + OS << "<fi#" << MO.getFrameIndex() << ">"; + break; + case MachineOperand::MO_ConstantPoolIndex: + OS << "<cp#" << MO.getConstantPoolIndex() << ">"; + break; + case MachineOperand::MO_JumpTableIndex: + OS << "<jt#" << MO.getJumpTableIndex() << ">"; + break; + case MachineOperand::MO_GlobalAddress: + OS << "<ga:" << ((Value*)MO.getGlobal())->getName(); + if (MO.getOffset()) OS << "+" << MO.getOffset(); + OS << ">"; + break; + case MachineOperand::MO_ExternalSymbol: + OS << "<es:" << MO.getSymbolName(); + if (MO.getOffset()) OS << "+" << MO.getOffset(); + OS << ">"; + break; + default: + assert(0 && "Unrecognized operand type"); + } +} + +void MachineInstr::print(std::ostream &OS, const TargetMachine *TM) const { + unsigned StartOp = 0; + + // Specialize printing if op#0 is definition + if (getNumOperands() && getOperand(0).isReg() && getOperand(0).isDef()) { + ::print(getOperand(0), OS, TM); + if (getOperand(0).isDead()) + OS << "<dead>"; + OS << " = "; + ++StartOp; // Don't print this operand again! + } + + if (TID) + OS << TID->Name; + + for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) { + const MachineOperand& mop = getOperand(i); + if (i != StartOp) + OS << ","; + OS << " "; + ::print(mop, OS, TM); + + if (mop.isReg()) { + if (mop.isDef() || mop.isKill() || mop.isDead() || mop.isImplicit()) { + OS << "<"; + bool NeedComma = false; + if (mop.isImplicit()) { + OS << (mop.isDef() ? "imp-def" : "imp-use"); + NeedComma = true; + } else if (mop.isDef()) { + OS << "def"; + NeedComma = true; + } + if (mop.isKill() || mop.isDead()) { + if (NeedComma) + OS << ","; + if (mop.isKill()) + OS << "kill"; + if (mop.isDead()) + OS << "dead"; + } + OS << ">"; + } + } + } + + OS << "\n"; +} + +void MachineInstr::print(std::ostream &os) const { + // If the instruction is embedded into a basic block, we can find the target + // info for the instruction. + if (const MachineBasicBlock *MBB = getParent()) { + const MachineFunction *MF = MBB->getParent(); + if (MF) + print(os, &MF->getTarget()); + else + print(os, 0); + } + + // Otherwise, print it out in the "raw" format without symbolic register names + // and such. 
+ os << getInstrDescriptor()->Name; + + for (unsigned i = 0, N = getNumOperands(); i < N; i++) { + os << "\t" << getOperand(i); + if (getOperand(i).isReg() && getOperand(i).isDef()) + os << "<d>"; + } + + os << "\n"; +} + +void MachineOperand::print(std::ostream &OS) const { + switch (getType()) { + case MO_Register: + OutputReg(OS, getReg()); + break; + case MO_Immediate: + OS << (long)getImmedValue(); + break; + case MO_MachineBasicBlock: + OS << "<mbb:" + << ((Value*)getMachineBasicBlock()->getBasicBlock())->getName() + << "@" << (void*)getMachineBasicBlock() << ">"; + break; + case MO_FrameIndex: + OS << "<fi#" << getFrameIndex() << ">"; + break; + case MO_ConstantPoolIndex: + OS << "<cp#" << getConstantPoolIndex() << ">"; + break; + case MO_JumpTableIndex: + OS << "<jt#" << getJumpTableIndex() << ">"; + break; + case MO_GlobalAddress: + OS << "<ga:" << ((Value*)getGlobal())->getName() << ">"; + break; + case MO_ExternalSymbol: + OS << "<es:" << getSymbolName() << ">"; + break; + default: + assert(0 && "Unrecognized operand type"); + break; + } +} + diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp new file mode 100644 index 0000000..77fb643 --- /dev/null +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -0,0 +1,1905 @@ +//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by James M. Laskey and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineModuleInfo.h" + +#include "llvm/Constants.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Intrinsics.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Streams.h" +using namespace llvm; +using namespace llvm::dwarf; + +// Handle the Pass registration stuff necessary to use TargetData's. +namespace { + RegisterPass<MachineModuleInfo> X("machinemoduleinfo", "Module Information"); +} +char MachineModuleInfo::ID = 0; + +//===----------------------------------------------------------------------===// + +/// getGlobalVariablesUsing - Return all of the GlobalVariables which have the +/// specified value in their initializer somewhere. +static void +getGlobalVariablesUsing(Value *V, std::vector<GlobalVariable*> &Result) { + // Scan though value users. + for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(*I)) { + // If the user is a GlobalVariable then add to result. + Result.push_back(GV); + } else if (Constant *C = dyn_cast<Constant>(*I)) { + // If the user is a constant variable then scan its users + getGlobalVariablesUsing(C, Result); + } + } +} + +/// getGlobalVariablesUsing - Return all of the GlobalVariables that use the +/// named GlobalVariable. +static std::vector<GlobalVariable*> +getGlobalVariablesUsing(Module &M, const std::string &RootName) { + std::vector<GlobalVariable*> Result; // GlobalVariables matching criteria. 
+ + std::vector<const Type*> FieldTypes; + FieldTypes.push_back(Type::Int32Ty); + FieldTypes.push_back(Type::Int32Ty); + + // Get the GlobalVariable root. + GlobalVariable *UseRoot = M.getGlobalVariable(RootName, + StructType::get(FieldTypes)); + + // If present and linkonce then scan for users. + if (UseRoot && UseRoot->hasLinkOnceLinkage()) { + getGlobalVariablesUsing(UseRoot, Result); + } + + return Result; +} + +/// isStringValue - Return true if the given value can be coerced to a string. +/// +static bool isStringValue(Value *V) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { + if (GV->hasInitializer() && isa<ConstantArray>(GV->getInitializer())) { + ConstantArray *Init = cast<ConstantArray>(GV->getInitializer()); + return Init->isString(); + } + } else if (Constant *C = dyn_cast<Constant>(V)) { + if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) + return isStringValue(GV); + else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + if (CE->getOpcode() == Instruction::GetElementPtr) { + if (CE->getNumOperands() == 3 && + cast<Constant>(CE->getOperand(1))->isNullValue() && + isa<ConstantInt>(CE->getOperand(2))) { + return isStringValue(CE->getOperand(0)); + } + } + } + } + return false; +} + +/// getGlobalVariable - Return either a direct or cast Global value. +/// +static GlobalVariable *getGlobalVariable(Value *V) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { + return GV; + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + if (CE->getOpcode() == Instruction::BitCast) { + return dyn_cast<GlobalVariable>(CE->getOperand(0)); + } + } + return NULL; +} + +/// isGlobalVariable - Return true if the given value can be coerced to a +/// GlobalVariable. +static bool isGlobalVariable(Value *V) { + if (isa<GlobalVariable>(V) || isa<ConstantPointerNull>(V)) { + return true; + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + if (CE->getOpcode() == Instruction::BitCast) { + return isa<GlobalVariable>(CE->getOperand(0)); + } + } + return false; +} + +/// getUIntOperand - Return ith operand if it is an unsigned integer. +/// +static ConstantInt *getUIntOperand(GlobalVariable *GV, unsigned i) { + // Make sure the GlobalVariable has an initializer. + if (!GV->hasInitializer()) return NULL; + + // Get the initializer constant. + ConstantStruct *CI = dyn_cast<ConstantStruct>(GV->getInitializer()); + if (!CI) return NULL; + + // Check if there is at least i + 1 operands. + unsigned N = CI->getNumOperands(); + if (i >= N) return NULL; + + // Check constant. + return dyn_cast<ConstantInt>(CI->getOperand(i)); +} + +//===----------------------------------------------------------------------===// + +/// ApplyToFields - Target the visitor to each field of the debug information +/// descriptor. +void DIVisitor::ApplyToFields(DebugInfoDesc *DD) { + DD->ApplyToFields(this); +} + +//===----------------------------------------------------------------------===// +/// DICountVisitor - This DIVisitor counts all the fields in the supplied debug +/// the supplied DebugInfoDesc. +class DICountVisitor : public DIVisitor { +private: + unsigned Count; // Running count of fields. + +public: + DICountVisitor() : DIVisitor(), Count(0) {} + + // Accessors. + unsigned getCount() const { return Count; } + + /// Apply - Count each of the fields. 
+ /// + virtual void Apply(int &Field) { ++Count; } + virtual void Apply(unsigned &Field) { ++Count; } + virtual void Apply(int64_t &Field) { ++Count; } + virtual void Apply(uint64_t &Field) { ++Count; } + virtual void Apply(bool &Field) { ++Count; } + virtual void Apply(std::string &Field) { ++Count; } + virtual void Apply(DebugInfoDesc *&Field) { ++Count; } + virtual void Apply(GlobalVariable *&Field) { ++Count; } + virtual void Apply(std::vector<DebugInfoDesc *> &Field) { + ++Count; + } +}; + +//===----------------------------------------------------------------------===// +/// DIDeserializeVisitor - This DIVisitor deserializes all the fields in the +/// supplied DebugInfoDesc. +class DIDeserializeVisitor : public DIVisitor { +private: + DIDeserializer &DR; // Active deserializer. + unsigned I; // Current operand index. + ConstantStruct *CI; // GlobalVariable constant initializer. + +public: + DIDeserializeVisitor(DIDeserializer &D, GlobalVariable *GV) + : DIVisitor() + , DR(D) + , I(0) + , CI(cast<ConstantStruct>(GV->getInitializer())) + {} + + /// Apply - Set the value of each of the fields. + /// + virtual void Apply(int &Field) { + Constant *C = CI->getOperand(I++); + Field = cast<ConstantInt>(C)->getSExtValue(); + } + virtual void Apply(unsigned &Field) { + Constant *C = CI->getOperand(I++); + Field = cast<ConstantInt>(C)->getZExtValue(); + } + virtual void Apply(int64_t &Field) { + Constant *C = CI->getOperand(I++); + Field = cast<ConstantInt>(C)->getSExtValue(); + } + virtual void Apply(uint64_t &Field) { + Constant *C = CI->getOperand(I++); + Field = cast<ConstantInt>(C)->getZExtValue(); + } + virtual void Apply(bool &Field) { + Constant *C = CI->getOperand(I++); + Field = cast<ConstantInt>(C)->getZExtValue(); + } + virtual void Apply(std::string &Field) { + Constant *C = CI->getOperand(I++); + Field = C->getStringValue(); + } + virtual void Apply(DebugInfoDesc *&Field) { + Constant *C = CI->getOperand(I++); + Field = DR.Deserialize(C); + } + virtual void Apply(GlobalVariable *&Field) { + Constant *C = CI->getOperand(I++); + Field = getGlobalVariable(C); + } + virtual void Apply(std::vector<DebugInfoDesc *> &Field) { + Field.resize(0); + Constant *C = CI->getOperand(I++); + GlobalVariable *GV = getGlobalVariable(C); + if (GV->hasInitializer()) { + if (ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer())) { + for (unsigned i = 0, N = CA->getNumOperands(); i < N; ++i) { + GlobalVariable *GVE = getGlobalVariable(CA->getOperand(i)); + DebugInfoDesc *DE = DR.Deserialize(GVE); + Field.push_back(DE); + } + } else if (GV->getInitializer()->isNullValue()) { + if (const ArrayType *T = + dyn_cast<ArrayType>(GV->getType()->getElementType())) { + Field.resize(T->getNumElements()); + } + } + } + } +}; + +//===----------------------------------------------------------------------===// +/// DISerializeVisitor - This DIVisitor serializes all the fields in +/// the supplied DebugInfoDesc. +class DISerializeVisitor : public DIVisitor { +private: + DISerializer &SR; // Active serializer. + std::vector<Constant*> &Elements; // Element accumulator. + +public: + DISerializeVisitor(DISerializer &S, std::vector<Constant*> &E) + : DIVisitor() + , SR(S) + , Elements(E) + {} + + /// Apply - Set the value of each of the fields. 
+ /// + virtual void Apply(int &Field) { + Elements.push_back(ConstantInt::get(Type::Int32Ty, int32_t(Field))); + } + virtual void Apply(unsigned &Field) { + Elements.push_back(ConstantInt::get(Type::Int32Ty, uint32_t(Field))); + } + virtual void Apply(int64_t &Field) { + Elements.push_back(ConstantInt::get(Type::Int64Ty, int64_t(Field))); + } + virtual void Apply(uint64_t &Field) { + Elements.push_back(ConstantInt::get(Type::Int64Ty, uint64_t(Field))); + } + virtual void Apply(bool &Field) { + Elements.push_back(ConstantInt::get(Type::Int1Ty, Field)); + } + virtual void Apply(std::string &Field) { + Elements.push_back(SR.getString(Field)); + } + virtual void Apply(DebugInfoDesc *&Field) { + GlobalVariable *GV = NULL; + + // If non-NULL then convert to global. + if (Field) GV = SR.Serialize(Field); + + // FIXME - At some point should use specific type. + const PointerType *EmptyTy = SR.getEmptyStructPtrType(); + + if (GV) { + // Set to pointer to global. + Elements.push_back(ConstantExpr::getBitCast(GV, EmptyTy)); + } else { + // Use NULL. + Elements.push_back(ConstantPointerNull::get(EmptyTy)); + } + } + virtual void Apply(GlobalVariable *&Field) { + const PointerType *EmptyTy = SR.getEmptyStructPtrType(); + if (Field) { + Elements.push_back(ConstantExpr::getBitCast(Field, EmptyTy)); + } else { + Elements.push_back(ConstantPointerNull::get(EmptyTy)); + } + } + virtual void Apply(std::vector<DebugInfoDesc *> &Field) { + const PointerType *EmptyTy = SR.getEmptyStructPtrType(); + unsigned N = Field.size(); + ArrayType *AT = ArrayType::get(EmptyTy, N); + std::vector<Constant *> ArrayElements; + + for (unsigned i = 0, N = Field.size(); i < N; ++i) { + if (DebugInfoDesc *Element = Field[i]) { + GlobalVariable *GVE = SR.Serialize(Element); + Constant *CE = ConstantExpr::getBitCast(GVE, EmptyTy); + ArrayElements.push_back(cast<Constant>(CE)); + } else { + ArrayElements.push_back(ConstantPointerNull::get(EmptyTy)); + } + } + + Constant *CA = ConstantArray::get(AT, ArrayElements); + GlobalVariable *CAGV = new GlobalVariable(AT, true, + GlobalValue::InternalLinkage, + CA, "llvm.dbg.array", + SR.getModule()); + CAGV->setSection("llvm.metadata"); + Constant *CAE = ConstantExpr::getBitCast(CAGV, EmptyTy); + Elements.push_back(CAE); + } +}; + +//===----------------------------------------------------------------------===// +/// DIGetTypesVisitor - This DIVisitor gathers all the field types in +/// the supplied DebugInfoDesc. +class DIGetTypesVisitor : public DIVisitor { +private: + DISerializer &SR; // Active serializer. + std::vector<const Type*> &Fields; // Type accumulator. + +public: + DIGetTypesVisitor(DISerializer &S, std::vector<const Type*> &F) + : DIVisitor() + , SR(S) + , Fields(F) + {} + + /// Apply - Set the value of each of the fields. + /// + virtual void Apply(int &Field) { + Fields.push_back(Type::Int32Ty); + } + virtual void Apply(unsigned &Field) { + Fields.push_back(Type::Int32Ty); + } + virtual void Apply(int64_t &Field) { + Fields.push_back(Type::Int64Ty); + } + virtual void Apply(uint64_t &Field) { + Fields.push_back(Type::Int64Ty); + } + virtual void Apply(bool &Field) { + Fields.push_back(Type::Int1Ty); + } + virtual void Apply(std::string &Field) { + Fields.push_back(SR.getStrPtrType()); + } + virtual void Apply(DebugInfoDesc *&Field) { + // FIXME - At some point should use specific type. 
+ const PointerType *EmptyTy = SR.getEmptyStructPtrType(); + Fields.push_back(EmptyTy); + } + virtual void Apply(GlobalVariable *&Field) { + const PointerType *EmptyTy = SR.getEmptyStructPtrType(); + Fields.push_back(EmptyTy); + } + virtual void Apply(std::vector<DebugInfoDesc *> &Field) { + const PointerType *EmptyTy = SR.getEmptyStructPtrType(); + Fields.push_back(EmptyTy); + } +}; + +//===----------------------------------------------------------------------===// +/// DIVerifyVisitor - This DIVisitor verifies all the field types against +/// a constant initializer. +class DIVerifyVisitor : public DIVisitor { +private: + DIVerifier &VR; // Active verifier. + bool IsValid; // Validity status. + unsigned I; // Current operand index. + ConstantStruct *CI; // GlobalVariable constant initializer. + +public: + DIVerifyVisitor(DIVerifier &V, GlobalVariable *GV) + : DIVisitor() + , VR(V) + , IsValid(true) + , I(0) + , CI(cast<ConstantStruct>(GV->getInitializer())) + { + } + + // Accessors. + bool isValid() const { return IsValid; } + + /// Apply - Set the value of each of the fields. + /// + virtual void Apply(int &Field) { + Constant *C = CI->getOperand(I++); + IsValid = IsValid && isa<ConstantInt>(C); + } + virtual void Apply(unsigned &Field) { + Constant *C = CI->getOperand(I++); + IsValid = IsValid && isa<ConstantInt>(C); + } + virtual void Apply(int64_t &Field) { + Constant *C = CI->getOperand(I++); + IsValid = IsValid && isa<ConstantInt>(C); + } + virtual void Apply(uint64_t &Field) { + Constant *C = CI->getOperand(I++); + IsValid = IsValid && isa<ConstantInt>(C); + } + virtual void Apply(bool &Field) { + Constant *C = CI->getOperand(I++); + IsValid = IsValid && isa<ConstantInt>(C) && C->getType() == Type::Int1Ty; + } + virtual void Apply(std::string &Field) { + Constant *C = CI->getOperand(I++); + IsValid = IsValid && + (!C || isStringValue(C) || C->isNullValue()); + } + virtual void Apply(DebugInfoDesc *&Field) { + // FIXME - Prepare the correct descriptor. + Constant *C = CI->getOperand(I++); + IsValid = IsValid && isGlobalVariable(C); + } + virtual void Apply(GlobalVariable *&Field) { + Constant *C = CI->getOperand(I++); + IsValid = IsValid && isGlobalVariable(C); + } + virtual void Apply(std::vector<DebugInfoDesc *> &Field) { + Constant *C = CI->getOperand(I++); + IsValid = IsValid && isGlobalVariable(C); + if (!IsValid) return; + + GlobalVariable *GV = getGlobalVariable(C); + IsValid = IsValid && GV && GV->hasInitializer(); + if (!IsValid) return; + + ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer()); + IsValid = IsValid && CA; + if (!IsValid) return; + + for (unsigned i = 0, N = CA->getNumOperands(); IsValid && i < N; ++i) { + IsValid = IsValid && isGlobalVariable(CA->getOperand(i)); + if (!IsValid) return; + + GlobalVariable *GVE = getGlobalVariable(CA->getOperand(i)); + VR.Verify(GVE); + } + } +}; + + +//===----------------------------------------------------------------------===// + +/// TagFromGlobal - Returns the tag number from a debug info descriptor +/// GlobalVariable. Return DIIValid if operand is not an unsigned int. +unsigned DebugInfoDesc::TagFromGlobal(GlobalVariable *GV) { + ConstantInt *C = getUIntOperand(GV, 0); + return C ? ((unsigned)C->getZExtValue() & ~LLVMDebugVersionMask) : + (unsigned)DW_TAG_invalid; +} + +/// VersionFromGlobal - Returns the version number from a debug info +/// descriptor GlobalVariable. Return DIIValid if operand is not an unsigned +/// int. 
+unsigned DebugInfoDesc::VersionFromGlobal(GlobalVariable *GV) {
+ ConstantInt *C = getUIntOperand(GV, 0);
+ return C ? ((unsigned)C->getZExtValue() & LLVMDebugVersionMask) :
+ (unsigned)DW_TAG_invalid;
+}
+
+/// DescFactory - Create an instance of debug info descriptor based on Tag.
+/// Return NULL if not a recognized Tag.
+DebugInfoDesc *DebugInfoDesc::DescFactory(unsigned Tag) {
+ switch (Tag) {
+ case DW_TAG_anchor: return new AnchorDesc();
+ case DW_TAG_compile_unit: return new CompileUnitDesc();
+ case DW_TAG_variable: return new GlobalVariableDesc();
+ case DW_TAG_subprogram: return new SubprogramDesc();
+ case DW_TAG_lexical_block: return new BlockDesc();
+ case DW_TAG_base_type: return new BasicTypeDesc();
+ case DW_TAG_typedef:
+ case DW_TAG_pointer_type:
+ case DW_TAG_reference_type:
+ case DW_TAG_const_type:
+ case DW_TAG_volatile_type:
+ case DW_TAG_restrict_type:
+ case DW_TAG_member:
+ case DW_TAG_inheritance: return new DerivedTypeDesc(Tag);
+ case DW_TAG_array_type:
+ case DW_TAG_structure_type:
+ case DW_TAG_union_type:
+ case DW_TAG_enumeration_type:
+ case DW_TAG_vector_type:
+ case DW_TAG_subroutine_type: return new CompositeTypeDesc(Tag);
+ case DW_TAG_subrange_type: return new SubrangeDesc();
+ case DW_TAG_enumerator: return new EnumeratorDesc();
+ case DW_TAG_return_variable:
+ case DW_TAG_arg_variable:
+ case DW_TAG_auto_variable: return new VariableDesc(Tag);
+ default: break;
+ }
+ return NULL;
+}
+
+/// getLinkage - get linkage appropriate for this type of descriptor.
+///
+GlobalValue::LinkageTypes DebugInfoDesc::getLinkage() const {
+ return GlobalValue::InternalLinkage;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the descriptor.
+///
+void DebugInfoDesc::ApplyToFields(DIVisitor *Visitor) {
+ Visitor->Apply(Tag);
+}
+
+//===----------------------------------------------------------------------===//
+
+AnchorDesc::AnchorDesc()
+: DebugInfoDesc(DW_TAG_anchor)
+, AnchorTag(0)
+{}
+AnchorDesc::AnchorDesc(AnchoredDesc *D)
+: DebugInfoDesc(DW_TAG_anchor)
+, AnchorTag(D->getTag())
+{}
+
+// Implement isa/cast/dyncast.
+bool AnchorDesc::classof(const DebugInfoDesc *D) {
+ return D->getTag() == DW_TAG_anchor;
+}
+
+/// getLinkage - get linkage appropriate for this type of descriptor.
+///
+GlobalValue::LinkageTypes AnchorDesc::getLinkage() const {
+ return GlobalValue::LinkOnceLinkage;
+}
+
+/// ApplyToFields - Target the visitor to the fields of the AnchorDesc.
+///
+void AnchorDesc::ApplyToFields(DIVisitor *Visitor) {
+ DebugInfoDesc::ApplyToFields(Visitor);
+
+ Visitor->Apply(AnchorTag);
+}
+
+/// getDescString - Return a string used to compose global names and labels. A
+/// global variable name needs to be defined for each debug descriptor that is
+/// anchored. NOTE: each global variable named here also needs to be added
+/// to the list of names left external in the internalizer.
+/// ExternalNames.insert("llvm.dbg.compile_units");
+/// ExternalNames.insert("llvm.dbg.global_variables");
+/// ExternalNames.insert("llvm.dbg.subprograms");
+const char *AnchorDesc::getDescString() const {
+ switch (AnchorTag) {
+ case DW_TAG_compile_unit: return CompileUnitDesc::AnchorString;
+ case DW_TAG_variable: return GlobalVariableDesc::AnchorString;
+ case DW_TAG_subprogram: return SubprogramDesc::AnchorString;
+ default: break;
+ }
+
+ assert(0 && "Tag does not have a case for anchor string");
+ return "";
+}
+
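// [Editor's note: illustrative sketch, not part of this commit.] An anchor
// gives debug info consumers one fixed global name from which every
// descriptor of a given kind can be found. Serialized to .ll form it looks
// roughly like this (field values abbreviated, exact syntax assumed):
//
//   %llvm.dbg.anchor.type = type { i32, i32 }
//   @llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type
//       { i32 <version|DW_TAG_anchor>, i32 <DW_TAG_compile_unit> },
//       section "llvm.metadata"
//
// and each CompileUnitDesc then carries a pointer back to that global.

+/// getTypeString - Return a string used to label this descriptor's type.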
+/// +const char *AnchorDesc::getTypeString() const { + return "llvm.dbg.anchor.type"; +} + +#ifndef NDEBUG +void AnchorDesc::dump() { + cerr << getDescString() << " " + << "Version(" << getVersion() << "), " + << "Tag(" << getTag() << "), " + << "AnchorTag(" << AnchorTag << ")\n"; +} +#endif + +//===----------------------------------------------------------------------===// + +AnchoredDesc::AnchoredDesc(unsigned T) +: DebugInfoDesc(T) +, Anchor(NULL) +{} + +/// ApplyToFields - Target the visitor to the fields of the AnchoredDesc. +/// +void AnchoredDesc::ApplyToFields(DIVisitor *Visitor) { + DebugInfoDesc::ApplyToFields(Visitor); + + Visitor->Apply(Anchor); +} + +//===----------------------------------------------------------------------===// + +CompileUnitDesc::CompileUnitDesc() +: AnchoredDesc(DW_TAG_compile_unit) +, Language(0) +, FileName("") +, Directory("") +, Producer("") +{} + +// Implement isa/cast/dyncast. +bool CompileUnitDesc::classof(const DebugInfoDesc *D) { + return D->getTag() == DW_TAG_compile_unit; +} + +/// ApplyToFields - Target the visitor to the fields of the CompileUnitDesc. +/// +void CompileUnitDesc::ApplyToFields(DIVisitor *Visitor) { + AnchoredDesc::ApplyToFields(Visitor); + + // Handle cases out of sync with compiler. + if (getVersion() == 0) { + unsigned DebugVersion; + Visitor->Apply(DebugVersion); + } + + Visitor->Apply(Language); + Visitor->Apply(FileName); + Visitor->Apply(Directory); + Visitor->Apply(Producer); +} + +/// getDescString - Return a string used to compose global names and labels. +/// +const char *CompileUnitDesc::getDescString() const { + return "llvm.dbg.compile_unit"; +} + +/// getTypeString - Return a string used to label this descriptors type. +/// +const char *CompileUnitDesc::getTypeString() const { + return "llvm.dbg.compile_unit.type"; +} + +/// getAnchorString - Return a string used to label this descriptor's anchor. +/// +const char *CompileUnitDesc::AnchorString = "llvm.dbg.compile_units"; +const char *CompileUnitDesc::getAnchorString() const { + return AnchorString; +} + +#ifndef NDEBUG +void CompileUnitDesc::dump() { + cerr << getDescString() << " " + << "Version(" << getVersion() << "), " + << "Tag(" << getTag() << "), " + << "Anchor(" << getAnchor() << "), " + << "Language(" << Language << "), " + << "FileName(\"" << FileName << "\"), " + << "Directory(\"" << Directory << "\"), " + << "Producer(\"" << Producer << "\")\n"; +} +#endif + +//===----------------------------------------------------------------------===// + +TypeDesc::TypeDesc(unsigned T) +: DebugInfoDesc(T) +, Context(NULL) +, Name("") +, File(NULL) +, Line(0) +, Size(0) +, Align(0) +, Offset(0) +, Flags(0) +{} + +/// ApplyToFields - Target the visitor to the fields of the TypeDesc. +/// +void TypeDesc::ApplyToFields(DIVisitor *Visitor) { + DebugInfoDesc::ApplyToFields(Visitor); + + Visitor->Apply(Context); + Visitor->Apply(Name); + Visitor->Apply(File); + Visitor->Apply(Line); + Visitor->Apply(Size); + Visitor->Apply(Align); + Visitor->Apply(Offset); + if (getVersion() > LLVMDebugVersion4) Visitor->Apply(Flags); +} + +/// getDescString - Return a string used to compose global names and labels. +/// +const char *TypeDesc::getDescString() const { + return "llvm.dbg.type"; +} + +/// getTypeString - Return a string used to label this descriptor's type. 
+/// +const char *TypeDesc::getTypeString() const { + return "llvm.dbg.type.type"; +} + +#ifndef NDEBUG +void TypeDesc::dump() { + cerr << getDescString() << " " + << "Version(" << getVersion() << "), " + << "Tag(" << getTag() << "), " + << "Context(" << Context << "), " + << "Name(\"" << Name << "\"), " + << "File(" << File << "), " + << "Line(" << Line << "), " + << "Size(" << Size << "), " + << "Align(" << Align << "), " + << "Offset(" << Offset << "), " + << "Flags(" << Flags << ")\n"; +} +#endif + +//===----------------------------------------------------------------------===// + +BasicTypeDesc::BasicTypeDesc() +: TypeDesc(DW_TAG_base_type) +, Encoding(0) +{} + +// Implement isa/cast/dyncast. +bool BasicTypeDesc::classof(const DebugInfoDesc *D) { + return D->getTag() == DW_TAG_base_type; +} + +/// ApplyToFields - Target the visitor to the fields of the BasicTypeDesc. +/// +void BasicTypeDesc::ApplyToFields(DIVisitor *Visitor) { + TypeDesc::ApplyToFields(Visitor); + + Visitor->Apply(Encoding); +} + +/// getDescString - Return a string used to compose global names and labels. +/// +const char *BasicTypeDesc::getDescString() const { + return "llvm.dbg.basictype"; +} + +/// getTypeString - Return a string used to label this descriptor's type. +/// +const char *BasicTypeDesc::getTypeString() const { + return "llvm.dbg.basictype.type"; +} + +#ifndef NDEBUG +void BasicTypeDesc::dump() { + cerr << getDescString() << " " + << "Version(" << getVersion() << "), " + << "Tag(" << getTag() << "), " + << "Context(" << getContext() << "), " + << "Name(\"" << getName() << "\"), " + << "Size(" << getSize() << "), " + << "Encoding(" << Encoding << ")\n"; +} +#endif + +//===----------------------------------------------------------------------===// + +DerivedTypeDesc::DerivedTypeDesc(unsigned T) +: TypeDesc(T) +, FromType(NULL) +{} + +// Implement isa/cast/dyncast. +bool DerivedTypeDesc::classof(const DebugInfoDesc *D) { + unsigned T = D->getTag(); + switch (T) { + case DW_TAG_typedef: + case DW_TAG_pointer_type: + case DW_TAG_reference_type: + case DW_TAG_const_type: + case DW_TAG_volatile_type: + case DW_TAG_restrict_type: + case DW_TAG_member: + case DW_TAG_inheritance: + return true; + default: break; + } + return false; +} + +/// ApplyToFields - Target the visitor to the fields of the DerivedTypeDesc. +/// +void DerivedTypeDesc::ApplyToFields(DIVisitor *Visitor) { + TypeDesc::ApplyToFields(Visitor); + + Visitor->Apply(FromType); +} + +/// getDescString - Return a string used to compose global names and labels. +/// +const char *DerivedTypeDesc::getDescString() const { + return "llvm.dbg.derivedtype"; +} + +/// getTypeString - Return a string used to label this descriptor's type. +/// +const char *DerivedTypeDesc::getTypeString() const { + return "llvm.dbg.derivedtype.type"; +} + +#ifndef NDEBUG +void DerivedTypeDesc::dump() { + cerr << getDescString() << " " + << "Version(" << getVersion() << "), " + << "Tag(" << getTag() << "), " + << "Context(" << getContext() << "), " + << "Name(\"" << getName() << "\"), " + << "Size(" << getSize() << "), " + << "File(" << getFile() << "), " + << "Line(" << getLine() << "), " + << "FromType(" << FromType << ")\n"; +} +#endif + +//===----------------------------------------------------------------------===// + +CompositeTypeDesc::CompositeTypeDesc(unsigned T) +: DerivedTypeDesc(T) +, Elements() +{} + +// Implement isa/cast/dyncast. 
+bool CompositeTypeDesc::classof(const DebugInfoDesc *D) { + unsigned T = D->getTag(); + switch (T) { + case DW_TAG_array_type: + case DW_TAG_structure_type: + case DW_TAG_union_type: + case DW_TAG_enumeration_type: + case DW_TAG_vector_type: + case DW_TAG_subroutine_type: + return true; + default: break; + } + return false; +} + +/// ApplyToFields - Target the visitor to the fields of the CompositeTypeDesc. +/// +void CompositeTypeDesc::ApplyToFields(DIVisitor *Visitor) { + DerivedTypeDesc::ApplyToFields(Visitor); + + Visitor->Apply(Elements); +} + +/// getDescString - Return a string used to compose global names and labels. +/// +const char *CompositeTypeDesc::getDescString() const { + return "llvm.dbg.compositetype"; +} + +/// getTypeString - Return a string used to label this descriptor's type. +/// +const char *CompositeTypeDesc::getTypeString() const { + return "llvm.dbg.compositetype.type"; +} + +#ifndef NDEBUG +void CompositeTypeDesc::dump() { + cerr << getDescString() << " " + << "Version(" << getVersion() << "), " + << "Tag(" << getTag() << "), " + << "Context(" << getContext() << "), " + << "Name(\"" << getName() << "\"), " + << "Size(" << getSize() << "), " + << "File(" << getFile() << "), " + << "Line(" << getLine() << "), " + << "FromType(" << getFromType() << "), " + << "Elements.size(" << Elements.size() << ")\n"; +} +#endif + +//===----------------------------------------------------------------------===// + +SubrangeDesc::SubrangeDesc() +: DebugInfoDesc(DW_TAG_subrange_type) +, Lo(0) +, Hi(0) +{} + +// Implement isa/cast/dyncast. +bool SubrangeDesc::classof(const DebugInfoDesc *D) { + return D->getTag() == DW_TAG_subrange_type; +} + +/// ApplyToFields - Target the visitor to the fields of the SubrangeDesc. +/// +void SubrangeDesc::ApplyToFields(DIVisitor *Visitor) { + DebugInfoDesc::ApplyToFields(Visitor); + + Visitor->Apply(Lo); + Visitor->Apply(Hi); +} + +/// getDescString - Return a string used to compose global names and labels. +/// +const char *SubrangeDesc::getDescString() const { + return "llvm.dbg.subrange"; +} + +/// getTypeString - Return a string used to label this descriptor's type. +/// +const char *SubrangeDesc::getTypeString() const { + return "llvm.dbg.subrange.type"; +} + +#ifndef NDEBUG +void SubrangeDesc::dump() { + cerr << getDescString() << " " + << "Version(" << getVersion() << "), " + << "Tag(" << getTag() << "), " + << "Lo(" << Lo << "), " + << "Hi(" << Hi << ")\n"; +} +#endif + +//===----------------------------------------------------------------------===// + +EnumeratorDesc::EnumeratorDesc() +: DebugInfoDesc(DW_TAG_enumerator) +, Name("") +, Value(0) +{} + +// Implement isa/cast/dyncast. +bool EnumeratorDesc::classof(const DebugInfoDesc *D) { + return D->getTag() == DW_TAG_enumerator; +} + +/// ApplyToFields - Target the visitor to the fields of the EnumeratorDesc. +/// +void EnumeratorDesc::ApplyToFields(DIVisitor *Visitor) { + DebugInfoDesc::ApplyToFields(Visitor); + + Visitor->Apply(Name); + Visitor->Apply(Value); +} + +/// getDescString - Return a string used to compose global names and labels. +/// +const char *EnumeratorDesc::getDescString() const { + return "llvm.dbg.enumerator"; +} + +/// getTypeString - Return a string used to label this descriptor's type. 
+/// +const char *EnumeratorDesc::getTypeString() const { + return "llvm.dbg.enumerator.type"; +} + +#ifndef NDEBUG +void EnumeratorDesc::dump() { + cerr << getDescString() << " " + << "Version(" << getVersion() << "), " + << "Tag(" << getTag() << "), " + << "Name(" << Name << "), " + << "Value(" << Value << ")\n"; +} +#endif + +//===----------------------------------------------------------------------===// + +VariableDesc::VariableDesc(unsigned T) +: DebugInfoDesc(T) +, Context(NULL) +, Name("") +, File(NULL) +, Line(0) +, TyDesc(0) +{} + +// Implement isa/cast/dyncast. +bool VariableDesc::classof(const DebugInfoDesc *D) { + unsigned T = D->getTag(); + switch (T) { + case DW_TAG_auto_variable: + case DW_TAG_arg_variable: + case DW_TAG_return_variable: + return true; + default: break; + } + return false; +} + +/// ApplyToFields - Target the visitor to the fields of the VariableDesc. +/// +void VariableDesc::ApplyToFields(DIVisitor *Visitor) { + DebugInfoDesc::ApplyToFields(Visitor); + + Visitor->Apply(Context); + Visitor->Apply(Name); + Visitor->Apply(File); + Visitor->Apply(Line); + Visitor->Apply(TyDesc); +} + +/// getDescString - Return a string used to compose global names and labels. +/// +const char *VariableDesc::getDescString() const { + return "llvm.dbg.variable"; +} + +/// getTypeString - Return a string used to label this descriptor's type. +/// +const char *VariableDesc::getTypeString() const { + return "llvm.dbg.variable.type"; +} + +#ifndef NDEBUG +void VariableDesc::dump() { + cerr << getDescString() << " " + << "Version(" << getVersion() << "), " + << "Tag(" << getTag() << "), " + << "Context(" << Context << "), " + << "Name(\"" << Name << "\"), " + << "File(" << File << "), " + << "Line(" << Line << "), " + << "TyDesc(" << TyDesc << ")\n"; +} +#endif + +//===----------------------------------------------------------------------===// + +GlobalDesc::GlobalDesc(unsigned T) +: AnchoredDesc(T) +, Context(0) +, Name("") +, FullName("") +, LinkageName("") +, File(NULL) +, Line(0) +, TyDesc(NULL) +, IsStatic(false) +, IsDefinition(false) +{} + +/// ApplyToFields - Target the visitor to the fields of the global. +/// +void GlobalDesc::ApplyToFields(DIVisitor *Visitor) { + AnchoredDesc::ApplyToFields(Visitor); + + Visitor->Apply(Context); + Visitor->Apply(Name); + Visitor->Apply(FullName); + Visitor->Apply(LinkageName); + Visitor->Apply(File); + Visitor->Apply(Line); + Visitor->Apply(TyDesc); + Visitor->Apply(IsStatic); + Visitor->Apply(IsDefinition); +} + +//===----------------------------------------------------------------------===// + +GlobalVariableDesc::GlobalVariableDesc() +: GlobalDesc(DW_TAG_variable) +, Global(NULL) +{} + +// Implement isa/cast/dyncast. +bool GlobalVariableDesc::classof(const DebugInfoDesc *D) { + return D->getTag() == DW_TAG_variable; +} + +/// ApplyToFields - Target the visitor to the fields of the GlobalVariableDesc. +/// +void GlobalVariableDesc::ApplyToFields(DIVisitor *Visitor) { + GlobalDesc::ApplyToFields(Visitor); + + Visitor->Apply(Global); +} + +/// getDescString - Return a string used to compose global names and labels. +/// +const char *GlobalVariableDesc::getDescString() const { + return "llvm.dbg.global_variable"; +} + +/// getTypeString - Return a string used to label this descriptors type. +/// +const char *GlobalVariableDesc::getTypeString() const { + return "llvm.dbg.global_variable.type"; +} + +/// getAnchorString - Return a string used to label this descriptor's anchor. 
+/// +const char *GlobalVariableDesc::AnchorString = "llvm.dbg.global_variables"; +const char *GlobalVariableDesc::getAnchorString() const { + return AnchorString; +} + +#ifndef NDEBUG +void GlobalVariableDesc::dump() { + cerr << getDescString() << " " + << "Version(" << getVersion() << "), " + << "Tag(" << getTag() << "), " + << "Anchor(" << getAnchor() << "), " + << "Name(\"" << getName() << "\"), " + << "FullName(\"" << getFullName() << "\"), " + << "LinkageName(\"" << getLinkageName() << "\"), " + << "File(" << getFile() << ")," + << "Line(" << getLine() << ")," + << "Type(" << getType() << "), " + << "IsStatic(" << (isStatic() ? "true" : "false") << "), " + << "IsDefinition(" << (isDefinition() ? "true" : "false") << "), " + << "Global(" << Global << ")\n"; +} +#endif + +//===----------------------------------------------------------------------===// + +SubprogramDesc::SubprogramDesc() +: GlobalDesc(DW_TAG_subprogram) +{} + +// Implement isa/cast/dyncast. +bool SubprogramDesc::classof(const DebugInfoDesc *D) { + return D->getTag() == DW_TAG_subprogram; +} + +/// ApplyToFields - Target the visitor to the fields of the +/// SubprogramDesc. +void SubprogramDesc::ApplyToFields(DIVisitor *Visitor) { + GlobalDesc::ApplyToFields(Visitor); +} + +/// getDescString - Return a string used to compose global names and labels. +/// +const char *SubprogramDesc::getDescString() const { + return "llvm.dbg.subprogram"; +} + +/// getTypeString - Return a string used to label this descriptors type. +/// +const char *SubprogramDesc::getTypeString() const { + return "llvm.dbg.subprogram.type"; +} + +/// getAnchorString - Return a string used to label this descriptor's anchor. +/// +const char *SubprogramDesc::AnchorString = "llvm.dbg.subprograms"; +const char *SubprogramDesc::getAnchorString() const { + return AnchorString; +} + +#ifndef NDEBUG +void SubprogramDesc::dump() { + cerr << getDescString() << " " + << "Version(" << getVersion() << "), " + << "Tag(" << getTag() << "), " + << "Anchor(" << getAnchor() << "), " + << "Name(\"" << getName() << "\"), " + << "FullName(\"" << getFullName() << "\"), " + << "LinkageName(\"" << getLinkageName() << "\"), " + << "File(" << getFile() << ")," + << "Line(" << getLine() << ")," + << "Type(" << getType() << "), " + << "IsStatic(" << (isStatic() ? "true" : "false") << "), " + << "IsDefinition(" << (isDefinition() ? "true" : "false") << ")\n"; +} +#endif + +//===----------------------------------------------------------------------===// + +BlockDesc::BlockDesc() +: DebugInfoDesc(DW_TAG_lexical_block) +, Context(NULL) +{} + +// Implement isa/cast/dyncast. +bool BlockDesc::classof(const DebugInfoDesc *D) { + return D->getTag() == DW_TAG_lexical_block; +} + +/// ApplyToFields - Target the visitor to the fields of the BlockDesc. +/// +void BlockDesc::ApplyToFields(DIVisitor *Visitor) { + DebugInfoDesc::ApplyToFields(Visitor); + + Visitor->Apply(Context); +} + +/// getDescString - Return a string used to compose global names and labels. +/// +const char *BlockDesc::getDescString() const { + return "llvm.dbg.block"; +} + +/// getTypeString - Return a string used to label this descriptors type. 
+/// +const char *BlockDesc::getTypeString() const { + return "llvm.dbg.block.type"; +} + +#ifndef NDEBUG +void BlockDesc::dump() { + cerr << getDescString() << " " + << "Version(" << getVersion() << "), " + << "Tag(" << getTag() << ")," + << "Context(" << Context << ")\n"; +} +#endif + +//===----------------------------------------------------------------------===// + +DebugInfoDesc *DIDeserializer::Deserialize(Value *V) { + return Deserialize(getGlobalVariable(V)); +} +DebugInfoDesc *DIDeserializer::Deserialize(GlobalVariable *GV) { + // Handle NULL. + if (!GV) return NULL; + + // Check to see if it has been already deserialized. + DebugInfoDesc *&Slot = GlobalDescs[GV]; + if (Slot) return Slot; + + // Get the Tag from the global. + unsigned Tag = DebugInfoDesc::TagFromGlobal(GV); + + // Create an empty instance of the correct sort. + Slot = DebugInfoDesc::DescFactory(Tag); + + // If not a user defined descriptor. + if (Slot) { + // Deserialize the fields. + DIDeserializeVisitor DRAM(*this, GV); + DRAM.ApplyToFields(Slot); + } + + return Slot; +} + +//===----------------------------------------------------------------------===// + +/// getStrPtrType - Return a "sbyte *" type. +/// +const PointerType *DISerializer::getStrPtrType() { + // If not already defined. + if (!StrPtrTy) { + // Construct the pointer to signed bytes. + StrPtrTy = PointerType::get(Type::Int8Ty); + } + + return StrPtrTy; +} + +/// getEmptyStructPtrType - Return a "{ }*" type. +/// +const PointerType *DISerializer::getEmptyStructPtrType() { + // If not already defined. + if (!EmptyStructPtrTy) { + // Construct the empty structure type. + const StructType *EmptyStructTy = + StructType::get(std::vector<const Type*>()); + // Construct the pointer to empty structure type. + EmptyStructPtrTy = PointerType::get(EmptyStructTy); + } + + return EmptyStructPtrTy; +} + +/// getTagType - Return the type describing the specified descriptor (via tag.) +/// +const StructType *DISerializer::getTagType(DebugInfoDesc *DD) { + // Attempt to get the previously defined type. + StructType *&Ty = TagTypes[DD->getTag()]; + + // If not already defined. + if (!Ty) { + // Set up fields vector. + std::vector<const Type*> Fields; + // Get types of fields. + DIGetTypesVisitor GTAM(*this, Fields); + GTAM.ApplyToFields(DD); + + // Construct structured type. + Ty = StructType::get(Fields); + + // Register type name with module. + M->addTypeName(DD->getTypeString(), Ty); + } + + return Ty; +} + +/// getString - Construct the string as constant string global. +/// +Constant *DISerializer::getString(const std::string &String) { + // Check string cache for previous edition. + Constant *&Slot = StringCache[String]; + // Return Constant if previously defined. + if (Slot) return Slot; + // If empty string then use a sbyte* null instead. + if (String.empty()) { + Slot = ConstantPointerNull::get(getStrPtrType()); + } else { + // Construct string as an llvm constant. + Constant *ConstStr = ConstantArray::get(String); + // Otherwise create and return a new string global. + GlobalVariable *StrGV = new GlobalVariable(ConstStr->getType(), true, + GlobalVariable::InternalLinkage, + ConstStr, ".str", M); + StrGV->setSection("llvm.metadata"); + // Convert to generic string pointer. + Slot = ConstantExpr::getBitCast(StrGV, getStrPtrType()); + } + return Slot; + +} + +/// Serialize - Recursively cast the specified descriptor into a GlobalVariable +/// so that it can be serialized to a .bc or .ll file. 
+GlobalVariable *DISerializer::Serialize(DebugInfoDesc *DD) { + // Check if the DebugInfoDesc is already in the map. + GlobalVariable *&Slot = DescGlobals[DD]; + + // See if DebugInfoDesc exists, if so return prior GlobalVariable. + if (Slot) return Slot; + + // Get the type associated with the Tag. + const StructType *Ty = getTagType(DD); + + // Create the GlobalVariable early to prevent infinite recursion. + GlobalVariable *GV = new GlobalVariable(Ty, true, DD->getLinkage(), + NULL, DD->getDescString(), M); + GV->setSection("llvm.metadata"); + + // Insert new GlobalVariable in DescGlobals map. + Slot = GV; + + // Set up elements vector + std::vector<Constant*> Elements; + // Add fields. + DISerializeVisitor SRAM(*this, Elements); + SRAM.ApplyToFields(DD); + + // Set the globals initializer. + GV->setInitializer(ConstantStruct::get(Ty, Elements)); + + return GV; +} + +//===----------------------------------------------------------------------===// + +/// Verify - Return true if the GlobalVariable appears to be a valid +/// serialization of a DebugInfoDesc. +bool DIVerifier::Verify(Value *V) { + return !V || Verify(getGlobalVariable(V)); +} +bool DIVerifier::Verify(GlobalVariable *GV) { + // NULLs are valid. + if (!GV) return true; + + // Check prior validity. + unsigned &ValiditySlot = Validity[GV]; + + // If visited before then use old state. + if (ValiditySlot) return ValiditySlot == Valid; + + // Assume validity for the time being (recursion.) + ValiditySlot = Valid; + + // Make sure the global is internal or link once (anchor.) + if (GV->getLinkage() != GlobalValue::InternalLinkage && + GV->getLinkage() != GlobalValue::LinkOnceLinkage) { + ValiditySlot = Invalid; + return false; + } + + // Get the Tag. + unsigned Tag = DebugInfoDesc::TagFromGlobal(GV); + + // Check for user defined descriptors. + if (Tag == DW_TAG_invalid) { + ValiditySlot = Valid; + return true; + } + + // Get the Version. + unsigned Version = DebugInfoDesc::VersionFromGlobal(GV); + + // Check for version mismatch. + if (Version != LLVMDebugVersion) { + ValiditySlot = Invalid; + return false; + } + + // Construct an empty DebugInfoDesc. + DebugInfoDesc *DD = DebugInfoDesc::DescFactory(Tag); + + // Allow for user defined descriptors. + if (!DD) return true; + + // Get the initializer constant. + ConstantStruct *CI = cast<ConstantStruct>(GV->getInitializer()); + + // Get the operand count. + unsigned N = CI->getNumOperands(); + + // Get the field count. + unsigned &CountSlot = Counts[Tag]; + if (!CountSlot) { + // Check the operand count to the field count + DICountVisitor CTAM; + CTAM.ApplyToFields(DD); + CountSlot = CTAM.getCount(); + } + + // Field count must be at most equal operand count. + if (CountSlot > N) { + delete DD; + ValiditySlot = Invalid; + return false; + } + + // Check each field for valid type. + DIVerifyVisitor VRAM(*this, GV); + VRAM.ApplyToFields(DD); + + // Release empty DebugInfoDesc. + delete DD; + + // If fields are not valid. 
+ if (!VRAM.isValid()) { + ValiditySlot = Invalid; + return false; + } + + return true; +} + +//===----------------------------------------------------------------------===// + +DebugScope::~DebugScope() { + for (unsigned i = 0, N = Scopes.size(); i < N; ++i) delete Scopes[i]; + for (unsigned j = 0, M = Variables.size(); j < M; ++j) delete Variables[j]; +} + +//===----------------------------------------------------------------------===// + +MachineModuleInfo::MachineModuleInfo() +: ImmutablePass((intptr_t)&ID) +, DR() +, VR() +, CompileUnits() +, Directories() +, SourceFiles() +, Lines() +, LabelIDList() +, ScopeMap() +, RootScope(NULL) +, FrameMoves() +, LandingPads() +, Personalities() +, CallsEHReturn(0) +, CallsUnwindInit(0) +{ + // Always emit "no personality" info + Personalities.push_back(NULL); +} +MachineModuleInfo::~MachineModuleInfo() { + +} + +/// doInitialization - Initialize the state for a new module. +/// +bool MachineModuleInfo::doInitialization() { + return false; +} + +/// doFinalization - Tear down the state after completion of a module. +/// +bool MachineModuleInfo::doFinalization() { + return false; +} + +/// BeginFunction - Begin gathering function meta information. +/// +void MachineModuleInfo::BeginFunction(MachineFunction *MF) { + // Coming soon. +} + +/// EndFunction - Discard function meta information. +/// +void MachineModuleInfo::EndFunction() { + // Clean up scope information. + if (RootScope) { + delete RootScope; + ScopeMap.clear(); + RootScope = NULL; + } + + // Clean up line info. + Lines.clear(); + + // Clean up frame info. + FrameMoves.clear(); + + // Clean up exception info. + LandingPads.clear(); + TypeInfos.clear(); + FilterIds.clear(); + FilterEnds.clear(); + CallsEHReturn = 0; + CallsUnwindInit = 0; +} + +/// getDescFor - Convert a Value to a debug information descriptor. +/// +// FIXME - use new Value type when available. +DebugInfoDesc *MachineModuleInfo::getDescFor(Value *V) { + return DR.Deserialize(V); +} + +/// Verify - Verify that a Value is debug information descriptor. +/// +bool MachineModuleInfo::Verify(Value *V) { + return VR.Verify(V); +} + +/// AnalyzeModule - Scan the module for global debug information. +/// +void MachineModuleInfo::AnalyzeModule(Module &M) { + SetupCompileUnits(M); +} + +/// needsFrameInfo - Returns true if we need to gather callee-saved register +/// move info for the frame. +bool MachineModuleInfo::needsFrameInfo() const { + return hasDebugInfo() || ExceptionHandling; +} + +/// SetupCompileUnits - Set up the unique vector of compile units. +/// +void MachineModuleInfo::SetupCompileUnits(Module &M) { + std::vector<CompileUnitDesc *>CU = getAnchoredDescriptors<CompileUnitDesc>(M); + + for (unsigned i = 0, N = CU.size(); i < N; i++) { + CompileUnits.insert(CU[i]); + } +} + +/// getCompileUnits - Return a vector of debug compile units. +/// +const UniqueVector<CompileUnitDesc *> MachineModuleInfo::getCompileUnits()const{ + return CompileUnits; +} + +/// getGlobalVariablesUsing - Return all of the GlobalVariables that use the +/// named GlobalVariable. +std::vector<GlobalVariable*> +MachineModuleInfo::getGlobalVariablesUsing(Module &M, + const std::string &RootName) { + return ::getGlobalVariablesUsing(M, RootName); +} + +/// RecordLabel - Records location information and associates it with a +/// debug label. Returns a unique label ID used to generate a label and +/// provide correspondence to the source line list. 
+unsigned MachineModuleInfo::RecordLabel(unsigned Line, unsigned Column,
+ unsigned Source) {
+ unsigned ID = NextLabelID();
+ Lines.push_back(SourceLineInfo(Line, Column, Source, ID));
+ return ID;
+}
+
+/// RecordSource - Register a source file with debug info. Returns a source
+/// ID.
+unsigned MachineModuleInfo::RecordSource(const std::string &Directory,
+ const std::string &Source) {
+ unsigned DirectoryID = Directories.insert(Directory);
+ return SourceFiles.insert(SourceFileInfo(DirectoryID, Source));
+}
+unsigned MachineModuleInfo::RecordSource(const CompileUnitDesc *CompileUnit) {
+ return RecordSource(CompileUnit->getDirectory(),
+ CompileUnit->getFileName());
+}
+
+/// RecordRegionStart - Indicate the start of a region.
+///
+unsigned MachineModuleInfo::RecordRegionStart(Value *V) {
+ // FIXME - need to be able to handle split scopes because of bb cloning.
+ DebugInfoDesc *ScopeDesc = DR.Deserialize(V);
+ DebugScope *Scope = getOrCreateScope(ScopeDesc);
+ unsigned ID = NextLabelID();
+ if (!Scope->getStartLabelID()) Scope->setStartLabelID(ID);
+ return ID;
+}
+
+/// RecordRegionEnd - Indicate the end of a region.
+///
+unsigned MachineModuleInfo::RecordRegionEnd(Value *V) {
+ // FIXME - need to be able to handle split scopes because of bb cloning.
+ DebugInfoDesc *ScopeDesc = DR.Deserialize(V);
+ DebugScope *Scope = getOrCreateScope(ScopeDesc);
+ unsigned ID = NextLabelID();
+ Scope->setEndLabelID(ID);
+ return ID;
+}
+
+/// RecordVariable - Indicate the declaration of a local variable.
+///
+void MachineModuleInfo::RecordVariable(Value *V, unsigned FrameIndex) {
+ VariableDesc *VD = cast<VariableDesc>(DR.Deserialize(V));
+ DebugScope *Scope = getOrCreateScope(VD->getContext());
+ DebugVariable *DV = new DebugVariable(VD, FrameIndex);
+ Scope->AddVariable(DV);
+}
+
+/// getOrCreateScope - Returns the scope associated with the given descriptor.
+///
+DebugScope *MachineModuleInfo::getOrCreateScope(DebugInfoDesc *ScopeDesc) {
+ DebugScope *&Slot = ScopeMap[ScopeDesc];
+ if (!Slot) {
+ // FIXME - breaks down when the context is an inlined function.
+ DebugInfoDesc *ParentDesc = NULL;
+ if (BlockDesc *Block = dyn_cast<BlockDesc>(ScopeDesc)) {
+ ParentDesc = Block->getContext();
+ }
+ DebugScope *Parent = ParentDesc ? getOrCreateScope(ParentDesc) : NULL;
+ Slot = new DebugScope(Parent, ScopeDesc);
+ if (Parent) {
+ Parent->AddScope(Slot);
+ } else if (RootScope) {
+ // FIXME - Add inlined function scopes to the root so we can delete
+ // them later. Long term, handle inlined functions properly.
+ RootScope->AddScope(Slot);
+ } else {
+ // First function is top level function.
+ RootScope = Slot;
+ }
+ }
+ return Slot;
+}
+
+//===-EH-------------------------------------------------------------------===//
+
+/// getOrCreateLandingPadInfo - Find or create a LandingPadInfo for the
+/// specified MachineBasicBlock.
+LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo
+ (MachineBasicBlock *LandingPad) {
+ unsigned N = LandingPads.size();
+ for (unsigned i = 0; i < N; ++i) {
+ LandingPadInfo &LP = LandingPads[i];
+ if (LP.LandingPadBlock == LandingPad)
+ return LP;
+ }
+
+ LandingPads.push_back(LandingPadInfo(LandingPad));
+ return LandingPads[N];
+}
+
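// [Editor's note: illustrative sketch, not part of this commit.] For an
// invoke-style call the code generator brackets the call with a label pair and
// reports it here so the unwinder can map a faulting PC back to its landing
// pad. Schematically, with label IDs B and E obtained from NextLabelID():
//
//   LABEL B              ; MMI->addInvoke(LandingPadMBB, B, E)
//   CALL @foo
//   LABEL E
//   ...
// LandingPadMBB:         ; label from MMI->addLandingPad(LandingPadMBB)
//
// TidyLandingPads below later discards pairs whose labels have been deleted.

+/// addInvoke - Provide the begin and end labels of an invoke style call and
+/// associate it with a try landing pad block.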
+void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad, + unsigned BeginLabel, unsigned EndLabel) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.BeginLabels.push_back(BeginLabel); + LP.EndLabels.push_back(EndLabel); +} + +/// addLandingPad - Provide the label of a try LandingPad block. +/// +unsigned MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) { + unsigned LandingPadLabel = NextLabelID(); + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.LandingPadLabel = LandingPadLabel; + return LandingPadLabel; +} + +/// addPersonality - Provide the personality function for the exception +/// information. +void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, + Function *Personality) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.Personality = Personality; + + for (unsigned i = 0; i < Personalities.size(); ++i) + if (Personalities[i] == Personality) + return; + + Personalities.push_back(Personality); +} + +/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. +/// +void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad, + std::vector<GlobalVariable *> &TyInfo) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + for (unsigned N = TyInfo.size(); N; --N) + LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1])); +} + +/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad. +/// +void MachineModuleInfo::addFilterTypeInfo(MachineBasicBlock *LandingPad, + std::vector<GlobalVariable *> &TyInfo) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + std::vector<unsigned> IdsInFilter (TyInfo.size()); + for (unsigned I = 0, E = TyInfo.size(); I != E; ++I) + IdsInFilter[I] = getTypeIDFor(TyInfo[I]); + LP.TypeIds.push_back(getFilterIDFor(IdsInFilter)); +} + +/// TidyLandingPads - Remap landing pad labels and remove any deleted landing +/// pads. +void MachineModuleInfo::TidyLandingPads() { + for (unsigned i = 0; i != LandingPads.size(); ) { + LandingPadInfo &LandingPad = LandingPads[i]; + LandingPad.LandingPadLabel = MappedLabel(LandingPad.LandingPadLabel); + + if (!LandingPad.LandingPadBlock) + // Must not have cleanups if no landing pad. + LandingPad.TypeIds.clear(); + + // Special case: we *should* emit LPs with null LP MBB. This indicates + // "rethrow" case. + if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) { + LandingPads.erase(LandingPads.begin() + i); + continue; + } + + for (unsigned j=0; j != LandingPads[i].BeginLabels.size(); ) { + unsigned BeginLabel = MappedLabel(LandingPad.BeginLabels[j]); + unsigned EndLabel = MappedLabel(LandingPad.EndLabels[j]); + + + if (!BeginLabel || !EndLabel) { + LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j); + LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j); + continue; + } + + LandingPad.BeginLabels[j] = BeginLabel; + LandingPad.EndLabels[j] = EndLabel; + ++j; + } + + ++i; + } +} + +/// getTypeIDFor - Return the type id for the specified typeinfo. This is +/// function wide. +unsigned MachineModuleInfo::getTypeIDFor(GlobalVariable *TI) { + for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i) + if (TypeInfos[i] == TI) return i + 1; + + TypeInfos.push_back(TI); + return TypeInfos.size(); +} + +/// getFilterIDFor - Return the filter id for the specified typeinfos. This is +/// function wide. 
+int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) { + // If the new filter coincides with the tail of an existing filter, then + // re-use the existing filter. Folding filters more than this requires + // re-ordering filters and/or their elements - probably not worth it. + for (std::vector<unsigned>::iterator I = FilterEnds.begin(), + E = FilterEnds.end(); I != E; ++I) { + unsigned i = *I, j = TyIds.size(); + + while (i && j) + if (FilterIds[--i] != TyIds[--j]) + goto try_next; + + if (!j) + // The new filter coincides with range [i, end) of the existing filter. + return -(1 + i); + +try_next:; + } + + // Add the new filter. + int FilterID = -(1 + FilterIds.size()); + FilterIds.reserve(FilterIds.size() + TyIds.size() + 1); + for (unsigned I = 0, N = TyIds.size(); I != N; ++I) + FilterIds.push_back(TyIds[I]); + FilterEnds.push_back(FilterIds.size()); + FilterIds.push_back(0); // terminator + return FilterID; +} + +/// getPersonality - Return the personality function for the current function. +Function *MachineModuleInfo::getPersonality() const { + // FIXME: Until PR1414 will be fixed, we're using 1 personality function per + // function + return !LandingPads.empty() ? LandingPads[0].Personality : NULL; +} + +/// getPersonalityIndex - Return unique index for current personality +/// function. NULL personality function should always get zero index. +unsigned MachineModuleInfo::getPersonalityIndex() const { + const Function* Personality = NULL; + + // Scan landing pads. If there is at least one non-NULL personality - use it. + for (unsigned i = 0; i != LandingPads.size(); ++i) + if (LandingPads[i].Personality) { + Personality = LandingPads[i].Personality; + break; + } + + for (unsigned i = 0; i < Personalities.size(); ++i) { + if (Personalities[i] == Personality) + return i; + } + + // This should never happen + assert(0 && "Personality function should be set!"); + return 0; +} + +//===----------------------------------------------------------------------===// +/// DebugLabelFolding pass - This pass prunes out redundant labels. This allows +/// a info consumer to determine if the range of two labels is empty, by seeing +/// if the labels map to the same reduced label. + +namespace llvm { + +struct DebugLabelFolder : public MachineFunctionPass { + static char ID; + DebugLabelFolder() : MachineFunctionPass((intptr_t)&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + virtual const char *getPassName() const { return "Label Folder"; } +}; + +char DebugLabelFolder::ID = 0; + +bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) { + // Get machine module info. + MachineModuleInfo *MMI = getAnalysisToUpdate<MachineModuleInfo>(); + if (!MMI) return false; + // Get target instruction info. + const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); + if (!TII) return false; + + // Track if change is made. + bool MadeChange = false; + // No prior label to begin. + unsigned PriorLabel = 0; + + // Iterate through basic blocks. + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); + BB != E; ++BB) { + // Iterate through instructions. + for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { + // Is it a label. + if ((unsigned)I->getOpcode() == TargetInstrInfo::LABEL) { + // The label ID # is always operand #0, an immediate. + unsigned NextLabel = I->getOperand(0).getImm(); + + // If there was an immediate prior label. + if (PriorLabel) { + // Remap the current label to prior label. 
+ MMI->RemapLabel(NextLabel, PriorLabel);
+ // Delete the current label.
+ I = BB->erase(I);
+ // Indicate a change has been made.
+ MadeChange = true;
+ continue;
+ } else {
+ // Start a new round.
+ PriorLabel = NextLabel;
+ }
+ } else {
+ // No consecutive labels.
+ PriorLabel = 0;
+ }
+
+ ++I;
+ }
+ }
+
+ return MadeChange;
+}
+
+FunctionPass *createDebugLabelFoldingPass() { return new DebugLabelFolder(); }
+
+}
+
diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp
new file mode 100644
index 0000000..a7ba5bb
--- /dev/null
+++ b/lib/CodeGen/MachinePassRegistry.cpp
@@ -0,0 +1,41 @@
+//===-- CodeGen/MachinePassRegistry.cpp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the machine function pass registry for register allocators
+// and instruction schedulers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePassRegistry.h"
+
+using namespace llvm;
+
+
+/// Add - Adds a function pass to the registration list.
+///
+void MachinePassRegistry::Add(MachinePassRegistryNode *Node) {
+ Node->setNext(List);
+ List = Node;
+ if (Listener) Listener->NotifyAdd(Node->getName(),
+ Node->getCtor(),
+ Node->getDescription());
+}
+
+
+/// Remove - Removes a function pass from the registration list.
+///
+void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) {
+ for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) {
+ if (*I == Node) {
+ if (Listener) Listener->NotifyRemove(Node->getName());
+ *I = (*I)->getNext();
+ break;
+ }
+ }
+}
diff --git a/lib/CodeGen/Makefile b/lib/CodeGen/Makefile
new file mode 100644
index 0000000..7e5d8ee
--- /dev/null
+++ b/lib/CodeGen/Makefile
@@ -0,0 +1,22 @@
+##===- lib/CodeGen/Makefile --------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file was developed by the LLVM research group and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMCodeGen
+PARALLEL_DIRS = SelectionDAG
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
+# Xcode prior to 2.4 generates an error in -pedantic mode with use of HUGE_VAL
+# in this directory. Disable -pedantic for this broken compiler.
+ifneq ($(HUGE_VAL_SANITY),yes)
+CompileCommonOpts := $(filter-out -pedantic, $(CompileCommonOpts))
+endif
+
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
new file mode 100644
index 0000000..fec9e2e
--- /dev/null
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -0,0 +1,342 @@
+//===-- PHIElimination.cpp - Eliminate PHI nodes by inserting copies ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions. This destroys SSA information, but is the desired input for
+// some register allocators.
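// [Editor's note: illustrative sketch, not part of this commit.] For a PHI
//
//   bb2:  %v = PHI [ %a, bb0 ], [ %b, bb1 ]
//
// LowerAtomicPHINode below allocates a fresh virtual register %tmp, inserts
// "%tmp = copy %a" into bb0 and "%tmp = copy %b" into bb1 (just before their
// terminators), and replaces the PHI itself with "%v = copy %tmp" at the top
// of bb2.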
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "phielim" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/SSARegMap.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +#include <set> +#include <algorithm> +using namespace llvm; + +STATISTIC(NumAtomic, "Number of atomic phis lowered"); +//STATISTIC(NumSimple, "Number of simple phis lowered"); + +namespace { + struct VISIBILITY_HIDDEN PNE : public MachineFunctionPass { + static char ID; // Pass identification, replacement for typeid + PNE() : MachineFunctionPass((intptr_t)&ID) {} + + bool runOnMachineFunction(MachineFunction &Fn) { + analyzePHINodes(Fn); + + bool Changed = false; + + // Eliminate PHI instructions by inserting copies into predecessor blocks. + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + Changed |= EliminatePHINodes(Fn, *I); + + VRegPHIUseCount.clear(); + return Changed; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<LiveVariables>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions + /// in predecessor basic blocks. + /// + bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); + void LowerAtomicPHINode(MachineBasicBlock &MBB, + MachineBasicBlock::iterator AfterPHIsIt); + + /// analyzePHINodes - Gather information about the PHI nodes in + /// here. In particular, we want to map the number of uses of a virtual + /// register which is used in a PHI node. We map that to the BB the + /// vreg is coming from. This is used later to determine when the vreg + /// is killed in the BB. + /// + void analyzePHINodes(const MachineFunction& Fn); + + typedef std::pair<const MachineBasicBlock*, unsigned> BBVRegPair; + typedef std::map<BBVRegPair, unsigned> VRegPHIUse; + + VRegPHIUse VRegPHIUseCount; + }; + + char PNE::ID = 0; + RegisterPass<PNE> X("phi-node-elimination", + "Eliminate PHI nodes for register allocation"); +} + +const PassInfo *llvm::PHIEliminationID = X.getPassInfo(); + +/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in +/// predecessor basic blocks. +/// +bool PNE::EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB) { + if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI) + return false; // Quick exit for basic blocks without PHIs. + + // Get an iterator to the first instruction after the last PHI node (this may + // also be the end of the basic block). + MachineBasicBlock::iterator AfterPHIsIt = MBB.begin(); + while (AfterPHIsIt != MBB.end() && + AfterPHIsIt->getOpcode() == TargetInstrInfo::PHI) + ++AfterPHIsIt; // Skip over all of the PHI nodes... + + while (MBB.front().getOpcode() == TargetInstrInfo::PHI) + LowerAtomicPHINode(MBB, AfterPHIsIt); + + return true; +} + +/// InstructionUsesRegister - Return true if the specified machine instr has a +/// use of the specified register. 
+static bool InstructionUsesRegister(MachineInstr *MI, unsigned SrcReg) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+ if (MI->getOperand(i).isRegister() &&
+ MI->getOperand(i).getReg() == SrcReg &&
+ MI->getOperand(i).isUse())
+ return true;
+ return false;
+}
+
+/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
+/// under the assumption that it needs to be lowered in a way that supports
+/// atomic execution of PHIs. This lowering method is always correct.
+void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt) {
+ // Unlink the PHI node from the basic block, but don't delete the PHI yet.
+ MachineInstr *MPhi = MBB.remove(MBB.begin());
+
+ unsigned DestReg = MPhi->getOperand(0).getReg();
+
+ // Create a new register for the incoming PHI arguments.
+ MachineFunction &MF = *MBB.getParent();
+ const TargetRegisterClass *RC = MF.getSSARegMap()->getRegClass(DestReg);
+ unsigned IncomingReg = MF.getSSARegMap()->createVirtualRegister(RC);
+
+ // Insert a register to register copy in the top of the current block (but
+ // after any remaining phi nodes) which copies the new incoming register
+ // into the phi node destination.
+ //
+ const MRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ RegInfo->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC);
+
+ // Update live variable information if there is any...
+ LiveVariables *LV = getAnalysisToUpdate<LiveVariables>();
+ if (LV) {
+ MachineInstr *PHICopy = prior(AfterPHIsIt);
+
+ // Increment use count of the newly created virtual register.
+ LV->getVarInfo(IncomingReg).NumUses++;
+
+ // Add information to LiveVariables to know that the incoming value is
+ // killed. Note that because the value is defined in several places (once
+ // each for each incoming block), the "def" block and instruction fields
+ // for the VarInfo are not filled in.
+ //
+ LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
+
+ // Since we are going to be deleting the PHI node, if it is the last use
+ // of any registers, or if the value itself is dead, we need to move this
+ // information over to the new copy we just inserted.
+ //
+ LV->removeVirtualRegistersKilled(MPhi);
+
+ // If the result is dead, update LV.
+ if (LV->RegisterDefIsDead(MPhi, DestReg)) {
+ LV->addVirtualRegisterDead(DestReg, PHICopy);
+ LV->removeVirtualRegistersDead(MPhi);
+ }
+
+ // Realize that the destination register is defined by the PHI copy now, not
+ // the PHI itself.
+ LV->getVarInfo(DestReg).DefInst = PHICopy;
+ }
+
+ // Adjust the VRegPHIUseCount map to account for the removal of this PHI
+ // node.
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+ --VRegPHIUseCount[BBVRegPair(
+ MPhi->getOperand(i + 1).getMachineBasicBlock(),
+ MPhi->getOperand(i).getReg())];
+
+ // Now loop over all of the incoming arguments, changing them to copy into
+ // the IncomingReg register in the corresponding predecessor basic block.
+ //
+ std::set<MachineBasicBlock*> MBBsInsertedInto;
+ for (int i = MPhi->getNumOperands() - 1; i >= 2; i-=2) {
+ unsigned SrcReg = MPhi->getOperand(i-1).getReg();
+ assert(MRegisterInfo::isVirtualRegister(SrcReg) &&
+ "Machine PHI Operands must all be virtual registers!");
+
+ // Get the MachineBasicBlock equivalent of the BasicBlock that is the
+ // source path of the PHI.
+ MachineBasicBlock &opBlock = *MPhi->getOperand(i).getMachineBasicBlock();
+
+ // Check to make sure we haven't already emitted the copy for this block.
+ // This can happen because PHI nodes may have multiple entries for the + // same basic block. + if (!MBBsInsertedInto.insert(&opBlock).second) + continue; // If the copy has already been emitted, we're done. + + // Get an iterator pointing to the first terminator in the block (or end()). + // This is the point where we can insert a copy if we'd like to. + MachineBasicBlock::iterator I = opBlock.getFirstTerminator(); + + // Insert the copy. + RegInfo->copyRegToReg(opBlock, I, IncomingReg, SrcReg, RC); + + // Now update live variable information if we have it. Otherwise we're done + if (!LV) continue; + + // We want to be able to insert a kill of the register if this PHI + // (aka, the copy we just inserted) is the last use of the source + // value. Live variable analysis conservatively handles this by + // saying that the value is live until the end of the block the PHI + // entry lives in. If the value really is dead at the PHI copy, there + // will be no successor blocks which have the value live-in. + // + // Check to see if the copy is the last use, and if so, update the + // live variables information so that it knows the copy source + // instruction kills the incoming value. + // + LiveVariables::VarInfo &InRegVI = LV->getVarInfo(SrcReg); + + // Loop over all of the successors of the basic block, checking to see + // if the value is either live in the block, or if it is killed in the + // block. Also check to see if this register is in use by another PHI + // node which has not yet been eliminated. If so, it will be killed + // at an appropriate point later. + // + + // Is it used by any PHI instructions in this block? + bool ValueIsLive = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0; + + std::vector<MachineBasicBlock*> OpSuccBlocks; + + // Otherwise, scan successors, including the BB the PHI node lives in. + for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(), + E = opBlock.succ_end(); SI != E && !ValueIsLive; ++SI) { + MachineBasicBlock *SuccMBB = *SI; + + // Is it alive in this successor? + unsigned SuccIdx = SuccMBB->getNumber(); + if (SuccIdx < InRegVI.AliveBlocks.size() && + InRegVI.AliveBlocks[SuccIdx]) { + ValueIsLive = true; + break; + } + + OpSuccBlocks.push_back(SuccMBB); + } + + // Check to see if this value is live because there is a use in a successor + // that kills it. + if (!ValueIsLive) { + switch (OpSuccBlocks.size()) { + case 1: { + MachineBasicBlock *MBB = OpSuccBlocks[0]; + for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i) + if (InRegVI.Kills[i]->getParent() == MBB) { + ValueIsLive = true; + break; + } + break; + } + case 2: { + MachineBasicBlock *MBB1 = OpSuccBlocks[0], *MBB2 = OpSuccBlocks[1]; + for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i) + if (InRegVI.Kills[i]->getParent() == MBB1 || + InRegVI.Kills[i]->getParent() == MBB2) { + ValueIsLive = true; + break; + } + break; + } + default: + std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end()); + for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i) + if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(), + InRegVI.Kills[i]->getParent())) { + ValueIsLive = true; + break; + } + } + } + + // Okay, if we now know that the value is not live out of the block, + // we can add a kill marker in this block saying that it kills the incoming + // value! + if (!ValueIsLive) { + // In our final twist, we have to decide which instruction kills the + // register. 
In most cases this is the copy; however, the first
+ // terminator instruction at the end of the block may also use the value.
+ // In this case, we should mark *it* as being the killing instruction, not
+ // the copy.
+ bool FirstTerminatorUsesValue = false;
+ if (I != opBlock.end()) {
+ FirstTerminatorUsesValue = InstructionUsesRegister(I, SrcReg);
+
+ // Check that no other terminators use values.
+#ifndef NDEBUG
+ for (MachineBasicBlock::iterator TI = next(I); TI != opBlock.end();
+ ++TI) {
+ assert(!InstructionUsesRegister(TI, SrcReg) &&
+ "Terminator instructions cannot use virtual registers unless "
+ "they are the first terminator in a block!");
+ }
+#endif
+ }
+
+ MachineBasicBlock::iterator KillInst;
+ if (!FirstTerminatorUsesValue)
+ KillInst = prior(I);
+ else
+ KillInst = I;
+
+ // Finally, mark it killed.
+ LV->addVirtualRegisterKilled(SrcReg, KillInst);
+
+ // This vreg no longer lives all of the way through opBlock.
+ unsigned opBlockNum = opBlock.getNumber();
+ if (opBlockNum < InRegVI.AliveBlocks.size())
+ InRegVI.AliveBlocks[opBlockNum] = false;
+ }
+ }
+
+ // Really delete the PHI instruction now!
+ delete MPhi;
+ ++NumAtomic;
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in the function.
+/// In particular, we want to map the number of uses of a virtual register
+/// which is used in a PHI node. We map that to the BB the vreg is coming from.
+/// This is used later to determine when the vreg is killed in the BB.
+///
+void PNE::analyzePHINodes(const MachineFunction& Fn) {
+ for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI)
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ ++VRegPHIUseCount[BBVRegPair(
+ BBI->getOperand(i + 1).getMachineBasicBlock(),
+ BBI->getOperand(i).getReg())];
+}
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
new file mode 100644
index 0000000..87510e4
--- /dev/null
+++ b/lib/CodeGen/Passes.cpp
@@ -0,0 +1,56 @@
+//===-- Passes.cpp - Target independent code generation passes ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces to access the target independent code
+// generation passes provided by the LLVM backend.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterRegAlloc class - Track the registration of register allocators.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterRegAlloc::Registry;
+
+
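// [Editor's note: illustrative sketch, not part of this commit.] A target or
// tool can plug its own allocator into this registry with a static
// RegisterRegAlloc object, making it selectable via -regalloc. Hypothetical
// example (createMyRegAlloc is a placeholder factory):
//
//   FunctionPass *createMyRegAlloc(); // returns a new allocator pass
//   static RegisterRegAlloc
//     myRegAlloc("myalloc", "  my experimental allocator", createMyRegAlloc);

+//===---------------------------------------------------------------------===//
+///
+/// RegAlloc command line options.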
+/// +//===---------------------------------------------------------------------===// +namespace { + cl::opt<RegisterRegAlloc::FunctionPassCtor, false, + RegisterPassParser<RegisterRegAlloc> > + RegAlloc("regalloc", + cl::init(&createLinearScanRegisterAllocator), + cl::desc("Register allocator to use: (default = linearscan)")); +} + + +//===---------------------------------------------------------------------===// +/// +/// createRegisterAllocator - choose the appropriate register allocator. +/// +//===---------------------------------------------------------------------===// +FunctionPass *llvm::createRegisterAllocator() { + RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault(); + + if (!Ctor) { + Ctor = RegAlloc; + RegisterRegAlloc::setDefault(RegAlloc); + } + + return Ctor(); +} diff --git a/lib/CodeGen/PhysRegTracker.h b/lib/CodeGen/PhysRegTracker.h new file mode 100644 index 0000000..f5a2402 --- /dev/null +++ b/lib/CodeGen/PhysRegTracker.h @@ -0,0 +1,73 @@ +//===-- llvm/CodeGen/PhysRegTracker.h - Physical Register Tracker -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a physical register tracker. The tracker +// tracks physical register usage through addRegUse and +// delRegUse. isRegAvail checks if a physical register is available or +// not taking into consideration register aliases. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PHYSREGTRACKER_H +#define LLVM_CODEGEN_PHYSREGTRACKER_H + +#include "llvm/Target/MRegisterInfo.h" + +namespace llvm { + + class PhysRegTracker { + const MRegisterInfo* mri_; + std::vector<unsigned> regUse_; + + public: + PhysRegTracker(const MRegisterInfo& mri) + : mri_(&mri), + regUse_(mri_->getNumRegs(), 0) { + } + + PhysRegTracker(const PhysRegTracker& rhs) + : mri_(rhs.mri_), + regUse_(rhs.regUse_) { + } + + const PhysRegTracker& operator=(const PhysRegTracker& rhs) { + mri_ = rhs.mri_; + regUse_ = rhs.regUse_; + return *this; + } + + void addRegUse(unsigned physReg) { + assert(MRegisterInfo::isPhysicalRegister(physReg) && + "should be physical register!"); + ++regUse_[physReg]; + for (const unsigned* as = mri_->getAliasSet(physReg); *as; ++as) + ++regUse_[*as]; + } + + void delRegUse(unsigned physReg) { + assert(MRegisterInfo::isPhysicalRegister(physReg) && + "should be physical register!"); + assert(regUse_[physReg] != 0); + --regUse_[physReg]; + for (const unsigned* as = mri_->getAliasSet(physReg); *as; ++as) { + assert(regUse_[*as] != 0); + --regUse_[*as]; + } + } + + bool isRegAvail(unsigned physReg) const { + assert(MRegisterInfo::isPhysicalRegister(physReg) && + "should be physical register!"); + return regUse_[physReg] == 0; + } + }; + +} // End llvm namespace + +#endif diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp new file mode 100644 index 0000000..06f47c7 --- /dev/null +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -0,0 +1,81 @@ +//===----- SchedulePostRAList.cpp - list scheduler ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Dale Johannesen and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This implements a top-down list scheduler, using standard algorithms. +// The basic approach uses a priority queue of available nodes to schedule. +// One at a time, nodes are taken from the priority queue (thus in priority +// order), checked for legality to schedule, and emitted if legal. +// +// Nodes may not be legal to schedule either due to structural hazards (e.g. +// pipeline or resource constraints) or because an input to the instruction has +// not completed execution. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "post-RA-sched" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/Debug.h" +//#include "llvm/ADT/Statistic.h" +//#include <climits> +//#include <queue> +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +namespace { + bool NoPostRAScheduling; + + // When this works it will be on by default. + cl::opt<bool, true> + DisablePostRAScheduler("disable-post-RA-scheduler", + cl::desc("Disable scheduling after register allocation"), + cl::location(NoPostRAScheduling), + cl::init(true)); + + class VISIBILITY_HIDDEN SchedulePostRATDList : public MachineFunctionPass { + public: + static char ID; + SchedulePostRATDList() : MachineFunctionPass((intptr_t)&ID) {} + private: + MachineFunction *MF; + const TargetMachine *TM; + public: + const char *getPassName() const { + return "Post RA top-down list latency scheduler (STUB)"; + } + + bool runOnMachineFunction(MachineFunction &Fn); + }; + char SchedulePostRATDList::ID = 0; +} + +bool SchedulePostRATDList::runOnMachineFunction(MachineFunction &Fn) { + if (NoPostRAScheduling) + return true; + + DOUT << "SchedulePostRATDList\n"; + MF = &Fn; + TM = &MF->getTarget(); + + // Loop over all of the basic blocks + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); + MBB != MBBe; ++MBB) + ; + + return true; +} + + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createPostRAScheduler() { + return new SchedulePostRATDList(); +} diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp new file mode 100644 index 0000000..882c6a7 --- /dev/null +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -0,0 +1,549 @@ +//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is responsible for finalizing the functions frame layout, saving +// callee saved registers, and for emitting prolog & epilog code for the +// function. +// +// This pass must be run after register allocation. After this pass is +// executed, it is illegal to construct MO_FrameIndex operands. 
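+// (After this pass runs, every remaining MO_FrameIndex operand has been
+// rewritten by replaceFrameIndices, below, into a concrete
+// register-plus-offset reference.)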
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/MRegisterInfo.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/STLExtras.h" +#include <climits> +using namespace llvm; + +namespace { + struct VISIBILITY_HIDDEN PEI : public MachineFunctionPass { + static char ID; + PEI() : MachineFunctionPass((intptr_t)&ID) {} + + const char *getPassName() const { + return "Prolog/Epilog Insertion & Frame Finalization"; + } + + /// runOnMachineFunction - Insert prolog/epilog code and replace abstract + /// frame indexes with appropriate references. + /// + bool runOnMachineFunction(MachineFunction &Fn) { + const MRegisterInfo *MRI = Fn.getTarget().getRegisterInfo(); + RS = MRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; + + // Get MachineModuleInfo so that we can track the construction of the + // frame. + if (MachineModuleInfo *MMI = getAnalysisToUpdate<MachineModuleInfo>()) { + Fn.getFrameInfo()->setMachineModuleInfo(MMI); + } + + // Allow the target machine to make some adjustments to the function + // e.g. UsedPhysRegs before calculateCalleeSavedRegisters. + MRI->processFunctionBeforeCalleeSavedScan(Fn, RS); + + // Scan the function for modified callee saved registers and insert spill + // code for any callee saved registers that are modified. Also calculate + // the MaxCallFrameSize and HasCalls variables for the function's frame + // information and eliminates call frame pseudo instructions. + calculateCalleeSavedRegisters(Fn); + + // Add the code to save and restore the callee saved registers + saveCalleeSavedRegisters(Fn); + + // Allow the target machine to make final modifications to the function + // before the frame layout is finalized. + Fn.getTarget().getRegisterInfo()->processFunctionBeforeFrameFinalized(Fn); + + // Calculate actual frame offsets for all of the abstract stack objects... + calculateFrameObjectOffsets(Fn); + + // Add prolog and epilog code to the function. This function is required + // to align the stack frame as necessary for any stack variables or + // called functions. Because of this, calculateCalleeSavedRegisters + // must be called before this function in order to set the HasCalls + // and MaxCallFrameSize variables. + insertPrologEpilogCode(Fn); + + // Replace all MO_FrameIndex operands with physical register references + // and actual offsets. + // + replaceFrameIndices(Fn); + + delete RS; + return true; + } + + private: + RegScavenger *RS; + + // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved + // stack frame indexes. + unsigned MinCSFrameIndex, MaxCSFrameIndex; + + void calculateCalleeSavedRegisters(MachineFunction &Fn); + void saveCalleeSavedRegisters(MachineFunction &Fn); + void calculateFrameObjectOffsets(MachineFunction &Fn); + void replaceFrameIndices(MachineFunction &Fn); + void insertPrologEpilogCode(MachineFunction &Fn); + }; + char PEI::ID = 0; +} + + +/// createPrologEpilogCodeInserter - This function returns a pass that inserts +/// prolog and epilog code, and eliminates abstract frame references. 
+/// +FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); } + + +/// calculateCalleeSavedRegisters - Scan the function for modified callee saved +/// registers. Also calculate the MaxCallFrameSize and HasCalls variables for +/// the function's frame information and eliminates call frame pseudo +/// instructions. +/// +void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { + const MRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); + + // Get the callee saved register list... + const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); + + // Get the function call frame set-up and tear-down instruction opcode + int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); + int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode(); + + // These are used to keep track the callee-save area. Initialize them. + MinCSFrameIndex = INT_MAX; + MaxCSFrameIndex = 0; + + // Early exit for targets which have no callee saved registers and no call + // frame setup/destroy pseudo instructions. + if ((CSRegs == 0 || CSRegs[0] == 0) && + FrameSetupOpcode == -1 && FrameDestroyOpcode == -1) + return; + + unsigned MaxCallFrameSize = 0; + bool HasCalls = false; + + std::vector<MachineBasicBlock::iterator> FrameSDOps; + for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) + if (I->getOpcode() == FrameSetupOpcode || + I->getOpcode() == FrameDestroyOpcode) { + assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo" + " instructions should have a single immediate argument!"); + unsigned Size = I->getOperand(0).getImmedValue(); + if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; + HasCalls = true; + FrameSDOps.push_back(I); + } + + MachineFrameInfo *FFI = Fn.getFrameInfo(); + FFI->setHasCalls(HasCalls); + FFI->setMaxCallFrameSize(MaxCallFrameSize); + + for (unsigned i = 0, e = FrameSDOps.size(); i != e; ++i) { + MachineBasicBlock::iterator I = FrameSDOps[i]; + // If call frames are not being included as part of the stack frame, + // and there is no dynamic allocation (therefore referencing frame slots + // off sp), leave the pseudo ops alone. We'll eliminate them later. + if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn)) + RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); + } + + // Now figure out which *callee saved* registers are modified by the current + // function, thus needing to be saved and restored in the prolog/epilog. + // + const TargetRegisterClass* const *CSRegClasses = + RegInfo->getCalleeSavedRegClasses(&Fn); + std::vector<CalleeSavedInfo> CSI; + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + if (Fn.isPhysRegUsed(Reg)) { + // If the reg is modified, save it! + CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); + } else { + for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); + *AliasSet; ++AliasSet) { // Check alias registers too. + if (Fn.isPhysRegUsed(*AliasSet)) { + CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); + break; + } + } + } + } + + if (CSI.empty()) + return; // Early exit if no callee saved registers are modified! + + unsigned NumFixedSpillSlots; + const std::pair<unsigned,int> *FixedSpillSlots = + TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); + + // Now that we know which registers need to be saved and restored, allocate + // stack slots for them. 
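+  // (Each register either has a target-mandated fixed slot, found by the
+  // linear search over FixedSpillSlots below, or gets a fresh stack object
+  // aligned to min(register class alignment, stack alignment).)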
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    const TargetRegisterClass *RC = CSI[i].getRegClass();
+
+    // Check to see if this physreg must be spilled to a particular stack slot
+    // on this target.
+    const std::pair<unsigned,int> *FixedSlot = FixedSpillSlots;
+    while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
+           FixedSlot->first != Reg)
+      ++FixedSlot;
+
+    int FrameIdx;
+    if (FixedSlot == FixedSpillSlots+NumFixedSpillSlots) {
+      // Nope, just spill it anywhere convenient.
+      unsigned Align = RC->getAlignment();
+      unsigned StackAlign = TFI->getStackAlignment();
+      // We may not be able to satisfy the desired alignment specification of
+      // the TargetRegisterClass if the stack alignment is smaller. Use the
+      // min.
+      Align = std::min(Align, StackAlign);
+      FrameIdx = FFI->CreateStackObject(RC->getSize(), Align);
+      if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+      if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+    } else {
+      // Spill it to the stack where we must.
+      FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->second);
+    }
+    CSI[i].setFrameIdx(FrameIdx);
+  }
+
+  FFI->setCalleeSavedInfo(CSI);
+}
+
+/// saveCalleeSavedRegisters - Insert spill code for any callee saved registers
+/// that are modified in the function.
+///
+void PEI::saveCalleeSavedRegisters(MachineFunction &Fn) {
+  // Get callee saved register information.
+  MachineFrameInfo *FFI = Fn.getFrameInfo();
+  const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+
+  // Early exit if no callee saved registers are modified!
+  if (CSI.empty())
+    return;
+
+  const MRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+
+  // Now that we have a stack slot for each register to be saved, insert spill
+  // code into the entry block.
+  MachineBasicBlock *MBB = Fn.begin();
+  MachineBasicBlock::iterator I = MBB->begin();
+  if (!RegInfo->spillCalleeSavedRegisters(*MBB, I, CSI)) {
+    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+      // Add the callee-saved register as live-in. It's killed at the spill.
+      MBB->addLiveIn(CSI[i].getReg());
+
+      // Insert the spill to the stack frame.
+      RegInfo->storeRegToStackSlot(*MBB, I, CSI[i].getReg(),
+                                   CSI[i].getFrameIdx(), CSI[i].getRegClass());
+    }
+  }
+
+  // Add code to restore the callee-save registers in each exiting block.
+  const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+  for (MachineFunction::iterator FI = Fn.begin(), E = Fn.end(); FI != E; ++FI)
+    // If the last instruction is a return instruction, add an epilogue.
+    if (!FI->empty() && TII.isReturn(FI->back().getOpcode())) {
+      MBB = FI;
+      I = MBB->end(); --I;
+
+      // Skip over all terminator instructions, which are part of the return
+      // sequence.
+      MachineBasicBlock::iterator I2 = I;
+      while (I2 != MBB->begin() && TII.isTerminatorInstr((--I2)->getOpcode()))
+        I = I2;
+
+      bool AtStart = I == MBB->begin();
+      MachineBasicBlock::iterator BeforeI = I;
+      if (!AtStart)
+        --BeforeI;
+
+      // Restore all registers immediately before the return and any
+      // terminators that precede it.
+      if (!RegInfo->restoreCalleeSavedRegisters(*MBB, I, CSI)) {
+        for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+          RegInfo->loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
+                                        CSI[i].getFrameIdx(),
+                                        CSI[i].getRegClass());
+          assert(I != MBB->begin() &&
+                 "loadRegFromStackSlot didn't insert any code!");
+          // Insert in reverse order. loadRegFromStackSlot can insert multiple
+          // instructions.
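+          // (Resetting I to the slot just after BeforeI, or to begin(),
+          // keeps each new reload ahead of the previously inserted one.)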
+          if (AtStart)
+            I = MBB->begin();
+          else {
+            I = BeforeI;
+            ++I;
+          }
+        }
+      }
+    }
+}
+
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
+  const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+
+  bool StackGrowsDown =
+    TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+
+  // Loop over all of the stack objects, assigning sequential addresses...
+  MachineFrameInfo *FFI = Fn.getFrameInfo();
+
+  unsigned MaxAlign = 0;
+
+  // Start at the beginning of the local area.
+  // The Offset is the distance from the stack top in the direction
+  // of stack growth -- so it's always positive.
+  int64_t Offset = TFI.getOffsetOfLocalArea();
+  if (StackGrowsDown)
+    Offset = -Offset;
+  assert(Offset >= 0
+         && "Local area offset should be in direction of stack growth");
+
+  // If there are fixed sized objects that are preallocated in the local area,
+  // non-fixed objects can't be allocated right at the start of the local
+  // area. We currently don't support filling in holes in between fixed sized
+  // objects, so we adjust 'Offset' to point to the end of the last fixed
+  // sized preallocated object.
+  for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
+    int64_t FixedOff;
+    if (StackGrowsDown) {
+      // The maximum distance from the stack pointer is at the lower address
+      // of the object -- which is given by its offset. For a down-growing
+      // stack the offset is negative, so we negate it to get the distance.
+      FixedOff = -FFI->getObjectOffset(i);
+    } else {
+      // The maximum distance from the stack pointer is at the upper
+      // address of the object.
+      FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i);
+    }
+    if (FixedOff > Offset) Offset = FixedOff;
+  }
+
+  // First assign frame offsets to stack objects that are used to spill
+  // callee saved registers.
+  if (StackGrowsDown) {
+    for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+      // If the stack grows down, we need to add the size to find the lowest
+      // address of the object.
+      Offset += FFI->getObjectSize(i);
+
+      unsigned Align = FFI->getObjectAlignment(i);
+      // If the alignment of this object is greater than that of the stack,
+      // then increase the stack alignment to match.
+      MaxAlign = std::max(MaxAlign, Align);
+      // Adjust to alignment boundary.
+      Offset = (Offset+Align-1)/Align*Align;
+
+      FFI->setObjectOffset(i, -Offset);        // Set the computed offset
+    }
+  } else {
+    for (unsigned i = MaxCSFrameIndex; i >= MinCSFrameIndex; --i) {
+      unsigned Align = FFI->getObjectAlignment(i);
+      // If the alignment of this object is greater than that of the stack,
+      // then increase the stack alignment to match.
+      MaxAlign = std::max(MaxAlign, Align);
+      // Adjust to alignment boundary.
+      Offset = (Offset+Align-1)/Align*Align;
+
+      FFI->setObjectOffset(i, Offset);
+      Offset += FFI->getObjectSize(i);
+    }
+  }
+
+  // Make sure the special register scavenging spill slot is closest to the
+  // frame pointer if a frame pointer is required.
+  const MRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+  if (RS && RegInfo->hasFP(Fn)) {
+    int SFI = RS->getScavengingFrameIndex();
+    if (SFI >= 0) {
+      // If the stack grows down, we need to add the size to find the lowest
+      // address of the object.
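+      // (For example, with Offset == 16 and an 8-byte slot with 8-byte
+      // alignment, Offset becomes 24 and the slot lands at offset -24,
+      // occupying [-24, -16) on a down-growing stack.)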
+      if (StackGrowsDown)
+        Offset += FFI->getObjectSize(SFI);
+
+      unsigned Align = FFI->getObjectAlignment(SFI);
+      // Adjust to alignment boundary.
+      Offset = (Offset+Align-1)/Align*Align;
+
+      if (StackGrowsDown) {
+        FFI->setObjectOffset(SFI, -Offset);        // Set the computed offset
+      } else {
+        FFI->setObjectOffset(SFI, Offset);
+        Offset += FFI->getObjectSize(SFI);
+      }
+    }
+  }
+
+  // Then assign frame offsets to stack objects that are not used to spill
+  // callee saved registers.
+  for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
+    if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+      continue;
+    if (RS && (int)i == RS->getScavengingFrameIndex())
+      continue;
+
+    // If the stack grows down, we need to add the size to find the lowest
+    // address of the object.
+    if (StackGrowsDown)
+      Offset += FFI->getObjectSize(i);
+
+    unsigned Align = FFI->getObjectAlignment(i);
+    // If the alignment of this object is greater than that of the stack, then
+    // increase the stack alignment to match.
+    MaxAlign = std::max(MaxAlign, Align);
+    // Adjust to alignment boundary.
+    Offset = (Offset+Align-1)/Align*Align;
+
+    if (StackGrowsDown) {
+      FFI->setObjectOffset(i, -Offset);        // Set the computed offset
+    } else {
+      FFI->setObjectOffset(i, Offset);
+      Offset += FFI->getObjectSize(i);
+    }
+  }
+
+  // Make sure the special register scavenging spill slot is closest to the
+  // stack pointer.
+  if (RS && !RegInfo->hasFP(Fn)) {
+    int SFI = RS->getScavengingFrameIndex();
+    if (SFI >= 0) {
+      // If the stack grows down, we need to add the size to find the lowest
+      // address of the object.
+      if (StackGrowsDown)
+        Offset += FFI->getObjectSize(SFI);
+
+      unsigned Align = FFI->getObjectAlignment(SFI);
+      // Adjust to alignment boundary.
+      Offset = (Offset+Align-1)/Align*Align;
+
+      if (StackGrowsDown) {
+        FFI->setObjectOffset(SFI, -Offset);        // Set the computed offset
+      } else {
+        FFI->setObjectOffset(SFI, Offset);
+        Offset += FFI->getObjectSize(SFI);
+      }
+    }
+  }
+
+  // Round up the size to a multiple of the alignment, but only if there are
+  // calls or allocas in the function. This ensures that any calls to
+  // subroutines have their stack frames suitably aligned.
+  if (!RegInfo->targetHandlesStackFrameRounding() &&
+      (FFI->hasCalls() || FFI->hasVarSizedObjects())) {
+    // If we have reserved argument space for call sites in the function
+    // immediately on entry to the current function, count it as part of the
+    // overall stack size.
+    if (RegInfo->hasReservedCallFrame(Fn))
+      Offset += FFI->getMaxCallFrameSize();
+
+    unsigned AlignMask = TFI.getStackAlignment() - 1;
+    Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+  }
+
+  // Update frame info to pretend that this is part of the stack...
+  FFI->setStackSize(Offset+TFI.getOffsetOfLocalArea());
+
+  // Remember the required stack alignment in case targets need it to perform
+  // dynamic stack alignment.
+  assert(FFI->getMaxAlignment() == MaxAlign &&
+         "Stack alignment calculation broken!");
+}
+
+
+/// insertPrologEpilogCode - Scan the function for modified callee saved
+/// registers, insert spill code for these callee saved registers, then add
+/// prolog and epilog code to the function.
+///
+void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
+  // Add prologue to the function...
+  Fn.getTarget().getRegisterInfo()->emitPrologue(Fn);
+
+  // Add epilogue to restore the callee-save registers in each exiting block.
+  const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+  for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
+    // If the last instruction is a return instruction, add an epilogue.
+    if (!I->empty() && TII.isReturn(I->back().getOpcode()))
+      Fn.getTarget().getRegisterInfo()->emitEpilogue(Fn, *I);
+  }
+}
+
+
+/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
+/// register references and actual offsets.
+///
+void PEI::replaceFrameIndices(MachineFunction &Fn) {
+  if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
+
+  const TargetMachine &TM = Fn.getTarget();
+  assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
+  const MRegisterInfo &MRI = *TM.getRegisterInfo();
+  const TargetFrameInfo *TFI = TM.getFrameInfo();
+  bool StackGrowsDown =
+    TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+  int FrameSetupOpcode   = MRI.getCallFrameSetupOpcode();
+  int FrameDestroyOpcode = MRI.getCallFrameDestroyOpcode();
+
+  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+    int SPAdj = 0;  // SP offset due to call frame setup / destroy.
+    if (RS) RS->enterBasicBlock(BB);
+    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+      MachineInstr *MI = I;
+
+      // Remember how much SP has been adjusted to create the call frame.
+      if (I->getOpcode() == FrameSetupOpcode ||
+          I->getOpcode() == FrameDestroyOpcode) {
+        int Size = I->getOperand(0).getImmedValue();
+        if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
+            (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
+          Size = -Size;
+        SPAdj += Size;
+        MachineBasicBlock::iterator PrevI = prior(I);
+        MRI.eliminateCallFramePseudoInstr(Fn, *BB, I);
+        // Visit the instructions created by eliminateCallFramePseudoInstr().
+        I = next(PrevI);
+        MI = NULL;
+      } else {
+        I++;
+        for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+          if (MI->getOperand(i).isFrameIndex()) {
+            // If this instruction has a FrameIndex operand, we need to use the
+            // target machine register info object to eliminate it.
+            MRI.eliminateFrameIndex(MI, SPAdj, RS);
+
+            // Revisit the instruction in full. Some instructions (e.g. inline
+            // asm instructions) can have multiple frame indices.
+            --I;
+            MI = 0;
+            break;
+          }
+      }
+      // Update register states.
+      if (RS && MI) RS->forward(MI);
+    }
+    assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?");
+  }
+}
diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt
new file mode 100644
index 0000000..4fff15c
--- /dev/null
+++ b/lib/CodeGen/README.txt
@@ -0,0 +1,145 @@
+//===---------------------------------------------------------------------===//
+
+Common register allocation / spilling problem:
+
+        mul lr, r4, lr
+        str lr, [sp, #+52]
+        ldr lr, [r1, #+32]
+        sxth r3, r3
+        ldr r4, [sp, #+52]
+        mla r4, r3, lr, r4
+
+can be:
+
+        mul lr, r4, lr
+        mov r4, lr
+        str lr, [sp, #+52]
+        ldr lr, [r1, #+32]
+        sxth r3, r3
+        mla r4, r3, lr, r4
+
+and then "merge" mul and mov:
+
+        mul r4, r4, lr
+        str lr, [sp, #+52]
+        ldr lr, [r1, #+32]
+        sxth r3, r3
+        mla r4, r3, lr, r4
+
+It also increases the likelihood that the store will become dead.
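+
+If nothing else reads [sp, #+52], the store is then dead as well and can be
+deleted, leaving (hypothetically):
+
+        mul r4, r4, lr
+        ldr lr, [r1, #+32]
+        sxth r3, r3
+        mla r4, r3, lr, r4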
+
+//===---------------------------------------------------------------------===//
+
+I think we should have a "hasSideEffects" flag (which is automatically set for
+stuff that is "isLoad", "isCall", etc.), and the remat pass should eventually
+be able to remat any instruction that has no side effects, if it can handle it
+and if it is profitable.
+
+For now, I'd suggest having the remat stuff work like this:
+
+1. I need to spill/reload this thing.
+2. Check to see if it has side effects.
+3. Check to see if it is simple enough: e.g. it only has one register
+   destination and no register input.
+4. If so, clone the instruction, do the xform, etc.
+
+Advantages of this are:
+
+1. the .td file describes the behavior of the instructions, not the way the
+   algorithm should work.
+2. as remat gets smarter in the future, we shouldn't have to be changing the
+   .td files.
+3. it is easier to explain what the flag means in the .td file, because you
+   don't have to pull in the explanation of how the current remat algo works.
+
+Some potential added complexities:
+
+1. Some instructions have to be glued to their predecessor or successor. All
+   of the PC-relative instructions and condition-code-setting instructions are
+   like this. We could mark them as hasSideEffects, but that's not quite
+   right. PC-relative loads from constant pools can be remat'ed, for example,
+   but it requires more than just cloning the instruction. Some instructions
+   can be remat'ed but expand to more than one instruction; the allocator will
+   have to make a decision.
+
+2. As stated in 1, remat is not as simple as cloning in some cases. The target
+   will have to decide how to remat it. For example, an ARM 2-piece constant
+   generation instruction is remat'ed as a load from the constant pool.
+
+//===---------------------------------------------------------------------===//
+
+bb27 ...
+        ...
+        %reg1037 = ADDri %reg1039, 1
+        %reg1038 = ADDrs %reg1032, %reg1039, %NOREG, 10
+    Successors according to CFG: 0x8b03bf0 (#5)
+
+bb76 (0x8b03bf0, LLVM BB @0x8b032d0, ID#5):
+    Predecessors according to CFG: 0x8b0c5f0 (#3) 0x8b0a7c0 (#4)
+        %reg1039 = PHI %reg1070, mbb<bb76.outer,0x8b0c5f0>, %reg1037, mbb<bb27,0x8b0a7c0>
+
+Note ADDri is not a two-address instruction. However, its result %reg1037 is an
+operand of the PHI node in bb76 and its operand %reg1039 is the result of the
+PHI node. We should treat it as a two-address instruction and make sure the
+ADDri is scheduled after any node that reads %reg1039.
+
+//===---------------------------------------------------------------------===//
+
+Use local info (i.e. the register scavenger) to assign a free register to the
+repeated load and allow reuse:
+        ldr r3, [sp, #+4]
+        add r3, r3, #3
+        ldr r2, [sp, #+8]
+        add r2, r2, #2
+        ldr r1, [sp, #+4]   <==
+        add r1, r1, #1
+        ldr r0, [sp, #+4]
+        add r0, r0, #2
+
+//===---------------------------------------------------------------------===//
+
+LLVM aggressively lifts CSE out of loops. Sometimes this can have negative
+side effects:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+load [i + R1]
+...
+load [i + R2]
+...
+load [i + R3]
+
+Suppose there is high register pressure; then R1, R2, and R3 can be spilled.
+We need to implement proper re-materialization to handle this:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+R1 = X + 4  @ re-materialized
+load [i + R1]
+...
+R2 = X + 7  @ re-materialized
+load [i + R2]
+...
+R3 = X + 15 @ re-materialized
+load [i + R3]
+
+Furthermore, with re-association, we can enable sharing:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+T = i + X
+load [T + 4]
+...
+load [T + 7]
+...
+load [T + 15] +//===---------------------------------------------------------------------===// diff --git a/lib/CodeGen/RegAllocBigBlock.cpp b/lib/CodeGen/RegAllocBigBlock.cpp new file mode 100644 index 0000000..c7f23f5 --- /dev/null +++ b/lib/CodeGen/RegAllocBigBlock.cpp @@ -0,0 +1,893 @@ +//===- RegAllocBigBlock.cpp - A register allocator for large basic blocks -===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Duraid Madina and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the RABigBlock class +// +//===----------------------------------------------------------------------===// + +// This register allocator is derived from RegAllocLocal.cpp. Like it, this +// allocator works on one basic block at a time, oblivious to others. +// However, the algorithm used here is suited for long blocks of +// instructions - registers are spilled by greedily choosing those holding +// values that will not be needed for the longest amount of time. This works +// particularly well for blocks with 10 or more times as many instructions +// as machine registers, but can be used for general code. +// +//===----------------------------------------------------------------------===// +// +// TODO: - automagically invoke linearscan for (groups of) small BBs? +// - break ties when picking regs? (probably not worth it in a +// JIT context) +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "llvm/BasicBlock.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/SSARegMap.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumStores, "Number of stores added"); +STATISTIC(NumLoads , "Number of loads added"); +STATISTIC(NumFolded, "Number of loads/stores folded into instructions"); + +namespace { + static RegisterRegAlloc + bigBlockRegAlloc("bigblock", " Big-block register allocator", + createBigBlockRegisterAllocator); + +/// VRegKeyInfo - Defines magic values required to use VirtRegs as DenseMap +/// keys. + struct VRegKeyInfo { + static inline unsigned getEmptyKey() { return -1U; } + static inline unsigned getTombstoneKey() { return -2U; } + static unsigned getHashValue(const unsigned &Key) { return Key; } + }; + + +/// This register allocator is derived from RegAllocLocal.cpp. Like it, this +/// allocator works on one basic block at a time, oblivious to others. +/// However, the algorithm used here is suited for long blocks of +/// instructions - registers are spilled by greedily choosing those holding +/// values that will not be needed for the longest amount of time. This works +/// particularly well for blocks with 10 or more times as many instructions +/// as machine registers, but can be used for general code. 
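+/// (In effect this applies Belady's MIN replacement policy to registers:
+/// when a register must be scavenged, the value whose next read lies
+/// furthest in the future is the one evicted.)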
+/// +/// TODO: - automagically invoke linearscan for (groups of) small BBs? +/// - break ties when picking regs? (probably not worth it in a +/// JIT context) +/// + class VISIBILITY_HIDDEN RABigBlock : public MachineFunctionPass { + public: + static char ID; + RABigBlock() : MachineFunctionPass((intptr_t)&ID) {} + private: + /// TM - For getting at TargetMachine info + /// + const TargetMachine *TM; + + /// MF - Our generic MachineFunction pointer + /// + MachineFunction *MF; + + /// RegInfo - For dealing with machine register info (aliases, folds + /// etc) + const MRegisterInfo *RegInfo; + + /// LV - Our generic LiveVariables pointer + /// + LiveVariables *LV; + + typedef SmallVector<unsigned, 2> VRegTimes; + + /// VRegReadTable - maps VRegs in a BB to the set of times they are read + /// + DenseMap<unsigned, VRegTimes*, VRegKeyInfo> VRegReadTable; + + /// VRegReadIdx - keeps track of the "current time" in terms of + /// positions in VRegReadTable + DenseMap<unsigned, unsigned , VRegKeyInfo> VRegReadIdx; + + /// StackSlotForVirtReg - Maps virtual regs to the frame index where these + /// values are spilled. + IndexedMap<unsigned, VirtReg2IndexFunctor> StackSlotForVirtReg; + + /// Virt2PhysRegMap - This map contains entries for each virtual register + /// that is currently available in a physical register. + IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap; + + /// PhysRegsUsed - This array is effectively a map, containing entries for + /// each physical register that currently has a value (ie, it is in + /// Virt2PhysRegMap). The value mapped to is the virtual register + /// corresponding to the physical register (the inverse of the + /// Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned + /// because it is used by a future instruction, and to -2 if it is not + /// allocatable. If the entry for a physical register is -1, then the + /// physical register is "not in the map". + /// + std::vector<int> PhysRegsUsed; + + /// VirtRegModified - This bitset contains information about which virtual + /// registers need to be spilled back to memory when their registers are + /// scavenged. If a virtual register has simply been rematerialized, there + /// is no reason to spill it to memory when we need the register back. 
+    ///
+    std::vector<int> VirtRegModified;
+
+    /// MBBLastInsnTime - the number of the last instruction in MBB
+    ///
+    int MBBLastInsnTime;
+
+    /// MBBCurTime - the number of the instruction currently being processed
+    ///
+    int MBBCurTime;
+
+    unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) {
+      return Virt2PhysRegMap[VirtReg];
+    }
+
+    unsigned &getVirt2StackSlot(unsigned VirtReg) {
+      return StackSlotForVirtReg[VirtReg];
+    }
+
+    /// markVirtRegModified - Lets us flip bits in the VirtRegModified bitset
+    ///
+    void markVirtRegModified(unsigned Reg, bool Val = true) {
+      assert(MRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+      Reg -= MRegisterInfo::FirstVirtualRegister;
+      if (VirtRegModified.size() <= Reg)
+        VirtRegModified.resize(Reg+1);
+      VirtRegModified[Reg] = Val;
+    }
+
+    /// isVirtRegModified - Lets us query the VirtRegModified bitset
+    ///
+    bool isVirtRegModified(unsigned Reg) const {
+      assert(MRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
+      assert(Reg - MRegisterInfo::FirstVirtualRegister < VirtRegModified.size()
+             && "Illegal virtual register!");
+      return VirtRegModified[Reg - MRegisterInfo::FirstVirtualRegister];
+    }
+
+  public:
+    /// getPassName - returns the BigBlock allocator's name
+    ///
+    virtual const char *getPassName() const {
+      return "BigBlock Register Allocator";
+    }
+
+    /// getAnalysisUsage - declares the required analyses
+    ///
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<LiveVariables>();
+      AU.addRequiredID(PHIEliminationID);
+      AU.addRequiredID(TwoAddressInstructionPassID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    /// runOnMachineFunction - Register allocate the whole function
+    ///
+    bool runOnMachineFunction(MachineFunction &Fn);
+
+    /// AllocateBasicBlock - Register allocate the specified basic block.
+    ///
+    void AllocateBasicBlock(MachineBasicBlock &MBB);
+
+    /// FillVRegReadTable - Fill out the table of vreg read times given a BB
+    ///
+    void FillVRegReadTable(MachineBasicBlock &MBB);
+
+    /// areRegsEqual - This method returns true if the specified registers are
+    /// related to each other. To do this, it checks to see if they are equal
+    /// or if the first register is in the alias set of the second register.
+    ///
+    bool areRegsEqual(unsigned R1, unsigned R2) const {
+      if (R1 == R2) return true;
+      for (const unsigned *AliasSet = RegInfo->getAliasSet(R2);
+           *AliasSet; ++AliasSet) {
+        if (*AliasSet == R1) return true;
+      }
+      return false;
+    }
+
+    /// getStackSpaceFor - This returns the frame index of the specified
+    /// virtual register on the stack, allocating space if necessary.
+    int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+    /// removePhysReg - This method marks the specified physical register as no
+    /// longer being in use.
+    ///
+    void removePhysReg(unsigned PhysReg);
+
+    /// spillVirtReg - This method spills the value specified by PhysReg into
+    /// the virtual register slot specified by VirtReg. It then updates the RA
+    /// data structures to indicate the fact that PhysReg is now available.
+    ///
+    void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                      unsigned VirtReg, unsigned PhysReg);
+
+    /// spillPhysReg - This method spills the specified physical register into
+    /// the virtual register slot associated with it. If OnlyVirtRegs is set to
+    /// true, then the request is ignored if the physical register does not
+    /// contain a virtual register.
+ /// + void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned PhysReg, bool OnlyVirtRegs = false); + + /// assignVirtToPhysReg - This method updates local state so that we know + /// that PhysReg is the proper container for VirtReg now. The physical + /// register must not be used for anything else when this is called. + /// + void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg); + + /// isPhysRegAvailable - Return true if the specified physical register is + /// free and available for use. This also includes checking to see if + /// aliased registers are all free... + /// + bool isPhysRegAvailable(unsigned PhysReg) const; + + /// getFreeReg - Look to see if there is a free register available in the + /// specified register class. If not, return 0. + /// + unsigned getFreeReg(const TargetRegisterClass *RC); + + /// chooseReg - Pick a physical register to hold the specified + /// virtual register by choosing the one which will be read furthest + /// in the future. + /// + unsigned chooseReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned VirtReg); + + /// reloadVirtReg - This method transforms the specified specified virtual + /// register use to refer to a physical register. This method may do this + /// in one of several ways: if the register is available in a physical + /// register already, it uses that physical register. If the value is not + /// in a physical register, and if there are physical registers available, + /// it loads it into a register. If register pressure is high, and it is + /// possible, it tries to fold the load of the virtual register into the + /// instruction itself. It avoids doing this if register pressure is low to + /// improve the chance that subsequent instructions can use the reloaded + /// value. This method returns the modified instruction. + /// + MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned OpNum); + + }; + char RABigBlock::ID = 0; +} + +/// getStackSpaceFor - This allocates space for the specified virtual register +/// to be held on the stack. +int RABigBlock::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { + // Find the location Reg would belong... + int FrameIdx = getVirt2StackSlot(VirtReg); + + if (FrameIdx) + return FrameIdx - 1; // Already has space allocated? + + // Allocate a new stack object for this spill location... + FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(), + RC->getAlignment()); + + // Assign the slot... + getVirt2StackSlot(VirtReg) = FrameIdx + 1; + return FrameIdx; +} + + +/// removePhysReg - This method marks the specified physical register as no +/// longer being in use. +/// +void RABigBlock::removePhysReg(unsigned PhysReg) { + PhysRegsUsed[PhysReg] = -1; // PhyReg no longer used +} + + +/// spillVirtReg - This method spills the value specified by PhysReg into the +/// virtual register slot specified by VirtReg. It then updates the RA data +/// structures to indicate the fact that PhysReg is now available. +/// +void RABigBlock::spillVirtReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned VirtReg, unsigned PhysReg) { + assert(VirtReg && "Spilling a physical register is illegal!" 
+ " Must not have appropriate kill for the register or use exists beyond" + " the intended one."); + DOUT << " Spilling register " << RegInfo->getName(PhysReg) + << " containing %reg" << VirtReg; + if (!isVirtRegModified(VirtReg)) + DOUT << " which has not been modified, so no store necessary!"; + + // Otherwise, there is a virtual register corresponding to this physical + // register. We only need to spill it into its stack slot if it has been + // modified. + if (isVirtRegModified(VirtReg)) { + const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg); + int FrameIndex = getStackSpaceFor(VirtReg, RC); + DOUT << " to stack slot #" << FrameIndex; + RegInfo->storeRegToStackSlot(MBB, I, PhysReg, FrameIndex, RC); + ++NumStores; // Update statistics + } + + getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available + + DOUT << "\n"; + removePhysReg(PhysReg); +} + + +/// spillPhysReg - This method spills the specified physical register into the +/// virtual register slot associated with it. If OnlyVirtRegs is set to true, +/// then the request is ignored if the physical register does not contain a +/// virtual register. +/// +void RABigBlock::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned PhysReg, bool OnlyVirtRegs) { + if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used! + assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!"); + if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs) + spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg); + } else { + // If the selected register aliases any other registers, we must make + // sure that one of the aliases isn't alive. + for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) + if (PhysRegsUsed[*AliasSet] != -1 && // Spill aliased register. + PhysRegsUsed[*AliasSet] != -2) // If allocatable. + if (PhysRegsUsed[*AliasSet]) + spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet); + } +} + + +/// assignVirtToPhysReg - This method updates local state so that we know +/// that PhysReg is the proper container for VirtReg now. The physical +/// register must not be used for anything else when this is called. +/// +void RABigBlock::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { + assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!"); + // Update information to note the fact that this register was just used, and + // it holds VirtReg. + PhysRegsUsed[PhysReg] = VirtReg; + getVirt2PhysRegMapSlot(VirtReg) = PhysReg; +} + + +/// isPhysRegAvailable - Return true if the specified physical register is free +/// and available for use. This also includes checking to see if aliased +/// registers are all free... +/// +bool RABigBlock::isPhysRegAvailable(unsigned PhysReg) const { + if (PhysRegsUsed[PhysReg] != -1) return false; + + // If the selected register aliases any other allocated registers, it is + // not free! + for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) + if (PhysRegsUsed[*AliasSet] != -1) // Aliased register in use? + return false; // Can't use this reg then. + return true; +} + + +/// getFreeReg - Look to see if there is a free register available in the +/// specified register class. If not, return 0. +/// +unsigned RABigBlock::getFreeReg(const TargetRegisterClass *RC) { + // Get iterators defining the range of registers that are valid to allocate in + // this class, which also specifies the preferred allocation order. 
+ TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); + TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); + + for (; RI != RE; ++RI) + if (isPhysRegAvailable(*RI)) { // Is reg unused? + assert(*RI != 0 && "Cannot use register!"); + return *RI; // Found an unused register! + } + return 0; +} + + +/// chooseReg - Pick a physical register to hold the specified +/// virtual register by choosing the one whose value will be read +/// furthest in the future. +/// +unsigned RABigBlock::chooseReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned VirtReg) { + const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg); + // First check to see if we have a free register of the requested type... + unsigned PhysReg = getFreeReg(RC); + + // If we didn't find an unused register, find the one which will be + // read at the most distant point in time. + if (PhysReg == 0) { + unsigned delay=0, longest_delay=0; + VRegTimes* ReadTimes; + + unsigned curTime = MBBCurTime; + + // for all physical regs in the RC, + for(TargetRegisterClass::iterator pReg = RC->begin(); + pReg != RC->end(); ++pReg) { + // how long until they're read? + if(PhysRegsUsed[*pReg]>0) { // ignore non-allocatable regs + ReadTimes = VRegReadTable[PhysRegsUsed[*pReg]]; + if(ReadTimes && !ReadTimes->empty()) { + unsigned& pt = VRegReadIdx[PhysRegsUsed[*pReg]]; + while(pt < ReadTimes->size() && (*ReadTimes)[pt] < curTime) { + ++pt; + } + + if(pt < ReadTimes->size()) + delay = (*ReadTimes)[pt] - curTime; + else + delay = MBBLastInsnTime + 1 - curTime; + } else { + // This register is only defined, but never + // read in this MBB. Therefore the next read + // happens after the end of this MBB + delay = MBBLastInsnTime + 1 - curTime; + } + + + if(delay > longest_delay) { + longest_delay = delay; + PhysReg = *pReg; + } + } + } + + if(PhysReg == 0) { // ok, now we're desperate. We couldn't choose + // a register to spill by looking through the + // read timetable, so now we just spill the + // first allocatable register we find. + + // for all physical regs in the RC, + for(TargetRegisterClass::iterator pReg = RC->begin(); + pReg != RC->end(); ++pReg) { + // if we find a register we can spill + if(PhysRegsUsed[*pReg]>=-1) + PhysReg = *pReg; // choose it to be spilled + } + } + + assert(PhysReg && "couldn't choose a register to spill :( "); + // TODO: assert that RC->contains(PhysReg) / handle aliased registers? + + // since we needed to look in the table we need to spill this register. + spillPhysReg(MBB, I, PhysReg); + } + + // assign the vreg to our chosen physical register + assignVirtToPhysReg(VirtReg, PhysReg); + return PhysReg; // and return it +} + + +/// reloadVirtReg - This method transforms an instruction with a virtual +/// register use to one that references a physical register. It does this as +/// follows: +/// +/// 1) If the register is already in a physical register, it uses it. +/// 2) Otherwise, if there is a free physical register, it uses that. +/// 3) Otherwise, it calls chooseReg() to get the physical register +/// holding the most distantly needed value, generating a spill in +/// the process. +/// +/// This method returns the modified instruction. +MachineInstr *RABigBlock::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned OpNum) { + unsigned VirtReg = MI->getOperand(OpNum).getReg(); + + // If the virtual register is already available in a physical register, + // just update the instruction and return. 
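+  // (A nonzero Virt2PhysRegMap entry means the value already lives in
+  // physical register PR, so the operand is rewritten with no load at all.)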
+ if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) { + MI->getOperand(OpNum).setReg(PR); + return MI; + } + + // Otherwise, if we have free physical registers available to hold the + // value, use them. + const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg); + unsigned PhysReg = getFreeReg(RC); + int FrameIndex = getStackSpaceFor(VirtReg, RC); + + if (PhysReg) { // we have a free register, so use it. + assignVirtToPhysReg(VirtReg, PhysReg); + } else { // no free registers available. + // try to fold the spill into the instruction + if(MachineInstr* FMI = RegInfo->foldMemoryOperand(MI, OpNum, FrameIndex)) { + ++NumFolded; + // Since we changed the address of MI, make sure to update live variables + // to know that the new instruction has the properties of the old one. + LV->instructionChanged(MI, FMI); + return MBB.insert(MBB.erase(MI), FMI); + } + + // determine which of the physical registers we'll kill off, since we + // couldn't fold. + PhysReg = chooseReg(MBB, MI, VirtReg); + } + + // this virtual register is now unmodified (since we just reloaded it) + markVirtRegModified(VirtReg, false); + + DOUT << " Reloading %reg" << VirtReg << " into " + << RegInfo->getName(PhysReg) << "\n"; + + // Add move instruction(s) + RegInfo->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC); + ++NumLoads; // Update statistics + + MF->setPhysRegUsed(PhysReg); + MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register + return MI; +} + +/// Fill out the vreg read timetable. Since ReadTime increases +/// monotonically, the individual readtime sets will be sorted +/// in ascending order. +void RABigBlock::FillVRegReadTable(MachineBasicBlock &MBB) { + // loop over each instruction + MachineBasicBlock::iterator MII; + unsigned ReadTime; + + for(ReadTime=0, MII = MBB.begin(); MII != MBB.end(); ++ReadTime, ++MII) { + MachineInstr *MI = MII; + + for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + MachineOperand& MO = MI->getOperand(i); + // look for vreg reads.. + if (MO.isRegister() && !MO.isDef() && MO.getReg() && + MRegisterInfo::isVirtualRegister(MO.getReg())) { + // ..and add them to the read table. + VRegTimes* &Times = VRegReadTable[MO.getReg()]; + if(!VRegReadTable[MO.getReg()]) { + Times = new VRegTimes; + VRegReadIdx[MO.getReg()] = 0; + } + Times->push_back(ReadTime); + } + } + + } + + MBBLastInsnTime = ReadTime; + + for(DenseMap<unsigned, VRegTimes*, VRegKeyInfo>::iterator Reads = VRegReadTable.begin(); + Reads != VRegReadTable.end(); ++Reads) { + if(Reads->second) { + DOUT << "Reads[" << Reads->first << "]=" << Reads->second->size() << "\n"; + } + } +} + +/// isReadModWriteImplicitKill - True if this is an implicit kill for a +/// read/mod/write register, i.e. update partial register. +static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isRegister() && MO.getReg() == Reg && MO.isImplicit() && + MO.isDef() && !MO.isDead()) + return true; + } + return false; +} + +/// isReadModWriteImplicitDef - True if this is an implicit def for a +/// read/mod/write register, i.e. update partial register. 
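+/// (This mirrors isReadModWriteImplicitKill above; here the register of
+/// interest appears as an implicit use that is marked as a kill.)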
+static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isRegister() && MO.getReg() == Reg && MO.isImplicit() && + !MO.isDef() && MO.isKill()) + return true; + } + return false; +} + + +void RABigBlock::AllocateBasicBlock(MachineBasicBlock &MBB) { + // loop over each instruction + MachineBasicBlock::iterator MII = MBB.begin(); + const TargetInstrInfo &TII = *TM->getInstrInfo(); + + DEBUG(const BasicBlock *LBB = MBB.getBasicBlock(); + if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName()); + + // If this is the first basic block in the machine function, add live-in + // registers as active. + if (&MBB == &*MF->begin()) { + for (MachineFunction::livein_iterator I = MF->livein_begin(), + E = MF->livein_end(); I != E; ++I) { + unsigned Reg = I->first; + MF->setPhysRegUsed(Reg); + PhysRegsUsed[Reg] = 0; // It is free and reserved now + for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now + MF->setPhysRegUsed(*AliasSet); + } + } + } + } + + // Otherwise, sequentially allocate each instruction in the MBB. + MBBCurTime = -1; + while (MII != MBB.end()) { + MachineInstr *MI = MII++; + MBBCurTime++; + const TargetInstrDescriptor &TID = TII.get(MI->getOpcode()); + DEBUG(DOUT << "\nTime=" << MBBCurTime << " Starting RegAlloc of: " << *MI; + DOUT << " Regs have values: "; + for (unsigned i = 0; i != RegInfo->getNumRegs(); ++i) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) + DOUT << "[" << RegInfo->getName(i) + << ",%reg" << PhysRegsUsed[i] << "] "; + DOUT << "\n"); + + SmallVector<unsigned, 8> Kills; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isRegister() && MO.isKill()) { + if (!MO.isImplicit()) + Kills.push_back(MO.getReg()); + else if (!isReadModWriteImplicitKill(MI, MO.getReg())) + // These are extra physical register kills when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. + Kills.push_back(MO.getReg()); + } + } + + // Get the used operands into registers. This has the potential to spill + // incoming values if we are out of registers. Note that we completely + // ignore physical register uses here. We assume that if an explicit + // physical register is referenced by the instruction, that it is guaranteed + // to be live-in, or the input is badly hosed. + // + for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + MachineOperand& MO = MI->getOperand(i); + // here we are looking for only used operands (never def&use) + if (MO.isRegister() && !MO.isDef() && MO.getReg() && !MO.isImplicit() && + MRegisterInfo::isVirtualRegister(MO.getReg())) + MI = reloadVirtReg(MBB, MI, i); + } + + // If this instruction is the last user of this register, kill the + // value, freeing the register being used, so it doesn't need to be + // spilled to memory. + // + for (unsigned i = 0, e = Kills.size(); i != e; ++i) { + unsigned VirtReg = Kills[i]; + unsigned PhysReg = VirtReg; + if (MRegisterInfo::isVirtualRegister(VirtReg)) { + // If the virtual register was never materialized into a register, it + // might not be in the map, but it won't hurt to zero it out anyway. 
+ unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); + PhysReg = PhysRegSlot; + PhysRegSlot = 0; + } else if (PhysRegsUsed[PhysReg] == -2) { + // Unallocatable register dead, ignore. + continue; + } else { + assert(!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1 && + "Silently clearing a virtual register?"); + } + + if (PhysReg) { + DOUT << " Last use of " << RegInfo->getName(PhysReg) + << "[%reg" << VirtReg <<"], removing it from live set\n"; + removePhysReg(PhysReg); + for (const unsigned *AliasSet = RegInfo->getSubRegisters(PhysReg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + DOUT << " Last use of " + << RegInfo->getName(*AliasSet) + << "[%reg" << VirtReg <<"], removing it from live set\n"; + removePhysReg(*AliasSet); + } + } + } + } + + // Loop over all of the operands of the instruction, spilling registers that + // are defined, and marking explicit destinations in the PhysRegsUsed map. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isRegister() && MO.isDef() && !MO.isImplicit() && MO.getReg() && + MRegisterInfo::isPhysicalRegister(MO.getReg())) { + unsigned Reg = MO.getReg(); + if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. + // These are extra physical register defs when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. + if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; + + MF->setPhysRegUsed(Reg); + spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg + PhysRegsUsed[Reg] = 0; // It is free and reserved now + for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now + MF->setPhysRegUsed(*AliasSet); + } + } + } + } + + // Loop over the implicit defs, spilling them as well. + if (TID.ImplicitDefs) { + for (const unsigned *ImplicitDefs = TID.ImplicitDefs; + *ImplicitDefs; ++ImplicitDefs) { + unsigned Reg = *ImplicitDefs; + if (PhysRegsUsed[Reg] != -2) { + spillPhysReg(MBB, MI, Reg, true); + PhysRegsUsed[Reg] = 0; // It is free and reserved now + } + MF->setPhysRegUsed(Reg); + for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now + MF->setPhysRegUsed(*AliasSet); + } + } + } + } + + SmallVector<unsigned, 8> DeadDefs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isRegister() && MO.isDead()) + DeadDefs.push_back(MO.getReg()); + } + + // Okay, we have allocated all of the source operands and spilled any values + // that would be destroyed by defs of this instruction. Loop over the + // explicit defs and assign them to a register, spilling incoming values if + // we need to scavenge a register. + // + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isRegister() && MO.isDef() && MO.getReg() && + MRegisterInfo::isVirtualRegister(MO.getReg())) { + unsigned DestVirtReg = MO.getReg(); + unsigned DestPhysReg; + + // If DestVirtReg already has a value, use it. 
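+      // (Otherwise chooseReg picks a register for it, spilling the value
+      // whose next read is furthest away if nothing is free.)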
+        if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
+          DestPhysReg = chooseReg(MBB, MI, DestVirtReg);
+        MF->setPhysRegUsed(DestPhysReg);
+        markVirtRegModified(DestVirtReg);
+        MI->getOperand(i).setReg(DestPhysReg);  // Assign the output register
+      }
+    }
+
+    // If this instruction defines any registers that are immediately dead,
+    // kill them now.
+    //
+    for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) {
+      unsigned VirtReg = DeadDefs[i];
+      unsigned PhysReg = VirtReg;
+      if (MRegisterInfo::isVirtualRegister(VirtReg)) {
+        unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+        PhysReg = PhysRegSlot;
+        assert(PhysReg != 0);
+        PhysRegSlot = 0;
+      } else if (PhysRegsUsed[PhysReg] == -2) {
+        // Unallocatable register dead, ignore.
+        continue;
+      }
+
+      if (PhysReg) {
+        DOUT << "  Register " << RegInfo->getName(PhysReg)
+             << " [%reg" << VirtReg
+             << "] is never used, removing it from the frame live list\n";
+        removePhysReg(PhysReg);
+        for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+             *AliasSet; ++AliasSet) {
+          if (PhysRegsUsed[*AliasSet] != -2) {
+            DOUT << "  Register " << RegInfo->getName(*AliasSet)
+                 << " [%reg" << *AliasSet
+                 << "] is never used, removing it from the frame live list\n";
+            removePhysReg(*AliasSet);
+          }
+        }
+      }
+    }
+
+    // Finally, if this is a noop copy instruction, zap it.
+    unsigned SrcReg, DstReg;
+    if (TII.isMoveInstr(*MI, SrcReg, DstReg) && SrcReg == DstReg) {
+      LV->removeVirtualRegistersKilled(MI);
+      LV->removeVirtualRegistersDead(MI);
+      MBB.erase(MI);
+    }
+  }
+
+  MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
+
+  // Spill all physical registers holding virtual registers now.
+  for (unsigned i = 0, e = RegInfo->getNumRegs(); i != e; ++i)
+    if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+      if (unsigned VirtReg = PhysRegsUsed[i])
+        spillVirtReg(MBB, MI, VirtReg, i);
+      else
+        removePhysReg(i);
+}
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RABigBlock::runOnMachineFunction(MachineFunction &Fn) {
+  DOUT << "Machine Function " << "\n";
+  MF = &Fn;
+  TM = &Fn.getTarget();
+  RegInfo = TM->getRegisterInfo();
+  LV = &getAnalysis<LiveVariables>();
+
+  PhysRegsUsed.assign(RegInfo->getNumRegs(), -1);
+
+  // At various places we want to efficiently check to see whether a register
+  // is allocatable.  To handle this, we mark all unallocatable registers as
+  // being pinned down, permanently.
+  {
+    BitVector Allocable = RegInfo->getAllocatableSet(Fn);
+    for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
+      if (!Allocable[i])
+        PhysRegsUsed[i] = -2;  // Mark the reg unallocable.
+ } + + // initialize the virtual->physical register map to have a 'null' + // mapping for all virtual registers + Virt2PhysRegMap.grow(MF->getSSARegMap()->getLastVirtReg()); + StackSlotForVirtReg.grow(MF->getSSARegMap()->getLastVirtReg()); + VirtRegModified.resize(MF->getSSARegMap()->getLastVirtReg() - MRegisterInfo::FirstVirtualRegister + 1,0); + + // Loop over all of the basic blocks, eliminating virtual register references + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); + MBB != MBBe; ++MBB) { + // fill out the read timetable + FillVRegReadTable(*MBB); + // use it to allocate the BB + AllocateBasicBlock(*MBB); + // clear it + VRegReadTable.clear(); + } + + StackSlotForVirtReg.clear(); + PhysRegsUsed.clear(); + VirtRegModified.clear(); + Virt2PhysRegMap.clear(); + return true; +} + +FunctionPass *llvm::createBigBlockRegisterAllocator() { + return new RABigBlock(); +} + diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp new file mode 100644 index 0000000..01d43fd --- /dev/null +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -0,0 +1,828 @@ +//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a linear scan register allocator. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "PhysRegTracker.h" +#include "VirtRegMap.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/SSARegMap.h" +#include "llvm/Target/MRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include <algorithm> +#include <set> +#include <queue> +#include <memory> +#include <cmath> +using namespace llvm; + +STATISTIC(NumIters , "Number of iterations performed"); +STATISTIC(NumBacktracks, "Number of times we had to backtrack"); + +static RegisterRegAlloc +linearscanRegAlloc("linearscan", " linear scan register allocator", + createLinearScanRegisterAllocator); + +namespace { + static unsigned numIterations = 0; + static unsigned numIntervals = 0; + + struct VISIBILITY_HIDDEN RALinScan : public MachineFunctionPass { + static char ID; + RALinScan() : MachineFunctionPass((intptr_t)&ID) {} + + typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr; + typedef std::vector<IntervalPtr> IntervalPtrs; + private: + /// RelatedRegClasses - This structure is built the first time a function is + /// compiled, and keeps track of which register classes have registers that + /// belong to multiple classes or have aliases that are in other classes. 
+    EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses;
+    std::map<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg;
+
+    MachineFunction* mf_;
+    const TargetMachine* tm_;
+    const MRegisterInfo* mri_;
+    LiveIntervals* li_;
+
+    /// handled_ - Intervals are added to the handled_ set in the order of their
+    /// start value.  This is used for backtracking.
+    std::vector<LiveInterval*> handled_;
+
+    /// fixed_ - Intervals that correspond to machine registers.
+    ///
+    IntervalPtrs fixed_;
+
+    /// active_ - Intervals that are currently being processed, and which have a
+    /// live range active for the current point.
+    IntervalPtrs active_;
+
+    /// inactive_ - Intervals that are currently being processed, but which have
+    /// a hole at the current point.
+    IntervalPtrs inactive_;
+
+    typedef std::priority_queue<LiveInterval*,
+                                std::vector<LiveInterval*>,
+                                greater_ptr<LiveInterval> > IntervalHeap;
+    IntervalHeap unhandled_;
+    std::auto_ptr<PhysRegTracker> prt_;
+    std::auto_ptr<VirtRegMap> vrm_;
+    std::auto_ptr<Spiller> spiller_;
+
+  public:
+    virtual const char* getPassName() const {
+      return "Linear Scan Register Allocator";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<LiveIntervals>();
+      AU.addRequiredID(SimpleRegisterCoalescingID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    /// runOnMachineFunction - register allocate the whole function
+    bool runOnMachineFunction(MachineFunction&);
+
+  private:
+    /// linearScan - the linear scan algorithm
+    void linearScan();
+
+    /// initIntervalSets - initialize the interval sets.
+    ///
+    void initIntervalSets();
+
+    /// processActiveIntervals - expire old intervals and move non-overlapping
+    /// ones to the inactive list.
+    void processActiveIntervals(unsigned CurPoint);
+
+    /// processInactiveIntervals - expire old intervals and move overlapping
+    /// ones to the active list.
+    void processInactiveIntervals(unsigned CurPoint);
+
+    /// assignRegOrStackSlotAtInterval - assign a register if one
+    /// is available, or spill.
+    void assignRegOrStackSlotAtInterval(LiveInterval* cur);
+
+    ///
+    /// register handling helpers
+    ///
+
+    /// getFreePhysReg - return a free physical register for this virtual
+    /// register interval if we have one, otherwise return 0.
+    unsigned getFreePhysReg(LiveInterval* cur);
+
+    /// assignVirt2StackSlot - assigns this virtual register to a
+    /// stack slot. returns the stack slot
+    int assignVirt2StackSlot(unsigned virtReg);
+
+    void ComputeRelatedRegClasses();
+
+    template <typename ItTy>
+    void printIntervals(const char* const str, ItTy i, ItTy e) const {
+      if (str) DOUT << str << " intervals:\n";
+      for (; i != e; ++i) {
+        DOUT << "\t" << *i->first << " -> ";
+        unsigned reg = i->first->reg;
+        if (MRegisterInfo::isVirtualRegister(reg)) {
+          reg = vrm_->getPhys(reg);
+        }
+        DOUT << mri_->getName(reg) << '\n';
+      }
+    }
+  };
+  char RALinScan::ID = 0;
+}
+
+void RALinScan::ComputeRelatedRegClasses() {
+  const MRegisterInfo &MRI = *mri_;
+
+  // First pass, add all reg classes to the union, and determine at least one
+  // reg class that each register is in.
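+  // A minimal sketch of the union-find step below (hypothetical x86-style
+  // class and register names, not taken from this file): if GR16 contains AX,
+  // GR32 contains EAX, and AX aliases EAX, both classes end up in the same
+  // equivalence class:
+  //
+  //   EquivalenceClasses<const TargetRegisterClass*> EC;
+  //   EC.insert(GR16); EC.insert(GR32);
+  //   EC.unionSets(GR16, GR32);   // AX is shared/aliased between them
+  //   assert(EC.getLeaderValue(GR16) == EC.getLeaderValue(GR32));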
+ bool HasAliases = false; + for (MRegisterInfo::regclass_iterator RCI = MRI.regclass_begin(), + E = MRI.regclass_end(); RCI != E; ++RCI) { + RelatedRegClasses.insert(*RCI); + for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end(); + I != E; ++I) { + HasAliases = HasAliases || *MRI.getAliasSet(*I) != 0; + + const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I]; + if (PRC) { + // Already processed this register. Just make sure we know that + // multiple register classes share a register. + RelatedRegClasses.unionSets(PRC, *RCI); + } else { + PRC = *RCI; + } + } + } + + // Second pass, now that we know conservatively what register classes each reg + // belongs to, add info about aliases. We don't need to do this for targets + // without register aliases. + if (HasAliases) + for (std::map<unsigned, const TargetRegisterClass*>::iterator + I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end(); + I != E; ++I) + for (const unsigned *AS = MRI.getAliasSet(I->first); *AS; ++AS) + RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]); +} + +bool RALinScan::runOnMachineFunction(MachineFunction &fn) { + mf_ = &fn; + tm_ = &fn.getTarget(); + mri_ = tm_->getRegisterInfo(); + li_ = &getAnalysis<LiveIntervals>(); + + // If this is the first function compiled, compute the related reg classes. + if (RelatedRegClasses.empty()) + ComputeRelatedRegClasses(); + + if (!prt_.get()) prt_.reset(new PhysRegTracker(*mri_)); + vrm_.reset(new VirtRegMap(*mf_)); + if (!spiller_.get()) spiller_.reset(createSpiller()); + + initIntervalSets(); + + linearScan(); + + // Rewrite spill code and update the PhysRegsUsed set. + spiller_->runOnMachineFunction(*mf_, *vrm_); + + vrm_.reset(); // Free the VirtRegMap + + + while (!unhandled_.empty()) unhandled_.pop(); + fixed_.clear(); + active_.clear(); + inactive_.clear(); + handled_.clear(); + + return true; +} + +/// initIntervalSets - initialize the interval sets. +/// +void RALinScan::initIntervalSets() +{ + assert(unhandled_.empty() && fixed_.empty() && + active_.empty() && inactive_.empty() && + "interval sets should be empty on initialization"); + + for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { + if (MRegisterInfo::isPhysicalRegister(i->second.reg)) { + mf_->setPhysRegUsed(i->second.reg); + fixed_.push_back(std::make_pair(&i->second, i->second.begin())); + } else + unhandled_.push(&i->second); + } +} + +void RALinScan::linearScan() +{ + // linear scan algorithm + DOUT << "********** LINEAR SCAN **********\n"; + DOUT << "********** Function: " << mf_->getFunction()->getName() << '\n'; + + // DEBUG(printIntervals("unhandled", unhandled_.begin(), unhandled_.end())); + DEBUG(printIntervals("fixed", fixed_.begin(), fixed_.end())); + DEBUG(printIntervals("active", active_.begin(), active_.end())); + DEBUG(printIntervals("inactive", inactive_.begin(), inactive_.end())); + + while (!unhandled_.empty()) { + // pick the interval with the earliest start point + LiveInterval* cur = unhandled_.top(); + unhandled_.pop(); + ++numIterations; + DOUT << "\n*** CURRENT ***: " << *cur << '\n'; + + processActiveIntervals(cur->beginNumber()); + processInactiveIntervals(cur->beginNumber()); + + assert(MRegisterInfo::isVirtualRegister(cur->reg) && + "Can only allocate virtual registers!"); + + // Allocating a virtual register. try to find a free + // physical register or spill an interval (possibly this one) in order to + // assign it one. 
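+    // State at this point (a descriptive note): unhandled_ holds intervals
+    // starting after cur, active_ those whose live range covers
+    // cur->beginNumber(), and inactive_ those that started earlier but have a
+    // hole here; handled_ records every interval processed so far (an interval
+    // may be in handled_ and still in active_ or inactive_), which is what the
+    // backtracking in assignRegOrStackSlotAtInterval relies on.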
+ assignRegOrStackSlotAtInterval(cur); + + DEBUG(printIntervals("active", active_.begin(), active_.end())); + DEBUG(printIntervals("inactive", inactive_.begin(), inactive_.end())); + } + numIntervals += li_->getNumIntervals(); + NumIters += numIterations; + + // expire any remaining active intervals + for (IntervalPtrs::reverse_iterator + i = active_.rbegin(); i != active_.rend(); ) { + unsigned reg = i->first->reg; + DOUT << "\tinterval " << *i->first << " expired\n"; + assert(MRegisterInfo::isVirtualRegister(reg) && + "Can only allocate virtual registers!"); + reg = vrm_->getPhys(reg); + prt_->delRegUse(reg); + i = IntervalPtrs::reverse_iterator(active_.erase(i.base()-1)); + } + + // expire any remaining inactive intervals + for (IntervalPtrs::reverse_iterator + i = inactive_.rbegin(); i != inactive_.rend(); ) { + DOUT << "\tinterval " << *i->first << " expired\n"; + i = IntervalPtrs::reverse_iterator(inactive_.erase(i.base()-1)); + } + + // A brute force way of adding live-ins to every BB. + MachineFunction::iterator MBB = mf_->begin(); + ++MBB; // Skip entry MBB. + for (MachineFunction::iterator E = mf_->end(); MBB != E; ++MBB) { + unsigned StartIdx = li_->getMBBStartIdx(MBB->getNumber()); + for (IntervalPtrs::iterator i = fixed_.begin(), e = fixed_.end(); + i != e; ++i) + if (i->first->liveAt(StartIdx)) + MBB->addLiveIn(i->first->reg); + + for (unsigned i = 0, e = handled_.size(); i != e; ++i) { + LiveInterval *HI = handled_[i]; + unsigned Reg = HI->reg; + if (!vrm_->hasStackSlot(Reg) && HI->liveAt(StartIdx)) { + assert(MRegisterInfo::isVirtualRegister(Reg)); + Reg = vrm_->getPhys(Reg); + MBB->addLiveIn(Reg); + } + } + } + + DOUT << *vrm_; +} + +/// processActiveIntervals - expire old intervals and move non-overlapping ones +/// to the inactive list. +void RALinScan::processActiveIntervals(unsigned CurPoint) +{ + DOUT << "\tprocessing active intervals:\n"; + + for (unsigned i = 0, e = active_.size(); i != e; ++i) { + LiveInterval *Interval = active_[i].first; + LiveInterval::iterator IntervalPos = active_[i].second; + unsigned reg = Interval->reg; + + IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); + + if (IntervalPos == Interval->end()) { // Remove expired intervals. + DOUT << "\t\tinterval " << *Interval << " expired\n"; + assert(MRegisterInfo::isVirtualRegister(reg) && + "Can only allocate virtual registers!"); + reg = vrm_->getPhys(reg); + prt_->delRegUse(reg); + + // Pop off the end of the list. + active_[i] = active_.back(); + active_.pop_back(); + --i; --e; + + } else if (IntervalPos->start > CurPoint) { + // Move inactive intervals to inactive list. + DOUT << "\t\tinterval " << *Interval << " inactive\n"; + assert(MRegisterInfo::isVirtualRegister(reg) && + "Can only allocate virtual registers!"); + reg = vrm_->getPhys(reg); + prt_->delRegUse(reg); + // add to inactive. + inactive_.push_back(std::make_pair(Interval, IntervalPos)); + + // Pop off the end of the list. + active_[i] = active_.back(); + active_.pop_back(); + --i; --e; + } else { + // Otherwise, just update the iterator position. + active_[i].second = IntervalPos; + } + } +} + +/// processInactiveIntervals - expire old intervals and move overlapping +/// ones to the active list. 
+void RALinScan::processInactiveIntervals(unsigned CurPoint)
+{
+  DOUT << "\tprocessing inactive intervals:\n";
+
+  for (unsigned i = 0, e = inactive_.size(); i != e; ++i) {
+    LiveInterval *Interval = inactive_[i].first;
+    LiveInterval::iterator IntervalPos = inactive_[i].second;
+    unsigned reg = Interval->reg;
+
+    IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+    if (IntervalPos == Interval->end()) {       // remove expired intervals.
+      DOUT << "\t\tinterval " << *Interval << " expired\n";
+
+      // Pop off the end of the list.
+      inactive_[i] = inactive_.back();
+      inactive_.pop_back();
+      --i; --e;
+    } else if (IntervalPos->start <= CurPoint) {
+      // move re-activated intervals to the active list
+      DOUT << "\t\tinterval " << *Interval << " active\n";
+      assert(MRegisterInfo::isVirtualRegister(reg) &&
+             "Can only allocate virtual registers!");
+      reg = vrm_->getPhys(reg);
+      prt_->addRegUse(reg);
+      // add to active
+      active_.push_back(std::make_pair(Interval, IntervalPos));
+
+      // Pop off the end of the list.
+      inactive_[i] = inactive_.back();
+      inactive_.pop_back();
+      --i; --e;
+    } else {
+      // Otherwise, just update the iterator position.
+      inactive_[i].second = IntervalPos;
+    }
+  }
+}
+
+/// updateSpillWeights - updates the spill weights of the specified physical
+/// register and its aliases.
+static void updateSpillWeights(std::vector<float> &Weights,
+                               unsigned reg, float weight,
+                               const MRegisterInfo *MRI) {
+  Weights[reg] += weight;
+  for (const unsigned* as = MRI->getAliasSet(reg); *as; ++as)
+    Weights[*as] += weight;
+}
+
+static
+RALinScan::IntervalPtrs::iterator
+FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) {
+  for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end();
+       I != E; ++I)
+    if (I->first == LI) return I;
+  return IP.end();
+}
+
+static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, unsigned Point){
+  for (unsigned i = 0, e = V.size(); i != e; ++i) {
+    RALinScan::IntervalPtr &IP = V[i];
+    LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
+                                                IP.second, Point);
+    if (I != IP.first->begin()) --I;
+    IP.second = I;
+  }
+}
+
+/// assignRegOrStackSlotAtInterval - assign a register if one is available, or
+/// spill.
+void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
+{
+  DOUT << "\tallocating current interval: ";
+
+  PhysRegTracker backupPrt = *prt_;
+
+  std::vector<std::pair<unsigned, float> > SpillWeightsToAdd;
+  unsigned StartPosition = cur->beginNumber();
+  const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(cur->reg);
+  const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
+
+  // for every interval in inactive we overlap with, mark the
+  // register as not free and update spill weights.
+  for (IntervalPtrs::const_iterator i = inactive_.begin(),
+         e = inactive_.end(); i != e; ++i) {
+    unsigned Reg = i->first->reg;
+    assert(MRegisterInfo::isVirtualRegister(Reg) &&
+           "Can only allocate virtual registers!");
+    const TargetRegisterClass *RegRC = mf_->getSSARegMap()->getRegClass(Reg);
+    // If this is not in a related reg class to the register we're allocating,
+    // don't check it.
+    if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+        cur->overlapsFrom(*i->first, i->second-1)) {
+      Reg = vrm_->getPhys(Reg);
+      prt_->addRegUse(Reg);
+      SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight));
+    }
+  }
+
+  // Speculatively check to see if we can get a register right now.  If not,
+  // we know we won't be able to by adding more constraints.
If so, we can + // check to see if it is valid. Doing an exhaustive search of the fixed_ list + // is very bad (it contains all callee clobbered registers for any functions + // with a call), so we want to avoid doing that if possible. + unsigned physReg = getFreePhysReg(cur); + if (physReg) { + // We got a register. However, if it's in the fixed_ list, we might + // conflict with it. Check to see if we conflict with it or any of its + // aliases. + std::set<unsigned> RegAliases; + for (const unsigned *AS = mri_->getAliasSet(physReg); *AS; ++AS) + RegAliases.insert(*AS); + + bool ConflictsWithFixed = false; + for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { + IntervalPtr &IP = fixed_[i]; + if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) { + // Okay, this reg is on the fixed list. Check to see if we actually + // conflict. + LiveInterval *I = IP.first; + if (I->endNumber() > StartPosition) { + LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); + IP.second = II; + if (II != I->begin() && II->start > StartPosition) + --II; + if (cur->overlapsFrom(*I, II)) { + ConflictsWithFixed = true; + break; + } + } + } + } + + // Okay, the register picked by our speculative getFreePhysReg call turned + // out to be in use. Actually add all of the conflicting fixed registers to + // prt so we can do an accurate query. + if (ConflictsWithFixed) { + // For every interval in fixed we overlap with, mark the register as not + // free and update spill weights. + for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { + IntervalPtr &IP = fixed_[i]; + LiveInterval *I = IP.first; + + const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg]; + if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && + I->endNumber() > StartPosition) { + LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); + IP.second = II; + if (II != I->begin() && II->start > StartPosition) + --II; + if (cur->overlapsFrom(*I, II)) { + unsigned reg = I->reg; + prt_->addRegUse(reg); + SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight)); + } + } + } + + // Using the newly updated prt_ object, which includes conflicts in the + // future, see if there are any registers available. + physReg = getFreePhysReg(cur); + } + } + + // Restore the physical register tracker, removing information about the + // future. + *prt_ = backupPrt; + + // if we find a free register, we are done: assign this virtual to + // the free physical register and add this interval to the active + // list. + if (physReg) { + DOUT << mri_->getName(physReg) << '\n'; + vrm_->assignVirt2Phys(cur->reg, physReg); + prt_->addRegUse(physReg); + active_.push_back(std::make_pair(cur, cur->begin())); + handled_.push_back(cur); + return; + } + DOUT << "no free registers\n"; + + // Compile the spill weights into an array that is better for scanning. + std::vector<float> SpillWeights(mri_->getNumRegs(), 0.0); + for (std::vector<std::pair<unsigned, float> >::iterator + I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I) + updateSpillWeights(SpillWeights, I->first, I->second, mri_); + + // for each interval in active, update spill weights. 
+  for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end();
+       i != e; ++i) {
+    unsigned reg = i->first->reg;
+    assert(MRegisterInfo::isVirtualRegister(reg) &&
+           "Can only allocate virtual registers!");
+    reg = vrm_->getPhys(reg);
+    updateSpillWeights(SpillWeights, reg, i->first->weight, mri_);
+  }
+
+  DOUT << "\tassigning stack slot at interval "<< *cur << ":\n";
+
+  // Find a register to spill.
+  float minWeight = HUGE_VALF;
+  unsigned minReg = cur->preference;  // Try the preferred register first.
+
+  if (!minReg || SpillWeights[minReg] == HUGE_VALF)
+    for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
+           e = RC->allocation_order_end(*mf_); i != e; ++i) {
+      unsigned reg = *i;
+      if (minWeight > SpillWeights[reg]) {
+        minWeight = SpillWeights[reg];
+        minReg = reg;
+      }
+    }
+
+  // If we didn't find a register that is spillable, try aliases?
+  if (!minReg) {
+    for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
+           e = RC->allocation_order_end(*mf_); i != e; ++i) {
+      unsigned reg = *i;
+      // No need to worry about if the alias register size < regsize of RC.
+      // We are going to spill all registers that alias it anyway.
+      for (const unsigned* as = mri_->getAliasSet(reg); *as; ++as) {
+        if (minWeight > SpillWeights[*as]) {
+          minWeight = SpillWeights[*as];
+          minReg = *as;
+        }
+      }
+    }
+
+    // All registers must have inf weight. Just grab one!
+    if (!minReg)
+      minReg = *RC->allocation_order_begin(*mf_);
+  }
+
+  DOUT << "\t\tregister with min weight: "
+       << mri_->getName(minReg) << " (" << minWeight << ")\n";
+
+  // if the current interval has the minimum weight, we need to spill it and
+  // add any added intervals back to unhandled, and restart
+  // linearscan.
+  if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
+    DOUT << "\t\t\tspilling(c): " << *cur << '\n';
+    // if the current interval is re-materializable, remember so and don't
+    // assign it a spill slot.
+    if (cur->remat)
+      vrm_->setVirtIsReMaterialized(cur->reg, cur->remat);
+    int slot = cur->remat ? vrm_->assignVirtReMatId(cur->reg)
+                          : vrm_->assignVirt2StackSlot(cur->reg);
+    std::vector<LiveInterval*> added =
+      li_->addIntervalsForSpills(*cur, *vrm_, slot);
+    if (added.empty())
+      return;  // Early exit if all spills were folded.
+
+    // Merge added with unhandled.  Note that we know that
+    // addIntervalsForSpills returns intervals sorted by their starting
+    // point.
+    for (unsigned i = 0, e = added.size(); i != e; ++i)
+      unhandled_.push(added[i]);
+    return;
+  }
+
+  ++NumBacktracks;
+
+  // push the current interval back to unhandled since we are going
+  // to re-run at least this iteration.  Since we didn't modify it, it
+  // should go back right at the front of the list
+  unhandled_.push(cur);
+
+  // otherwise we spill all intervals aliasing the register with
+  // minimum weight, rollback to the interval with the earliest
+  // start point and let the linear scan algorithm run again
+  std::vector<LiveInterval*> added;
+  assert(MRegisterInfo::isPhysicalRegister(minReg) &&
+         "did not choose a register to spill?");
+  BitVector toSpill(mri_->getNumRegs());
+
+  // We are going to spill minReg and all its aliases.
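+  // For illustration (hypothetical x86 names, not from this pass): if minReg
+  // is EAX, the bit vector ends up with EAX, AX, AH and AL set, so any
+  // virtual register currently assigned to any part of EAX is spilled too.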
+  toSpill[minReg] = true;
+  for (const unsigned* as = mri_->getAliasSet(minReg); *as; ++as)
+    toSpill[*as] = true;
+
+  // the earliest start of a spilled interval indicates up to where
+  // in handled we need to roll back
+  unsigned earliestStart = cur->beginNumber();
+
+  // set of spilled vregs (used later to rollback properly)
+  std::set<unsigned> spilled;
+
+  // spill live intervals of virtual regs mapped to the physical register we
+  // want to clear (and its aliases).  We only spill those that overlap with
+  // the current interval as the rest do not affect its allocation.  We also
+  // keep track of the earliest start of all spilled live intervals since this
+  // will mark our rollback point.
+  for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) {
+    unsigned reg = i->first->reg;
+    if (//MRegisterInfo::isVirtualRegister(reg) &&
+        toSpill[vrm_->getPhys(reg)] &&
+        cur->overlapsFrom(*i->first, i->second)) {
+      DOUT << "\t\t\tspilling(a): " << *i->first << '\n';
+      earliestStart = std::min(earliestStart, i->first->beginNumber());
+      if (i->first->remat)
+        vrm_->setVirtIsReMaterialized(reg, i->first->remat);
+      int slot = i->first->remat ? vrm_->assignVirtReMatId(reg)
+                                 : vrm_->assignVirt2StackSlot(reg);
+      std::vector<LiveInterval*> newIs =
+        li_->addIntervalsForSpills(*i->first, *vrm_, slot);
+      std::copy(newIs.begin(), newIs.end(), std::back_inserter(added));
+      spilled.insert(reg);
+    }
+  }
+  for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){
+    unsigned reg = i->first->reg;
+    if (//MRegisterInfo::isVirtualRegister(reg) &&
+        toSpill[vrm_->getPhys(reg)] &&
+        cur->overlapsFrom(*i->first, i->second-1)) {
+      DOUT << "\t\t\tspilling(i): " << *i->first << '\n';
+      earliestStart = std::min(earliestStart, i->first->beginNumber());
+      if (i->first->remat)
+        vrm_->setVirtIsReMaterialized(reg, i->first->remat);
+      int slot = i->first->remat ? vrm_->assignVirtReMatId(reg)
+                                 : vrm_->assignVirt2StackSlot(reg);
+      std::vector<LiveInterval*> newIs =
+        li_->addIntervalsForSpills(*i->first, *vrm_, slot);
+      std::copy(newIs.begin(), newIs.end(), std::back_inserter(added));
+      spilled.insert(reg);
+    }
+  }
+
+  DOUT << "\t\trolling back to: " << earliestStart << '\n';
+
+  // Scan handled in reverse order up to the earliest start of a
+  // spilled live interval and undo each one, restoring the state of
+  // unhandled.
+  while (!handled_.empty()) {
+    LiveInterval* i = handled_.back();
+    // If this interval starts before earliestStart we are done.
+    if (i->beginNumber() < earliestStart)
+      break;
+    DOUT << "\t\t\tundo changes for: " << *i << '\n';
+    handled_.pop_back();
+
+    // When undoing a live interval allocation we must know if it is active or
+    // inactive to properly update the PhysRegTracker and the VirtRegMap.
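+    // Three cases follow (a descriptive summary): an active interval must
+    // release its register use from the PhysRegTracker, an inactive interval
+    // only holds a virt->phys mapping, and in both cases the mapping is
+    // cleared; intervals that were not spilled above are pushed back onto
+    // unhandled_ so they get reallocated on the re-run.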
+    IntervalPtrs::iterator it;
+    if ((it = FindIntervalInVector(active_, i)) != active_.end()) {
+      active_.erase(it);
+      assert(!MRegisterInfo::isPhysicalRegister(i->reg));
+      if (!spilled.count(i->reg))
+        unhandled_.push(i);
+      prt_->delRegUse(vrm_->getPhys(i->reg));
+      vrm_->clearVirt(i->reg);
+    } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) {
+      inactive_.erase(it);
+      assert(!MRegisterInfo::isPhysicalRegister(i->reg));
+      if (!spilled.count(i->reg))
+        unhandled_.push(i);
+      vrm_->clearVirt(i->reg);
+    } else {
+      assert(MRegisterInfo::isVirtualRegister(i->reg) &&
+             "Can only allocate virtual registers!");
+      vrm_->clearVirt(i->reg);
+      unhandled_.push(i);
+    }
+  }
+
+  // Rewind the iterators in the active, inactive, and fixed lists back to the
+  // point we reverted to.
+  RevertVectorIteratorsTo(active_, earliestStart);
+  RevertVectorIteratorsTo(inactive_, earliestStart);
+  RevertVectorIteratorsTo(fixed_, earliestStart);
+
+  // scan the rest and undo each interval that expired after earliestStart and
+  // insert it in active (the next iteration of the algorithm will
+  // put it in inactive if required)
+  for (unsigned i = 0, e = handled_.size(); i != e; ++i) {
+    LiveInterval *HI = handled_[i];
+    if (!HI->expiredAt(earliestStart) &&
+        HI->expiredAt(cur->beginNumber())) {
+      DOUT << "\t\t\tundo changes for: " << *HI << '\n';
+      active_.push_back(std::make_pair(HI, HI->begin()));
+      assert(!MRegisterInfo::isPhysicalRegister(HI->reg));
+      prt_->addRegUse(vrm_->getPhys(HI->reg));
+    }
+  }
+
+  // merge added with unhandled
+  for (unsigned i = 0, e = added.size(); i != e; ++i)
+    unhandled_.push(added[i]);
+}
+
+/// getFreePhysReg - return a free physical register for this virtual register
+/// interval if we have one, otherwise return 0.
+unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
+  std::vector<unsigned> inactiveCounts(mri_->getNumRegs(), 0);
+  unsigned MaxInactiveCount = 0;
+
+  const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(cur->reg);
+  const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
+
+  for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end();
+       i != e; ++i) {
+    unsigned reg = i->first->reg;
+    assert(MRegisterInfo::isVirtualRegister(reg) &&
+           "Can only allocate virtual registers!");
+
+    // If this is not in a related reg class to the register we're allocating,
+    // don't check it.
+    const TargetRegisterClass *RegRC = mf_->getSSARegMap()->getRegClass(reg);
+    if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) {
+      reg = vrm_->getPhys(reg);
+      ++inactiveCounts[reg];
+      MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]);
+    }
+  }
+
+  unsigned FreeReg = 0;
+  unsigned FreeRegInactiveCount = 0;
+
+  // If the copy coalescer has assigned a "preferred" register, check if it's
+  // available first.
+  if (cur->preference)
+    if (prt_->isRegAvail(cur->preference)) {
+      DOUT << "\t\tassigned the preferred register: "
+           << mri_->getName(cur->preference) << "\n";
+      return cur->preference;
+    } else
+      DOUT << "\t\tunable to assign the preferred register: "
+           << mri_->getName(cur->preference) << "\n";
+
+  // Scan for the first available register.
+  TargetRegisterClass::iterator I = RC->allocation_order_begin(*mf_);
+  TargetRegisterClass::iterator E = RC->allocation_order_end(*mf_);
+  for (; I != E; ++I)
+    if (prt_->isRegAvail(*I)) {
+      FreeReg = *I;
+      FreeRegInactiveCount = inactiveCounts[FreeReg];
+      break;
+    }
+
+  // If there are no free regs, or if this reg has the max inactive count,
+  // return this register.
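+  // Mechanically (illustrative counts): if R1 and R2 are both available with
+  // inactiveCounts of 3 and 0 respectively, the scan keeps going and returns
+  // R1, the free register with the highest inactive count, stopping early
+  // once MaxInactiveCount is reached.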
+ if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) return FreeReg; + + // Continue scanning the registers, looking for the one with the highest + // inactive count. Alkis found that this reduced register pressure very + // slightly on X86 (in rev 1.94 of this file), though this should probably be + // reevaluated now. + for (; I != E; ++I) { + unsigned Reg = *I; + if (prt_->isRegAvail(Reg) && FreeRegInactiveCount < inactiveCounts[Reg]) { + FreeReg = Reg; + FreeRegInactiveCount = inactiveCounts[Reg]; + if (FreeRegInactiveCount == MaxInactiveCount) + break; // We found the one with the max inactive count. + } + } + + return FreeReg; +} + +FunctionPass* llvm::createLinearScanRegisterAllocator() { + return new RALinScan(); +} diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp new file mode 100644 index 0000000..6454900 --- /dev/null +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -0,0 +1,830 @@ +//===-- RegAllocLocal.cpp - A BasicBlock generic register allocator -------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This register allocator allocates registers to a basic block at a time, +// attempting to keep values in registers and reusing registers as appropriate. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "llvm/BasicBlock.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/SSARegMap.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumStores, "Number of stores added"); +STATISTIC(NumLoads , "Number of loads added"); +STATISTIC(NumFolded, "Number of loads/stores folded into instructions"); + +namespace { + static RegisterRegAlloc + localRegAlloc("local", " local register allocator", + createLocalRegisterAllocator); + + + class VISIBILITY_HIDDEN RALocal : public MachineFunctionPass { + public: + static char ID; + RALocal() : MachineFunctionPass((intptr_t)&ID) {} + private: + const TargetMachine *TM; + MachineFunction *MF; + const MRegisterInfo *RegInfo; + LiveVariables *LV; + + // StackSlotForVirtReg - Maps virtual regs to the frame index where these + // values are spilled. + std::map<unsigned, int> StackSlotForVirtReg; + + // Virt2PhysRegMap - This map contains entries for each virtual register + // that is currently available in a physical register. + IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap; + + unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) { + return Virt2PhysRegMap[VirtReg]; + } + + // PhysRegsUsed - This array is effectively a map, containing entries for + // each physical register that currently has a value (ie, it is in + // Virt2PhysRegMap). The value mapped to is the virtual register + // corresponding to the physical register (the inverse of the + // Virt2PhysRegMap), or 0. 
The value is set to 0 if this register is pinned + // because it is used by a future instruction, and to -2 if it is not + // allocatable. If the entry for a physical register is -1, then the + // physical register is "not in the map". + // + std::vector<int> PhysRegsUsed; + + // PhysRegsUseOrder - This contains a list of the physical registers that + // currently have a virtual register value in them. This list provides an + // ordering of registers, imposing a reallocation order. This list is only + // used if all registers are allocated and we have to spill one, in which + // case we spill the least recently used register. Entries at the front of + // the list are the least recently used registers, entries at the back are + // the most recently used. + // + std::vector<unsigned> PhysRegsUseOrder; + + // VirtRegModified - This bitset contains information about which virtual + // registers need to be spilled back to memory when their registers are + // scavenged. If a virtual register has simply been rematerialized, there + // is no reason to spill it to memory when we need the register back. + // + std::vector<bool> VirtRegModified; + + void markVirtRegModified(unsigned Reg, bool Val = true) { + assert(MRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); + Reg -= MRegisterInfo::FirstVirtualRegister; + if (VirtRegModified.size() <= Reg) VirtRegModified.resize(Reg+1); + VirtRegModified[Reg] = Val; + } + + bool isVirtRegModified(unsigned Reg) const { + assert(MRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); + assert(Reg - MRegisterInfo::FirstVirtualRegister < VirtRegModified.size() + && "Illegal virtual register!"); + return VirtRegModified[Reg - MRegisterInfo::FirstVirtualRegister]; + } + + void AddToPhysRegsUseOrder(unsigned Reg) { + std::vector<unsigned>::iterator It = + std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), Reg); + if (It != PhysRegsUseOrder.end()) + PhysRegsUseOrder.erase(It); + PhysRegsUseOrder.push_back(Reg); + } + + void MarkPhysRegRecentlyUsed(unsigned Reg) { + if (PhysRegsUseOrder.empty() || + PhysRegsUseOrder.back() == Reg) return; // Already most recently used + + for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i) + if (areRegsEqual(Reg, PhysRegsUseOrder[i-1])) { + unsigned RegMatch = PhysRegsUseOrder[i-1]; // remove from middle + PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1); + // Add it to the end of the list + PhysRegsUseOrder.push_back(RegMatch); + if (RegMatch == Reg) + return; // Found an exact match, exit early + } + } + + public: + virtual const char *getPassName() const { + return "Local Register Allocator"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LiveVariables>(); + AU.addRequiredID(PHIEliminationID); + AU.addRequiredID(TwoAddressInstructionPassID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + /// runOnMachineFunction - Register allocate the whole function + bool runOnMachineFunction(MachineFunction &Fn); + + /// AllocateBasicBlock - Register allocate the specified basic block. + void AllocateBasicBlock(MachineBasicBlock &MBB); + + + /// areRegsEqual - This method returns true if the specified registers are + /// related to each other. To do this, it checks to see if they are equal + /// or if the first register is in the alias set of the second register. 
+    ///
+    bool areRegsEqual(unsigned R1, unsigned R2) const {
+      if (R1 == R2) return true;
+      for (const unsigned *AliasSet = RegInfo->getAliasSet(R2);
+           *AliasSet; ++AliasSet) {
+        if (*AliasSet == R1) return true;
+      }
+      return false;
+    }
+
+    /// getStackSpaceFor - This returns the frame index of the specified virtual
+    /// register on the stack, allocating space if necessary.
+    int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+
+    /// removePhysReg - This method marks the specified physical register as no
+    /// longer being in use.
+    ///
+    void removePhysReg(unsigned PhysReg);
+
+    /// spillVirtReg - This method spills the value specified by PhysReg into
+    /// the virtual register slot specified by VirtReg.  It then updates the RA
+    /// data structures to indicate the fact that PhysReg is now available.
+    ///
+    void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                      unsigned VirtReg, unsigned PhysReg);
+
+    /// spillPhysReg - This method spills the specified physical register into
+    /// the virtual register slot associated with it.  If OnlyVirtRegs is set to
+    /// true, then the request is ignored if the physical register does not
+    /// contain a virtual register.
+    ///
+    void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+                      unsigned PhysReg, bool OnlyVirtRegs = false);
+
+    /// assignVirtToPhysReg - This method updates local state so that we know
+    /// that PhysReg is the proper container for VirtReg now.  The physical
+    /// register must not be used for anything else when this is called.
+    ///
+    void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg);
+
+    /// isPhysRegAvailable - Return true if the specified physical register is
+    /// free and available for use.  This also includes checking to see if
+    /// aliased registers are all free...
+    ///
+    bool isPhysRegAvailable(unsigned PhysReg) const;
+
+    /// getFreeReg - Look to see if there is a free register available in the
+    /// specified register class.  If not, return 0.
+    ///
+    unsigned getFreeReg(const TargetRegisterClass *RC);
+
+    /// getReg - Find a physical register to hold the specified virtual
+    /// register.  If all compatible physical registers are used, this method
+    /// spills the last used virtual register to the stack, and uses that
+    /// register.
+    ///
+    unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI,
+                    unsigned VirtReg);
+
+    /// reloadVirtReg - This method transforms the specified virtual
+    /// register use to refer to a physical register.  This method may do this
+    /// in one of several ways: if the register is available in a physical
+    /// register already, it uses that physical register.  If the value is not
+    /// in a physical register, and if there are physical registers available,
+    /// it loads it into a register.  If register pressure is high, and it is
+    /// possible, it tries to fold the load of the virtual register into the
+    /// instruction itself.  It avoids doing this if register pressure is low to
+    /// improve the chance that subsequent instructions can use the reloaded
+    /// value.  This method returns the modified instruction.
+    ///
+    MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+                                unsigned OpNum);
+
+
+    void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I,
+                       unsigned PhysReg);
+  };
+  char RALocal::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+  // Find the location Reg would belong...
+  std::map<unsigned, int>::iterator I = StackSlotForVirtReg.lower_bound(VirtReg);
+
+  if (I != StackSlotForVirtReg.end() && I->first == VirtReg)
+    return I->second;          // Already has space allocated?
+
+  // Allocate a new stack object for this spill location...
+  int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
+                                                       RC->getAlignment());
+
+  // Assign the slot...
+  StackSlotForVirtReg.insert(I, std::make_pair(VirtReg, FrameIdx));
+  return FrameIdx;
+}
+
+
+/// removePhysReg - This method marks the specified physical register as no
+/// longer being in use.
+///
+void RALocal::removePhysReg(unsigned PhysReg) {
+  PhysRegsUsed[PhysReg] = -1;      // PhysReg no longer used
+
+  std::vector<unsigned>::iterator It =
+    std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), PhysReg);
+  if (It != PhysRegsUseOrder.end())
+    PhysRegsUseOrder.erase(It);
+}
+
+
+/// spillVirtReg - This method spills the value specified by PhysReg into the
+/// virtual register slot specified by VirtReg.  It then updates the RA data
+/// structures to indicate the fact that PhysReg is now available.
+///
+void RALocal::spillVirtReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator I,
+                           unsigned VirtReg, unsigned PhysReg) {
+  assert(VirtReg && "Spilling a physical register is illegal!"
+         " Must not have appropriate kill for the register or use exists beyond"
+         " the intended one.");
+  DOUT << "  Spilling register " << RegInfo->getName(PhysReg)
+       << " containing %reg" << VirtReg;
+  if (!isVirtRegModified(VirtReg))
+    DOUT << " which has not been modified, so no store necessary!";
+
+  // Otherwise, there is a virtual register corresponding to this physical
+  // register.  We only need to spill it into its stack slot if it has been
+  // modified.
+  if (isVirtRegModified(VirtReg)) {
+    const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg);
+    int FrameIndex = getStackSpaceFor(VirtReg, RC);
+    DOUT << " to stack slot #" << FrameIndex;
+    RegInfo->storeRegToStackSlot(MBB, I, PhysReg, FrameIndex, RC);
+    ++NumStores;   // Update statistics
+  }
+
+  getVirt2PhysRegMapSlot(VirtReg) = 0;   // VirtReg no longer available
+
+  DOUT << "\n";
+  removePhysReg(PhysReg);
+}
+
+
+/// spillPhysReg - This method spills the specified physical register into the
+/// virtual register slot associated with it.  If OnlyVirtRegs is set to true,
+/// then the request is ignored if the physical register does not contain a
+/// virtual register.
+///
+void RALocal::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+                           unsigned PhysReg, bool OnlyVirtRegs) {
+  if (PhysRegsUsed[PhysReg] != -1) {   // Only spill it if it's used!
+    assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!");
+    if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
+      spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
+  } else {
+    // If the selected register aliases any other registers, we must make
+    // sure that one of the aliases isn't alive.
+    for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+         *AliasSet; ++AliasSet)
+      if (PhysRegsUsed[*AliasSet] != -1 &&     // Spill aliased register.
+          PhysRegsUsed[*AliasSet] != -2)       // If allocatable.
+        if (PhysRegsUsed[*AliasSet])
+          spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
+  }
+}
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now.
The physical +/// register must not be used for anything else when this is called. +/// +void RALocal::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { + assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!"); + // Update information to note the fact that this register was just used, and + // it holds VirtReg. + PhysRegsUsed[PhysReg] = VirtReg; + getVirt2PhysRegMapSlot(VirtReg) = PhysReg; + AddToPhysRegsUseOrder(PhysReg); // New use of PhysReg +} + + +/// isPhysRegAvailable - Return true if the specified physical register is free +/// and available for use. This also includes checking to see if aliased +/// registers are all free... +/// +bool RALocal::isPhysRegAvailable(unsigned PhysReg) const { + if (PhysRegsUsed[PhysReg] != -1) return false; + + // If the selected register aliases any other allocated registers, it is + // not free! + for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) + if (PhysRegsUsed[*AliasSet] != -1) // Aliased register in use? + return false; // Can't use this reg then. + return true; +} + + +/// getFreeReg - Look to see if there is a free register available in the +/// specified register class. If not, return 0. +/// +unsigned RALocal::getFreeReg(const TargetRegisterClass *RC) { + // Get iterators defining the range of registers that are valid to allocate in + // this class, which also specifies the preferred allocation order. + TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); + TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); + + for (; RI != RE; ++RI) + if (isPhysRegAvailable(*RI)) { // Is reg unused? + assert(*RI != 0 && "Cannot use register!"); + return *RI; // Found an unused register! + } + return 0; +} + + +/// getReg - Find a physical register to hold the specified virtual +/// register. If all compatible physical registers are used, this method spills +/// the last used virtual register to the stack, and uses that register. +/// +unsigned RALocal::getReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned VirtReg) { + const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg); + + // First check to see if we have a free register of the requested type... + unsigned PhysReg = getFreeReg(RC); + + // If we didn't find an unused register, scavenge one now! + if (PhysReg == 0) { + assert(!PhysRegsUseOrder.empty() && "No allocated registers??"); + + // Loop over all of the preallocated registers from the least recently used + // to the most recently used. When we find one that is capable of holding + // our register, use it. + for (unsigned i = 0; PhysReg == 0; ++i) { + assert(i != PhysRegsUseOrder.size() && + "Couldn't find a register of the appropriate class!"); + + unsigned R = PhysRegsUseOrder[i]; + + // We can only use this register if it holds a virtual register (ie, it + // can be spilled). Do not use it if it is an explicitly allocated + // physical register! + assert(PhysRegsUsed[R] != -1 && + "PhysReg in PhysRegsUseOrder, but is not allocated?"); + if (PhysRegsUsed[R] && PhysRegsUsed[R] != -2) { + // If the current register is compatible, use it. + if (RC->contains(R)) { + PhysReg = R; + break; + } else { + // If one of the registers aliased to the current register is + // compatible, use it. + for (const unsigned *AliasIt = RegInfo->getAliasSet(R); + *AliasIt; ++AliasIt) { + if (RC->contains(*AliasIt) && + // If this is pinned down for some reason, don't use it. 
For
+                // example, if CL is pinned, and we run across CH, don't use
+                // CH as justification for scavenging ECX (which will
+                // fail).
+                PhysRegsUsed[*AliasIt] != 0 &&
+
+                // Make sure the register is allocatable.  Don't allocate SIL on
+                // x86-32.
+                PhysRegsUsed[*AliasIt] != -2) {
+              PhysReg = *AliasIt;    // Take an aliased register
+              break;
+            }
+          }
+        }
+      }
+    }
+
+    assert(PhysReg && "Physical register not assigned!?!?");
+
+    // At this point PhysRegsUseOrder[i] is the least recently used register of
+    // compatible register class.  Spill it to memory and reap its remains.
+    spillPhysReg(MBB, I, PhysReg);
+  }
+
+  // Now that we know which register we need to assign this to, do it now!
+  assignVirtToPhysReg(VirtReg, PhysReg);
+  return PhysReg;
+}
+
+
+/// reloadVirtReg - This method transforms the specified virtual
+/// register use to refer to a physical register.  This method may do this in
+/// one of several ways: if the register is available in a physical register
+/// already, it uses that physical register.  If the value is not in a physical
+/// register, and if there are physical registers available, it loads it into a
+/// register.  If register pressure is high, and it is possible, it tries to
+/// fold the load of the virtual register into the instruction itself.  It
+/// avoids doing this if register pressure is low to improve the chance that
+/// subsequent instructions can use the reloaded value.  This method returns the
+/// modified instruction.
+///
+MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
+                                     unsigned OpNum) {
+  unsigned VirtReg = MI->getOperand(OpNum).getReg();
+
+  // If the virtual register is already available, just update the instruction
+  // and return.
+  if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
+    MarkPhysRegRecentlyUsed(PR);       // Already have this value available!
+    MI->getOperand(OpNum).setReg(PR);  // Assign the input register
+    return MI;
+  }
+
+  // Otherwise, we need to fold it into the current instruction, or reload it.
+  // If we have registers available to hold the value, use them.
+  const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg);
+  unsigned PhysReg = getFreeReg(RC);
+  int FrameIndex = getStackSpaceFor(VirtReg, RC);
+
+  if (PhysReg) {   // Register is available, allocate it!
+    assignVirtToPhysReg(VirtReg, PhysReg);
+  } else {         // No registers available.
+    // If we can fold this spill into this instruction, do so now.
+    if (MachineInstr* FMI = RegInfo->foldMemoryOperand(MI, OpNum, FrameIndex)){
+      ++NumFolded;
+      // Since we changed the address of MI, make sure to update live variables
+      // to know that the new instruction has the properties of the old one.
+      LV->instructionChanged(MI, FMI);
+      return MBB.insert(MBB.erase(MI), FMI);
+    }
+
+    // It looks like we can't fold this virtual register load into this
+    // instruction.  Force some poor hapless value out of the register file to
+    // make room for the new register, and reload it.
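+    // getReg (defined above) walks PhysRegsUseOrder from the least recently
+    // used register forward; e.g. if ECX (name illustrative) was touched
+    // longest ago and is compatible with VirtReg's class, its current
+    // occupant is spilled and ECX is handed back here.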
+    PhysReg = getReg(MBB, MI, VirtReg);
+  }
+
+  markVirtRegModified(VirtReg, false);   // Note that this reg was just reloaded
+
+  DOUT << "  Reloading %reg" << VirtReg << " into "
+       << RegInfo->getName(PhysReg) << "\n";
+
+  // Add move instruction(s)
+  RegInfo->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC);
+  ++NumLoads;    // Update statistics
+
+  MF->setPhysRegUsed(PhysReg);
+  MI->getOperand(OpNum).setReg(PhysReg);  // Assign the input register
+  return MI;
+}
+
+/// isReadModWriteImplicitKill - True if this is an implicit kill for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand& MO = MI->getOperand(i);
+    if (MO.isRegister() && MO.getReg() == Reg && MO.isImplicit() &&
+        MO.isDef() && !MO.isDead())
+      return true;
+  }
+  return false;
+}
+
+/// isReadModWriteImplicitDef - True if this is an implicit def for a
+/// read/mod/write register, i.e. update partial register.
+static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand& MO = MI->getOperand(i);
+    if (MO.isRegister() && MO.getReg() == Reg && MO.isImplicit() &&
+        !MO.isDef() && MO.isKill())
+      return true;
+  }
+  return false;
+}
+
+void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
+  // loop over each instruction
+  MachineBasicBlock::iterator MII = MBB.begin();
+  const TargetInstrInfo &TII = *TM->getInstrInfo();
+
+  DEBUG(const BasicBlock *LBB = MBB.getBasicBlock();
+        if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName());
+
+  // If this is the first basic block in the machine function, add live-in
+  // registers as active.
+  if (&MBB == &*MF->begin()) {
+    for (MachineFunction::livein_iterator I = MF->livein_begin(),
+         E = MF->livein_end(); I != E; ++I) {
+      unsigned Reg = I->first;
+      MF->setPhysRegUsed(Reg);
+      PhysRegsUsed[Reg] = 0;            // It is free and reserved now
+      AddToPhysRegsUseOrder(Reg);
+      for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+           *AliasSet; ++AliasSet) {
+        if (PhysRegsUsed[*AliasSet] != -2) {
+          AddToPhysRegsUseOrder(*AliasSet);
+          PhysRegsUsed[*AliasSet] = 0;  // It is free and reserved now
+          MF->setPhysRegUsed(*AliasSet);
+        }
+      }
+    }
+  }
+
+  // Otherwise, sequentially allocate each instruction in the MBB.
+  while (MII != MBB.end()) {
+    MachineInstr *MI = MII++;
+    const TargetInstrDescriptor &TID = TII.get(MI->getOpcode());
+    DEBUG(DOUT << "\nStarting RegAlloc of: " << *MI;
+          DOUT << "  Regs have values: ";
+          for (unsigned i = 0; i != RegInfo->getNumRegs(); ++i)
+            if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+              DOUT << "[" << RegInfo->getName(i)
+                   << ",%reg" << PhysRegsUsed[i] << "] ";
+          DOUT << "\n");
+
+    // Loop over the implicit uses, making sure that they are at the back of
+    // the use order list (most recently used), so they don't get reallocated.
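+    // PhysRegsUseOrder is kept in LRU order: the front holds the least
+    // recently used registers (first candidates for spilling), the back the
+    // most recently used.  E.g. MarkPhysRegRecentlyUsed(ESP) turns a list
+    // [EAX, ESP, ECX] into [EAX, ECX, ESP] (register names illustrative).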
+    if (TID.ImplicitUses) {
+      for (const unsigned *ImplicitUses = TID.ImplicitUses;
+           *ImplicitUses; ++ImplicitUses)
+        MarkPhysRegRecentlyUsed(*ImplicitUses);
+    }
+
+    SmallVector<unsigned, 8> Kills;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      if (MO.isRegister() && MO.isKill()) {
+        if (!MO.isImplicit())
+          Kills.push_back(MO.getReg());
+        else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
+          // These are extra physical register kills when a sub-register
+          // is defined (def of a sub-register is a read/mod/write of the
+          // larger registers). Ignore.
+          Kills.push_back(MO.getReg());
+      }
+    }
+
+    // Get the used operands into registers.  This has the potential to spill
+    // incoming values if we are out of registers.  Note that we completely
+    // ignore physical register uses here.  We assume that if an explicit
+    // physical register is referenced by the instruction, that it is guaranteed
+    // to be live-in, or the input is badly hosed.
+    //
+    for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      // here we are looking for only used operands (never def&use)
+      if (MO.isRegister() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
+          MRegisterInfo::isVirtualRegister(MO.getReg()))
+        MI = reloadVirtReg(MBB, MI, i);
+    }
+
+    // If this instruction is the last user of this register, kill the
+    // value, freeing the register being used, so it doesn't need to be
+    // spilled to memory.
+    //
+    for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+      unsigned VirtReg = Kills[i];
+      unsigned PhysReg = VirtReg;
+      if (MRegisterInfo::isVirtualRegister(VirtReg)) {
+        // If the virtual register was never materialized into a register, it
+        // might not be in the map, but it won't hurt to zero it out anyway.
+        unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+        PhysReg = PhysRegSlot;
+        PhysRegSlot = 0;
+      } else if (PhysRegsUsed[PhysReg] == -2) {
+        // Unallocatable register dead, ignore.
+        continue;
+      } else {
+        assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) &&
+               "Silently clearing a virtual register?");
+      }
+
+      if (PhysReg) {
+        DOUT << "  Last use of " << RegInfo->getName(PhysReg)
+             << "[%reg" << VirtReg <<"], removing it from live set\n";
+        removePhysReg(PhysReg);
+        for (const unsigned *AliasSet = RegInfo->getSubRegisters(PhysReg);
+             *AliasSet; ++AliasSet) {
+          if (PhysRegsUsed[*AliasSet] != -2) {
+            DOUT << "  Last use of "
+                 << RegInfo->getName(*AliasSet)
+                 << "[%reg" << VirtReg <<"], removing it from live set\n";
+            removePhysReg(*AliasSet);
+          }
+        }
+      }
+    }
+
+    // Loop over all of the operands of the instruction, spilling registers that
+    // are defined, and marking explicit destinations in the PhysRegsUsed map.
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      if (MO.isRegister() && MO.isDef() && !MO.isImplicit() && MO.getReg() &&
+          MRegisterInfo::isPhysicalRegister(MO.getReg())) {
+        unsigned Reg = MO.getReg();
+        if (PhysRegsUsed[Reg] == -2) continue;  // Something like ESP.
+        // These are extra physical register defs when a sub-register
+        // is defined (def of a sub-register is a read/mod/write of the
+        // larger registers). Ignore.
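+        // Roughly, on x86 (names illustrative): an instruction that defines
+        // AX also carries a def of EAX plus an implicit use-kill of EAX,
+        // since writing the low 16 bits reads, modifies and writes the wider
+        // register; that EAX def is not a fresh clobber and is skipped here.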
+        if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+        MF->setPhysRegUsed(Reg);
+        spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+        PhysRegsUsed[Reg] = 0;            // It is free and reserved now
+        AddToPhysRegsUseOrder(Reg);
+
+        for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+             *AliasSet; ++AliasSet) {
+          if (PhysRegsUsed[*AliasSet] != -2) {
+            MF->setPhysRegUsed(*AliasSet);
+            PhysRegsUsed[*AliasSet] = 0;  // It is free and reserved now
+            AddToPhysRegsUseOrder(*AliasSet);
+          }
+        }
+      }
+    }
+
+    // Loop over the implicit defs, spilling them as well.
+    if (TID.ImplicitDefs) {
+      for (const unsigned *ImplicitDefs = TID.ImplicitDefs;
+           *ImplicitDefs; ++ImplicitDefs) {
+        unsigned Reg = *ImplicitDefs;
+        if (PhysRegsUsed[Reg] != -2) {
+          spillPhysReg(MBB, MI, Reg, true);
+          AddToPhysRegsUseOrder(Reg);
+          PhysRegsUsed[Reg] = 0;          // It is free and reserved now
+        }
+        MF->setPhysRegUsed(Reg);
+        for (const unsigned *AliasSet = RegInfo->getSubRegisters(Reg);
+             *AliasSet; ++AliasSet) {
+          if (PhysRegsUsed[*AliasSet] != -2) {
+            AddToPhysRegsUseOrder(*AliasSet);
+            PhysRegsUsed[*AliasSet] = 0;  // It is free and reserved now
+            MF->setPhysRegUsed(*AliasSet);
+          }
+        }
+      }
+    }
+
+    SmallVector<unsigned, 8> DeadDefs;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      if (MO.isRegister() && MO.isDead())
+        DeadDefs.push_back(MO.getReg());
+    }
+
+    // Okay, we have allocated all of the source operands and spilled any
+    // values that would be destroyed by defs of this instruction. Loop over
+    // the explicit defs and assign them to a register, spilling incoming
+    // values if we need to scavenge a register.
+    //
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand& MO = MI->getOperand(i);
+      if (MO.isRegister() && MO.isDef() && MO.getReg() &&
+          MRegisterInfo::isVirtualRegister(MO.getReg())) {
+        unsigned DestVirtReg = MO.getReg();
+        unsigned DestPhysReg;
+
+        // If DestVirtReg already has a value, use it.
+        if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
+          DestPhysReg = getReg(MBB, MI, DestVirtReg);
+        MF->setPhysRegUsed(DestPhysReg);
+        markVirtRegModified(DestVirtReg);
+        MI->getOperand(i).setReg(DestPhysReg);  // Assign the output register
+      }
+    }
+
+    // If this instruction defines any registers that are immediately dead,
+    // kill them now.
+    //
+    for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) {
+      unsigned VirtReg = DeadDefs[i];
+      unsigned PhysReg = VirtReg;
+      if (MRegisterInfo::isVirtualRegister(VirtReg)) {
+        unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
+        PhysReg = PhysRegSlot;
+        assert(PhysReg != 0);
+        PhysRegSlot = 0;
+      } else if (PhysRegsUsed[PhysReg] == -2) {
+        // Unallocatable register dead, ignore.
+        continue;
+      }
+
+      if (PhysReg) {
+        DOUT << "  Register " << RegInfo->getName(PhysReg)
+             << " [%reg" << VirtReg
+             << "] is never used, removing it from the live list\n";
+        removePhysReg(PhysReg);
+        for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg);
+             *AliasSet; ++AliasSet) {
+          if (PhysRegsUsed[*AliasSet] != -2) {
+            DOUT << "  Register " << RegInfo->getName(*AliasSet)
+                 << " [%reg" << *AliasSet
+                 << "] is never used, removing it from the live list\n";
+            removePhysReg(*AliasSet);
+          }
+        }
+      }
+    }
+
+    // Finally, if this is a noop copy instruction, zap it.
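+    // (After allocation, both operands of a copy like "%reg1024 = MOV
+    // %reg1025" may have landed in the same physical register, leaving a
+    // self-copy such as "EAX = MOV EAX".)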
+    unsigned SrcReg, DstReg;
+    if (TII.isMoveInstr(*MI, SrcReg, DstReg) && SrcReg == DstReg) {
+      LV->removeVirtualRegistersKilled(MI);
+      LV->removeVirtualRegistersDead(MI);
+      MBB.erase(MI);
+    }
+  }
+
+  MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
+
+  // Spill all physical registers holding virtual registers now.
+  for (unsigned i = 0, e = RegInfo->getNumRegs(); i != e; ++i)
+    if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
+      if (unsigned VirtReg = PhysRegsUsed[i])
+        spillVirtReg(MBB, MI, VirtReg, i);
+      else
+        removePhysReg(i);
+
+#if 0
+  // This checking code is very expensive.
+  bool AllOk = true;
+  for (unsigned i = MRegisterInfo::FirstVirtualRegister,
+         e = MF->getSSARegMap()->getLastVirtReg(); i <= e; ++i)
+    if (unsigned PR = Virt2PhysRegMap[i]) {
+      cerr << "Register still mapped: " << i << " -> " << PR << "\n";
+      AllOk = false;
+    }
+  assert(AllOk && "Virtual registers still in phys regs?");
+#endif
+
+  // Clear any physical registers which appear live at the end of the basic
+  // block but which do not hold any virtual registers, e.g., the stack
+  // pointer.
+  PhysRegsUseOrder.clear();
+}
+
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RALocal::runOnMachineFunction(MachineFunction &Fn) {
+  DOUT << "Machine Function\n";
+  MF = &Fn;
+  TM = &Fn.getTarget();
+  RegInfo = TM->getRegisterInfo();
+  LV = &getAnalysis<LiveVariables>();
+
+  PhysRegsUsed.assign(RegInfo->getNumRegs(), -1);
+
+  // At various places we want to efficiently check to see whether a register
+  // is allocatable. To handle this, we mark all unallocatable registers as
+  // being pinned down, permanently.
+  {
+    BitVector Allocable = RegInfo->getAllocatableSet(Fn);
+    for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
+      if (!Allocable[i])
+        PhysRegsUsed[i] = -2;  // Mark the reg unallocable.
+  }
+
+  // Initialize the virtual->physical register map to have a 'null'
+  // mapping for all virtual registers.
+  Virt2PhysRegMap.grow(MF->getSSARegMap()->getLastVirtReg());
+
+  // Loop over all of the basic blocks, eliminating virtual register
+  // references.
+  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+       MBB != MBBe; ++MBB)
+    AllocateBasicBlock(*MBB);
+
+  StackSlotForVirtReg.clear();
+  PhysRegsUsed.clear();
+  VirtRegModified.clear();
+  Virt2PhysRegMap.clear();
+  return true;
+}
+
+FunctionPass *llvm::createLocalRegisterAllocator() {
+  return new RALocal();
+}
diff --git a/lib/CodeGen/RegAllocSimple.cpp b/lib/CodeGen/RegAllocSimple.cpp new file mode 100644 index 0000000..f49dd4c --- /dev/null +++ b/lib/CodeGen/RegAllocSimple.cpp @@ -0,0 +1,253 @@
+//===-- RegAllocSimple.cpp - A simple generic register allocator ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple register allocator. *Very* simple: It
+// immediately spills every value right after it is computed, and it reloads
+// all used operands from the spill area to temporary registers before each
+// instruction. It does not keep values in registers across instructions.
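+// For example, "c = a + b" is emitted as: reload a and b from their stack
+// slots into scratch registers, do the add, then immediately spill c back to
+// its own slot.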
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/SSARegMap.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +STATISTIC(NumStores, "Number of stores added"); +STATISTIC(NumLoads , "Number of loads added"); + +namespace { + static RegisterRegAlloc + simpleRegAlloc("simple", " simple register allocator", + createSimpleRegisterAllocator); + + class VISIBILITY_HIDDEN RegAllocSimple : public MachineFunctionPass { + public: + static char ID; + RegAllocSimple() : MachineFunctionPass((intptr_t)&ID) {} + private: + MachineFunction *MF; + const TargetMachine *TM; + const MRegisterInfo *RegInfo; + + // StackSlotForVirtReg - Maps SSA Regs => frame index on the stack where + // these values are spilled + std::map<unsigned, int> StackSlotForVirtReg; + + // RegsUsed - Keep track of what registers are currently in use. This is a + // bitset. + std::vector<bool> RegsUsed; + + // RegClassIdx - Maps RegClass => which index we can take a register + // from. Since this is a simple register allocator, when we need a register + // of a certain class, we just take the next available one. + std::map<const TargetRegisterClass*, unsigned> RegClassIdx; + + public: + virtual const char *getPassName() const { + return "Simple Register Allocator"; + } + + /// runOnMachineFunction - Register allocate the whole function + bool runOnMachineFunction(MachineFunction &Fn); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredID(PHIEliminationID); // Eliminate PHI nodes + MachineFunctionPass::getAnalysisUsage(AU); + } + private: + /// AllocateBasicBlock - Register allocate the specified basic block. + void AllocateBasicBlock(MachineBasicBlock &MBB); + + /// getStackSpaceFor - This returns the offset of the specified virtual + /// register on the stack, allocating space if necessary. + int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); + + /// Given a virtual register, return a compatible physical register that is + /// currently unused. + /// + /// Side effect: marks that register as being used until manually cleared + /// + unsigned getFreeReg(unsigned virtualReg); + + /// Moves value from memory into that register + unsigned reloadVirtReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, unsigned VirtReg); + + /// Saves reg value on the stack (maps virtual register to stack value) + void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned VirtReg, unsigned PhysReg); + }; + char RegAllocSimple::ID = 0; +} + +/// getStackSpaceFor - This allocates space for the specified virtual +/// register to be held on the stack. +int RegAllocSimple::getStackSpaceFor(unsigned VirtReg, + const TargetRegisterClass *RC) { + // Find the location VirtReg would belong... + std::map<unsigned, int>::iterator I = + StackSlotForVirtReg.lower_bound(VirtReg); + + if (I != StackSlotForVirtReg.end() && I->first == VirtReg) + return I->second; // Already has space allocated? + + // Allocate a new stack object for this spill location... 
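+  // (The object is sized and aligned for the register class, e.g. presumably
+  // a 4-byte, 4-byte-aligned slot for a 32-bit integer class.)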
+ int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(), + RC->getAlignment()); + + // Assign the slot... + StackSlotForVirtReg.insert(I, std::make_pair(VirtReg, FrameIdx)); + + return FrameIdx; +} + +unsigned RegAllocSimple::getFreeReg(unsigned virtualReg) { + const TargetRegisterClass* RC = MF->getSSARegMap()->getRegClass(virtualReg); + TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); + TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); + + while (1) { + unsigned regIdx = RegClassIdx[RC]++; + assert(RI+regIdx != RE && "Not enough registers!"); + unsigned PhysReg = *(RI+regIdx); + + if (!RegsUsed[PhysReg]) { + MF->setPhysRegUsed(PhysReg); + return PhysReg; + } + } +} + +unsigned RegAllocSimple::reloadVirtReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned VirtReg) { + const TargetRegisterClass* RC = MF->getSSARegMap()->getRegClass(VirtReg); + int FrameIdx = getStackSpaceFor(VirtReg, RC); + unsigned PhysReg = getFreeReg(VirtReg); + + // Add move instruction(s) + ++NumLoads; + RegInfo->loadRegFromStackSlot(MBB, I, PhysReg, FrameIdx, RC); + return PhysReg; +} + +void RegAllocSimple::spillVirtReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned VirtReg, unsigned PhysReg) { + const TargetRegisterClass* RC = MF->getSSARegMap()->getRegClass(VirtReg); + int FrameIdx = getStackSpaceFor(VirtReg, RC); + + // Add move instruction(s) + ++NumStores; + RegInfo->storeRegToStackSlot(MBB, I, PhysReg, FrameIdx, RC); +} + + +void RegAllocSimple::AllocateBasicBlock(MachineBasicBlock &MBB) { + // loop over each instruction + for (MachineBasicBlock::iterator MI = MBB.begin(); MI != MBB.end(); ++MI) { + // Made to combat the incorrect allocation of r2 = add r1, r1 + std::map<unsigned, unsigned> Virt2PhysRegMap; + + RegsUsed.resize(RegInfo->getNumRegs()); + + // This is a preliminary pass that will invalidate any registers that are + // used by the instruction (including implicit uses). + unsigned Opcode = MI->getOpcode(); + const TargetInstrDescriptor &Desc = TM->getInstrInfo()->get(Opcode); + const unsigned *Regs; + if (Desc.ImplicitUses) { + for (Regs = Desc.ImplicitUses; *Regs; ++Regs) + RegsUsed[*Regs] = true; + } + + if (Desc.ImplicitDefs) { + for (Regs = Desc.ImplicitDefs; *Regs; ++Regs) { + RegsUsed[*Regs] = true; + MF->setPhysRegUsed(*Regs); + } + } + + // Loop over uses, move from memory into registers. + for (int i = MI->getNumOperands() - 1; i >= 0; --i) { + MachineOperand &op = MI->getOperand(i); + + if (op.isRegister() && op.getReg() && + MRegisterInfo::isVirtualRegister(op.getReg())) { + unsigned virtualReg = (unsigned) op.getReg(); + DOUT << "op: " << op << "\n"; + DOUT << "\t inst[" << i << "]: "; + DEBUG(MI->print(*cerr.stream(), TM)); + + // make sure the same virtual register maps to the same physical + // register in any given instruction + unsigned physReg = Virt2PhysRegMap[virtualReg]; + if (physReg == 0) { + if (op.isDef()) { + int TiedOp = MI->getInstrDescriptor()->findTiedToSrcOperand(i); + if (TiedOp == -1) { + physReg = getFreeReg(virtualReg); + } else { + // must be same register number as the source operand that is + // tied to. This maps a = b + c into b = b + c, and saves b into + // a's spot. 
+              assert(MI->getOperand(TiedOp).isRegister() &&
+                     MI->getOperand(TiedOp).getReg() &&
+                     MI->getOperand(TiedOp).isUse() &&
+                     "Two address instruction invalid!");
+
+              physReg = MI->getOperand(TiedOp).getReg();
+            }
+            spillVirtReg(MBB, next(MI), virtualReg, physReg);
+          } else {
+            physReg = reloadVirtReg(MBB, MI, virtualReg);
+            Virt2PhysRegMap[virtualReg] = physReg;
+          }
+        }
+        MI->getOperand(i).setReg(physReg);
+        DOUT << "virt: " << virtualReg << ", phys: " << op.getReg() << "\n";
+      }
+    }
+    RegClassIdx.clear();
+    RegsUsed.clear();
+  }
+}
+
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RegAllocSimple::runOnMachineFunction(MachineFunction &Fn) {
+  DOUT << "Machine Function\n";
+  MF = &Fn;
+  TM = &MF->getTarget();
+  RegInfo = TM->getRegisterInfo();
+
+  // Loop over all of the basic blocks, eliminating virtual register
+  // references.
+  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+       MBB != MBBe; ++MBB)
+    AllocateBasicBlock(*MBB);
+
+  StackSlotForVirtReg.clear();
+  return true;
+}
+
+FunctionPass *llvm::createSimpleRegisterAllocator() {
+  return new RegAllocSimple();
+}
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp new file mode 100644 index 0000000..ae40e58 --- /dev/null +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -0,0 +1,292 @@
+//===-- RegisterScavenging.cpp - Machine register scavenging --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine register scavenger. It can provide
+// information such as which registers are unused at any point in a machine
+// basic block. It also provides a mechanism to make registers available by
+// evicting them to spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reg-scavenging"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
+  const MachineFunction &MF = *mbb->getParent();
+  const TargetMachine &TM = MF.getTarget();
+  TII = TM.getInstrInfo();
+  RegInfo = TM.getRegisterInfo();
+
+  assert((NumPhysRegs == 0 || NumPhysRegs == RegInfo->getNumRegs()) &&
+         "Target changed?");
+
+  if (!MBB) {
+    NumPhysRegs = RegInfo->getNumRegs();
+    RegsAvailable.resize(NumPhysRegs);
+
+    // Create reserved registers bitvector.
+    ReservedRegs = RegInfo->getReservedRegs(MF);
+
+    // Create callee-saved registers bitvector.
+    CalleeSavedRegs.resize(NumPhysRegs);
+    const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+    if (CSRegs != NULL)
+      for (unsigned i = 0; CSRegs[i]; ++i)
+        CalleeSavedRegs.set(CSRegs[i]);
+  }
+
+  MBB = mbb;
+  ScavengedReg = 0;
+  ScavengedRC = NULL;
+
+  // All registers started out unused.
+  RegsAvailable.set();
+
+  // Reserved registers are always used.
+  RegsAvailable ^= ReservedRegs;
+
+  // Live-in registers are in use.
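+  // (RegsAvailable is indexed by physical register number, with a set bit
+  // meaning the register is currently free; setUsed presumably clears the
+  // corresponding bit.)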
+  if (!MBB->livein_empty())
+    for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
+           E = MBB->livein_end(); I != E; ++I)
+      setUsed(*I);
+
+  Tracking = false;
+}
+
+void RegScavenger::restoreScavengedReg() {
+  if (!ScavengedReg)
+    return;
+
+  RegInfo->loadRegFromStackSlot(*MBB, MBBI, ScavengedReg,
+                                ScavengingFrameIndex, ScavengedRC);
+  MachineBasicBlock::iterator II = prior(MBBI);
+  RegInfo->eliminateFrameIndex(II, 0, this);
+  setUsed(ScavengedReg);
+  ScavengedReg = 0;
+  ScavengedRC = NULL;
+}
+
+void RegScavenger::forward() {
+  // Move ptr forward.
+  if (!Tracking) {
+    MBBI = MBB->begin();
+    Tracking = true;
+  } else {
+    assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+    MBBI = next(MBBI);
+  }
+
+  MachineInstr *MI = MBBI;
+
+  // Reaching a terminator instruction. Restore the scavenged register (which
+  // must be live out).
+  if (TII->isTerminatorInstr(MI->getOpcode()))
+    restoreScavengedReg();
+
+  // Process uses first.
+  BitVector ChangedRegs(NumPhysRegs);
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isUse())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0)
+      continue;
+    if (!isUsed(Reg)) {
+      // Register has been scavenged. Restore it!
+      if (Reg != ScavengedReg)
+        assert(false && "Using an undefined register!");
+      else
+        restoreScavengedReg();
+    }
+    if (MO.isKill() && !isReserved(Reg))
+      ChangedRegs.set(Reg);
+  }
+  // Change states of all registers after all the uses are processed to guard
+  // against multiple uses.
+  setUnused(ChangedRegs);
+
+  // Process defs.
+  const TargetInstrDescriptor *TID = MI->getInstrDescriptor();
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+    unsigned Reg = MO.getReg();
+    // If it's dead upon def, then it is now free.
+    if (MO.isDead()) {
+      setUnused(Reg);
+      continue;
+    }
+    // Skip two-address destination operand.
+    if (TID->findTiedToSrcOperand(i) != -1) {
+      assert(isUsed(Reg) && "Using an undefined register!");
+      continue;
+    }
+    assert((isUnused(Reg) || isReserved(Reg)) &&
+           "Re-defining a live register!");
+    setUsed(Reg);
+  }
+}
+
+void RegScavenger::backward() {
+  assert(Tracking && "Not tracking states!");
+  assert(MBBI != MBB->begin() && "Already at start of basic block!");
+  // Move ptr backward.
+  MBBI = prior(MBBI);
+
+  MachineInstr *MI = MBBI;
+  // Process defs first.
+  const TargetInstrDescriptor *TID = MI->getInstrDescriptor();
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+    // Skip two-address destination operand.
+    if (TID->findTiedToSrcOperand(i) != -1)
+      continue;
+    unsigned Reg = MO.getReg();
+    assert(isUsed(Reg));
+    if (!isReserved(Reg))
+      setUnused(Reg);
+  }
+
+  // Process uses.
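+  // Walking backward, a use means the register is live above this point, so
+  // it transitions back to the used state; as in forward(), the updates are
+  // batched to guard against multiple uses of the same register.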
+  BitVector ChangedRegs(NumPhysRegs);
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isUse())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0)
+      continue;
+    assert(isUnused(Reg) || isReserved(Reg));
+    ChangedRegs.set(Reg);
+  }
+  setUsed(ChangedRegs);
+}
+
+void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
+  if (includeReserved)
+    used = ~RegsAvailable;
+  else
+    used = ~RegsAvailable & ~ReservedRegs;
+}
+
+/// CreateRegClassMask - Set the bits that represent the registers in the
+/// TargetRegisterClass.
+static void CreateRegClassMask(const TargetRegisterClass *RC, BitVector &Mask) {
+  for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E;
+       ++I)
+    Mask.set(*I);
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass,
+                                     const BitVector &Candidates) const {
+  // Mask off the registers which are not in the TargetRegisterClass.
+  BitVector RegsAvailableCopy(NumPhysRegs, false);
+  CreateRegClassMask(RegClass, RegsAvailableCopy);
+  RegsAvailableCopy &= RegsAvailable;
+
+  // Restrict the search to candidates.
+  RegsAvailableCopy &= Candidates;
+
+  // Returns the first unused (bit is set) register, or 0 if none is found.
+  int Reg = RegsAvailableCopy.find_first();
+  return (Reg == -1) ? 0 : Reg;
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass,
+                                     bool ExCalleeSaved) const {
+  // Mask off the registers which are not in the TargetRegisterClass.
+  BitVector RegsAvailableCopy(NumPhysRegs, false);
+  CreateRegClassMask(RegClass, RegsAvailableCopy);
+  RegsAvailableCopy &= RegsAvailable;
+
+  // If looking for a non-callee-saved register, mask off all the callee-saved
+  // registers.
+  if (ExCalleeSaved)
+    RegsAvailableCopy &= ~CalleeSavedRegs;
+
+  // Returns the first unused (bit is set) register, or 0 if none is found.
+  int Reg = RegsAvailableCopy.find_first();
+  return (Reg == -1) ? 0 : Reg;
+}
+
+/// calcDistanceToUse - Calculate the distance to the first use of the
+/// specified register.
+static unsigned calcDistanceToUse(MachineBasicBlock *MBB,
+                                  MachineBasicBlock::iterator I, unsigned Reg) {
+  unsigned Dist = 0;
+  I = next(I);
+  while (I != MBB->end()) {
+    Dist++;
+    if (I->findRegisterUseOperandIdx(Reg) != -1)
+      return Dist;
+    I = next(I);
+  }
+  return Dist + 1;
+}
+
+unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
+                                        MachineBasicBlock::iterator I,
+                                        int SPAdj) {
+  assert(ScavengingFrameIndex >= 0 &&
+         "Cannot scavenge a register without an emergency spill slot!");
+
+  // Mask off the registers which are not in the TargetRegisterClass.
+  BitVector Candidates(NumPhysRegs, false);
+  CreateRegClassMask(RC, Candidates);
+  Candidates ^= ReservedRegs;  // Do not include reserved registers.
+
+  // Exclude all the registers being used by the instruction.
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = I->getOperand(i);
+    if (MO.isReg())
+      Candidates.reset(MO.getReg());
+  }
+
+  // Find the register whose first use is furthest away.
+  unsigned SReg = 0;
+  unsigned MaxDist = 0;
+  int Reg = Candidates.find_first();
+  while (Reg != -1) {
+    unsigned Dist = calcDistanceToUse(MBB, I, Reg);
+    if (Dist >= MaxDist) {
+      MaxDist = Dist;
+      SReg = Reg;
+    }
+    Reg = Candidates.find_next(Reg);
+  }
+
+  if (ScavengedReg != 0) {
+    // First restore the previously scavenged register.
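+    // (Only one register is tracked as scavenged at a time, so reload its
+    // old value from the emergency spill slot before the slot is reused for
+    // SReg.)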
+ RegInfo->loadRegFromStackSlot(*MBB, I, ScavengedReg, + ScavengingFrameIndex, ScavengedRC); + MachineBasicBlock::iterator II = prior(I); + RegInfo->eliminateFrameIndex(II, SPAdj, this); + } + + RegInfo->storeRegToStackSlot(*MBB, I, SReg, ScavengingFrameIndex, RC); + MachineBasicBlock::iterator II = prior(I); + RegInfo->eliminateFrameIndex(II, SPAdj, this); + ScavengedReg = SReg; + ScavengedRC = RC; + + return SReg; +} diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp new file mode 100644 index 0000000..defbe34 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp @@ -0,0 +1,102 @@ +//===-- llvm/CallingConvLower.cpp - Calling Conventions -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CCState class, used for lowering and implementing +// calling conventions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Target/MRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +CCState::CCState(unsigned CC, bool isVarArg, const TargetMachine &tm, + SmallVector<CCValAssign, 16> &locs) + : CallingConv(CC), IsVarArg(isVarArg), TM(tm), + MRI(*TM.getRegisterInfo()), Locs(locs) { + // No stack is used. + StackOffset = 0; + + UsedRegs.resize(MRI.getNumRegs()); +} + + +/// MarkAllocated - Mark a register and all of its aliases as allocated. +void CCState::MarkAllocated(unsigned Reg) { + UsedRegs[Reg/32] |= 1 << (Reg&31); + + if (const unsigned *RegAliases = MRI.getAliasSet(Reg)) + for (; (Reg = *RegAliases); ++RegAliases) + UsedRegs[Reg/32] |= 1 << (Reg&31); +} + +/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node, +/// incorporating info about the formals into this state. +void CCState::AnalyzeFormalArguments(SDNode *TheArgs, CCAssignFn Fn) { + unsigned NumArgs = TheArgs->getNumValues()-1; + + for (unsigned i = 0; i != NumArgs; ++i) { + MVT::ValueType ArgVT = TheArgs->getValueType(i); + SDOperand FlagOp = TheArgs->getOperand(3+i); + unsigned ArgFlags = cast<ConstantSDNode>(FlagOp)->getValue(); + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { + cerr << "Formal argument #" << i << " has unhandled type " + << MVT::getValueTypeString(ArgVT) << "\n"; + abort(); + } + } +} + +/// AnalyzeReturn - Analyze the returned values of an ISD::RET node, +/// incorporating info about the result values into this state. +void CCState::AnalyzeReturn(SDNode *TheRet, CCAssignFn Fn) { + // Determine which register each value should be copied into. + for (unsigned i = 0, e = TheRet->getNumOperands() / 2; i != e; ++i) { + MVT::ValueType VT = TheRet->getOperand(i*2+1).getValueType(); + if (Fn(i, VT, VT, CCValAssign::Full, + cast<ConstantSDNode>(TheRet->getOperand(i*2+2))->getValue(), *this)){ + cerr << "Return operand #" << i << " has unhandled type " + << MVT::getValueTypeString(VT) << "\n"; + abort(); + } + } +} + + +/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info +/// about the passed values into this state. 
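+/// The first five operands of the CALL node are call bookkeeping rather than
+/// arguments; the actual argument values follow as (value, flags) pairs,
+/// which is why the loop below indexes operands 5+2*i.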
+void CCState::AnalyzeCallOperands(SDNode *TheCall, CCAssignFn Fn) { + unsigned NumOps = (TheCall->getNumOperands() - 5) / 2; + for (unsigned i = 0; i != NumOps; ++i) { + MVT::ValueType ArgVT = TheCall->getOperand(5+2*i).getValueType(); + SDOperand FlagOp = TheCall->getOperand(5+2*i+1); + unsigned ArgFlags =cast<ConstantSDNode>(FlagOp)->getValue(); + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { + cerr << "Call operand #" << i << " has unhandled type " + << MVT::getValueTypeString(ArgVT) << "\n"; + abort(); + } + } +} + +/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node, +/// incorporating info about the passed values into this state. +void CCState::AnalyzeCallResult(SDNode *TheCall, CCAssignFn Fn) { + for (unsigned i = 0, e = TheCall->getNumValues() - 1; i != e; ++i) { + MVT::ValueType VT = TheCall->getValueType(i); + if (Fn(i, VT, VT, CCValAssign::Full, 0, *this)) { + cerr << "Call result #" << i << " has unhandled type " + << MVT::getValueTypeString(VT) << "\n"; + abort(); + } + } +} + diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp new file mode 100644 index 0000000..22c6e6b --- /dev/null +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -0,0 +1,4749 @@ +//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Nate Begeman and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run +// both before and after the DAG is legalized. +// +// FIXME: Missing folds +// sdiv, udiv, srem, urem (X, const) where X is an integer can be expanded into +// a sequence of multiplies, shifts, and adds. This should be controlled by +// some kind of hint from the target that int div is expensive. +// various folds of mulh[s,u] by constants such as -1, powers of 2, etc. +// +// FIXME: select C, pow2, pow2 -> something smart +// FIXME: trunc(select X, Y, Z) -> select X, trunc(Y), trunc(Z) +// FIXME: Dead stores -> nuke +// FIXME: shr X, (and Y,31) -> shr X, Y (TRICKY!) +// FIXME: mul (x, const) -> shifts + adds +// FIXME: undef values +// FIXME: divide by zero is currently left unfolded. do we want to turn this +// into an undef? 
+// FIXME: select ne (select cc, 1, 0), 0, true, false -> select cc, true, false
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dagcombine"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NodesCombined   , "Number of dag nodes combined");
+STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
+STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+
+namespace {
+#ifndef NDEBUG
+  static cl::opt<bool>
+    ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
+                    cl::desc("Pop up a window to show dags before the first "
+                             "dag combine pass"));
+  static cl::opt<bool>
+    ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden,
+                    cl::desc("Pop up a window to show dags before the second "
+                             "dag combine pass"));
+#else
+  static const bool ViewDAGCombine1 = false;
+  static const bool ViewDAGCombine2 = false;
+#endif
+
+  static cl::opt<bool>
+    CombinerAA("combiner-alias-analysis", cl::Hidden,
+               cl::desc("Turn on alias analysis during testing"));
+
+  static cl::opt<bool>
+    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+                     cl::desc("Include global information in alias analysis"));
+
+//------------------------------ DAGCombiner ---------------------------------//
+
+  class VISIBILITY_HIDDEN DAGCombiner {
+    SelectionDAG &DAG;
+    TargetLowering &TLI;
+    bool AfterLegalize;
+
+    // Worklist of all of the nodes that need to be simplified.
+    std::vector<SDNode*> WorkList;
+
+    // AA - Used for DAG load/store alias analysis.
+    AliasAnalysis &AA;
+
+    /// AddUsersToWorkList - When an instruction is simplified, add all users
+    /// of the instruction to the worklist because they might get further
+    /// simplified now.
+    ///
+    void AddUsersToWorkList(SDNode *N) {
+      for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+           UI != UE; ++UI)
+        AddToWorkList(*UI);
+    }
+
+    /// removeFromWorkList - remove all instances of N from the worklist.
+    ///
+    void removeFromWorkList(SDNode *N) {
+      WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
+                     WorkList.end());
+    }
+
+  public:
+    /// AddToWorkList - Add to the work list, making sure its instance is at
+    /// the back (next to be processed).
+    void AddToWorkList(SDNode *N) {
+      removeFromWorkList(N);
+      WorkList.push_back(N);
+    }
+
+    SDOperand CombineTo(SDNode *N, const SDOperand *To, unsigned NumTo,
+                        bool AddTo = true) {
+      assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
+      ++NodesCombined;
+      DOUT << "\nReplacing.1 "; DEBUG(N->dump(&DAG));
+      DOUT << "\nWith: "; DEBUG(To[0].Val->dump(&DAG));
+      DOUT << " and " << NumTo-1 << " other values\n";
+      std::vector<SDNode*> NowDead;
+      DAG.ReplaceAllUsesWith(N, To, &NowDead);
+
+      if (AddTo) {
+        // Push the new nodes and any users onto the worklist
+        for (unsigned i = 0, e = NumTo; i != e; ++i) {
+          AddToWorkList(To[i].Val);
+          AddUsersToWorkList(To[i].Val);
+        }
+      }
+
+      // Nodes can be reintroduced into the worklist. Make sure we do not
+      // process a node that has been replaced.
+ removeFromWorkList(N); + for (unsigned i = 0, e = NowDead.size(); i != e; ++i) + removeFromWorkList(NowDead[i]); + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + return SDOperand(N, 0); + } + + SDOperand CombineTo(SDNode *N, SDOperand Res, bool AddTo = true) { + return CombineTo(N, &Res, 1, AddTo); + } + + SDOperand CombineTo(SDNode *N, SDOperand Res0, SDOperand Res1, + bool AddTo = true) { + SDOperand To[] = { Res0, Res1 }; + return CombineTo(N, To, 2, AddTo); + } + private: + + /// SimplifyDemandedBits - Check the specified integer node value to see if + /// it can be simplified or if things it uses can be simplified by bit + /// propagation. If so, return true. + bool SimplifyDemandedBits(SDOperand Op) { + TargetLowering::TargetLoweringOpt TLO(DAG); + uint64_t KnownZero, KnownOne; + uint64_t Demanded = MVT::getIntVTBitMask(Op.getValueType()); + if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) + return false; + + // Revisit the node. + AddToWorkList(Op.Val); + + // Replace the old value with the new one. + ++NodesCombined; + DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.Val->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(TLO.New.Val->dump(&DAG)); + DOUT << '\n'; + + std::vector<SDNode*> NowDead; + DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, NowDead); + + // Push the new node and any (possibly new) users onto the worklist. + AddToWorkList(TLO.New.Val); + AddUsersToWorkList(TLO.New.Val); + + // Nodes can end up on the worklist more than once. Make sure we do + // not process a node that has been replaced. + for (unsigned i = 0, e = NowDead.size(); i != e; ++i) + removeFromWorkList(NowDead[i]); + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (TLO.Old.Val->use_empty()) { + removeFromWorkList(TLO.Old.Val); + + // If the operands of this node are only used by the node, they will now + // be dead. Make sure to visit them first to delete dead nodes early. + for (unsigned i = 0, e = TLO.Old.Val->getNumOperands(); i != e; ++i) + if (TLO.Old.Val->getOperand(i).Val->hasOneUse()) + AddToWorkList(TLO.Old.Val->getOperand(i).Val); + + DAG.DeleteNode(TLO.Old.Val); + } + return true; + } + + bool CombineToPreIndexedLoadStore(SDNode *N); + bool CombineToPostIndexedLoadStore(SDNode *N); + + + /// visit - call the node-specific routine that knows how to fold each + /// particular type of node. + SDOperand visit(SDNode *N); + + // Visitation implementation - Implement dag node combining for different + // node types. The semantics are as follows: + // Return Value: + // SDOperand.Val == 0 - No change was made + // SDOperand.Val == N - N was replaced, is dead, and is already handled. + // otherwise - N should be replaced by the returned Operand. 
+ // + SDOperand visitTokenFactor(SDNode *N); + SDOperand visitADD(SDNode *N); + SDOperand visitSUB(SDNode *N); + SDOperand visitADDC(SDNode *N); + SDOperand visitADDE(SDNode *N); + SDOperand visitMUL(SDNode *N); + SDOperand visitSDIV(SDNode *N); + SDOperand visitUDIV(SDNode *N); + SDOperand visitSREM(SDNode *N); + SDOperand visitUREM(SDNode *N); + SDOperand visitMULHU(SDNode *N); + SDOperand visitMULHS(SDNode *N); + SDOperand visitAND(SDNode *N); + SDOperand visitOR(SDNode *N); + SDOperand visitXOR(SDNode *N); + SDOperand SimplifyVBinOp(SDNode *N); + SDOperand visitSHL(SDNode *N); + SDOperand visitSRA(SDNode *N); + SDOperand visitSRL(SDNode *N); + SDOperand visitCTLZ(SDNode *N); + SDOperand visitCTTZ(SDNode *N); + SDOperand visitCTPOP(SDNode *N); + SDOperand visitSELECT(SDNode *N); + SDOperand visitSELECT_CC(SDNode *N); + SDOperand visitSETCC(SDNode *N); + SDOperand visitSIGN_EXTEND(SDNode *N); + SDOperand visitZERO_EXTEND(SDNode *N); + SDOperand visitANY_EXTEND(SDNode *N); + SDOperand visitSIGN_EXTEND_INREG(SDNode *N); + SDOperand visitTRUNCATE(SDNode *N); + SDOperand visitBIT_CONVERT(SDNode *N); + SDOperand visitFADD(SDNode *N); + SDOperand visitFSUB(SDNode *N); + SDOperand visitFMUL(SDNode *N); + SDOperand visitFDIV(SDNode *N); + SDOperand visitFREM(SDNode *N); + SDOperand visitFCOPYSIGN(SDNode *N); + SDOperand visitSINT_TO_FP(SDNode *N); + SDOperand visitUINT_TO_FP(SDNode *N); + SDOperand visitFP_TO_SINT(SDNode *N); + SDOperand visitFP_TO_UINT(SDNode *N); + SDOperand visitFP_ROUND(SDNode *N); + SDOperand visitFP_ROUND_INREG(SDNode *N); + SDOperand visitFP_EXTEND(SDNode *N); + SDOperand visitFNEG(SDNode *N); + SDOperand visitFABS(SDNode *N); + SDOperand visitBRCOND(SDNode *N); + SDOperand visitBR_CC(SDNode *N); + SDOperand visitLOAD(SDNode *N); + SDOperand visitSTORE(SDNode *N); + SDOperand visitINSERT_VECTOR_ELT(SDNode *N); + SDOperand visitBUILD_VECTOR(SDNode *N); + SDOperand visitCONCAT_VECTORS(SDNode *N); + SDOperand visitVECTOR_SHUFFLE(SDNode *N); + + SDOperand XformToShuffleWithZero(SDNode *N); + SDOperand ReassociateOps(unsigned Opc, SDOperand LHS, SDOperand RHS); + + bool SimplifySelectOps(SDNode *SELECT, SDOperand LHS, SDOperand RHS); + SDOperand SimplifyBinOpWithSameOpcodeHands(SDNode *N); + SDOperand SimplifySelect(SDOperand N0, SDOperand N1, SDOperand N2); + SDOperand SimplifySelectCC(SDOperand N0, SDOperand N1, SDOperand N2, + SDOperand N3, ISD::CondCode CC, + bool NotExtCompare = false); + SDOperand SimplifySetCC(MVT::ValueType VT, SDOperand N0, SDOperand N1, + ISD::CondCode Cond, bool foldBooleans = true); + SDOperand ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT::ValueType); + SDOperand BuildSDIV(SDNode *N); + SDOperand BuildUDIV(SDNode *N); + SDNode *MatchRotate(SDOperand LHS, SDOperand RHS); + SDOperand ReduceLoadWidth(SDNode *N); + + /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, + /// looking for aliasing nodes and adding them to the Aliases vector. + void GatherAllAliases(SDNode *N, SDOperand OriginalChain, + SmallVector<SDOperand, 8> &Aliases); + + /// isAlias - Return true if there is any possibility that the two addresses + /// overlap. + bool isAlias(SDOperand Ptr1, int64_t Size1, + const Value *SrcValue1, int SrcValueOffset1, + SDOperand Ptr2, int64_t Size2, + const Value *SrcValue2, int SrcValueOffset2); + + /// FindAliasInfo - Extracts the relevant alias information from the memory + /// node. Returns true if the operand was a load. 
+ bool FindAliasInfo(SDNode *N, + SDOperand &Ptr, int64_t &Size, + const Value *&SrcValue, int &SrcValueOffset); + + /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, + /// looking for a better chain (aliasing node.) + SDOperand FindBetterChain(SDNode *N, SDOperand Chain); + +public: + DAGCombiner(SelectionDAG &D, AliasAnalysis &A) + : DAG(D), + TLI(D.getTargetLoweringInfo()), + AfterLegalize(false), + AA(A) {} + + /// Run - runs the dag combiner on all nodes in the work list + void Run(bool RunningAfterLegalize); + }; +} + +//===----------------------------------------------------------------------===// +// TargetLowering::DAGCombinerInfo implementation +//===----------------------------------------------------------------------===// + +void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { + ((DAGCombiner*)DC)->AddToWorkList(N); +} + +SDOperand TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, const std::vector<SDOperand> &To) { + return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size()); +} + +SDOperand TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, SDOperand Res) { + return ((DAGCombiner*)DC)->CombineTo(N, Res); +} + + +SDOperand TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, SDOperand Res0, SDOperand Res1) { + return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1); +} + + +//===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +/// isNegatibleForFree - Return 1 if we can compute the negated form of the +/// specified expression for the same cost as the expression itself, or 2 if we +/// can compute the negated form more cheaply than the expression itself. +static char isNegatibleForFree(SDOperand Op, unsigned Depth = 0) { + // fneg is removable even if it has multiple uses. + if (Op.getOpcode() == ISD::FNEG) return 2; + + // Don't allow anything with multiple uses. + if (!Op.hasOneUse()) return 0; + + // Don't recurse exponentially. + if (Depth > 6) return 0; + + switch (Op.getOpcode()) { + default: return false; + case ISD::ConstantFP: + return 1; + case ISD::FADD: + // FIXME: determine better conditions for this xform. + if (!UnsafeFPMath) return 0; + + // -(A+B) -> -A - B + if (char V = isNegatibleForFree(Op.getOperand(0), Depth+1)) + return V; + // -(A+B) -> -B - A + return isNegatibleForFree(Op.getOperand(1), Depth+1); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. + if (!UnsafeFPMath) return 0; + + // -(A-B) -> B-A + return 1; + + case ISD::FMUL: + case ISD::FDIV: + if (HonorSignDependentRoundingFPMath()) return 0; + + // -(X*Y) -> (-X * Y) or (X*-Y) + if (char V = isNegatibleForFree(Op.getOperand(0), Depth+1)) + return V; + + return isNegatibleForFree(Op.getOperand(1), Depth+1); + + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FSIN: + return isNegatibleForFree(Op.getOperand(0), Depth+1); + } +} + +/// GetNegatedExpression - If isNegatibleForFree returns true, this function +/// returns the newly negated expression. +static SDOperand GetNegatedExpression(SDOperand Op, SelectionDAG &DAG, + unsigned Depth = 0) { + // fneg is removable even if it has multiple uses. + if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); + + // Don't allow anything with multiple uses. 
+ assert(Op.hasOneUse() && "Unknown reuse!"); + + assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); + switch (Op.getOpcode()) { + default: assert(0 && "Unknown code"); + case ISD::ConstantFP: + return DAG.getConstantFP(-cast<ConstantFPSDNode>(Op)->getValue(), + Op.getValueType()); + case ISD::FADD: + // FIXME: determine better conditions for this xform. + assert(UnsafeFPMath); + + // -(A+B) -> -A - B + if (isNegatibleForFree(Op.getOperand(0), Depth+1)) + return DAG.getNode(ISD::FSUB, Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, Depth+1), + Op.getOperand(1)); + // -(A+B) -> -B - A + return DAG.getNode(ISD::FSUB, Op.getValueType(), + GetNegatedExpression(Op.getOperand(1), DAG, Depth+1), + Op.getOperand(0)); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. + assert(UnsafeFPMath); + + // -(0-B) -> B + if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) + if (N0CFP->getValue() == 0.0) + return Op.getOperand(1); + + // -(A-B) -> B-A + return DAG.getNode(ISD::FSUB, Op.getValueType(), Op.getOperand(1), + Op.getOperand(0)); + + case ISD::FMUL: + case ISD::FDIV: + assert(!HonorSignDependentRoundingFPMath()); + + // -(X*Y) -> -X * Y + if (isNegatibleForFree(Op.getOperand(0), Depth+1)) + return DAG.getNode(Op.getOpcode(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, Depth+1), + Op.getOperand(1)); + + // -(X*Y) -> X * -Y + return DAG.getNode(Op.getOpcode(), Op.getValueType(), + Op.getOperand(0), + GetNegatedExpression(Op.getOperand(1), DAG, Depth+1)); + + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FSIN: + return DAG.getNode(Op.getOpcode(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, Depth+1)); + } +} + + +// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc +// that selects between the values 1 and 0, making it equivalent to a setcc. +// Also, set the incoming LHS, RHS, and CC references to the appropriate +// nodes based on the type of node we are checking. This simplifies life a +// bit for the callers. +static bool isSetCCEquivalent(SDOperand N, SDOperand &LHS, SDOperand &RHS, + SDOperand &CC) { + if (N.getOpcode() == ISD::SETCC) { + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(2); + return true; + } + if (N.getOpcode() == ISD::SELECT_CC && + N.getOperand(2).getOpcode() == ISD::Constant && + N.getOperand(3).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(N.getOperand(2))->getValue() == 1 && + cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) { + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(4); + return true; + } + return false; +} + +// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only +// one use. If this is true, it allows the users to invert the operation for +// free when it is profitable to do so. +static bool isOneUseSetCC(SDOperand N) { + SDOperand N0, N1, N2; + if (isSetCCEquivalent(N, N0, N1, N2) && N.Val->hasOneUse()) + return true; + return false; +} + +SDOperand DAGCombiner::ReassociateOps(unsigned Opc, SDOperand N0, SDOperand N1){ + MVT::ValueType VT = N0.getValueType(); + // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use + // reassoc. 
(op (op x, c1), c2) -> (op x, (op c1, c2)) + if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { + if (isa<ConstantSDNode>(N1)) { + SDOperand OpNode = DAG.getNode(Opc, VT, N0.getOperand(1), N1); + AddToWorkList(OpNode.Val); + return DAG.getNode(Opc, VT, OpNode, N0.getOperand(0)); + } else if (N0.hasOneUse()) { + SDOperand OpNode = DAG.getNode(Opc, VT, N0.getOperand(0), N1); + AddToWorkList(OpNode.Val); + return DAG.getNode(Opc, VT, OpNode, N0.getOperand(1)); + } + } + // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use + // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) + if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) { + if (isa<ConstantSDNode>(N0)) { + SDOperand OpNode = DAG.getNode(Opc, VT, N1.getOperand(1), N0); + AddToWorkList(OpNode.Val); + return DAG.getNode(Opc, VT, OpNode, N1.getOperand(0)); + } else if (N1.hasOneUse()) { + SDOperand OpNode = DAG.getNode(Opc, VT, N1.getOperand(0), N0); + AddToWorkList(OpNode.Val); + return DAG.getNode(Opc, VT, OpNode, N1.getOperand(1)); + } + } + return SDOperand(); +} + +//===----------------------------------------------------------------------===// +// Main DAG Combiner implementation +//===----------------------------------------------------------------------===// + +void DAGCombiner::Run(bool RunningAfterLegalize) { + // set the instance variable, so that the various visit routines may use it. + AfterLegalize = RunningAfterLegalize; + + // Add all the dag nodes to the worklist. + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) + WorkList.push_back(I); + + // Create a dummy node (which is not added to allnodes), that adds a reference + // to the root node, preventing it from being deleted, and tracking any + // changes of the root. + HandleSDNode Dummy(DAG.getRoot()); + + // The root of the dag may dangle to deleted nodes until the dag combiner is + // done. Set it to null to avoid confusion. + DAG.setRoot(SDOperand()); + + /// DagCombineInfo - Expose the DAG combiner to the target combiner impls. + TargetLowering::DAGCombinerInfo + DagCombineInfo(DAG, !RunningAfterLegalize, false, this); + + // while the worklist isn't empty, inspect the node on the end of it and + // try and combine it. + while (!WorkList.empty()) { + SDNode *N = WorkList.back(); + WorkList.pop_back(); + + // If N has no uses, it is dead. Make sure to revisit all N's operands once + // N is deleted from the DAG, since they too may now be dead or may have a + // reduced number of uses, allowing other xforms. + if (N->use_empty() && N != &Dummy) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + AddToWorkList(N->getOperand(i).Val); + + DAG.DeleteNode(N); + continue; + } + + SDOperand RV = visit(N); + + // If nothing happened, try a target-specific DAG combine. + if (RV.Val == 0) { + assert(N->getOpcode() != ISD::DELETED_NODE && + "Node was deleted but visit returned NULL!"); + if (N->getOpcode() >= ISD::BUILTIN_OP_END || + TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) + RV = TLI.PerformDAGCombine(N, DagCombineInfo); + } + + if (RV.Val) { + ++NodesCombined; + // If we get back the same node we passed in, rather than a new node or + // zero, we know that the node must have defined multiple values and + // CombineTo was used. Since CombineTo takes care of the worklist + // mechanics for us, we have no work to do in this case. 
+ if (RV.Val != N) { + assert(N->getOpcode() != ISD::DELETED_NODE && + RV.Val->getOpcode() != ISD::DELETED_NODE && + "Node was deleted but visit returned new node!"); + + DOUT << "\nReplacing.3 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(RV.Val->dump(&DAG)); + DOUT << '\n'; + std::vector<SDNode*> NowDead; + if (N->getNumValues() == RV.Val->getNumValues()) + DAG.ReplaceAllUsesWith(N, RV.Val, &NowDead); + else { + assert(N->getValueType(0) == RV.getValueType() && "Type mismatch"); + SDOperand OpV = RV; + DAG.ReplaceAllUsesWith(N, &OpV, &NowDead); + } + + // Push the new node and any users onto the worklist + AddToWorkList(RV.Val); + AddUsersToWorkList(RV.Val); + + // Nodes can be reintroduced into the worklist. Make sure we do not + // process a node that has been replaced. + removeFromWorkList(N); + for (unsigned i = 0, e = NowDead.size(); i != e; ++i) + removeFromWorkList(NowDead[i]); + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + } + } + } + + // If the root changed (e.g. it was a dead load, update the root). + DAG.setRoot(Dummy.getValue()); +} + +SDOperand DAGCombiner::visit(SDNode *N) { + switch(N->getOpcode()) { + default: break; + case ISD::TokenFactor: return visitTokenFactor(N); + case ISD::ADD: return visitADD(N); + case ISD::SUB: return visitSUB(N); + case ISD::ADDC: return visitADDC(N); + case ISD::ADDE: return visitADDE(N); + case ISD::MUL: return visitMUL(N); + case ISD::SDIV: return visitSDIV(N); + case ISD::UDIV: return visitUDIV(N); + case ISD::SREM: return visitSREM(N); + case ISD::UREM: return visitUREM(N); + case ISD::MULHU: return visitMULHU(N); + case ISD::MULHS: return visitMULHS(N); + case ISD::AND: return visitAND(N); + case ISD::OR: return visitOR(N); + case ISD::XOR: return visitXOR(N); + case ISD::SHL: return visitSHL(N); + case ISD::SRA: return visitSRA(N); + case ISD::SRL: return visitSRL(N); + case ISD::CTLZ: return visitCTLZ(N); + case ISD::CTTZ: return visitCTTZ(N); + case ISD::CTPOP: return visitCTPOP(N); + case ISD::SELECT: return visitSELECT(N); + case ISD::SELECT_CC: return visitSELECT_CC(N); + case ISD::SETCC: return visitSETCC(N); + case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); + case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); + case ISD::ANY_EXTEND: return visitANY_EXTEND(N); + case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); + case ISD::TRUNCATE: return visitTRUNCATE(N); + case ISD::BIT_CONVERT: return visitBIT_CONVERT(N); + case ISD::FADD: return visitFADD(N); + case ISD::FSUB: return visitFSUB(N); + case ISD::FMUL: return visitFMUL(N); + case ISD::FDIV: return visitFDIV(N); + case ISD::FREM: return visitFREM(N); + case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); + case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); + case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); + case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); + case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); + case ISD::FP_ROUND: return visitFP_ROUND(N); + case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); + case ISD::FP_EXTEND: return visitFP_EXTEND(N); + case ISD::FNEG: return visitFNEG(N); + case ISD::FABS: return visitFABS(N); + case ISD::BRCOND: return visitBRCOND(N); + case ISD::BR_CC: return visitBR_CC(N); + case ISD::LOAD: return visitLOAD(N); + case ISD::STORE: return visitSTORE(N); + case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); + case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); + case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); + case ISD::VECTOR_SHUFFLE: return 
visitVECTOR_SHUFFLE(N);
+  }
+  return SDOperand();
+}
+
+/// getInputChainForNode - Given a node, return its input chain if it has one,
+/// otherwise return a null sd operand.
+static SDOperand getInputChainForNode(SDNode *N) {
+  if (unsigned NumOps = N->getNumOperands()) {
+    if (N->getOperand(0).getValueType() == MVT::Other)
+      return N->getOperand(0);
+    else if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
+      return N->getOperand(NumOps-1);
+    for (unsigned i = 1; i < NumOps-1; ++i)
+      if (N->getOperand(i).getValueType() == MVT::Other)
+        return N->getOperand(i);
+  }
+  return SDOperand(0, 0);
+}
+
+SDOperand DAGCombiner::visitTokenFactor(SDNode *N) {
+  // If N has two operands, where one has an input chain equal to the other,
+  // the 'other' chain is redundant.
+  if (N->getNumOperands() == 2) {
+    if (getInputChainForNode(N->getOperand(0).Val) == N->getOperand(1))
+      return N->getOperand(0);
+    if (getInputChainForNode(N->getOperand(1).Val) == N->getOperand(0))
+      return N->getOperand(1);
+  }
+
+  SmallVector<SDNode *, 8> TFs;   // List of token factors to visit.
+  SmallVector<SDOperand, 8> Ops;  // Ops for replacing token factor.
+  SmallPtrSet<SDNode*, 16> SeenOps;
+  bool Changed = false;           // If we should replace this token factor.
+
+  // Start out with this token factor.
+  TFs.push_back(N);
+
+  // Iterate through token factors. TFs grows as new token factors are
+  // encountered.
+  for (unsigned i = 0; i < TFs.size(); ++i) {
+    SDNode *TF = TFs[i];
+
+    // Check each of the operands.
+    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
+      SDOperand Op = TF->getOperand(i);
+
+      switch (Op.getOpcode()) {
+      case ISD::EntryToken:
+        // Entry tokens don't need to be added to the list. They are
+        // redundant.
+        Changed = true;
+        break;
+
+      case ISD::TokenFactor:
+        if ((CombinerAA || Op.hasOneUse()) &&
+            std::find(TFs.begin(), TFs.end(), Op.Val) == TFs.end()) {
+          // Queue up for processing.
+          TFs.push_back(Op.Val);
+          // Clean up in case the token factor is removed.
+          AddToWorkList(Op.Val);
+          Changed = true;
+          break;
+        }
+        // Fall thru
+
+      default:
+        // Only add if it isn't already in the list.
+        if (SeenOps.insert(Op.Val))
+          Ops.push_back(Op);
+        else
+          Changed = true;
+        break;
+      }
+    }
+  }
+
+  SDOperand Result;
+
+  // If we've changed things around, replace the token factor.
+  if (Changed) {
+    if (Ops.size() == 0) {
+      // The entry token is the only possible outcome.
+      Result = DAG.getEntryNode();
+    } else {
+      // New and improved token factor.
+      Result = DAG.getNode(ISD::TokenFactor, MVT::Other, &Ops[0], Ops.size());
+    }
+
+    // Don't add users to work list.
+    return CombineTo(N, Result, false);
+  }
+
+  return Result;
+}
+
+static
+SDOperand combineShlAddConstant(SDOperand N0, SDOperand N1, SelectionDAG &DAG) {
+  MVT::ValueType VT = N0.getValueType();
+  SDOperand N00 = N0.getOperand(0);
+  SDOperand N01 = N0.getOperand(1);
+  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
+  if (N01C && N00.getOpcode() == ISD::ADD && N00.Val->hasOneUse() &&
+      isa<ConstantSDNode>(N00.getOperand(1))) {
+    N0 = DAG.getNode(ISD::ADD, VT,
+                     DAG.getNode(ISD::SHL, VT, N00.getOperand(0), N01),
+                     DAG.getNode(ISD::SHL, VT, N00.getOperand(1), N01));
+    return DAG.getNode(ISD::ADD, VT, N0, N1);
+  }
+  return SDOperand();
+}
+
+static
+SDOperand combineSelectAndUse(SDNode *N, SDOperand Slct, SDOperand OtherOp,
+                              SelectionDAG &DAG) {
+  MVT::ValueType VT = N->getValueType(0);
+  unsigned Opc = N->getOpcode();
+  bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
+  SDOperand LHS = isSlctCC ?
Slct.getOperand(2) : Slct.getOperand(1); + SDOperand RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2); + ISD::CondCode CC = ISD::SETCC_INVALID; + if (isSlctCC) + CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get(); + else { + SDOperand CCOp = Slct.getOperand(0); + if (CCOp.getOpcode() == ISD::SETCC) + CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get(); + } + + bool DoXform = false; + bool InvCC = false; + assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) && + "Bad input!"); + if (LHS.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(LHS)->isNullValue()) + DoXform = true; + else if (CC != ISD::SETCC_INVALID && + RHS.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(RHS)->isNullValue()) { + std::swap(LHS, RHS); + bool isInt = MVT::isInteger(isSlctCC ? Slct.getOperand(0).getValueType() + : Slct.getOperand(0).getOperand(0).getValueType()); + CC = ISD::getSetCCInverse(CC, isInt); + DoXform = true; + InvCC = true; + } + + if (DoXform) { + SDOperand Result = DAG.getNode(Opc, VT, OtherOp, RHS); + if (isSlctCC) + return DAG.getSelectCC(OtherOp, Result, + Slct.getOperand(0), Slct.getOperand(1), CC); + SDOperand CCOp = Slct.getOperand(0); + if (InvCC) + CCOp = DAG.getSetCC(CCOp.getValueType(), CCOp.getOperand(0), + CCOp.getOperand(1), CC); + return DAG.getNode(ISD::SELECT, VT, CCOp, OtherOp, Result); + } + return SDOperand(); +} + +SDOperand DAGCombiner::visitADD(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT::ValueType VT = N0.getValueType(); + + // fold vector ops + if (MVT::isVector(VT)) { + SDOperand FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.Val) return FoldedVOp; + } + + // fold (add x, undef) -> undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + // fold (add c1, c2) -> c1+c2 + if (N0C && N1C) + return DAG.getNode(ISD::ADD, VT, N0, N1); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::ADD, VT, N1, N0); + // fold (add x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // fold ((c1-A)+c2) -> (c1+c2)-A + if (N1C && N0.getOpcode() == ISD::SUB) + if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) + return DAG.getNode(ISD::SUB, VT, + DAG.getConstant(N1C->getValue()+N0C->getValue(), VT), + N0.getOperand(1)); + // reassociate add + SDOperand RADD = ReassociateOps(ISD::ADD, N0, N1); + if (RADD.Val != 0) + return RADD; + // fold ((0-A) + B) -> B-A + if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && + cast<ConstantSDNode>(N0.getOperand(0))->isNullValue()) + return DAG.getNode(ISD::SUB, VT, N1, N0.getOperand(1)); + // fold (A + (0-B)) -> A-B + if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) && + cast<ConstantSDNode>(N1.getOperand(0))->isNullValue()) + return DAG.getNode(ISD::SUB, VT, N0, N1.getOperand(1)); + // fold (A+(B-A)) -> B + if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) + return N1.getOperand(0); + + if (!MVT::isVector(VT) && SimplifyDemandedBits(SDOperand(N, 0))) + return SDOperand(N, 0); + + // fold (a+b) -> (a|b) iff a and b share no bits. 
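+  // (e.g. (x << 8) + 255: the low eight bits of the shifted operand are
+  // known zero, so no bit position can produce a carry and the add is
+  // equivalent to an or.)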
+ if (MVT::isInteger(VT) && !MVT::isVector(VT)) { + uint64_t LHSZero, LHSOne; + uint64_t RHSZero, RHSOne; + uint64_t Mask = MVT::getIntVTBitMask(VT); + DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + if (LHSZero) { + DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + + // If all possibly-set bits on the LHS are clear on the RHS, return an OR. + // If all possibly-set bits on the RHS are clear on the LHS, return an OR. + if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || + (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + return DAG.getNode(ISD::OR, VT, N0, N1); + } + } + + // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) + if (N0.getOpcode() == ISD::SHL && N0.Val->hasOneUse()) { + SDOperand Result = combineShlAddConstant(N0, N1, DAG); + if (Result.Val) return Result; + } + if (N1.getOpcode() == ISD::SHL && N1.Val->hasOneUse()) { + SDOperand Result = combineShlAddConstant(N1, N0, DAG); + if (Result.Val) return Result; + } + + // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) + if (N0.getOpcode() == ISD::SELECT && N0.Val->hasOneUse()) { + SDOperand Result = combineSelectAndUse(N, N0, N1, DAG); + if (Result.Val) return Result; + } + if (N1.getOpcode() == ISD::SELECT && N1.Val->hasOneUse()) { + SDOperand Result = combineSelectAndUse(N, N1, N0, DAG); + if (Result.Val) return Result; + } + + return SDOperand(); +} + +SDOperand DAGCombiner::visitADDC(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT::ValueType VT = N0.getValueType(); + + // If the flag result is dead, turn this into an ADD. + if (N->hasNUsesOfValue(0, 1)) + return CombineTo(N, DAG.getNode(ISD::ADD, VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, MVT::Flag)); + + // canonicalize constant to RHS. + if (N0C && !N1C) { + SDOperand Ops[] = { N1, N0 }; + return DAG.getNode(ISD::ADDC, N->getVTList(), Ops, 2); + } + + // fold (addc x, 0) -> x + no carry out + if (N1C && N1C->isNullValue()) + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, MVT::Flag)); + + // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. + uint64_t LHSZero, LHSOne; + uint64_t RHSZero, RHSOne; + uint64_t Mask = MVT::getIntVTBitMask(VT); + DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + if (LHSZero) { + DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + + // If all possibly-set bits on the LHS are clear on the RHS, return an OR. + // If all possibly-set bits on the RHS are clear on the LHS, return an OR. 
+ if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || + (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + return CombineTo(N, DAG.getNode(ISD::OR, VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, MVT::Flag)); + } + + return SDOperand(); +} + +SDOperand DAGCombiner::visitADDE(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + SDOperand CarryIn = N->getOperand(2); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + //MVT::ValueType VT = N0.getValueType(); + + // canonicalize constant to RHS + if (N0C && !N1C) { + SDOperand Ops[] = { N1, N0, CarryIn }; + return DAG.getNode(ISD::ADDE, N->getVTList(), Ops, 3); + } + + // fold (adde x, y, false) -> (addc x, y) + if (CarryIn.getOpcode() == ISD::CARRY_FALSE) { + SDOperand Ops[] = { N1, N0 }; + return DAG.getNode(ISD::ADDC, N->getVTList(), Ops, 2); + } + + return SDOperand(); +} + + + +SDOperand DAGCombiner::visitSUB(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.Val); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val); + MVT::ValueType VT = N0.getValueType(); + + // fold vector ops + if (MVT::isVector(VT)) { + SDOperand FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.Val) return FoldedVOp; + } + + // fold (sub x, x) -> 0 + if (N0 == N1) + return DAG.getConstant(0, N->getValueType(0)); + // fold (sub c1, c2) -> c1-c2 + if (N0C && N1C) + return DAG.getNode(ISD::SUB, VT, N0, N1); + // fold (sub x, c) -> (add x, -c) + if (N1C) + return DAG.getNode(ISD::ADD, VT, N0, DAG.getConstant(-N1C->getValue(), VT)); + // fold (A+B)-A -> B + if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) + return N0.getOperand(1); + // fold (A+B)-B -> A + if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) + return N0.getOperand(0); + // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) + if (N1.getOpcode() == ISD::SELECT && N1.Val->hasOneUse()) { + SDOperand Result = combineSelectAndUse(N, N1, N0, DAG); + if (Result.Val) return Result; + } + // If either operand of a sub is undef, the result is undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDOperand(); +} + +SDOperand DAGCombiner::visitMUL(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT::ValueType VT = N0.getValueType(); + + // fold vector ops + if (MVT::isVector(VT)) { + SDOperand FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.Val) return FoldedVOp; + } + + // fold (mul x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (mul c1, c2) -> c1*c2 + if (N0C && N1C) + return DAG.getNode(ISD::MUL, VT, N0, N1); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::MUL, VT, N1, N0); + // fold (mul x, 0) -> 0 + if (N1C && N1C->isNullValue()) + return N1; + // fold (mul x, -1) -> 0-x + if (N1C && N1C->isAllOnesValue()) + return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0); + // fold (mul x, (1 << c)) -> x << c + if (N1C && isPowerOf2_64(N1C->getValue())) + return DAG.getNode(ISD::SHL, VT, N0, + DAG.getConstant(Log2_64(N1C->getValue()), + TLI.getShiftAmountTy())); + // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c + if (N1C && isPowerOf2_64(-N1C->getSignExtended())) { + // FIXME: If the input is 
something that is easily negated (e.g. a + // single-use add), we should put the negate there. + return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), + DAG.getNode(ISD::SHL, VT, N0, + DAG.getConstant(Log2_64(-N1C->getSignExtended()), + TLI.getShiftAmountTy()))); + } + + // (mul (shl X, c1), c2) -> (mul X, c2 << c1) + if (N1C && N0.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(N0.getOperand(1))) { + SDOperand C3 = DAG.getNode(ISD::SHL, VT, N1, N0.getOperand(1)); + AddToWorkList(C3.Val); + return DAG.getNode(ISD::MUL, VT, N0.getOperand(0), C3); + } + + // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one + // use. + { + SDOperand Sh(0,0), Y(0,0); + // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). + if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && + N0.Val->hasOneUse()) { + Sh = N0; Y = N1; + } else if (N1.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(N1.getOperand(1)) && N1.Val->hasOneUse()) { + Sh = N1; Y = N0; + } + if (Sh.Val) { + SDOperand Mul = DAG.getNode(ISD::MUL, VT, Sh.getOperand(0), Y); + return DAG.getNode(ISD::SHL, VT, Mul, Sh.getOperand(1)); + } + } + // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) + if (N1C && N0.getOpcode() == ISD::ADD && N0.Val->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) { + return DAG.getNode(ISD::ADD, VT, + DAG.getNode(ISD::MUL, VT, N0.getOperand(0), N1), + DAG.getNode(ISD::MUL, VT, N0.getOperand(1), N1)); + } + + // reassociate mul + SDOperand RMUL = ReassociateOps(ISD::MUL, N0, N1); + if (RMUL.Val != 0) + return RMUL; + + return SDOperand(); +} + +SDOperand DAGCombiner::visitSDIV(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.Val); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val); + MVT::ValueType VT = N->getValueType(0); + + // fold vector ops + if (MVT::isVector(VT)) { + SDOperand FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.Val) return FoldedVOp; + } + + // fold (sdiv c1, c2) -> c1/c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.getNode(ISD::SDIV, VT, N0, N1); + // fold (sdiv X, 1) -> X + if (N1C && N1C->getSignExtended() == 1LL) + return N0; + // fold (sdiv X, -1) -> 0-X + if (N1C && N1C->isAllOnesValue()) + return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0); + // If we know the sign bits of both operands are zero, strength reduce to a + // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 + uint64_t SignBit = 1ULL << (MVT::getSizeInBits(VT)-1); + if (DAG.MaskedValueIsZero(N1, SignBit) && + DAG.MaskedValueIsZero(N0, SignBit)) + return DAG.getNode(ISD::UDIV, N1.getValueType(), N0, N1); + // fold (sdiv X, pow2) -> simple ops after legalize + if (N1C && N1C->getValue() && !TLI.isIntDivCheap() && + (isPowerOf2_64(N1C->getSignExtended()) || + isPowerOf2_64(-N1C->getSignExtended()))) { + // If dividing by powers of two is cheap, then don't perform the following + // fold. + if (TLI.isPow2DivCheap()) + return SDOperand(); + int64_t pow2 = N1C->getSignExtended(); + int64_t abs2 = pow2 > 0 ? pow2 : -pow2; + unsigned lg2 = Log2_64(abs2); + // Splat the sign bit into the register + SDOperand SGN = DAG.getNode(ISD::SRA, VT, N0, + DAG.getConstant(MVT::getSizeInBits(VT)-1, + TLI.getShiftAmountTy())); + AddToWorkList(SGN.Val); + // Add (N0 < 0) ? 
abs2 - 1 : 0; + SDOperand SRL = DAG.getNode(ISD::SRL, VT, SGN, + DAG.getConstant(MVT::getSizeInBits(VT)-lg2, + TLI.getShiftAmountTy())); + SDOperand ADD = DAG.getNode(ISD::ADD, VT, N0, SRL); + AddToWorkList(SRL.Val); + AddToWorkList(ADD.Val); // Divide by pow2 + SDOperand SRA = DAG.getNode(ISD::SRA, VT, ADD, + DAG.getConstant(lg2, TLI.getShiftAmountTy())); + // If we're dividing by a positive value, we're done. Otherwise, we must + // negate the result. + if (pow2 > 0) + return SRA; + AddToWorkList(SRA.Val); + return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), SRA); + } + // if integer divide is expensive and we satisfy the requirements, emit an + // alternate sequence. + if (N1C && (N1C->getSignExtended() < -1 || N1C->getSignExtended() > 1) && + !TLI.isIntDivCheap()) { + SDOperand Op = BuildSDIV(N); + if (Op.Val) return Op; + } + + // undef / X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X / undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDOperand(); +} + +SDOperand DAGCombiner::visitUDIV(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.Val); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val); + MVT::ValueType VT = N->getValueType(0); + + // fold vector ops + if (MVT::isVector(VT)) { + SDOperand FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.Val) return FoldedVOp; + } + + // fold (udiv c1, c2) -> c1/c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.getNode(ISD::UDIV, VT, N0, N1); + // fold (udiv x, (1 << c)) -> x >>u c + if (N1C && isPowerOf2_64(N1C->getValue())) + return DAG.getNode(ISD::SRL, VT, N0, + DAG.getConstant(Log2_64(N1C->getValue()), + TLI.getShiftAmountTy())); + // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { + if (isPowerOf2_64(SHC->getValue())) { + MVT::ValueType ADDVT = N1.getOperand(1).getValueType(); + SDOperand Add = DAG.getNode(ISD::ADD, ADDVT, N1.getOperand(1), + DAG.getConstant(Log2_64(SHC->getValue()), + ADDVT)); + AddToWorkList(Add.Val); + return DAG.getNode(ISD::SRL, VT, N0, Add); + } + } + } + // fold (udiv x, c) -> alternate + if (N1C && N1C->getValue() && !TLI.isIntDivCheap()) { + SDOperand Op = BuildUDIV(N); + if (Op.Val) return Op; + } + + // undef / X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X / undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDOperand(); +} + +SDOperand DAGCombiner::visitSREM(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT::ValueType VT = N->getValueType(0); + + // fold (srem c1, c2) -> c1%c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.getNode(ISD::SREM, VT, N0, N1); + // If we know the sign bits of both operands are zero, strength reduce to a + // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 + uint64_t SignBit = 1ULL << (MVT::getSizeInBits(VT)-1); + if (DAG.MaskedValueIsZero(N1, SignBit) && + DAG.MaskedValueIsZero(N0, SignBit)) + return DAG.getNode(ISD::UREM, VT, N0, N1); + + // Unconditionally lower X%C -> X-X/C*C. This allows the X/C logic to hack on + // the remainder operation. 
+ if (N1C && !N1C->isNullValue()) { + SDOperand Div = DAG.getNode(ISD::SDIV, VT, N0, N1); + SDOperand Mul = DAG.getNode(ISD::MUL, VT, Div, N1); + SDOperand Sub = DAG.getNode(ISD::SUB, VT, N0, Mul); + AddToWorkList(Div.Val); + AddToWorkList(Mul.Val); + return Sub; + } + + // undef % X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X % undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDOperand(); +} + +SDOperand DAGCombiner::visitUREM(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT::ValueType VT = N->getValueType(0); + + // fold (urem c1, c2) -> c1%c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.getNode(ISD::UREM, VT, N0, N1); + // fold (urem x, pow2) -> (and x, pow2-1) + if (N1C && !N1C->isNullValue() && isPowerOf2_64(N1C->getValue())) + return DAG.getNode(ISD::AND, VT, N0, DAG.getConstant(N1C->getValue()-1,VT)); + // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { + if (isPowerOf2_64(SHC->getValue())) { + SDOperand Add = DAG.getNode(ISD::ADD, VT, N1,DAG.getConstant(~0ULL,VT)); + AddToWorkList(Add.Val); + return DAG.getNode(ISD::AND, VT, N0, Add); + } + } + } + + // Unconditionally lower X%C -> X-X/C*C. This allows the X/C logic to hack on + // the remainder operation. + if (N1C && !N1C->isNullValue()) { + SDOperand Div = DAG.getNode(ISD::UDIV, VT, N0, N1); + SDOperand Mul = DAG.getNode(ISD::MUL, VT, Div, N1); + SDOperand Sub = DAG.getNode(ISD::SUB, VT, N0, Mul); + AddToWorkList(Div.Val); + AddToWorkList(Mul.Val); + return Sub; + } + + // undef % X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X % undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDOperand(); +} + +SDOperand DAGCombiner::visitMULHS(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT::ValueType VT = N->getValueType(0); + + // fold (mulhs x, 0) -> 0 + if (N1C && N1C->isNullValue()) + return N1; + // fold (mulhs x, 1) -> (sra x, size(x)-1) + if (N1C && N1C->getValue() == 1) + return DAG.getNode(ISD::SRA, N0.getValueType(), N0, + DAG.getConstant(MVT::getSizeInBits(N0.getValueType())-1, + TLI.getShiftAmountTy())); + // fold (mulhs x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + + return SDOperand(); +} + +SDOperand DAGCombiner::visitMULHU(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT::ValueType VT = N->getValueType(0); + + // fold (mulhu x, 0) -> 0 + if (N1C && N1C->isNullValue()) + return N1; + // fold (mulhu x, 1) -> 0 + if (N1C && N1C->getValue() == 1) + return DAG.getConstant(0, N0.getValueType()); + // fold (mulhu x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + + return SDOperand(); +} + +/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with +/// two operands of the same opcode, try to simplify it. 
+SDOperand DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { + SDOperand N0 = N->getOperand(0), N1 = N->getOperand(1); + MVT::ValueType VT = N0.getValueType(); + assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); + + // For each of OP in AND/OR/XOR: + // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) + // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) + // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) + // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) + if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND|| + N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::TRUNCATE) && + N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) { + SDOperand ORNode = DAG.getNode(N->getOpcode(), + N0.getOperand(0).getValueType(), + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(ORNode.Val); + return DAG.getNode(N0.getOpcode(), VT, ORNode); + } + + // For each of OP in SHL/SRL/SRA/AND... + // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) + // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) + // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || + N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && + N0.getOperand(1) == N1.getOperand(1)) { + SDOperand ORNode = DAG.getNode(N->getOpcode(), + N0.getOperand(0).getValueType(), + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(ORNode.Val); + return DAG.getNode(N0.getOpcode(), VT, ORNode, N0.getOperand(1)); + } + + return SDOperand(); +} + +SDOperand DAGCombiner::visitAND(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + SDOperand LL, LR, RL, RR, CC0, CC1; + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT::ValueType VT = N1.getValueType(); + + // fold vector ops + if (MVT::isVector(VT)) { + SDOperand FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.Val) return FoldedVOp; + } + + // fold (and x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (and c1, c2) -> c1&c2 + if (N0C && N1C) + return DAG.getNode(ISD::AND, VT, N0, N1); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::AND, VT, N1, N0); + // fold (and x, -1) -> x + if (N1C && N1C->isAllOnesValue()) + return N0; + // if (and x, c) is known to be zero, return 0 + if (N1C && DAG.MaskedValueIsZero(SDOperand(N, 0), MVT::getIntVTBitMask(VT))) + return DAG.getConstant(0, VT); + // reassociate and + SDOperand RAND = ReassociateOps(ISD::AND, N0, N1); + if (RAND.Val != 0) + return RAND; + // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF + if (N1C && N0.getOpcode() == ISD::OR) + if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) + if ((ORI->getValue() & N1C->getValue()) == N1C->getValue()) + return N1; + // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. + if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { + unsigned InMask = MVT::getIntVTBitMask(N0.getOperand(0).getValueType()); + if (DAG.MaskedValueIsZero(N0.getOperand(0), + ~N1C->getValue() & InMask)) { + SDOperand Zext = DAG.getNode(ISD::ZERO_EXTEND, N0.getValueType(), + N0.getOperand(0)); + + // Replace uses of the AND with uses of the Zero extend node. + CombineTo(N, Zext); + + // We actually want to replace all uses of the any_extend with the + // zero_extend, to avoid duplicating things. This will later cause this + // AND to be folded. 
+ CombineTo(N0.Val, Zext); + return SDOperand(N, 0); // Return N so it doesn't get rechecked! + } + } + // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && + MVT::isInteger(LL.getValueType())) { + // fold (X == 0) & (Y == 0) -> (X|Y == 0) + if (cast<ConstantSDNode>(LR)->getValue() == 0 && Op1 == ISD::SETEQ) { + SDOperand ORNode = DAG.getNode(ISD::OR, LR.getValueType(), LL, RL); + AddToWorkList(ORNode.Val); + return DAG.getSetCC(VT, ORNode, LR, Op1); + } + // fold (X == -1) & (Y == -1) -> (X&Y == -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { + SDOperand ANDNode = DAG.getNode(ISD::AND, LR.getValueType(), LL, RL); + AddToWorkList(ANDNode.Val); + return DAG.getSetCC(VT, ANDNode, LR, Op1); + } + // fold (X > -1) & (Y > -1) -> (X|Y > -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { + SDOperand ORNode = DAG.getNode(ISD::OR, LR.getValueType(), LL, RL); + AddToWorkList(ORNode.Val); + return DAG.getSetCC(VT, ORNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = MVT::isInteger(LL.getValueType()); + ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID) + return DAG.getSetCC(N0.getValueType(), LL, LR, Result); + } + } + + // Simplify: and (op x...), (op y...) -> (op (and x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDOperand Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.Val) return Tmp; + } + + // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) + // fold (and (sra)) -> (and (srl)) when possible. + if (!MVT::isVector(VT) && + SimplifyDemandedBits(SDOperand(N, 0))) + return SDOperand(N, 0); + // fold (zext_inreg (extload x)) -> (zextload x) + if (ISD::isEXTLoad(N0.Val) && ISD::isUNINDEXEDLoad(N0.Val)) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT::ValueType EVT = LN0->getLoadedVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + if (DAG.MaskedValueIsZero(N1, ~0ULL << MVT::getSizeInBits(EVT)) && + (!AfterLegalize || TLI.isLoadXLegal(ISD::ZEXTLOAD, EVT))) { + SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), + LN0->getAlignment()); + AddToWorkList(N); + CombineTo(N0.Val, ExtLoad, ExtLoad.getValue(1)); + return SDOperand(N, 0); // Return N so it doesn't get rechecked! + } + } + // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use + if (ISD::isSEXTLoad(N0.Val) && ISD::isUNINDEXEDLoad(N0.Val) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT::ValueType EVT = LN0->getLoadedVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. 
+ if (DAG.MaskedValueIsZero(N1, ~0ULL << MVT::getSizeInBits(EVT)) && + (!AfterLegalize || TLI.isLoadXLegal(ISD::ZEXTLOAD, EVT))) { + SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), + LN0->getAlignment()); + AddToWorkList(N); + CombineTo(N0.Val, ExtLoad, ExtLoad.getValue(1)); + return SDOperand(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (and (load x), 255) -> (zextload x, i8) + // fold (and (extload x, i16), 255) -> (zextload x, i8) + if (N1C && N0.getOpcode() == ISD::LOAD) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + if (LN0->getExtensionType() != ISD::SEXTLOAD && + LN0->getAddressingMode() == ISD::UNINDEXED && + N0.hasOneUse()) { + MVT::ValueType EVT, LoadedVT; + if (N1C->getValue() == 255) + EVT = MVT::i8; + else if (N1C->getValue() == 65535) + EVT = MVT::i16; + else if (N1C->getValue() == ~0U) + EVT = MVT::i32; + else + EVT = MVT::Other; + + LoadedVT = LN0->getLoadedVT(); + if (EVT != MVT::Other && LoadedVT > EVT && + (!AfterLegalize || TLI.isLoadXLegal(ISD::ZEXTLOAD, EVT))) { + MVT::ValueType PtrType = N0.getOperand(1).getValueType(); + // For big endian targets, we need to add an offset to the pointer to + // load the correct bytes. For little endian systems, we merely need to + // read fewer bytes from the same pointer. + unsigned PtrOff = + (MVT::getSizeInBits(LoadedVT) - MVT::getSizeInBits(EVT)) / 8; + SDOperand NewPtr = LN0->getBasePtr(); + if (!TLI.isLittleEndian()) + NewPtr = DAG.getNode(ISD::ADD, PtrType, NewPtr, + DAG.getConstant(PtrOff, PtrType)); + AddToWorkList(NewPtr.Val); + SDOperand Load = + DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + AddToWorkList(N); + CombineTo(N0.Val, Load, Load.getValue(1)); + return SDOperand(N, 0); // Return N so it doesn't get rechecked! 
+ } + } + } + + return SDOperand(); +} + +SDOperand DAGCombiner::visitOR(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + SDOperand LL, LR, RL, RR, CC0, CC1; + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT::ValueType VT = N1.getValueType(); + unsigned OpSizeInBits = MVT::getSizeInBits(VT); + + // fold vector ops + if (MVT::isVector(VT)) { + SDOperand FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.Val) return FoldedVOp; + } + + // fold (or x, undef) -> -1 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(~0ULL, VT); + // fold (or c1, c2) -> c1|c2 + if (N0C && N1C) + return DAG.getNode(ISD::OR, VT, N0, N1); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::OR, VT, N1, N0); + // fold (or x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // fold (or x, -1) -> -1 + if (N1C && N1C->isAllOnesValue()) + return N1; + // fold (or x, c) -> c iff (x & ~c) == 0 + if (N1C && + DAG.MaskedValueIsZero(N0,~N1C->getValue() & (~0ULL>>(64-OpSizeInBits)))) + return N1; + // reassociate or + SDOperand ROR = ReassociateOps(ISD::OR, N0, N1); + if (ROR.Val != 0) + return ROR; + // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) + if (N1C && N0.getOpcode() == ISD::AND && N0.Val->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) { + ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); + return DAG.getNode(ISD::AND, VT, DAG.getNode(ISD::OR, VT, N0.getOperand(0), + N1), + DAG.getConstant(N1C->getValue() | C1->getValue(), VT)); + } + // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && + MVT::isInteger(LL.getValueType())) { + // fold (X != 0) | (Y != 0) -> (X|Y != 0) + // fold (X < 0) | (Y < 0) -> (X|Y < 0) + if (cast<ConstantSDNode>(LR)->getValue() == 0 && + (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { + SDOperand ORNode = DAG.getNode(ISD::OR, LR.getValueType(), LL, RL); + AddToWorkList(ORNode.Val); + return DAG.getSetCC(VT, ORNode, LR, Op1); + } + // fold (X != -1) | (Y != -1) -> (X&Y != -1) + // fold (X > -1) | (Y > -1) -> (X&Y > -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && + (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { + SDOperand ANDNode = DAG.getNode(ISD::AND, LR.getValueType(), LL, RL); + AddToWorkList(ANDNode.Val); + return DAG.getSetCC(VT, ANDNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = MVT::isInteger(LL.getValueType()); + ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID) + return DAG.getSetCC(N0.getValueType(), LL, LR, Result); + } + } + + // Simplify: or (op x...), (op y...) -> (op (or x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDOperand Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.Val) return Tmp; + } + + // (X & C1) | (Y & C2) -> (X|Y) & C3 if possible. + if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(1).getOpcode() == ISD::Constant && + N1.getOperand(1).getOpcode() == ISD::Constant && + // Don't increase # computations. 
+ (N0.Val->hasOneUse() || N1.Val->hasOneUse())) { + // We can only do this xform if we know that bits from X that are set in C2 + // but not in C1 are already zero. Likewise for Y. + uint64_t LHSMask = cast<ConstantSDNode>(N0.getOperand(1))->getValue(); + uint64_t RHSMask = cast<ConstantSDNode>(N1.getOperand(1))->getValue(); + + if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && + DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { + SDOperand X =DAG.getNode(ISD::OR, VT, N0.getOperand(0), N1.getOperand(0)); + return DAG.getNode(ISD::AND, VT, X, DAG.getConstant(LHSMask|RHSMask, VT)); + } + } + + + // See if this is some rotate idiom. + if (SDNode *Rot = MatchRotate(N0, N1)) + return SDOperand(Rot, 0); + + return SDOperand(); +} + + +/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present. +static bool MatchRotateHalf(SDOperand Op, SDOperand &Shift, SDOperand &Mask) { + if (Op.getOpcode() == ISD::AND) { + if (isa<ConstantSDNode>(Op.getOperand(1))) { + Mask = Op.getOperand(1); + Op = Op.getOperand(0); + } else { + return false; + } + } + + if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { + Shift = Op; + return true; + } + return false; +} + + +// MatchRotate - Handle an 'or' of two operands. If this is one of the many +// idioms for rotate, and if the target supports rotation instructions, generate +// a rot[lr]. +SDNode *DAGCombiner::MatchRotate(SDOperand LHS, SDOperand RHS) { + // Must be a legal type. Expanded an promoted things won't work with rotates. + MVT::ValueType VT = LHS.getValueType(); + if (!TLI.isTypeLegal(VT)) return 0; + + // The target must have at least one rotate flavor. + bool HasROTL = TLI.isOperationLegal(ISD::ROTL, VT); + bool HasROTR = TLI.isOperationLegal(ISD::ROTR, VT); + if (!HasROTL && !HasROTR) return 0; + + // Match "(X shl/srl V1) & V2" where V2 may not be present. + SDOperand LHSShift; // The shift. + SDOperand LHSMask; // AND value if any. + if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) + return 0; // Not part of a rotate. + + SDOperand RHSShift; // The shift. + SDOperand RHSMask; // AND value if any. + if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) + return 0; // Not part of a rotate. + + if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) + return 0; // Not shifting the same value. + + if (LHSShift.getOpcode() == RHSShift.getOpcode()) + return 0; // Shifts must disagree. + + // Canonicalize shl to left side in a shl/srl pair. + if (RHSShift.getOpcode() == ISD::SHL) { + std::swap(LHS, RHS); + std::swap(LHSShift, RHSShift); + std::swap(LHSMask , RHSMask ); + } + + unsigned OpSizeInBits = MVT::getSizeInBits(VT); + SDOperand LHSShiftArg = LHSShift.getOperand(0); + SDOperand LHSShiftAmt = LHSShift.getOperand(1); + SDOperand RHSShiftAmt = RHSShift.getOperand(1); + + // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) + // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) + if (LHSShiftAmt.getOpcode() == ISD::Constant && + RHSShiftAmt.getOpcode() == ISD::Constant) { + uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getValue(); + uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getValue(); + if ((LShVal + RShVal) != OpSizeInBits) + return 0; + + SDOperand Rot; + if (HasROTL) + Rot = DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt); + else + Rot = DAG.getNode(ISD::ROTR, VT, LHSShiftArg, RHSShiftAmt); + + // If there is an AND of either shifted operand, apply it to the result. 
+ if (LHSMask.Val || RHSMask.Val) { + uint64_t Mask = MVT::getIntVTBitMask(VT); + + if (LHSMask.Val) { + uint64_t RHSBits = (1ULL << LShVal)-1; + Mask &= cast<ConstantSDNode>(LHSMask)->getValue() | RHSBits; + } + if (RHSMask.Val) { + uint64_t LHSBits = ~((1ULL << (OpSizeInBits-RShVal))-1); + Mask &= cast<ConstantSDNode>(RHSMask)->getValue() | LHSBits; + } + + Rot = DAG.getNode(ISD::AND, VT, Rot, DAG.getConstant(Mask, VT)); + } + + return Rot.Val; + } + + // If there is a mask here, and we have a variable shift, we can't be sure + // that we're masking out the right stuff. + if (LHSMask.Val || RHSMask.Val) + return 0; + + // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y) + // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y)) + if (RHSShiftAmt.getOpcode() == ISD::SUB && + LHSShiftAmt == RHSShiftAmt.getOperand(1)) { + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) { + if (SUBC->getValue() == OpSizeInBits) + if (HasROTL) + return DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt).Val; + else + return DAG.getNode(ISD::ROTR, VT, LHSShiftArg, RHSShiftAmt).Val; + } + } + + // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y) + // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y)) + if (LHSShiftAmt.getOpcode() == ISD::SUB && + RHSShiftAmt == LHSShiftAmt.getOperand(1)) { + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) { + if (SUBC->getValue() == OpSizeInBits) + if (HasROTL) + return DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt).Val; + else + return DAG.getNode(ISD::ROTR, VT, LHSShiftArg, RHSShiftAmt).Val; + } + } + + // Look for sign/zext/any-extended cases: + if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND + || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND + || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND) && + (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND + || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND + || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND)) { + SDOperand LExtOp0 = LHSShiftAmt.getOperand(0); + SDOperand RExtOp0 = RHSShiftAmt.getOperand(0); + if (RExtOp0.getOpcode() == ISD::SUB && + RExtOp0.getOperand(1) == LExtOp0) { + // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> + // (rotr x, y) + // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> + // (rotl x, (sub 32, y)) + if (ConstantSDNode *SUBC = cast<ConstantSDNode>(RExtOp0.getOperand(0))) { + if (SUBC->getValue() == OpSizeInBits) { + if (HasROTL) + return DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt).Val; + else + return DAG.getNode(ISD::ROTR, VT, LHSShiftArg, RHSShiftAmt).Val; + } + } + } else if (LExtOp0.getOpcode() == ISD::SUB && + RExtOp0 == LExtOp0.getOperand(1)) { + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext r))) -> + // (rotl x, y) + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext r))) -> + // (rotr x, (sub 32, y)) + if (ConstantSDNode *SUBC = cast<ConstantSDNode>(LExtOp0.getOperand(0))) { + if (SUBC->getValue() == OpSizeInBits) { + if (HasROTL) + return DAG.getNode(ISD::ROTL, VT, LHSShiftArg, RHSShiftAmt).Val; + else + return DAG.getNode(ISD::ROTL, VT, LHSShiftArg, LHSShiftAmt).Val; + } + } + } + } + + return 0; +} + + +SDOperand DAGCombiner::visitXOR(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + SDOperand LHS, RHS, CC; + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT::ValueType VT = N0.getValueType(); + + // fold vector ops + if (MVT::isVector(VT)) { 
+ SDOperand FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.Val) return FoldedVOp; + } + + // fold (xor x, undef) -> undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + // fold (xor c1, c2) -> c1^c2 + if (N0C && N1C) + return DAG.getNode(ISD::XOR, VT, N0, N1); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::XOR, VT, N1, N0); + // fold (xor x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // reassociate xor + SDOperand RXOR = ReassociateOps(ISD::XOR, N0, N1); + if (RXOR.Val != 0) + return RXOR; + // fold !(x cc y) -> (x !cc y) + if (N1C && N1C->getValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) { + bool isInt = MVT::isInteger(LHS.getValueType()); + ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + isInt); + if (N0.getOpcode() == ISD::SETCC) + return DAG.getSetCC(VT, LHS, RHS, NotCC); + if (N0.getOpcode() == ISD::SELECT_CC) + return DAG.getSelectCC(LHS, RHS, N0.getOperand(2),N0.getOperand(3),NotCC); + assert(0 && "Unhandled SetCC Equivalent!"); + abort(); + } + // fold !(x or y) -> (!x and !y) iff x or y are setcc + if (N1C && N1C->getValue() == 1 && VT == MVT::i1 && + (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { + SDOperand LHS = N0.getOperand(0), RHS = N0.getOperand(1); + if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { + unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; + LHS = DAG.getNode(ISD::XOR, VT, LHS, N1); // RHS = ~LHS + RHS = DAG.getNode(ISD::XOR, VT, RHS, N1); // RHS = ~RHS + AddToWorkList(LHS.Val); AddToWorkList(RHS.Val); + return DAG.getNode(NewOpcode, VT, LHS, RHS); + } + } + // fold !(x or y) -> (!x and !y) iff x or y are constants + if (N1C && N1C->isAllOnesValue() && + (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { + SDOperand LHS = N0.getOperand(0), RHS = N0.getOperand(1); + if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { + unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; + LHS = DAG.getNode(ISD::XOR, VT, LHS, N1); // RHS = ~LHS + RHS = DAG.getNode(ISD::XOR, VT, RHS, N1); // RHS = ~RHS + AddToWorkList(LHS.Val); AddToWorkList(RHS.Val); + return DAG.getNode(NewOpcode, VT, LHS, RHS); + } + } + // fold (xor (xor x, c1), c2) -> (xor x, c1^c2) + if (N1C && N0.getOpcode() == ISD::XOR) { + ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0)); + ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (N00C) + return DAG.getNode(ISD::XOR, VT, N0.getOperand(1), + DAG.getConstant(N1C->getValue()^N00C->getValue(), VT)); + if (N01C) + return DAG.getNode(ISD::XOR, VT, N0.getOperand(0), + DAG.getConstant(N1C->getValue()^N01C->getValue(), VT)); + } + // fold (xor x, x) -> 0 + if (N0 == N1) { + if (!MVT::isVector(VT)) { + return DAG.getConstant(0, VT); + } else if (!AfterLegalize || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { + // Produce a vector of zeros. + SDOperand El = DAG.getConstant(0, MVT::getVectorElementType(VT)); + std::vector<SDOperand> Ops(MVT::getVectorNumElements(VT), El); + return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size()); + } + } + + // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDOperand Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.Val) return Tmp; + } + + // Simplify the expression using non-local knowledge. 
+ if (!MVT::isVector(VT) && + SimplifyDemandedBits(SDOperand(N, 0))) + return SDOperand(N, 0); + + return SDOperand(); +} + +SDOperand DAGCombiner::visitSHL(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT::ValueType VT = N0.getValueType(); + unsigned OpSizeInBits = MVT::getSizeInBits(VT); + + // fold (shl c1, c2) -> c1<<c2 + if (N0C && N1C) + return DAG.getNode(ISD::SHL, VT, N0, N1); + // fold (shl 0, x) -> 0 + if (N0C && N0C->isNullValue()) + return N0; + // fold (shl x, c >= size(x)) -> undef + if (N1C && N1C->getValue() >= OpSizeInBits) + return DAG.getNode(ISD::UNDEF, VT); + // fold (shl x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // if (shl x, c) is known to be zero, return 0 + if (DAG.MaskedValueIsZero(SDOperand(N, 0), MVT::getIntVTBitMask(VT))) + return DAG.getConstant(0, VT); + if (N1C && SimplifyDemandedBits(SDOperand(N, 0))) + return SDOperand(N, 0); + // fold (shl (shl x, c1), c2) -> 0 or (shl x, c1+c2) + if (N1C && N0.getOpcode() == ISD::SHL && + N0.getOperand(1).getOpcode() == ISD::Constant) { + uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getValue(); + uint64_t c2 = N1C->getValue(); + if (c1 + c2 > OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SHL, VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, N1.getValueType())); + } + // fold (shl (srl x, c1), c2) -> (shl (and x, -1 << c1), c2-c1) or + // (srl (and x, -1 << c1), c1-c2) + if (N1C && N0.getOpcode() == ISD::SRL && + N0.getOperand(1).getOpcode() == ISD::Constant) { + uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getValue(); + uint64_t c2 = N1C->getValue(); + SDOperand Mask = DAG.getNode(ISD::AND, VT, N0.getOperand(0), + DAG.getConstant(~0ULL << c1, VT)); + if (c2 > c1) + return DAG.getNode(ISD::SHL, VT, Mask, + DAG.getConstant(c2-c1, N1.getValueType())); + else + return DAG.getNode(ISD::SRL, VT, Mask, + DAG.getConstant(c1-c2, N1.getValueType())); + } + // fold (shl (sra x, c1), c1) -> (and x, -1 << c1) + if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) + return DAG.getNode(ISD::AND, VT, N0.getOperand(0), + DAG.getConstant(~0ULL << N1C->getValue(), VT)); + return SDOperand(); +} + +SDOperand DAGCombiner::visitSRA(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT::ValueType VT = N0.getValueType(); + + // fold (sra c1, c2) -> c1>>c2 + if (N0C && N1C) + return DAG.getNode(ISD::SRA, VT, N0, N1); + // fold (sra 0, x) -> 0 + if (N0C && N0C->isNullValue()) + return N0; + // fold (sra -1, x) -> -1 + if (N0C && N0C->isAllOnesValue()) + return N0; + // fold (sra x, c >= size(x)) -> undef + if (N1C && N1C->getValue() >= MVT::getSizeInBits(VT)) + return DAG.getNode(ISD::UNDEF, VT); + // fold (sra x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports + // sext_inreg. 
+ if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { + unsigned LowBits = MVT::getSizeInBits(VT) - (unsigned)N1C->getValue(); + MVT::ValueType EVT; + switch (LowBits) { + default: EVT = MVT::Other; break; + case 1: EVT = MVT::i1; break; + case 8: EVT = MVT::i8; break; + case 16: EVT = MVT::i16; break; + case 32: EVT = MVT::i32; break; + } + if (EVT > MVT::Other && TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT)) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, VT, N0.getOperand(0), + DAG.getValueType(EVT)); + } + + // fold (sra (sra x, c1), c2) -> (sra x, c1+c2) + if (N1C && N0.getOpcode() == ISD::SRA) { + if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + unsigned Sum = N1C->getValue() + C1->getValue(); + if (Sum >= MVT::getSizeInBits(VT)) Sum = MVT::getSizeInBits(VT)-1; + return DAG.getNode(ISD::SRA, VT, N0.getOperand(0), + DAG.getConstant(Sum, N1C->getValueType(0))); + } + } + + // Simplify, based on bits shifted out of the LHS. + if (N1C && SimplifyDemandedBits(SDOperand(N, 0))) + return SDOperand(N, 0); + + + // If the sign bit is known to be zero, switch this to a SRL. + if (DAG.MaskedValueIsZero(N0, MVT::getIntVTSignBit(VT))) + return DAG.getNode(ISD::SRL, VT, N0, N1); + return SDOperand(); +} + +SDOperand DAGCombiner::visitSRL(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT::ValueType VT = N0.getValueType(); + unsigned OpSizeInBits = MVT::getSizeInBits(VT); + + // fold (srl c1, c2) -> c1 >>u c2 + if (N0C && N1C) + return DAG.getNode(ISD::SRL, VT, N0, N1); + // fold (srl 0, x) -> 0 + if (N0C && N0C->isNullValue()) + return N0; + // fold (srl x, c >= size(x)) -> undef + if (N1C && N1C->getValue() >= OpSizeInBits) + return DAG.getNode(ISD::UNDEF, VT); + // fold (srl x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // if (srl x, c) is known to be zero, return 0 + if (N1C && DAG.MaskedValueIsZero(SDOperand(N, 0), ~0ULL >> (64-OpSizeInBits))) + return DAG.getConstant(0, VT); + + // fold (srl (srl x, c1), c2) -> 0 or (srl x, c1+c2) + if (N1C && N0.getOpcode() == ISD::SRL && + N0.getOperand(1).getOpcode() == ISD::Constant) { + uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getValue(); + uint64_t c2 = N1C->getValue(); + if (c1 + c2 > OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SRL, VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, N1.getValueType())); + } + + // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) + if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { + // Shifting in all undef bits? + MVT::ValueType SmallVT = N0.getOperand(0).getValueType(); + if (N1C->getValue() >= MVT::getSizeInBits(SmallVT)) + return DAG.getNode(ISD::UNDEF, VT); + + SDOperand SmallShift = DAG.getNode(ISD::SRL, SmallVT, N0.getOperand(0), N1); + AddToWorkList(SmallShift.Val); + return DAG.getNode(ISD::ANY_EXTEND, VT, SmallShift); + } + + // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign + // bit, which is unmodified by sra. + if (N1C && N1C->getValue()+1 == MVT::getSizeInBits(VT)) { + if (N0.getOpcode() == ISD::SRA) + return DAG.getNode(ISD::SRL, VT, N0.getOperand(0), N1); + } + + // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). 
+ if (N1C && N0.getOpcode() == ISD::CTLZ && + N1C->getValue() == Log2_32(MVT::getSizeInBits(VT))) { + uint64_t KnownZero, KnownOne, Mask = MVT::getIntVTBitMask(VT); + DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne); + + // If any of the input bits are KnownOne, then the input couldn't be all + // zeros, thus the result of the srl will always be zero. + if (KnownOne) return DAG.getConstant(0, VT); + + // If all of the bits input the to ctlz node are known to be zero, then + // the result of the ctlz is "32" and the result of the shift is one. + uint64_t UnknownBits = ~KnownZero & Mask; + if (UnknownBits == 0) return DAG.getConstant(1, VT); + + // Otherwise, check to see if there is exactly one bit input to the ctlz. + if ((UnknownBits & (UnknownBits-1)) == 0) { + // Okay, we know that only that the single bit specified by UnknownBits + // could be set on input to the CTLZ node. If this bit is set, the SRL + // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair + // to an SRL,XOR pair, which is likely to simplify more. + unsigned ShAmt = CountTrailingZeros_64(UnknownBits); + SDOperand Op = N0.getOperand(0); + if (ShAmt) { + Op = DAG.getNode(ISD::SRL, VT, Op, + DAG.getConstant(ShAmt, TLI.getShiftAmountTy())); + AddToWorkList(Op.Val); + } + return DAG.getNode(ISD::XOR, VT, Op, DAG.getConstant(1, VT)); + } + } + + // fold operands of srl based on knowledge that the low bits are not + // demanded. + if (N1C && SimplifyDemandedBits(SDOperand(N, 0))) + return SDOperand(N, 0); + + return SDOperand(); +} + +SDOperand DAGCombiner::visitCTLZ(SDNode *N) { + SDOperand N0 = N->getOperand(0); + MVT::ValueType VT = N->getValueType(0); + + // fold (ctlz c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTLZ, VT, N0); + return SDOperand(); +} + +SDOperand DAGCombiner::visitCTTZ(SDNode *N) { + SDOperand N0 = N->getOperand(0); + MVT::ValueType VT = N->getValueType(0); + + // fold (cttz c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTTZ, VT, N0); + return SDOperand(); +} + +SDOperand DAGCombiner::visitCTPOP(SDNode *N) { + SDOperand N0 = N->getOperand(0); + MVT::ValueType VT = N->getValueType(0); + + // fold (ctpop c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTPOP, VT, N0); + return SDOperand(); +} + +SDOperand DAGCombiner::visitSELECT(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + SDOperand N2 = N->getOperand(2); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); + MVT::ValueType VT = N->getValueType(0); + + // fold select C, X, X -> X + if (N1 == N2) + return N1; + // fold select true, X, Y -> X + if (N0C && !N0C->isNullValue()) + return N1; + // fold select false, X, Y -> Y + if (N0C && N0C->isNullValue()) + return N2; + // fold select C, 1, X -> C | X + if (MVT::i1 == VT && N1C && N1C->getValue() == 1) + return DAG.getNode(ISD::OR, VT, N0, N2); + // fold select C, 0, X -> ~C & X + // FIXME: this should check for C type == X type, not i1? 
+ if (MVT::i1 == VT && N1C && N1C->isNullValue()) { + SDOperand XORNode = DAG.getNode(ISD::XOR, VT, N0, DAG.getConstant(1, VT)); + AddToWorkList(XORNode.Val); + return DAG.getNode(ISD::AND, VT, XORNode, N2); + } + // fold select C, X, 1 -> ~C | X + if (MVT::i1 == VT && N2C && N2C->getValue() == 1) { + SDOperand XORNode = DAG.getNode(ISD::XOR, VT, N0, DAG.getConstant(1, VT)); + AddToWorkList(XORNode.Val); + return DAG.getNode(ISD::OR, VT, XORNode, N1); + } + // fold select C, X, 0 -> C & X + // FIXME: this should check for C type == X type, not i1? + if (MVT::i1 == VT && N2C && N2C->isNullValue()) + return DAG.getNode(ISD::AND, VT, N0, N1); + // fold X ? X : Y --> X ? 1 : Y --> X | Y + if (MVT::i1 == VT && N0 == N1) + return DAG.getNode(ISD::OR, VT, N0, N2); + // fold X ? Y : X --> X ? Y : 0 --> X & Y + if (MVT::i1 == VT && N0 == N2) + return DAG.getNode(ISD::AND, VT, N0, N1); + + // If we can fold this based on the true/false value, do so. + if (SimplifySelectOps(N, N1, N2)) + return SDOperand(N, 0); // Don't revisit N. + + // fold selects based on a setcc into other things, such as min/max/abs + if (N0.getOpcode() == ISD::SETCC) + // FIXME: + // Check against MVT::Other for SELECT_CC, which is a workaround for targets + // having to say they don't support SELECT_CC on every type the DAG knows + // about, since there is no way to mark an opcode illegal at all value types + if (TLI.isOperationLegal(ISD::SELECT_CC, MVT::Other)) + return DAG.getNode(ISD::SELECT_CC, VT, N0.getOperand(0), N0.getOperand(1), + N1, N2, N0.getOperand(2)); + else + return SimplifySelect(N0, N1, N2); + return SDOperand(); +} + +SDOperand DAGCombiner::visitSELECT_CC(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + SDOperand N2 = N->getOperand(2); + SDOperand N3 = N->getOperand(3); + SDOperand N4 = N->getOperand(4); + ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get(); + + // fold select_cc lhs, rhs, x, x, cc -> x + if (N2 == N3) + return N2; + + // Determine if the condition we're dealing with is constant + SDOperand SCC = SimplifySetCC(TLI.getSetCCResultTy(), N0, N1, CC, false); + if (SCC.Val) AddToWorkList(SCC.Val); + + if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.Val)) { + if (SCCC->getValue()) + return N2; // cond always true -> true val + else + return N3; // cond always false -> false val + } + + // Fold to a simpler select_cc + if (SCC.Val && SCC.getOpcode() == ISD::SETCC) + return DAG.getNode(ISD::SELECT_CC, N2.getValueType(), + SCC.getOperand(0), SCC.getOperand(1), N2, N3, + SCC.getOperand(2)); + + // If we can fold this based on the true/false value, do so. + if (SimplifySelectOps(N, N2, N3)) + return SDOperand(N, 0); // Don't revisit N. 
+ + // fold select_cc into other things, such as min/max/abs + return SimplifySelectCC(N0, N1, N2, N3, CC); +} + +SDOperand DAGCombiner::visitSETCC(SDNode *N) { + return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), + cast<CondCodeSDNode>(N->getOperand(2))->get()); +} + +SDOperand DAGCombiner::visitSIGN_EXTEND(SDNode *N) { + SDOperand N0 = N->getOperand(0); + MVT::ValueType VT = N->getValueType(0); + + // fold (sext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::SIGN_EXTEND, VT, N0); + + // fold (sext (sext x)) -> (sext x) + // fold (sext (aext x)) -> (sext x) + if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) + return DAG.getNode(ISD::SIGN_EXTEND, VT, N0.getOperand(0)); + + // fold (sext (truncate (load x))) -> (sext (smaller load x)) + // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDOperand NarrowLoad = ReduceLoadWidth(N0.Val); + if (NarrowLoad.Val) { + if (NarrowLoad.Val != N0.Val) + CombineTo(N0.Val, NarrowLoad); + return DAG.getNode(ISD::SIGN_EXTEND, VT, NarrowLoad); + } + } + + // See if the value being truncated is already sign extended. If so, just + // eliminate the trunc/sext pair. + if (N0.getOpcode() == ISD::TRUNCATE) { + SDOperand Op = N0.getOperand(0); + unsigned OpBits = MVT::getSizeInBits(Op.getValueType()); + unsigned MidBits = MVT::getSizeInBits(N0.getValueType()); + unsigned DestBits = MVT::getSizeInBits(VT); + unsigned NumSignBits = DAG.ComputeNumSignBits(Op); + + if (OpBits == DestBits) { + // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign + // bits, it is already ready. + if (NumSignBits > DestBits-MidBits) + return Op; + } else if (OpBits < DestBits) { + // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign + // bits, just sext from i32. + if (NumSignBits > OpBits-MidBits) + return DAG.getNode(ISD::SIGN_EXTEND, VT, Op); + } else { + // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign + // bits, just truncate to i32. + if (NumSignBits > OpBits-MidBits) + return DAG.getNode(ISD::TRUNCATE, VT, Op); + } + + // fold (sext (truncate x)) -> (sextinreg x). + if (!AfterLegalize || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, + N0.getValueType())) { + if (Op.getValueType() < VT) + Op = DAG.getNode(ISD::ANY_EXTEND, VT, Op); + else if (Op.getValueType() > VT) + Op = DAG.getNode(ISD::TRUNCATE, VT, Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, VT, Op, + DAG.getValueType(N0.getValueType())); + } + } + + // fold (sext (load x)) -> (sext (truncate (sextload x))) + if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() && + (!AfterLegalize||TLI.isLoadXLegal(ISD::SEXTLOAD, N0.getValueType()))){ + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDOperand(N, 0); // Return N so it doesn't get rechecked! 
+ } + + // fold (sext (sextload x)) -> (sext (truncate (sextload x))) + // fold (sext ( extload x)) -> (sext (truncate (sextload x))) + if ((ISD::isSEXTLoad(N0.Val) || ISD::isEXTLoad(N0.Val)) && + ISD::isUNINDEXEDLoad(N0.Val) && N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT::ValueType EVT = LN0->getLoadedVT(); + if (!AfterLegalize || TLI.isLoadXLegal(ISD::SEXTLOAD, EVT)) { + SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDOperand(N, 0); // Return N so it doesn't get rechecked! + } + } + + // sext(setcc x,y,cc) -> select_cc x, y, -1, 0, cc + if (N0.getOpcode() == ISD::SETCC) { + SDOperand SCC = + SimplifySelectCC(N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(~0ULL, VT), DAG.getConstant(0, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); + if (SCC.Val) return SCC; + } + + return SDOperand(); +} + +SDOperand DAGCombiner::visitZERO_EXTEND(SDNode *N) { + SDOperand N0 = N->getOperand(0); + MVT::ValueType VT = N->getValueType(0); + + // fold (zext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::ZERO_EXTEND, VT, N0); + // fold (zext (zext x)) -> (zext x) + // fold (zext (aext x)) -> (zext x) + if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) + return DAG.getNode(ISD::ZERO_EXTEND, VT, N0.getOperand(0)); + + // fold (zext (truncate (load x))) -> (zext (smaller load x)) + // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDOperand NarrowLoad = ReduceLoadWidth(N0.Val); + if (NarrowLoad.Val) { + if (NarrowLoad.Val != N0.Val) + CombineTo(N0.Val, NarrowLoad); + return DAG.getNode(ISD::ZERO_EXTEND, VT, NarrowLoad); + } + } + + // fold (zext (truncate x)) -> (and x, mask) + if (N0.getOpcode() == ISD::TRUNCATE && + (!AfterLegalize || TLI.isOperationLegal(ISD::AND, VT))) { + SDOperand Op = N0.getOperand(0); + if (Op.getValueType() < VT) { + Op = DAG.getNode(ISD::ANY_EXTEND, VT, Op); + } else if (Op.getValueType() > VT) { + Op = DAG.getNode(ISD::TRUNCATE, VT, Op); + } + return DAG.getZeroExtendInReg(Op, N0.getValueType()); + } + + // fold (zext (and (trunc x), cst)) -> (and x, cst). + if (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::TRUNCATE && + N0.getOperand(1).getOpcode() == ISD::Constant) { + SDOperand X = N0.getOperand(0).getOperand(0); + if (X.getValueType() < VT) { + X = DAG.getNode(ISD::ANY_EXTEND, VT, X); + } else if (X.getValueType() > VT) { + X = DAG.getNode(ISD::TRUNCATE, VT, X); + } + uint64_t Mask = cast<ConstantSDNode>(N0.getOperand(1))->getValue(); + return DAG.getNode(ISD::AND, VT, X, DAG.getConstant(Mask, VT)); + } + + // fold (zext (load x)) -> (zext (truncate (zextload x))) + if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() && + (!AfterLegalize||TLI.isLoadXLegal(ISD::ZEXTLOAD, N0.getValueType()))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDOperand(N, 0); // Return N so it doesn't get rechecked! 
+ } + + // fold (zext (zextload x)) -> (zext (truncate (zextload x))) + // fold (zext ( extload x)) -> (zext (truncate (zextload x))) + if ((ISD::isZEXTLoad(N0.Val) || ISD::isEXTLoad(N0.Val)) && + ISD::isUNINDEXEDLoad(N0.Val) && N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT::ValueType EVT = LN0->getLoadedVT(); + SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDOperand(N, 0); // Return N so it doesn't get rechecked! + } + + // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + if (N0.getOpcode() == ISD::SETCC) { + SDOperand SCC = + SimplifySelectCC(N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, VT), DAG.getConstant(0, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); + if (SCC.Val) return SCC; + } + + return SDOperand(); +} + +SDOperand DAGCombiner::visitANY_EXTEND(SDNode *N) { + SDOperand N0 = N->getOperand(0); + MVT::ValueType VT = N->getValueType(0); + + // fold (aext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::ANY_EXTEND, VT, N0); + // fold (aext (aext x)) -> (aext x) + // fold (aext (zext x)) -> (zext x) + // fold (aext (sext x)) -> (sext x) + if (N0.getOpcode() == ISD::ANY_EXTEND || + N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND) + return DAG.getNode(N0.getOpcode(), VT, N0.getOperand(0)); + + // fold (aext (truncate (load x))) -> (aext (smaller load x)) + // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDOperand NarrowLoad = ReduceLoadWidth(N0.Val); + if (NarrowLoad.Val) { + if (NarrowLoad.Val != N0.Val) + CombineTo(N0.Val, NarrowLoad); + return DAG.getNode(ISD::ANY_EXTEND, VT, NarrowLoad); + } + } + + // fold (aext (truncate x)) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDOperand TruncOp = N0.getOperand(0); + if (TruncOp.getValueType() == VT) + return TruncOp; // x iff x size == zext size. + if (TruncOp.getValueType() > VT) + return DAG.getNode(ISD::TRUNCATE, VT, TruncOp); + return DAG.getNode(ISD::ANY_EXTEND, VT, TruncOp); + } + + // fold (aext (and (trunc x), cst)) -> (and x, cst). + if (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::TRUNCATE && + N0.getOperand(1).getOpcode() == ISD::Constant) { + SDOperand X = N0.getOperand(0).getOperand(0); + if (X.getValueType() < VT) { + X = DAG.getNode(ISD::ANY_EXTEND, VT, X); + } else if (X.getValueType() > VT) { + X = DAG.getNode(ISD::TRUNCATE, VT, X); + } + uint64_t Mask = cast<ConstantSDNode>(N0.getOperand(1))->getValue(); + return DAG.getNode(ISD::AND, VT, X, DAG.getConstant(Mask, VT)); + } + + // fold (aext (load x)) -> (aext (truncate (extload x))) + if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() && + (!AfterLegalize||TLI.isLoadXLegal(ISD::EXTLOAD, N0.getValueType()))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDOperand ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDOperand(N, 0); // Return N so it doesn't get rechecked! 
+ } + + // fold (aext (zextload x)) -> (aext (truncate (zextload x))) + // fold (aext (sextload x)) -> (aext (truncate (sextload x))) + // fold (aext ( extload x)) -> (aext (truncate (extload x))) + if (N0.getOpcode() == ISD::LOAD && + !ISD::isNON_EXTLoad(N0.Val) && ISD::isUNINDEXEDLoad(N0.Val) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT::ValueType EVT = LN0->getLoadedVT(); + SDOperand ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDOperand(N, 0); // Return N so it doesn't get rechecked! + } + + // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + if (N0.getOpcode() == ISD::SETCC) { + SDOperand SCC = + SimplifySelectCC(N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, VT), DAG.getConstant(0, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); + if (SCC.Val) + return SCC; + } + + return SDOperand(); +} + +/// ReduceLoadWidth - If the result of a wider load is shifted to right of N +/// bits and then truncated to a narrower type and where N is a multiple +/// of number of bits of the narrower type, transform it to a narrower load +/// from address + N / num of bits of new type. If the result is to be +/// extended, also fold the extension to form a extending load. +SDOperand DAGCombiner::ReduceLoadWidth(SDNode *N) { + unsigned Opc = N->getOpcode(); + ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; + SDOperand N0 = N->getOperand(0); + MVT::ValueType VT = N->getValueType(0); + MVT::ValueType EVT = N->getValueType(0); + + // Special case: SIGN_EXTEND_INREG is basically truncating to EVT then + // extended to VT. + if (Opc == ISD::SIGN_EXTEND_INREG) { + ExtType = ISD::SEXTLOAD; + EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + if (AfterLegalize && !TLI.isLoadXLegal(ISD::SEXTLOAD, EVT)) + return SDOperand(); + } + + unsigned EVTBits = MVT::getSizeInBits(EVT); + unsigned ShAmt = 0; + bool CombineSRL = false; + if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { + if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + ShAmt = N01->getValue(); + // Is the shift amount a multiple of size of VT? + if ((ShAmt & (EVTBits-1)) == 0) { + N0 = N0.getOperand(0); + if (MVT::getSizeInBits(N0.getValueType()) <= EVTBits) + return SDOperand(); + CombineSRL = true; + } + } + } + + if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() && + // Do not allow folding to i1 here. i1 is implicitly stored in memory in + // zero extended form: by shrinking the load, we lose track of the fact + // that it is already zero extended. + // FIXME: This should be reevaluated. + VT != MVT::i1) { + assert(MVT::getSizeInBits(N0.getValueType()) > EVTBits && + "Cannot truncate to larger type!"); + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT::ValueType PtrType = N0.getOperand(1).getValueType(); + // For big endian targets, we need to adjust the offset to the pointer to + // load the correct bytes. + if (!TLI.isLittleEndian()) + ShAmt = MVT::getSizeInBits(N0.getValueType()) - ShAmt - EVTBits; + uint64_t PtrOff = ShAmt / 8; + SDOperand NewPtr = DAG.getNode(ISD::ADD, PtrType, LN0->getBasePtr(), + DAG.getConstant(PtrOff, PtrType)); + AddToWorkList(NewPtr.Val); + SDOperand Load = (ExtType == ISD::NON_EXTLOAD) + ? 
DAG.getLoad(VT, LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->isVolatile(), LN0->getAlignment()) + : DAG.getExtLoad(ExtType, VT, LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + AddToWorkList(N); + if (CombineSRL) { + std::vector<SDNode*> NowDead; + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), NowDead); + CombineTo(N->getOperand(0).Val, Load); + } else + CombineTo(N0.Val, Load, Load.getValue(1)); + if (ShAmt) { + if (Opc == ISD::SIGN_EXTEND_INREG) + return DAG.getNode(Opc, VT, Load, N->getOperand(1)); + else + return DAG.getNode(Opc, VT, Load); + } + return SDOperand(N, 0); // Return N so it doesn't get rechecked! + } + + return SDOperand(); +} + + +SDOperand DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + MVT::ValueType VT = N->getValueType(0); + MVT::ValueType EVT = cast<VTSDNode>(N1)->getVT(); + unsigned EVTBits = MVT::getSizeInBits(EVT); + + // fold (sext_in_reg c1) -> c1 + if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, VT, N0, N1); + + // If the input is already sign extended, just drop the extension. + if (DAG.ComputeNumSignBits(N0) >= MVT::getSizeInBits(VT)-EVTBits+1) + return N0; + + // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 + if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + EVT < cast<VTSDNode>(N0.getOperand(1))->getVT()) { + return DAG.getNode(ISD::SIGN_EXTEND_INREG, VT, N0.getOperand(0), N1); + } + + // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. + if (DAG.MaskedValueIsZero(N0, 1ULL << (EVTBits-1))) + return DAG.getZeroExtendInReg(N0, EVT); + + // fold operands of sext_in_reg based on knowledge that the top bits are not + // demanded. + if (SimplifyDemandedBits(SDOperand(N, 0))) + return SDOperand(N, 0); + + // fold (sext_in_reg (load x)) -> (smaller sextload x) + // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) + SDOperand NarrowLoad = ReduceLoadWidth(N); + if (NarrowLoad.Val) + return NarrowLoad; + + // fold (sext_in_reg (srl X, 24), i8) -> sra X, 24 + // fold (sext_in_reg (srl X, 23), i8) -> sra X, 23 iff possible. + // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. + if (N0.getOpcode() == ISD::SRL) { + if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) + if (ShAmt->getValue()+EVTBits <= MVT::getSizeInBits(VT)) { + // We can turn this into an SRA iff the input to the SRL is already sign + // extended enough. + unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); + if (MVT::getSizeInBits(VT)-(ShAmt->getValue()+EVTBits) < InSignBits) + return DAG.getNode(ISD::SRA, VT, N0.getOperand(0), N0.getOperand(1)); + } + } + + // fold (sext_inreg (extload x)) -> (sextload x) + if (ISD::isEXTLoad(N0.Val) && + ISD::isUNINDEXEDLoad(N0.Val) && + EVT == cast<LoadSDNode>(N0)->getLoadedVT() && + (!AfterLegalize || TLI.isLoadXLegal(ISD::SEXTLOAD, EVT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.Val, ExtLoad, ExtLoad.getValue(1)); + return SDOperand(N, 0); // Return N so it doesn't get rechecked! 
+ } + // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use + if (ISD::isZEXTLoad(N0.Val) && ISD::isUNINDEXEDLoad(N0.Val) && + N0.hasOneUse() && + EVT == cast<LoadSDNode>(N0)->getLoadedVT() && + (!AfterLegalize || TLI.isLoadXLegal(ISD::SEXTLOAD, EVT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.Val, ExtLoad, ExtLoad.getValue(1)); + return SDOperand(N, 0); // Return N so it doesn't get rechecked! + } + return SDOperand(); +} + +SDOperand DAGCombiner::visitTRUNCATE(SDNode *N) { + SDOperand N0 = N->getOperand(0); + MVT::ValueType VT = N->getValueType(0); + + // noop truncate + if (N0.getValueType() == N->getValueType(0)) + return N0; + // fold (truncate c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::TRUNCATE, VT, N0); + // fold (truncate (truncate x)) -> (truncate x) + if (N0.getOpcode() == ISD::TRUNCATE) + return DAG.getNode(ISD::TRUNCATE, VT, N0.getOperand(0)); + // fold (truncate (ext x)) -> (ext x) or (truncate x) or x + if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND|| + N0.getOpcode() == ISD::ANY_EXTEND) { + if (N0.getOperand(0).getValueType() < VT) + // if the source is smaller than the dest, we still need an extend + return DAG.getNode(N0.getOpcode(), VT, N0.getOperand(0)); + else if (N0.getOperand(0).getValueType() > VT) + // if the source is larger than the dest, than we just need the truncate + return DAG.getNode(ISD::TRUNCATE, VT, N0.getOperand(0)); + else + // if the source and dest are the same type, we can drop both the extend + // and the truncate + return N0.getOperand(0); + } + + // fold (truncate (load x)) -> (smaller load x) + // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) + return ReduceLoadWidth(N); +} + +SDOperand DAGCombiner::visitBIT_CONVERT(SDNode *N) { + SDOperand N0 = N->getOperand(0); + MVT::ValueType VT = N->getValueType(0); + + // If the input is a BUILD_VECTOR with all constant elements, fold this now. + // Only do this before legalize, since afterward the target may be depending + // on the bitconvert. + // First check to see if this is all constant. + if (!AfterLegalize && + N0.getOpcode() == ISD::BUILD_VECTOR && N0.Val->hasOneUse() && + MVT::isVector(VT)) { + bool isSimple = true; + for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) + if (N0.getOperand(i).getOpcode() != ISD::UNDEF && + N0.getOperand(i).getOpcode() != ISD::Constant && + N0.getOperand(i).getOpcode() != ISD::ConstantFP) { + isSimple = false; + break; + } + + MVT::ValueType DestEltVT = MVT::getVectorElementType(N->getValueType(0)); + assert(!MVT::isVector(DestEltVT) && + "Element type of vector ValueType must not be vector!"); + if (isSimple) { + return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.Val, DestEltVT); + } + } + + // If the input is a constant, let getNode() fold it. + if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { + SDOperand Res = DAG.getNode(ISD::BIT_CONVERT, VT, N0); + if (Res.Val != N) return Res; + } + + if (N0.getOpcode() == ISD::BIT_CONVERT) // conv(conv(x,t1),t2) -> conv(x,t2) + return DAG.getNode(ISD::BIT_CONVERT, VT, N0.getOperand(0)); + + // fold (conv (load x)) -> (load (conv*)x) + // If the resultant load doesn't need a higher alignment than the original! 
+ if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() && + ISD::isUNINDEXEDLoad(N0.Val) && + TLI.isOperationLegal(ISD::LOAD, VT)) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + unsigned Align = TLI.getTargetMachine().getTargetData()-> + getABITypeAlignment(MVT::getTypeForValueType(VT)); + unsigned OrigAlign = LN0->getAlignment(); + if (Align <= OrigAlign) { + SDOperand Load = DAG.getLoad(VT, LN0->getChain(), LN0->getBasePtr(), + LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->isVolatile(), Align); + AddToWorkList(N); + CombineTo(N0.Val, DAG.getNode(ISD::BIT_CONVERT, N0.getValueType(), Load), + Load.getValue(1)); + return Load; + } + } + + return SDOperand(); +} + +/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector +/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the +/// destination element value type. +SDOperand DAGCombiner:: +ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT::ValueType DstEltVT) { + MVT::ValueType SrcEltVT = BV->getOperand(0).getValueType(); + + // If this is already the right type, we're done. + if (SrcEltVT == DstEltVT) return SDOperand(BV, 0); + + unsigned SrcBitSize = MVT::getSizeInBits(SrcEltVT); + unsigned DstBitSize = MVT::getSizeInBits(DstEltVT); + + // If this is a conversion of N elements of one type to N elements of another + // type, convert each element. This handles FP<->INT cases. + if (SrcBitSize == DstBitSize) { + SmallVector<SDOperand, 8> Ops; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, DstEltVT, BV->getOperand(i))); + AddToWorkList(Ops.back().Val); + } + MVT::ValueType VT = + MVT::getVectorType(DstEltVT, + MVT::getVectorNumElements(BV->getValueType(0))); + return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size()); + } + + // Otherwise, we're growing or shrinking the elements. To avoid having to + // handle annoying details of growing/shrinking FP values, we convert them to + // int first. + if (MVT::isFloatingPoint(SrcEltVT)) { + // Convert the input float vector to a int vector where the elements are the + // same sizes. + assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!"); + MVT::ValueType IntVT = SrcEltVT == MVT::f32 ? MVT::i32 : MVT::i64; + BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).Val; + SrcEltVT = IntVT; + } + + // Now we know the input is an integer vector. If the output is a FP type, + // convert to integer first, then to FP of the right size. + if (MVT::isFloatingPoint(DstEltVT)) { + assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!"); + MVT::ValueType TmpVT = DstEltVT == MVT::f32 ? MVT::i32 : MVT::i64; + SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).Val; + + // Next, convert to FP elements of the same size. + return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT); + } + + // Okay, we know the src/dst types are both integers of differing types. + // Handling growing first. + assert(MVT::isInteger(SrcEltVT) && MVT::isInteger(DstEltVT)); + if (SrcBitSize < DstBitSize) { + unsigned NumInputsPerOutput = DstBitSize/SrcBitSize; + + SmallVector<SDOperand, 8> Ops; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; + i += NumInputsPerOutput) { + bool isLE = TLI.isLittleEndian(); + uint64_t NewBits = 0; + bool EltIsUndef = true; + for (unsigned j = 0; j != NumInputsPerOutput; ++j) { + // Shift the previously computed bits over. + NewBits <<= SrcBitSize; + SDOperand Op = BV->getOperand(i+ (isLE ? 
(NumInputsPerOutput-j-1) : j)); + if (Op.getOpcode() == ISD::UNDEF) continue; + EltIsUndef = false; + + NewBits |= cast<ConstantSDNode>(Op)->getValue(); + } + + if (EltIsUndef) + Ops.push_back(DAG.getNode(ISD::UNDEF, DstEltVT)); + else + Ops.push_back(DAG.getConstant(NewBits, DstEltVT)); + } + + MVT::ValueType VT = MVT::getVectorType(DstEltVT, + Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size()); + } + + // Finally, this must be the case where we are shrinking elements: each input + // turns into multiple outputs. + unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; + SmallVector<SDOperand, 8> Ops; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + if (BV->getOperand(i).getOpcode() == ISD::UNDEF) { + for (unsigned j = 0; j != NumOutputsPerInput; ++j) + Ops.push_back(DAG.getNode(ISD::UNDEF, DstEltVT)); + continue; + } + uint64_t OpVal = cast<ConstantSDNode>(BV->getOperand(i))->getValue(); + + for (unsigned j = 0; j != NumOutputsPerInput; ++j) { + unsigned ThisVal = OpVal & ((1ULL << DstBitSize)-1); + OpVal >>= DstBitSize; + Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); + } + + // For big endian targets, swap the order of the pieces of each element. + if (!TLI.isLittleEndian()) + std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); + } + MVT::ValueType VT = MVT::getVectorType(DstEltVT, Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size()); +} + + + +SDOperand DAGCombiner::visitFADD(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT::ValueType VT = N->getValueType(0); + + // fold vector ops + if (MVT::isVector(VT)) { + SDOperand FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.Val) return FoldedVOp; + } + + // fold (fadd c1, c2) -> c1+c2 + if (N0CFP && N1CFP) + return DAG.getNode(ISD::FADD, VT, N0, N1); + // canonicalize constant to RHS + if (N0CFP && !N1CFP) + return DAG.getNode(ISD::FADD, VT, N1, N0); + // fold (A + (-B)) -> A-B + if (isNegatibleForFree(N1) == 2) + return DAG.getNode(ISD::FSUB, VT, N0, GetNegatedExpression(N1, DAG)); + // fold ((-A) + B) -> B-A + if (isNegatibleForFree(N0) == 2) + return DAG.getNode(ISD::FSUB, VT, N1, GetNegatedExpression(N0, DAG)); + + // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) + if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD && + N0.Val->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) + return DAG.getNode(ISD::FADD, VT, N0.getOperand(0), + DAG.getNode(ISD::FADD, VT, N0.getOperand(1), N1)); + + return SDOperand(); +} + +SDOperand DAGCombiner::visitFSUB(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT::ValueType VT = N->getValueType(0); + + // fold vector ops + if (MVT::isVector(VT)) { + SDOperand FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.Val) return FoldedVOp; + } + + // fold (fsub c1, c2) -> c1-c2 + if (N0CFP && N1CFP) + return DAG.getNode(ISD::FSUB, VT, N0, N1); + // fold (0-B) -> -B + if (UnsafeFPMath && N0CFP && N0CFP->getValue() == 0.0) { + if (isNegatibleForFree(N1)) + return GetNegatedExpression(N1, DAG); + return DAG.getNode(ISD::FNEG, VT, N1); + } + // fold (A-(-B)) -> A+B + if (isNegatibleForFree(N1)) + return DAG.getNode(ISD::FADD, VT, N0, GetNegatedExpression(N1, DAG)); + + return SDOperand(); +} + +SDOperand 
DAGCombiner::visitFMUL(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT::ValueType VT = N->getValueType(0); + + // fold vector ops + if (MVT::isVector(VT)) { + SDOperand FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.Val) return FoldedVOp; + } + + // fold (fmul c1, c2) -> c1*c2 + if (N0CFP && N1CFP) + return DAG.getNode(ISD::FMUL, VT, N0, N1); + // canonicalize constant to RHS + if (N0CFP && !N1CFP) + return DAG.getNode(ISD::FMUL, VT, N1, N0); + // fold (fmul X, 2.0) -> (fadd X, X) + if (N1CFP && N1CFP->isExactlyValue(+2.0)) + return DAG.getNode(ISD::FADD, VT, N0, N0); + // fold (fmul X, -1.0) -> (fneg X) + if (N1CFP && N1CFP->isExactlyValue(-1.0)) + return DAG.getNode(ISD::FNEG, VT, N0); + + // -X * -Y -> X*Y + if (char LHSNeg = isNegatibleForFree(N0)) { + if (char RHSNeg = isNegatibleForFree(N1)) { + // Both can be negated for free, check to see if at least one is cheaper + // negated. + if (LHSNeg == 2 || RHSNeg == 2) + return DAG.getNode(ISD::FMUL, VT, GetNegatedExpression(N0, DAG), + GetNegatedExpression(N1, DAG)); + } + } + + // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) + if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL && + N0.Val->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) + return DAG.getNode(ISD::FMUL, VT, N0.getOperand(0), + DAG.getNode(ISD::FMUL, VT, N0.getOperand(1), N1)); + + return SDOperand(); +} + +SDOperand DAGCombiner::visitFDIV(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT::ValueType VT = N->getValueType(0); + + // fold vector ops + if (MVT::isVector(VT)) { + SDOperand FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.Val) return FoldedVOp; + } + + // fold (fdiv c1, c2) -> c1/c2 + if (N0CFP && N1CFP) + return DAG.getNode(ISD::FDIV, VT, N0, N1); + + + // -X / -Y -> X*Y + if (char LHSNeg = isNegatibleForFree(N0)) { + if (char RHSNeg = isNegatibleForFree(N1)) { + // Both can be negated for free, check to see if at least one is cheaper + // negated. 
+ if (LHSNeg == 2 || RHSNeg == 2) + return DAG.getNode(ISD::FDIV, VT, GetNegatedExpression(N0, DAG), + GetNegatedExpression(N1, DAG)); + } + } + + return SDOperand(); +} + +SDOperand DAGCombiner::visitFREM(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT::ValueType VT = N->getValueType(0); + + // fold (frem c1, c2) -> fmod(c1,c2) + if (N0CFP && N1CFP) + return DAG.getNode(ISD::FREM, VT, N0, N1); + + return SDOperand(); +} + +SDOperand DAGCombiner::visitFCOPYSIGN(SDNode *N) { + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT::ValueType VT = N->getValueType(0); + + if (N0CFP && N1CFP) // Constant fold + return DAG.getNode(ISD::FCOPYSIGN, VT, N0, N1); + + if (N1CFP) { + // copysign(x, c1) -> fabs(x) iff ispos(c1) + // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) + union { + double d; + int64_t i; + } u; + u.d = N1CFP->getValue(); + if (u.i >= 0) + return DAG.getNode(ISD::FABS, VT, N0); + else + return DAG.getNode(ISD::FNEG, VT, DAG.getNode(ISD::FABS, VT, N0)); + } + + // copysign(fabs(x), y) -> copysign(x, y) + // copysign(fneg(x), y) -> copysign(x, y) + // copysign(copysign(x,z), y) -> copysign(x, y) + if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || + N0.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FCOPYSIGN, VT, N0.getOperand(0), N1); + + // copysign(x, abs(y)) -> abs(x) + if (N1.getOpcode() == ISD::FABS) + return DAG.getNode(ISD::FABS, VT, N0); + + // copysign(x, copysign(y,z)) -> copysign(x, z) + if (N1.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FCOPYSIGN, VT, N0, N1.getOperand(1)); + + // copysign(x, fp_extend(y)) -> copysign(x, y) + // copysign(x, fp_round(y)) -> copysign(x, y) + if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) + return DAG.getNode(ISD::FCOPYSIGN, VT, N0, N1.getOperand(0)); + + return SDOperand(); +} + + + +SDOperand DAGCombiner::visitSINT_TO_FP(SDNode *N) { + SDOperand N0 = N->getOperand(0); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + MVT::ValueType VT = N->getValueType(0); + + // fold (sint_to_fp c1) -> c1fp + if (N0C) + return DAG.getNode(ISD::SINT_TO_FP, VT, N0); + return SDOperand(); +} + +SDOperand DAGCombiner::visitUINT_TO_FP(SDNode *N) { + SDOperand N0 = N->getOperand(0); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + MVT::ValueType VT = N->getValueType(0); + + // fold (uint_to_fp c1) -> c1fp + if (N0C) + return DAG.getNode(ISD::UINT_TO_FP, VT, N0); + return SDOperand(); +} + +SDOperand DAGCombiner::visitFP_TO_SINT(SDNode *N) { + SDOperand N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + MVT::ValueType VT = N->getValueType(0); + + // fold (fp_to_sint c1fp) -> c1 + if (N0CFP) + return DAG.getNode(ISD::FP_TO_SINT, VT, N0); + return SDOperand(); +} + +SDOperand DAGCombiner::visitFP_TO_UINT(SDNode *N) { + SDOperand N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + MVT::ValueType VT = N->getValueType(0); + + // fold (fp_to_uint c1fp) -> c1 + if (N0CFP) + return DAG.getNode(ISD::FP_TO_UINT, VT, N0); + return SDOperand(); +} + +SDOperand DAGCombiner::visitFP_ROUND(SDNode *N) { + SDOperand N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + MVT::ValueType VT = 
N->getValueType(0); + + // fold (fp_round c1fp) -> c1fp + if (N0CFP) + return DAG.getNode(ISD::FP_ROUND, VT, N0); + + // fold (fp_round (fp_extend x)) -> x + if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType()) + return N0.getOperand(0); + + // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) + if (N0.getOpcode() == ISD::FCOPYSIGN && N0.Val->hasOneUse()) { + SDOperand Tmp = DAG.getNode(ISD::FP_ROUND, VT, N0.getOperand(0)); + AddToWorkList(Tmp.Val); + return DAG.getNode(ISD::FCOPYSIGN, VT, Tmp, N0.getOperand(1)); + } + + return SDOperand(); +} + +SDOperand DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { + SDOperand N0 = N->getOperand(0); + MVT::ValueType VT = N->getValueType(0); + MVT::ValueType EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + + // fold (fp_round_inreg c1fp) -> c1fp + if (N0CFP) { + SDOperand Round = DAG.getConstantFP(N0CFP->getValue(), EVT); + return DAG.getNode(ISD::FP_EXTEND, VT, Round); + } + return SDOperand(); +} + +SDOperand DAGCombiner::visitFP_EXTEND(SDNode *N) { + SDOperand N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + MVT::ValueType VT = N->getValueType(0); + + // fold (fp_extend c1fp) -> c1fp + if (N0CFP) + return DAG.getNode(ISD::FP_EXTEND, VT, N0); + + // fold (fpext (load x)) -> (fpext (fpround (extload x))) + if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() && + (!AfterLegalize||TLI.isLoadXLegal(ISD::EXTLOAD, N0.getValueType()))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDOperand ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.Val, DAG.getNode(ISD::FP_ROUND, N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDOperand(N, 0); // Return N so it doesn't get rechecked! + } + + + return SDOperand(); +} + +SDOperand DAGCombiner::visitFNEG(SDNode *N) { + SDOperand N0 = N->getOperand(0); + + if (isNegatibleForFree(N0)) + return GetNegatedExpression(N0, DAG); + + return SDOperand(); +} + +SDOperand DAGCombiner::visitFABS(SDNode *N) { + SDOperand N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + MVT::ValueType VT = N->getValueType(0); + + // fold (fabs c1) -> fabs(c1) + if (N0CFP) + return DAG.getNode(ISD::FABS, VT, N0); + // fold (fabs (fabs x)) -> (fabs x) + if (N0.getOpcode() == ISD::FABS) + return N->getOperand(0); + // fold (fabs (fneg x)) -> (fabs x) + // fold (fabs (fcopysign x, y)) -> (fabs x) + if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FABS, VT, N0.getOperand(0)); + + return SDOperand(); +} + +SDOperand DAGCombiner::visitBRCOND(SDNode *N) { + SDOperand Chain = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + SDOperand N2 = N->getOperand(2); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + + // never taken branch, fold to chain + if (N1C && N1C->isNullValue()) + return Chain; + // unconditional branch + if (N1C && N1C->getValue() == 1) + return DAG.getNode(ISD::BR, MVT::Other, Chain, N2); + // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal + // on the target. 
+ if (N1.getOpcode() == ISD::SETCC && + TLI.isOperationLegal(ISD::BR_CC, MVT::Other)) { + return DAG.getNode(ISD::BR_CC, MVT::Other, Chain, N1.getOperand(2), + N1.getOperand(0), N1.getOperand(1), N2); + } + return SDOperand(); +} + +// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB. +// +SDOperand DAGCombiner::visitBR_CC(SDNode *N) { + CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); + SDOperand CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); + + // Use SimplifySetCC to simplify SETCC's. + SDOperand Simp = SimplifySetCC(MVT::i1, CondLHS, CondRHS, CC->get(), false); + if (Simp.Val) AddToWorkList(Simp.Val); + + ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(Simp.Val); + + // fold br_cc true, dest -> br dest (unconditional branch) + if (SCCC && SCCC->getValue()) + return DAG.getNode(ISD::BR, MVT::Other, N->getOperand(0), + N->getOperand(4)); + // fold br_cc false, dest -> unconditional fall through + if (SCCC && SCCC->isNullValue()) + return N->getOperand(0); + + // fold to a simpler setcc + if (Simp.Val && Simp.getOpcode() == ISD::SETCC) + return DAG.getNode(ISD::BR_CC, MVT::Other, N->getOperand(0), + Simp.getOperand(2), Simp.getOperand(0), + Simp.getOperand(1), N->getOperand(4)); + return SDOperand(); +} + + +/// CombineToPreIndexedLoadStore - Try turning a load / store and a +/// pre-indexed load / store when the base pointer is a add or subtract +/// and it has other uses besides the load / store. After the +/// transformation, the new indexed load / store has effectively folded +/// the add / subtract in and all of its other uses are redirected to the +/// new load / store. +bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { + if (!AfterLegalize) + return false; + + bool isLoad = true; + SDOperand Ptr; + MVT::ValueType VT; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + if (LD->getAddressingMode() != ISD::UNINDEXED) + return false; + VT = LD->getLoadedVT(); + if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) && + !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT)) + return false; + Ptr = LD->getBasePtr(); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + if (ST->getAddressingMode() != ISD::UNINDEXED) + return false; + VT = ST->getStoredVT(); + if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) && + !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT)) + return false; + Ptr = ST->getBasePtr(); + isLoad = false; + } else + return false; + + // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail + // out. There is no reason to make this a preinc/predec. + if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) || + Ptr.Val->hasOneUse()) + return false; + + // Ask the target to do addressing mode selection. + SDOperand BasePtr; + SDOperand Offset; + ISD::MemIndexedMode AM = ISD::UNINDEXED; + if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG)) + return false; + // Don't create a indexed load / store with zero offset. + if (isa<ConstantSDNode>(Offset) && + cast<ConstantSDNode>(Offset)->getValue() == 0) + return false; + + // Try turning it into a pre-indexed load / store except when: + // 1) The new base ptr is a frame index. + // 2) If N is a store and the new base ptr is either the same as or is a + // predecessor of the value being stored. + // 3) Another use of old base ptr is a predecessor of N. If ptr is folded + // that would create a cycle. + // 4) All uses are load / store ops that use it as old base ptr. + + // Check #1. 
Preinc'ing a frame index would require copying the stack pointer + // (plus the implicit offset) to a register to preinc anyway. + if (isa<FrameIndexSDNode>(BasePtr)) + return false; + + // Check #2. + if (!isLoad) { + SDOperand Val = cast<StoreSDNode>(N)->getValue(); + if (Val == BasePtr || BasePtr.Val->isPredecessor(Val.Val)) + return false; + } + + // Now check for #3 and #4. + bool RealUse = false; + for (SDNode::use_iterator I = Ptr.Val->use_begin(), + E = Ptr.Val->use_end(); I != E; ++I) { + SDNode *Use = *I; + if (Use == N) + continue; + if (Use->isPredecessor(N)) + return false; + + if (!((Use->getOpcode() == ISD::LOAD && + cast<LoadSDNode>(Use)->getBasePtr() == Ptr) || + (Use->getOpcode() == ISD::STORE) && + cast<StoreSDNode>(Use)->getBasePtr() == Ptr)) + RealUse = true; + } + if (!RealUse) + return false; + + SDOperand Result; + if (isLoad) + Result = DAG.getIndexedLoad(SDOperand(N,0), BasePtr, Offset, AM); + else + Result = DAG.getIndexedStore(SDOperand(N,0), BasePtr, Offset, AM); + ++PreIndexedNodes; + ++NodesCombined; + DOUT << "\nReplacing.4 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(Result.Val->dump(&DAG)); + DOUT << '\n'; + std::vector<SDNode*> NowDead; + if (isLoad) { + DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(0), + NowDead); + DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 1), Result.getValue(2), + NowDead); + } else { + DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(1), + NowDead); + } + + // Nodes can end up on the worklist more than once. Make sure we do + // not process a node that has been replaced. + for (unsigned i = 0, e = NowDead.size(); i != e; ++i) + removeFromWorkList(NowDead[i]); + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + + // Replace the uses of Ptr with uses of the updated base value. + DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0), + NowDead); + removeFromWorkList(Ptr.Val); + for (unsigned i = 0, e = NowDead.size(); i != e; ++i) + removeFromWorkList(NowDead[i]); + DAG.DeleteNode(Ptr.Val); + + return true; +} + +/// CombineToPostIndexedLoadStore - Try combine a load / store with a +/// add / sub of the base pointer node into a post-indexed load / store. +/// The transformation folded the add / subtract into the new indexed +/// load / store effectively and all of its uses are redirected to the +/// new load / store. 
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { + if (!AfterLegalize) + return false; + + bool isLoad = true; + SDOperand Ptr; + MVT::ValueType VT; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + if (LD->getAddressingMode() != ISD::UNINDEXED) + return false; + VT = LD->getLoadedVT(); + if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) && + !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT)) + return false; + Ptr = LD->getBasePtr(); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + if (ST->getAddressingMode() != ISD::UNINDEXED) + return false; + VT = ST->getStoredVT(); + if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) && + !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT)) + return false; + Ptr = ST->getBasePtr(); + isLoad = false; + } else + return false; + + if (Ptr.Val->hasOneUse()) + return false; + + for (SDNode::use_iterator I = Ptr.Val->use_begin(), + E = Ptr.Val->use_end(); I != E; ++I) { + SDNode *Op = *I; + if (Op == N || + (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)) + continue; + + SDOperand BasePtr; + SDOperand Offset; + ISD::MemIndexedMode AM = ISD::UNINDEXED; + if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) { + if (Ptr == Offset) + std::swap(BasePtr, Offset); + if (Ptr != BasePtr) + continue; + // Don't create a indexed load / store with zero offset. + if (isa<ConstantSDNode>(Offset) && + cast<ConstantSDNode>(Offset)->getValue() == 0) + continue; + + // Try turning it into a post-indexed load / store except when + // 1) All uses are load / store ops that use it as base ptr. + // 2) Op must be independent of N, i.e. Op is neither a predecessor + // nor a successor of N. Otherwise, if Op is folded that would + // create a cycle. + + // Check for #1. + bool TryNext = false; + for (SDNode::use_iterator II = BasePtr.Val->use_begin(), + EE = BasePtr.Val->use_end(); II != EE; ++II) { + SDNode *Use = *II; + if (Use == Ptr.Val) + continue; + + // If all the uses are load / store addresses, then don't do the + // transformation. + if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){ + bool RealUse = false; + for (SDNode::use_iterator III = Use->use_begin(), + EEE = Use->use_end(); III != EEE; ++III) { + SDNode *UseUse = *III; + if (!((UseUse->getOpcode() == ISD::LOAD && + cast<LoadSDNode>(UseUse)->getBasePtr().Val == Use) || + (UseUse->getOpcode() == ISD::STORE) && + cast<StoreSDNode>(UseUse)->getBasePtr().Val == Use)) + RealUse = true; + } + + if (!RealUse) { + TryNext = true; + break; + } + } + } + if (TryNext) + continue; + + // Check for #2 + if (!Op->isPredecessor(N) && !N->isPredecessor(Op)) { + SDOperand Result = isLoad + ? DAG.getIndexedLoad(SDOperand(N,0), BasePtr, Offset, AM) + : DAG.getIndexedStore(SDOperand(N,0), BasePtr, Offset, AM); + ++PostIndexedNodes; + ++NodesCombined; + DOUT << "\nReplacing.5 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(Result.Val->dump(&DAG)); + DOUT << '\n'; + std::vector<SDNode*> NowDead; + if (isLoad) { + DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(0), + NowDead); + DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 1), Result.getValue(2), + NowDead); + } else { + DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(1), + NowDead); + } + + // Nodes can end up on the worklist more than once. Make sure we do + // not process a node that has been replaced. + for (unsigned i = 0, e = NowDead.size(); i != e; ++i) + removeFromWorkList(NowDead[i]); + // Finally, since the node is now dead, remove it from the graph. 
+ DAG.DeleteNode(N); + + // Replace the uses of Use with uses of the updated base value. + DAG.ReplaceAllUsesOfValueWith(SDOperand(Op, 0), + Result.getValue(isLoad ? 1 : 0), + NowDead); + removeFromWorkList(Op); + for (unsigned i = 0, e = NowDead.size(); i != e; ++i) + removeFromWorkList(NowDead[i]); + DAG.DeleteNode(Op); + + return true; + } + } + } + return false; +} + + +SDOperand DAGCombiner::visitLOAD(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); + SDOperand Chain = LD->getChain(); + SDOperand Ptr = LD->getBasePtr(); + + // If load is not volatile and there are no uses of the loaded value (and + // the updated indexed value in case of indexed loads), change uses of the + // chain value into uses of the chain input (i.e. delete the dead load). + if (!LD->isVolatile()) { + if (N->getValueType(1) == MVT::Other) { + // Unindexed loads. + if (N->hasNUsesOfValue(0, 0)) + return CombineTo(N, DAG.getNode(ISD::UNDEF, N->getValueType(0)), Chain); + } else { + // Indexed loads. + assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); + if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) { + SDOperand Undef0 = DAG.getNode(ISD::UNDEF, N->getValueType(0)); + SDOperand Undef1 = DAG.getNode(ISD::UNDEF, N->getValueType(1)); + SDOperand To[] = { Undef0, Undef1, Chain }; + return CombineTo(N, To, 3); + } + } + } + + // If this load is directly stored, replace the load value with the stored + // value. + // TODO: Handle store large -> read small portion. + // TODO: Handle TRUNCSTORE/LOADEXT + if (LD->getExtensionType() == ISD::NON_EXTLOAD) { + if (ISD::isNON_TRUNCStore(Chain.Val)) { + StoreSDNode *PrevST = cast<StoreSDNode>(Chain); + if (PrevST->getBasePtr() == Ptr && + PrevST->getValue().getValueType() == N->getValueType(0)) + return CombineTo(N, Chain.getOperand(1), Chain); + } + } + + if (CombinerAA) { + // Walk up chain skipping non-aliasing memory nodes. + SDOperand BetterChain = FindBetterChain(N, Chain); + + // If there is a better chain. + if (Chain != BetterChain) { + SDOperand ReplLoad; + + // Replace the chain to void dependency. + if (LD->getExtensionType() == ISD::NON_EXTLOAD) { + ReplLoad = DAG.getLoad(N->getValueType(0), BetterChain, Ptr, + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->isVolatile(), LD->getAlignment()); + } else { + ReplLoad = DAG.getExtLoad(LD->getExtensionType(), + LD->getValueType(0), + BetterChain, Ptr, LD->getSrcValue(), + LD->getSrcValueOffset(), + LD->getLoadedVT(), + LD->isVolatile(), + LD->getAlignment()); + } + + // Create token factor to keep old chain connected. + SDOperand Token = DAG.getNode(ISD::TokenFactor, MVT::Other, + Chain, ReplLoad.getValue(1)); + + // Replace uses with load result and token factor. Don't add users + // to work list. + return CombineTo(N, ReplLoad.getValue(0), Token, false); + } + } + + // Try transforming N to an indexed load. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDOperand(N, 0); + + return SDOperand(); +} + +SDOperand DAGCombiner::visitSTORE(SDNode *N) { + StoreSDNode *ST = cast<StoreSDNode>(N); + SDOperand Chain = ST->getChain(); + SDOperand Value = ST->getValue(); + SDOperand Ptr = ST->getBasePtr(); + + // If this is a store of a bit convert, store the input value if the + // resultant store does not need a higher alignment than the original. 
+ if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() && + ST->getAddressingMode() == ISD::UNINDEXED) { + unsigned Align = ST->getAlignment(); + MVT::ValueType SVT = Value.getOperand(0).getValueType(); + unsigned OrigAlign = TLI.getTargetMachine().getTargetData()-> + getABITypeAlignment(MVT::getTypeForValueType(SVT)); + if (Align <= OrigAlign && TLI.isOperationLegal(ISD::STORE, SVT)) + return DAG.getStore(Chain, Value.getOperand(0), Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->isVolatile(), Align); + } + + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) { + if (Value.getOpcode() != ISD::TargetConstantFP) { + SDOperand Tmp; + switch (CFP->getValueType(0)) { + default: assert(0 && "Unknown FP type"); + case MVT::f32: + if (!AfterLegalize || TLI.isTypeLegal(MVT::i32)) { + Tmp = DAG.getConstant(FloatToBits(CFP->getValue()), MVT::i32); + return DAG.getStore(Chain, Tmp, Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->isVolatile(), + ST->getAlignment()); + } + break; + case MVT::f64: + if (!AfterLegalize || TLI.isTypeLegal(MVT::i64)) { + Tmp = DAG.getConstant(DoubleToBits(CFP->getValue()), MVT::i64); + return DAG.getStore(Chain, Tmp, Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->isVolatile(), + ST->getAlignment()); + } else if (TLI.isTypeLegal(MVT::i32)) { + // Many FP stores are not make apparent until after legalize, e.g. for + // argument passing. Since this is so common, custom legalize the + // 64-bit integer store into two 32-bit stores. + uint64_t Val = DoubleToBits(CFP->getValue()); + SDOperand Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32); + SDOperand Hi = DAG.getConstant(Val >> 32, MVT::i32); + if (!TLI.isLittleEndian()) std::swap(Lo, Hi); + + int SVOffset = ST->getSrcValueOffset(); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + + SDOperand St0 = DAG.getStore(Chain, Lo, Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), + isVolatile, ST->getAlignment()); + Ptr = DAG.getNode(ISD::ADD, Ptr.getValueType(), Ptr, + DAG.getConstant(4, Ptr.getValueType())); + SVOffset += 4; + if (Alignment > 4) + Alignment = 4; + SDOperand St1 = DAG.getStore(Chain, Hi, Ptr, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + return DAG.getNode(ISD::TokenFactor, MVT::Other, St0, St1); + } + break; + } + } + } + + if (CombinerAA) { + // Walk up chain skipping non-aliasing memory nodes. + SDOperand BetterChain = FindBetterChain(N, Chain); + + // If there is a better chain. + if (Chain != BetterChain) { + // Replace the chain to avoid dependency. + SDOperand ReplStore; + if (ST->isTruncatingStore()) { + ReplStore = DAG.getTruncStore(BetterChain, Value, Ptr, + ST->getSrcValue(), ST->getSrcValueOffset(), ST->getStoredVT(), + ST->isVolatile(), ST->getAlignment()); + } else { + ReplStore = DAG.getStore(BetterChain, Value, Ptr, + ST->getSrcValue(), ST->getSrcValueOffset(), + ST->isVolatile(), ST->getAlignment()); + } + + // Create token to keep both nodes around. + SDOperand Token = + DAG.getNode(ISD::TokenFactor, MVT::Other, Chain, ReplStore); + + // Don't add users to work list. + return CombineTo(N, Token, false); + } + } + + // Try transforming N to an indexed store. 
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDOperand(N, 0); + + return SDOperand(); +} + +SDOperand DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { + SDOperand InVec = N->getOperand(0); + SDOperand InVal = N->getOperand(1); + SDOperand EltNo = N->getOperand(2); + + // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new + // vector with the inserted element. + if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) { + unsigned Elt = cast<ConstantSDNode>(EltNo)->getValue(); + SmallVector<SDOperand, 8> Ops(InVec.Val->op_begin(), InVec.Val->op_end()); + if (Elt < Ops.size()) + Ops[Elt] = InVal; + return DAG.getNode(ISD::BUILD_VECTOR, InVec.getValueType(), + &Ops[0], Ops.size()); + } + + return SDOperand(); +} + +SDOperand DAGCombiner::visitBUILD_VECTOR(SDNode *N) { + unsigned NumInScalars = N->getNumOperands(); + MVT::ValueType VT = N->getValueType(0); + unsigned NumElts = MVT::getVectorNumElements(VT); + MVT::ValueType EltType = MVT::getVectorElementType(VT); + + // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT + // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from + // at most two distinct vectors, turn this into a shuffle node. + SDOperand VecIn1, VecIn2; + for (unsigned i = 0; i != NumInScalars; ++i) { + // Ignore undef inputs. + if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; + + // If this input is something other than a EXTRACT_VECTOR_ELT with a + // constant index, bail out. + if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) { + VecIn1 = VecIn2 = SDOperand(0, 0); + break; + } + + // If the input vector type disagrees with the result of the build_vector, + // we can't make a shuffle. + SDOperand ExtractedFromVec = N->getOperand(i).getOperand(0); + if (ExtractedFromVec.getValueType() != VT) { + VecIn1 = VecIn2 = SDOperand(0, 0); + break; + } + + // Otherwise, remember this. We allow up to two distinct input vectors. + if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) + continue; + + if (VecIn1.Val == 0) { + VecIn1 = ExtractedFromVec; + } else if (VecIn2.Val == 0) { + VecIn2 = ExtractedFromVec; + } else { + // Too many inputs. + VecIn1 = VecIn2 = SDOperand(0, 0); + break; + } + } + + // If everything is good, we can make a shuffle operation. + if (VecIn1.Val) { + SmallVector<SDOperand, 8> BuildVecIndices; + for (unsigned i = 0; i != NumInScalars; ++i) { + if (N->getOperand(i).getOpcode() == ISD::UNDEF) { + BuildVecIndices.push_back(DAG.getNode(ISD::UNDEF, TLI.getPointerTy())); + continue; + } + + SDOperand Extract = N->getOperand(i); + + // If extracting from the first vector, just use the index directly. + if (Extract.getOperand(0) == VecIn1) { + BuildVecIndices.push_back(Extract.getOperand(1)); + continue; + } + + // Otherwise, use InIdx + VecSize + unsigned Idx = cast<ConstantSDNode>(Extract.getOperand(1))->getValue(); + BuildVecIndices.push_back(DAG.getConstant(Idx+NumInScalars, + TLI.getPointerTy())); + } + + // Add count and size info. + MVT::ValueType BuildVecVT = + MVT::getVectorType(TLI.getPointerTy(), NumElts); + + // Return the new VECTOR_SHUFFLE node. + SDOperand Ops[5]; + Ops[0] = VecIn1; + if (VecIn2.Val) { + Ops[1] = VecIn2; + } else { + // Use an undef build_vector as input for the second operand. 
+ std::vector<SDOperand> UnOps(NumInScalars, + DAG.getNode(ISD::UNDEF, + EltType)); + Ops[1] = DAG.getNode(ISD::BUILD_VECTOR, VT, + &UnOps[0], UnOps.size()); + AddToWorkList(Ops[1].Val); + } + Ops[2] = DAG.getNode(ISD::BUILD_VECTOR, BuildVecVT, + &BuildVecIndices[0], BuildVecIndices.size()); + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Ops, 3); + } + + return SDOperand(); +} + +SDOperand DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { + // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of + // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector + // inputs come from at most two distinct vectors, turn this into a shuffle + // node. + + // If we only have one input vector, we don't need to do any concatenation. + if (N->getNumOperands() == 1) { + return N->getOperand(0); + } + + return SDOperand(); +} + +SDOperand DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { + SDOperand ShufMask = N->getOperand(2); + unsigned NumElts = ShufMask.getNumOperands(); + + // If the shuffle mask is an identity operation on the LHS, return the LHS. + bool isIdentity = true; + for (unsigned i = 0; i != NumElts; ++i) { + if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF && + cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue() != i) { + isIdentity = false; + break; + } + } + if (isIdentity) return N->getOperand(0); + + // If the shuffle mask is an identity operation on the RHS, return the RHS. + isIdentity = true; + for (unsigned i = 0; i != NumElts; ++i) { + if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF && + cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue() != i+NumElts) { + isIdentity = false; + break; + } + } + if (isIdentity) return N->getOperand(1); + + // Check if the shuffle is a unary shuffle, i.e. one of the vectors is not + // needed at all. + bool isUnary = true; + bool isSplat = true; + int VecNum = -1; + unsigned BaseIdx = 0; + for (unsigned i = 0; i != NumElts; ++i) + if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF) { + unsigned Idx = cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue(); + int V = (Idx < NumElts) ? 0 : 1; + if (VecNum == -1) { + VecNum = V; + BaseIdx = Idx; + } else { + if (BaseIdx != Idx) + isSplat = false; + if (VecNum != V) { + isUnary = false; + break; + } + } + } + + SDOperand N0 = N->getOperand(0); + SDOperand N1 = N->getOperand(1); + // Normalize unary shuffle so the RHS is undef. + if (isUnary && VecNum == 1) + std::swap(N0, N1); + + // If it is a splat, check if the argument vector is a build_vector with + // all scalar elements the same. + if (isSplat) { + SDNode *V = N0.Val; + + // If this is a bit convert that changes the element type of the vector but + // not the number of vector elements, look through it. Be careful not to + // look though conversions that change things like v4f32 to v2f64. 
+ if (V->getOpcode() == ISD::BIT_CONVERT) { + SDOperand ConvInput = V->getOperand(0); + if (MVT::getVectorNumElements(ConvInput.getValueType()) == NumElts) + V = ConvInput.Val; + } + + if (V->getOpcode() == ISD::BUILD_VECTOR) { + unsigned NumElems = V->getNumOperands(); + if (NumElems > BaseIdx) { + SDOperand Base; + bool AllSame = true; + for (unsigned i = 0; i != NumElems; ++i) { + if (V->getOperand(i).getOpcode() != ISD::UNDEF) { + Base = V->getOperand(i); + break; + } + } + // Splat of <u, u, u, u>, return <u, u, u, u> + if (!Base.Val) + return N0; + for (unsigned i = 0; i != NumElems; ++i) { + if (V->getOperand(i).getOpcode() != ISD::UNDEF && + V->getOperand(i) != Base) { + AllSame = false; + break; + } + } + // Splat of <x, x, x, x>, return <x, x, x, x> + if (AllSame) + return N0; + } + } + } + + // If it is a unary or the LHS and the RHS are the same node, turn the RHS + // into an undef. + if (isUnary || N0 == N1) { + // Check the SHUFFLE mask, mapping any inputs from the 2nd operand into the + // first operand. + SmallVector<SDOperand, 8> MappedOps; + for (unsigned i = 0; i != NumElts; ++i) { + if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF || + cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue() < NumElts) { + MappedOps.push_back(ShufMask.getOperand(i)); + } else { + unsigned NewIdx = + cast<ConstantSDNode>(ShufMask.getOperand(i))->getValue() - NumElts; + MappedOps.push_back(DAG.getConstant(NewIdx, MVT::i32)); + } + } + ShufMask = DAG.getNode(ISD::BUILD_VECTOR, ShufMask.getValueType(), + &MappedOps[0], MappedOps.size()); + AddToWorkList(ShufMask.Val); + return DAG.getNode(ISD::VECTOR_SHUFFLE, N->getValueType(0), + N0, + DAG.getNode(ISD::UNDEF, N->getValueType(0)), + ShufMask); + } + + return SDOperand(); +} + +/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform +/// an AND to a vector_shuffle with the destination vector and a zero vector. +/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> +/// vector_shuffle V, Zero, <0, 4, 2, 4> +SDOperand DAGCombiner::XformToShuffleWithZero(SDNode *N) { + SDOperand LHS = N->getOperand(0); + SDOperand RHS = N->getOperand(1); + if (N->getOpcode() == ISD::AND) { + if (RHS.getOpcode() == ISD::BIT_CONVERT) + RHS = RHS.getOperand(0); + if (RHS.getOpcode() == ISD::BUILD_VECTOR) { + std::vector<SDOperand> IdxOps; + unsigned NumOps = RHS.getNumOperands(); + unsigned NumElts = NumOps; + MVT::ValueType EVT = MVT::getVectorElementType(RHS.getValueType()); + for (unsigned i = 0; i != NumElts; ++i) { + SDOperand Elt = RHS.getOperand(i); + if (!isa<ConstantSDNode>(Elt)) + return SDOperand(); + else if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) + IdxOps.push_back(DAG.getConstant(i, EVT)); + else if (cast<ConstantSDNode>(Elt)->isNullValue()) + IdxOps.push_back(DAG.getConstant(NumElts, EVT)); + else + return SDOperand(); + } + + // Let's see if the target supports this vector_shuffle. + if (!TLI.isVectorClearMaskLegal(IdxOps, EVT, DAG)) + return SDOperand(); + + // Return the new VECTOR_SHUFFLE node. 
+ MVT::ValueType VT = MVT::getVectorType(EVT, NumElts); + std::vector<SDOperand> Ops; + LHS = DAG.getNode(ISD::BIT_CONVERT, VT, LHS); + Ops.push_back(LHS); + AddToWorkList(LHS.Val); + std::vector<SDOperand> ZeroOps(NumElts, DAG.getConstant(0, EVT)); + Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, VT, + &ZeroOps[0], ZeroOps.size())); + Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, VT, + &IdxOps[0], IdxOps.size())); + SDOperand Result = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, + &Ops[0], Ops.size()); + if (VT != LHS.getValueType()) { + Result = DAG.getNode(ISD::BIT_CONVERT, LHS.getValueType(), Result); + } + return Result; + } + } + return SDOperand(); +} + +/// SimplifyVBinOp - Visit a binary vector operation, like ADD. +SDOperand DAGCombiner::SimplifyVBinOp(SDNode *N) { + // After legalize, the target may be depending on adds and other + // binary ops to provide legal ways to construct constants or other + // things. Simplifying them may result in a loss of legality. + if (AfterLegalize) return SDOperand(); + + MVT::ValueType VT = N->getValueType(0); + assert(MVT::isVector(VT) && "SimplifyVBinOp only works on vectors!"); + + MVT::ValueType EltType = MVT::getVectorElementType(VT); + SDOperand LHS = N->getOperand(0); + SDOperand RHS = N->getOperand(1); + SDOperand Shuffle = XformToShuffleWithZero(N); + if (Shuffle.Val) return Shuffle; + + // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold + // this operation. + if (LHS.getOpcode() == ISD::BUILD_VECTOR && + RHS.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<SDOperand, 8> Ops; + for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { + SDOperand LHSOp = LHS.getOperand(i); + SDOperand RHSOp = RHS.getOperand(i); + // If these two elements can't be folded, bail out. + if ((LHSOp.getOpcode() != ISD::UNDEF && + LHSOp.getOpcode() != ISD::Constant && + LHSOp.getOpcode() != ISD::ConstantFP) || + (RHSOp.getOpcode() != ISD::UNDEF && + RHSOp.getOpcode() != ISD::Constant && + RHSOp.getOpcode() != ISD::ConstantFP)) + break; + // Can't fold divide by zero. + if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || + N->getOpcode() == ISD::FDIV) { + if ((RHSOp.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(RHSOp.Val)->isNullValue()) || + (RHSOp.getOpcode() == ISD::ConstantFP && + !cast<ConstantFPSDNode>(RHSOp.Val)->getValue())) + break; + } + Ops.push_back(DAG.getNode(N->getOpcode(), EltType, LHSOp, RHSOp)); + AddToWorkList(Ops.back().Val); + assert((Ops.back().getOpcode() == ISD::UNDEF || + Ops.back().getOpcode() == ISD::Constant || + Ops.back().getOpcode() == ISD::ConstantFP) && + "Scalar binop didn't fold!"); + } + + if (Ops.size() == LHS.getNumOperands()) { + MVT::ValueType VT = LHS.getValueType(); + return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size()); + } + } + + return SDOperand(); +} + +SDOperand DAGCombiner::SimplifySelect(SDOperand N0, SDOperand N1, SDOperand N2){ + assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); + + SDOperand SCC = SimplifySelectCC(N0.getOperand(0), N0.getOperand(1), N1, N2, + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + // If we got a simplified select_cc node back from SimplifySelectCC, then + // break it down into a new SETCC node, and a new SELECT node, and then return + // the SELECT node, since we were called with a SELECT node. + if (SCC.Val) { + // Check to see if we got a select_cc back (to turn into setcc/select). + // Otherwise, just return whatever node we got back, like fabs. 
+ if (SCC.getOpcode() == ISD::SELECT_CC) { + SDOperand SETCC = DAG.getNode(ISD::SETCC, N0.getValueType(), + SCC.getOperand(0), SCC.getOperand(1), + SCC.getOperand(4)); + AddToWorkList(SETCC.Val); + return DAG.getNode(ISD::SELECT, SCC.getValueType(), SCC.getOperand(2), + SCC.getOperand(3), SETCC); + } + return SCC; + } + return SDOperand(); +} + +/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS +/// are the two values being selected between, see if we can simplify the +/// select. Callers of this should assume that TheSelect is deleted if this +/// returns true. As such, they should return the appropriate thing (e.g. the +/// node) back to the top-level of the DAG combiner loop to avoid it being +/// looked at. +/// +bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDOperand LHS, + SDOperand RHS) { + + // If this is a select from two identical things, try to pull the operation + // through the select. + if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){ + // If this is a load and the token chain is identical, replace the select + // of two loads with a load through a select of the address to load from. + // This triggers in things like "select bool X, 10.0, 123.0" after the FP + // constants have been dropped into the constant pool. + if (LHS.getOpcode() == ISD::LOAD && + // Token chains must be identical. + LHS.getOperand(0) == RHS.getOperand(0)) { + LoadSDNode *LLD = cast<LoadSDNode>(LHS); + LoadSDNode *RLD = cast<LoadSDNode>(RHS); + + // If this is an EXTLOAD, the VT's must match. + if (LLD->getLoadedVT() == RLD->getLoadedVT()) { + // FIXME: this conflates two src values, discarding one. This is not + // the right thing to do, but nothing uses srcvalues now. When they do, + // turn SrcValue into a list of locations. + SDOperand Addr; + if (TheSelect->getOpcode() == ISD::SELECT) { + // Check that the condition doesn't reach either load. If so, folding + // this will induce a cycle into the DAG. + if (!LLD->isPredecessor(TheSelect->getOperand(0).Val) && + !RLD->isPredecessor(TheSelect->getOperand(0).Val)) { + Addr = DAG.getNode(ISD::SELECT, LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), LLD->getBasePtr(), + RLD->getBasePtr()); + } + } else { + // Check that the condition doesn't reach either load. If so, folding + // this will induce a cycle into the DAG. + if (!LLD->isPredecessor(TheSelect->getOperand(0).Val) && + !RLD->isPredecessor(TheSelect->getOperand(0).Val) && + !LLD->isPredecessor(TheSelect->getOperand(1).Val) && + !RLD->isPredecessor(TheSelect->getOperand(1).Val)) { + Addr = DAG.getNode(ISD::SELECT_CC, LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), + TheSelect->getOperand(1), + LLD->getBasePtr(), RLD->getBasePtr(), + TheSelect->getOperand(4)); + } + } + + if (Addr.Val) { + SDOperand Load; + if (LLD->getExtensionType() == ISD::NON_EXTLOAD) + Load = DAG.getLoad(TheSelect->getValueType(0), LLD->getChain(), + Addr,LLD->getSrcValue(), + LLD->getSrcValueOffset(), + LLD->isVolatile(), + LLD->getAlignment()); + else { + Load = DAG.getExtLoad(LLD->getExtensionType(), + TheSelect->getValueType(0), + LLD->getChain(), Addr, LLD->getSrcValue(), + LLD->getSrcValueOffset(), + LLD->getLoadedVT(), + LLD->isVolatile(), + LLD->getAlignment()); + } + // Users of the select now use the result of the load. + CombineTo(TheSelect, Load); + + // Users of the old loads now use the new load's chain. We know the + // old-load value is dead now. 
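+          // Editor's sketch of the net effect (not in the original patch):
+          //   (select C, (load p), (load q))
+          //     -> (load (select C, p, q))
+          // with both original loads rewritten to use the new load's value
+          // and chain.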
+ CombineTo(LHS.Val, Load.getValue(0), Load.getValue(1)); + CombineTo(RHS.Val, Load.getValue(0), Load.getValue(1)); + return true; + } + } + } + } + + return false; +} + +SDOperand DAGCombiner::SimplifySelectCC(SDOperand N0, SDOperand N1, + SDOperand N2, SDOperand N3, + ISD::CondCode CC, bool NotExtCompare) { + + MVT::ValueType VT = N2.getValueType(); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.Val); + ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.Val); + + // Determine if the condition we're dealing with is constant + SDOperand SCC = SimplifySetCC(TLI.getSetCCResultTy(), N0, N1, CC, false); + if (SCC.Val) AddToWorkList(SCC.Val); + ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.Val); + + // fold select_cc true, x, y -> x + if (SCCC && SCCC->getValue()) + return N2; + // fold select_cc false, x, y -> y + if (SCCC && SCCC->getValue() == 0) + return N3; + + // Check to see if we can simplify the select into an fabs node + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) { + // Allow either -0.0 or 0.0 + if (CFP->getValue() == 0.0) { + // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs + if ((CC == ISD::SETGE || CC == ISD::SETGT) && + N0 == N2 && N3.getOpcode() == ISD::FNEG && + N2 == N3.getOperand(0)) + return DAG.getNode(ISD::FABS, VT, N0); + + // select (setl[te] X, +/-0.0), fneg(X), X -> fabs + if ((CC == ISD::SETLT || CC == ISD::SETLE) && + N0 == N3 && N2.getOpcode() == ISD::FNEG && + N2.getOperand(0) == N3) + return DAG.getNode(ISD::FABS, VT, N3); + } + } + + // Check to see if we can perform the "gzip trick", transforming + // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A + if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT && + MVT::isInteger(N0.getValueType()) && + MVT::isInteger(N2.getValueType()) && + (N1C->isNullValue() || // (a < 0) ? b : 0 + (N1C->getValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 + MVT::ValueType XType = N0.getValueType(); + MVT::ValueType AType = N2.getValueType(); + if (XType >= AType) { + // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a + // single-bit constant. + if (N2C && ((N2C->getValue() & (N2C->getValue()-1)) == 0)) { + unsigned ShCtV = Log2_64(N2C->getValue()); + ShCtV = MVT::getSizeInBits(XType)-ShCtV-1; + SDOperand ShCt = DAG.getConstant(ShCtV, TLI.getShiftAmountTy()); + SDOperand Shift = DAG.getNode(ISD::SRL, XType, N0, ShCt); + AddToWorkList(Shift.Val); + if (XType > AType) { + Shift = DAG.getNode(ISD::TRUNCATE, AType, Shift); + AddToWorkList(Shift.Val); + } + return DAG.getNode(ISD::AND, AType, Shift, N2); + } + SDOperand Shift = DAG.getNode(ISD::SRA, XType, N0, + DAG.getConstant(MVT::getSizeInBits(XType)-1, + TLI.getShiftAmountTy())); + AddToWorkList(Shift.Val); + if (XType > AType) { + Shift = DAG.getNode(ISD::TRUNCATE, AType, Shift); + AddToWorkList(Shift.Val); + } + return DAG.getNode(ISD::AND, AType, Shift, N2); + } + } + + // fold select C, 16, 0 -> shl C, 4 + if (N2C && N3C && N3C->isNullValue() && isPowerOf2_64(N2C->getValue()) && + TLI.getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult) { + + // If the caller doesn't want us to simplify this into a zext of a compare, + // don't do it. + if (NotExtCompare && N2C->getValue() == 1) + return SDOperand(); + + // Get a SetCC of the condition + // FIXME: Should probably make sure that setcc is legal if we ever have a + // target where it isn't. 
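+    // Editor's worked example (arithmetic only, not from the original
+    // patch): for (select_cc lhs, rhs, 16, 0, cc) the setcc result is
+    // zero-extended to 0 or 1 and shifted left by log2(16) == 4, giving
+    // 0 or 16 with no conditional branch.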
+    SDOperand Temp, SCC;
+    // cast from setcc result type to select result type
+    if (AfterLegalize) {
+      SCC  = DAG.getSetCC(TLI.getSetCCResultTy(), N0, N1, CC);
+      if (N2.getValueType() < SCC.getValueType())
+        Temp = DAG.getZeroExtendInReg(SCC, N2.getValueType());
+      else
+        Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getValueType(), SCC);
+    } else {
+      SCC  = DAG.getSetCC(MVT::i1, N0, N1, CC);
+      Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getValueType(), SCC);
+    }
+    AddToWorkList(SCC.Val);
+    AddToWorkList(Temp.Val);
+
+    if (N2C->getValue() == 1)
+      return Temp;
+    // shl setcc result by log2 n2c
+    return DAG.getNode(ISD::SHL, N2.getValueType(), Temp,
+                       DAG.getConstant(Log2_64(N2C->getValue()),
+                                       TLI.getShiftAmountTy()));
+  }
+
+  // Check to see if this is the equivalent of setcc
+  // FIXME: Turn all of these into setcc if setcc is legal; otherwise, go
+  // ahead with the folds.
+  if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getValue() == 1ULL)) {
+    MVT::ValueType XType = N0.getValueType();
+    if (TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultTy())) {
+      SDOperand Res = DAG.getSetCC(TLI.getSetCCResultTy(), N0, N1, CC);
+      if (Res.getValueType() != VT)
+        Res = DAG.getNode(ISD::ZERO_EXTEND, VT, Res);
+      return Res;
+    }
+
+    // seteq X, 0 -> srl (ctlz X, log2(size(X)))
+    if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
+        TLI.isOperationLegal(ISD::CTLZ, XType)) {
+      SDOperand Ctlz = DAG.getNode(ISD::CTLZ, XType, N0);
+      return DAG.getNode(ISD::SRL, XType, Ctlz,
+                         DAG.getConstant(Log2_32(MVT::getSizeInBits(XType)),
+                                         TLI.getShiftAmountTy()));
+    }
+    // setgt X, 0 -> srl (and (-X, ~X), size(X)-1)
+    if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
+      SDOperand NegN0 = DAG.getNode(ISD::SUB, XType, DAG.getConstant(0, XType),
+                                    N0);
+      SDOperand NotN0 = DAG.getNode(ISD::XOR, XType, N0,
+                                    DAG.getConstant(~0ULL, XType));
+      return DAG.getNode(ISD::SRL, XType,
+                         DAG.getNode(ISD::AND, XType, NegN0, NotN0),
+                         DAG.getConstant(MVT::getSizeInBits(XType)-1,
+                                         TLI.getShiftAmountTy()));
+    }
+    // setgt X, -1 -> xor (srl (X, size(X)-1), 1)
+    if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
+      SDOperand Sign = DAG.getNode(ISD::SRL, XType, N0,
+                                   DAG.getConstant(MVT::getSizeInBits(XType)-1,
+                                                   TLI.getShiftAmountTy()));
+      return DAG.getNode(ISD::XOR, XType, Sign, DAG.getConstant(1, XType));
+    }
+  }
+
+  // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X ->
+  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+  if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) &&
+      N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) &&
+      N2.getOperand(0) == N1 && MVT::isInteger(N0.getValueType())) {
+    MVT::ValueType XType = N0.getValueType();
+    SDOperand Shift = DAG.getNode(ISD::SRA, XType, N0,
+                                  DAG.getConstant(MVT::getSizeInBits(XType)-1,
+                                                  TLI.getShiftAmountTy()));
+    SDOperand Add = DAG.getNode(ISD::ADD, XType, N0, Shift);
+    AddToWorkList(Shift.Val);
+    AddToWorkList(Add.Val);
+    return DAG.getNode(ISD::XOR, XType, Add, Shift);
+  }
+  // Check to see if this is an integer abs. select_cc setgt X, -1, X, -X ->
+  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+  if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT &&
+      N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) {
+    if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) {
+      MVT::ValueType XType = N0.getValueType();
+      if (SubC->isNullValue() && MVT::isInteger(XType)) {
+        SDOperand Shift = DAG.getNode(ISD::SRA, XType, N0,
+                                      DAG.getConstant(MVT::getSizeInBits(XType)-1,
+                                                      TLI.getShiftAmountTy()));
+        SDOperand Add = DAG.getNode(ISD::ADD, XType, N0, Shift);
+        AddToWorkList(Shift.Val);
+        AddToWorkList(Add.Val);
+        return DAG.getNode(ISD::XOR, XType, Add, Shift);
+      }
+    }
+  }
+
+  return SDOperand();
+}
+
+/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
+SDOperand DAGCombiner::SimplifySetCC(MVT::ValueType VT, SDOperand N0,
+                                     SDOperand N1, ISD::CondCode Cond,
+                                     bool foldBooleans) {
+  TargetLowering::DAGCombinerInfo
+    DagCombineInfo(DAG, !AfterLegalize, false, this);
+  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo);
+}
+
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant, return
+/// a DAG expression to select that will generate the same value by multiplying
+/// by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDOperand DAGCombiner::BuildSDIV(SDNode *N) {
+  std::vector<SDNode*> Built;
+  SDOperand S = TLI.BuildSDIV(N, DAG, &Built);
+
+  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+       ii != ee; ++ii)
+    AddToWorkList(*ii);
+  return S;
+}
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant, return
+/// a DAG expression to select that will generate the same value by multiplying
+/// by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDOperand DAGCombiner::BuildUDIV(SDNode *N) {
+  std::vector<SDNode*> Built;
+  SDOperand S = TLI.BuildUDIV(N, DAG, &Built);
+
+  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+       ii != ee; ++ii)
+    AddToWorkList(*ii);
+  return S;
+}
+
+/// FindBaseOffset - Return true if base is known not to alias with anything
+/// but itself.  Provides base object and offset as results.
+static bool FindBaseOffset(SDOperand Ptr, SDOperand &Base, int64_t &Offset) {
+  // Assume it is a primitive operation.
+  Base = Ptr; Offset = 0;
+
+  // If it's an ADD of a simple constant, fold the constant into the offset.
+  if (Base.getOpcode() == ISD::ADD) {
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
+      Base = Base.getOperand(0);
+      Offset += C->getValue();
+    }
+  }
+
+  // If it's any of the following then it can't alias with anything but itself.
+  return isa<FrameIndexSDNode>(Base) ||
+         isa<ConstantPoolSDNode>(Base) ||
+         isa<GlobalAddressSDNode>(Base);
+}
+
+/// isAlias - Return true if there is any possibility that the two addresses
+/// overlap.
+bool DAGCombiner::isAlias(SDOperand Ptr1, int64_t Size1,
+                          const Value *SrcValue1, int SrcValueOffset1,
+                          SDOperand Ptr2, int64_t Size2,
+                          const Value *SrcValue2, int SrcValueOffset2)
+{
+  // If they are the same then they must be aliases.
+  if (Ptr1 == Ptr2) return true;
+
+  // Gather base node and offset information.
+  SDOperand Base1, Base2;
+  int64_t Offset1, Offset2;
+  bool KnownBase1 = FindBaseOffset(Ptr1, Base1, Offset1);
+  bool KnownBase2 = FindBaseOffset(Ptr2, Base2, Offset2);
+
+  // If they have the same base address then...
+  if (Base1 == Base2) {
+    // Check to see if the addresses overlap.
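+    // Editor's sketch (not in the original patch): the two byte ranges
+    // [Offset1, Offset1+Size1) and [Offset2, Offset2+Size2) are disjoint
+    // exactly when one ends at or before the other begins; e.g. 8-byte
+    // accesses at offsets 0 and 8 from the same base do not overlap, while
+    // 8-byte accesses at offsets 0 and 4 do.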
+    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+  }
+
+  // If we know both bases (and they differ), they can't alias.
+  if (KnownBase1 && KnownBase2) return false;
+
+  if (CombinerGlobalAA) {
+    // Use alias analysis information.
+    int Overlap1 = Size1 + SrcValueOffset1 + Offset1;
+    int Overlap2 = Size2 + SrcValueOffset2 + Offset2;
+    AliasAnalysis::AliasResult AAResult =
+      AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2);
+    if (AAResult == AliasAnalysis::NoAlias)
+      return false;
+  }
+
+  // Otherwise we have to assume they alias.
+  return true;
+}
+
+/// FindAliasInfo - Extracts the relevant alias information from the memory
+/// node.  Returns true if the operand was a load.
+bool DAGCombiner::FindAliasInfo(SDNode *N,
+                                SDOperand &Ptr, int64_t &Size,
+                                const Value *&SrcValue, int &SrcValueOffset) {
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    Ptr = LD->getBasePtr();
+    Size = MVT::getSizeInBits(LD->getLoadedVT()) >> 3;
+    SrcValue = LD->getSrcValue();
+    SrcValueOffset = LD->getSrcValueOffset();
+    return true;
+  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+    Ptr = ST->getBasePtr();
+    Size = MVT::getSizeInBits(ST->getStoredVT()) >> 3;
+    SrcValue = ST->getSrcValue();
+    SrcValueOffset = ST->getSrcValueOffset();
+  } else {
+    assert(0 && "FindAliasInfo expected a memory operand");
+  }
+
+  return false;
+}
+
+/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+/// looking for aliasing nodes and adding them to the Aliases vector.
+void DAGCombiner::GatherAllAliases(SDNode *N, SDOperand OriginalChain,
+                                   SmallVector<SDOperand, 8> &Aliases) {
+  SmallVector<SDOperand, 8> Chains;     // List of chains to visit.
+  std::set<SDNode *> Visited;           // Visited node set.
+
+  // Get alias information for node.
+  SDOperand Ptr;
+  int64_t Size;
+  const Value *SrcValue;
+  int SrcValueOffset;
+  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset);
+
+  // Starting off.
+  Chains.push_back(OriginalChain);
+
+  // Look at each chain and determine if it is an alias.  If so, add it to the
+  // aliases list.  If not, then continue up the chain looking for the next
+  // candidate.
+  while (!Chains.empty()) {
+    SDOperand Chain = Chains.back();
+    Chains.pop_back();
+
+    // Don't bother if we've been here before.
+    if (Visited.find(Chain.Val) != Visited.end()) continue;
+    Visited.insert(Chain.Val);
+
+    switch (Chain.getOpcode()) {
+    case ISD::EntryToken:
+      // Entry token is ideal chain operand, but handled in FindBetterChain.
+      break;
+
+    case ISD::LOAD:
+    case ISD::STORE: {
+      // Get alias information for Chain.
+      SDOperand OpPtr;
+      int64_t OpSize;
+      const Value *OpSrcValue;
+      int OpSrcValueOffset;
+      bool IsOpLoad = FindAliasInfo(Chain.Val, OpPtr, OpSize,
+                                    OpSrcValue, OpSrcValueOffset);
+
+      // If the chain aliases, stop here.
+      if (!(IsLoad && IsOpLoad) &&
+          isAlias(Ptr, Size, SrcValue, SrcValueOffset,
+                  OpPtr, OpSize, OpSrcValue, OpSrcValueOffset)) {
+        Aliases.push_back(Chain);
+      } else {
+        // Look further up the chain.
+        Chains.push_back(Chain.getOperand(0));
+        // Clean up old chain.
+        AddToWorkList(Chain.Val);
+      }
+      break;
+    }
+
+    case ISD::TokenFactor:
+      // We have to check each of the operands of the token factor, so we queue
+      // them up.  Adding the operands to the queue (stack) in reverse order
+      // maintains the original order and increases the likelihood that getNode
+      // will find a matching token factor (CSE.)
+      for (unsigned n = Chain.getNumOperands(); n;)
+        Chains.push_back(Chain.getOperand(--n));
+      // Eliminate the token factor if we can.
+ AddToWorkList(Chain.Val); + break; + + default: + // For all other instructions we will just have to take what we can get. + Aliases.push_back(Chain); + break; + } + } +} + +/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking +/// for a better chain (aliasing node.) +SDOperand DAGCombiner::FindBetterChain(SDNode *N, SDOperand OldChain) { + SmallVector<SDOperand, 8> Aliases; // Ops for replacing token factor. + + // Accumulate all the aliases to this node. + GatherAllAliases(N, OldChain, Aliases); + + if (Aliases.size() == 0) { + // If no operands then chain to entry token. + return DAG.getEntryNode(); + } else if (Aliases.size() == 1) { + // If a single operand then chain to it. We don't need to revisit it. + return Aliases[0]; + } + + // Construct a custom tailored token factor. + SDOperand NewChain = DAG.getNode(ISD::TokenFactor, MVT::Other, + &Aliases[0], Aliases.size()); + + // Make sure the old chain gets cleaned up. + if (NewChain != OldChain) AddToWorkList(OldChain.Val); + + return NewChain; +} + +// SelectionDAG::Combine - This is the entry point for the file. +// +void SelectionDAG::Combine(bool RunningAfterLegalize, AliasAnalysis &AA) { + if (!RunningAfterLegalize && ViewDAGCombine1) + viewGraph(); + if (RunningAfterLegalize && ViewDAGCombine2) + viewGraph(); + /// run - This is the main entry point to this class. + /// + DAGCombiner(*this, AA).Run(RunningAfterLegalize); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp new file mode 100644 index 0000000..5bcee35 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -0,0 +1,5755 @@ +//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::Legalize method. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include <map> +using namespace llvm; + +#ifndef NDEBUG +static cl::opt<bool> +ViewLegalizeDAGs("view-legalize-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize")); +#else +static const bool ViewLegalizeDAGs = 0; +#endif + +//===----------------------------------------------------------------------===// +/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and +/// hacks on it until the target machine can handle it. This involves +/// eliminating value sizes the machine cannot handle (promoting small sizes to +/// large sizes or splitting up large values into small values) as well as +/// eliminating operations the machine cannot handle. 
+///
+/// This code also does a small amount of optimization and recognition of idioms
+/// as part of its processing.  For example, if a target does not support a
+/// 'setcc' instruction efficiently, but does support the 'brcc' instruction,
+/// this will attempt to merge setcc and brc instructions into brcc's.
+///
+namespace {
+class VISIBILITY_HIDDEN SelectionDAGLegalize {
+  TargetLowering &TLI;
+  SelectionDAG &DAG;
+
+  // Libcall insertion helpers.
+
+  /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
+  /// legalized.  We use this to ensure that calls are properly serialized
+  /// against each other, including inserted libcalls.
+  SDOperand LastCALLSEQ_END;
+
+  /// IsLegalizingCall - This member is used *only* for purposes of providing
+  /// helpful assertions that a libcall isn't created while another call is
+  /// being legalized (which could lead to non-serialized call sequences).
+  bool IsLegalizingCall;
+
+  enum LegalizeAction {
+    Legal,      // The target natively supports this operation.
+    Promote,    // This operation should be executed in a larger type.
+    Expand      // Try to expand this to other ops, otherwise use a libcall.
+  };
+
+  /// ValueTypeActions - This is a bitvector that contains two bits for each
+  /// value type, where the two bits correspond to the LegalizeAction enum.
+  /// This can be queried with "getTypeAction(VT)".
+  TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+  /// LegalizedNodes - For nodes that are of legal width, and that have more
+  /// than one use, this map indicates what regularized operand to use.  This
+  /// allows us to avoid legalizing the same thing more than once.
+  DenseMap<SDOperand, SDOperand> LegalizedNodes;
+
+  /// PromotedNodes - For nodes that are below legal width, and that have more
+  /// than one use, this map indicates what promoted value to use.  This allows
+  /// us to avoid promoting the same thing more than once.
+  DenseMap<SDOperand, SDOperand> PromotedNodes;
+
+  /// ExpandedNodes - For nodes that need to be expanded, this map indicates
+  /// which operands are the expanded version of the input.  This allows
+  /// us to avoid expanding the same node more than once.
+  DenseMap<SDOperand, std::pair<SDOperand, SDOperand> > ExpandedNodes;
+
+  /// SplitNodes - For vector nodes that need to be split, this map indicates
+  /// which operands are the split version of the input.  This allows us
+  /// to avoid splitting the same node more than once.
+  std::map<SDOperand, std::pair<SDOperand, SDOperand> > SplitNodes;
+
+  /// ScalarizedNodes - For nodes that need to be converted from vector types to
+  /// scalar types, this contains the mapping of ones we have already
+  /// processed to the result.
+  std::map<SDOperand, SDOperand> ScalarizedNodes;
+
+  void AddLegalizedOperand(SDOperand From, SDOperand To) {
+    LegalizedNodes.insert(std::make_pair(From, To));
+    // If someone requests legalization of the new node, return itself.
+    if (From != To)
+      LegalizedNodes.insert(std::make_pair(To, To));
+  }
+  void AddPromotedOperand(SDOperand From, SDOperand To) {
+    bool isNew = PromotedNodes.insert(std::make_pair(From, To));
+    assert(isNew && "Got into the map somehow?");
+    // If someone requests legalization of the new node, return itself.
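+    // Editor's sketch (hypothetical 32-bit target, not part of the original
+    // patch): on such a target an i32 is Legal, an i16 is Promoted to i32,
+    // and an i64 is Expanded into two i32 halves; each result is cached in
+    // these maps so the same node is never transformed twice.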
+ LegalizedNodes.insert(std::make_pair(To, To)); + } + +public: + + SelectionDAGLegalize(SelectionDAG &DAG); + + /// getTypeAction - Return how we should legalize values of this type, either + /// it is already legal or we need to expand it into multiple registers of + /// smaller integer type, or we need to promote it to a larger type. + LegalizeAction getTypeAction(MVT::ValueType VT) const { + return (LegalizeAction)ValueTypeActions.getTypeAction(VT); + } + + /// isTypeLegal - Return true if this type is legal on this target. + /// + bool isTypeLegal(MVT::ValueType VT) const { + return getTypeAction(VT) == Legal; + } + + void LegalizeDAG(); + +private: + /// HandleOp - Legalize, Promote, or Expand the specified operand as + /// appropriate for its type. + void HandleOp(SDOperand Op); + + /// LegalizeOp - We know that the specified value has a legal type. + /// Recursively ensure that the operands have legal types, then return the + /// result. + SDOperand LegalizeOp(SDOperand O); + + /// PromoteOp - Given an operation that produces a value in an invalid type, + /// promote it to compute the value into a larger type. The produced value + /// will have the correct bits for the low portion of the register, but no + /// guarantee is made about the top bits: it may be zero, sign-extended, or + /// garbage. + SDOperand PromoteOp(SDOperand O); + + /// ExpandOp - Expand the specified SDOperand into its two component pieces + /// Lo&Hi. Note that the Op MUST be an expanded type. As a result of this, + /// the LegalizeNodes map is filled in for any results that are not expanded, + /// the ExpandedNodes map is filled in for any results that are expanded, and + /// the Lo/Hi values are returned. This applies to integer types and Vector + /// types. + void ExpandOp(SDOperand O, SDOperand &Lo, SDOperand &Hi); + + /// SplitVectorOp - Given an operand of vector type, break it down into + /// two smaller values. + void SplitVectorOp(SDOperand O, SDOperand &Lo, SDOperand &Hi); + + /// ScalarizeVectorOp - Given an operand of single-element vector type + /// (e.g. v1f32), convert it into the equivalent operation that returns a + /// scalar (e.g. f32) value. + SDOperand ScalarizeVectorOp(SDOperand O); + + /// isShuffleLegal - Return true if a vector shuffle is legal with the + /// specified mask and type. Targets can specify exactly which masks they + /// support and the code generator is tasked with not creating illegal masks. + /// + /// Note that this will also return true for shuffles that are promoted to a + /// different type. + /// + /// If this is a legal shuffle, this method returns the (possibly promoted) + /// build_vector Mask. If it's not a legal shuffle, it returns null. 
+  SDNode *isShuffleLegal(MVT::ValueType VT, SDOperand Mask) const;
+
+  bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+                                    SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
+
+  void LegalizeSetCCOperands(SDOperand &LHS, SDOperand &RHS, SDOperand &CC);
+
+  SDOperand CreateStackTemporary(MVT::ValueType VT);
+
+  SDOperand ExpandLibCall(const char *Name, SDNode *Node, bool isSigned,
+                          SDOperand &Hi);
+  SDOperand ExpandIntToFP(bool isSigned, MVT::ValueType DestTy,
+                          SDOperand Source);
+
+  SDOperand ExpandBIT_CONVERT(MVT::ValueType DestVT, SDOperand SrcOp);
+  SDOperand ExpandBUILD_VECTOR(SDNode *Node);
+  SDOperand ExpandSCALAR_TO_VECTOR(SDNode *Node);
+  SDOperand ExpandLegalINT_TO_FP(bool isSigned,
+                                 SDOperand LegalOp,
+                                 MVT::ValueType DestVT);
+  SDOperand PromoteLegalINT_TO_FP(SDOperand LegalOp, MVT::ValueType DestVT,
+                                  bool isSigned);
+  SDOperand PromoteLegalFP_TO_INT(SDOperand LegalOp, MVT::ValueType DestVT,
+                                  bool isSigned);
+
+  SDOperand ExpandBSWAP(SDOperand Op);
+  SDOperand ExpandBitCount(unsigned Opc, SDOperand Op);
+  bool ExpandShift(unsigned Opc, SDOperand Op, SDOperand Amt,
+                   SDOperand &Lo, SDOperand &Hi);
+  void ExpandShiftParts(unsigned NodeOp, SDOperand Op, SDOperand Amt,
+                        SDOperand &Lo, SDOperand &Hi);
+
+  SDOperand ExpandEXTRACT_SUBVECTOR(SDOperand Op);
+  SDOperand ExpandEXTRACT_VECTOR_ELT(SDOperand Op);
+
+  SDOperand getIntPtrConstant(uint64_t Val) {
+    return DAG.getConstant(Val, TLI.getPointerTy());
+  }
+};
+}
+
+/// isShuffleLegal - Return a non-null mask if a vector shuffle is legal with
+/// the specified mask and type.  Targets can specify exactly which masks they
+/// support and the code generator is tasked with not creating illegal masks.
+///
+/// Note that this will also return a non-null mask for shuffles that are
+/// promoted to a different type.
+SDNode *SelectionDAGLegalize::isShuffleLegal(MVT::ValueType VT,
+                                             SDOperand Mask) const {
+  switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE, VT)) {
+  default: return 0;
+  case TargetLowering::Legal:
+  case TargetLowering::Custom:
+    break;
+  case TargetLowering::Promote: {
+    // If this is promoted to a different type, convert the shuffle mask and
+    // ask if it is legal in the promoted type!
+    MVT::ValueType NVT = TLI.getTypeToPromoteTo(ISD::VECTOR_SHUFFLE, VT);
+
+    // If we changed # elements, change the shuffle mask.
+    unsigned NumEltsGrowth =
+      MVT::getVectorNumElements(NVT) / MVT::getVectorNumElements(VT);
+    assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
+    if (NumEltsGrowth > 1) {
+      // Renumber the elements.
+      SmallVector<SDOperand, 8> Ops;
+      for (unsigned i = 0, e = Mask.getNumOperands(); i != e; ++i) {
+        SDOperand InOp = Mask.getOperand(i);
+        for (unsigned j = 0; j != NumEltsGrowth; ++j) {
+          if (InOp.getOpcode() == ISD::UNDEF)
+            Ops.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
+          else {
+            unsigned InEltNo = cast<ConstantSDNode>(InOp)->getValue();
+            Ops.push_back(DAG.getConstant(InEltNo*NumEltsGrowth+j, MVT::i32));
+          }
+        }
+      }
+      Mask = DAG.getNode(ISD::BUILD_VECTOR, NVT, &Ops[0], Ops.size());
+    }
+    VT = NVT;
+    break;
+  }
+  }
+  return TLI.isShuffleMaskLegal(Mask, VT) ? Mask.Val : 0;
+}
+
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
+  : TLI(dag.getTargetLoweringInfo()), DAG(dag),
+    ValueTypeActions(TLI.getValueTypeActions()) {
+  assert(MVT::LAST_VALUETYPE <= 32 &&
+         "Too many value types for ValueTypeActions to hold!");
+}
+
+/// ComputeTopDownOrdering - Compute a top-down ordering of the dag, where Order
+/// contains all of a node's operands before it contains the node itself.
+static void ComputeTopDownOrdering(SelectionDAG &DAG, + SmallVector<SDNode*, 64> &Order) { + + DenseMap<SDNode*, unsigned> Visited; + std::vector<SDNode*> Worklist; + Worklist.reserve(128); + + // Compute ordering from all of the leaves in the graphs, those (like the + // entry node) that have no operands. + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) { + if (I->getNumOperands() == 0) { + Visited[I] = 0 - 1U; + Worklist.push_back(I); + } + } + + while (!Worklist.empty()) { + SDNode *N = Worklist.back(); + Worklist.pop_back(); + + if (++Visited[N] != N->getNumOperands()) + continue; // Haven't visited all operands yet + + Order.push_back(N); + + // Now that we have N in, add anything that uses it if all of their operands + // are now done. + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) + Worklist.push_back(*UI); + } + + assert(Order.size() == Visited.size() && + Order.size() == + (unsigned)std::distance(DAG.allnodes_begin(), DAG.allnodes_end()) && + "Error: DAG is cyclic!"); +} + + +void SelectionDAGLegalize::LegalizeDAG() { + LastCALLSEQ_END = DAG.getEntryNode(); + IsLegalizingCall = false; + + // The legalize process is inherently a bottom-up recursive process (users + // legalize their uses before themselves). Given infinite stack space, we + // could just start legalizing on the root and traverse the whole graph. In + // practice however, this causes us to run out of stack space on large basic + // blocks. To avoid this problem, compute an ordering of the nodes where each + // node is only legalized after all of its operands are legalized. + SmallVector<SDNode*, 64> Order; + ComputeTopDownOrdering(DAG, Order); + + for (unsigned i = 0, e = Order.size(); i != e; ++i) + HandleOp(SDOperand(Order[i], 0)); + + // Finally, it's possible the root changed. Get the new root. + SDOperand OldRoot = DAG.getRoot(); + assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); + DAG.setRoot(LegalizedNodes[OldRoot]); + + ExpandedNodes.clear(); + LegalizedNodes.clear(); + PromotedNodes.clear(); + SplitNodes.clear(); + ScalarizedNodes.clear(); + + // Remove dead nodes now. + DAG.RemoveDeadNodes(); +} + + +/// FindCallEndFromCallStart - Given a chained node that is part of a call +/// sequence, find the CALLSEQ_END node that terminates the call sequence. +static SDNode *FindCallEndFromCallStart(SDNode *Node) { + if (Node->getOpcode() == ISD::CALLSEQ_END) + return Node; + if (Node->use_empty()) + return 0; // No CallSeqEnd + + // The chain is usually at the end. + SDOperand TheChain(Node, Node->getNumValues()-1); + if (TheChain.getValueType() != MVT::Other) { + // Sometimes it's at the beginning. + TheChain = SDOperand(Node, 0); + if (TheChain.getValueType() != MVT::Other) { + // Otherwise, hunt for it. + for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i) + if (Node->getValueType(i) == MVT::Other) { + TheChain = SDOperand(Node, i); + break; + } + + // Otherwise, we walked into a node without a chain. + if (TheChain.getValueType() != MVT::Other) + return 0; + } + } + + for (SDNode::use_iterator UI = Node->use_begin(), + E = Node->use_end(); UI != E; ++UI) { + + // Make sure to only follow users of our token chain. 
+    SDNode *User = *UI;
+    for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
+      if (User->getOperand(i) == TheChain)
+        if (SDNode *Result = FindCallEndFromCallStart(User))
+          return Result;
+  }
+  return 0;
+}
+
+/// FindCallStartFromCallEnd - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_START node that initiates the call sequence.
+static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
+  assert(Node && "Didn't find callseq_start for a call??");
+  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
+
+  assert(Node->getOperand(0).getValueType() == MVT::Other &&
+         "Node doesn't have a token chain argument!");
+  return FindCallStartFromCallEnd(Node->getOperand(0).Val);
+}
+
+/// LegalizeAllNodesNotLeadingTo - Recursively walk the operands of N, looking
+/// to see if any of them can reach Dest.  If none can, legalize them and N
+/// itself, and return false; otherwise, return true.
+///
+/// Keep track of the nodes we find that actually do lead to Dest in
+/// NodesLeadingTo.  This avoids retraversing them an exponential number of
+/// times.
+///
+bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+                                     SmallPtrSet<SDNode*, 32> &NodesLeadingTo) {
+  if (N == Dest) return true;  // N certainly leads to Dest :)
+
+  // If we've already processed this node and it does lead to Dest, there is no
+  // need to reprocess it.
+  if (NodesLeadingTo.count(N)) return true;
+
+  // If the first result of this node has already been legalized, then it
+  // cannot reach Dest.
+  switch (getTypeAction(N->getValueType(0))) {
+  case Legal:
+    if (LegalizedNodes.count(SDOperand(N, 0))) return false;
+    break;
+  case Promote:
+    if (PromotedNodes.count(SDOperand(N, 0))) return false;
+    break;
+  case Expand:
+    if (ExpandedNodes.count(SDOperand(N, 0))) return false;
+    break;
+  }
+
+  // Okay, this node has not already been legalized.  Check and legalize all
+  // operands.  If none lead to Dest, then we can legalize this node.
+  bool OperandsLeadToDest = false;
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    OperandsLeadToDest |=   // If an operand leads to Dest, so do we.
+      LegalizeAllNodesNotLeadingTo(N->getOperand(i).Val, Dest, NodesLeadingTo);
+
+  if (OperandsLeadToDest) {
+    NodesLeadingTo.insert(N);
+    return true;
+  }
+
+  // Okay, this node looks safe, legalize it and return false.
+  HandleOp(SDOperand(N, 0));
+  return false;
+}
+
+/// HandleOp - Legalize, Promote, or Expand the specified operand as
+/// appropriate for its type.
+void SelectionDAGLegalize::HandleOp(SDOperand Op) {
+  MVT::ValueType VT = Op.getValueType();
+  switch (getTypeAction(VT)) {
+  default: assert(0 && "Bad type action!");
+  case Legal:   (void)LegalizeOp(Op); break;
+  case Promote: (void)PromoteOp(Op); break;
+  case Expand:
+    if (!MVT::isVector(VT)) {
+      // If this is an illegal scalar, expand it into its two component
+      // pieces.
+      SDOperand X, Y;
+      ExpandOp(Op, X, Y);
+    } else if (MVT::getVectorNumElements(VT) == 1) {
+      // If this is an illegal single element vector, convert it to a
+      // scalar operation.
+      (void)ScalarizeVectorOp(Op);
+    } else {
+      // Otherwise, this is an illegal multiple element vector.
+      // Split it in half and legalize both parts.
+      SDOperand X, Y;
+      SplitVectorOp(Op, X, Y);
+    }
+    break;
+  }
+}
+
+/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
+/// a load from the constant pool.
+static SDOperand ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
+                                  SelectionDAG &DAG, TargetLowering &TLI) {
+  bool Extend = false;
+
+  // If a FP immediate is precise when represented as a float and if the
+  // target can do an extending load from float to double, we put it into
+  // the constant pool as a float, even if it is statically typed as a
+  // double.
+  MVT::ValueType VT = CFP->getValueType(0);
+  bool isDouble = VT == MVT::f64;
+  ConstantFP *LLVMC = ConstantFP::get(isDouble ? Type::DoubleTy :
+                                      Type::FloatTy, CFP->getValue());
+  if (!UseCP) {
+    double Val = LLVMC->getValue();
+    return isDouble
+      ? DAG.getConstant(DoubleToBits(Val), MVT::i64)
+      : DAG.getConstant(FloatToBits(Val), MVT::i32);
+  }
+
+  if (isDouble && CFP->isExactlyValue((float)CFP->getValue()) &&
+      // Only do this if the target has a native EXTLOAD instruction from f32.
+      TLI.isLoadXLegal(ISD::EXTLOAD, MVT::f32)) {
+    LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC,Type::FloatTy));
+    VT = MVT::f32;
+    Extend = true;
+  }
+
+  SDOperand CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
+  if (Extend) {
+    return DAG.getExtLoad(ISD::EXTLOAD, MVT::f64, DAG.getEntryNode(),
+                          CPIdx, NULL, 0, MVT::f32);
+  } else {
+    return DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0);
+  }
+}
+
+
+/// ExpandFCOPYSIGNToBitwiseOps - Expands fcopysign to a series of bitwise
+/// operations.
+static
+SDOperand ExpandFCOPYSIGNToBitwiseOps(SDNode *Node, MVT::ValueType NVT,
+                                      SelectionDAG &DAG, TargetLowering &TLI) {
+  MVT::ValueType VT = Node->getValueType(0);
+  MVT::ValueType SrcVT = Node->getOperand(1).getValueType();
+  assert((SrcVT == MVT::f32 || SrcVT == MVT::f64) &&
+         "fcopysign expansion only supported for f32 and f64");
+  MVT::ValueType SrcNVT = (SrcVT == MVT::f64) ? MVT::i64 : MVT::i32;
+
+  // First get the sign bit of the second operand.
+  SDOperand Mask1 = (SrcVT == MVT::f64)
+    ? DAG.getConstantFP(BitsToDouble(1ULL << 63), SrcVT)
+    : DAG.getConstantFP(BitsToFloat(1U << 31), SrcVT);
+  Mask1 = DAG.getNode(ISD::BIT_CONVERT, SrcNVT, Mask1);
+  SDOperand SignBit= DAG.getNode(ISD::BIT_CONVERT, SrcNVT, Node->getOperand(1));
+  SignBit = DAG.getNode(ISD::AND, SrcNVT, SignBit, Mask1);
+  // Shift right or sign-extend it if the two operands have different types.
+  int SizeDiff = MVT::getSizeInBits(SrcNVT) - MVT::getSizeInBits(NVT);
+  if (SizeDiff > 0) {
+    SignBit = DAG.getNode(ISD::SRL, SrcNVT, SignBit,
+                          DAG.getConstant(SizeDiff, TLI.getShiftAmountTy()));
+    SignBit = DAG.getNode(ISD::TRUNCATE, NVT, SignBit);
+  } else if (SizeDiff < 0)
+    SignBit = DAG.getNode(ISD::SIGN_EXTEND, NVT, SignBit);
+
+  // Clear the sign bit of the first operand.
+  SDOperand Mask2 = (VT == MVT::f64)
+    ? DAG.getConstantFP(BitsToDouble(~(1ULL << 63)), VT)
+    : DAG.getConstantFP(BitsToFloat(~(1U << 31)), VT);
+  Mask2 = DAG.getNode(ISD::BIT_CONVERT, NVT, Mask2);
+  SDOperand Result = DAG.getNode(ISD::BIT_CONVERT, NVT, Node->getOperand(0));
+  Result = DAG.getNode(ISD::AND, NVT, Result, Mask2);
+
+  // Or the value with the sign bit.
+  Result = DAG.getNode(ISD::OR, NVT, Result, SignBit);
+  return Result;
+}
+
+
+/// LegalizeOp - We know that the specified value has a legal type, and
+/// that its operands are legal.  Now ensure that the operation itself
+/// is legal, recursively ensuring that the operands' operations remain
+/// legal.
+SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
+  assert(isTypeLegal(Op.getValueType()) &&
+         "Caller should expand or promote operands that are not legal!");
+  SDNode *Node = Op.Val;
+
+  // If this operation defines any values that cannot be represented in a
+  // register on this target, make sure to expand or promote them.
+  if (Node->getNumValues() > 1) {
+    for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+      if (getTypeAction(Node->getValueType(i)) != Legal) {
+        HandleOp(Op.getValue(i));
+        assert(LegalizedNodes.count(Op) &&
+               "Handling didn't add legal operands!");
+        return LegalizedNodes[Op];
+      }
+  }
+
+  // Note that LegalizeOp may be reentered even from single-use nodes, which
+  // means that we always must cache transformed nodes.
+  DenseMap<SDOperand, SDOperand>::iterator I = LegalizedNodes.find(Op);
+  if (I != LegalizedNodes.end()) return I->second;
+
+  SDOperand Tmp1, Tmp2, Tmp3, Tmp4;
+  SDOperand Result = Op;
+  bool isCustom = false;
+
+  switch (Node->getOpcode()) {
+  case ISD::FrameIndex:
+  case ISD::EntryToken:
+  case ISD::Register:
+  case ISD::BasicBlock:
+  case ISD::TargetFrameIndex:
+  case ISD::TargetJumpTable:
+  case ISD::TargetConstant:
+  case ISD::TargetConstantFP:
+  case ISD::TargetConstantPool:
+  case ISD::TargetGlobalAddress:
+  case ISD::TargetGlobalTLSAddress:
+  case ISD::TargetExternalSymbol:
+  case ISD::VALUETYPE:
+  case ISD::SRCVALUE:
+  case ISD::STRING:
+  case ISD::CONDCODE:
+    // Primitives must all be legal.
+    assert(TLI.isOperationLegal(Node->getOpcode(), Node->getValueType(0)) &&
+           "This must be legal!");
+    break;
+  default:
+    if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
+      // If this is a target node, legalize it by legalizing the operands then
+      // passing it through.
+      SmallVector<SDOperand, 8> Ops;
+      for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+        Ops.push_back(LegalizeOp(Node->getOperand(i)));
+
+      Result = DAG.UpdateNodeOperands(Result.getValue(0), &Ops[0], Ops.size());
+
+      for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+        AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
+      return Result.getValue(Op.ResNo);
+    }
+    // Otherwise this is an unhandled builtin node.
+#ifndef NDEBUG
+    cerr << "NODE: "; Node->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Do not know how to legalize this operator!");
+    abort();
+  case ISD::GLOBAL_OFFSET_TABLE:
+  case ISD::GlobalAddress:
+  case ISD::GlobalTLSAddress:
+  case ISD::ExternalSymbol:
+  case ISD::ConstantPool:
+  case ISD::JumpTable: // Nothing to do.
+    switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Custom:
+      Tmp1 = TLI.LowerOperation(Op, DAG);
+      if (Tmp1.Val) Result = Tmp1;
+      // FALLTHROUGH if the target doesn't want to lower this op after all.
+    case TargetLowering::Legal:
+      break;
+    }
+    break;
+  case ISD::FRAMEADDR:
+  case ISD::RETURNADDR:
+  case ISD::FRAME_TO_ARGS_OFFSET:
+    // The only option for these nodes is to custom lower them.  If the target
+    // does not custom lower them, then return zero.
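+    // Editor's sketch (hypothetical target behavior, not from the original
+    // patch): a target would typically lower ISD::FRAMEADDR to a
+    // CopyFromReg of its frame-pointer register; if LowerOperation returns
+    // a null SDOperand, the constant 0 below is the fallback result.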
+    Tmp1 = TLI.LowerOperation(Op, DAG);
+    if (Tmp1.Val)
+      Result = Tmp1;
+    else
+      Result = DAG.getConstant(0, TLI.getPointerTy());
+    break;
+  case ISD::EXCEPTIONADDR: {
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    MVT::ValueType VT = Node->getValueType(0);
+    switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Expand: {
+      unsigned Reg = TLI.getExceptionAddressRegister();
+      Result = DAG.getCopyFromReg(Tmp1, Reg, VT).getValue(Op.ResNo);
+    }
+      break;
+    case TargetLowering::Custom:
+      Result = TLI.LowerOperation(Op, DAG);
+      if (Result.Val) break;
+      // Fall Thru
+    case TargetLowering::Legal: {
+      SDOperand Ops[] = { DAG.getConstant(0, VT), Tmp1 };
+      Result = DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, MVT::Other),
+                           Ops, 2).getValue(Op.ResNo);
+      break;
+    }
+    }
+    }
+    break;
+  case ISD::EHSELECTION: {
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    Tmp2 = LegalizeOp(Node->getOperand(1));
+    MVT::ValueType VT = Node->getValueType(0);
+    switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Expand: {
+      unsigned Reg = TLI.getExceptionSelectorRegister();
+      Result = DAG.getCopyFromReg(Tmp2, Reg, VT).getValue(Op.ResNo);
+    }
+      break;
+    case TargetLowering::Custom:
+      Result = TLI.LowerOperation(Op, DAG);
+      if (Result.Val) break;
+      // Fall Thru
+    case TargetLowering::Legal: {
+      SDOperand Ops[] = { DAG.getConstant(0, VT), Tmp2 };
+      Result = DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, MVT::Other),
+                           Ops, 2).getValue(Op.ResNo);
+      break;
+    }
+    }
+    }
+    break;
+  case ISD::EH_RETURN: {
+    MVT::ValueType VT = Node->getValueType(0);
+    // The only "good" option for this node is to custom lower it.
+    switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+    default: assert(0 && "This action is not supported at all!");
+    case TargetLowering::Custom:
+      Result = TLI.LowerOperation(Op, DAG);
+      if (Result.Val) break;
+      // Fall Thru
+    case TargetLowering::Legal:
+      // The target does not know how to lower this; lower it to a no-op.
+      Result = LegalizeOp(Node->getOperand(0));
+      break;
+    }
+    }
+    break;
+  case ISD::AssertSext:
+  case ISD::AssertZext:
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+    break;
+  case ISD::MERGE_VALUES:
+    // Legalize eliminates MERGE_VALUES nodes.
+    Result = Node->getOperand(Op.ResNo);
+    break;
+  case ISD::CopyFromReg:
+    Tmp1 = LegalizeOp(Node->getOperand(0));
+    Result = Op.getValue(0);
+    if (Node->getNumValues() == 2) {
+      Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+    } else {
+      assert(Node->getNumValues() == 3 && "Invalid copyfromreg!");
+      if (Node->getNumOperands() == 3) {
+        Tmp2 = LegalizeOp(Node->getOperand(2));
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1),Tmp2);
+      } else {
+        Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1));
+      }
+      AddLegalizedOperand(Op.getValue(2), Result.getValue(2));
+    }
+    // Since CopyFromReg produces two values, make sure to remember that we
+    // legalized both of them.
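+    // Editor's note (illustrative, not in the original patch): caching
+    // Op.getValue(0) (the value) and Op.getValue(1) (the chain) separately
+    // means a later LegalizeOp of either result is answered from the map
+    // rather than by re-legalizing this CopyFromReg.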
+ AddLegalizedOperand(Op.getValue(0), Result); + AddLegalizedOperand(Op.getValue(1), Result.getValue(1)); + return Result.getValue(Op.ResNo); + case ISD::UNDEF: { + MVT::ValueType VT = Op.getValueType(); + switch (TLI.getOperationAction(ISD::UNDEF, VT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Expand: + if (MVT::isInteger(VT)) + Result = DAG.getConstant(0, VT); + else if (MVT::isFloatingPoint(VT)) + Result = DAG.getConstantFP(0, VT); + else + assert(0 && "Unknown value type!"); + break; + case TargetLowering::Legal: + break; + } + break; + } + + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: { + SmallVector<SDOperand, 8> Ops; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) + Ops.push_back(LegalizeOp(Node->getOperand(i))); + Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + + // Allow the target to custom lower its intrinsics if it wants to. + if (TLI.getOperationAction(Node->getOpcode(), MVT::Other) == + TargetLowering::Custom) { + Tmp3 = TLI.LowerOperation(Result, DAG); + if (Tmp3.Val) Result = Tmp3; + } + + if (Result.Val->getNumValues() == 1) break; + + // Must have return value and chain result. + assert(Result.Val->getNumValues() == 2 && + "Cannot return more than two values!"); + + // Since loads produce two values, make sure to remember that we + // legalized both of them. + AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0)); + AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1)); + return Result.getValue(Op.ResNo); + } + + case ISD::LOCATION: + assert(Node->getNumOperands() == 5 && "Invalid LOCATION node!"); + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the input chain. + + switch (TLI.getOperationAction(ISD::LOCATION, MVT::Other)) { + case TargetLowering::Promote: + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Expand: { + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + bool useDEBUG_LOC = TLI.isOperationLegal(ISD::DEBUG_LOC, MVT::Other); + bool useLABEL = TLI.isOperationLegal(ISD::LABEL, MVT::Other); + + if (MMI && (useDEBUG_LOC || useLABEL)) { + const std::string &FName = + cast<StringSDNode>(Node->getOperand(3))->getValue(); + const std::string &DirName = + cast<StringSDNode>(Node->getOperand(4))->getValue(); + unsigned SrcFile = MMI->RecordSource(DirName, FName); + + SmallVector<SDOperand, 8> Ops; + Ops.push_back(Tmp1); // chain + SDOperand LineOp = Node->getOperand(1); + SDOperand ColOp = Node->getOperand(2); + + if (useDEBUG_LOC) { + Ops.push_back(LineOp); // line # + Ops.push_back(ColOp); // col # + Ops.push_back(DAG.getConstant(SrcFile, MVT::i32)); // source file id + Result = DAG.getNode(ISD::DEBUG_LOC, MVT::Other, &Ops[0], Ops.size()); + } else { + unsigned Line = cast<ConstantSDNode>(LineOp)->getValue(); + unsigned Col = cast<ConstantSDNode>(ColOp)->getValue(); + unsigned ID = MMI->RecordLabel(Line, Col, SrcFile); + Ops.push_back(DAG.getConstant(ID, MVT::i32)); + Result = DAG.getNode(ISD::LABEL, MVT::Other,&Ops[0],Ops.size()); + } + } else { + Result = Tmp1; // chain + } + break; + } + case TargetLowering::Legal: + if (Tmp1 != Node->getOperand(0) || + getTypeAction(Node->getOperand(1).getValueType()) == Promote) { + SmallVector<SDOperand, 8> Ops; + Ops.push_back(Tmp1); + if (getTypeAction(Node->getOperand(1).getValueType()) == Legal) { + Ops.push_back(Node->getOperand(1)); // line # must be legal. + Ops.push_back(Node->getOperand(2)); // col # must be legal. 
+ } else { + // Otherwise promote them. + Ops.push_back(PromoteOp(Node->getOperand(1))); + Ops.push_back(PromoteOp(Node->getOperand(2))); + } + Ops.push_back(Node->getOperand(3)); // filename must be legal. + Ops.push_back(Node->getOperand(4)); // working dir # must be legal. + Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + } + break; + } + break; + + case ISD::DEBUG_LOC: + assert(Node->getNumOperands() == 4 && "Invalid DEBUG_LOC node!"); + switch (TLI.getOperationAction(ISD::DEBUG_LOC, MVT::Other)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the line #. + Tmp3 = LegalizeOp(Node->getOperand(2)); // Legalize the col #. + Tmp4 = LegalizeOp(Node->getOperand(3)); // Legalize the source file id. + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3, Tmp4); + break; + } + break; + + case ISD::LABEL: + assert(Node->getNumOperands() == 2 && "Invalid LABEL node!"); + switch (TLI.getOperationAction(ISD::LABEL, MVT::Other)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the label id. + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2); + break; + case TargetLowering::Expand: + Result = LegalizeOp(Node->getOperand(0)); + break; + } + break; + + case ISD::Constant: + // We know we don't need to expand constants here, constants only have one + // value and we check that it is fine above. + + // FIXME: Maybe we should handle things like targets that don't support full + // 32-bit immediates? + break; + case ISD::ConstantFP: { + // Spill FP immediates to the constant pool if the target cannot directly + // codegen them. Targets often have some immediate values that can be + // efficiently generated into an FP register without a load. We explicitly + // leave these constants as ConstantFP nodes for the target to deal with. + ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node); + + // Check to see if this FP immediate is already legal. + bool isLegal = false; + for (TargetLowering::legal_fpimm_iterator I = TLI.legal_fpimm_begin(), + E = TLI.legal_fpimm_end(); I != E; ++I) + if (CFP->isExactlyValue(*I)) { + isLegal = true; + break; + } + + // If this is a legal constant, turn it into a TargetConstantFP node. + if (isLegal) { + Result = DAG.getTargetConstantFP(CFP->getValue(), CFP->getValueType(0)); + break; + } + + switch (TLI.getOperationAction(ISD::ConstantFP, CFP->getValueType(0))) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Custom: + Tmp3 = TLI.LowerOperation(Result, DAG); + if (Tmp3.Val) { + Result = Tmp3; + break; + } + // FALLTHROUGH + case TargetLowering::Expand: + Result = ExpandConstantFP(CFP, true, DAG, TLI); + } + break; + } + case ISD::TokenFactor: + if (Node->getNumOperands() == 2) { + Tmp1 = LegalizeOp(Node->getOperand(0)); + Tmp2 = LegalizeOp(Node->getOperand(1)); + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2); + } else if (Node->getNumOperands() == 3) { + Tmp1 = LegalizeOp(Node->getOperand(0)); + Tmp2 = LegalizeOp(Node->getOperand(1)); + Tmp3 = LegalizeOp(Node->getOperand(2)); + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3); + } else { + SmallVector<SDOperand, 8> Ops; + // Legalize the operands. 
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) + Ops.push_back(LegalizeOp(Node->getOperand(i))); + Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + } + break; + + case ISD::FORMAL_ARGUMENTS: + case ISD::CALL: + // The only option for this is to custom lower it. + Tmp3 = TLI.LowerOperation(Result.getValue(0), DAG); + assert(Tmp3.Val && "Target didn't custom lower this node!"); + assert(Tmp3.Val->getNumValues() == Result.Val->getNumValues() && + "Lowering call/formal_arguments produced unexpected # results!"); + + // Since CALL/FORMAL_ARGUMENTS nodes produce multiple values, make sure to + // remember that we legalized all of them, so it doesn't get relegalized. + for (unsigned i = 0, e = Tmp3.Val->getNumValues(); i != e; ++i) { + Tmp1 = LegalizeOp(Tmp3.getValue(i)); + if (Op.ResNo == i) + Tmp2 = Tmp1; + AddLegalizedOperand(SDOperand(Node, i), Tmp1); + } + return Tmp2; + + case ISD::BUILD_VECTOR: + switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Custom: + Tmp3 = TLI.LowerOperation(Result, DAG); + if (Tmp3.Val) { + Result = Tmp3; + break; + } + // FALLTHROUGH + case TargetLowering::Expand: + Result = ExpandBUILD_VECTOR(Result.Val); + break; + } + break; + case ISD::INSERT_VECTOR_ELT: + Tmp1 = LegalizeOp(Node->getOperand(0)); // InVec + Tmp2 = LegalizeOp(Node->getOperand(1)); // InVal + Tmp3 = LegalizeOp(Node->getOperand(2)); // InEltNo + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3); + + switch (TLI.getOperationAction(ISD::INSERT_VECTOR_ELT, + Node->getValueType(0))) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: + break; + case TargetLowering::Custom: + Tmp3 = TLI.LowerOperation(Result, DAG); + if (Tmp3.Val) { + Result = Tmp3; + break; + } + // FALLTHROUGH + case TargetLowering::Expand: { + // If the insert index is a constant, codegen this as a scalar_to_vector, + // then a shuffle that inserts it into the right position in the vector. + if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Tmp3)) { + SDOperand ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, + Tmp1.getValueType(), Tmp2); + + unsigned NumElts = MVT::getVectorNumElements(Tmp1.getValueType()); + MVT::ValueType ShufMaskVT = MVT::getIntVectorWithNumElements(NumElts); + MVT::ValueType ShufMaskEltVT = MVT::getVectorElementType(ShufMaskVT); + + // We generate a shuffle of InVec and ScVec, so the shuffle mask should + // be 0,1,2,3,4,5... with the appropriate element replaced with elt 0 of + // the RHS. + SmallVector<SDOperand, 8> ShufOps; + for (unsigned i = 0; i != NumElts; ++i) { + if (i != InsertPos->getValue()) + ShufOps.push_back(DAG.getConstant(i, ShufMaskEltVT)); + else + ShufOps.push_back(DAG.getConstant(NumElts, ShufMaskEltVT)); + } + SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, ShufMaskVT, + &ShufOps[0], ShufOps.size()); + + Result = DAG.getNode(ISD::VECTOR_SHUFFLE, Tmp1.getValueType(), + Tmp1, ScVec, ShufMask); + Result = LegalizeOp(Result); + break; + } + + // If the target doesn't support this, we have to spill the input vector + // to a temporary stack slot, update the element, then reload it. This is + // badness. We could also load the value into a vector register (either + // with a "move to register" or "extload into register" instruction, then + // permute it into place, if the idx is a constant and if the idx is + // supported by the target. 
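+      // Editor's worked example (assumed v4f32 input, not from the original
+      // patch): the sequence below amounts to
+      //   store  %vec -> %slot                 ; spill the whole vector
+      //   store  %val -> %slot + idx*4         ; 4 == sizeof(f32)
+      //   %result = load %slot                 ; reload the updated vector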
+ MVT::ValueType VT = Tmp1.getValueType(); + MVT::ValueType EltVT = Tmp2.getValueType(); + MVT::ValueType IdxVT = Tmp3.getValueType(); + MVT::ValueType PtrVT = TLI.getPointerTy(); + SDOperand StackPtr = CreateStackTemporary(VT); + // Store the vector. + SDOperand Ch = DAG.getStore(DAG.getEntryNode(), Tmp1, StackPtr, NULL, 0); + + // Truncate or zero extend offset to target pointer type. + unsigned CastOpc = (IdxVT > PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; + Tmp3 = DAG.getNode(CastOpc, PtrVT, Tmp3); + // Add the offset to the index. + unsigned EltSize = MVT::getSizeInBits(EltVT)/8; + Tmp3 = DAG.getNode(ISD::MUL, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT)); + SDOperand StackPtr2 = DAG.getNode(ISD::ADD, IdxVT, Tmp3, StackPtr); + // Store the scalar value. + Ch = DAG.getStore(Ch, Tmp2, StackPtr2, NULL, 0); + // Load the updated vector. + Result = DAG.getLoad(VT, Ch, StackPtr, NULL, 0); + break; + } + } + break; + case ISD::SCALAR_TO_VECTOR: + if (!TLI.isTypeLegal(Node->getOperand(0).getValueType())) { + Result = LegalizeOp(ExpandSCALAR_TO_VECTOR(Node)); + break; + } + + Tmp1 = LegalizeOp(Node->getOperand(0)); // InVal + Result = DAG.UpdateNodeOperands(Result, Tmp1); + switch (TLI.getOperationAction(ISD::SCALAR_TO_VECTOR, + Node->getValueType(0))) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: + break; + case TargetLowering::Custom: + Tmp3 = TLI.LowerOperation(Result, DAG); + if (Tmp3.Val) { + Result = Tmp3; + break; + } + // FALLTHROUGH + case TargetLowering::Expand: + Result = LegalizeOp(ExpandSCALAR_TO_VECTOR(Node)); + break; + } + break; + case ISD::VECTOR_SHUFFLE: + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the input vectors, + Tmp2 = LegalizeOp(Node->getOperand(1)); // but not the shuffle mask. + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2)); + + // Allow targets to custom lower the SHUFFLEs they support. + switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE,Result.getValueType())) { + default: assert(0 && "Unknown operation action!"); + case TargetLowering::Legal: + assert(isShuffleLegal(Result.getValueType(), Node->getOperand(2)) && + "vector shuffle should not be created if not legal!"); + break; + case TargetLowering::Custom: + Tmp3 = TLI.LowerOperation(Result, DAG); + if (Tmp3.Val) { + Result = Tmp3; + break; + } + // FALLTHROUGH + case TargetLowering::Expand: { + MVT::ValueType VT = Node->getValueType(0); + MVT::ValueType EltVT = MVT::getVectorElementType(VT); + MVT::ValueType PtrVT = TLI.getPointerTy(); + SDOperand Mask = Node->getOperand(2); + unsigned NumElems = Mask.getNumOperands(); + SmallVector<SDOperand,8> Ops; + for (unsigned i = 0; i != NumElems; ++i) { + SDOperand Arg = Mask.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) { + Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT)); + } else { + assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); + unsigned Idx = cast<ConstantSDNode>(Arg)->getValue(); + if (Idx < NumElems) + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Tmp1, + DAG.getConstant(Idx, PtrVT))); + else + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Tmp2, + DAG.getConstant(Idx - NumElems, PtrVT))); + } + } + Result = DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size()); + break; + } + case TargetLowering::Promote: { + // Change base type to a different vector type. + MVT::ValueType OVT = Node->getValueType(0); + MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); + + // Cast the two input vectors. 
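+      // Editor's sketch (hypothetical types, not part of the original
+      // patch): e.g. a v4f32 shuffle promoted to v4i32 becomes
+      //   (vector_shuffle:v4f32 A, B, M)
+      //     -> (bit_convert:v4f32 (vector_shuffle:v4i32 A', B', M'))
+      // where A' and B' are bit_converts of A and B and M' is the
+      // (possibly renumbered) mask returned by isShuffleLegal.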
+ Tmp1 = DAG.getNode(ISD::BIT_CONVERT, NVT, Tmp1); + Tmp2 = DAG.getNode(ISD::BIT_CONVERT, NVT, Tmp2); + + // Convert the shuffle mask to the right # elements. + Tmp3 = SDOperand(isShuffleLegal(OVT, Node->getOperand(2)), 0); + assert(Tmp3.Val && "Shuffle not legal?"); + Result = DAG.getNode(ISD::VECTOR_SHUFFLE, NVT, Tmp1, Tmp2, Tmp3); + Result = DAG.getNode(ISD::BIT_CONVERT, OVT, Result); + break; + } + } + break; + + case ISD::EXTRACT_VECTOR_ELT: + Tmp1 = Node->getOperand(0); + Tmp2 = LegalizeOp(Node->getOperand(1)); + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2); + Result = ExpandEXTRACT_VECTOR_ELT(Result); + break; + + case ISD::EXTRACT_SUBVECTOR: + Tmp1 = Node->getOperand(0); + Tmp2 = LegalizeOp(Node->getOperand(1)); + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2); + Result = ExpandEXTRACT_SUBVECTOR(Result); + break; + + case ISD::CALLSEQ_START: { + SDNode *CallEnd = FindCallEndFromCallStart(Node); + + // Recursively Legalize all of the inputs of the call end that do not lead + // to this call start. This ensures that any libcalls that need be inserted + // are inserted *before* the CALLSEQ_START. + {SmallPtrSet<SDNode*, 32> NodesLeadingTo; + for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i) + LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).Val, Node, + NodesLeadingTo); + } + + // Now that we legalized all of the inputs (which may have inserted + // libcalls) create the new CALLSEQ_START node. + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + + // Merge in the last call, to ensure that this call start after the last + // call ended. + if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { + Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END); + Tmp1 = LegalizeOp(Tmp1); + } + + // Do not try to legalize the target-specific arguments (#1+). + if (Tmp1 != Node->getOperand(0)) { + SmallVector<SDOperand, 8> Ops(Node->op_begin(), Node->op_end()); + Ops[0] = Tmp1; + Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + } + + // Remember that the CALLSEQ_START is legalized. + AddLegalizedOperand(Op.getValue(0), Result); + if (Node->getNumValues() == 2) // If this has a flag result, remember it. + AddLegalizedOperand(Op.getValue(1), Result.getValue(1)); + + // Now that the callseq_start and all of the non-call nodes above this call + // sequence have been legalized, legalize the call itself. During this + // process, no libcalls can/will be inserted, guaranteeing that no calls + // can overlap. + assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); + SDOperand InCallSEQ = LastCALLSEQ_END; + // Note that we are selecting this call! + LastCALLSEQ_END = SDOperand(CallEnd, 0); + IsLegalizingCall = true; + + // Legalize the call, starting from the CALLSEQ_END. + LegalizeOp(LastCALLSEQ_END); + assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); + return Result; + } + case ISD::CALLSEQ_END: + // If the CALLSEQ_START node hasn't been legalized first, legalize it. This + // will cause this node to be legalized as well as handling libcalls right. + if (LastCALLSEQ_END.Val != Node) { + LegalizeOp(SDOperand(FindCallStartFromCallEnd(Node), 0)); + DenseMap<SDOperand, SDOperand>::iterator I = LegalizedNodes.find(Op); + assert(I != LegalizedNodes.end() && + "Legalizing the call start should have legalized this node!"); + return I->second; + } + + // Otherwise, the call start has been legalized and everything is going + // according to plan. Just legalize ourselves normally here. 
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + // Do not try to legalize the target-specific arguments (#1+), except for + // an optional flag input. + if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag){ + if (Tmp1 != Node->getOperand(0)) { + SmallVector<SDOperand, 8> Ops(Node->op_begin(), Node->op_end()); + Ops[0] = Tmp1; + Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + } + } else { + Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1)); + if (Tmp1 != Node->getOperand(0) || + Tmp2 != Node->getOperand(Node->getNumOperands()-1)) { + SmallVector<SDOperand, 8> Ops(Node->op_begin(), Node->op_end()); + Ops[0] = Tmp1; + Ops.back() = Tmp2; + Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + } + } + assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); + // This finishes up call legalization. + IsLegalizingCall = false; + + // If the CALLSEQ_END node has a flag, remember that we legalized it. + AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0)); + if (Node->getNumValues() == 2) + AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1)); + return Result.getValue(Op.ResNo); + case ISD::DYNAMIC_STACKALLOC: { + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the size. + Tmp3 = LegalizeOp(Node->getOperand(2)); // Legalize the alignment. + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3); + + Tmp1 = Result.getValue(0); + Tmp2 = Result.getValue(1); + switch (TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0))) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Expand: { + unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); + assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" + " not tell us which reg is the stack pointer!"); + SDOperand Chain = Tmp1.getOperand(0); + SDOperand Size = Tmp2.getOperand(1); + SDOperand SP = DAG.getCopyFromReg(Chain, SPReg, Node->getValueType(0)); + Tmp1 = DAG.getNode(ISD::SUB, Node->getValueType(0), SP, Size); // Value + Tmp2 = DAG.getCopyToReg(SP.getValue(1), SPReg, Tmp1); // Output chain + Tmp1 = LegalizeOp(Tmp1); + Tmp2 = LegalizeOp(Tmp2); + break; + } + case TargetLowering::Custom: + Tmp3 = TLI.LowerOperation(Tmp1, DAG); + if (Tmp3.Val) { + Tmp1 = LegalizeOp(Tmp3); + Tmp2 = LegalizeOp(Tmp3.getValue(1)); + } + break; + case TargetLowering::Legal: + break; + } + // Since this op produce two values, make sure to remember that we + // legalized both of them. + AddLegalizedOperand(SDOperand(Node, 0), Tmp1); + AddLegalizedOperand(SDOperand(Node, 1), Tmp2); + return Op.ResNo ? Tmp2 : Tmp1; + } + case ISD::INLINEASM: { + SmallVector<SDOperand, 8> Ops(Node->op_begin(), Node->op_end()); + bool Changed = false; + // Legalize all of the operands of the inline asm, in case they are nodes + // that need to be expanded or something. Note we skip the asm string and + // all of the TargetConstant flags. 
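The DYNAMIC_STACKALLOC Expand path above is a plain stack-pointer subtraction; no alignment rounding is emitted, which is sound whenever the requested alignment does not exceed the stack alignment. A standalone model of the arithmetic, with invented addresses and a downward-growing stack:

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  int main() {
    uint64_t SP = 0x7fff0000;     // value read back from the SP register
    uint64_t Size = 40;           // the legalized size operand
    uint64_t NewSP = SP - Size;   // ISD::SUB: allocate downward
    // NewSP is both the DYNAMIC_STACKALLOC result and the value copied
    // back into the stack-pointer register.
    assert(NewSP < SP);
    printf("alloca at 0x%llx\n", (unsigned long long)NewSP);
  }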
+ SDOperand Op = LegalizeOp(Ops[0]); + Changed = Op != Ops[0]; + Ops[0] = Op; + + bool HasInFlag = Ops.back().getValueType() == MVT::Flag; + for (unsigned i = 2, e = Ops.size()-HasInFlag; i < e; ) { + unsigned NumVals = cast<ConstantSDNode>(Ops[i])->getValue() >> 3; + for (++i; NumVals; ++i, --NumVals) { + SDOperand Op = LegalizeOp(Ops[i]); + if (Op != Ops[i]) { + Changed = true; + Ops[i] = Op; + } + } + } + + if (HasInFlag) { + Op = LegalizeOp(Ops.back()); + Changed |= Op != Ops.back(); + Ops.back() = Op; + } + + if (Changed) + Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + + // INLINE asm returns a chain and flag, make sure to add both to the map. + AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0)); + AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1)); + return Result.getValue(Op.ResNo); + } + case ISD::BR: + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + // Ensure that libcalls are emitted before a branch. + Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END); + Tmp1 = LegalizeOp(Tmp1); + LastCALLSEQ_END = DAG.getEntryNode(); + + Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1)); + break; + case ISD::BRIND: + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + // Ensure that libcalls are emitted before a branch. + Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END); + Tmp1 = LegalizeOp(Tmp1); + LastCALLSEQ_END = DAG.getEntryNode(); + + switch (getTypeAction(Node->getOperand(1).getValueType())) { + default: assert(0 && "Indirect target must be legal type (pointer)!"); + case Legal: + Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the condition. + break; + } + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2); + break; + case ISD::BR_JT: + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + // Ensure that libcalls are emitted before a branch. + Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END); + Tmp1 = LegalizeOp(Tmp1); + LastCALLSEQ_END = DAG.getEntryNode(); + + Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the jumptable node. 
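The `>> 3` in the inline-asm walk above unpacks the per-group operand count from a flag word whose low three bits encode the operand kind. A simplified standalone sketch of the same walk over a plain array (made-up flag values, ignoring the chain, asm-string, and flag slots the real node carries):

  #include <cstdio>

  int main() {
    // Fake operand list: each group is a flag word followed by NumVals
    // entries. flag = kind | (NumVals << 3); kinds here are stand-ins.
    unsigned Ops[] = { (2u << 3) | 1, 100, 101,   // group of 2 operands
                       (1u << 3) | 3, 200 };      // group of 1 operand
    unsigned e = sizeof(Ops) / sizeof(Ops[0]);
    for (unsigned i = 0; i < e; ) {
      unsigned NumVals = Ops[i] >> 3;             // same shift as above
      printf("group kind %u:", Ops[i] & 7);
      for (++i; NumVals; ++i, --NumVals)          // visit the group members
        printf(" %u", Ops[i]);
      printf("\n");
    }
  }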
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2)); + + switch (TLI.getOperationAction(ISD::BR_JT, MVT::Other)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: break; + case TargetLowering::Custom: + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.Val) Result = Tmp1; + break; + case TargetLowering::Expand: { + SDOperand Chain = Result.getOperand(0); + SDOperand Table = Result.getOperand(1); + SDOperand Index = Result.getOperand(2); + + MVT::ValueType PTy = TLI.getPointerTy(); + MachineFunction &MF = DAG.getMachineFunction(); + unsigned EntrySize = MF.getJumpTableInfo()->getEntrySize(); + Index= DAG.getNode(ISD::MUL, PTy, Index, DAG.getConstant(EntrySize, PTy)); + SDOperand Addr = DAG.getNode(ISD::ADD, PTy, Index, Table); + + SDOperand LD; + switch (EntrySize) { + default: assert(0 && "Size of jump table not supported yet."); break; + case 4: LD = DAG.getLoad(MVT::i32, Chain, Addr, NULL, 0); break; + case 8: LD = DAG.getLoad(MVT::i64, Chain, Addr, NULL, 0); break; + } + + if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) { + // For PIC, the sequence is: + // BRIND(load(Jumptable + index) + RelocBase) + // RelocBase is the JumpTable on PPC and X86, GOT on Alpha + SDOperand Reloc; + if (TLI.usesGlobalOffsetTable()) + Reloc = DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, PTy); + else + Reloc = Table; + Addr = (PTy != MVT::i32) ? DAG.getNode(ISD::SIGN_EXTEND, PTy, LD) : LD; + Addr = DAG.getNode(ISD::ADD, PTy, Addr, Reloc); + Result = DAG.getNode(ISD::BRIND, MVT::Other, LD.getValue(1), Addr); + } else { + Result = DAG.getNode(ISD::BRIND, MVT::Other, LD.getValue(1), LD); + } + } + } + break; + case ISD::BRCOND: + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + // Ensure that libcalls are emitted before a return. + Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END); + Tmp1 = LegalizeOp(Tmp1); + LastCALLSEQ_END = DAG.getEntryNode(); + + switch (getTypeAction(Node->getOperand(1).getValueType())) { + case Expand: assert(0 && "It's impossible to expand bools"); + case Legal: + Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the condition. + break; + case Promote: + Tmp2 = PromoteOp(Node->getOperand(1)); // Promote the condition. + + // The top bits of the promoted condition are not necessarily zero, ensure + // that the value is properly zero extended. + if (!DAG.MaskedValueIsZero(Tmp2, + MVT::getIntVTBitMask(Tmp2.getValueType())^1)) + Tmp2 = DAG.getZeroExtendInReg(Tmp2, MVT::i1); + break; + } + + // Basic block destination (Op#2) is always legal. + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2)); + + switch (TLI.getOperationAction(ISD::BRCOND, MVT::Other)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: break; + case TargetLowering::Custom: + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.Val) Result = Tmp1; + break; + case TargetLowering::Expand: + // Expand brcond's setcc into its constituent parts and create a BR_CC + // Node. + if (Tmp2.getOpcode() == ISD::SETCC) { + Result = DAG.getNode(ISD::BR_CC, MVT::Other, Tmp1, Tmp2.getOperand(2), + Tmp2.getOperand(0), Tmp2.getOperand(1), + Node->getOperand(2)); + } else { + Result = DAG.getNode(ISD::BR_CC, MVT::Other, Tmp1, + DAG.getCondCode(ISD::SETNE), Tmp2, + DAG.getConstant(0, Tmp2.getValueType()), + Node->getOperand(2)); + } + break; + } + break; + case ISD::BR_CC: + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. 
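The BR_JT expansion above computes Table + Index*EntrySize, loads the entry, and for PIC adds the relocation base back in. The same address arithmetic over a plain array, with invented table-relative offsets and 4-byte entries:

  #include <cstdint>
  #include <cstdio>

  int main() {
    // Jump table with PIC-style entries: offsets relative to the table.
    int32_t Table[3] = {0x40, 0x80, 0xc0};
    unsigned Index = 1, EntrySize = sizeof(Table[0]);

    // Addr = Table + Index * EntrySize, then load the entry.
    char *Addr = (char *)Table + Index * EntrySize;
    int32_t LD = *(int32_t *)Addr;

    // PIC: BRIND(load(Table + Index*EntrySize) + RelocBase); here the
    // reloc base is the table address itself, as on PPC and X86.
    uintptr_t Target = (uintptr_t)Table + (intptr_t)LD;
    printf("branch to table%+d = %p\n", LD, (void *)Target);
  }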
+ // Ensure that libcalls are emitted before a branch.
+ Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END);
+ Tmp1 = LegalizeOp(Tmp1);
+ Tmp2 = Node->getOperand(2); // LHS
+ Tmp3 = Node->getOperand(3); // RHS
+ Tmp4 = Node->getOperand(1); // CC
+
+ LegalizeSetCCOperands(Tmp2, Tmp3, Tmp4);
+ LastCALLSEQ_END = DAG.getEntryNode();
+
+ // If we didn't get both a LHS and RHS back from LegalizeSetCCOperands,
+ // the LHS is a legal SETCC itself. In this case, we need to compare
+ // the result against zero to select between true and false values.
+ if (Tmp3.Val == 0) {
+ Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
+ Tmp4 = DAG.getCondCode(ISD::SETNE);
+ }
+
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp4, Tmp2, Tmp3,
+ Node->getOperand(4));
+
+ switch (TLI.getOperationAction(ISD::BR_CC, Tmp3.getValueType())) {
+ default: assert(0 && "Unexpected action for BR_CC!");
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom:
+ Tmp4 = TLI.LowerOperation(Result, DAG);
+ if (Tmp4.Val) Result = Tmp4;
+ break;
+ }
+ break;
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain.
+ Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer.
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD) {
+ MVT::ValueType VT = Node->getValueType(0);
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+ Tmp3 = Result.getValue(0);
+ Tmp4 = Result.getValue(1);
+
+ switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Legal: break;
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Tmp3, DAG);
+ if (Tmp1.Val) {
+ Tmp3 = LegalizeOp(Tmp1);
+ Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ }
+ break;
+ case TargetLowering::Promote: {
+ // Only promote a load of vector type to another.
+ assert(MVT::isVector(VT) && "Cannot promote this load!");
+ // Change base type to a different vector type.
+ MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+
+ Tmp1 = DAG.getLoad(NVT, Tmp1, Tmp2, LD->getSrcValue(),
+ LD->getSrcValueOffset(),
+ LD->isVolatile(), LD->getAlignment());
+ Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, VT, Tmp1));
+ Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ break;
+ }
+ }
+ // Since loads produce two values, make sure to remember that we
+ // legalized both of them.
+ AddLegalizedOperand(SDOperand(Node, 0), Tmp3);
+ AddLegalizedOperand(SDOperand(Node, 1), Tmp4);
+ return Op.ResNo ? Tmp4 : Tmp3;
+ } else {
+ MVT::ValueType SrcVT = LD->getLoadedVT();
+ switch (TLI.getLoadXAction(ExtType, SrcVT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Promote:
+ assert(SrcVT == MVT::i1 &&
+ "Can only promote extending LOAD from i1 -> i8!");
+ Result = DAG.getExtLoad(ExtType, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ MVT::i8, LD->isVolatile(), LD->getAlignment());
+ Tmp1 = Result.getValue(0);
+ Tmp2 = Result.getValue(1);
+ break;
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+ Tmp1 = Result.getValue(0);
+ Tmp2 = Result.getValue(1);
+
+ if (isCustom) {
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.Val) {
+ Tmp1 = LegalizeOp(Tmp3);
+ Tmp2 = LegalizeOp(Tmp3.getValue(1));
+ }
+ }
+ break;
+ case TargetLowering::Expand:
+ // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND
+ if (SrcVT == MVT::f32 && Node->getValueType(0) == MVT::f64) {
+ SDOperand Load = DAG.getLoad(SrcVT, Tmp1, Tmp2, LD->getSrcValue(),
+ LD->getSrcValueOffset(),
+ LD->isVolatile(), LD->getAlignment());
+ Result = DAG.getNode(ISD::FP_EXTEND, Node->getValueType(0), Load);
+ Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Load.getValue(1));
+ break;
+ }
+ assert(ExtType != ISD::EXTLOAD && "EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an explicit
+ // zero/sign extend inreg.
+ Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getSrcValue(),
+ LD->getSrcValueOffset(), SrcVT,
+ LD->isVolatile(), LD->getAlignment());
+ SDOperand ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, SrcVT);
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes.
+ break;
+ }
+ // Since loads produce two values, make sure to remember that we legalized
+ // both of them.
+ AddLegalizedOperand(SDOperand(Node, 0), Tmp1);
+ AddLegalizedOperand(SDOperand(Node, 1), Tmp2);
+ return Op.ResNo ? Tmp2 : Tmp1;
+ }
+ }
+ case ISD::EXTRACT_ELEMENT: {
+ MVT::ValueType OpTy = Node->getOperand(0).getValueType();
+ switch (getTypeAction(OpTy)) {
+ default: assert(0 && "EXTRACT_ELEMENT action for type unimplemented!");
+ case Legal:
+ if (cast<ConstantSDNode>(Node->getOperand(1))->getValue()) {
+ // 1 -> Hi
+ Result = DAG.getNode(ISD::SRL, OpTy, Node->getOperand(0),
+ DAG.getConstant(MVT::getSizeInBits(OpTy)/2,
+ TLI.getShiftAmountTy()));
+ Result = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0), Result);
+ } else {
+ // 0 -> Lo
+ Result = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0),
+ Node->getOperand(0));
+ }
+ break;
+ case Expand:
+ // Get both the low and high parts.
+ ExpandOp(Node->getOperand(0), Tmp1, Tmp2);
+ if (cast<ConstantSDNode>(Node->getOperand(1))->getValue())
+ Result = Tmp2; // 1 -> Hi
+ else
+ Result = Tmp1; // 0 -> Lo
+ break;
+ }
+ break;
+ }
+
+ case ISD::CopyToReg:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+
+ assert(isTypeLegal(Node->getOperand(2).getValueType()) &&
+ "Register type must be legal!");
+ // Legalize the incoming value (must be a legal type).
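The Legal path of EXTRACT_ELEMENT above is just a shift plus a truncate. A two-assert standalone check of the Hi/Lo split for an i64 taken apart into two i32 pieces:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t X = 0x1122334455667788ULL;
    uint32_t Lo = (uint32_t)X;          // 0 -> Lo: plain truncate
    uint32_t Hi = (uint32_t)(X >> 32);  // 1 -> Hi: SRL by half width, truncate
    assert(Lo == 0x55667788u && Hi == 0x11223344u);
  }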
+ Tmp2 = LegalizeOp(Node->getOperand(2)); + if (Node->getNumValues() == 1) { + Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1), Tmp2); + } else { + assert(Node->getNumValues() == 2 && "Unknown CopyToReg"); + if (Node->getNumOperands() == 4) { + Tmp3 = LegalizeOp(Node->getOperand(3)); + Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1), Tmp2, + Tmp3); + } else { + Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1),Tmp2); + } + + // Since this produces two values, make sure to remember that we legalized + // both of them. + AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0)); + AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1)); + return Result; + } + break; + + case ISD::RET: + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + + // Ensure that libcalls are emitted before a return. + Tmp1 = DAG.getNode(ISD::TokenFactor, MVT::Other, Tmp1, LastCALLSEQ_END); + Tmp1 = LegalizeOp(Tmp1); + LastCALLSEQ_END = DAG.getEntryNode(); + + switch (Node->getNumOperands()) { + case 3: // ret val + Tmp2 = Node->getOperand(1); + Tmp3 = Node->getOperand(2); // Signness + switch (getTypeAction(Tmp2.getValueType())) { + case Legal: + Result = DAG.UpdateNodeOperands(Result, Tmp1, LegalizeOp(Tmp2), Tmp3); + break; + case Expand: + if (!MVT::isVector(Tmp2.getValueType())) { + SDOperand Lo, Hi; + ExpandOp(Tmp2, Lo, Hi); + + // Big endian systems want the hi reg first. + if (!TLI.isLittleEndian()) + std::swap(Lo, Hi); + + if (Hi.Val) + Result = DAG.getNode(ISD::RET, MVT::Other, Tmp1, Lo, Tmp3, Hi,Tmp3); + else + Result = DAG.getNode(ISD::RET, MVT::Other, Tmp1, Lo, Tmp3); + Result = LegalizeOp(Result); + } else { + SDNode *InVal = Tmp2.Val; + unsigned NumElems = MVT::getVectorNumElements(InVal->getValueType(0)); + MVT::ValueType EVT = MVT::getVectorElementType(InVal->getValueType(0)); + + // Figure out if there is a simple type corresponding to this Vector + // type. If so, convert to the vector type. + MVT::ValueType TVT = MVT::getVectorType(EVT, NumElems); + if (TLI.isTypeLegal(TVT)) { + // Turn this into a return of the vector type. + Tmp2 = LegalizeOp(Tmp2); + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3); + } else if (NumElems == 1) { + // Turn this into a return of the scalar type. + Tmp2 = ScalarizeVectorOp(Tmp2); + Tmp2 = LegalizeOp(Tmp2); + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3); + + // FIXME: Returns of gcc generic vectors smaller than a legal type + // should be returned in integer registers! + + // The scalarized value type may not be legal, e.g. it might require + // promotion or expansion. Relegalize the return. + Result = LegalizeOp(Result); + } else { + // FIXME: Returns of gcc generic vectors larger than a legal vector + // type should be returned by reference! 
+ SDOperand Lo, Hi; + SplitVectorOp(Tmp2, Lo, Hi); + Result = DAG.getNode(ISD::RET, MVT::Other, Tmp1, Lo, Tmp3, Hi,Tmp3); + Result = LegalizeOp(Result); + } + } + break; + case Promote: + Tmp2 = PromoteOp(Node->getOperand(1)); + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3); + Result = LegalizeOp(Result); + break; + } + break; + case 1: // ret void + Result = DAG.UpdateNodeOperands(Result, Tmp1); + break; + default: { // ret <values> + SmallVector<SDOperand, 8> NewValues; + NewValues.push_back(Tmp1); + for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) + switch (getTypeAction(Node->getOperand(i).getValueType())) { + case Legal: + NewValues.push_back(LegalizeOp(Node->getOperand(i))); + NewValues.push_back(Node->getOperand(i+1)); + break; + case Expand: { + SDOperand Lo, Hi; + assert(!MVT::isExtendedVT(Node->getOperand(i).getValueType()) && + "FIXME: TODO: implement returning non-legal vector types!"); + ExpandOp(Node->getOperand(i), Lo, Hi); + NewValues.push_back(Lo); + NewValues.push_back(Node->getOperand(i+1)); + if (Hi.Val) { + NewValues.push_back(Hi); + NewValues.push_back(Node->getOperand(i+1)); + } + break; + } + case Promote: + assert(0 && "Can't promote multiple return value yet!"); + } + + if (NewValues.size() == Node->getNumOperands()) + Result = DAG.UpdateNodeOperands(Result, &NewValues[0],NewValues.size()); + else + Result = DAG.getNode(ISD::RET, MVT::Other, + &NewValues[0], NewValues.size()); + break; + } + } + + if (Result.getOpcode() == ISD::RET) { + switch (TLI.getOperationAction(Result.getOpcode(), MVT::Other)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: break; + case TargetLowering::Custom: + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.Val) Result = Tmp1; + break; + } + } + break; + case ISD::STORE: { + StoreSDNode *ST = cast<StoreSDNode>(Node); + Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain. + Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer. + int SVOffset = ST->getSrcValueOffset(); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + + if (!ST->isTruncatingStore()) { + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + // FIXME: We shouldn't do this for TargetConstantFP's. + // FIXME: move this to the DAG Combiner! Note that we can't regress due + // to phase ordering between legalized code and the dag combiner. This + // probably means that we need to integrate dag combiner and legalizer + // together. 
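Expanding an illegal scalar return above splits the value into Lo/Hi halves, and big-endian targets want the Hi register first. A standalone sketch of that ordering rule for a hypothetical i64 returned in two i32 registers:

  #include <cstdint>
  #include <cstdio>
  #include <utility>

  int main() {
    uint64_t Val = 0xAABBCCDD11223344ULL;
    uint32_t Lo = (uint32_t)Val, Hi = (uint32_t)(Val >> 32);
    bool IsLittleEndian = false;    // pretend big-endian target
    if (!IsLittleEndian)
      std::swap(Lo, Hi);            // the Hi half goes in the first slot
    printf("ret parts: 0x%08x, 0x%08x\n", Lo, Hi);
  }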
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { + if (CFP->getValueType(0) == MVT::f32) { + Tmp3 = DAG.getConstant(FloatToBits(CFP->getValue()), MVT::i32); + } else { + assert(CFP->getValueType(0) == MVT::f64 && "Unknown FP type!"); + Tmp3 = DAG.getConstant(DoubleToBits(CFP->getValue()), MVT::i64); + } + Result = DAG.getStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + break; + } + + switch (getTypeAction(ST->getStoredVT())) { + case Legal: { + Tmp3 = LegalizeOp(ST->getValue()); + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, + ST->getOffset()); + + MVT::ValueType VT = Tmp3.getValueType(); + switch (TLI.getOperationAction(ISD::STORE, VT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: break; + case TargetLowering::Custom: + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.Val) Result = Tmp1; + break; + case TargetLowering::Promote: + assert(MVT::isVector(VT) && "Unknown legal promote case!"); + Tmp3 = DAG.getNode(ISD::BIT_CONVERT, + TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); + Result = DAG.getStore(Tmp1, Tmp3, Tmp2, + ST->getSrcValue(), SVOffset, isVolatile, + Alignment); + break; + } + break; + } + case Promote: + // Truncate the value and store the result. + Tmp3 = PromoteOp(ST->getValue()); + Result = DAG.getTruncStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, ST->getStoredVT(), + isVolatile, Alignment); + break; + + case Expand: + unsigned IncrementSize = 0; + SDOperand Lo, Hi; + + // If this is a vector type, then we have to calculate the increment as + // the product of the element size in bytes, and the number of elements + // in the high half of the vector. + if (MVT::isVector(ST->getValue().getValueType())) { + SDNode *InVal = ST->getValue().Val; + unsigned NumElems = MVT::getVectorNumElements(InVal->getValueType(0)); + MVT::ValueType EVT = MVT::getVectorElementType(InVal->getValueType(0)); + + // Figure out if there is a simple type corresponding to this Vector + // type. If so, convert to the vector type. + MVT::ValueType TVT = MVT::getVectorType(EVT, NumElems); + if (TLI.isTypeLegal(TVT)) { + // Turn this into a normal store of the vector type. + Tmp3 = LegalizeOp(Node->getOperand(1)); + Result = DAG.getStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + Result = LegalizeOp(Result); + break; + } else if (NumElems == 1) { + // Turn this into a normal store of the scalar type. + Tmp3 = ScalarizeVectorOp(Node->getOperand(1)); + Result = DAG.getStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + // The scalarized value type may not be legal, e.g. it might require + // promotion or expansion. Relegalize the scalar store. + Result = LegalizeOp(Result); + break; + } else { + SplitVectorOp(Node->getOperand(1), Lo, Hi); + IncrementSize = NumElems/2 * MVT::getSizeInBits(EVT)/8; + } + } else { + ExpandOp(Node->getOperand(1), Lo, Hi); + IncrementSize = Hi.Val ? MVT::getSizeInBits(Hi.getValueType())/8 : 0; + + if (!TLI.isLittleEndian()) + std::swap(Lo, Hi); + } + + Lo = DAG.getStore(Tmp1, Lo, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + + if (Hi.Val == NULL) { + // Must be int <-> float one-to-one expansion. 
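FloatToBits/DoubleToBits above are pure bit reinterpretations: the legalizer stores the integer image of the FP constant. A sketch of the equivalence, using memcpy as the portable stand-in for the reinterpretation:

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  static uint32_t FloatToBitsModel(float F) {
    uint32_t Bits;
    std::memcpy(&Bits, &F, sizeof(Bits));   // reinterpret, no conversion
    return Bits;
  }

  int main() {
    // 'store float 1.0, Ptr' becomes 'store i32 0x3F800000, Ptr'.
    assert(FloatToBitsModel(1.0f) == 0x3F800000u);
  }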
+ Result = Lo; + break; + } + + Tmp2 = DAG.getNode(ISD::ADD, Tmp2.getValueType(), Tmp2, + getIntPtrConstant(IncrementSize)); + assert(isTypeLegal(Tmp2.getValueType()) && + "Pointers must be legal!"); + SVOffset += IncrementSize; + if (Alignment > IncrementSize) + Alignment = IncrementSize; + Hi = DAG.getStore(Tmp1, Hi, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + Result = DAG.getNode(ISD::TokenFactor, MVT::Other, Lo, Hi); + break; + } + } else { + // Truncating store + assert(isTypeLegal(ST->getValue().getValueType()) && + "Cannot handle illegal TRUNCSTORE yet!"); + Tmp3 = LegalizeOp(ST->getValue()); + + // The only promote case we handle is TRUNCSTORE:i1 X into + // -> TRUNCSTORE:i8 (and X, 1) + if (ST->getStoredVT() == MVT::i1 && + TLI.getStoreXAction(MVT::i1) == TargetLowering::Promote) { + // Promote the bool to a mask then store. + Tmp3 = DAG.getNode(ISD::AND, Tmp3.getValueType(), Tmp3, + DAG.getConstant(1, Tmp3.getValueType())); + Result = DAG.getTruncStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, MVT::i8, + isVolatile, Alignment); + } else if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || + Tmp2 != ST->getBasePtr()) { + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, + ST->getOffset()); + } + + MVT::ValueType StVT = cast<StoreSDNode>(Result.Val)->getStoredVT(); + switch (TLI.getStoreXAction(StVT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: break; + case TargetLowering::Custom: + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.Val) Result = Tmp1; + break; + } + } + break; + } + case ISD::PCMARKER: + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1)); + break; + case ISD::STACKSAVE: + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + Result = DAG.UpdateNodeOperands(Result, Tmp1); + Tmp1 = Result.getValue(0); + Tmp2 = Result.getValue(1); + + switch (TLI.getOperationAction(ISD::STACKSAVE, MVT::Other)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: break; + case TargetLowering::Custom: + Tmp3 = TLI.LowerOperation(Result, DAG); + if (Tmp3.Val) { + Tmp1 = LegalizeOp(Tmp3); + Tmp2 = LegalizeOp(Tmp3.getValue(1)); + } + break; + case TargetLowering::Expand: + // Expand to CopyFromReg if the target set + // StackPointerRegisterToSaveRestore. + if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + Tmp1 = DAG.getCopyFromReg(Result.getOperand(0), SP, + Node->getValueType(0)); + Tmp2 = Tmp1.getValue(1); + } else { + Tmp1 = DAG.getNode(ISD::UNDEF, Node->getValueType(0)); + Tmp2 = Node->getOperand(0); + } + break; + } + + // Since stacksave produce two values, make sure to remember that we + // legalized both of them. + AddLegalizedOperand(SDOperand(Node, 0), Tmp1); + AddLegalizedOperand(SDOperand(Node, 1), Tmp2); + return Op.ResNo ? Tmp2 : Tmp1; + + case ISD::STACKRESTORE: + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the pointer. + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2); + + switch (TLI.getOperationAction(ISD::STACKRESTORE, MVT::Other)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: break; + case TargetLowering::Custom: + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.Val) Result = Tmp1; + break; + case TargetLowering::Expand: + // Expand to CopyToReg if the target set + // StackPointerRegisterToSaveRestore. 
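The only promoted truncating store above is i1, which becomes an i8 store of the masked low bit, since the in-register bool may carry garbage in its high bits. A standalone check of the masking rule:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t Bool = 0xFFFFFFFE;     // i1 'false' with junk high bits
    uint8_t Mem = (uint8_t)(Bool & 1);   // AND with 1, then store as i8
    assert(Mem == 0);
    Bool = 0x00000003;              // i1 'true' with a junk bit set
    Mem = (uint8_t)(Bool & 1);
    assert(Mem == 1);
  }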
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + Result = DAG.getCopyToReg(Tmp1, SP, Tmp2); + } else { + Result = Tmp1; + } + break; + } + break; + + case ISD::READCYCLECOUNTER: + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain + Result = DAG.UpdateNodeOperands(Result, Tmp1); + switch (TLI.getOperationAction(ISD::READCYCLECOUNTER, + Node->getValueType(0))) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: + Tmp1 = Result.getValue(0); + Tmp2 = Result.getValue(1); + break; + case TargetLowering::Custom: + Result = TLI.LowerOperation(Result, DAG); + Tmp1 = LegalizeOp(Result.getValue(0)); + Tmp2 = LegalizeOp(Result.getValue(1)); + break; + } + + // Since rdcc produce two values, make sure to remember that we legalized + // both of them. + AddLegalizedOperand(SDOperand(Node, 0), Tmp1); + AddLegalizedOperand(SDOperand(Node, 1), Tmp2); + return Result; + + case ISD::SELECT: + switch (getTypeAction(Node->getOperand(0).getValueType())) { + case Expand: assert(0 && "It's impossible to expand bools"); + case Legal: + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the condition. + break; + case Promote: + Tmp1 = PromoteOp(Node->getOperand(0)); // Promote the condition. + // Make sure the condition is either zero or one. + if (!DAG.MaskedValueIsZero(Tmp1, + MVT::getIntVTBitMask(Tmp1.getValueType())^1)) + Tmp1 = DAG.getZeroExtendInReg(Tmp1, MVT::i1); + break; + } + Tmp2 = LegalizeOp(Node->getOperand(1)); // TrueVal + Tmp3 = LegalizeOp(Node->getOperand(2)); // FalseVal + + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3); + + switch (TLI.getOperationAction(ISD::SELECT, Tmp2.getValueType())) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: break; + case TargetLowering::Custom: { + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.Val) Result = Tmp1; + break; + } + case TargetLowering::Expand: + if (Tmp1.getOpcode() == ISD::SETCC) { + Result = DAG.getSelectCC(Tmp1.getOperand(0), Tmp1.getOperand(1), + Tmp2, Tmp3, + cast<CondCodeSDNode>(Tmp1.getOperand(2))->get()); + } else { + Result = DAG.getSelectCC(Tmp1, + DAG.getConstant(0, Tmp1.getValueType()), + Tmp2, Tmp3, ISD::SETNE); + } + break; + case TargetLowering::Promote: { + MVT::ValueType NVT = + TLI.getTypeToPromoteTo(ISD::SELECT, Tmp2.getValueType()); + unsigned ExtOp, TruncOp; + if (MVT::isVector(Tmp2.getValueType())) { + ExtOp = ISD::BIT_CONVERT; + TruncOp = ISD::BIT_CONVERT; + } else if (MVT::isInteger(Tmp2.getValueType())) { + ExtOp = ISD::ANY_EXTEND; + TruncOp = ISD::TRUNCATE; + } else { + ExtOp = ISD::FP_EXTEND; + TruncOp = ISD::FP_ROUND; + } + // Promote each of the values to the new type. + Tmp2 = DAG.getNode(ExtOp, NVT, Tmp2); + Tmp3 = DAG.getNode(ExtOp, NVT, Tmp3); + // Perform the larger operation, then round down. + Result = DAG.getNode(ISD::SELECT, NVT, Tmp1, Tmp2,Tmp3); + Result = DAG.getNode(TruncOp, Node->getValueType(0), Result); + break; + } + } + break; + case ISD::SELECT_CC: { + Tmp1 = Node->getOperand(0); // LHS + Tmp2 = Node->getOperand(1); // RHS + Tmp3 = LegalizeOp(Node->getOperand(2)); // True + Tmp4 = LegalizeOp(Node->getOperand(3)); // False + SDOperand CC = Node->getOperand(4); + + LegalizeSetCCOperands(Tmp1, Tmp2, CC); + + // If we didn't get both a LHS and RHS back from LegalizeSetCCOperands, + // the LHS is a legal SETCC itself. In this case, we need to compare + // the result against zero to select between true and false values. 
+ if (Tmp2.Val == 0) { + Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); + CC = DAG.getCondCode(ISD::SETNE); + } + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3, Tmp4, CC); + + // Everything is legal, see if we should expand this op or something. + switch (TLI.getOperationAction(ISD::SELECT_CC, Tmp3.getValueType())) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: break; + case TargetLowering::Custom: + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.Val) Result = Tmp1; + break; + } + break; + } + case ISD::SETCC: + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp3 = Node->getOperand(2); + LegalizeSetCCOperands(Tmp1, Tmp2, Tmp3); + + // If we had to Expand the SetCC operands into a SELECT node, then it may + // not always be possible to return a true LHS & RHS. In this case, just + // return the value we legalized, returned in the LHS + if (Tmp2.Val == 0) { + Result = Tmp1; + break; + } + + switch (TLI.getOperationAction(ISD::SETCC, Tmp1.getValueType())) { + default: assert(0 && "Cannot handle this action for SETCC yet!"); + case TargetLowering::Custom: + isCustom = true; + // FALLTHROUGH. + case TargetLowering::Legal: + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3); + if (isCustom) { + Tmp4 = TLI.LowerOperation(Result, DAG); + if (Tmp4.Val) Result = Tmp4; + } + break; + case TargetLowering::Promote: { + // First step, figure out the appropriate operation to use. + // Allow SETCC to not be supported for all legal data types + // Mostly this targets FP + MVT::ValueType NewInTy = Node->getOperand(0).getValueType(); + MVT::ValueType OldVT = NewInTy; OldVT = OldVT; + + // Scan for the appropriate larger type to use. + while (1) { + NewInTy = (MVT::ValueType)(NewInTy+1); + + assert(MVT::isInteger(NewInTy) == MVT::isInteger(OldVT) && + "Fell off of the edge of the integer world"); + assert(MVT::isFloatingPoint(NewInTy) == MVT::isFloatingPoint(OldVT) && + "Fell off of the edge of the floating point world"); + + // If the target supports SETCC of this type, use it. + if (TLI.isOperationLegal(ISD::SETCC, NewInTy)) + break; + } + if (MVT::isInteger(NewInTy)) + assert(0 && "Cannot promote Legal Integer SETCC yet"); + else { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, NewInTy, Tmp1); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, NewInTy, Tmp2); + } + Tmp1 = LegalizeOp(Tmp1); + Tmp2 = LegalizeOp(Tmp2); + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3); + Result = LegalizeOp(Result); + break; + } + case TargetLowering::Expand: + // Expand a setcc node into a select_cc of the same condition, lhs, and + // rhs that selects between const 1 (true) and const 0 (false). 
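Expanding SETCC above rewrites the comparison as a SELECT_CC between the constants 1 and 0. Its scalar meaning, for one sample condition code:

  #include <cassert>

  // select_cc(lhs, rhs, 1, 0, SETLT): the expanded form of a setcc-lt.
  static int SetCCViaSelectCC(double LHS, double RHS) {
    return LHS < RHS ? 1 : 0;   // select const 1 (true) or const 0 (false)
  }

  int main() {
    assert(SetCCViaSelectCC(1.0, 2.0) == 1);
    assert(SetCCViaSelectCC(2.0, 1.0) == 0);
  }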
+ MVT::ValueType VT = Node->getValueType(0); + Result = DAG.getNode(ISD::SELECT_CC, VT, Tmp1, Tmp2, + DAG.getConstant(1, VT), DAG.getConstant(0, VT), + Tmp3); + break; + } + break; + case ISD::MEMSET: + case ISD::MEMCPY: + case ISD::MEMMOVE: { + Tmp1 = LegalizeOp(Node->getOperand(0)); // Chain + Tmp2 = LegalizeOp(Node->getOperand(1)); // Pointer + + if (Node->getOpcode() == ISD::MEMSET) { // memset = ubyte + switch (getTypeAction(Node->getOperand(2).getValueType())) { + case Expand: assert(0 && "Cannot expand a byte!"); + case Legal: + Tmp3 = LegalizeOp(Node->getOperand(2)); + break; + case Promote: + Tmp3 = PromoteOp(Node->getOperand(2)); + break; + } + } else { + Tmp3 = LegalizeOp(Node->getOperand(2)); // memcpy/move = pointer, + } + + SDOperand Tmp4; + switch (getTypeAction(Node->getOperand(3).getValueType())) { + case Expand: { + // Length is too big, just take the lo-part of the length. + SDOperand HiPart; + ExpandOp(Node->getOperand(3), Tmp4, HiPart); + break; + } + case Legal: + Tmp4 = LegalizeOp(Node->getOperand(3)); + break; + case Promote: + Tmp4 = PromoteOp(Node->getOperand(3)); + break; + } + + SDOperand Tmp5; + switch (getTypeAction(Node->getOperand(4).getValueType())) { // uint + case Expand: assert(0 && "Cannot expand this yet!"); + case Legal: + Tmp5 = LegalizeOp(Node->getOperand(4)); + break; + case Promote: + Tmp5 = PromoteOp(Node->getOperand(4)); + break; + } + + switch (TLI.getOperationAction(Node->getOpcode(), MVT::Other)) { + default: assert(0 && "This action not implemented for this operation!"); + case TargetLowering::Custom: + isCustom = true; + // FALLTHROUGH + case TargetLowering::Legal: + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3, Tmp4, Tmp5); + if (isCustom) { + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.Val) Result = Tmp1; + } + break; + case TargetLowering::Expand: { + // Otherwise, the target does not support this operation. Lower the + // operation to an explicit libcall as appropriate. + MVT::ValueType IntPtr = TLI.getPointerTy(); + const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + + const char *FnName = 0; + if (Node->getOpcode() == ISD::MEMSET) { + Entry.Node = Tmp2; Entry.Ty = IntPtrTy; + Args.push_back(Entry); + // Extend the (previously legalized) ubyte argument to be an int value + // for the call. + if (Tmp3.getValueType() > MVT::i32) + Tmp3 = DAG.getNode(ISD::TRUNCATE, MVT::i32, Tmp3); + else + Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Tmp3); + Entry.Node = Tmp3; Entry.Ty = Type::Int32Ty; Entry.isSExt = true; + Args.push_back(Entry); + Entry.Node = Tmp4; Entry.Ty = IntPtrTy; Entry.isSExt = false; + Args.push_back(Entry); + + FnName = "memset"; + } else if (Node->getOpcode() == ISD::MEMCPY || + Node->getOpcode() == ISD::MEMMOVE) { + Entry.Ty = IntPtrTy; + Entry.Node = Tmp2; Args.push_back(Entry); + Entry.Node = Tmp3; Args.push_back(Entry); + Entry.Node = Tmp4; Args.push_back(Entry); + FnName = Node->getOpcode() == ISD::MEMMOVE ? 
"memmove" : "memcpy"; + } else { + assert(0 && "Unknown op!"); + } + + std::pair<SDOperand,SDOperand> CallResult = + TLI.LowerCallTo(Tmp1, Type::VoidTy, false, false, CallingConv::C, false, + DAG.getExternalSymbol(FnName, IntPtr), Args, DAG); + Result = CallResult.second; + break; + } + } + break; + } + + case ISD::SHL_PARTS: + case ISD::SRA_PARTS: + case ISD::SRL_PARTS: { + SmallVector<SDOperand, 8> Ops; + bool Changed = false; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { + Ops.push_back(LegalizeOp(Node->getOperand(i))); + Changed |= Ops.back() != Node->getOperand(i); + } + if (Changed) + Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + + switch (TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0))) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: break; + case TargetLowering::Custom: + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.Val) { + SDOperand Tmp2, RetVal(0, 0); + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { + Tmp2 = LegalizeOp(Tmp1.getValue(i)); + AddLegalizedOperand(SDOperand(Node, i), Tmp2); + if (i == Op.ResNo) + RetVal = Tmp2; + } + assert(RetVal.Val && "Illegal result number"); + return RetVal; + } + break; + } + + // Since these produce multiple values, make sure to remember that we + // legalized all of them. + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + AddLegalizedOperand(SDOperand(Node, i), Result.getValue(i)); + return Result.getValue(Op.ResNo); + } + + // Binary operators + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: + case ISD::UDIV: + case ISD::SDIV: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRL: + case ISD::SRA: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + Tmp1 = LegalizeOp(Node->getOperand(0)); // LHS + switch (getTypeAction(Node->getOperand(1).getValueType())) { + case Expand: assert(0 && "Not possible"); + case Legal: + Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the RHS. + break; + case Promote: + Tmp2 = PromoteOp(Node->getOperand(1)); // Promote the RHS. + break; + } + + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2); + + switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) { + default: assert(0 && "BinOp legalize operation not supported"); + case TargetLowering::Legal: break; + case TargetLowering::Custom: + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.Val) Result = Tmp1; + break; + case TargetLowering::Expand: { + if (Node->getValueType(0) == MVT::i32) { + switch (Node->getOpcode()) { + default: assert(0 && "Do not know how to expand this integer BinOp!"); + case ISD::UDIV: + case ISD::SDIV: + RTLIB::Libcall LC = Node->getOpcode() == ISD::UDIV + ? RTLIB::UDIV_I32 : RTLIB::SDIV_I32; + SDOperand Dummy; + bool isSigned = Node->getOpcode() == ISD::SDIV; + Result = ExpandLibCall(TLI.getLibcallName(LC), Node, isSigned, Dummy); + }; + break; + } + + assert(MVT::isVector(Node->getValueType(0)) && + "Cannot expand this binary operator!"); + // Expand the operation into a bunch of nasty scalar code. 
+ SmallVector<SDOperand, 8> Ops;
+ MVT::ValueType EltVT = MVT::getVectorElementType(Node->getValueType(0));
+ MVT::ValueType PtrVT = TLI.getPointerTy();
+ for (unsigned i = 0, e = MVT::getVectorNumElements(Node->getValueType(0));
+ i != e; ++i) {
+ SDOperand Idx = DAG.getConstant(i, PtrVT);
+ SDOperand LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Tmp1, Idx);
+ SDOperand RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Tmp2, Idx);
+ Ops.push_back(DAG.getNode(Node->getOpcode(), EltVT, LHS, RHS));
+ }
+ Result = DAG.getNode(ISD::BUILD_VECTOR, Node->getValueType(0),
+ &Ops[0], Ops.size());
+ break;
+ }
+ case TargetLowering::Promote: {
+ switch (Node->getOpcode()) {
+ default: assert(0 && "Do not know how to promote this BinOp!");
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ MVT::ValueType OVT = Node->getValueType(0);
+ MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+ assert(MVT::isVector(OVT) && "Cannot promote this BinOp!");
+ // Bit convert each of the values to the new type.
+ Tmp1 = DAG.getNode(ISD::BIT_CONVERT, NVT, Tmp1);
+ Tmp2 = DAG.getNode(ISD::BIT_CONVERT, NVT, Tmp2);
+ Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2);
+ // Bit convert the result back to the original type.
+ Result = DAG.getNode(ISD::BIT_CONVERT, OVT, Result);
+ break;
+ }
+ }
+ }
+ }
+ break;
+
+ case ISD::FCOPYSIGN: // FCOPYSIGN does not require LHS/RHS to match type!
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // LHS
+ switch (getTypeAction(Node->getOperand(1).getValueType())) {
+ case Expand: assert(0 && "Not possible");
+ case Legal:
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the RHS.
+ break;
+ case Promote:
+ Tmp2 = PromoteOp(Node->getOperand(1)); // Promote the RHS.
+ break;
+ }
+
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+
+ switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+ default: assert(0 && "Operation not supported");
+ case TargetLowering::Custom:
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ break;
+ case TargetLowering::Legal: break;
+ case TargetLowering::Expand: {
+ // If this target supports fabs/fneg natively and select is cheap,
+ // do this efficiently.
+ if (!TLI.isSelectExpensive() &&
+ TLI.getOperationAction(ISD::FABS, Tmp1.getValueType()) ==
+ TargetLowering::Legal &&
+ TLI.getOperationAction(ISD::FNEG, Tmp1.getValueType()) ==
+ TargetLowering::Legal) {
+ // Get the sign bit of the RHS.
+ MVT::ValueType IVT =
+ Tmp2.getValueType() == MVT::f32 ? MVT::i32 : MVT::i64;
+ SDOperand SignBit = DAG.getNode(ISD::BIT_CONVERT, IVT, Tmp2);
+ SignBit = DAG.getSetCC(TLI.getSetCCResultTy(),
+ SignBit, DAG.getConstant(0, IVT), ISD::SETLT);
+ // Get the absolute value of the result.
+ SDOperand AbsVal = DAG.getNode(ISD::FABS, Tmp1.getValueType(), Tmp1);
+ // Select between the nabs and abs value based on the sign bit of
+ // the input.
+ Result = DAG.getNode(ISD::SELECT, AbsVal.getValueType(), SignBit,
+ DAG.getNode(ISD::FNEG, AbsVal.getValueType(),
+ AbsVal),
+ AbsVal);
+ Result = LegalizeOp(Result);
+ break;
+ }
+
+ // Otherwise, do bitwise ops!
+ MVT::ValueType NVT =
+ Node->getValueType(0) == MVT::f32 ? MVT::i32 : MVT::i64;
+ Result = ExpandFCOPYSIGNToBitwiseOps(Node, NVT, DAG, TLI);
+ Result = DAG.getNode(ISD::BIT_CONVERT, Node->getValueType(0), Result);
+ Result = LegalizeOp(Result);
+ break;
+ }
+ }
+ break;
+
+ case ISD::ADDC:
+ case ISD::SUBC:
+ Tmp1 = LegalizeOp(Node->getOperand(0));
+ Tmp2 = LegalizeOp(Node->getOperand(1));
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+ // Since this produces two values, make sure to remember that we legalized
+ // both of them.
+ AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+ AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+ return Result;
+
+ case ISD::ADDE:
+ case ISD::SUBE:
+ Tmp1 = LegalizeOp(Node->getOperand(0));
+ Tmp2 = LegalizeOp(Node->getOperand(1));
+ Tmp3 = LegalizeOp(Node->getOperand(2));
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
+ // Since this produces two values, make sure to remember that we legalized
+ // both of them.
+ AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+ AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+ return Result;
+
+ case ISD::BUILD_PAIR: {
+ MVT::ValueType PairTy = Node->getValueType(0);
+ // TODO: handle the case where the Lo and Hi operands are not of legal type
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Lo
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // Hi
+ switch (TLI.getOperationAction(ISD::BUILD_PAIR, PairTy)) {
+ case TargetLowering::Promote:
+ case TargetLowering::Custom:
+ assert(0 && "Cannot promote/custom this yet!");
+ case TargetLowering::Legal:
+ if (Tmp1 != Node->getOperand(0) || Tmp2 != Node->getOperand(1))
+ Result = DAG.getNode(ISD::BUILD_PAIR, PairTy, Tmp1, Tmp2);
+ break;
+ case TargetLowering::Expand:
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, PairTy, Tmp1);
+ Tmp2 = DAG.getNode(ISD::ANY_EXTEND, PairTy, Tmp2);
+ Tmp2 = DAG.getNode(ISD::SHL, PairTy, Tmp2,
+ DAG.getConstant(MVT::getSizeInBits(PairTy)/2,
+ TLI.getShiftAmountTy()));
+ Result = DAG.getNode(ISD::OR, PairTy, Tmp1, Tmp2);
+ break;
+ }
+ break;
+ }
+
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::FREM:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // LHS
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // RHS
+
+ switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+ case TargetLowering::Promote: assert(0 && "Cannot promote this yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
+ if (isCustom) {
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ }
+ break;
+ case TargetLowering::Expand:
+ unsigned DivOpc= (Node->getOpcode() == ISD::UREM) ? ISD::UDIV : ISD::SDIV;
+ bool isSigned = DivOpc == ISD::SDIV;
+ if (MVT::isInteger(Node->getValueType(0))) {
+ if (TLI.getOperationAction(DivOpc, Node->getValueType(0)) ==
+ TargetLowering::Legal) {
+ // X % Y -> X-X/Y*Y
+ MVT::ValueType VT = Node->getValueType(0);
+ Result = DAG.getNode(DivOpc, VT, Tmp1, Tmp2);
+ Result = DAG.getNode(ISD::MUL, VT, Result, Tmp2);
+ Result = DAG.getNode(ISD::SUB, VT, Tmp1, Result);
+ } else {
+ assert(Node->getValueType(0) == MVT::i32 &&
+ "Cannot expand this binary operator!");
+ RTLIB::Libcall LC = Node->getOpcode() == ISD::UREM
+ ? RTLIB::UREM_I32 : RTLIB::SREM_I32;
+ SDOperand Dummy;
+ Result = ExpandLibCall(TLI.getLibcallName(LC), Node, isSigned, Dummy);
+ }
+ } else {
+ // Floating point mod -> fmod libcall.
+ RTLIB::Libcall LC = Node->getValueType(0) == MVT::f32
+ ? RTLIB::REM_F32 : RTLIB::REM_F64;
+ SDOperand Dummy;
+ Result = ExpandLibCall(TLI.getLibcallName(LC), Node,
+ false/*sign irrelevant*/, Dummy);
+ }
+ break;
+ }
+ break;
+ case ISD::VAARG: {
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the pointer.
+
+ MVT::ValueType VT = Node->getValueType(0);
+ switch (TLI.getOperationAction(Node->getOpcode(), MVT::Other)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
+ Result = Result.getValue(0);
+ Tmp1 = Result.getValue(1);
+
+ if (isCustom) {
+ Tmp2 = TLI.LowerOperation(Result, DAG);
+ if (Tmp2.Val) {
+ Result = LegalizeOp(Tmp2);
+ Tmp1 = LegalizeOp(Tmp2.getValue(1));
+ }
+ }
+ break;
+ case TargetLowering::Expand: {
+ SrcValueSDNode *SV = cast<SrcValueSDNode>(Node->getOperand(2));
+ SDOperand VAList = DAG.getLoad(TLI.getPointerTy(), Tmp1, Tmp2,
+ SV->getValue(), SV->getOffset());
+ // Increment the pointer, VAList, to the next vaarg
+ Tmp3 = DAG.getNode(ISD::ADD, TLI.getPointerTy(), VAList,
+ DAG.getConstant(MVT::getSizeInBits(VT)/8,
+ TLI.getPointerTy()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp3 = DAG.getStore(VAList.getValue(1), Tmp3, Tmp2, SV->getValue(),
+ SV->getOffset());
+ // Load the actual argument out of the pointer VAList
+ Result = DAG.getLoad(VT, Tmp3, VAList, NULL, 0);
+ Tmp1 = LegalizeOp(Result.getValue(1));
+ Result = LegalizeOp(Result);
+ break;
+ }
+ }
+ // Since VAARG produces two values, make sure to remember that we
+ // legalized both of them.
+ AddLegalizedOperand(SDOperand(Node, 0), Result);
+ AddLegalizedOperand(SDOperand(Node, 1), Tmp1);
+ return Op.ResNo ? Tmp1 : Result;
+ }
+
+ case ISD::VACOPY:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the dest pointer.
+ Tmp3 = LegalizeOp(Node->getOperand(2)); // Legalize the source pointer.
+
+ switch (TLI.getOperationAction(ISD::VACOPY, MVT::Other)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3,
+ Node->getOperand(3), Node->getOperand(4));
+ if (isCustom) {
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ }
+ break;
+ case TargetLowering::Expand:
+ // This defaults to loading a pointer from the input and storing it to the
+ // output, returning the chain.
+ SrcValueSDNode *SVD = cast<SrcValueSDNode>(Node->getOperand(3));
+ SrcValueSDNode *SVS = cast<SrcValueSDNode>(Node->getOperand(4));
+ Tmp4 = DAG.getLoad(TLI.getPointerTy(), Tmp1, Tmp3, SVD->getValue(),
+ SVD->getOffset());
+ Result = DAG.getStore(Tmp4.getValue(1), Tmp4, Tmp2, SVS->getValue(),
+ SVS->getOffset());
+ break;
+ }
+ break;
+
+ case ISD::VAEND:
+ Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the pointer.
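The VAARG expansion above is the classic va_list pointer bump: load the list pointer, advance it by the argument size, store it back, and load the argument from the old pointer. The same sequence over a hypothetical in-memory argument area:

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  int main() {
    // Hypothetical va_list argument area holding two i32 arguments.
    unsigned char ArgArea[8];
    int32_t A = 7, B = 9;
    std::memcpy(ArgArea, &A, 4);
    std::memcpy(ArgArea + 4, &B, 4);
    unsigned char *VAList = ArgArea;       // loaded from the va_list slot

    unsigned char *Next = VAList + sizeof(int32_t);  // bump by the arg size
    // 'Next' is what gets stored back to the va_list slot ...
    int32_t Arg;
    std::memcpy(&Arg, VAList, sizeof(Arg));          // ... arg loaded from old
    assert(Arg == 7);
    VAList = Next;
    std::memcpy(&Arg, VAList, sizeof(Arg));
    assert(Arg == 9);
  }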
+ + switch (TLI.getOperationAction(ISD::VAEND, MVT::Other)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Custom: + isCustom = true; + // FALLTHROUGH + case TargetLowering::Legal: + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2)); + if (isCustom) { + Tmp1 = TLI.LowerOperation(Tmp1, DAG); + if (Tmp1.Val) Result = Tmp1; + } + break; + case TargetLowering::Expand: + Result = Tmp1; // Default to a no-op, return the chain + break; + } + break; + + case ISD::VASTART: + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + Tmp2 = LegalizeOp(Node->getOperand(1)); // Legalize the pointer. + + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2)); + + switch (TLI.getOperationAction(ISD::VASTART, MVT::Other)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: break; + case TargetLowering::Custom: + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.Val) Result = Tmp1; + break; + } + break; + + case ISD::ROTL: + case ISD::ROTR: + Tmp1 = LegalizeOp(Node->getOperand(0)); // LHS + Tmp2 = LegalizeOp(Node->getOperand(1)); // RHS + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2); + switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) { + default: + assert(0 && "ROTL/ROTR legalize operation not supported"); + break; + case TargetLowering::Legal: + break; + case TargetLowering::Custom: + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.Val) Result = Tmp1; + break; + case TargetLowering::Promote: + assert(0 && "Do not know how to promote ROTL/ROTR"); + break; + case TargetLowering::Expand: + assert(0 && "Do not know how to expand ROTL/ROTR"); + break; + } + break; + + case ISD::BSWAP: + Tmp1 = LegalizeOp(Node->getOperand(0)); // Op + switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) { + case TargetLowering::Custom: + assert(0 && "Cannot custom legalize this yet!"); + case TargetLowering::Legal: + Result = DAG.UpdateNodeOperands(Result, Tmp1); + break; + case TargetLowering::Promote: { + MVT::ValueType OVT = Tmp1.getValueType(); + MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); + unsigned DiffBits = MVT::getSizeInBits(NVT) - MVT::getSizeInBits(OVT); + + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, NVT, Tmp1); + Tmp1 = DAG.getNode(ISD::BSWAP, NVT, Tmp1); + Result = DAG.getNode(ISD::SRL, NVT, Tmp1, + DAG.getConstant(DiffBits, TLI.getShiftAmountTy())); + break; + } + case TargetLowering::Expand: + Result = ExpandBSWAP(Tmp1); + break; + } + break; + + case ISD::CTPOP: + case ISD::CTTZ: + case ISD::CTLZ: + Tmp1 = LegalizeOp(Node->getOperand(0)); // Op + switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) { + case TargetLowering::Custom: assert(0 && "Cannot custom handle this yet!"); + case TargetLowering::Legal: + Result = DAG.UpdateNodeOperands(Result, Tmp1); + break; + case TargetLowering::Promote: { + MVT::ValueType OVT = Tmp1.getValueType(); + MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); + + // Zero extend the argument. + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, NVT, Tmp1); + // Perform the larger operation, then subtract if needed. 
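The BSWAP promotion above works because the swapped bytes of the widened value land at the high end; a right shift by the width difference moves them back down. A standalone check for i16 promoted to i32, with a hand-rolled 32-bit swap standing in for ISD::BSWAP:

  #include <cassert>
  #include <cstdint>

  static uint32_t BSwap32(uint32_t V) {         // stand-in for ISD::BSWAP i32
    return (V << 24) | ((V & 0xFF00u) << 8) |
           ((V >> 8) & 0xFF00u) | (V >> 24);
  }

  int main() {
    uint16_t OV = 0x1234;
    unsigned DiffBits = 32 - 16;                 // NVT bits - OVT bits
    uint32_t T = BSwap32((uint32_t)OV);          // zero extend, then swap
    uint16_t Swapped = (uint16_t)(T >> DiffBits);// SRL by DiffBits
    assert(Swapped == 0x3412);
  }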
+ Tmp1 = DAG.getNode(Node->getOpcode(), Node->getValueType(0), Tmp1); + switch (Node->getOpcode()) { + case ISD::CTPOP: + Result = Tmp1; + break; + case ISD::CTTZ: + //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT) + Tmp2 = DAG.getSetCC(TLI.getSetCCResultTy(), Tmp1, + DAG.getConstant(MVT::getSizeInBits(NVT), NVT), + ISD::SETEQ); + Result = DAG.getNode(ISD::SELECT, NVT, Tmp2, + DAG.getConstant(MVT::getSizeInBits(OVT),NVT), Tmp1); + break; + case ISD::CTLZ: + // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) + Result = DAG.getNode(ISD::SUB, NVT, Tmp1, + DAG.getConstant(MVT::getSizeInBits(NVT) - + MVT::getSizeInBits(OVT), NVT)); + break; + } + break; + } + case TargetLowering::Expand: + Result = ExpandBitCount(Node->getOpcode(), Tmp1); + break; + } + break; + + // Unary operators + case ISD::FABS: + case ISD::FNEG: + case ISD::FSQRT: + case ISD::FSIN: + case ISD::FCOS: + Tmp1 = LegalizeOp(Node->getOperand(0)); + switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) { + case TargetLowering::Promote: + case TargetLowering::Custom: + isCustom = true; + // FALLTHROUGH + case TargetLowering::Legal: + Result = DAG.UpdateNodeOperands(Result, Tmp1); + if (isCustom) { + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.Val) Result = Tmp1; + } + break; + case TargetLowering::Expand: + switch (Node->getOpcode()) { + default: assert(0 && "Unreachable!"); + case ISD::FNEG: + // Expand Y = FNEG(X) -> Y = SUB -0.0, X + Tmp2 = DAG.getConstantFP(-0.0, Node->getValueType(0)); + Result = DAG.getNode(ISD::FSUB, Node->getValueType(0), Tmp2, Tmp1); + break; + case ISD::FABS: { + // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X). + MVT::ValueType VT = Node->getValueType(0); + Tmp2 = DAG.getConstantFP(0.0, VT); + Tmp2 = DAG.getSetCC(TLI.getSetCCResultTy(), Tmp1, Tmp2, ISD::SETUGT); + Tmp3 = DAG.getNode(ISD::FNEG, VT, Tmp1); + Result = DAG.getNode(ISD::SELECT, VT, Tmp2, Tmp1, Tmp3); + break; + } + case ISD::FSQRT: + case ISD::FSIN: + case ISD::FCOS: { + MVT::ValueType VT = Node->getValueType(0); + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + switch(Node->getOpcode()) { + case ISD::FSQRT: + LC = VT == MVT::f32 ? RTLIB::SQRT_F32 : RTLIB::SQRT_F64; + break; + case ISD::FSIN: + LC = VT == MVT::f32 ? RTLIB::SIN_F32 : RTLIB::SIN_F64; + break; + case ISD::FCOS: + LC = VT == MVT::f32 ? RTLIB::COS_F32 : RTLIB::COS_F64; + break; + default: assert(0 && "Unreachable!"); + } + SDOperand Dummy; + Result = ExpandLibCall(TLI.getLibcallName(LC), Node, + false/*sign irrelevant*/, Dummy); + break; + } + } + break; + } + break; + case ISD::FPOWI: { + // We always lower FPOWI into a libcall. No target support it yet. + RTLIB::Libcall LC = Node->getValueType(0) == MVT::f32 + ? RTLIB::POWI_F32 : RTLIB::POWI_F64; + SDOperand Dummy; + Result = ExpandLibCall(TLI.getLibcallName(LC), Node, + false/*sign irrelevant*/, Dummy); + break; + } + case ISD::BIT_CONVERT: + if (!isTypeLegal(Node->getOperand(0).getValueType())) { + Result = ExpandBIT_CONVERT(Node->getValueType(0), Node->getOperand(0)); + } else if (MVT::isVector(Op.getOperand(0).getValueType())) { + // The input has to be a vector type, we have to either scalarize it, pack + // it, or convert it based on whether the input vector type is legal. + SDNode *InVal = Node->getOperand(0).Val; + unsigned NumElems = MVT::getVectorNumElements(InVal->getValueType(0)); + MVT::ValueType EVT = MVT::getVectorElementType(InVal->getValueType(0)); + + // Figure out if there is a simple type corresponding to this Vector + // type. If so, convert to the vector type. 
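The CTLZ/CTTZ adjustments above compensate for the zero extension: a leading-zero count is reduced by the number of added bits, and a trailing-zero count equal to the wide width is clamped back to the original width. A standalone check for i8 promoted to i32 (a naive loop standing in for ISD::CTLZ):

  #include <cassert>
  #include <cstdint>

  static unsigned CtlZ32(uint32_t V) {   // naive stand-in for ISD::CTLZ i32
    unsigned N = 0;
    for (uint32_t Bit = 1u << 31; Bit && !(V & Bit); Bit >>= 1) ++N;
    return N;                            // 32 when V == 0
  }

  int main() {
    uint8_t OV = 0x10;                   // i8 value promoted to i32
    // CTLZ: count on the widened value, then subtract the extra bits.
    unsigned Ctlz = CtlZ32((uint32_t)OV) - (32 - 8);
    assert(Ctlz == 3);                   // 0x10 = 0b00010000 -> 3 leading zeros
    // CTTZ: zext(i8 0) reports 32 trailing zeros in the wide type; the
    // SETEQ/SELECT pair above clamps that back to the original width, 8.
    unsigned Cttz = 32;
    if (Cttz == 32) Cttz = 8;
    assert(Cttz == 8);
  }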
+ MVT::ValueType TVT = MVT::getVectorType(EVT, NumElems); + if (TLI.isTypeLegal(TVT)) { + // Turn this into a bit convert of the vector input. + Result = DAG.getNode(ISD::BIT_CONVERT, Node->getValueType(0), + LegalizeOp(Node->getOperand(0))); + break; + } else if (NumElems == 1) { + // Turn this into a bit convert of the scalar input. + Result = DAG.getNode(ISD::BIT_CONVERT, Node->getValueType(0), + ScalarizeVectorOp(Node->getOperand(0))); + break; + } else { + // FIXME: UNIMP! Store then reload + assert(0 && "Cast from unsupported vector type not implemented yet!"); + } + } else { + switch (TLI.getOperationAction(ISD::BIT_CONVERT, + Node->getOperand(0).getValueType())) { + default: assert(0 && "Unknown operation action!"); + case TargetLowering::Expand: + Result = ExpandBIT_CONVERT(Node->getValueType(0), Node->getOperand(0)); + break; + case TargetLowering::Legal: + Tmp1 = LegalizeOp(Node->getOperand(0)); + Result = DAG.UpdateNodeOperands(Result, Tmp1); + break; + } + } + break; + + // Conversion operators. The source and destination have different types. + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: { + bool isSigned = Node->getOpcode() == ISD::SINT_TO_FP; + switch (getTypeAction(Node->getOperand(0).getValueType())) { + case Legal: + switch (TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(0).getValueType())) { + default: assert(0 && "Unknown operation action!"); + case TargetLowering::Custom: + isCustom = true; + // FALLTHROUGH + case TargetLowering::Legal: + Tmp1 = LegalizeOp(Node->getOperand(0)); + Result = DAG.UpdateNodeOperands(Result, Tmp1); + if (isCustom) { + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.Val) Result = Tmp1; + } + break; + case TargetLowering::Expand: + Result = ExpandLegalINT_TO_FP(isSigned, + LegalizeOp(Node->getOperand(0)), + Node->getValueType(0)); + break; + case TargetLowering::Promote: + Result = PromoteLegalINT_TO_FP(LegalizeOp(Node->getOperand(0)), + Node->getValueType(0), + isSigned); + break; + } + break; + case Expand: + Result = ExpandIntToFP(Node->getOpcode() == ISD::SINT_TO_FP, + Node->getValueType(0), Node->getOperand(0)); + break; + case Promote: + Tmp1 = PromoteOp(Node->getOperand(0)); + if (isSigned) { + Tmp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, Tmp1.getValueType(), + Tmp1, DAG.getValueType(Node->getOperand(0).getValueType())); + } else { + Tmp1 = DAG.getZeroExtendInReg(Tmp1, + Node->getOperand(0).getValueType()); + } + Result = DAG.UpdateNodeOperands(Result, Tmp1); + Result = LegalizeOp(Result); // The 'op' is not necessarily legal! + break; + } + break; + } + case ISD::TRUNCATE: + switch (getTypeAction(Node->getOperand(0).getValueType())) { + case Legal: + Tmp1 = LegalizeOp(Node->getOperand(0)); + Result = DAG.UpdateNodeOperands(Result, Tmp1); + break; + case Expand: + ExpandOp(Node->getOperand(0), Tmp1, Tmp2); + + // Since the result is legal, we should just be able to truncate the low + // part of the source. 
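As a hypothetical illustration of that comment (not code from the patch): when an i64 has already been expanded into two i32 halves, truncating it to i32 is just keeping the low half.

    #include <cstdint>
    // TRUNCATE of an expanded i64 down to i32: the high word is dropped.
    uint32_t trunc_expanded(uint32_t Lo, uint32_t Hi) {
      (void)Hi;   // ExpandOp's second result; unused by the truncate
      return Lo;  // ISD::TRUNCATE of the low part
    }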
+ Result = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0), Tmp1);
+ break;
+ case Promote:
+ Result = PromoteOp(Node->getOperand(0));
+ Result = DAG.getNode(ISD::TRUNCATE, Op.getValueType(), Result);
+ break;
+ }
+ break;
+
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ switch (getTypeAction(Node->getOperand(0).getValueType())) {
+ case Legal:
+ Tmp1 = LegalizeOp(Node->getOperand(0));
+
+ switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))){
+ default: assert(0 && "Unknown operation action!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = DAG.UpdateNodeOperands(Result, Tmp1);
+ if (isCustom) {
+ Tmp1 = TLI.LowerOperation(Result, DAG);
+ if (Tmp1.Val) Result = Tmp1;
+ }
+ break;
+ case TargetLowering::Promote:
+ Result = PromoteLegalFP_TO_INT(Tmp1, Node->getValueType(0),
+ Node->getOpcode() == ISD::FP_TO_SINT);
+ break;
+ case TargetLowering::Expand:
+ if (Node->getOpcode() == ISD::FP_TO_UINT) {
+ SDOperand True, False;
+ MVT::ValueType VT = Node->getOperand(0).getValueType();
+ MVT::ValueType NVT = Node->getValueType(0);
+ unsigned ShiftAmt = MVT::getSizeInBits(Node->getValueType(0))-1;
+ Tmp2 = DAG.getConstantFP((double)(1ULL << ShiftAmt), VT);
+ Tmp3 = DAG.getSetCC(TLI.getSetCCResultTy(),
+ Node->getOperand(0), Tmp2, ISD::SETLT);
+ True = DAG.getNode(ISD::FP_TO_SINT, NVT, Node->getOperand(0));
+ False = DAG.getNode(ISD::FP_TO_SINT, NVT,
+ DAG.getNode(ISD::FSUB, VT, Node->getOperand(0),
+ Tmp2));
+ False = DAG.getNode(ISD::XOR, NVT, False,
+ DAG.getConstant(1ULL << ShiftAmt, NVT));
+ Result = DAG.getNode(ISD::SELECT, NVT, Tmp3, True, False);
+ break;
+ } else {
+ assert(0 && "Do not know how to expand FP_TO_SINT yet!");
+ }
+ break;
+ }
+ break;
+ case Expand: {
+ // Convert f32 / f64 to i32 / i64.
+ MVT::ValueType VT = Op.getValueType();
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ switch (Node->getOpcode()) {
+ case ISD::FP_TO_SINT:
+ if (Node->getOperand(0).getValueType() == MVT::f32)
+ LC = (VT == MVT::i32)
+ ? RTLIB::FPTOSINT_F32_I32 : RTLIB::FPTOSINT_F32_I64;
+ else
+ LC = (VT == MVT::i32)
+ ? RTLIB::FPTOSINT_F64_I32 : RTLIB::FPTOSINT_F64_I64;
+ break;
+ case ISD::FP_TO_UINT:
+ if (Node->getOperand(0).getValueType() == MVT::f32)
+ LC = (VT == MVT::i32)
+ ? RTLIB::FPTOUINT_F32_I32 : RTLIB::FPTOUINT_F32_I64;
+ else
+ LC = (VT == MVT::i32)
+ ? RTLIB::FPTOUINT_F64_I32 : RTLIB::FPTOUINT_F64_I64;
+ break;
+ default: assert(0 && "Unreachable!");
+ }
+ SDOperand Dummy;
+ Result = ExpandLibCall(TLI.getLibcallName(LC), Node,
+ false/*sign irrelevant*/, Dummy);
+ break;
+ }
+ case Promote:
+ Tmp1 = PromoteOp(Node->getOperand(0));
+ Result = DAG.UpdateNodeOperands(Result, LegalizeOp(Tmp1));
+ Result = LegalizeOp(Result);
+ break;
+ }
+ break;
+
+ case ISD::FP_ROUND:
+ if (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)) ==
+ TargetLowering::Expand) {
+ // The only way we can lower this is to turn it into a TRUNCSTORE,
+ // EXTLOAD pair, targeting a temporary location (a stack slot).
+
+ // NOTE: there is a choice here between constantly creating new stack
+ // slots and always reusing the same one. We currently always create
+ // new ones, as reuse may inhibit scheduling.
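Stepping back to the FP_TO_UINT expansion earlier in this hunk, here is a scalar model of what the SELECT/FSUB/XOR sequence computes (a sketch assuming IEEE doubles and a 64-bit result, not code from the patch): inputs below 2^63 convert directly as signed; larger inputs are rebased by 2^63 and the top bit is restored with the XOR.

    #include <cstdint>
    uint64_t fp_to_uint64(double X) {
      const double Cut = 9223372036854775808.0;  // 2^63, i.e. (double)(1ULL << ShiftAmt)
      if (X < Cut)                               // the SETLT feeding the SELECT
        return (uint64_t)(int64_t)X;             // True arm: plain FP_TO_SINT
      // False arm: FSUB by 2^63, FP_TO_SINT, then XOR the sign bit back in.
      return (uint64_t)(int64_t)(X - Cut) ^ (1ULL << 63);
    }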
+ MVT::ValueType VT = Op.getValueType(); // 32 + const Type *Ty = MVT::getTypeForValueType(VT); + uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty); + unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty); + MachineFunction &MF = DAG.getMachineFunction(); + int SSFI = + MF.getFrameInfo()->CreateStackObject(TySize, Align); + SDOperand StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); + Result = DAG.getTruncStore(DAG.getEntryNode(), Node->getOperand(0), + StackSlot, NULL, 0, VT); + Result = DAG.getLoad(VT, Result, StackSlot, NULL, 0, VT); + break; + } + // FALL THROUGH + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::FP_EXTEND: + switch (getTypeAction(Node->getOperand(0).getValueType())) { + case Expand: assert(0 && "Shouldn't need to expand other operators here!"); + case Legal: + Tmp1 = LegalizeOp(Node->getOperand(0)); + Result = DAG.UpdateNodeOperands(Result, Tmp1); + break; + case Promote: + switch (Node->getOpcode()) { + case ISD::ANY_EXTEND: + Tmp1 = PromoteOp(Node->getOperand(0)); + Result = DAG.getNode(ISD::ANY_EXTEND, Op.getValueType(), Tmp1); + break; + case ISD::ZERO_EXTEND: + Result = PromoteOp(Node->getOperand(0)); + Result = DAG.getNode(ISD::ANY_EXTEND, Op.getValueType(), Result); + Result = DAG.getZeroExtendInReg(Result, + Node->getOperand(0).getValueType()); + break; + case ISD::SIGN_EXTEND: + Result = PromoteOp(Node->getOperand(0)); + Result = DAG.getNode(ISD::ANY_EXTEND, Op.getValueType(), Result); + Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, Result.getValueType(), + Result, + DAG.getValueType(Node->getOperand(0).getValueType())); + break; + case ISD::FP_EXTEND: + Result = PromoteOp(Node->getOperand(0)); + if (Result.getValueType() != Op.getValueType()) + // Dynamically dead while we have only 2 FP types. + Result = DAG.getNode(ISD::FP_EXTEND, Op.getValueType(), Result); + break; + case ISD::FP_ROUND: + Result = PromoteOp(Node->getOperand(0)); + Result = DAG.getNode(Node->getOpcode(), Op.getValueType(), Result); + break; + } + } + break; + case ISD::FP_ROUND_INREG: + case ISD::SIGN_EXTEND_INREG: { + Tmp1 = LegalizeOp(Node->getOperand(0)); + MVT::ValueType ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + + // If this operation is not supported, convert it to a shl/shr or load/store + // pair. + switch (TLI.getOperationAction(Node->getOpcode(), ExtraVT)) { + default: assert(0 && "This action not supported for this op yet!"); + case TargetLowering::Legal: + Result = DAG.UpdateNodeOperands(Result, Tmp1, Node->getOperand(1)); + break; + case TargetLowering::Expand: + // If this is an integer extend and shifts are supported, do that. + if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG) { + // NOTE: we could fall back on load/store here too for targets without + // SAR. However, it is doubtful that any exist. + unsigned BitsDiff = MVT::getSizeInBits(Node->getValueType(0)) - + MVT::getSizeInBits(ExtraVT); + SDOperand ShiftCst = DAG.getConstant(BitsDiff, TLI.getShiftAmountTy()); + Result = DAG.getNode(ISD::SHL, Node->getValueType(0), + Node->getOperand(0), ShiftCst); + Result = DAG.getNode(ISD::SRA, Node->getValueType(0), + Result, ShiftCst); + } else if (Node->getOpcode() == ISD::FP_ROUND_INREG) { + // The only way we can lower this is to turn it into a TRUNCSTORE, + // EXTLOAD pair, targetting a temporary location (a stack slot). + + // NOTE: there is a choice here between constantly creating new stack + // slots and always reusing the same one. 
We currently always create + // new ones, as reuse may inhibit scheduling. + const Type *Ty = MVT::getTypeForValueType(ExtraVT); + uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty); + unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty); + MachineFunction &MF = DAG.getMachineFunction(); + int SSFI = + MF.getFrameInfo()->CreateStackObject(TySize, Align); + SDOperand StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); + Result = DAG.getTruncStore(DAG.getEntryNode(), Node->getOperand(0), + StackSlot, NULL, 0, ExtraVT); + Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0), + Result, StackSlot, NULL, 0, ExtraVT); + } else { + assert(0 && "Unknown op"); + } + break; + } + break; + } + } + + assert(Result.getValueType() == Op.getValueType() && + "Bad legalization!"); + + // Make sure that the generated code is itself legal. + if (Result != Op) + Result = LegalizeOp(Result); + + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. + AddLegalizedOperand(Op, Result); + return Result; +} + +/// PromoteOp - Given an operation that produces a value in an invalid type, +/// promote it to compute the value into a larger type. The produced value will +/// have the correct bits for the low portion of the register, but no guarantee +/// is made about the top bits: it may be zero, sign-extended, or garbage. +SDOperand SelectionDAGLegalize::PromoteOp(SDOperand Op) { + MVT::ValueType VT = Op.getValueType(); + MVT::ValueType NVT = TLI.getTypeToTransformTo(VT); + assert(getTypeAction(VT) == Promote && + "Caller should expand or legalize operands that are not promotable!"); + assert(NVT > VT && MVT::isInteger(NVT) == MVT::isInteger(VT) && + "Cannot promote to smaller type!"); + + SDOperand Tmp1, Tmp2, Tmp3; + SDOperand Result; + SDNode *Node = Op.Val; + + DenseMap<SDOperand, SDOperand>::iterator I = PromotedNodes.find(Op); + if (I != PromotedNodes.end()) return I->second; + + switch (Node->getOpcode()) { + case ISD::CopyFromReg: + assert(0 && "CopyFromReg must be legal!"); + default: +#ifndef NDEBUG + cerr << "NODE: "; Node->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to promote this operator!"); + abort(); + case ISD::UNDEF: + Result = DAG.getNode(ISD::UNDEF, NVT); + break; + case ISD::Constant: + if (VT != MVT::i1) + Result = DAG.getNode(ISD::SIGN_EXTEND, NVT, Op); + else + Result = DAG.getNode(ISD::ZERO_EXTEND, NVT, Op); + assert(isa<ConstantSDNode>(Result) && "Didn't constant fold zext?"); + break; + case ISD::ConstantFP: + Result = DAG.getNode(ISD::FP_EXTEND, NVT, Op); + assert(isa<ConstantFPSDNode>(Result) && "Didn't constant fold fp_extend?"); + break; + + case ISD::SETCC: + assert(isTypeLegal(TLI.getSetCCResultTy()) && "SetCC type is not legal??"); + Result = DAG.getNode(ISD::SETCC, TLI.getSetCCResultTy(),Node->getOperand(0), + Node->getOperand(1), Node->getOperand(2)); + break; + + case ISD::TRUNCATE: + switch (getTypeAction(Node->getOperand(0).getValueType())) { + case Legal: + Result = LegalizeOp(Node->getOperand(0)); + assert(Result.getValueType() >= NVT && + "This truncation doesn't make sense!"); + if (Result.getValueType() > NVT) // Truncate to NVT instead of VT + Result = DAG.getNode(ISD::TRUNCATE, NVT, Result); + break; + case Promote: + // The truncation is not required, because we don't guarantee anything + // about high bits anyway. 
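To make that no-guarantee contract concrete, consider a hypothetical i8 addition promoted to i32 (a sketch only, not the patch's code): bits 8-31 of the result may be garbage, which is fine because any consumer that cares re-extends explicitly.

    #include <cstdint>
    uint32_t promoted_add_i8(uint32_t A, uint32_t B) {
      uint32_t Sum = A + B;  // correct in bits 0-7; bits 8-31 unspecified for i8 semantics
      return Sum;            // a later compare or divide would SIGN_EXTEND_INREG or mask first
    }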
+ Result = PromoteOp(Node->getOperand(0)); + break; + case Expand: + ExpandOp(Node->getOperand(0), Tmp1, Tmp2); + // Truncate the low part of the expanded value to the result type + Result = DAG.getNode(ISD::TRUNCATE, NVT, Tmp1); + } + break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + switch (getTypeAction(Node->getOperand(0).getValueType())) { + case Expand: assert(0 && "BUG: Smaller reg should have been promoted!"); + case Legal: + // Input is legal? Just do extend all the way to the larger type. + Result = DAG.getNode(Node->getOpcode(), NVT, Node->getOperand(0)); + break; + case Promote: + // Promote the reg if it's smaller. + Result = PromoteOp(Node->getOperand(0)); + // The high bits are not guaranteed to be anything. Insert an extend. + if (Node->getOpcode() == ISD::SIGN_EXTEND) + Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Result, + DAG.getValueType(Node->getOperand(0).getValueType())); + else if (Node->getOpcode() == ISD::ZERO_EXTEND) + Result = DAG.getZeroExtendInReg(Result, + Node->getOperand(0).getValueType()); + break; + } + break; + case ISD::BIT_CONVERT: + Result = ExpandBIT_CONVERT(Node->getValueType(0), Node->getOperand(0)); + Result = PromoteOp(Result); + break; + + case ISD::FP_EXTEND: + assert(0 && "Case not implemented. Dynamically dead with 2 FP types!"); + case ISD::FP_ROUND: + switch (getTypeAction(Node->getOperand(0).getValueType())) { + case Expand: assert(0 && "BUG: Cannot expand FP regs!"); + case Promote: assert(0 && "Unreachable with 2 FP types!"); + case Legal: + // Input is legal? Do an FP_ROUND_INREG. + Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Node->getOperand(0), + DAG.getValueType(VT)); + break; + } + break; + + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + switch (getTypeAction(Node->getOperand(0).getValueType())) { + case Legal: + // No extra round required here. + Result = DAG.getNode(Node->getOpcode(), NVT, Node->getOperand(0)); + break; + + case Promote: + Result = PromoteOp(Node->getOperand(0)); + if (Node->getOpcode() == ISD::SINT_TO_FP) + Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, Result.getValueType(), + Result, + DAG.getValueType(Node->getOperand(0).getValueType())); + else + Result = DAG.getZeroExtendInReg(Result, + Node->getOperand(0).getValueType()); + // No extra round required here. + Result = DAG.getNode(Node->getOpcode(), NVT, Result); + break; + case Expand: + Result = ExpandIntToFP(Node->getOpcode() == ISD::SINT_TO_FP, NVT, + Node->getOperand(0)); + // Round if we cannot tolerate excess precision. + if (NoExcessFPPrecision) + Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result, + DAG.getValueType(VT)); + break; + } + break; + + case ISD::SIGN_EXTEND_INREG: + Result = PromoteOp(Node->getOperand(0)); + Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Result, + Node->getOperand(1)); + break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + switch (getTypeAction(Node->getOperand(0).getValueType())) { + case Legal: + case Expand: + Tmp1 = Node->getOperand(0); + break; + case Promote: + // The input result is prerounded, so we don't have to do anything + // special. + Tmp1 = PromoteOp(Node->getOperand(0)); + break; + } + // If we're promoting a UINT to a larger size, check to see if the new node + // will be legal. If it isn't, check to see if FP_TO_SINT is legal, since + // we can use that instead. This allows us to generate better code for + // FP_TO_UINT for small destination sizes on targets where FP_TO_UINT is not + // legal, such as PowerPC. 
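A scalar illustration of why that substitution is safe (hypothetical sketch): with an i16 destination, every unsigned result 0..65535 fits in the positive range of i32, so a legal FP_TO_SINT at the promoted width produces the right bits and the final TRUNCATE recovers the i16.

    #include <cstdint>
    uint16_t f_to_u16(float X) {
      int32_t Wide = (int32_t)X;  // FP_TO_SINT at the promoted type (i32)
      return (uint16_t)Wide;      // TRUNCATE back to the original destination type
    }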
+ if (Node->getOpcode() == ISD::FP_TO_UINT && + !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) && + (TLI.isOperationLegal(ISD::FP_TO_SINT, NVT) || + TLI.getOperationAction(ISD::FP_TO_SINT, NVT)==TargetLowering::Custom)){ + Result = DAG.getNode(ISD::FP_TO_SINT, NVT, Tmp1); + } else { + Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1); + } + break; + + case ISD::FABS: + case ISD::FNEG: + Tmp1 = PromoteOp(Node->getOperand(0)); + assert(Tmp1.getValueType() == NVT); + Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1); + // NOTE: we do not have to do any extra rounding here for + // NoExcessFPPrecision, because we know the input will have the appropriate + // precision, and these operations don't modify precision at all. + break; + + case ISD::FSQRT: + case ISD::FSIN: + case ISD::FCOS: + Tmp1 = PromoteOp(Node->getOperand(0)); + assert(Tmp1.getValueType() == NVT); + Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1); + if (NoExcessFPPrecision) + Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result, + DAG.getValueType(VT)); + break; + + case ISD::FPOWI: { + // Promote f32 powi to f64 powi. Note that this could insert a libcall + // directly as well, which may be better. + Tmp1 = PromoteOp(Node->getOperand(0)); + assert(Tmp1.getValueType() == NVT); + Result = DAG.getNode(ISD::FPOWI, NVT, Tmp1, Node->getOperand(1)); + if (NoExcessFPPrecision) + Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result, + DAG.getValueType(VT)); + break; + } + + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + // The input may have strange things in the top bits of the registers, but + // these operations don't care. They may have weird bits going out, but + // that too is okay if they are integer operations. + Tmp1 = PromoteOp(Node->getOperand(0)); + Tmp2 = PromoteOp(Node->getOperand(1)); + assert(Tmp1.getValueType() == NVT && Tmp2.getValueType() == NVT); + Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2); + break; + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + Tmp1 = PromoteOp(Node->getOperand(0)); + Tmp2 = PromoteOp(Node->getOperand(1)); + assert(Tmp1.getValueType() == NVT && Tmp2.getValueType() == NVT); + Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2); + + // Floating point operations will give excess precision that we may not be + // able to tolerate. If we DO allow excess precision, just leave it, + // otherwise excise it. + // FIXME: Why would we need to round FP ops more than integer ones? + // Is Round(Add(Add(A,B),C)) != Round(Add(Round(Add(A,B)), C)) + if (NoExcessFPPrecision) + Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result, + DAG.getValueType(VT)); + break; + + case ISD::SDIV: + case ISD::SREM: + // These operators require that their input be sign extended. + Tmp1 = PromoteOp(Node->getOperand(0)); + Tmp2 = PromoteOp(Node->getOperand(1)); + if (MVT::isInteger(NVT)) { + Tmp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp1, + DAG.getValueType(VT)); + Tmp2 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp2, + DAG.getValueType(VT)); + } + Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2); + + // Perform FP_ROUND: this is probably overly pessimistic. + if (MVT::isFloatingPoint(NVT) && NoExcessFPPrecision) + Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result, + DAG.getValueType(VT)); + break; + case ISD::FDIV: + case ISD::FREM: + case ISD::FCOPYSIGN: + // These operators require that their input be fp extended. 
+ switch (getTypeAction(Node->getOperand(0).getValueType())) { + case Legal: + Tmp1 = LegalizeOp(Node->getOperand(0)); + break; + case Promote: + Tmp1 = PromoteOp(Node->getOperand(0)); + break; + case Expand: + assert(0 && "not implemented"); + } + switch (getTypeAction(Node->getOperand(1).getValueType())) { + case Legal: + Tmp2 = LegalizeOp(Node->getOperand(1)); + break; + case Promote: + Tmp2 = PromoteOp(Node->getOperand(1)); + break; + case Expand: + assert(0 && "not implemented"); + } + Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2); + + // Perform FP_ROUND: this is probably overly pessimistic. + if (NoExcessFPPrecision && Node->getOpcode() != ISD::FCOPYSIGN) + Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result, + DAG.getValueType(VT)); + break; + + case ISD::UDIV: + case ISD::UREM: + // These operators require that their input be zero extended. + Tmp1 = PromoteOp(Node->getOperand(0)); + Tmp2 = PromoteOp(Node->getOperand(1)); + assert(MVT::isInteger(NVT) && "Operators don't apply to FP!"); + Tmp1 = DAG.getZeroExtendInReg(Tmp1, VT); + Tmp2 = DAG.getZeroExtendInReg(Tmp2, VT); + Result = DAG.getNode(Node->getOpcode(), NVT, Tmp1, Tmp2); + break; + + case ISD::SHL: + Tmp1 = PromoteOp(Node->getOperand(0)); + Result = DAG.getNode(ISD::SHL, NVT, Tmp1, Node->getOperand(1)); + break; + case ISD::SRA: + // The input value must be properly sign extended. + Tmp1 = PromoteOp(Node->getOperand(0)); + Tmp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp1, + DAG.getValueType(VT)); + Result = DAG.getNode(ISD::SRA, NVT, Tmp1, Node->getOperand(1)); + break; + case ISD::SRL: + // The input value must be properly zero extended. + Tmp1 = PromoteOp(Node->getOperand(0)); + Tmp1 = DAG.getZeroExtendInReg(Tmp1, VT); + Result = DAG.getNode(ISD::SRL, NVT, Tmp1, Node->getOperand(1)); + break; + + case ISD::VAARG: + Tmp1 = Node->getOperand(0); // Get the chain. + Tmp2 = Node->getOperand(1); // Get the pointer. + if (TLI.getOperationAction(ISD::VAARG, VT) == TargetLowering::Custom) { + Tmp3 = DAG.getVAArg(VT, Tmp1, Tmp2, Node->getOperand(2)); + Result = TLI.CustomPromoteOperation(Tmp3, DAG); + } else { + SrcValueSDNode *SV = cast<SrcValueSDNode>(Node->getOperand(2)); + SDOperand VAList = DAG.getLoad(TLI.getPointerTy(), Tmp1, Tmp2, + SV->getValue(), SV->getOffset()); + // Increment the pointer, VAList, to the next vaarg + Tmp3 = DAG.getNode(ISD::ADD, TLI.getPointerTy(), VAList, + DAG.getConstant(MVT::getSizeInBits(VT)/8, + TLI.getPointerTy())); + // Store the incremented VAList to the legalized pointer + Tmp3 = DAG.getStore(VAList.getValue(1), Tmp3, Tmp2, SV->getValue(), + SV->getOffset()); + // Load the actual argument out of the pointer VAList + Result = DAG.getExtLoad(ISD::EXTLOAD, NVT, Tmp3, VAList, NULL, 0, VT); + } + // Remember that we legalized the chain. + AddLegalizedOperand(Op.getValue(1), LegalizeOp(Result.getValue(1))); + break; + + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Node); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(Node) + ? ISD::EXTLOAD : LD->getExtensionType(); + Result = DAG.getExtLoad(ExtType, NVT, + LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->getLoadedVT(), + LD->isVolatile(), + LD->getAlignment()); + // Remember that we legalized the chain. 
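The manual VAARG lowering above follows the usual va_arg protocol: load the current pointer out of the va_list slot, store back the pointer bumped past the argument, and load the argument from the old pointer. A rough C++ sketch of one step, with hypothetical names and a 4-byte argument:

    #include <cstdint>
    #include <cstring>
    uint32_t vaarg_step(char *&VAList) {
      char *P = VAList;         // load the current va_list pointer
      VAList = P + 4;           // increment it and store it back
      uint32_t Arg;
      std::memcpy(&Arg, P, 4);  // load the argument from the old pointer
      return Arg;               // PromoteOp then extends this load to NVT
    }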
+ AddLegalizedOperand(Op.getValue(1), LegalizeOp(Result.getValue(1))); + break; + } + case ISD::SELECT: + Tmp2 = PromoteOp(Node->getOperand(1)); // Legalize the op0 + Tmp3 = PromoteOp(Node->getOperand(2)); // Legalize the op1 + Result = DAG.getNode(ISD::SELECT, NVT, Node->getOperand(0), Tmp2, Tmp3); + break; + case ISD::SELECT_CC: + Tmp2 = PromoteOp(Node->getOperand(2)); // True + Tmp3 = PromoteOp(Node->getOperand(3)); // False + Result = DAG.getNode(ISD::SELECT_CC, NVT, Node->getOperand(0), + Node->getOperand(1), Tmp2, Tmp3, Node->getOperand(4)); + break; + case ISD::BSWAP: + Tmp1 = Node->getOperand(0); + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, NVT, Tmp1); + Tmp1 = DAG.getNode(ISD::BSWAP, NVT, Tmp1); + Result = DAG.getNode(ISD::SRL, NVT, Tmp1, + DAG.getConstant(MVT::getSizeInBits(NVT) - + MVT::getSizeInBits(VT), + TLI.getShiftAmountTy())); + break; + case ISD::CTPOP: + case ISD::CTTZ: + case ISD::CTLZ: + // Zero extend the argument + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, NVT, Node->getOperand(0)); + // Perform the larger operation, then subtract if needed. + Tmp1 = DAG.getNode(Node->getOpcode(), NVT, Tmp1); + switch(Node->getOpcode()) { + case ISD::CTPOP: + Result = Tmp1; + break; + case ISD::CTTZ: + // if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT) + Tmp2 = DAG.getSetCC(TLI.getSetCCResultTy(), Tmp1, + DAG.getConstant(MVT::getSizeInBits(NVT), NVT), + ISD::SETEQ); + Result = DAG.getNode(ISD::SELECT, NVT, Tmp2, + DAG.getConstant(MVT::getSizeInBits(VT), NVT), Tmp1); + break; + case ISD::CTLZ: + //Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) + Result = DAG.getNode(ISD::SUB, NVT, Tmp1, + DAG.getConstant(MVT::getSizeInBits(NVT) - + MVT::getSizeInBits(VT), NVT)); + break; + } + break; + case ISD::EXTRACT_SUBVECTOR: + Result = PromoteOp(ExpandEXTRACT_SUBVECTOR(Op)); + break; + case ISD::EXTRACT_VECTOR_ELT: + Result = PromoteOp(ExpandEXTRACT_VECTOR_ELT(Op)); + break; + } + + assert(Result.Val && "Didn't set a result!"); + + // Make sure the result is itself legal. + Result = LegalizeOp(Result); + + // Remember that we promoted this! + AddPromotedOperand(Op, Result); + return Result; +} + +/// ExpandEXTRACT_VECTOR_ELT - Expand an EXTRACT_VECTOR_ELT operation into +/// a legal EXTRACT_VECTOR_ELT operation, scalar code, or memory traffic, +/// based on the vector type. The return type of this matches the element type +/// of the vector, which may not be legal for the target. +SDOperand SelectionDAGLegalize::ExpandEXTRACT_VECTOR_ELT(SDOperand Op) { + // We know that operand #0 is the Vec vector. If the index is a constant + // or if the invec is a supported hardware type, we can use it. Otherwise, + // lower to a store then an indexed load. + SDOperand Vec = Op.getOperand(0); + SDOperand Idx = Op.getOperand(1); + + SDNode *InVal = Vec.Val; + MVT::ValueType TVT = InVal->getValueType(0); + unsigned NumElems = MVT::getVectorNumElements(TVT); + + switch (TLI.getOperationAction(ISD::EXTRACT_VECTOR_ELT, TVT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Custom: { + Vec = LegalizeOp(Vec); + Op = DAG.UpdateNodeOperands(Op, Vec, Idx); + SDOperand Tmp3 = TLI.LowerOperation(Op, DAG); + if (Tmp3.Val) + return Tmp3; + break; + } + case TargetLowering::Legal: + if (isTypeLegal(TVT)) { + Vec = LegalizeOp(Vec); + Op = DAG.UpdateNodeOperands(Op, Vec, Idx); + Op = LegalizeOp(Op); + } + break; + case TargetLowering::Expand: + break; + } + + if (NumElems == 1) { + // This must be an access of the only element. Return it. 
+ Op = ScalarizeVectorOp(Vec); + } else if (!TLI.isTypeLegal(TVT) && isa<ConstantSDNode>(Idx)) { + ConstantSDNode *CIdx = cast<ConstantSDNode>(Idx); + SDOperand Lo, Hi; + SplitVectorOp(Vec, Lo, Hi); + if (CIdx->getValue() < NumElems/2) { + Vec = Lo; + } else { + Vec = Hi; + Idx = DAG.getConstant(CIdx->getValue() - NumElems/2, + Idx.getValueType()); + } + + // It's now an extract from the appropriate high or low part. Recurse. + Op = DAG.UpdateNodeOperands(Op, Vec, Idx); + Op = ExpandEXTRACT_VECTOR_ELT(Op); + } else { + // Store the value to a temporary stack slot, then LOAD the scalar + // element back out. + SDOperand StackPtr = CreateStackTemporary(Vec.getValueType()); + SDOperand Ch = DAG.getStore(DAG.getEntryNode(), Vec, StackPtr, NULL, 0); + + // Add the offset to the index. + unsigned EltSize = MVT::getSizeInBits(Op.getValueType())/8; + Idx = DAG.getNode(ISD::MUL, Idx.getValueType(), Idx, + DAG.getConstant(EltSize, Idx.getValueType())); + StackPtr = DAG.getNode(ISD::ADD, Idx.getValueType(), Idx, StackPtr); + + Op = DAG.getLoad(Op.getValueType(), Ch, StackPtr, NULL, 0); + } + return Op; +} + +/// ExpandEXTRACT_SUBVECTOR - Expand a EXTRACT_SUBVECTOR operation. For now +/// we assume the operation can be split if it is not already legal. +SDOperand SelectionDAGLegalize::ExpandEXTRACT_SUBVECTOR(SDOperand Op) { + // We know that operand #0 is the Vec vector. For now we assume the index + // is a constant and that the extracted result is a supported hardware type. + SDOperand Vec = Op.getOperand(0); + SDOperand Idx = LegalizeOp(Op.getOperand(1)); + + unsigned NumElems = MVT::getVectorNumElements(Vec.getValueType()); + + if (NumElems == MVT::getVectorNumElements(Op.getValueType())) { + // This must be an access of the desired vector length. Return it. + return Vec; + } + + ConstantSDNode *CIdx = cast<ConstantSDNode>(Idx); + SDOperand Lo, Hi; + SplitVectorOp(Vec, Lo, Hi); + if (CIdx->getValue() < NumElems/2) { + Vec = Lo; + } else { + Vec = Hi; + Idx = DAG.getConstant(CIdx->getValue() - NumElems/2, Idx.getValueType()); + } + + // It's now an extract from the appropriate high or low part. Recurse. + Op = DAG.UpdateNodeOperands(Op, Vec, Idx); + return ExpandEXTRACT_SUBVECTOR(Op); +} + +/// LegalizeSetCCOperands - Attempts to create a legal LHS and RHS for a SETCC +/// with condition CC on the current target. This usually involves legalizing +/// or promoting the arguments. In the case where LHS and RHS must be expanded, +/// there may be no choice but to create a new SetCC node to represent the +/// legalized value of setcc lhs, rhs. In this case, the value is returned in +/// LHS, and the SDOperand returned in RHS has a nil SDNode value. +void SelectionDAGLegalize::LegalizeSetCCOperands(SDOperand &LHS, + SDOperand &RHS, + SDOperand &CC) { + SDOperand Tmp1, Tmp2, Result; + + switch (getTypeAction(LHS.getValueType())) { + case Legal: + Tmp1 = LegalizeOp(LHS); // LHS + Tmp2 = LegalizeOp(RHS); // RHS + break; + case Promote: + Tmp1 = PromoteOp(LHS); // LHS + Tmp2 = PromoteOp(RHS); // RHS + + // If this is an FP compare, the operands have already been extended. + if (MVT::isInteger(LHS.getValueType())) { + MVT::ValueType VT = LHS.getValueType(); + MVT::ValueType NVT = TLI.getTypeToTransformTo(VT); + + // Otherwise, we have to insert explicit sign or zero extends. Note + // that we could insert sign extends for ALL conditions, but zero extend + // is cheaper on many machines (an AND instead of two shifts), so prefer + // it. 
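A worked scalar example of that preference (a sketch, not from the patch), with i8 operands promoted to i32: equality and the unsigned orderings survive zero-extension, while the signed orderings need sign-extension.

    #include <cstdint>
    bool ult_promoted(uint8_t A, uint8_t B) {
      return (uint32_t)A < (uint32_t)B;  // zero extend: one AND-style mask per operand
    }
    bool slt_promoted(int8_t A, int8_t B) {
      return (int32_t)A < (int32_t)B;    // sign extend: SIGN_EXTEND_INREG (two shifts)
    }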
+ switch (cast<CondCodeSDNode>(CC)->get()) { + default: assert(0 && "Unknown integer comparison!"); + case ISD::SETEQ: + case ISD::SETNE: + case ISD::SETUGE: + case ISD::SETUGT: + case ISD::SETULE: + case ISD::SETULT: + // ALL of these operations will work if we either sign or zero extend + // the operands (including the unsigned comparisons!). Zero extend is + // usually a simpler/cheaper operation, so prefer it. + Tmp1 = DAG.getZeroExtendInReg(Tmp1, VT); + Tmp2 = DAG.getZeroExtendInReg(Tmp2, VT); + break; + case ISD::SETGE: + case ISD::SETGT: + case ISD::SETLT: + case ISD::SETLE: + Tmp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp1, + DAG.getValueType(VT)); + Tmp2 = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Tmp2, + DAG.getValueType(VT)); + break; + } + } + break; + case Expand: { + MVT::ValueType VT = LHS.getValueType(); + if (VT == MVT::f32 || VT == MVT::f64) { + // Expand into one or more soft-fp libcall(s). + RTLIB::Libcall LC1, LC2 = RTLIB::UNKNOWN_LIBCALL; + switch (cast<CondCodeSDNode>(CC)->get()) { + case ISD::SETEQ: + case ISD::SETOEQ: + LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64; + break; + case ISD::SETNE: + case ISD::SETUNE: + LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64; + break; + case ISD::SETGE: + case ISD::SETOGE: + LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64; + break; + case ISD::SETLT: + case ISD::SETOLT: + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; + break; + case ISD::SETLE: + case ISD::SETOLE: + LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64; + break; + case ISD::SETGT: + case ISD::SETOGT: + LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64; + break; + case ISD::SETUO: + LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64; + break; + case ISD::SETO: + LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64; + break; + default: + LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64; + switch (cast<CondCodeSDNode>(CC)->get()) { + case ISD::SETONE: + // SETONE = SETOLT | SETOGT + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; + // Fallthrough + case ISD::SETUGT: + LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64; + break; + case ISD::SETUGE: + LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64; + break; + case ISD::SETULT: + LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; + break; + case ISD::SETULE: + LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64; + break; + case ISD::SETUEQ: + LC2 = (VT == MVT::f32) ? 
RTLIB::OEQ_F32 : RTLIB::OEQ_F64; + break; + default: assert(0 && "Unsupported FP setcc!"); + } + } + + SDOperand Dummy; + Tmp1 = ExpandLibCall(TLI.getLibcallName(LC1), + DAG.getNode(ISD::MERGE_VALUES, VT, LHS, RHS).Val, + false /*sign irrelevant*/, Dummy); + Tmp2 = DAG.getConstant(0, MVT::i32); + CC = DAG.getCondCode(TLI.getCmpLibcallCC(LC1)); + if (LC2 != RTLIB::UNKNOWN_LIBCALL) { + Tmp1 = DAG.getNode(ISD::SETCC, TLI.getSetCCResultTy(), Tmp1, Tmp2, CC); + LHS = ExpandLibCall(TLI.getLibcallName(LC2), + DAG.getNode(ISD::MERGE_VALUES, VT, LHS, RHS).Val, + false /*sign irrelevant*/, Dummy); + Tmp2 = DAG.getNode(ISD::SETCC, TLI.getSetCCResultTy(), LHS, Tmp2, + DAG.getCondCode(TLI.getCmpLibcallCC(LC2))); + Tmp1 = DAG.getNode(ISD::OR, Tmp1.getValueType(), Tmp1, Tmp2); + Tmp2 = SDOperand(); + } + LHS = Tmp1; + RHS = Tmp2; + return; + } + + SDOperand LHSLo, LHSHi, RHSLo, RHSHi; + ExpandOp(LHS, LHSLo, LHSHi); + ExpandOp(RHS, RHSLo, RHSHi); + switch (cast<CondCodeSDNode>(CC)->get()) { + case ISD::SETEQ: + case ISD::SETNE: + if (RHSLo == RHSHi) + if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) + if (RHSCST->isAllOnesValue()) { + // Comparison to -1. + Tmp1 = DAG.getNode(ISD::AND, LHSLo.getValueType(), LHSLo, LHSHi); + Tmp2 = RHSLo; + break; + } + + Tmp1 = DAG.getNode(ISD::XOR, LHSLo.getValueType(), LHSLo, RHSLo); + Tmp2 = DAG.getNode(ISD::XOR, LHSLo.getValueType(), LHSHi, RHSHi); + Tmp1 = DAG.getNode(ISD::OR, Tmp1.getValueType(), Tmp1, Tmp2); + Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); + break; + default: + // If this is a comparison of the sign bit, just look at the top part. + // X > -1, x < 0 + if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(RHS)) + if ((cast<CondCodeSDNode>(CC)->get() == ISD::SETLT && + CST->getValue() == 0) || // X < 0 + (cast<CondCodeSDNode>(CC)->get() == ISD::SETGT && + CST->isAllOnesValue())) { // X > -1 + Tmp1 = LHSHi; + Tmp2 = RHSHi; + break; + } + + // FIXME: This generated code sucks. + ISD::CondCode LowCC; + ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); + switch (CCCode) { + default: assert(0 && "Unknown integer setcc!"); + case ISD::SETLT: + case ISD::SETULT: LowCC = ISD::SETULT; break; + case ISD::SETGT: + case ISD::SETUGT: LowCC = ISD::SETUGT; break; + case ISD::SETLE: + case ISD::SETULE: LowCC = ISD::SETULE; break; + case ISD::SETGE: + case ISD::SETUGE: LowCC = ISD::SETUGE; break; + } + + // Tmp1 = lo(op1) < lo(op2) // Always unsigned comparison + // Tmp2 = hi(op1) < hi(op2) // Signedness depends on operands + // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2; + + // NOTE: on targets without efficient SELECT of bools, we can always use + // this identity: (B1 ? 
B2 : B3) --> (B1 & B2)|(!B1&B3) + TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, NULL); + Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultTy(), LHSLo, RHSLo, LowCC, + false, DagCombineInfo); + if (!Tmp1.Val) + Tmp1 = DAG.getSetCC(TLI.getSetCCResultTy(), LHSLo, RHSLo, LowCC); + Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultTy(), LHSHi, RHSHi, + CCCode, false, DagCombineInfo); + if (!Tmp2.Val) + Tmp2 = DAG.getNode(ISD::SETCC, TLI.getSetCCResultTy(), LHSHi, RHSHi, CC); + + ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.Val); + ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.Val); + if ((Tmp1C && Tmp1C->getValue() == 0) || + (Tmp2C && Tmp2C->getValue() == 0 && + (CCCode == ISD::SETLE || CCCode == ISD::SETGE || + CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) || + (Tmp2C && Tmp2C->getValue() == 1 && + (CCCode == ISD::SETLT || CCCode == ISD::SETGT || + CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) { + // low part is known false, returns high part. + // For LE / GE, if high part is known false, ignore the low part. + // For LT / GT, if high part is known true, ignore the low part. + Tmp1 = Tmp2; + Tmp2 = SDOperand(); + } else { + Result = TLI.SimplifySetCC(TLI.getSetCCResultTy(), LHSHi, RHSHi, + ISD::SETEQ, false, DagCombineInfo); + if (!Result.Val) + Result=DAG.getSetCC(TLI.getSetCCResultTy(), LHSHi, RHSHi, ISD::SETEQ); + Result = LegalizeOp(DAG.getNode(ISD::SELECT, Tmp1.getValueType(), + Result, Tmp1, Tmp2)); + Tmp1 = Result; + Tmp2 = SDOperand(); + } + } + } + } + LHS = Tmp1; + RHS = Tmp2; +} + +/// ExpandBIT_CONVERT - Expand a BIT_CONVERT node into a store/load combination. +/// The resultant code need not be legal. Note that SrcOp is the input operand +/// to the BIT_CONVERT, not the BIT_CONVERT node itself. +SDOperand SelectionDAGLegalize::ExpandBIT_CONVERT(MVT::ValueType DestVT, + SDOperand SrcOp) { + // Create the stack frame object. + SDOperand FIPtr = CreateStackTemporary(DestVT); + + // Emit a store to the stack slot. + SDOperand Store = DAG.getStore(DAG.getEntryNode(), SrcOp, FIPtr, NULL, 0); + // Result is a load from the stack slot. + return DAG.getLoad(DestVT, Store, FIPtr, NULL, 0); +} + +SDOperand SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { + // Create a vector sized/aligned stack slot, store the value to element #0, + // then load the whole vector back out. + SDOperand StackPtr = CreateStackTemporary(Node->getValueType(0)); + SDOperand Ch = DAG.getStore(DAG.getEntryNode(), Node->getOperand(0), StackPtr, + NULL, 0); + return DAG.getLoad(Node->getValueType(0), Ch, StackPtr, NULL, 0); +} + + +/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't +/// support the operation, but do support the resultant vector type. +SDOperand SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { + + // If the only non-undef value is the low element, turn this into a + // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X. 
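The ExpandBIT_CONVERT helper defined just above is the classic type-punning-through-memory trick; a scalar sketch (assuming same-sized source and destination types; memcpy stands in for the stack-slot store and reload):

    #include <cstdint>
    #include <cstring>
    uint64_t bits_of(double D) {
      uint64_t Bits;
      std::memcpy(&Bits, &D, sizeof(Bits));  // store f64 to the slot, load i64 back
      return Bits;
    }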
+ unsigned NumElems = Node->getNumOperands();
+ bool isOnlyLowElement = true;
+ SDOperand SplatValue = Node->getOperand(0);
+ std::map<SDOperand, std::vector<unsigned> > Values;
+ Values[SplatValue].push_back(0);
+ bool isConstant = true;
+ if (!isa<ConstantFPSDNode>(SplatValue) && !isa<ConstantSDNode>(SplatValue) &&
+ SplatValue.getOpcode() != ISD::UNDEF)
+ isConstant = false;
+
+ for (unsigned i = 1; i < NumElems; ++i) {
+ SDOperand V = Node->getOperand(i);
+ Values[V].push_back(i);
+ if (V.getOpcode() != ISD::UNDEF)
+ isOnlyLowElement = false;
+ if (SplatValue != V)
+ SplatValue = SDOperand(0,0);
+
+ // If this isn't a constant element or an undef, we can't use a constant
+ // pool load.
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V) &&
+ V.getOpcode() != ISD::UNDEF)
+ isConstant = false;
+ }
+
+ if (isOnlyLowElement) {
+ // If the low element is an undef too, then this whole thing is undef.
+ if (Node->getOperand(0).getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::UNDEF, Node->getValueType(0));
+ // Otherwise, turn this into a scalar_to_vector node.
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, Node->getValueType(0),
+ Node->getOperand(0));
+ }
+
+ // If all elements are constants, create a load from the constant pool.
+ if (isConstant) {
+ MVT::ValueType VT = Node->getValueType(0);
+ const Type *OpNTy =
+ MVT::getTypeForValueType(Node->getOperand(0).getValueType());
+ std::vector<Constant*> CV;
+ for (unsigned i = 0, e = NumElems; i != e; ++i) {
+ if (ConstantFPSDNode *V =
+ dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
+ CV.push_back(ConstantFP::get(OpNTy, V->getValue()));
+ } else if (ConstantSDNode *V =
+ dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
+ CV.push_back(ConstantInt::get(OpNTy, V->getValue()));
+ } else {
+ assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
+ CV.push_back(UndefValue::get(OpNTy));
+ }
+ }
+ Constant *CP = ConstantVector::get(CV);
+ SDOperand CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
+ return DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0);
+ }
+
+ if (SplatValue.Val) { // Splat of one value?
+ // Build the shuffle constant vector: <0, 0, 0, 0>
+ MVT::ValueType MaskVT =
+ MVT::getIntVectorWithNumElements(NumElems);
+ SDOperand Zero = DAG.getConstant(0, MVT::getVectorElementType(MaskVT));
+ std::vector<SDOperand> ZeroVec(NumElems, Zero);
+ SDOperand SplatMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &ZeroVec[0], ZeroVec.size());
+
+ // If the target supports VECTOR_SHUFFLE and this shuffle mask, use it.
+ if (isShuffleLegal(Node->getValueType(0), SplatMask)) {
+ // Get the splatted value into the low element of a vector register.
+ SDOperand LowValVec =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, Node->getValueType(0), SplatValue);
+
+ // Return shuffle(LowValVec, undef, <0,0,0,0>)
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, Node->getValueType(0), LowValVec,
+ DAG.getNode(ISD::UNDEF, Node->getValueType(0)),
+ SplatMask);
+ }
+ }
+
+ // If there are only two unique elements, we may be able to turn this into a
+ // vector shuffle.
+ if (Values.size() == 2) {
+ // Build the shuffle constant vector: e.g.
<0, 4, 0, 4> + MVT::ValueType MaskVT = + MVT::getIntVectorWithNumElements(NumElems); + std::vector<SDOperand> MaskVec(NumElems); + unsigned i = 0; + for (std::map<SDOperand,std::vector<unsigned> >::iterator I=Values.begin(), + E = Values.end(); I != E; ++I) { + for (std::vector<unsigned>::iterator II = I->second.begin(), + EE = I->second.end(); II != EE; ++II) + MaskVec[*II] = DAG.getConstant(i, MVT::getVectorElementType(MaskVT)); + i += NumElems; + } + SDOperand ShuffleMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, + &MaskVec[0], MaskVec.size()); + + // If the target supports VECTOR_SHUFFLE and this shuffle mask, use it. + if (TLI.isOperationLegal(ISD::SCALAR_TO_VECTOR, Node->getValueType(0)) && + isShuffleLegal(Node->getValueType(0), ShuffleMask)) { + SmallVector<SDOperand, 8> Ops; + for(std::map<SDOperand,std::vector<unsigned> >::iterator I=Values.begin(), + E = Values.end(); I != E; ++I) { + SDOperand Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, Node->getValueType(0), + I->first); + Ops.push_back(Op); + } + Ops.push_back(ShuffleMask); + + // Return shuffle(LoValVec, HiValVec, <0,1,0,1>) + return DAG.getNode(ISD::VECTOR_SHUFFLE, Node->getValueType(0), + &Ops[0], Ops.size()); + } + } + + // Otherwise, we can't handle this case efficiently. Allocate a sufficiently + // aligned object on the stack, store each element into it, then load + // the result as a vector. + MVT::ValueType VT = Node->getValueType(0); + // Create the stack frame object. + SDOperand FIPtr = CreateStackTemporary(VT); + + // Emit a store of each element to the stack slot. + SmallVector<SDOperand, 8> Stores; + unsigned TypeByteSize = + MVT::getSizeInBits(Node->getOperand(0).getValueType())/8; + // Store (in the right endianness) the elements to memory. + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { + // Ignore undef elements. + if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue; + + unsigned Offset = TypeByteSize*i; + + SDOperand Idx = DAG.getConstant(Offset, FIPtr.getValueType()); + Idx = DAG.getNode(ISD::ADD, FIPtr.getValueType(), FIPtr, Idx); + + Stores.push_back(DAG.getStore(DAG.getEntryNode(), Node->getOperand(i), Idx, + NULL, 0)); + } + + SDOperand StoreChain; + if (!Stores.empty()) // Not all undef elements? + StoreChain = DAG.getNode(ISD::TokenFactor, MVT::Other, + &Stores[0], Stores.size()); + else + StoreChain = DAG.getEntryNode(); + + // Result is a load from the stack slot. + return DAG.getLoad(VT, StoreChain, FIPtr, NULL, 0); +} + +/// CreateStackTemporary - Create a stack temporary, suitable for holding the +/// specified value type. +SDOperand SelectionDAGLegalize::CreateStackTemporary(MVT::ValueType VT) { + MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + unsigned ByteSize = MVT::getSizeInBits(VT)/8; + const Type *Ty = MVT::getTypeForValueType(VT); + unsigned StackAlign = (unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty); + int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign); + return DAG.getFrameIndex(FrameIdx, TLI.getPointerTy()); +} + +void SelectionDAGLegalize::ExpandShiftParts(unsigned NodeOp, + SDOperand Op, SDOperand Amt, + SDOperand &Lo, SDOperand &Hi) { + // Expand the subcomponents. + SDOperand LHSL, LHSH; + ExpandOp(Op, LHSL, LHSH); + + SDOperand Ops[] = { LHSL, LHSH, Amt }; + MVT::ValueType VT = LHSL.getValueType(); + Lo = DAG.getNode(NodeOp, DAG.getNodeValueTypes(VT, VT), 2, Ops, 3); + Hi = Lo.getValue(1); +} + + +/// ExpandShift - Try to find a clever way to expand this shift operation out to +/// smaller elements. 
If we can't find a way that is more efficient than a +/// libcall on this target, return false. Otherwise, return true with the +/// low-parts expanded into Lo and Hi. +bool SelectionDAGLegalize::ExpandShift(unsigned Opc, SDOperand Op,SDOperand Amt, + SDOperand &Lo, SDOperand &Hi) { + assert((Opc == ISD::SHL || Opc == ISD::SRA || Opc == ISD::SRL) && + "This is not a shift!"); + + MVT::ValueType NVT = TLI.getTypeToTransformTo(Op.getValueType()); + SDOperand ShAmt = LegalizeOp(Amt); + MVT::ValueType ShTy = ShAmt.getValueType(); + unsigned VTBits = MVT::getSizeInBits(Op.getValueType()); + unsigned NVTBits = MVT::getSizeInBits(NVT); + + // Handle the case when Amt is an immediate. Other cases are currently broken + // and are disabled. + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Amt.Val)) { + unsigned Cst = CN->getValue(); + // Expand the incoming operand to be shifted, so that we have its parts + SDOperand InL, InH; + ExpandOp(Op, InL, InH); + switch(Opc) { + case ISD::SHL: + if (Cst > VTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, NVT); + } else if (Cst > NVTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = DAG.getNode(ISD::SHL, NVT, InL, DAG.getConstant(Cst-NVTBits,ShTy)); + } else if (Cst == NVTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = InL; + } else { + Lo = DAG.getNode(ISD::SHL, NVT, InL, DAG.getConstant(Cst, ShTy)); + Hi = DAG.getNode(ISD::OR, NVT, + DAG.getNode(ISD::SHL, NVT, InH, DAG.getConstant(Cst, ShTy)), + DAG.getNode(ISD::SRL, NVT, InL, DAG.getConstant(NVTBits-Cst, ShTy))); + } + return true; + case ISD::SRL: + if (Cst > VTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, NVT); + } else if (Cst > NVTBits) { + Lo = DAG.getNode(ISD::SRL, NVT, InH, DAG.getConstant(Cst-NVTBits,ShTy)); + Hi = DAG.getConstant(0, NVT); + } else if (Cst == NVTBits) { + Lo = InH; + Hi = DAG.getConstant(0, NVT); + } else { + Lo = DAG.getNode(ISD::OR, NVT, + DAG.getNode(ISD::SRL, NVT, InL, DAG.getConstant(Cst, ShTy)), + DAG.getNode(ISD::SHL, NVT, InH, DAG.getConstant(NVTBits-Cst, ShTy))); + Hi = DAG.getNode(ISD::SRL, NVT, InH, DAG.getConstant(Cst, ShTy)); + } + return true; + case ISD::SRA: + if (Cst > VTBits) { + Hi = Lo = DAG.getNode(ISD::SRA, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else if (Cst > NVTBits) { + Lo = DAG.getNode(ISD::SRA, NVT, InH, + DAG.getConstant(Cst-NVTBits, ShTy)); + Hi = DAG.getNode(ISD::SRA, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else if (Cst == NVTBits) { + Lo = InH; + Hi = DAG.getNode(ISD::SRA, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else { + Lo = DAG.getNode(ISD::OR, NVT, + DAG.getNode(ISD::SRL, NVT, InL, DAG.getConstant(Cst, ShTy)), + DAG.getNode(ISD::SHL, NVT, InH, DAG.getConstant(NVTBits-Cst, ShTy))); + Hi = DAG.getNode(ISD::SRA, NVT, InH, DAG.getConstant(Cst, ShTy)); + } + return true; + } + } + + // Okay, the shift amount isn't constant. However, if we can tell that it is + // >= 32 or < 32, we can still simplify it, without knowing the actual value. + uint64_t Mask = NVTBits, KnownZero, KnownOne; + DAG.ComputeMaskedBits(Amt, Mask, KnownZero, KnownOne); + + // If we know that the high bit of the shift amount is one, then we can do + // this as a couple of simple shifts. + if (KnownOne & Mask) { + // Mask out the high bit, which we know is set. 
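The constant-amount cases above reduce a wide shift to shifts on the two halves, ORing together the bits that cross the boundary. A 64-bit-via-32-bit sketch of the SHL branch (illustrative only; assumes 0 < Cst < 64, so NVTBits == 32):

    #include <cstdint>
    void shl64(uint32_t InL, uint32_t InH, unsigned Cst,
               uint32_t &Lo, uint32_t &Hi) {
      if (Cst > 32) {          // Cst > NVTBits: the low half shifts out entirely
        Lo = 0;
        Hi = InL << (Cst - 32);
      } else if (Cst == 32) {  // Cst == NVTBits: the high half is the old low half
        Lo = 0;
        Hi = InL;
      } else {                 // general case: OR in the bits carried across halves
        Lo = InL << Cst;
        Hi = (InH << Cst) | (InL >> (32 - Cst));
      }
    }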
+ Amt = DAG.getNode(ISD::AND, Amt.getValueType(), Amt, + DAG.getConstant(NVTBits-1, Amt.getValueType())); + + // Expand the incoming operand to be shifted, so that we have its parts + SDOperand InL, InH; + ExpandOp(Op, InL, InH); + switch(Opc) { + case ISD::SHL: + Lo = DAG.getConstant(0, NVT); // Low part is zero. + Hi = DAG.getNode(ISD::SHL, NVT, InL, Amt); // High part from Lo part. + return true; + case ISD::SRL: + Hi = DAG.getConstant(0, NVT); // Hi part is zero. + Lo = DAG.getNode(ISD::SRL, NVT, InH, Amt); // Lo part from Hi part. + return true; + case ISD::SRA: + Hi = DAG.getNode(ISD::SRA, NVT, InH, // Sign extend high part. + DAG.getConstant(NVTBits-1, Amt.getValueType())); + Lo = DAG.getNode(ISD::SRA, NVT, InH, Amt); // Lo part from Hi part. + return true; + } + } + + // If we know that the high bit of the shift amount is zero, then we can do + // this as a couple of simple shifts. + if (KnownZero & Mask) { + // Compute 32-amt. + SDOperand Amt2 = DAG.getNode(ISD::SUB, Amt.getValueType(), + DAG.getConstant(NVTBits, Amt.getValueType()), + Amt); + + // Expand the incoming operand to be shifted, so that we have its parts + SDOperand InL, InH; + ExpandOp(Op, InL, InH); + switch(Opc) { + case ISD::SHL: + Lo = DAG.getNode(ISD::SHL, NVT, InL, Amt); + Hi = DAG.getNode(ISD::OR, NVT, + DAG.getNode(ISD::SHL, NVT, InH, Amt), + DAG.getNode(ISD::SRL, NVT, InL, Amt2)); + return true; + case ISD::SRL: + Hi = DAG.getNode(ISD::SRL, NVT, InH, Amt); + Lo = DAG.getNode(ISD::OR, NVT, + DAG.getNode(ISD::SRL, NVT, InL, Amt), + DAG.getNode(ISD::SHL, NVT, InH, Amt2)); + return true; + case ISD::SRA: + Hi = DAG.getNode(ISD::SRA, NVT, InH, Amt); + Lo = DAG.getNode(ISD::OR, NVT, + DAG.getNode(ISD::SRL, NVT, InL, Amt), + DAG.getNode(ISD::SHL, NVT, InH, Amt2)); + return true; + } + } + + return false; +} + + +// ExpandLibCall - Expand a node into a call to a libcall. If the result value +// does not fit into a register, return the lo part and set the hi part to the +// by-reg argument. If it does fit into a single register, return the result +// and leave the Hi part unset. +SDOperand SelectionDAGLegalize::ExpandLibCall(const char *Name, SDNode *Node, + bool isSigned, SDOperand &Hi) { + assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. + SDOperand InChain = DAG.getEntryNode(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { + MVT::ValueType ArgVT = Node->getOperand(i).getValueType(); + const Type *ArgTy = MVT::getTypeForValueType(ArgVT); + Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Args.push_back(Entry); + } + SDOperand Callee = DAG.getExternalSymbol(Name, TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + const Type *RetTy = MVT::getTypeForValueType(Node->getValueType(0)); + std::pair<SDOperand,SDOperand> CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, false, CallingConv::C, false, + Callee, Args, DAG); + + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). 
+ LegalizeOp(CallInfo.second);
+ SDOperand Result;
+ switch (getTypeAction(CallInfo.first.getValueType())) {
+ default: assert(0 && "Unknown thing");
+ case Legal:
+ Result = CallInfo.first;
+ break;
+ case Expand:
+ ExpandOp(CallInfo.first, Result, Hi);
+ break;
+ }
+ return Result;
+}
+
+
+/// ExpandIntToFP - Expand a [US]INT_TO_FP operation.
+///
+SDOperand SelectionDAGLegalize::
+ExpandIntToFP(bool isSigned, MVT::ValueType DestTy, SDOperand Source) {
+ assert(getTypeAction(Source.getValueType()) == Expand &&
+ "This is not an expansion!");
+ assert(Source.getValueType() == MVT::i64 && "Only handle expand from i64!");
+
+ if (!isSigned) {
+ assert(Source.getValueType() == MVT::i64 &&
+ "This only works for 64-bit -> FP");
+ // The 64-bit value loaded will be incorrect if the 'sign bit' of the
+ // incoming integer is set. To handle this, we dynamically test to see if
+ // it is set, and, if so, add a fudge factor.
+ SDOperand Lo, Hi;
+ ExpandOp(Source, Lo, Hi);
+
+ // If this is unsigned, and not supported, first perform the conversion to
+ // signed, then adjust the result if the sign bit is set.
+ SDOperand SignedConv = ExpandIntToFP(true, DestTy,
+ DAG.getNode(ISD::BUILD_PAIR, Source.getValueType(), Lo, Hi));
+
+ SDOperand SignSet = DAG.getSetCC(TLI.getSetCCResultTy(), Hi,
+ DAG.getConstant(0, Hi.getValueType()),
+ ISD::SETLT);
+ SDOperand Zero = getIntPtrConstant(0), Four = getIntPtrConstant(4);
+ SDOperand CstOffset = DAG.getNode(ISD::SELECT, Zero.getValueType(),
+ SignSet, Four, Zero);
+ uint64_t FF = 0x5f800000ULL;
+ if (TLI.isLittleEndian()) FF <<= 32;
+ static Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF);
+
+ SDOperand CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
+ CPIdx = DAG.getNode(ISD::ADD, TLI.getPointerTy(), CPIdx, CstOffset);
+ SDOperand FudgeInReg;
+ if (DestTy == MVT::f32)
+ FudgeInReg = DAG.getLoad(MVT::f32, DAG.getEntryNode(), CPIdx, NULL, 0);
+ else {
+ assert(DestTy == MVT::f64 && "Unexpected conversion");
+ // FIXME: Avoid the extend by constructing the right constant pool?
+ FudgeInReg = DAG.getExtLoad(ISD::EXTLOAD, MVT::f64, DAG.getEntryNode(),
+ CPIdx, NULL, 0, MVT::f32);
+ }
+ MVT::ValueType SCVT = SignedConv.getValueType();
+ if (SCVT != DestTy) {
+ // Destination type needs to be expanded as well. The FADD we are now
+ // constructing will be expanded into a libcall.
+ if (MVT::getSizeInBits(SCVT) != MVT::getSizeInBits(DestTy)) {
+ assert(SCVT == MVT::i32 && DestTy == MVT::f64);
+ SignedConv = DAG.getNode(ISD::BUILD_PAIR, MVT::i64,
+ SignedConv, SignedConv.getValue(1));
+ }
+ SignedConv = DAG.getNode(ISD::BIT_CONVERT, DestTy, SignedConv);
+ }
+ return DAG.getNode(ISD::FADD, DestTy, SignedConv, FudgeInReg);
+ }
+
+ // Check to see if the target has a custom way to lower this. If so, use it.
+ switch (TLI.getOperationAction(ISD::SINT_TO_FP, Source.getValueType())) {
+ default: assert(0 && "This action not implemented for this operation!");
+ case TargetLowering::Legal:
+ case TargetLowering::Expand:
+ break; // This case is handled below.
+ case TargetLowering::Custom: {
+ SDOperand NV = TLI.LowerOperation(DAG.getNode(ISD::SINT_TO_FP, DestTy,
+ Source), DAG);
+ if (NV.Val)
+ return LegalizeOp(NV);
+ break; // The target decided this was legal after all
+ }
+ }
+
+ // Expand the source, then glue it back together for the call. We must expand
+ // the source in case it is shared (this pass of legalize must traverse it).
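A scalar model of the unsigned fixup above (a sketch that ignores double-rounding subtleties): convert as signed, then add 2^64, which is exactly the single-precision constant 0x5f800000 that the fudge-factor constant pool holds, whenever the sign bit was set.

    #include <cstdint>
    double uint64_to_double(uint64_t X) {
      double D = (double)(int64_t)X;    // the signed conversion (SignedConv)
      if ((int64_t)X < 0)               // Hi < 0, i.e. the sign bit was set
        D += 18446744073709551616.0;    // 2^64: the FudgeFactor selected at offset 4
      return D;
    }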
+ SDOperand SrcLo, SrcHi; + ExpandOp(Source, SrcLo, SrcHi); + Source = DAG.getNode(ISD::BUILD_PAIR, Source.getValueType(), SrcLo, SrcHi); + + RTLIB::Libcall LC; + if (DestTy == MVT::f32) + LC = RTLIB::SINTTOFP_I64_F32; + else { + assert(DestTy == MVT::f64 && "Unknown fp value type!"); + LC = RTLIB::SINTTOFP_I64_F64; + } + + assert(TLI.getLibcallName(LC) && "Don't know how to expand this SINT_TO_FP!"); + Source = DAG.getNode(ISD::SINT_TO_FP, DestTy, Source); + SDOperand UnusedHiPart; + return ExpandLibCall(TLI.getLibcallName(LC), Source.Val, isSigned, + UnusedHiPart); +} + +/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a +/// INT_TO_FP operation of the specified operand when the target requests that +/// we expand it. At this point, we know that the result and operand types are +/// legal for the target. +SDOperand SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, + SDOperand Op0, + MVT::ValueType DestVT) { + if (Op0.getValueType() == MVT::i32) { + // simple 32-bit [signed|unsigned] integer to float/double expansion + + // get the stack frame index of a 8 byte buffer, pessimistically aligned + MachineFunction &MF = DAG.getMachineFunction(); + const Type *F64Type = MVT::getTypeForValueType(MVT::f64); + unsigned StackAlign = + (unsigned)TLI.getTargetData()->getPrefTypeAlignment(F64Type); + int SSFI = MF.getFrameInfo()->CreateStackObject(8, StackAlign); + // get address of 8 byte buffer + SDOperand StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); + // word offset constant for Hi/Lo address computation + SDOperand WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy()); + // set up Hi and Lo (into buffer) address based on endian + SDOperand Hi = StackSlot; + SDOperand Lo = DAG.getNode(ISD::ADD, TLI.getPointerTy(), StackSlot,WordOff); + if (TLI.isLittleEndian()) + std::swap(Hi, Lo); + + // if signed map to unsigned space + SDOperand Op0Mapped; + if (isSigned) { + // constant used to invert sign bit (signed to unsigned mapping) + SDOperand SignBit = DAG.getConstant(0x80000000u, MVT::i32); + Op0Mapped = DAG.getNode(ISD::XOR, MVT::i32, Op0, SignBit); + } else { + Op0Mapped = Op0; + } + // store the lo of the constructed double - based on integer input + SDOperand Store1 = DAG.getStore(DAG.getEntryNode(), + Op0Mapped, Lo, NULL, 0); + // initial hi portion of constructed double + SDOperand InitialHi = DAG.getConstant(0x43300000u, MVT::i32); + // store the hi of the constructed double - biased exponent + SDOperand Store2=DAG.getStore(Store1, InitialHi, Hi, NULL, 0); + // load the constructed double + SDOperand Load = DAG.getLoad(MVT::f64, Store2, StackSlot, NULL, 0); + // FP constant to bias correct the final result + SDOperand Bias = DAG.getConstantFP(isSigned ? 
+                                            BitsToDouble(0x4330000080000000ULL)
+                                          : BitsToDouble(0x4330000000000000ULL),
+                                       MVT::f64);
+    // subtract the bias
+    SDOperand Sub = DAG.getNode(ISD::FSUB, MVT::f64, Load, Bias);
+    // final result
+    SDOperand Result;
+    // handle final rounding
+    if (DestVT == MVT::f64) {
+      // do nothing
+      Result = Sub;
+    } else {
+      // if f32 then round to f32
+      Result = DAG.getNode(ISD::FP_ROUND, MVT::f32, Sub);
+    }
+    return Result;
+  }
+  assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+  SDOperand Tmp1 = DAG.getNode(ISD::SINT_TO_FP, DestVT, Op0);
+
+  SDOperand SignSet = DAG.getSetCC(TLI.getSetCCResultTy(), Op0,
+                                   DAG.getConstant(0, Op0.getValueType()),
+                                   ISD::SETLT);
+  SDOperand Zero = getIntPtrConstant(0), Four = getIntPtrConstant(4);
+  SDOperand CstOffset = DAG.getNode(ISD::SELECT, Zero.getValueType(),
+                                    SignSet, Four, Zero);
+
+  // If the sign bit of the integer is set, the large number will be treated
+  // as a negative number. To counteract this, the dynamic code adds an
+  // offset depending on the data type.
+  uint64_t FF;
+  switch (Op0.getValueType()) {
+  default: assert(0 && "Unsupported integer type!");
+  case MVT::i8 : FF = 0x43800000ULL; break;  // 2^8  (as a float)
+  case MVT::i16: FF = 0x47800000ULL; break;  // 2^16 (as a float)
+  case MVT::i32: FF = 0x4F800000ULL; break;  // 2^32 (as a float)
+  case MVT::i64: FF = 0x5F800000ULL; break;  // 2^64 (as a float)
+  }
+  if (TLI.isLittleEndian()) FF <<= 32;
+  Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF);
+
+  SDOperand CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
+  CPIdx = DAG.getNode(ISD::ADD, TLI.getPointerTy(), CPIdx, CstOffset);
+  SDOperand FudgeInReg;
+  if (DestVT == MVT::f32)
+    FudgeInReg = DAG.getLoad(MVT::f32, DAG.getEntryNode(), CPIdx, NULL, 0);
+  else {
+    assert(DestVT == MVT::f64 && "Unexpected conversion");
+    FudgeInReg = LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, MVT::f64,
+                                           DAG.getEntryNode(), CPIdx,
+                                           NULL, 0, MVT::f32));
+  }
+
+  return DAG.getNode(ISD::FADD, DestVT, Tmp1, FudgeInReg);
+}
+
+/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a
+/// *INT_TO_FP operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
+/// operation that takes a larger input.
+SDOperand SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDOperand LegalOp,
+                                                      MVT::ValueType DestVT,
+                                                      bool isSigned) {
+  // First step, figure out the appropriate *INT_TO_FP operation to use.
+  MVT::ValueType NewInTy = LegalOp.getValueType();
+
+  unsigned OpToUse = 0;
+
+  // Scan for the appropriate larger type to use.
+  while (1) {
+    NewInTy = (MVT::ValueType)(NewInTy+1);
+    assert(MVT::isInteger(NewInTy) && "Ran out of possibilities!");
+
+    // If the target supports SINT_TO_FP of this type, use it.
+    switch (TLI.getOperationAction(ISD::SINT_TO_FP, NewInTy)) {
+      default: break;
+      case TargetLowering::Legal:
+        if (!TLI.isTypeLegal(NewInTy))
+          break;  // Can't use this datatype.
+        // FALL THROUGH.
+      case TargetLowering::Custom:
+        OpToUse = ISD::SINT_TO_FP;
+        break;
+    }
+    if (OpToUse) break;
+    if (isSigned) continue;
+
+    // If the target supports UINT_TO_FP of this type, use it.
+    switch (TLI.getOperationAction(ISD::UINT_TO_FP, NewInTy)) {
+      default: break;
+      case TargetLowering::Legal:
+        if (!TLI.isTypeLegal(NewInTy))
+          break;  // Can't use this datatype.
+        // FALL THROUGH.
+      case TargetLowering::Custom:
+        OpToUse = ISD::UINT_TO_FP;
+        break;
+    }
+    if (OpToUse) break;
+
+    // Otherwise, try a larger type.
+  }
+
+  // Okay, we found the operation and type to use. Sign or zero extend our
+  // input to the desired type, then run the operation on it.
+  return DAG.getNode(OpToUse, DestVT,
+      DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+                  NewInTy, LegalOp));
+}
+
+/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a
+/// FP_TO_*INT operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
+/// operation that returns a larger result.
+SDOperand SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDOperand LegalOp,
+                                                      MVT::ValueType DestVT,
+                                                      bool isSigned) {
+  // First step, figure out the appropriate FP_TO_*INT operation to use.
+  MVT::ValueType NewOutTy = DestVT;
+
+  unsigned OpToUse = 0;
+
+  // Scan for the appropriate larger type to use.
+  while (1) {
+    NewOutTy = (MVT::ValueType)(NewOutTy+1);
+    assert(MVT::isInteger(NewOutTy) && "Ran out of possibilities!");
+
+    // If the target supports FP_TO_SINT returning this type, use it.
+    switch (TLI.getOperationAction(ISD::FP_TO_SINT, NewOutTy)) {
+      default: break;
+      case TargetLowering::Legal:
+        if (!TLI.isTypeLegal(NewOutTy))
+          break;  // Can't use this datatype.
+        // FALL THROUGH.
+      case TargetLowering::Custom:
+        OpToUse = ISD::FP_TO_SINT;
+        break;
+    }
+    if (OpToUse) break;
+
+    // If the target supports FP_TO_UINT of this type, use it.
+    switch (TLI.getOperationAction(ISD::FP_TO_UINT, NewOutTy)) {
+      default: break;
+      case TargetLowering::Legal:
+        if (!TLI.isTypeLegal(NewOutTy))
+          break;  // Can't use this datatype.
+        // FALL THROUGH.
+      case TargetLowering::Custom:
+        OpToUse = ISD::FP_TO_UINT;
+        break;
+    }
+    if (OpToUse) break;
+
+    // Otherwise, try a larger type.
+  }
+
+  // Okay, we found the operation and type to use. Truncate the result of the
+  // extended FP_TO_*INT operation to the desired size.
+  return DAG.getNode(ISD::TRUNCATE, DestVT,
+                     DAG.getNode(OpToUse, NewOutTy, LegalOp));
+}
+
+/// ExpandBSWAP - Open code the operations for BSWAP of the specified value.
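+/// For example, bswap of the i32 value 0x00112233 produces 0x33221100.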
+///
+SDOperand SelectionDAGLegalize::ExpandBSWAP(SDOperand Op) {
+  MVT::ValueType VT = Op.getValueType();
+  MVT::ValueType SHVT = TLI.getShiftAmountTy();
+  SDOperand Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+  switch (VT) {
+  default: assert(0 && "Unhandled Expand type in BSWAP!"); abort();
+  case MVT::i16:
+    Tmp2 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(8, SHVT));
+    return DAG.getNode(ISD::OR, VT, Tmp1, Tmp2);
+  case MVT::i32:
+    Tmp4 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp3 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp2 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp3 = DAG.getNode(ISD::AND, VT, Tmp3, DAG.getConstant(0xFF0000, VT));
+    Tmp2 = DAG.getNode(ISD::AND, VT, Tmp2, DAG.getConstant(0xFF00, VT));
+    Tmp4 = DAG.getNode(ISD::OR, VT, Tmp4, Tmp3);
+    Tmp2 = DAG.getNode(ISD::OR, VT, Tmp2, Tmp1);
+    return DAG.getNode(ISD::OR, VT, Tmp4, Tmp2);
+  case MVT::i64:
+    Tmp8 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(56, SHVT));
+    Tmp7 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(40, SHVT));
+    Tmp6 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp5 = DAG.getNode(ISD::SHL, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp4 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp3 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp2 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(40, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, VT, Op, DAG.getConstant(56, SHVT));
+    Tmp7 = DAG.getNode(ISD::AND, VT, Tmp7, DAG.getConstant(255ULL<<48, VT));
+    Tmp6 = DAG.getNode(ISD::AND, VT, Tmp6, DAG.getConstant(255ULL<<40, VT));
+    Tmp5 = DAG.getNode(ISD::AND, VT, Tmp5, DAG.getConstant(255ULL<<32, VT));
+    Tmp4 = DAG.getNode(ISD::AND, VT, Tmp4, DAG.getConstant(255ULL<<24, VT));
+    Tmp3 = DAG.getNode(ISD::AND, VT, Tmp3, DAG.getConstant(255ULL<<16, VT));
+    Tmp2 = DAG.getNode(ISD::AND, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT));
+    Tmp8 = DAG.getNode(ISD::OR, VT, Tmp8, Tmp7);
+    Tmp6 = DAG.getNode(ISD::OR, VT, Tmp6, Tmp5);
+    Tmp4 = DAG.getNode(ISD::OR, VT, Tmp4, Tmp3);
+    Tmp2 = DAG.getNode(ISD::OR, VT, Tmp2, Tmp1);
+    Tmp8 = DAG.getNode(ISD::OR, VT, Tmp8, Tmp6);
+    Tmp4 = DAG.getNode(ISD::OR, VT, Tmp4, Tmp2);
+    return DAG.getNode(ISD::OR, VT, Tmp8, Tmp4);
+  }
+}
+
+/// ExpandBitCount - Expand the specified bitcount instruction into operations.
+///
+SDOperand SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDOperand Op) {
+  switch (Opc) {
+  default: assert(0 && "Cannot expand this yet!");
+  case ISD::CTPOP: {
+    static const uint64_t mask[6] = {
+      0x5555555555555555ULL, 0x3333333333333333ULL,
+      0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+      0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+    };
+    MVT::ValueType VT = Op.getValueType();
+    MVT::ValueType ShVT = TLI.getShiftAmountTy();
+    unsigned len = MVT::getSizeInBits(VT);
+    for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+      // x = (x & mask[i]) + ((x >> (1 << i)) & mask[i])
+      SDOperand Tmp2 = DAG.getConstant(mask[i], VT);
+      SDOperand Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+      Op = DAG.getNode(ISD::ADD, VT, DAG.getNode(ISD::AND, VT, Op, Tmp2),
+                       DAG.getNode(ISD::AND, VT,
+                                   DAG.getNode(ISD::SRL, VT, Op, Tmp3), Tmp2));
+    }
+    return Op;
+  }
+  case ISD::CTLZ: {
+    // for now, we do this:
+    // x = x | (x >> 1);
+    // x = x | (x >> 2);
+    // ...
+ // x = x | (x >>16); + // x = x | (x >>32); // for 64-bit input + // return popcount(~x); + // + // but see also: http://www.hackersdelight.org/HDcode/nlz.cc + MVT::ValueType VT = Op.getValueType(); + MVT::ValueType ShVT = TLI.getShiftAmountTy(); + unsigned len = MVT::getSizeInBits(VT); + for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { + SDOperand Tmp3 = DAG.getConstant(1ULL << i, ShVT); + Op = DAG.getNode(ISD::OR, VT, Op, DAG.getNode(ISD::SRL, VT, Op, Tmp3)); + } + Op = DAG.getNode(ISD::XOR, VT, Op, DAG.getConstant(~0ULL, VT)); + return DAG.getNode(ISD::CTPOP, VT, Op); + } + case ISD::CTTZ: { + // for now, we use: { return popcount(~x & (x - 1)); } + // unless the target has ctlz but not ctpop, in which case we use: + // { return 32 - nlz(~x & (x-1)); } + // see also http://www.hackersdelight.org/HDcode/ntz.cc + MVT::ValueType VT = Op.getValueType(); + SDOperand Tmp2 = DAG.getConstant(~0ULL, VT); + SDOperand Tmp3 = DAG.getNode(ISD::AND, VT, + DAG.getNode(ISD::XOR, VT, Op, Tmp2), + DAG.getNode(ISD::SUB, VT, Op, DAG.getConstant(1, VT))); + // If ISD::CTLZ is legal and CTPOP isn't, then do that instead. + if (!TLI.isOperationLegal(ISD::CTPOP, VT) && + TLI.isOperationLegal(ISD::CTLZ, VT)) + return DAG.getNode(ISD::SUB, VT, + DAG.getConstant(MVT::getSizeInBits(VT), VT), + DAG.getNode(ISD::CTLZ, VT, Tmp3)); + return DAG.getNode(ISD::CTPOP, VT, Tmp3); + } + } +} + +/// ExpandOp - Expand the specified SDOperand into its two component pieces +/// Lo&Hi. Note that the Op MUST be an expanded type. As a result of this, the +/// LegalizeNodes map is filled in for any results that are not expanded, the +/// ExpandedNodes map is filled in for any results that are expanded, and the +/// Lo/Hi values are returned. +void SelectionDAGLegalize::ExpandOp(SDOperand Op, SDOperand &Lo, SDOperand &Hi){ + MVT::ValueType VT = Op.getValueType(); + MVT::ValueType NVT = TLI.getTypeToTransformTo(VT); + SDNode *Node = Op.Val; + assert(getTypeAction(VT) == Expand && "Not an expanded type!"); + assert(((MVT::isInteger(NVT) && NVT < VT) || MVT::isFloatingPoint(VT) || + MVT::isVector(VT)) && + "Cannot expand to FP value or to larger int value!"); + + // See if we already expanded it. + DenseMap<SDOperand, std::pair<SDOperand, SDOperand> >::iterator I + = ExpandedNodes.find(Op); + if (I != ExpandedNodes.end()) { + Lo = I->second.first; + Hi = I->second.second; + return; + } + + switch (Node->getOpcode()) { + case ISD::CopyFromReg: + assert(0 && "CopyFromReg must be legal!"); + default: +#ifndef NDEBUG + cerr << "NODE: "; Node->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to expand this operator!"); + abort(); + case ISD::UNDEF: + NVT = TLI.getTypeToExpandTo(VT); + Lo = DAG.getNode(ISD::UNDEF, NVT); + Hi = DAG.getNode(ISD::UNDEF, NVT); + break; + case ISD::Constant: { + uint64_t Cst = cast<ConstantSDNode>(Node)->getValue(); + Lo = DAG.getConstant(Cst, NVT); + Hi = DAG.getConstant(Cst >> MVT::getSizeInBits(NVT), NVT); + break; + } + case ISD::ConstantFP: { + ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node); + Lo = ExpandConstantFP(CFP, false, DAG, TLI); + if (getTypeAction(Lo.getValueType()) == Expand) + ExpandOp(Lo, Lo, Hi); + break; + } + case ISD::BUILD_PAIR: + // Return the operands. + Lo = Node->getOperand(0); + Hi = Node->getOperand(1); + break; + + case ISD::SIGN_EXTEND_INREG: + ExpandOp(Node->getOperand(0), Lo, Hi); + // sext_inreg the low part if needed. + Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, NVT, Lo, Node->getOperand(1)); + + // The high part gets the sign extension from the lo-part. 
This handles + // things like sextinreg V:i64 from i8. + Hi = DAG.getNode(ISD::SRA, NVT, Lo, + DAG.getConstant(MVT::getSizeInBits(NVT)-1, + TLI.getShiftAmountTy())); + break; + + case ISD::BSWAP: { + ExpandOp(Node->getOperand(0), Lo, Hi); + SDOperand TempLo = DAG.getNode(ISD::BSWAP, NVT, Hi); + Hi = DAG.getNode(ISD::BSWAP, NVT, Lo); + Lo = TempLo; + break; + } + + case ISD::CTPOP: + ExpandOp(Node->getOperand(0), Lo, Hi); + Lo = DAG.getNode(ISD::ADD, NVT, // ctpop(HL) -> ctpop(H)+ctpop(L) + DAG.getNode(ISD::CTPOP, NVT, Lo), + DAG.getNode(ISD::CTPOP, NVT, Hi)); + Hi = DAG.getConstant(0, NVT); + break; + + case ISD::CTLZ: { + // ctlz (HL) -> ctlz(H) != 32 ? ctlz(H) : (ctlz(L)+32) + ExpandOp(Node->getOperand(0), Lo, Hi); + SDOperand BitsC = DAG.getConstant(MVT::getSizeInBits(NVT), NVT); + SDOperand HLZ = DAG.getNode(ISD::CTLZ, NVT, Hi); + SDOperand TopNotZero = DAG.getSetCC(TLI.getSetCCResultTy(), HLZ, BitsC, + ISD::SETNE); + SDOperand LowPart = DAG.getNode(ISD::CTLZ, NVT, Lo); + LowPart = DAG.getNode(ISD::ADD, NVT, LowPart, BitsC); + + Lo = DAG.getNode(ISD::SELECT, NVT, TopNotZero, HLZ, LowPart); + Hi = DAG.getConstant(0, NVT); + break; + } + + case ISD::CTTZ: { + // cttz (HL) -> cttz(L) != 32 ? cttz(L) : (cttz(H)+32) + ExpandOp(Node->getOperand(0), Lo, Hi); + SDOperand BitsC = DAG.getConstant(MVT::getSizeInBits(NVT), NVT); + SDOperand LTZ = DAG.getNode(ISD::CTTZ, NVT, Lo); + SDOperand BotNotZero = DAG.getSetCC(TLI.getSetCCResultTy(), LTZ, BitsC, + ISD::SETNE); + SDOperand HiPart = DAG.getNode(ISD::CTTZ, NVT, Hi); + HiPart = DAG.getNode(ISD::ADD, NVT, HiPart, BitsC); + + Lo = DAG.getNode(ISD::SELECT, NVT, BotNotZero, LTZ, HiPart); + Hi = DAG.getConstant(0, NVT); + break; + } + + case ISD::VAARG: { + SDOperand Ch = Node->getOperand(0); // Legalize the chain. + SDOperand Ptr = Node->getOperand(1); // Legalize the pointer. + Lo = DAG.getVAArg(NVT, Ch, Ptr, Node->getOperand(2)); + Hi = DAG.getVAArg(NVT, Lo.getValue(1), Ptr, Node->getOperand(2)); + + // Remember that we legalized the chain. + Hi = LegalizeOp(Hi); + AddLegalizedOperand(Op.getValue(1), Hi.getValue(1)); + if (!TLI.isLittleEndian()) + std::swap(Lo, Hi); + break; + } + + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Node); + SDOperand Ch = LD->getChain(); // Legalize the chain. + SDOperand Ptr = LD->getBasePtr(); // Legalize the pointer. + ISD::LoadExtType ExtType = LD->getExtensionType(); + int SVOffset = LD->getSrcValueOffset(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + + if (ExtType == ISD::NON_EXTLOAD) { + Lo = DAG.getLoad(NVT, Ch, Ptr, LD->getSrcValue(), SVOffset, + isVolatile, Alignment); + if (VT == MVT::f32 || VT == MVT::f64) { + // f32->i32 or f64->i64 one to one expansion. + // Remember that we legalized the chain. + AddLegalizedOperand(SDOperand(Node, 1), LegalizeOp(Lo.getValue(1))); + // Recursively expand the new load. + if (getTypeAction(NVT) == Expand) + ExpandOp(Lo, Lo, Hi); + break; + } + + // Increment the pointer to the other half. + unsigned IncrementSize = MVT::getSizeInBits(Lo.getValueType())/8; + Ptr = DAG.getNode(ISD::ADD, Ptr.getValueType(), Ptr, + getIntPtrConstant(IncrementSize)); + SVOffset += IncrementSize; + if (Alignment > IncrementSize) + Alignment = IncrementSize; + Hi = DAG.getLoad(NVT, Ch, Ptr, LD->getSrcValue(), SVOffset, + isVolatile, Alignment); + + // Build a factor node to remember that this load is independent of the + // other one. 
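+      // (A TokenFactor merges the two half-loads' chains without imposing an
+      //  order between the loads themselves; e.g. an i64 load at address A
+      //  has become independent i32 loads at A and A+4.)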
+ SDOperand TF = DAG.getNode(ISD::TokenFactor, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Remember that we legalized the chain. + AddLegalizedOperand(Op.getValue(1), LegalizeOp(TF)); + if (!TLI.isLittleEndian()) + std::swap(Lo, Hi); + } else { + MVT::ValueType EVT = LD->getLoadedVT(); + + if (VT == MVT::f64 && EVT == MVT::f32) { + // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND + SDOperand Load = DAG.getLoad(EVT, Ch, Ptr, LD->getSrcValue(), + SVOffset, isVolatile, Alignment); + // Remember that we legalized the chain. + AddLegalizedOperand(SDOperand(Node, 1), LegalizeOp(Load.getValue(1))); + ExpandOp(DAG.getNode(ISD::FP_EXTEND, VT, Load), Lo, Hi); + break; + } + + if (EVT == NVT) + Lo = DAG.getLoad(NVT, Ch, Ptr, LD->getSrcValue(), + SVOffset, isVolatile, Alignment); + else + Lo = DAG.getExtLoad(ExtType, NVT, Ch, Ptr, LD->getSrcValue(), + SVOffset, EVT, isVolatile, + Alignment); + + // Remember that we legalized the chain. + AddLegalizedOperand(SDOperand(Node, 1), LegalizeOp(Lo.getValue(1))); + + if (ExtType == ISD::SEXTLOAD) { + // The high part is obtained by SRA'ing all but one of the bits of the + // lo part. + unsigned LoSize = MVT::getSizeInBits(Lo.getValueType()); + Hi = DAG.getNode(ISD::SRA, NVT, Lo, + DAG.getConstant(LoSize-1, TLI.getShiftAmountTy())); + } else if (ExtType == ISD::ZEXTLOAD) { + // The high part is just a zero. + Hi = DAG.getConstant(0, NVT); + } else /* if (ExtType == ISD::EXTLOAD) */ { + // The high part is undefined. + Hi = DAG.getNode(ISD::UNDEF, NVT); + } + } + break; + } + case ISD::AND: + case ISD::OR: + case ISD::XOR: { // Simple logical operators -> two trivial pieces. + SDOperand LL, LH, RL, RH; + ExpandOp(Node->getOperand(0), LL, LH); + ExpandOp(Node->getOperand(1), RL, RH); + Lo = DAG.getNode(Node->getOpcode(), NVT, LL, RL); + Hi = DAG.getNode(Node->getOpcode(), NVT, LH, RH); + break; + } + case ISD::SELECT: { + SDOperand LL, LH, RL, RH; + ExpandOp(Node->getOperand(1), LL, LH); + ExpandOp(Node->getOperand(2), RL, RH); + if (getTypeAction(NVT) == Expand) + NVT = TLI.getTypeToExpandTo(NVT); + Lo = DAG.getNode(ISD::SELECT, NVT, Node->getOperand(0), LL, RL); + if (VT != MVT::f32) + Hi = DAG.getNode(ISD::SELECT, NVT, Node->getOperand(0), LH, RH); + break; + } + case ISD::SELECT_CC: { + SDOperand TL, TH, FL, FH; + ExpandOp(Node->getOperand(2), TL, TH); + ExpandOp(Node->getOperand(3), FL, FH); + if (getTypeAction(NVT) == Expand) + NVT = TLI.getTypeToExpandTo(NVT); + Lo = DAG.getNode(ISD::SELECT_CC, NVT, Node->getOperand(0), + Node->getOperand(1), TL, FL, Node->getOperand(4)); + if (VT != MVT::f32) + Hi = DAG.getNode(ISD::SELECT_CC, NVT, Node->getOperand(0), + Node->getOperand(1), TH, FH, Node->getOperand(4)); + break; + } + case ISD::ANY_EXTEND: + // The low part is any extension of the input (which degenerates to a copy). + Lo = DAG.getNode(ISD::ANY_EXTEND, NVT, Node->getOperand(0)); + // The high part is undefined. + Hi = DAG.getNode(ISD::UNDEF, NVT); + break; + case ISD::SIGN_EXTEND: { + // The low part is just a sign extension of the input (which degenerates to + // a copy). + Lo = DAG.getNode(ISD::SIGN_EXTEND, NVT, Node->getOperand(0)); + + // The high part is obtained by SRA'ing all but one of the bits of the lo + // part. + unsigned LoSize = MVT::getSizeInBits(Lo.getValueType()); + Hi = DAG.getNode(ISD::SRA, NVT, Lo, + DAG.getConstant(LoSize-1, TLI.getShiftAmountTy())); + break; + } + case ISD::ZERO_EXTEND: + // The low part is just a zero extension of the input (which degenerates to + // a copy). 
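+    // (E.g. expanding zext i32 %x to i64 yields Lo = %x and Hi = 0.)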
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, NVT, Node->getOperand(0)); + + // The high part is just a zero. + Hi = DAG.getConstant(0, NVT); + break; + + case ISD::TRUNCATE: { + // The input value must be larger than this value. Expand *it*. + SDOperand NewLo; + ExpandOp(Node->getOperand(0), NewLo, Hi); + + // The low part is now either the right size, or it is closer. If not the + // right size, make an illegal truncate so we recursively expand it. + if (NewLo.getValueType() != Node->getValueType(0)) + NewLo = DAG.getNode(ISD::TRUNCATE, Node->getValueType(0), NewLo); + ExpandOp(NewLo, Lo, Hi); + break; + } + + case ISD::BIT_CONVERT: { + SDOperand Tmp; + if (TLI.getOperationAction(ISD::BIT_CONVERT, VT) == TargetLowering::Custom){ + // If the target wants to, allow it to lower this itself. + switch (getTypeAction(Node->getOperand(0).getValueType())) { + case Expand: assert(0 && "cannot expand FP!"); + case Legal: Tmp = LegalizeOp(Node->getOperand(0)); break; + case Promote: Tmp = PromoteOp (Node->getOperand(0)); break; + } + Tmp = TLI.LowerOperation(DAG.getNode(ISD::BIT_CONVERT, VT, Tmp), DAG); + } + + // f32 / f64 must be expanded to i32 / i64. + if (VT == MVT::f32 || VT == MVT::f64) { + Lo = DAG.getNode(ISD::BIT_CONVERT, NVT, Node->getOperand(0)); + if (getTypeAction(NVT) == Expand) + ExpandOp(Lo, Lo, Hi); + break; + } + + // If source operand will be expanded to the same type as VT, i.e. + // i64 <- f64, i32 <- f32, expand the source operand instead. + MVT::ValueType VT0 = Node->getOperand(0).getValueType(); + if (getTypeAction(VT0) == Expand && TLI.getTypeToTransformTo(VT0) == VT) { + ExpandOp(Node->getOperand(0), Lo, Hi); + break; + } + + // Turn this into a load/store pair by default. + if (Tmp.Val == 0) + Tmp = ExpandBIT_CONVERT(VT, Node->getOperand(0)); + + ExpandOp(Tmp, Lo, Hi); + break; + } + + case ISD::READCYCLECOUNTER: + assert(TLI.getOperationAction(ISD::READCYCLECOUNTER, VT) == + TargetLowering::Custom && + "Must custom expand ReadCycleCounter"); + Lo = TLI.LowerOperation(Op, DAG); + assert(Lo.Val && "Node must be custom expanded!"); + Hi = Lo.getValue(1); + AddLegalizedOperand(SDOperand(Node, 1), // Remember we legalized the chain. + LegalizeOp(Lo.getValue(2))); + break; + + // These operators cannot be expanded directly, emit them as calls to + // library functions. + case ISD::FP_TO_SINT: { + if (TLI.getOperationAction(ISD::FP_TO_SINT, VT) == TargetLowering::Custom) { + SDOperand Op; + switch (getTypeAction(Node->getOperand(0).getValueType())) { + case Expand: assert(0 && "cannot expand FP!"); + case Legal: Op = LegalizeOp(Node->getOperand(0)); break; + case Promote: Op = PromoteOp (Node->getOperand(0)); break; + } + + Op = TLI.LowerOperation(DAG.getNode(ISD::FP_TO_SINT, VT, Op), DAG); + + // Now that the custom expander is done, expand the result, which is still + // VT. 
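+      // (The custom lowering hook returns a node still of type VT; ExpandOp
+      //  then splits that result into the two legal halves.)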
+ if (Op.Val) { + ExpandOp(Op, Lo, Hi); + break; + } + } + + RTLIB::Libcall LC; + if (Node->getOperand(0).getValueType() == MVT::f32) + LC = RTLIB::FPTOSINT_F32_I64; + else + LC = RTLIB::FPTOSINT_F64_I64; + Lo = ExpandLibCall(TLI.getLibcallName(LC), Node, + false/*sign irrelevant*/, Hi); + break; + } + + case ISD::FP_TO_UINT: { + if (TLI.getOperationAction(ISD::FP_TO_UINT, VT) == TargetLowering::Custom) { + SDOperand Op; + switch (getTypeAction(Node->getOperand(0).getValueType())) { + case Expand: assert(0 && "cannot expand FP!"); + case Legal: Op = LegalizeOp(Node->getOperand(0)); break; + case Promote: Op = PromoteOp (Node->getOperand(0)); break; + } + + Op = TLI.LowerOperation(DAG.getNode(ISD::FP_TO_UINT, VT, Op), DAG); + + // Now that the custom expander is done, expand the result. + if (Op.Val) { + ExpandOp(Op, Lo, Hi); + break; + } + } + + RTLIB::Libcall LC; + if (Node->getOperand(0).getValueType() == MVT::f32) + LC = RTLIB::FPTOUINT_F32_I64; + else + LC = RTLIB::FPTOUINT_F64_I64; + Lo = ExpandLibCall(TLI.getLibcallName(LC), Node, + false/*sign irrelevant*/, Hi); + break; + } + + case ISD::SHL: { + // If the target wants custom lowering, do so. + SDOperand ShiftAmt = LegalizeOp(Node->getOperand(1)); + if (TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Custom) { + SDOperand Op = DAG.getNode(ISD::SHL, VT, Node->getOperand(0), ShiftAmt); + Op = TLI.LowerOperation(Op, DAG); + if (Op.Val) { + // Now that the custom expander is done, expand the result, which is + // still VT. + ExpandOp(Op, Lo, Hi); + break; + } + } + + // If ADDC/ADDE are supported and if the shift amount is a constant 1, emit + // this X << 1 as X+X. + if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(ShiftAmt)) { + if (ShAmt->getValue() == 1 && TLI.isOperationLegal(ISD::ADDC, NVT) && + TLI.isOperationLegal(ISD::ADDE, NVT)) { + SDOperand LoOps[2], HiOps[3]; + ExpandOp(Node->getOperand(0), LoOps[0], HiOps[0]); + SDVTList VTList = DAG.getVTList(LoOps[0].getValueType(), MVT::Flag); + LoOps[1] = LoOps[0]; + Lo = DAG.getNode(ISD::ADDC, VTList, LoOps, 2); + + HiOps[1] = HiOps[0]; + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDE, VTList, HiOps, 3); + break; + } + } + + // If we can emit an efficient shift operation, do so now. + if (ExpandShift(ISD::SHL, Node->getOperand(0), ShiftAmt, Lo, Hi)) + break; + + // If this target supports SHL_PARTS, use it. + TargetLowering::LegalizeAction Action = + TLI.getOperationAction(ISD::SHL_PARTS, NVT); + if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) || + Action == TargetLowering::Custom) { + ExpandShiftParts(ISD::SHL_PARTS, Node->getOperand(0), ShiftAmt, Lo, Hi); + break; + } + + // Otherwise, emit a libcall. + Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SHL_I64), Node, + false/*left shift=unsigned*/, Hi); + break; + } + + case ISD::SRA: { + // If the target wants custom lowering, do so. + SDOperand ShiftAmt = LegalizeOp(Node->getOperand(1)); + if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Custom) { + SDOperand Op = DAG.getNode(ISD::SRA, VT, Node->getOperand(0), ShiftAmt); + Op = TLI.LowerOperation(Op, DAG); + if (Op.Val) { + // Now that the custom expander is done, expand the result, which is + // still VT. + ExpandOp(Op, Lo, Hi); + break; + } + } + + // If we can emit an efficient shift operation, do so now. + if (ExpandShift(ISD::SRA, Node->getOperand(0), ShiftAmt, Lo, Hi)) + break; + + // If this target supports SRA_PARTS, use it. 
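+    // (SRA_PARTS takes the two halves plus the shift amount and produces both
+    //  halves of the result in a single target-supported node.)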
+    TargetLowering::LegalizeAction Action =
+      TLI.getOperationAction(ISD::SRA_PARTS, NVT);
+    if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+        Action == TargetLowering::Custom) {
+      ExpandShiftParts(ISD::SRA_PARTS, Node->getOperand(0), ShiftAmt, Lo, Hi);
+      break;
+    }
+
+    // Otherwise, emit a libcall.
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SRA_I64), Node,
+                       true/*ashr is signed*/, Hi);
+    break;
+  }
+
+  case ISD::SRL: {
+    // If the target wants custom lowering, do so.
+    SDOperand ShiftAmt = LegalizeOp(Node->getOperand(1));
+    if (TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Custom) {
+      SDOperand Op = DAG.getNode(ISD::SRL, VT, Node->getOperand(0), ShiftAmt);
+      Op = TLI.LowerOperation(Op, DAG);
+      if (Op.Val) {
+        // Now that the custom expander is done, expand the result, which is
+        // still VT.
+        ExpandOp(Op, Lo, Hi);
+        break;
+      }
+    }
+
+    // If we can emit an efficient shift operation, do so now.
+    if (ExpandShift(ISD::SRL, Node->getOperand(0), ShiftAmt, Lo, Hi))
+      break;
+
+    // If this target supports SRL_PARTS, use it.
+    TargetLowering::LegalizeAction Action =
+      TLI.getOperationAction(ISD::SRL_PARTS, NVT);
+    if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+        Action == TargetLowering::Custom) {
+      ExpandShiftParts(ISD::SRL_PARTS, Node->getOperand(0), ShiftAmt, Lo, Hi);
+      break;
+    }
+
+    // Otherwise, emit a libcall.
+    Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SRL_I64), Node,
+                       false/*lshr is unsigned*/, Hi);
+    break;
+  }
+
+  case ISD::ADD:
+  case ISD::SUB: {
+    // If the target wants to custom expand this, let them. Use a temporary
+    // here so that Op, the key used to memoize this expansion below, is not
+    // clobbered if the target declines.
+    if (TLI.getOperationAction(Node->getOpcode(), VT) ==
+            TargetLowering::Custom) {
+      SDOperand Result = TLI.LowerOperation(Op, DAG);
+      if (Result.Val) {
+        ExpandOp(Result, Lo, Hi);
+        break;
+      }
+    }
+
+    // Expand the subcomponents.
+    SDOperand LHSL, LHSH, RHSL, RHSH;
+    ExpandOp(Node->getOperand(0), LHSL, LHSH);
+    ExpandOp(Node->getOperand(1), RHSL, RHSH);
+    SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+    SDOperand LoOps[2], HiOps[3];
+    LoOps[0] = LHSL;
+    LoOps[1] = RHSL;
+    HiOps[0] = LHSH;
+    HiOps[1] = RHSH;
+    if (Node->getOpcode() == ISD::ADD) {
+      Lo = DAG.getNode(ISD::ADDC, VTList, LoOps, 2);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::ADDE, VTList, HiOps, 3);
+    } else {
+      Lo = DAG.getNode(ISD::SUBC, VTList, LoOps, 2);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::SUBE, VTList, HiOps, 3);
+    }
+    break;
+  }
+
+  case ISD::ADDC:
+  case ISD::SUBC: {
+    // Expand the subcomponents.
+    SDOperand LHSL, LHSH, RHSL, RHSH;
+    ExpandOp(Node->getOperand(0), LHSL, LHSH);
+    ExpandOp(Node->getOperand(1), RHSL, RHSH);
+    SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+    SDOperand LoOps[2] = { LHSL, RHSL };
+    SDOperand HiOps[3] = { LHSH, RHSH };
+
+    if (Node->getOpcode() == ISD::ADDC) {
+      Lo = DAG.getNode(ISD::ADDC, VTList, LoOps, 2);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::ADDE, VTList, HiOps, 3);
+    } else {
+      Lo = DAG.getNode(ISD::SUBC, VTList, LoOps, 2);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::SUBE, VTList, HiOps, 3);
+    }
+    // Remember that we legalized the flag.
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(Hi.getValue(1)));
+    break;
+  }
+  case ISD::ADDE:
+  case ISD::SUBE: {
+    // Expand the subcomponents.
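+    // (ADDE/SUBE take a third operand, the carry/borrow flag produced by a
+    //  prior ADDC/SUBC or ADDE/SUBE, and produce a new carry for the next
+    //  piece, so multi-word arithmetic chains through the flag results.)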
+ SDOperand LHSL, LHSH, RHSL, RHSH; + ExpandOp(Node->getOperand(0), LHSL, LHSH); + ExpandOp(Node->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag); + SDOperand LoOps[3] = { LHSL, RHSL, Node->getOperand(2) }; + SDOperand HiOps[3] = { LHSH, RHSH }; + + Lo = DAG.getNode(Node->getOpcode(), VTList, LoOps, 3); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(Node->getOpcode(), VTList, HiOps, 3); + + // Remember that we legalized the flag. + AddLegalizedOperand(Op.getValue(1), LegalizeOp(Hi.getValue(1))); + break; + } + case ISD::MUL: { + // If the target wants to custom expand this, let them. + if (TLI.getOperationAction(ISD::MUL, VT) == TargetLowering::Custom) { + SDOperand New = TLI.LowerOperation(Op, DAG); + if (New.Val) { + ExpandOp(New, Lo, Hi); + break; + } + } + + bool HasMULHS = TLI.isOperationLegal(ISD::MULHS, NVT); + bool HasMULHU = TLI.isOperationLegal(ISD::MULHU, NVT); + if (HasMULHS || HasMULHU) { + SDOperand LL, LH, RL, RH; + ExpandOp(Node->getOperand(0), LL, LH); + ExpandOp(Node->getOperand(1), RL, RH); + unsigned SH = MVT::getSizeInBits(RH.getValueType())-1; + // FIXME: Move this to the dag combiner. + // MULHS implicitly sign extends its inputs. Check to see if ExpandOp + // extended the sign bit of the low half through the upper half, and if so + // emit a MULHS instead of the alternate sequence that is valid for any + // i64 x i64 multiply. + if (HasMULHS && + // is RH an extension of the sign bit of RL? + RH.getOpcode() == ISD::SRA && RH.getOperand(0) == RL && + RH.getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(RH.getOperand(1))->getValue() == SH && + // is LH an extension of the sign bit of LL? + LH.getOpcode() == ISD::SRA && LH.getOperand(0) == LL && + LH.getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(LH.getOperand(1))->getValue() == SH) { + // Low part: + Lo = DAG.getNode(ISD::MUL, NVT, LL, RL); + // High part: + Hi = DAG.getNode(ISD::MULHS, NVT, LL, RL); + break; + } else if (HasMULHU) { + // Low part: + Lo = DAG.getNode(ISD::MUL, NVT, LL, RL); + + // High part: + Hi = DAG.getNode(ISD::MULHU, NVT, LL, RL); + RH = DAG.getNode(ISD::MUL, NVT, LL, RH); + LH = DAG.getNode(ISD::MUL, NVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, NVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, NVT, Hi, LH); + break; + } + } + + Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::MUL_I64), Node, + false/*sign irrelevant*/, Hi); + break; + } + case ISD::SDIV: + Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SDIV_I64), Node, true, Hi); + break; + case ISD::UDIV: + Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::UDIV_I64), Node, true, Hi); + break; + case ISD::SREM: + Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::SREM_I64), Node, true, Hi); + break; + case ISD::UREM: + Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::UREM_I64), Node, true, Hi); + break; + + case ISD::FADD: + Lo = ExpandLibCall(TLI.getLibcallName((VT == MVT::f32) + ? RTLIB::ADD_F32 : RTLIB::ADD_F64), + Node, false, Hi); + break; + case ISD::FSUB: + Lo = ExpandLibCall(TLI.getLibcallName((VT == MVT::f32) + ? RTLIB::SUB_F32 : RTLIB::SUB_F64), + Node, false, Hi); + break; + case ISD::FMUL: + Lo = ExpandLibCall(TLI.getLibcallName((VT == MVT::f32) + ? RTLIB::MUL_F32 : RTLIB::MUL_F64), + Node, false, Hi); + break; + case ISD::FDIV: + Lo = ExpandLibCall(TLI.getLibcallName((VT == MVT::f32) + ? 
RTLIB::DIV_F32 : RTLIB::DIV_F64), + Node, false, Hi); + break; + case ISD::FP_EXTEND: + Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::FPEXT_F32_F64), Node, true,Hi); + break; + case ISD::FP_ROUND: + Lo = ExpandLibCall(TLI.getLibcallName(RTLIB::FPROUND_F64_F32),Node,true,Hi); + break; + case ISD::FSQRT: + case ISD::FSIN: + case ISD::FCOS: { + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + switch(Node->getOpcode()) { + case ISD::FSQRT: + LC = (VT == MVT::f32) ? RTLIB::SQRT_F32 : RTLIB::SQRT_F64; + break; + case ISD::FSIN: + LC = (VT == MVT::f32) ? RTLIB::SIN_F32 : RTLIB::SIN_F64; + break; + case ISD::FCOS: + LC = (VT == MVT::f32) ? RTLIB::COS_F32 : RTLIB::COS_F64; + break; + default: assert(0 && "Unreachable!"); + } + Lo = ExpandLibCall(TLI.getLibcallName(LC), Node, false, Hi); + break; + } + case ISD::FABS: { + SDOperand Mask = (VT == MVT::f64) + ? DAG.getConstantFP(BitsToDouble(~(1ULL << 63)), VT) + : DAG.getConstantFP(BitsToFloat(~(1U << 31)), VT); + Mask = DAG.getNode(ISD::BIT_CONVERT, NVT, Mask); + Lo = DAG.getNode(ISD::BIT_CONVERT, NVT, Node->getOperand(0)); + Lo = DAG.getNode(ISD::AND, NVT, Lo, Mask); + if (getTypeAction(NVT) == Expand) + ExpandOp(Lo, Lo, Hi); + break; + } + case ISD::FNEG: { + SDOperand Mask = (VT == MVT::f64) + ? DAG.getConstantFP(BitsToDouble(1ULL << 63), VT) + : DAG.getConstantFP(BitsToFloat(1U << 31), VT); + Mask = DAG.getNode(ISD::BIT_CONVERT, NVT, Mask); + Lo = DAG.getNode(ISD::BIT_CONVERT, NVT, Node->getOperand(0)); + Lo = DAG.getNode(ISD::XOR, NVT, Lo, Mask); + if (getTypeAction(NVT) == Expand) + ExpandOp(Lo, Lo, Hi); + break; + } + case ISD::FCOPYSIGN: { + Lo = ExpandFCOPYSIGNToBitwiseOps(Node, NVT, DAG, TLI); + if (getTypeAction(NVT) == Expand) + ExpandOp(Lo, Lo, Hi); + break; + } + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: { + bool isSigned = Node->getOpcode() == ISD::SINT_TO_FP; + MVT::ValueType SrcVT = Node->getOperand(0).getValueType(); + RTLIB::Libcall LC; + if (Node->getOperand(0).getValueType() == MVT::i64) { + if (VT == MVT::f32) + LC = isSigned ? RTLIB::SINTTOFP_I64_F32 : RTLIB::UINTTOFP_I64_F32; + else + LC = isSigned ? RTLIB::SINTTOFP_I64_F64 : RTLIB::UINTTOFP_I64_F64; + } else { + if (VT == MVT::f32) + LC = isSigned ? RTLIB::SINTTOFP_I32_F32 : RTLIB::UINTTOFP_I32_F32; + else + LC = isSigned ? RTLIB::SINTTOFP_I32_F64 : RTLIB::UINTTOFP_I32_F64; + } + + // Promote the operand if needed. + if (getTypeAction(SrcVT) == Promote) { + SDOperand Tmp = PromoteOp(Node->getOperand(0)); + Tmp = isSigned + ? DAG.getNode(ISD::SIGN_EXTEND_INREG, Tmp.getValueType(), Tmp, + DAG.getValueType(SrcVT)) + : DAG.getZeroExtendInReg(Tmp, SrcVT); + Node = DAG.UpdateNodeOperands(Op, Tmp).Val; + } + + const char *LibCall = TLI.getLibcallName(LC); + if (LibCall) + Lo = ExpandLibCall(TLI.getLibcallName(LC), Node, isSigned, Hi); + else { + Lo = ExpandIntToFP(Node->getOpcode() == ISD::SINT_TO_FP, VT, + Node->getOperand(0)); + if (getTypeAction(Lo.getValueType()) == Expand) + ExpandOp(Lo, Lo, Hi); + } + break; + } + } + + // Make sure the resultant values have been legalized themselves, unless this + // is a type that requires multi-step expansion. + if (getTypeAction(NVT) != Expand && NVT != MVT::isVoid) { + Lo = LegalizeOp(Lo); + if (Hi.Val) + // Don't legalize the high part if it is expanded to a single node. + Hi = LegalizeOp(Hi); + } + + // Remember in a map if the values will be reused later. 
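+  // Since DAG nodes are shared, memoizing the expansion here guarantees each
+  // node is expanded exactly once, no matter how many users it has.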
+ bool isNew = ExpandedNodes.insert(std::make_pair(Op, std::make_pair(Lo, Hi))); + assert(isNew && "Value already expanded?!?"); +} + +/// SplitVectorOp - Given an operand of vector type, break it down into +/// two smaller values, still of vector type. +void SelectionDAGLegalize::SplitVectorOp(SDOperand Op, SDOperand &Lo, + SDOperand &Hi) { + assert(MVT::isVector(Op.getValueType()) && "Cannot split non-vector type!"); + SDNode *Node = Op.Val; + unsigned NumElements = MVT::getVectorNumElements(Node->getValueType(0)); + assert(NumElements > 1 && "Cannot split a single element vector!"); + unsigned NewNumElts = NumElements/2; + MVT::ValueType NewEltVT = MVT::getVectorElementType(Node->getValueType(0)); + MVT::ValueType NewVT = MVT::getVectorType(NewEltVT, NewNumElts); + + // See if we already split it. + std::map<SDOperand, std::pair<SDOperand, SDOperand> >::iterator I + = SplitNodes.find(Op); + if (I != SplitNodes.end()) { + Lo = I->second.first; + Hi = I->second.second; + return; + } + + switch (Node->getOpcode()) { + default: +#ifndef NDEBUG + Node->dump(&DAG); +#endif + assert(0 && "Unhandled operation in SplitVectorOp!"); + case ISD::BUILD_PAIR: + Lo = Node->getOperand(0); + Hi = Node->getOperand(1); + break; + case ISD::BUILD_VECTOR: { + SmallVector<SDOperand, 8> LoOps(Node->op_begin(), + Node->op_begin()+NewNumElts); + Lo = DAG.getNode(ISD::BUILD_VECTOR, NewVT, &LoOps[0], LoOps.size()); + + SmallVector<SDOperand, 8> HiOps(Node->op_begin()+NewNumElts, + Node->op_end()); + Hi = DAG.getNode(ISD::BUILD_VECTOR, NewVT, &HiOps[0], HiOps.size()); + break; + } + case ISD::CONCAT_VECTORS: { + unsigned NewNumSubvectors = Node->getNumOperands() / 2; + if (NewNumSubvectors == 1) { + Lo = Node->getOperand(0); + Hi = Node->getOperand(1); + } else { + SmallVector<SDOperand, 8> LoOps(Node->op_begin(), + Node->op_begin()+NewNumSubvectors); + Lo = DAG.getNode(ISD::CONCAT_VECTORS, NewVT, &LoOps[0], LoOps.size()); + + SmallVector<SDOperand, 8> HiOps(Node->op_begin()+NewNumSubvectors, + Node->op_end()); + Hi = DAG.getNode(ISD::CONCAT_VECTORS, NewVT, &HiOps[0], HiOps.size()); + } + break; + } + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::SDIV: + case ISD::UDIV: + case ISD::FDIV: + case ISD::AND: + case ISD::OR: + case ISD::XOR: { + SDOperand LL, LH, RL, RH; + SplitVectorOp(Node->getOperand(0), LL, LH); + SplitVectorOp(Node->getOperand(1), RL, RH); + + Lo = DAG.getNode(Node->getOpcode(), NewVT, LL, RL); + Hi = DAG.getNode(Node->getOpcode(), NewVT, LH, RH); + break; + } + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Node); + SDOperand Ch = LD->getChain(); + SDOperand Ptr = LD->getBasePtr(); + const Value *SV = LD->getSrcValue(); + int SVOffset = LD->getSrcValueOffset(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + + Lo = DAG.getLoad(NewVT, Ch, Ptr, SV, SVOffset, isVolatile, Alignment); + unsigned IncrementSize = NewNumElts * MVT::getSizeInBits(NewEltVT)/8; + Ptr = DAG.getNode(ISD::ADD, Ptr.getValueType(), Ptr, + getIntPtrConstant(IncrementSize)); + SVOffset += IncrementSize; + if (Alignment > IncrementSize) + Alignment = IncrementSize; + Hi = DAG.getLoad(NewVT, Ch, Ptr, SV, SVOffset, isVolatile, Alignment); + + // Build a factor node to remember that this load is independent of the + // other one. + SDOperand TF = DAG.getNode(ISD::TokenFactor, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Remember that we legalized the chain. 
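+    // (Users of the original load's chain must be rerouted to the TokenFactor,
+    //  not to either half-load's chain.)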
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(TF));
+    break;
+  }
+  case ISD::BIT_CONVERT: {
+    // We know the result is a vector. The input may be either a vector or a
+    // scalar value.
+    SDOperand InOp = Node->getOperand(0);
+    if (!MVT::isVector(InOp.getValueType()) ||
+        MVT::getVectorNumElements(InOp.getValueType()) == 1) {
+      // The input is a scalar or single-element vector.
+      // Lower to a store/load so that it can be split.
+      // FIXME: this could probably be improved.
+      SDOperand Ptr = CreateStackTemporary(InOp.getValueType());
+
+      SDOperand St = DAG.getStore(DAG.getEntryNode(),
+                                  InOp, Ptr, NULL, 0);
+      InOp = DAG.getLoad(Op.getValueType(), St, Ptr, NULL, 0);
+    }
+    // Split the vector and convert each of the pieces now.
+    SplitVectorOp(InOp, Lo, Hi);
+    Lo = DAG.getNode(ISD::BIT_CONVERT, NewVT, Lo);
+    Hi = DAG.getNode(ISD::BIT_CONVERT, NewVT, Hi);
+    break;
+  }
+  }
+
+  // Remember in a map if the values will be reused later.
+  bool isNew =
+    SplitNodes.insert(std::make_pair(Op, std::make_pair(Lo, Hi))).second;
+  assert(isNew && "Value already split?!?");
+}
+
+
+/// ScalarizeVectorOp - Given an operand of single-element vector type
+/// (e.g. v1f32), convert it into the equivalent operation that returns a
+/// scalar (e.g. f32) value.
+SDOperand SelectionDAGLegalize::ScalarizeVectorOp(SDOperand Op) {
+  assert(MVT::isVector(Op.getValueType()) &&
+         "Bad ScalarizeVectorOp invocation!");
+  SDNode *Node = Op.Val;
+  MVT::ValueType NewVT = MVT::getVectorElementType(Op.getValueType());
+  assert(MVT::getVectorNumElements(Op.getValueType()) == 1);
+
+  // See if we already scalarized it.
+  std::map<SDOperand, SDOperand>::iterator I = ScalarizedNodes.find(Op);
+  if (I != ScalarizedNodes.end()) return I->second;
+
+  SDOperand Result;
+  switch (Node->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    Node->dump(&DAG); cerr << "\n";
+#endif
+    assert(0 && "Unknown vector operation in ScalarizeVectorOp!");
+  case ISD::ADD:
+  case ISD::FADD:
+  case ISD::SUB:
+  case ISD::FSUB:
+  case ISD::MUL:
+  case ISD::FMUL:
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::FDIV:
+  case ISD::SREM:
+  case ISD::UREM:
+  case ISD::FREM:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+    Result = DAG.getNode(Node->getOpcode(),
+                         NewVT,
+                         ScalarizeVectorOp(Node->getOperand(0)),
+                         ScalarizeVectorOp(Node->getOperand(1)));
+    break;
+  case ISD::FNEG:
+  case ISD::FABS:
+  case ISD::FSQRT:
+  case ISD::FSIN:
+  case ISD::FCOS:
+    Result = DAG.getNode(Node->getOpcode(),
+                         NewVT,
+                         ScalarizeVectorOp(Node->getOperand(0)));
+    break;
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Node);
+    SDOperand Ch = LegalizeOp(LD->getChain());     // Legalize the chain.
+    SDOperand Ptr = LegalizeOp(LD->getBasePtr());  // Legalize the pointer.
+
+    const Value *SV = LD->getSrcValue();
+    int SVOffset = LD->getSrcValueOffset();
+    Result = DAG.getLoad(NewVT, Ch, Ptr, SV, SVOffset,
+                         LD->isVolatile(), LD->getAlignment());
+
+    // Remember that we legalized the chain.
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(Result.getValue(1)));
+    break;
+  }
+  case ISD::BUILD_VECTOR:
+    Result = Node->getOperand(0);
+    break;
+  case ISD::INSERT_VECTOR_ELT:
+    // Return the inserted scalar element.
+    Result = Node->getOperand(1);
+    break;
+  case ISD::CONCAT_VECTORS:
+    assert(Node->getOperand(0).getValueType() == NewVT &&
+           "Concat of non-legal vectors not yet supported!");
+    Result = Node->getOperand(0);
+    break;
+  case ISD::VECTOR_SHUFFLE: {
+    // Figure out if the scalar is the LHS or RHS and return it.
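+    // (For single-element vectors the one shuffle index is either 0, which
+    //  picks the LHS element, or 1, which picks the RHS element.)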
+    SDOperand EltNum = Node->getOperand(2).getOperand(0);
+    if (cast<ConstantSDNode>(EltNum)->getValue())
+      Result = ScalarizeVectorOp(Node->getOperand(1));
+    else
+      Result = ScalarizeVectorOp(Node->getOperand(0));
+    break;
+  }
+  case ISD::EXTRACT_SUBVECTOR:
+    Result = Node->getOperand(0);
+    assert(Result.getValueType() == NewVT);
+    break;
+  case ISD::BIT_CONVERT:
+    Result = DAG.getNode(ISD::BIT_CONVERT, NewVT, Op.getOperand(0));
+    break;
+  case ISD::SELECT:
+    Result = DAG.getNode(ISD::SELECT, NewVT, Op.getOperand(0),
+                         ScalarizeVectorOp(Op.getOperand(1)),
+                         ScalarizeVectorOp(Op.getOperand(2)));
+    break;
+  }
+
+  if (TLI.isTypeLegal(NewVT))
+    Result = LegalizeOp(Result);
+  bool isNew = ScalarizedNodes.insert(std::make_pair(Op, Result)).second;
+  assert(isNew && "Value already scalarized?");
+  return Result;
+}
+
+
+// SelectionDAG::Legalize - This is the entry point for the file.
+//
+void SelectionDAG::Legalize() {
+  if (ViewLegalizeDAGs) viewGraph();
+
+  /// run - This is the main entry point to this class.
+  ///
+  SelectionDAGLegalize(*this).LegalizeDAG();
+}
+
diff --git a/lib/CodeGen/SelectionDAG/Makefile b/lib/CodeGen/SelectionDAG/Makefile
new file mode 100644
index 0000000..6c50288
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/Makefile
@@ -0,0 +1,15 @@
+##===- lib/CodeGen/SelectionDAG/Makefile -------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file was developed by the LLVM research group and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMSelectionDAG
+PARALLEL_DIRS =
+BUILD_ARCHIVE = 1
+DONT_BUILD_RELINKED = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
new file mode 100644
index 0000000..06b2329
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
@@ -0,0 +1,725 @@
+//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by James M. Laskey and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a simple two pass scheduler. The first pass attempts to push
+// backward any lengthy instructions and critical paths. The second pass packs
+// instructions into semi-optimal time slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// BuildSchedUnits - Build SUnits from the selection dag that we are given as
+/// input. This SUnit graph is similar to the SelectionDAG, but represents
+/// flagged-together nodes with a single SUnit.
+void ScheduleDAG::BuildSchedUnits() {
+  // Reserve entries in the vector for each of the SUnits we are creating.
This
+  // ensures that reallocation of the vector won't happen, so SUnit*'s won't
+  // get invalidated.
+  SUnits.reserve(std::distance(DAG.allnodes_begin(), DAG.allnodes_end()));
+
+  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+
+  for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); NI != E; ++NI) {
+    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
+      continue;
+
+    // If this node has already been processed, stop now.
+    if (SUnitMap[NI]) continue;
+
+    SUnit *NodeSUnit = NewSUnit(NI);
+
+    // See if anything is flagged to this node; if so, add it to the set of
+    // flagged nodes. Nodes can have at most one flag input and one flag
+    // output. Flags are required to be the last operand and result of a node.
+
+    // Scan up, adding flagged preds to FlaggedNodes.
+    SDNode *N = NI;
+    if (N->getNumOperands() &&
+        N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
+      do {
+        N = N->getOperand(N->getNumOperands()-1).Val;
+        NodeSUnit->FlaggedNodes.push_back(N);
+        SUnitMap[N] = NodeSUnit;
+      } while (N->getNumOperands() &&
+               N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag);
+      std::reverse(NodeSUnit->FlaggedNodes.begin(),
+                   NodeSUnit->FlaggedNodes.end());
+    }
+
+    // Scan down, adding this node and any flagged succs to FlaggedNodes if
+    // they have a user of the flag operand.
+    N = NI;
+    while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
+      SDOperand FlagVal(N, N->getNumValues()-1);
+
+      // There are either zero or one users of the Flag result.
+      bool HasFlagUse = false;
+      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+           UI != E; ++UI)
+        if (FlagVal.isOperand(*UI)) {
+          HasFlagUse = true;
+          NodeSUnit->FlaggedNodes.push_back(N);
+          SUnitMap[N] = NodeSUnit;
+          N = *UI;
+          break;
+        }
+      if (!HasFlagUse) break;
+    }
+
+    // Now all flagged nodes are in FlaggedNodes and N is the bottom-most node.
+    // Update the SUnit.
+    NodeSUnit->Node = N;
+    SUnitMap[N] = NodeSUnit;
+
+    // Compute the latency for the node. We use the sum of the latencies for
+    // all nodes flagged together into this SUnit.
+    if (InstrItins.isEmpty()) {
+      // No latency information.
+      NodeSUnit->Latency = 1;
+    } else {
+      NodeSUnit->Latency = 0;
+      if (N->isTargetOpcode()) {
+        unsigned SchedClass = TII->getSchedClass(N->getTargetOpcode());
+        InstrStage *S = InstrItins.begin(SchedClass);
+        InstrStage *E = InstrItins.end(SchedClass);
+        for (; S != E; ++S)
+          NodeSUnit->Latency += S->Cycles;
+      }
+      for (unsigned i = 0, e = NodeSUnit->FlaggedNodes.size(); i != e; ++i) {
+        SDNode *FNode = NodeSUnit->FlaggedNodes[i];
+        if (FNode->isTargetOpcode()) {
+          unsigned SchedClass = TII->getSchedClass(FNode->getTargetOpcode());
+          InstrStage *S = InstrItins.begin(SchedClass);
+          InstrStage *E = InstrItins.end(SchedClass);
+          for (; S != E; ++S)
+            NodeSUnit->Latency += S->Cycles;
+        }
+      }
+    }
+  }
+
+  // Pass 2: add the preds, succs, etc.
+  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+    SUnit *SU = &SUnits[su];
+    SDNode *MainNode = SU->Node;
+
+    if (MainNode->isTargetOpcode()) {
+      unsigned Opc = MainNode->getTargetOpcode();
+      for (unsigned i = 0, ee = TII->getNumOperands(Opc); i != ee; ++i) {
+        if (TII->getOperandConstraint(Opc, i, TOI::TIED_TO) != -1) {
+          SU->isTwoAddress = true;
+          break;
+        }
+      }
+      if (TII->isCommutableInstr(Opc))
+        SU->isCommutable = true;
+    }
+
+    // Find all predecessors and successors of the group.
+    // Temporarily add N to make code simpler.
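+    // Any operand of a node in the group that is defined outside the group
+    // becomes an edge of the whole SUnit; MVT::Other operands become chain
+    // edges, everything else becomes a data edge.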
+    SU->FlaggedNodes.push_back(MainNode);
+
+    for (unsigned n = 0, e = SU->FlaggedNodes.size(); n != e; ++n) {
+      SDNode *N = SU->FlaggedNodes[n];
+
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+        SDNode *OpN = N->getOperand(i).Val;
+        if (isPassiveNode(OpN)) continue;   // Not scheduled.
+        SUnit *OpSU = SUnitMap[OpN];
+        assert(OpSU && "Node has no SUnit!");
+        if (OpSU == SU) continue;           // In the same group.
+
+        MVT::ValueType OpVT = N->getOperand(i).getValueType();
+        assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
+        bool isChain = OpVT == MVT::Other;
+
+        if (SU->addPred(OpSU, isChain)) {
+          if (!isChain) {
+            SU->NumPreds++;
+            SU->NumPredsLeft++;
+          } else {
+            SU->NumChainPredsLeft++;
+          }
+        }
+        if (OpSU->addSucc(SU, isChain)) {
+          if (!isChain) {
+            OpSU->NumSuccs++;
+            OpSU->NumSuccsLeft++;
+          } else {
+            OpSU->NumChainSuccsLeft++;
+          }
+        }
+      }
+    }
+
+    // Remove MainNode from FlaggedNodes again.
+    SU->FlaggedNodes.pop_back();
+  }
+
+  return;
+}
+
+void ScheduleDAG::CalculateDepths() {
+  std::vector<std::pair<SUnit*, unsigned> > WorkList;
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i)
+    if (SUnits[i].Preds.size() == 0/* && &SUnits[i] != Entry*/)
+      WorkList.push_back(std::make_pair(&SUnits[i], 0U));
+
+  while (!WorkList.empty()) {
+    SUnit *SU = WorkList.back().first;
+    unsigned Depth = WorkList.back().second;
+    WorkList.pop_back();
+    if (SU->Depth == 0 || Depth > SU->Depth) {
+      SU->Depth = Depth;
+      for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+           I != E; ++I)
+        WorkList.push_back(std::make_pair(I->first, Depth+1));
+    }
+  }
+}
+
+void ScheduleDAG::CalculateHeights() {
+  std::vector<std::pair<SUnit*, unsigned> > WorkList;
+  SUnit *Root = SUnitMap[DAG.getRoot().Val];
+  WorkList.push_back(std::make_pair(Root, 0U));
+
+  while (!WorkList.empty()) {
+    SUnit *SU = WorkList.back().first;
+    unsigned Height = WorkList.back().second;
+    WorkList.pop_back();
+    if (SU->Height == 0 || Height > SU->Height) {
+      SU->Height = Height;
+      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+           I != E; ++I)
+        WorkList.push_back(std::make_pair(I->first, Height+1));
+    }
+  }
+}
+
+/// CountResults - The results of target nodes have register or immediate
+/// values first, then an optional chain, and optional flag results (which do
+/// not go into the machine instrs).
+unsigned ScheduleDAG::CountResults(SDNode *Node) {
+  unsigned N = Node->getNumValues();
+  while (N && Node->getValueType(N - 1) == MVT::Flag)
+    --N;
+  if (N && Node->getValueType(N - 1) == MVT::Other)
+    --N;      // Skip over chain result.
+  return N;
+}
+
+/// CountOperands - The inputs to target nodes have any actual inputs first,
+/// followed by an optional chain operand, then flag operands. Compute the
+/// number of actual operands that will go into the machine instr.
+unsigned ScheduleDAG::CountOperands(SDNode *Node) {
+  unsigned N = Node->getNumOperands();
+  while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag)
+    --N;
+  if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+    --N; // Ignore chain if it exists.
+  return N;
+}
+
+static const TargetRegisterClass *getInstrOperandRegClass(
+        const MRegisterInfo *MRI,
+        const TargetInstrInfo *TII,
+        const TargetInstrDescriptor *II,
+        unsigned Op) {
+  if (Op >= II->numOperands) {
+    assert((II->Flags & M_VARIABLE_OPS) && "Invalid operand # of instruction");
+    return NULL;
+  }
+  const TargetOperandInfo &toi = II->OpInfo[Op];
+  return (toi.Flags & M_LOOK_UP_PTR_REG_CLASS)
+           ?
TII->getPointerRegClass() : MRI->getRegClass(toi.RegClass); +} + +static void CreateVirtualRegisters(SDNode *Node, + unsigned NumResults, + const MRegisterInfo *MRI, + MachineInstr *MI, + SSARegMap *RegMap, + const TargetInstrInfo *TII, + const TargetInstrDescriptor &II, + DenseMap<SDOperand, unsigned> &VRBaseMap) { + for (unsigned i = 0; i < NumResults; ++i) { + // If the specific node value is only used by a CopyToReg and the dest reg + // is a vreg, use the CopyToReg'd destination register instead of creating + // a new vreg. + unsigned VRBase = 0; + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *Use = *UI; + if (Use->getOpcode() == ISD::CopyToReg && + Use->getOperand(2).Val == Node && + Use->getOperand(2).ResNo == i) { + unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg(); + if (MRegisterInfo::isVirtualRegister(Reg)) { + VRBase = Reg; + MI->addRegOperand(Reg, true); + break; + } + } + } + + if (VRBase == 0) { + // Create the result registers for this node and add the result regs to + // the machine instruction. + const TargetRegisterClass *RC = getInstrOperandRegClass(MRI, TII, &II, i); + assert(RC && "Isn't a register operand!"); + VRBase = RegMap->createVirtualRegister(RC); + MI->addRegOperand(VRBase, true); + } + + bool isNew = VRBaseMap.insert(std::make_pair(SDOperand(Node,i), VRBase)); + assert(isNew && "Node emitted out of order - early"); + } +} + +/// getVR - Return the virtual register corresponding to the specified result +/// of the specified node. +static unsigned getVR(SDOperand Op, DenseMap<SDOperand, unsigned> &VRBaseMap) { + DenseMap<SDOperand, unsigned>::iterator I = VRBaseMap.find(Op); + assert(I != VRBaseMap.end() && "Node emitted out of order - late"); + return I->second; +} + + +/// AddOperand - Add the specified operand to the specified machine instr. II +/// specifies the instruction information for the node, and IIOpNum is the +/// operand number (in the II) that we are adding. IIOpNum and II are used for +/// assertions only. +void ScheduleDAG::AddOperand(MachineInstr *MI, SDOperand Op, + unsigned IIOpNum, + const TargetInstrDescriptor *II, + DenseMap<SDOperand, unsigned> &VRBaseMap) { + if (Op.isTargetOpcode()) { + // Note that this case is redundant with the final else block, but we + // include it because it is the most common and it makes the logic + // simpler here. + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Flag && + "Chain and flag operands should occur at end of operand list!"); + + // Get/emit the operand. + unsigned VReg = getVR(Op, VRBaseMap); + const TargetInstrDescriptor *TID = MI->getInstrDescriptor(); + bool isOptDef = (IIOpNum < TID->numOperands) + ? (TID->OpInfo[IIOpNum].Flags & M_OPTIONAL_DEF_OPERAND) : false; + MI->addRegOperand(VReg, isOptDef); + + // Verify that it is right. 
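+    // (A mismatch here means instruction selection placed this value in a
+    //  virtual register whose class the instruction cannot accept.)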
+ assert(MRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); + if (II) { + const TargetRegisterClass *RC = + getInstrOperandRegClass(MRI, TII, II, IIOpNum); + assert(RC && "Don't have operand info for this instruction!"); + const TargetRegisterClass *VRC = RegMap->getRegClass(VReg); + if (VRC != RC) { + cerr << "Register class of operand and regclass of use don't agree!\n"; +#ifndef NDEBUG + cerr << "Operand = " << IIOpNum << "\n"; + cerr << "Op->Val = "; Op.Val->dump(&DAG); cerr << "\n"; + cerr << "MI = "; MI->print(cerr); + cerr << "VReg = " << VReg << "\n"; + cerr << "VReg RegClass size = " << VRC->getSize() + << ", align = " << VRC->getAlignment() << "\n"; + cerr << "Expected RegClass size = " << RC->getSize() + << ", align = " << RC->getAlignment() << "\n"; +#endif + cerr << "Fatal error, aborting.\n"; + abort(); + } + } + } else if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Op)) { + MI->addImmOperand(C->getValue()); + } else if (RegisterSDNode *R = + dyn_cast<RegisterSDNode>(Op)) { + MI->addRegOperand(R->getReg(), false); + } else if (GlobalAddressSDNode *TGA = + dyn_cast<GlobalAddressSDNode>(Op)) { + MI->addGlobalAddressOperand(TGA->getGlobal(), TGA->getOffset()); + } else if (BasicBlockSDNode *BB = + dyn_cast<BasicBlockSDNode>(Op)) { + MI->addMachineBasicBlockOperand(BB->getBasicBlock()); + } else if (FrameIndexSDNode *FI = + dyn_cast<FrameIndexSDNode>(Op)) { + MI->addFrameIndexOperand(FI->getIndex()); + } else if (JumpTableSDNode *JT = + dyn_cast<JumpTableSDNode>(Op)) { + MI->addJumpTableIndexOperand(JT->getIndex()); + } else if (ConstantPoolSDNode *CP = + dyn_cast<ConstantPoolSDNode>(Op)) { + int Offset = CP->getOffset(); + unsigned Align = CP->getAlignment(); + const Type *Type = CP->getType(); + // MachineConstantPool wants an explicit alignment. + if (Align == 0) { + Align = TM.getTargetData()->getPreferredTypeAlignmentShift(Type); + if (Align == 0) { + // Alignment of vector types. FIXME! + Align = TM.getTargetData()->getTypeSize(Type); + Align = Log2_64(Align); + } + } + + unsigned Idx; + if (CP->isMachineConstantPoolEntry()) + Idx = ConstPool->getConstantPoolIndex(CP->getMachineCPVal(), Align); + else + Idx = ConstPool->getConstantPoolIndex(CP->getConstVal(), Align); + MI->addConstantPoolIndexOperand(Idx, Offset); + } else if (ExternalSymbolSDNode *ES = + dyn_cast<ExternalSymbolSDNode>(Op)) { + MI->addExternalSymbolOperand(ES->getSymbol()); + } else { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Flag && + "Chain and flag operands should occur at end of operand list!"); + unsigned VReg = getVR(Op, VRBaseMap); + MI->addRegOperand(VReg, false); + + // Verify that it is right. + assert(MRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); + if (II) { + const TargetRegisterClass *RC = + getInstrOperandRegClass(MRI, TII, II, IIOpNum); + assert(RC && "Don't have operand info for this instruction!"); + assert(RegMap->getRegClass(VReg) == RC && + "Register class of operand and regclass of use don't agree!"); + } + } + +} + +// Returns the Register Class of a physical register +static const TargetRegisterClass *getPhysicalRegisterRegClass( + const MRegisterInfo *MRI, + MVT::ValueType VT, + unsigned reg) { + assert(MRegisterInfo::isPhysicalRegister(reg) && + "reg must be a physical register"); + // Pick the register class of the right type that contains this physreg. 
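+  // The first class that both has the type and contains the register wins;
+  // e.g. on X86 an i32 value in EAX resolves to GR32.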
+  for (MRegisterInfo::regclass_iterator I = MRI->regclass_begin(),
+       E = MRI->regclass_end(); I != E; ++I)
+    if ((*I)->hasType(VT) && (*I)->contains(reg))
+      return *I;
+  assert(false && "Couldn't find the register class");
+  return 0;
+}
+
+/// EmitNode - Generate machine code for a node and any needed dependencies.
+///
+void ScheduleDAG::EmitNode(SDNode *Node,
+                           DenseMap<SDOperand, unsigned> &VRBaseMap) {
+  // If this is a machine instruction (target opcode), emit it directly.
+  if (Node->isTargetOpcode()) {
+    unsigned Opc = Node->getTargetOpcode();
+    const TargetInstrDescriptor &II = TII->get(Opc);
+
+    unsigned NumResults = CountResults(Node);
+    unsigned NodeOperands = CountOperands(Node);
+    unsigned NumMIOperands = NodeOperands + NumResults;
+#ifndef NDEBUG
+    assert((unsigned(II.numOperands) == NumMIOperands ||
+            (II.Flags & M_VARIABLE_OPS)) &&
+           "#operands for dag node doesn't match .td file!");
+#endif
+
+    // Create the new machine instruction.
+    MachineInstr *MI = new MachineInstr(II);
+
+    // Add result register values for things that are defined by this
+    // instruction.
+    if (NumResults)
+      CreateVirtualRegisters(Node, NumResults, MRI, MI, RegMap,
+                             TII, II, VRBaseMap);
+
+    // Emit all of the actual operands of this instruction, adding them to the
+    // instruction as appropriate.
+    for (unsigned i = 0; i != NodeOperands; ++i)
+      AddOperand(MI, Node->getOperand(i), i+NumResults, &II, VRBaseMap);
+
+    // Commute node if it has been determined to be profitable.
+    if (CommuteSet.count(Node)) {
+      MachineInstr *NewMI = TII->commuteInstruction(MI);
+      if (NewMI == 0)
+        DOUT << "Sched: COMMUTING FAILED!\n";
+      else {
+        DOUT << "Sched: COMMUTED TO: " << *NewMI;
+        if (MI != NewMI) {
+          delete MI;
+          MI = NewMI;
+        }
+      }
+    }
+
+    // Now that we have emitted all operands, emit this instruction itself.
+    if ((II.Flags & M_USES_CUSTOM_DAG_SCHED_INSERTION) == 0) {
+      BB->insert(BB->end(), MI);
+    } else {
+      // Insert this instruction into the end of the basic block, potentially
+      // taking some custom action.
+      BB = DAG.getTargetLoweringInfo().InsertAtEndOfBasicBlock(MI, BB);
+    }
+  } else {
+    switch (Node->getOpcode()) {
+    default:
+#ifndef NDEBUG
+      Node->dump(&DAG);
+#endif
+      assert(0 && "This target-independent node should have been selected!");
+    case ISD::EntryToken: // fall thru
+    case ISD::TokenFactor:
+    case ISD::LABEL:
+      break;
+    case ISD::CopyToReg: {
+      unsigned InReg;
+      if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(2)))
+        InReg = R->getReg();
+      else
+        InReg = getVR(Node->getOperand(2), VRBaseMap);
+      unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+      if (InReg != DestReg) { // Coalesced away the copy?
+        const TargetRegisterClass *TRC = 0;
+        // Get the target register class
+        if (MRegisterInfo::isVirtualRegister(InReg))
+          TRC = RegMap->getRegClass(InReg);
+        else
+          TRC = getPhysicalRegisterRegClass(MRI,
+                                            Node->getOperand(2).getValueType(),
+                                            InReg);
+        MRI->copyRegToReg(*BB, BB->end(), DestReg, InReg, TRC);
+      }
+      break;
+    }
+    case ISD::CopyFromReg: {
+      unsigned VRBase = 0;
+      unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+      if (MRegisterInfo::isVirtualRegister(SrcReg)) {
+        // Just use the input register directly!
+        bool isNew = VRBaseMap.insert(std::make_pair(SDOperand(Node,0),SrcReg));
+        assert(isNew && "Node emitted out of order - early");
+        break;
+      }
+
+      // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+      // the CopyToReg'd destination register instead of creating a new vreg.
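+      // Reusing that destination vreg saves an intermediate copy: the
+      // CopyFromReg result is written straight into the register its
+      // consumer wanted.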
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *Use = *UI; + if (Use->getOpcode() == ISD::CopyToReg && + Use->getOperand(2).Val == Node) { + unsigned DestReg = cast<RegisterSDNode>(Use->getOperand(1))->getReg(); + if (MRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + break; + } + } + } + + // Figure out the register class to create for the destreg. + const TargetRegisterClass *TRC = 0; + if (VRBase) { + TRC = RegMap->getRegClass(VRBase); + } else { + TRC = getPhysicalRegisterRegClass(MRI, Node->getValueType(0), SrcReg); + + // Create the reg, emit the copy. + VRBase = RegMap->createVirtualRegister(TRC); + } + MRI->copyRegToReg(*BB, BB->end(), VRBase, SrcReg, TRC); + + bool isNew = VRBaseMap.insert(std::make_pair(SDOperand(Node,0), VRBase)); + assert(isNew && "Node emitted out of order - early"); + break; + } + case ISD::INLINEASM: { + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) + --NumOps; // Ignore the flag operand. + + // Create the inline asm machine instruction. + MachineInstr *MI = + new MachineInstr(BB, TII->get(TargetInstrInfo::INLINEASM)); + + // Add the asm string as an external symbol operand. + const char *AsmStr = + cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol(); + MI->addExternalSymbolOperand(AsmStr); + + // Add all of the operand registers to the instruction. + for (unsigned i = 2; i != NumOps;) { + unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getValue(); + unsigned NumVals = Flags >> 3; + + MI->addImmOperand(Flags); + ++i; // Skip the ID value. + + switch (Flags & 7) { + default: assert(0 && "Bad flags!"); + case 1: // Use of register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + MI->addRegOperand(Reg, false); + } + break; + case 2: // Def of register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + MI->addRegOperand(Reg, true); + } + break; + case 3: { // Immediate. + assert(NumVals == 1 && "Unknown immediate value!"); + if (ConstantSDNode *CS=dyn_cast<ConstantSDNode>(Node->getOperand(i))){ + MI->addImmOperand(CS->getValue()); + } else { + GlobalAddressSDNode *GA = + cast<GlobalAddressSDNode>(Node->getOperand(i)); + MI->addGlobalAddressOperand(GA->getGlobal(), GA->getOffset()); + } + ++i; + break; + } + case 4: // Addressing mode. + // The addressing mode has been selected, just add all of the + // operands to the machine instruction. + for (; NumVals; --NumVals, ++i) + AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap); + break; + } + } + break; + } + } + } +} + +void ScheduleDAG::EmitNoop() { + TII->insertNoop(*BB, BB->end()); +} + +/// EmitSchedule - Emit the machine code in scheduled order. +void ScheduleDAG::EmitSchedule() { + // If this is the first basic block in the function, and if it has live ins + // that need to be copied into vregs, emit the copies into the top of the + // block before emitting the code for the block. + MachineFunction &MF = DAG.getMachineFunction(); + if (&MF.front() == BB && MF.livein_begin() != MF.livein_end()) { + for (MachineFunction::livein_iterator LI = MF.livein_begin(), + E = MF.livein_end(); LI != E; ++LI) + if (LI->second) + MRI->copyRegToReg(*MF.begin(), MF.begin()->end(), LI->second, + LI->first, RegMap->getRegClass(LI->second)); + } + + + // Finally, emit the code for all of the scheduled instructions. 
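+  // VRBaseMap records the vreg assigned to each node result, so that getVR
+  // can resolve later uses of a result to the same register.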
+  DenseMap<SDOperand, unsigned> VRBaseMap;
+  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+    if (SUnit *SU = Sequence[i]) {
+      for (unsigned j = 0, ee = SU->FlaggedNodes.size(); j != ee; j++)
+        EmitNode(SU->FlaggedNodes[j], VRBaseMap);
+      EmitNode(SU->Node, VRBaseMap);
+    } else {
+      // Null SUnit* is a noop.
+      EmitNoop();
+    }
+  }
+}
+
+/// dump - dump the schedule.
+void ScheduleDAG::dumpSchedule() const {
+  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+    if (SUnit *SU = Sequence[i])
+      SU->dump(&DAG);
+    else
+      cerr << "**** NOOP ****\n";
+  }
+}
+
+
+/// Run - perform scheduling.
+///
+MachineBasicBlock *ScheduleDAG::Run() {
+  TII = TM.getInstrInfo();
+  MRI = TM.getRegisterInfo();
+  RegMap = BB->getParent()->getSSARegMap();
+  ConstPool = BB->getParent()->getConstantPool();
+
+  Schedule();
+  return BB;
+}
+
+/// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
+/// a group of nodes flagged together.
+void SUnit::dump(const SelectionDAG *G) const {
+  cerr << "SU(" << NodeNum << "): ";
+  Node->dump(G);
+  cerr << "\n";
+  if (FlaggedNodes.size() != 0) {
+    for (unsigned i = 0, e = FlaggedNodes.size(); i != e; i++) {
+      cerr << "    ";
+      FlaggedNodes[i]->dump(G);
+      cerr << "\n";
+    }
+  }
+}
+
+void SUnit::dumpAll(const SelectionDAG *G) const {
+  dump(G);
+
+  cerr << "  # preds left       : " << NumPredsLeft << "\n";
+  cerr << "  # succs left       : " << NumSuccsLeft << "\n";
+  cerr << "  # chain preds left : " << NumChainPredsLeft << "\n";
+  cerr << "  # chain succs left : " << NumChainSuccsLeft << "\n";
+  cerr << "  Latency            : " << Latency << "\n";
+  cerr << "  Depth              : " << Depth << "\n";
+  cerr << "  Height             : " << Height << "\n";
+
+  if (Preds.size() != 0) {
+    cerr << "  Predecessors:\n";
+    for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end();
+         I != E; ++I) {
+      if (I->second)
+        cerr << "   ch  #";
+      else
+        cerr << "   val #";
+      cerr << I->first << " - SU(" << I->first->NodeNum << ")\n";
+    }
+  }
+  if (Succs.size() != 0) {
+    cerr << "  Successors:\n";
+    for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
+         I != E; ++I) {
+      if (I->second)
+        cerr << "   ch  #";
+      else
+        cerr << "   val #";
+      cerr << I->first << " - SU(" << I->first->NodeNum << ")\n";
+    }
+  }
+  cerr << "\n";
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
new file mode 100644
index 0000000..9e4e46f
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -0,0 +1,531 @@
+//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
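+// When nothing is ready to issue in a given cycle, the scheduler either
+// stalls (advancing the cycle and the hazard recognizer) or, on targets
+// without pipeline interlocks, emits an explicit noop; the NumStalls and
+// NumNoops statistics below count both events.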
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/Statistic.h"
+#include <climits>
+#include <queue>
+using namespace llvm;
+
+STATISTIC(NumNoops , "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static RegisterScheduler
+  tdListDAGScheduler("list-td", " Top-down list scheduler",
+                     createTDListDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGList - The actual list scheduler implementation.  This supports
+/// top-down scheduling.
+///
+class VISIBILITY_HIDDEN ScheduleDAGList : public ScheduleDAG {
+private:
+  /// AvailableQueue - The priority queue to use for the available SUnits.
+  ///
+  SchedulingPriorityQueue *AvailableQueue;
+
+  /// PendingQueue - This contains all of the instructions whose operands have
+  /// been issued, but their results are not ready yet (due to the latency of
+  /// the operation).  Once the operands become available, the instruction is
+  /// added to the AvailableQueue.  This keeps track of each SUnit and the
+  /// number of cycles left to execute before the operation is available.
+  std::vector<std::pair<unsigned, SUnit*> > PendingQueue;
+
+  /// HazardRec - The hazard recognizer to use.
+  HazardRecognizer *HazardRec;
+
+public:
+  ScheduleDAGList(SelectionDAG &dag, MachineBasicBlock *bb,
+                  const TargetMachine &tm,
+                  SchedulingPriorityQueue *availqueue,
+                  HazardRecognizer *HR)
+    : ScheduleDAG(dag, bb, tm),
+      AvailableQueue(availqueue), HazardRec(HR) {
+  }
+
+  ~ScheduleDAGList() {
+    delete HazardRec;
+    delete AvailableQueue;
+  }
+
+  void Schedule();
+
+private:
+  void ReleaseSucc(SUnit *SuccSU, bool isChain);
+  void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+  void ListScheduleTopDown();
+};
+}  // end anonymous namespace
+
+HazardRecognizer::~HazardRecognizer() {}
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGList::Schedule() {
+  DOUT << "********** List Scheduling **********\n";
+
+  // Build scheduling units.
+  BuildSchedUnits();
+
+  AvailableQueue->initNodes(SUnitMap, SUnits);
+
+  ListScheduleTopDown();
+
+  AvailableQueue->releaseState();
+
+  DOUT << "*** Final schedule ***\n";
+  DEBUG(dumpSchedule());
+  DOUT << "\n";
+
+  // Emit in scheduled order
+  EmitSchedule();
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor.  Add it to
+/// the PendingQueue if the count reaches zero.
+void ScheduleDAGList::ReleaseSucc(SUnit *SuccSU, bool isChain) {
+  if (!isChain)
+    SuccSU->NumPredsLeft--;
+  else
+    SuccSU->NumChainPredsLeft--;
+
+  assert(SuccSU->NumPredsLeft >= 0 && SuccSU->NumChainPredsLeft >= 0 &&
+         "List scheduling internal error");
+
+  if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) {
+    // Compute how many cycles it will be before this actually becomes
+    // available.  This is the max of the start time of all predecessors plus
+    // their latencies.
+    unsigned AvailableCycle = 0;
+    for (SUnit::pred_iterator I = SuccSU->Preds.begin(),
+         E = SuccSU->Preds.end(); I != E; ++I) {
+      // If this is a token edge, we don't need to wait for the latency of the
+      // preceding instruction (e.g. a long-latency load) unless there is also
+      // some other data dependence.
+      SUnit &Pred = *I->first;
+      unsigned PredDoneCycle = Pred.Cycle;
+      if (!I->second)
+        PredDoneCycle += Pred.Latency;
+      else if (Pred.Latency)
+        PredDoneCycle += 1;
+
+      AvailableCycle = std::max(AvailableCycle, PredDoneCycle);
+    }
+
+    PendingQueue.push_back(std::make_pair(AvailableCycle, SuccSU));
+  }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule.  Decrement the pending
+/// count of its successors.  If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+  DOUT << "*** Scheduling [" << CurCycle << "]: ";
+  DEBUG(SU->dump(&DAG));
+
+  Sequence.push_back(SU);
+  SU->Cycle = CurCycle;
+
+  // Top down: release successors.
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I)
+    ReleaseSucc(I->first, I->second);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGList::ListScheduleTopDown() {
+  unsigned CurCycle = 0;
+  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
+
+  // All leaves to Available queue.
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    // It is available if it has no predecessors.
+    if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
+      AvailableQueue->push(&SUnits[i]);
+      SUnits[i].isAvailable = SUnits[i].isPending = true;
+    }
+  }
+
+  // Emit the entry node first.
+  ScheduleNodeTopDown(Entry, CurCycle);
+  HazardRec->EmitInstruction(Entry->Node);
+
+  // While Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready put it back.  Schedule the node.
+  std::vector<SUnit*> NotReady;
+  while (!AvailableQueue->empty() || !PendingQueue.empty()) {
+    // Check to see if any of the pending instructions are ready to issue.  If
+    // so, add them to the available queue.
+    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+      if (PendingQueue[i].first == CurCycle) {
+        AvailableQueue->push(PendingQueue[i].second);
+        PendingQueue[i].second->isAvailable = true;
+        PendingQueue[i] = PendingQueue.back();
+        PendingQueue.pop_back();
+        --i; --e;
+      } else {
+        assert(PendingQueue[i].first > CurCycle && "Negative latency?");
+      }
+    }
+
+    // If there are no instructions available, don't try to issue anything, and
+    // don't advance the hazard recognizer.
+    if (AvailableQueue->empty()) {
+      ++CurCycle;
+      continue;
+    }
+
+    SUnit *FoundSUnit = 0;
+    SDNode *FoundNode = 0;
+
+    bool HasNoopHazards = false;
+    while (!AvailableQueue->empty()) {
+      SUnit *CurSUnit = AvailableQueue->pop();
+
+      // Get the node represented by this SUnit.
+      FoundNode = CurSUnit->Node;
+
+      // If this is a pseudo op, like copyfromreg, look to see if there is a
+      // real target node flagged to it.  If so, use the target node.
+      for (unsigned i = 0, e = CurSUnit->FlaggedNodes.size();
+           FoundNode->getOpcode() < ISD::BUILTIN_OP_END && i != e; ++i)
+        FoundNode = CurSUnit->FlaggedNodes[i];
+
+      HazardRecognizer::HazardType HT = HazardRec->getHazardType(FoundNode);
+      if (HT == HazardRecognizer::NoHazard) {
+        FoundSUnit = CurSUnit;
+        break;
+      }
+
+      // Remember if this is a noop hazard.
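+      // (a noop hazard means the only instruction that can legally issue
+      // this cycle is a noop)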
+ HasNoopHazards |= HT == HazardRecognizer::NoopHazard; + + NotReady.push_back(CurSUnit); + } + + // Add the nodes that aren't ready back onto the available list. + if (!NotReady.empty()) { + AvailableQueue->push_all(NotReady); + NotReady.clear(); + } + + // If we found a node to schedule, do it now. + if (FoundSUnit) { + ScheduleNodeTopDown(FoundSUnit, CurCycle); + HazardRec->EmitInstruction(FoundNode); + FoundSUnit->isScheduled = true; + AvailableQueue->ScheduledNode(FoundSUnit); + + // If this is a pseudo-op node, we don't want to increment the current + // cycle. + if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! + ++CurCycle; + } else if (!HasNoopHazards) { + // Otherwise, we have a pipeline stall, but no other problem, just advance + // the current cycle and try again. + DOUT << "*** Advancing cycle, no work to do\n"; + HazardRec->AdvanceCycle(); + ++NumStalls; + ++CurCycle; + } else { + // Otherwise, we have no instructions to issue and we have instructions + // that will fault if we don't do this right. This is the case for + // processors without pipeline interlocks and other cases. + DOUT << "*** Emitting noop\n"; + HazardRec->EmitNoop(); + Sequence.push_back(0); // NULL SUnit* -> noop + ++NumNoops; + ++CurCycle; + } + } + +#ifndef NDEBUG + // Verify that all SUnits were scheduled. + bool AnyNotSched = false; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + if (SUnits[i].NumPredsLeft != 0 || SUnits[i].NumChainPredsLeft != 0) { + if (!AnyNotSched) + cerr << "*** List scheduling failed! ***\n"; + SUnits[i].dump(&DAG); + cerr << "has not been scheduled!\n"; + AnyNotSched = true; + } + } + assert(!AnyNotSched); +#endif +} + +//===----------------------------------------------------------------------===// +// LatencyPriorityQueue Implementation +//===----------------------------------------------------------------------===// +// +// This is a SchedulingPriorityQueue that schedules using latency information to +// reduce the length of the critical path through the basic block. +// +namespace { + class LatencyPriorityQueue; + + /// Sorting functions for the Available queue. + struct latency_sort : public std::binary_function<SUnit*, SUnit*, bool> { + LatencyPriorityQueue *PQ; + latency_sort(LatencyPriorityQueue *pq) : PQ(pq) {} + latency_sort(const latency_sort &RHS) : PQ(RHS.PQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; +} // end anonymous namespace + +namespace { + class LatencyPriorityQueue : public SchedulingPriorityQueue { + // SUnits - The SUnits for the current graph. + std::vector<SUnit> *SUnits; + + // Latencies - The latency (max of latency from this node to the bb exit) + // for each node. + std::vector<int> Latencies; + + /// NumNodesSolelyBlocking - This vector contains, for every node in the + /// Queue, the number of nodes that the node is the sole unscheduled + /// predecessor for. This is used as a tie-breaker heuristic for better + /// mobility. + std::vector<unsigned> NumNodesSolelyBlocking; + + std::priority_queue<SUnit*, std::vector<SUnit*>, latency_sort> Queue; +public: + LatencyPriorityQueue() : Queue(latency_sort(this)) { + } + + void initNodes(DenseMap<SDNode*, SUnit*> &sumap, + std::vector<SUnit> &sunits) { + SUnits = &sunits; + // Calculate node priorities. 
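+    // (a node's priority here is its maximum total latency to the exit of
+    // the block; see CalcLatency below)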
+    CalculatePriorities();
+  }
+  void releaseState() {
+    SUnits = 0;
+    Latencies.clear();
+  }
+
+  unsigned getLatency(unsigned NodeNum) const {
+    assert(NodeNum < Latencies.size());
+    return Latencies[NodeNum];
+  }
+
+  unsigned getNumSolelyBlockNodes(unsigned NodeNum) const {
+    assert(NodeNum < NumNodesSolelyBlocking.size());
+    return NumNodesSolelyBlocking[NodeNum];
+  }
+
+  bool empty() const { return Queue.empty(); }
+
+  virtual void push(SUnit *U) {
+    push_impl(U);
+  }
+  void push_impl(SUnit *U);
+
+  void push_all(const std::vector<SUnit *> &Nodes) {
+    for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
+      push_impl(Nodes[i]);
+  }
+
+  SUnit *pop() {
+    if (empty()) return NULL;
+    SUnit *V = Queue.top();
+    Queue.pop();
+    return V;
+  }
+
+  // ScheduledNode - As nodes are scheduled, we look to see if there are any
+  // successor nodes that have a single unscheduled predecessor.  If so, that
+  // single predecessor has a higher priority, since scheduling it will make
+  // the node available.
+  void ScheduledNode(SUnit *Node);
+
+private:
+  void CalculatePriorities();
+  int CalcLatency(const SUnit &SU);
+  void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
+  SUnit *getSingleUnscheduledPred(SUnit *SU);
+
+  /// RemoveFromPriorityQueue - This is a really inefficient way to remove a
+  /// node from a priority queue.  We should roll our own heap to make this
+  /// better or something.
+  void RemoveFromPriorityQueue(SUnit *SU) {
+    std::vector<SUnit*> Temp;
+
+    assert(!Queue.empty() && "Not in queue!");
+    while (Queue.top() != SU) {
+      Temp.push_back(Queue.top());
+      Queue.pop();
+      assert(!Queue.empty() && "Not in queue!");
+    }
+
+    // Remove the node from the PQ.
+    Queue.pop();
+
+    // Add all the other nodes back.
+    for (unsigned i = 0, e = Temp.size(); i != e; ++i)
+      Queue.push(Temp[i]);
+  }
+};
+}
+
+bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+  unsigned LHSNum = LHS->NodeNum;
+  unsigned RHSNum = RHS->NodeNum;
+
+  // The most important heuristic is scheduling the critical path.
+  unsigned LHSLatency = PQ->getLatency(LHSNum);
+  unsigned RHSLatency = PQ->getLatency(RHSNum);
+  if (LHSLatency < RHSLatency) return true;
+  if (LHSLatency > RHSLatency) return false;
+
+  // After that, if two nodes have identical latencies, look to see if one will
+  // unblock more other nodes than the other.
+  unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+  unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+  if (LHSBlocked < RHSBlocked) return true;
+  if (LHSBlocked > RHSBlocked) return false;
+
+  // Finally, just to provide a stable ordering, use the node number as a
+  // deciding factor.
+  return LHSNum < RHSNum;
+}
+
+
+/// CalcLatency - Calculate the maximal path latency from the node to the exit.
+///
+int LatencyPriorityQueue::CalcLatency(const SUnit &SU) {
+  int &Latency = Latencies[SU.NodeNum];
+  if (Latency != -1)
+    return Latency;
+
+  int MaxSuccLatency = 0;
+  for (SUnit::const_succ_iterator I = SU.Succs.begin(), E = SU.Succs.end();
+       I != E; ++I)
+    MaxSuccLatency = std::max(MaxSuccLatency, CalcLatency(*I->first));
+
+  return Latency = MaxSuccLatency + SU.Latency;
+}
+
+/// CalculatePriorities - Calculate priorities of all scheduling units.
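+/// Latencies are memoized, with -1 marking an unvisited node, so CalcLatency
+/// visits each SUnit only once even though it recurses over successors.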
+void LatencyPriorityQueue::CalculatePriorities() {
+  Latencies.assign(SUnits->size(), -1);
+  NumNodesSolelyBlocking.assign(SUnits->size(), 0);
+
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+    CalcLatency((*SUnits)[i]);
+}
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+  SUnit *OnlyAvailablePred = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    SUnit &Pred = *I->first;
+    if (!Pred.isScheduled) {
+      // We found an available, but not scheduled, predecessor.  If it's the
+      // only one we have found, keep track of it... otherwise give up.
+      if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+        return 0;
+      OnlyAvailablePred = &Pred;
+    }
+  }
+
+  return OnlyAvailablePred;
+}
+
+void LatencyPriorityQueue::push_impl(SUnit *SU) {
+  // Look at all of the successors of this node.  Count the number of nodes
+  // that this node is the sole unscheduled predecessor for.
+  unsigned NumNodesBlocking = 0;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I)
+    if (getSingleUnscheduledPred(I->first) == SU)
+      ++NumNodesBlocking;
+  NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+
+  Queue.push(SU);
+}
+
+
+// ScheduledNode - As nodes are scheduled, we look to see if there are any
+// successor nodes that have a single unscheduled predecessor.  If so, that
+// single predecessor has a higher priority, since scheduling it will make
+// the node available.
+void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I)
+    AdjustPriorityOfUnscheduledPreds(I->first);
+}
+
+/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled.  If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet.  If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
+  if (SU->isPending) return;  // All preds scheduled.
+
+  SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+  if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
+
+  // Okay, we found a single predecessor that is available, but not scheduled.
+  // Since it is available, it must be in the priority queue.  First remove it.
+  RemoveFromPriorityQueue(OnlyAvailablePred);
+
+  // Reinsert the node into the priority queue, which recomputes its
+  // NumNodesSolelyBlocking value.
+  push(OnlyAvailablePred);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// createTDListDAGScheduler - This creates a top-down list scheduler with a
+/// new hazard recognizer.  This scheduler takes ownership of the hazard
+/// recognizer and deletes it when done.
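+/// (Its destructor deletes both the priority queue and the recognizer.)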
+ScheduleDAG* llvm::createTDListDAGScheduler(SelectionDAGISel *IS,
+                                            SelectionDAG *DAG,
+                                            MachineBasicBlock *BB) {
+  return new ScheduleDAGList(*DAG, BB, DAG->getTarget(),
+                             new LatencyPriorityQueue(),
+                             IS->CreateTargetHazardRecognizer());
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
new file mode 100644
index 0000000..f95be7d
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -0,0 +1,944 @@
+//===---- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements bottom-up and top-down register pressure reduction list
+// schedulers, using standard algorithms.  The basic approach uses a priority
+// queue of available nodes to schedule.  One at a time, nodes are taken from
+// the priority queue (thus in priority order), checked for legality to
+// schedule, and emitted if legal.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/Statistic.h"
+#include <climits>
+#include <queue>
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static RegisterScheduler
+  burrListDAGScheduler("list-burr",
+                       " Bottom-up register reduction list scheduling",
+                       createBURRListDAGScheduler);
+static RegisterScheduler
+  tdrListrDAGScheduler("list-tdrr",
+                       " Top-down register reduction list scheduling",
+                       createTDRRListDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGRRList - The actual register reduction list scheduler
+/// implementation.  This supports both top-down and bottom-up scheduling.
+///
+
+class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAG {
+private:
+  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
+  /// it is top-down.
+  bool isBottomUp;
+
+  /// AvailableQueue - The priority queue to use for the available SUnits.
+  ///
+  SchedulingPriorityQueue *AvailableQueue;
+
+public:
+  ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb,
+                    const TargetMachine &tm, bool isbottomup,
+                    SchedulingPriorityQueue *availqueue)
+    : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup),
+      AvailableQueue(availqueue) {
+  }
+
+  ~ScheduleDAGRRList() {
+    delete AvailableQueue;
+  }
+
+  void Schedule();
+
+private:
+  void ReleasePred(SUnit *PredSU, bool isChain, unsigned CurCycle);
+  void ReleaseSucc(SUnit *SuccSU, bool isChain, unsigned CurCycle);
+  void ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle);
+  void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+  void ListScheduleTopDown();
+  void ListScheduleBottomUp();
+  void CommuteNodesToReducePressure();
+};
+}  // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
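+/// Depths and heights are computed before the main loop so that the priority
+/// comparison functions can use them as tie-breakers.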
+void ScheduleDAGRRList::Schedule() {
+  DOUT << "********** List Scheduling **********\n";
+
+  // Build scheduling units.
+  BuildSchedUnits();
+
+  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          SUnits[su].dumpAll(&DAG));
+  CalculateDepths();
+  CalculateHeights();
+
+  AvailableQueue->initNodes(SUnitMap, SUnits);
+
+  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
+  if (isBottomUp)
+    ListScheduleBottomUp();
+  else
+    ListScheduleTopDown();
+
+  AvailableQueue->releaseState();
+
+  CommuteNodesToReducePressure();
+
+  DOUT << "*** Final schedule ***\n";
+  DEBUG(dumpSchedule());
+  DOUT << "\n";
+
+  // Emit in scheduled order
+  EmitSchedule();
+}
+
+/// CommuteNodesToReducePressure - If a node is two-address and commutable, and
+/// it is not the last use of its first operand, add it to the CommuteSet if
+/// possible. It will be commuted when it is translated to an MI.
+void ScheduleDAGRRList::CommuteNodesToReducePressure() {
+  SmallPtrSet<SUnit*, 4> OperandSeen;
+  for (unsigned i = Sequence.size()-1; i != 0; --i) {  // Ignore first node.
+    SUnit *SU = Sequence[i];
+    if (!SU) continue;
+    if (SU->isCommutable) {
+      unsigned Opc = SU->Node->getTargetOpcode();
+      unsigned NumRes = CountResults(SU->Node);
+      unsigned NumOps = CountOperands(SU->Node);
+      for (unsigned j = 0; j != NumOps; ++j) {
+        if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) == -1)
+          continue;
+
+        SDNode *OpN = SU->Node->getOperand(j).Val;
+        SUnit *OpSU = SUnitMap[OpN];
+        if (OpSU && OperandSeen.count(OpSU) == 1) {
+          // Ok, so SU is not the last use of OpSU, but SU is two-address so
+          // it will clobber OpSU. Try to commute SU if no other source operands
+          // are live below.
+          bool DoCommute = true;
+          for (unsigned k = 0; k < NumOps; ++k) {
+            if (k != j) {
+              OpN = SU->Node->getOperand(k).Val;
+              OpSU = SUnitMap[OpN];
+              if (OpSU && OperandSeen.count(OpSU) == 1) {
+                DoCommute = false;
+                break;
+              }
+            }
+          }
+          if (DoCommute)
+            CommuteSet.insert(SU->Node);
+        }
+
+        // Only look at the first use&def node for now.
+        break;
+      }
+    }
+
+    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I) {
+      if (!I->second)
+        OperandSeen.insert(I->first);
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the Available queue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain,
+                                    unsigned CurCycle) {
+  // FIXME: the distance between two nodes is not always == the predecessor's
+  // latency. For example, the reader can very well read the register written
+  // by the predecessor later than the issue cycle. It also depends on the
+  // interrupt model (drain vs. freeze).
+  PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency);
+
+  if (!isChain)
+    PredSU->NumSuccsLeft--;
+  else
+    PredSU->NumChainSuccsLeft--;
+
+#ifndef NDEBUG
+  if (PredSU->NumSuccsLeft < 0 || PredSU->NumChainSuccsLeft < 0) {
+    cerr << "*** List scheduling failed! ***\n";
+    PredSU->dump(&DAG);
+    cerr << " has been released too many times!\n";
+    assert(0);
+  }
+#endif
+
+  if ((PredSU->NumSuccsLeft + PredSU->NumChainSuccsLeft) == 0) {
+    // EntryToken has to go last!  Special case it here.
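+    // (it is appended explicitly at the end of ListScheduleBottomUp rather
+    // than ever being pushed onto the queue)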
+ if (PredSU->Node->getOpcode() != ISD::EntryToken) { + PredSU->isAvailable = true; + AvailableQueue->push(PredSU); + } + } +} + +/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending +/// count of its predecessors. If a predecessor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { + DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(SU->dump(&DAG)); + SU->Cycle = CurCycle; + + AvailableQueue->ScheduledNode(SU); + Sequence.push_back(SU); + + // Bottom up: release predecessors + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + ReleasePred(I->first, I->second, CurCycle); + SU->isScheduled = true; +} + +/// isReady - True if node's lower cycle bound is less or equal to the current +/// scheduling cycle. Always true if all nodes have uniform latency 1. +static inline bool isReady(SUnit *SU, unsigned CurCycle) { + return SU->CycleBound <= CurCycle; +} + +/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up +/// schedulers. +void ScheduleDAGRRList::ListScheduleBottomUp() { + unsigned CurCycle = 0; + // Add root to Available queue. + AvailableQueue->push(SUnitMap[DAG.getRoot().Val]); + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + std::vector<SUnit*> NotReady; + while (!AvailableQueue->empty()) { + SUnit *CurNode = AvailableQueue->pop(); + while (CurNode && !isReady(CurNode, CurCycle)) { + NotReady.push_back(CurNode); + CurNode = AvailableQueue->pop(); + } + + // Add the nodes that aren't ready back onto the available list. + AvailableQueue->push_all(NotReady); + NotReady.clear(); + + if (CurNode != NULL) + ScheduleNodeBottomUp(CurNode, CurCycle); + CurCycle++; + } + + // Add entry node last + if (DAG.getEntryNode().Val != DAG.getRoot().Val) { + SUnit *Entry = SUnitMap[DAG.getEntryNode().Val]; + Sequence.push_back(Entry); + } + + // Reverse the order if it is bottom up. + std::reverse(Sequence.begin(), Sequence.end()); + + +#ifndef NDEBUG + // Verify that all SUnits were scheduled. + bool AnyNotSched = false; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + if (SUnits[i].NumSuccsLeft != 0 || SUnits[i].NumChainSuccsLeft != 0) { + if (!AnyNotSched) + cerr << "*** List scheduling failed! ***\n"; + SUnits[i].dump(&DAG); + cerr << "has not been scheduled!\n"; + AnyNotSched = true; + } + } + assert(!AnyNotSched); +#endif +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the PendingQueue if the count reaches zero. +void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain, + unsigned CurCycle) { + // FIXME: the distance between two nodes is not always == the predecessor's + // latency. For example, the reader can very well read the register written + // by the predecessor later than the issue cycle. It also depends on the + // interrupt model (drain vs. freeze). + SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency); + + if (!isChain) + SuccSU->NumPredsLeft--; + else + SuccSU->NumChainPredsLeft--; + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft < 0 || SuccSU->NumChainPredsLeft < 0) { + cerr << "*** List scheduling failed! 
***\n"; + SuccSU->dump(&DAG); + cerr << " has been released too many times!\n"; + assert(0); + } +#endif + + if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) { + SuccSU->isAvailable = true; + AvailableQueue->push(SuccSU); + } +} + + +/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(SU->dump(&DAG)); + SU->Cycle = CurCycle; + + AvailableQueue->ScheduledNode(SU); + Sequence.push_back(SU); + + // Top down: release successors + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + ReleaseSucc(I->first, I->second, CurCycle); + SU->isScheduled = true; +} + +void ScheduleDAGRRList::ListScheduleTopDown() { + unsigned CurCycle = 0; + SUnit *Entry = SUnitMap[DAG.getEntryNode().Val]; + + // All leaves to Available queue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. + if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) { + AvailableQueue->push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // Emit the entry node first. + ScheduleNodeTopDown(Entry, CurCycle); + CurCycle++; + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + std::vector<SUnit*> NotReady; + while (!AvailableQueue->empty()) { + SUnit *CurNode = AvailableQueue->pop(); + while (CurNode && !isReady(CurNode, CurCycle)) { + NotReady.push_back(CurNode); + CurNode = AvailableQueue->pop(); + } + + // Add the nodes that aren't ready back onto the available list. + AvailableQueue->push_all(NotReady); + NotReady.clear(); + + if (CurNode != NULL) + ScheduleNodeTopDown(CurNode, CurCycle); + CurCycle++; + } + + +#ifndef NDEBUG + // Verify that all SUnits were scheduled. + bool AnyNotSched = false; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + if (!SUnits[i].isScheduled) { + if (!AnyNotSched) + cerr << "*** List scheduling failed! ***\n"; + SUnits[i].dump(&DAG); + cerr << "has not been scheduled!\n"; + AnyNotSched = true; + } + } + assert(!AnyNotSched); +#endif +} + + + +//===----------------------------------------------------------------------===// +// RegReductionPriorityQueue Implementation +//===----------------------------------------------------------------------===// +// +// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers +// to reduce register pressure. +// +namespace { + template<class SF> + class RegReductionPriorityQueue; + + /// Sorting functions for the Available queue. 
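+  /// Note that std::priority_queue pops its "largest" element first, so these
+  /// predicates return true when 'left' should come out of the queue *after*
+  /// 'right'.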
+  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
+    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
+    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+    bool operator()(const SUnit* left, const SUnit* right) const;
+  };
+
+  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
+    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
+    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+    bool operator()(const SUnit* left, const SUnit* right) const;
+  };
+}  // end anonymous namespace
+
+static inline bool isCopyFromLiveIn(const SUnit *SU) {
+  SDNode *N = SU->Node;
+  return N->getOpcode() == ISD::CopyFromReg &&
+    N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag;
+}
+
+namespace {
+  template<class SF>
+  class VISIBILITY_HIDDEN RegReductionPriorityQueue
+    : public SchedulingPriorityQueue {
+    std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue;
+
+  public:
+    RegReductionPriorityQueue() :
+      Queue(SF(this)) {}
+
+    virtual void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+                           std::vector<SUnit> &sunits) {}
+    virtual void releaseState() {}
+
+    virtual unsigned getNodePriority(const SUnit *SU) const {
+      return 0;
+    }
+
+    bool empty() const { return Queue.empty(); }
+
+    void push(SUnit *U) {
+      Queue.push(U);
+    }
+    void push_all(const std::vector<SUnit *> &Nodes) {
+      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
+        Queue.push(Nodes[i]);
+    }
+
+    SUnit *pop() {
+      if (empty()) return NULL;
+      SUnit *V = Queue.top();
+      Queue.pop();
+      return V;
+    }
+
+    virtual bool isDUOperand(const SUnit *SU1, const SUnit *SU2) {
+      return false;
+    }
+  };
+
+  template<class SF>
+  class VISIBILITY_HIDDEN BURegReductionPriorityQueue
+    : public RegReductionPriorityQueue<SF> {
+    // SUnitMap SDNode to SUnit mapping (n -> 1).
+    DenseMap<SDNode*, SUnit*> *SUnitMap;
+
+    // SUnits - The SUnits for the current graph.
+    const std::vector<SUnit> *SUnits;
+
+    // SethiUllmanNumbers - The SethiUllman number for each node.
+    std::vector<unsigned> SethiUllmanNumbers;
+
+    const TargetInstrInfo *TII;
+  public:
+    BURegReductionPriorityQueue(const TargetInstrInfo *tii)
+      : TII(tii) {}
+
+    void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+                   std::vector<SUnit> &sunits) {
+      SUnitMap = &sumap;
+      SUnits = &sunits;
+      // Add pseudo dependency edges for two-address nodes.
+      AddPseudoTwoAddrDeps();
+      // Calculate node priorities.
+      CalculateSethiUllmanNumbers();
+    }
+
+    void releaseState() {
+      SUnits = 0;
+      SethiUllmanNumbers.clear();
+    }
+
+    unsigned getNodePriority(const SUnit *SU) const {
+      assert(SU->NodeNum < SethiUllmanNumbers.size());
+      unsigned Opc = SU->Node->getOpcode();
+      if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU))
+        // CopyFromReg should be close to its def because it restricts
+        // allocation choices. But if it is a livein then perhaps we want it
+        // closer to its uses so it can be coalesced.
+        return 0xffff;
+      else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+        // CopyToReg should be close to its uses to facilitate coalescing and
+        // avoid spilling.
+        return 0;
+      else if (SU->NumSuccs == 0)
+        // If SU does not have a use, i.e. it doesn't produce a value that
+        // would be consumed (e.g. store), then it terminates a chain of
+        // computation.  Give it a large SethiUllman number so it will be
+        // scheduled right before its predecessors, so that it doesn't
+        // lengthen their live ranges.
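+        // (0xffff is effectively infinity relative to any real Sethi-Ullman
+        // number)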
+        return 0xffff;
+      else if (SU->NumPreds == 0)
+        // If SU does not have a def, schedule it close to its uses because it
+        // does not lengthen any live ranges.
+        return 0;
+      else
+        return SethiUllmanNumbers[SU->NodeNum];
+    }
+
+    bool isDUOperand(const SUnit *SU1, const SUnit *SU2) {
+      unsigned Opc = SU1->Node->getTargetOpcode();
+      unsigned NumRes = ScheduleDAG::CountResults(SU1->Node);
+      unsigned NumOps = ScheduleDAG::CountOperands(SU1->Node);
+      for (unsigned i = 0; i != NumOps; ++i) {
+        if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) == -1)
+          continue;
+        if (SU1->Node->getOperand(i).isOperand(SU2->Node))
+          return true;
+      }
+      return false;
+    }
+  private:
+    bool canClobber(SUnit *SU, SUnit *Op);
+    void AddPseudoTwoAddrDeps();
+    void CalculateSethiUllmanNumbers();
+    unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
+  };
+
+
+  template<class SF>
+  class TDRegReductionPriorityQueue : public RegReductionPriorityQueue<SF> {
+    // SUnitMap SDNode to SUnit mapping (n -> 1).
+    DenseMap<SDNode*, SUnit*> *SUnitMap;
+
+    // SUnits - The SUnits for the current graph.
+    const std::vector<SUnit> *SUnits;
+
+    // SethiUllmanNumbers - The SethiUllman number for each node.
+    std::vector<unsigned> SethiUllmanNumbers;
+
+  public:
+    TDRegReductionPriorityQueue() {}
+
+    void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+                   std::vector<SUnit> &sunits) {
+      SUnitMap = &sumap;
+      SUnits = &sunits;
+      // Calculate node priorities.
+      CalculateSethiUllmanNumbers();
+    }
+
+    void releaseState() {
+      SUnits = 0;
+      SethiUllmanNumbers.clear();
+    }
+
+    unsigned getNodePriority(const SUnit *SU) const {
+      assert(SU->NodeNum < SethiUllmanNumbers.size());
+      return SethiUllmanNumbers[SU->NodeNum];
+    }
+
+  private:
+    void CalculateSethiUllmanNumbers();
+    unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
+  };
+}
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+  unsigned MaxCycle = 0;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    unsigned Cycle = I->first->Cycle;
+    // If there are a bunch of CopyToRegs stacked up, they should be considered
+    // to be at the same position.
+    if (I->first->Node->getOpcode() == ISD::CopyToReg)
+      Cycle = closestSucc(I->first)+1;
+    if (Cycle > MaxCycle)
+      MaxCycle = Cycle;
+  }
+  return MaxCycle;
+}
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers. Live-in operands and live-out results don't count
+/// since they are "fixed".
+static unsigned calcMaxScratches(const SUnit *SU) {
+  unsigned Scratches = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->second) continue;  // ignore chain preds
+    if (I->first->Node->getOpcode() != ISD::CopyFromReg)
+      Scratches++;
+  }
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->second) continue;  // ignore chain succs
+    if (I->first->Node->getOpcode() != ISD::CopyToReg)
+      Scratches += 10;
+  }
+  return Scratches;
+}
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+  // There used to be a special tie breaker here that looked for
+  // two-address instructions and preferred the instruction with a
+  // def&use operand.  The special case triggered diagnostics when
+  // _GLIBCXX_DEBUG was enabled because it broke the strict weak
+  // ordering that priority_queue requires.  It didn't help much anyway
+  // because AddPseudoTwoAddrDeps already covers many of the cases
+  // where it would have applied.  In addition, it's counter-intuitive
+  // that a tie breaker would be the first thing attempted.  There's a
+  // "real" tie breaker below that is the operation of last resort.
+  // The fact that the "special tie breaker" would trigger when there
+  // wasn't otherwise a tie is what broke the strict weak ordering
+  // constraint.
+
+  unsigned LPriority = SPQ->getNodePriority(left);
+  unsigned RPriority = SPQ->getNodePriority(right);
+  if (LPriority > RPriority)
+    return true;
+  else if (LPriority == RPriority) {
+    // Try to schedule def + use closer together when Sethi-Ullman numbers
+    // are the same.  e.g.
+    // t1 = op t2, c1
+    // t3 = op t4, c2
+    //
+    // and the following instructions are both ready.
+    // t2 = op c3
+    // t4 = op c4
+    //
+    // Then schedule t2 = op first.
+    // i.e.
+    // t4 = op c4
+    // t2 = op c3
+    // t1 = op t2, c1
+    // t3 = op t4, c2
+    //
+    // This creates more short live intervals.
+    unsigned LDist = closestSucc(left);
+    unsigned RDist = closestSucc(right);
+    if (LDist < RDist)
+      return true;
+    else if (LDist == RDist) {
+      // Intuitively, it's good to push down instructions whose results are
+      // liveout so their long live ranges won't conflict with other values
+      // which are needed inside the BB.  Further prioritize liveout
+      // instructions by the number of operands which are calculated within
+      // the BB.
+      unsigned LScratch = calcMaxScratches(left);
+      unsigned RScratch = calcMaxScratches(right);
+      if (LScratch > RScratch)
+        return true;
+      else if (LScratch == RScratch)
+        if (left->Height > right->Height)
+          return true;
+        else if (left->Height == right->Height)
+          if (left->Depth < right->Depth)
+            return true;
+          else if (left->Depth == right->Depth)
+            if (left->CycleBound > right->CycleBound)
+              return true;
+    }
+  }
+  return false;
+}
+
+// FIXME: This is probably too slow!
+static void isReachable(SUnit *SU, SUnit *TargetSU,
+                        SmallPtrSet<SUnit*, 32> &Visited, bool &Reached) {
+  if (Reached) return;
+  if (SU == TargetSU) {
+    Reached = true;
+    return;
+  }
+  if (!Visited.insert(SU)) return;
+
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E;
+       ++I)
+    isReachable(I->first, TargetSU, Visited, Reached);
+}
+
+static bool isReachable(SUnit *SU, SUnit *TargetSU) {
+  SmallPtrSet<SUnit*, 32> Visited;
+  bool Reached = false;
+  isReachable(SU, TargetSU, Visited, Reached);
+  return Reached;
+}
+
+template<class SF>
+bool BURegReductionPriorityQueue<SF>::canClobber(SUnit *SU, SUnit *Op) {
+  if (SU->isTwoAddress) {
+    unsigned Opc = SU->Node->getTargetOpcode();
+    unsigned NumRes = ScheduleDAG::CountResults(SU->Node);
+    unsigned NumOps = ScheduleDAG::CountOperands(SU->Node);
+    for (unsigned i = 0; i != NumOps; ++i) {
+      if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) != -1) {
+        SDNode *DU = SU->Node->getOperand(i).Val;
+        if (Op == (*SUnitMap)[DU])
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule).
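+/// isReachable is checked first so that adding the edge cannot create a cycle.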
+template<class SF> +void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { + SUnit *SU = (SUnit *)&((*SUnits)[i]); + if (!SU->isTwoAddress) + continue; + + SDNode *Node = SU->Node; + if (!Node->isTargetOpcode()) + continue; + + unsigned Opc = Node->getTargetOpcode(); + unsigned NumRes = ScheduleDAG::CountResults(Node); + unsigned NumOps = ScheduleDAG::CountOperands(Node); + for (unsigned j = 0; j != NumOps; ++j) { + if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) != -1) { + SDNode *DU = SU->Node->getOperand(j).Val; + SUnit *DUSU = (*SUnitMap)[DU]; + if (!DUSU) continue; + for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end(); + I != E; ++I) { + if (I->second) continue; + SUnit *SuccSU = I->first; + if (SuccSU != SU && + (!canClobber(SuccSU, DUSU) || + (!SU->isCommutable && SuccSU->isCommutable))){ + if (SuccSU->Depth == SU->Depth && !isReachable(SuccSU, SU)) { + DOUT << "Adding an edge from SU # " << SU->NodeNum + << " to SU #" << SuccSU->NodeNum << "\n"; + if (SU->addPred(SuccSU, true)) + SU->NumChainPredsLeft++; + if (SuccSU->addSucc(SU, true)) + SuccSU->NumChainSuccsLeft++; + } + } + } + } + } + } +} + +/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number. +/// Smaller number is the higher priority. +template<class SF> +unsigned BURegReductionPriorityQueue<SF>:: +CalcNodeSethiUllmanNumber(const SUnit *SU) { + unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; + if (SethiUllmanNumber != 0) + return SethiUllmanNumber; + + unsigned Extra = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->second) continue; // ignore chain preds + SUnit *PredSU = I->first; + unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU); + if (PredSethiUllman > SethiUllmanNumber) { + SethiUllmanNumber = PredSethiUllman; + Extra = 0; + } else if (PredSethiUllman == SethiUllmanNumber && !I->second) + Extra++; + } + + SethiUllmanNumber += Extra; + + if (SethiUllmanNumber == 0) + SethiUllmanNumber = 1; + + return SethiUllmanNumber; +} + +/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all +/// scheduling units. +template<class SF> +void BURegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { + SethiUllmanNumbers.assign(SUnits->size(), 0); + + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) + CalcNodeSethiUllmanNumber(&(*SUnits)[i]); +} + +static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) { + unsigned Sum = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + SUnit *SuccSU = I->first; + for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), + EE = SuccSU->Preds.end(); II != EE; ++II) { + SUnit *PredSU = II->first; + if (!PredSU->isScheduled) + Sum++; + } + } + + return Sum; +} + + +// Top down +bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { + unsigned LPriority = SPQ->getNodePriority(left); + unsigned RPriority = SPQ->getNodePriority(right); + bool LIsTarget = left->Node->isTargetOpcode(); + bool RIsTarget = right->Node->isTargetOpcode(); + bool LIsFloater = LIsTarget && left->NumPreds == 0; + bool RIsFloater = RIsTarget && right->NumPreds == 0; + unsigned LBonus = (SumOfUnscheduledPredsOfSuccs(left) == 1) ? 2 : 0; + unsigned RBonus = (SumOfUnscheduledPredsOfSuccs(right) == 1) ? 
2 : 0;
+
+  if (left->NumSuccs == 0 && right->NumSuccs != 0)
+    return false;
+  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
+    return true;
+
+  // Special tie breaker: if two nodes share an operand, the one that uses it
+  // as a def&use operand is preferred.
+  if (LIsTarget && RIsTarget) {
+    if (left->isTwoAddress && !right->isTwoAddress) {
+      SDNode *DUNode = left->Node->getOperand(0).Val;
+      if (DUNode->isOperand(right->Node))
+        RBonus += 2;
+    }
+    if (!left->isTwoAddress && right->isTwoAddress) {
+      SDNode *DUNode = right->Node->getOperand(0).Val;
+      if (DUNode->isOperand(left->Node))
+        LBonus += 2;
+    }
+  }
+  if (LIsFloater)
+    LBonus -= 2;
+  if (RIsFloater)
+    RBonus -= 2;
+  if (left->NumSuccs == 1)
+    LBonus += 2;
+  if (right->NumSuccs == 1)
+    RBonus += 2;
+
+  if (LPriority+LBonus < RPriority+RBonus)
+    return true;
+  else if (LPriority == RPriority)
+    if (left->Depth < right->Depth)
+      return true;
+    else if (left->Depth == right->Depth)
+      if (left->NumSuccsLeft > right->NumSuccsLeft)
+        return true;
+      else if (left->NumSuccsLeft == right->NumSuccsLeft)
+        if (left->CycleBound > right->CycleBound)
+          return true;
+  return false;
+}
+
+/// CalcNodeSethiUllmanNumber - Priority is the Sethi-Ullman number.
+/// Smaller number is the higher priority.
+template<class SF>
+unsigned TDRegReductionPriorityQueue<SF>::
+CalcNodeSethiUllmanNumber(const SUnit *SU) {
+  unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
+  if (SethiUllmanNumber != 0)
+    return SethiUllmanNumber;
+
+  unsigned Opc = SU->Node->getOpcode();
+  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+    SethiUllmanNumber = 0xffff;
+  else if (SU->NumSuccsLeft == 0)
+    // If SU does not have a use, i.e. it doesn't produce a value that would
+    // be consumed (e.g. store), then it terminates a chain of computation.
+    // Give it a small SethiUllman number so it will be scheduled right before
+    // its predecessors, so that it doesn't lengthen their live ranges.
+    SethiUllmanNumber = 0;
+  else if (SU->NumPredsLeft == 0 &&
+           (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU)))
+    SethiUllmanNumber = 0xffff;
+  else {
+    int Extra = 0;
+    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I) {
+      if (I->second) continue;  // ignore chain preds
+      SUnit *PredSU = I->first;
+      unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
+      if (PredSethiUllman > SethiUllmanNumber) {
+        SethiUllmanNumber = PredSethiUllman;
+        Extra = 0;
+      } else if (PredSethiUllman == SethiUllmanNumber && !I->second)
+        Extra++;
+    }
+
+    SethiUllmanNumber += Extra;
+  }
+
+  return SethiUllmanNumber;
+}
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
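+/// (The computation is memoized per node, exactly as in the bottom-up
+/// variant above.)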
+template<class SF> +void TDRegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { + SethiUllmanNumbers.assign(SUnits->size(), 0); + + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) + CalcNodeSethiUllmanNumber(&(*SUnits)[i]); +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, + SelectionDAG *DAG, + MachineBasicBlock *BB) { + const TargetInstrInfo *TII = DAG->getTarget().getInstrInfo(); + return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true, + new BURegReductionPriorityQueue<bu_ls_rr_sort>(TII)); +} + +llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, + SelectionDAG *DAG, + MachineBasicBlock *BB) { + return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), false, + new TDRegReductionPriorityQueue<td_ls_rr_sort>()); +} + diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSimple.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSimple.cpp new file mode 100644 index 0000000..62854f7 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSimple.cpp @@ -0,0 +1,1156 @@ +//===-- ScheduleDAGSimple.cpp - Implement a trivial DAG scheduler ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by James M. Laskey and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a simple two pass scheduler. The first pass attempts to push +// backward any lengthy instructions and critical paths. The second pass packs +// instructions into semi-optimal time slots. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SSARegMap.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include <algorithm> +using namespace llvm; + +namespace { + +static RegisterScheduler + bfsDAGScheduler("none", " No scheduling: breadth first sequencing", + createBFS_DAGScheduler); +static RegisterScheduler + simpleDAGScheduler("simple", + " Simple two pass scheduling: minimize critical path " + "and maximize processor utilization", + createSimpleDAGScheduler); +static RegisterScheduler + noitinDAGScheduler("simple-noitin", + " Simple two pass scheduling: Same as simple " + "except using generic latency", + createNoItinsDAGScheduler); + +class NodeInfo; +typedef NodeInfo *NodeInfoPtr; +typedef std::vector<NodeInfoPtr> NIVector; +typedef std::vector<NodeInfoPtr>::iterator NIIterator; + +//===--------------------------------------------------------------------===// +/// +/// Node group - This struct is used to manage flagged node groups. +/// +class NodeGroup { +public: + NodeGroup *Next; +private: + NIVector Members; // Group member nodes + NodeInfo *Dominator; // Node with highest latency + unsigned Latency; // Total latency of the group + int Pending; // Number of visits pending before + // adding to order + +public: + // Ctor. 
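+  // (Latency is filled in later via setLatency once the group is formed.)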
+  NodeGroup() : Next(NULL), Dominator(NULL), Latency(0), Pending(0) {}
+
+  // Accessors
+  inline void setDominator(NodeInfo *D) { Dominator = D; }
+  inline NodeInfo *getTop() { return Members.front(); }
+  inline NodeInfo *getBottom() { return Members.back(); }
+  inline NodeInfo *getDominator() { return Dominator; }
+  inline void setLatency(unsigned L) { Latency = L; }
+  inline unsigned getLatency() { return Latency; }
+  inline int getPending() const { return Pending; }
+  inline void setPending(int P) { Pending = P; }
+  inline int addPending(int I) { return Pending += I; }
+
+  // Pass thru
+  inline bool group_empty() { return Members.empty(); }
+  inline NIIterator group_begin() { return Members.begin(); }
+  inline NIIterator group_end() { return Members.end(); }
+  inline void group_push_back(const NodeInfoPtr &NI) {
+    Members.push_back(NI);
+  }
+  inline NIIterator group_insert(NIIterator Pos, const NodeInfoPtr &NI) {
+    return Members.insert(Pos, NI);
+  }
+  inline void group_insert(NIIterator Pos, NIIterator First,
+                           NIIterator Last) {
+    Members.insert(Pos, First, Last);
+  }
+
+  static void Add(NodeInfo *D, NodeInfo *U);
+};
+
+//===--------------------------------------------------------------------===//
+///
+/// NodeInfo - This struct tracks information used to schedule a node.
+///
+class NodeInfo {
+private:
+  int Pending;              // Number of visits pending before
+                            // adding to order
+public:
+  SDNode *Node;             // DAG node
+  InstrStage *StageBegin;   // First stage in itinerary
+  InstrStage *StageEnd;     // Last+1 stage in itinerary
+  unsigned Latency;         // Total cycles to complete instr
+  bool IsCall : 1;          // Is function call
+  bool IsLoad : 1;          // Is memory load
+  bool IsStore : 1;         // Is memory store
+  unsigned Slot;            // Node's time slot
+  NodeGroup *Group;         // Grouping information
+#ifndef NDEBUG
+  unsigned Preorder;        // Index before scheduling
+#endif
+
+  // Ctor.
+  NodeInfo(SDNode *N = NULL)
+    : Pending(0)
+    , Node(N)
+    , StageBegin(NULL)
+    , StageEnd(NULL)
+    , Latency(0)
+    , IsCall(false)
+    , IsLoad(false)
+    , IsStore(false)
+    , Slot(0)
+    , Group(NULL)
+#ifndef NDEBUG
+    , Preorder(0)
+#endif
+  {}
+
+  // Accessors
+  inline bool isInGroup() const {
+    assert((!Group || !Group->group_empty()) && "Group with no members");
+    return Group != NULL;
+  }
+  inline bool isGroupDominator() const {
+    return isInGroup() && Group->getDominator() == this;
+  }
+  inline int getPending() const {
+    return Group ? Group->getPending() : Pending;
+  }
+  inline void setPending(int P) {
+    if (Group) Group->setPending(P);
+    else       Pending = P;
+  }
+  inline int addPending(int I) {
+    if (Group) return Group->addPending(I);
+    else       return Pending += I;
+  }
+};
+
+//===--------------------------------------------------------------------===//
+///
+/// NodeGroupIterator - Iterates over all the nodes indicated by the node
+/// info.  If the node is in a group then iterate over the members of the
+/// group, otherwise just the node info.
+///
+class NodeGroupIterator {
+private:
+  NodeInfo   *NI;   // Node info
+  NIIterator  NGI;  // Node group iterator
+  NIIterator  NGE;  // Node group iterator end
+
+public:
+  // Ctor.
+  NodeGroupIterator(NodeInfo *N) : NI(N) {
+    // If the node is in a group then set up the group iterator.  Otherwise
+    // the group iterators will trip first time out.
+    if (N->isInGroup()) {
+      // get Group
+      NodeGroup *Group = NI->Group;
+      NGI = Group->group_begin();
+      NGE = Group->group_end();
+      // Prevent this node from being used (it will be in the members list)
+      NI = NULL;
+    }
+  }
+
+  /// next - Return the next node info, otherwise NULL.
+  ///
+  NodeInfo *next() {
+    // If members list
+    if (NGI != NGE) return *NGI++;
+    // Use node as the result (may be NULL)
+    NodeInfo *Result = NI;
+    // Only use once
+    NI = NULL;
+    // Return node or NULL
+    return Result;
+  }
+};
+//===--------------------------------------------------------------------===//
+
+
+//===--------------------------------------------------------------------===//
+///
+/// NodeGroupOpIterator - Iterates over all the operands of a node.  If the
+/// node is a member of a group, this iterates over all the operands of all
+/// the members of the group.
+///
+class NodeGroupOpIterator {
+private:
+  NodeInfo            *NI;  // Node containing operands
+  NodeGroupIterator    GI;  // Node group iterator
+  SDNode::op_iterator  OI;  // Operand iterator
+  SDNode::op_iterator  OE;  // Operand iterator end
+
+  /// CheckNode - Test if the node has more operands.  If not, get the next
+  /// node, skipping over nodes that have no operands.
+  void CheckNode() {
+    // Only if operands are exhausted first
+    while (OI == OE) {
+      // Get next node info
+      NodeInfo *NI = GI.next();
+      // Exit if nodes are exhausted
+      if (!NI) return;
+      // Get node itself
+      SDNode *Node = NI->Node;
+      // Set up the operand iterators
+      OI = Node->op_begin();
+      OE = Node->op_end();
+    }
+  }
+
+public:
+  // Ctor.
+  NodeGroupOpIterator(NodeInfo *N)
+    : NI(N), GI(N), OI(SDNode::op_iterator()), OE(SDNode::op_iterator()) {}
+
+  /// isEnd - Returns true when no more operands are available.
+  ///
+  inline bool isEnd() { CheckNode(); return OI == OE; }
+
+  /// next - Returns the next available operand.
+  ///
+  inline SDOperand next() {
+    assert(OI != OE &&
+           "Not checking for end of NodeGroupOpIterator correctly");
+    return *OI++;
+  }
+};
+
+
+//===----------------------------------------------------------------------===//
+///
+/// BitsIterator - Provides iteration through individual bits in a bit vector.
+///
+template<class T>
+class BitsIterator {
+private:
+  T Bits;  // Bits left to iterate through
+
+public:
+  /// Ctor.
+  BitsIterator(T Initial) : Bits(Initial) {}
+
+  /// Next - Returns the next bit set or zero if exhausted.
+  inline T Next() {
+    // Get the rightmost bit set
+    T Result = Bits & -Bits;
+    // Remove from rest
+    Bits &= ~Result;
+    // Return single bit or zero
+    return Result;
+  }
+};
+
+//===----------------------------------------------------------------------===//
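The `Bits & -Bits` step in BitsIterator::Next is the standard two's-complement trick for isolating the lowest set bit. A small self-contained check of the same peeling loop (mask value invented):

#include <cstdio>

int main() {
  unsigned Bits = 0x2C;                      // 0b101100: three "units" set
  while (unsigned B = Bits & (0U - Bits)) {  // lowest set bit of Bits
    std::printf("0x%X\n", B);                // prints 0x4, 0x8, 0x20 in turn
    Bits &= ~B;                              // peel it off, as Next() does
  }
}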
+//===----------------------------------------------------------------------===//
+///
+/// ResourceTally - Manages the use of resources over time intervals.  Each
+/// item (slot) in the tally vector represents the resources used at a given
+/// moment.  A bit set to 1 indicates that a resource is in use; otherwise it
+/// is available.  An assumption is made that the tally is large enough to
+/// schedule all current instructions (asserts otherwise).
+///
+template<class T>
+class ResourceTally {
+private:
+  std::vector<T> Tally;  // Resources used per slot
+  typedef typename std::vector<T>::iterator Iter;
+                         // Tally iterator
+
+  /// SlotsAvailable - Returns true if all units are available.
+  ///
+  bool SlotsAvailable(Iter Begin, unsigned N, unsigned ResourceSet,
+                      unsigned &Resource) {
+    assert(N && "Must check availability with N != 0");
+    // Determine end of interval
+    Iter End = Begin + N;
+    assert(End <= Tally.end() && "Tally is not large enough for schedule");
+
+    // Iterate through each resource
+    BitsIterator<T> Resources(ResourceSet & ~*Begin);
+    while (unsigned Res = Resources.Next()) {
+      // Check if resource is available for next N slots
+      Iter Interval = End;
+      do {
+        Interval--;
+        if (*Interval & Res) break;
+      } while (Interval != Begin);
+
+      // If available for N
+      if (Interval == Begin) {
+        // Success
+        Resource = Res;
+        return true;
+      }
+    }
+
+    // No luck
+    Resource = 0;
+    return false;
+  }
+
+  /// RetrySlot - Finds a good candidate slot to retry the search from.
+  Iter RetrySlot(Iter Begin, unsigned N, unsigned ResourceSet) {
+    assert(N && "Must check availability with N != 0");
+    // Determine end of interval
+    Iter End = Begin + N;
+    assert(End <= Tally.end() && "Tally is not large enough for schedule");
+
+    while (Begin != End--) {
+      // Clear units in use
+      ResourceSet &= ~*End;
+      // If no units left then we should go no further
+      if (!ResourceSet) return End + 1;
+    }
+    // Made it all the way through
+    return Begin;
+  }
+
+  /// FindAndReserveStages - Return true if the stages can be completed.  If
+  /// so, mark them as busy.
+  bool FindAndReserveStages(Iter Begin,
+                            InstrStage *Stage, InstrStage *StageEnd) {
+    // If at last stage then we're done
+    if (Stage == StageEnd) return true;
+    // Get number of cycles for current stage
+    unsigned N = Stage->Cycles;
+    // Check to see if N slots are available, if not fail
+    unsigned Resource;
+    if (!SlotsAvailable(Begin, N, Stage->Units, Resource)) return false;
+    // Check to see if remaining stages are available, if not fail
+    if (!FindAndReserveStages(Begin + N, Stage + 1, StageEnd)) return false;
+    // Reserve resource
+    Reserve(Begin, N, Resource);
+    // Success
+    return true;
+  }
+
+  /// Reserve - Mark busy (set) the specified N slots.
+  void Reserve(Iter Begin, unsigned N, unsigned Resource) {
+    // Determine end of interval
+    Iter End = Begin + N;
+    assert(End <= Tally.end() && "Tally is not large enough for schedule");
+
+    // Set resource bit in each slot
+    for (; Begin < End; Begin++)
+      *Begin |= Resource;
+  }
+
+  /// FindSlots - Starting from Begin, locate consecutive slots where all
+  /// stages can be completed.  Returns the address of the first slot.
+  Iter FindSlots(Iter Begin, InstrStage *StageBegin, InstrStage *StageEnd) {
+    // Track position
+    Iter Cursor = Begin;
+
+    // Try all possible slots forward
+    while (true) {
+      // Try at cursor, if successful return position.
+      if (FindAndReserveStages(Cursor, StageBegin, StageEnd)) return Cursor;
+      // Locate a better position
+      Cursor = RetrySlot(Cursor + 1, StageBegin->Cycles, StageBegin->Units);
+    }
+  }
+
+public:
+  /// Initialize - Resize and zero the tally to the specified number of time
+  /// slots.
+  inline void Initialize(unsigned N) {
+    Tally.assign(N, 0);  // Initialize tally to all zeros.
+  }
+
+  // FindAndReserve - Locate an ideal slot for the specified stages and mark
+  // them as busy.
+  unsigned FindAndReserve(unsigned Slot, InstrStage *StageBegin,
+                          InstrStage *StageEnd) {
+    // Where to begin
+    Iter Begin = Tally.begin() + Slot;
+    // Find a free slot
+    Iter Where = FindSlots(Begin, StageBegin, StageEnd);
+    // Distance is slot number
+    unsigned Final = Where - Tally.begin();
+    return Final;
+  }
+
+};
+
+//===----------------------------------------------------------------------===//
+///
+/// ScheduleDAGSimple - Simple two pass scheduler.
+///
+class VISIBILITY_HIDDEN ScheduleDAGSimple : public ScheduleDAG {
+private:
+  bool NoSched;                        // Just do a BFS schedule, nothing fancy
+  bool NoItins;                        // Don't use itineraries?
+  ResourceTally<unsigned> Tally;       // Resource usage tally
+  unsigned NSlots;                     // Total latency
+  static const unsigned NotFound = ~0U;  // Search marker
+
+  unsigned NodeCount;                  // Number of nodes in DAG
+  std::map<SDNode *, NodeInfo *> Map;  // Map nodes to info
+  bool HasGroups;                      // True if there are any groups
+  NodeInfo *Info;                      // Info for nodes being scheduled
+  NIVector Ordering;                   // Emit ordering of nodes
+  NodeGroup *HeadNG, *TailNG;          // Keep track of allocated NodeGroups
+
+public:
+
+  // Ctor.
+  ScheduleDAGSimple(bool noSched, bool noItins, SelectionDAG &dag,
+                    MachineBasicBlock *bb, const TargetMachine &tm)
+    : ScheduleDAG(dag, bb, tm), NoSched(noSched), NoItins(noItins), NSlots(0),
+      NodeCount(0), HasGroups(false), Info(NULL), HeadNG(NULL), TailNG(NULL) {
+    assert(&TII && "Target doesn't provide instr info?");
+    assert(&MRI && "Target doesn't provide register info?");
+  }
+
+  virtual ~ScheduleDAGSimple() {
+    if (Info)
+      delete[] Info;
+
+    NodeGroup *NG = HeadNG;
+    while (NG) {
+      NodeGroup *NextNG = NG->Next;
+      delete NG;
+      NG = NextNG;
+    }
+  }
+
+  void Schedule();
+
+  /// getNI - Returns the node info for the specified node.
+  ///
+  NodeInfo *getNI(SDNode *Node) { return Map[Node]; }
+
+private:
+  static bool isDefiner(NodeInfo *A, NodeInfo *B);
+  void IncludeNode(NodeInfo *NI);
+  void VisitAll();
+  void GatherSchedulingInfo();
+  void FakeGroupDominators();
+  bool isStrongDependency(NodeInfo *A, NodeInfo *B);
+  bool isWeakDependency(NodeInfo *A, NodeInfo *B);
+  void ScheduleBackward();
+  void ScheduleForward();
+
+  void AddToGroup(NodeInfo *D, NodeInfo *U);
+
+  /// PrepareNodeInfo - Set up the basic minimum node info for scheduling.
+  ///
+  void PrepareNodeInfo();
+
+  /// IdentifyGroups - Put flagged nodes into groups.
+  ///
+  void IdentifyGroups();
+
+  /// print - Print ordering to specified output stream.
+  ///
+  void print(std::ostream &O) const;
+  void print(std::ostream *O) const { if (O) print(*O); }
+
+  void dump(const char *tag) const;
+
+  virtual void dump() const;
+
+  /// EmitAll - Emit all nodes in schedule sorted order.
+  ///
+  void EmitAll();
+
+  /// printNI - Print node info.
+  ///
+  void printNI(std::ostream &O, NodeInfo *NI) const;
+  void printNI(std::ostream *O, NodeInfo *NI) const { if (O) printNI(*O, NI); }
+
+  /// printChanges - Highlight changes in order caused by scheduling.
+  ///
+  void printChanges(unsigned Index) const;
+};
+
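At its core, SlotsAvailable above asks, for one candidate unit bit at a time, whether that bit is clear in N consecutive tally slots. A toy flattened version of that inner test (names and data invented):

#include <cstdio>
#include <vector>

// Is unit mask U free in every slot of the interval [B, B+N)?
static bool unitFree(const std::vector<unsigned> &Tally,
                     unsigned B, unsigned N, unsigned U) {
  for (unsigned S = B; S != B + N; ++S)
    if (Tally[S] & U) return false;  // unit already reserved in this slot
  return true;
}

int main() {
  std::vector<unsigned> Tally = {0x1, 0x0, 0x1, 0x0, 0x0};
  std::printf("%d\n", unitFree(Tally, 3, 2, 0x1));  // 1: slots 3-4 are free
  std::printf("%d\n", unitFree(Tally, 1, 2, 0x1));  // 0: slot 2 is busy
}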
+//===----------------------------------------------------------------------===//
+/// Special case itineraries.
+///
+enum {
+  CallLatency = 40,          // To push calls back in time
+
+  RSInteger   = 0xC0000000,  // Two integer units
+  RSFloat     = 0x30000000,  // Two float units
+  RSLoadStore = 0x0C000000,  // Two load store units
+  RSBranch    = 0x02000000   // One branch unit
+};
+static InstrStage LoadStage  = { 5, RSLoadStore };
+static InstrStage StoreStage = { 2, RSLoadStore };
+static InstrStage IntStage   = { 2, RSInteger };
+static InstrStage FloatStage = { 3, RSFloat };
+//===----------------------------------------------------------------------===//
+
+} // namespace
+
+//===----------------------------------------------------------------------===//
+
+/// PrepareNodeInfo - Set up the basic minimum node info for scheduling.
+///
+void ScheduleDAGSimple::PrepareNodeInfo() {
+  // Allocate node information
+  Info = new NodeInfo[NodeCount];
+
+  unsigned i = 0;
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I, ++i) {
+    // Fast reference to node schedule info
+    NodeInfo* NI = &Info[i];
+    // Set up map
+    Map[I] = NI;
+    // Set node
+    NI->Node = I;
+    // Set pending visit count
+    NI->setPending(I->use_size());
+  }
+}
+
+/// IdentifyGroups - Put flagged nodes into groups.
+///
+void ScheduleDAGSimple::IdentifyGroups() {
+  for (unsigned i = 0, N = NodeCount; i < N; i++) {
+    NodeInfo* NI = &Info[i];
+    SDNode *Node = NI->Node;
+
+    // For each operand (in reverse to only look at flags)
+    for (unsigned N = Node->getNumOperands(); 0 < N--;) {
+      // Get operand
+      SDOperand Op = Node->getOperand(N);
+      // No more flags to walk
+      if (Op.getValueType() != MVT::Flag) break;
+      // Add to node group
+      AddToGroup(getNI(Op.Val), NI);
+      // Let everyone else know
+      HasGroups = true;
+    }
+  }
+}
+
+/// CountInternalUses - Returns the number of edges between the two nodes.
+///
+static unsigned CountInternalUses(NodeInfo *D, NodeInfo *U) {
+  unsigned N = 0;
+  for (unsigned M = U->Node->getNumOperands(); 0 < M--;) {
+    SDOperand Op = U->Node->getOperand(M);
+    if (Op.Val == D->Node) N++;
+  }
+
+  return N;
+}
+
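The pending bookkeeping in AddToGroup, below, is inclusion-exclusion on use counts: when a definer group and a user group merge, the merged pending count is the sum of both counts minus the edges that become internal (which CountInternalUses tallies). A toy check with invented numbers:

#include <cassert>

int main() {
  // Definer d has 3 uses, 2 of them by user u; u itself has 1 outside use.
  int PendingD = 3, PendingU = 1, InternalEdges = 2;
  int Merged = PendingD + PendingU - InternalEdges;
  assert(Merged == 2);  // only the two uses leaving the group stay pending
  return 0;
}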
+//===----------------------------------------------------------------------===//
+/// AddToGroup - Adds a definer and user pair to a node group.
+///
+void ScheduleDAGSimple::AddToGroup(NodeInfo *D, NodeInfo *U) {
+  // Get current groups
+  NodeGroup *DGroup = D->Group;
+  NodeGroup *UGroup = U->Group;
+  // If both are members of groups
+  if (DGroup && UGroup) {
+    // There may have been another edge already connecting the two groups
+    if (DGroup == UGroup) return;
+    // Add the pending users count
+    DGroup->addPending(UGroup->getPending());
+    // For each member of the users group
+    NodeGroupIterator UNGI(U);
+    while (NodeInfo *UNI = UNGI.next()) {
+      // Change the group
+      UNI->Group = DGroup;
+      // For each member of the definers group
+      NodeGroupIterator DNGI(D);
+      while (NodeInfo *DNI = DNGI.next()) {
+        // Remove internal edges
+        DGroup->addPending(-CountInternalUses(DNI, UNI));
+      }
+    }
+    // Merge the two lists
+    DGroup->group_insert(DGroup->group_end(),
+                         UGroup->group_begin(), UGroup->group_end());
+  } else if (DGroup) {
+    // Make user member of definers group
+    U->Group = DGroup;
+    // Add users uses to definers group pending
+    DGroup->addPending(U->Node->use_size());
+    // For each member of the definers group
+    NodeGroupIterator DNGI(D);
+    while (NodeInfo *DNI = DNGI.next()) {
+      // Remove internal edges
+      DGroup->addPending(-CountInternalUses(DNI, U));
+    }
+    DGroup->group_push_back(U);
+  } else if (UGroup) {
+    // Make definer member of users group
+    D->Group = UGroup;
+    // Add definers uses to users group pending
+    UGroup->addPending(D->Node->use_size());
+    // For each member of the users group
+    NodeGroupIterator UNGI(U);
+    while (NodeInfo *UNI = UNGI.next()) {
+      // Remove internal edges
+      UGroup->addPending(-CountInternalUses(D, UNI));
+    }
+    UGroup->group_insert(UGroup->group_begin(), D);
+  } else {
+    D->Group = U->Group = DGroup = new NodeGroup();
+    DGroup->addPending(D->Node->use_size() + U->Node->use_size() -
+                       CountInternalUses(D, U));
+    DGroup->group_push_back(D);
+    DGroup->group_push_back(U);
+
+    if (HeadNG == NULL)
+      HeadNG = DGroup;
+    if (TailNG != NULL)
+      TailNG->Next = DGroup;
+    TailNG = DGroup;
+  }
+}
+
+
+/// print - Print ordering to specified output stream.
+///
+void ScheduleDAGSimple::print(std::ostream &O) const {
+#ifndef NDEBUG
+  O << "Ordering\n";
+  for (unsigned i = 0, N = Ordering.size(); i < N; i++) {
+    NodeInfo *NI = Ordering[i];
+    printNI(O, NI);
+    O << "\n";
+    if (NI->isGroupDominator()) {
+      NodeGroup *Group = NI->Group;
+      for (NIIterator NII = Group->group_begin(), E = Group->group_end();
+           NII != E; NII++) {
+        O << "    ";
+        printNI(O, *NII);
+        O << "\n";
+      }
+    }
+  }
+#endif
+}
+
+void ScheduleDAGSimple::dump(const char *tag) const {
+  cerr << tag; dump();
+}
+
+void ScheduleDAGSimple::dump() const {
+  print(cerr);
+}
+
+
+/// EmitAll - Emit all nodes in schedule sorted order.
+///
+void ScheduleDAGSimple::EmitAll() {
+  // If this is the first basic block in the function, and if it has live ins
+  // that need to be copied into vregs, emit the copies into the top of the
+  // block before emitting the code for the block.
+  MachineFunction &MF = DAG.getMachineFunction();
+  if (&MF.front() == BB && MF.livein_begin() != MF.livein_end()) {
+    for (MachineFunction::livein_iterator LI = MF.livein_begin(),
+         E = MF.livein_end(); LI != E; ++LI)
+      if (LI->second)
+        MRI->copyRegToReg(*MF.begin(), MF.begin()->end(), LI->second,
+                          LI->first, RegMap->getRegClass(LI->second));
+  }
+
+  DenseMap<SDOperand, unsigned> VRBaseMap;
+
+  // For each node in the ordering
+  for (unsigned i = 0, N = Ordering.size(); i < N; i++) {
+    // Get the scheduling info
+    NodeInfo *NI = Ordering[i];
+    if (NI->isInGroup()) {
+      NodeGroupIterator NGI(Ordering[i]);
+      while (NodeInfo *NI = NGI.next()) EmitNode(NI->Node, VRBaseMap);
+    } else {
+      EmitNode(NI->Node, VRBaseMap);
+    }
+  }
+}
+
+/// isFlagDefiner - Returns true if the node defines a flag result.
+static bool isFlagDefiner(SDNode *A) {
+  unsigned N = A->getNumValues();
+  return N && A->getValueType(N - 1) == MVT::Flag;
+}
+
+/// isFlagUser - Returns true if the node uses a flag result.
+///
+static bool isFlagUser(SDNode *A) {
+  unsigned N = A->getNumOperands();
+  return N && A->getOperand(N - 1).getValueType() == MVT::Flag;
+}
+
+/// printNI - Print node info.
+///
+void ScheduleDAGSimple::printNI(std::ostream &O, NodeInfo *NI) const {
+#ifndef NDEBUG
+  SDNode *Node = NI->Node;
+  O << " "
+    << std::hex << Node << std::dec
+    << ", Lat=" << NI->Latency
+    << ", Slot=" << NI->Slot
+    << ", ARITY=(" << Node->getNumOperands() << ","
+                   << Node->getNumValues() << ")"
+    << " " << Node->getOperationName(&DAG);
+  if (isFlagDefiner(Node)) O << "<#";
+  if (isFlagUser(Node))    O << ">#";
+#endif
+}
+
+/// printChanges - Highlight changes in order caused by scheduling.
+///
+void ScheduleDAGSimple::printChanges(unsigned Index) const {
+#ifndef NDEBUG
+  // Get the ordered node count
+  unsigned N = Ordering.size();
+  // Determine if any changes
+  unsigned i = 0;
+  for (; i < N; i++) {
+    NodeInfo *NI = Ordering[i];
+    if (NI->Preorder != i) break;
+  }
+
+  if (i < N) {
+    cerr << Index << ". New Ordering\n";
+
+    for (i = 0; i < N; i++) {
+      NodeInfo *NI = Ordering[i];
+      cerr << "  " << NI->Preorder << ". ";
+      printNI(cerr, NI);
+      cerr << "\n";
+      if (NI->isGroupDominator()) {
+        NodeGroup *Group = NI->Group;
+        for (NIIterator NII = Group->group_begin(), E = Group->group_end();
+             NII != E; NII++) {
+          cerr << "          ";
+          printNI(cerr, *NII);
+          cerr << "\n";
+        }
+      }
+    }
+  } else {
+    cerr << Index << ". No Changes\n";
+  }
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+/// isDefiner - Return true if node A is a definer for B.
+///
+bool ScheduleDAGSimple::isDefiner(NodeInfo *A, NodeInfo *B) {
+  // While there are A nodes
+  NodeGroupIterator NII(A);
+  while (NodeInfo *NI = NII.next()) {
+    // Extract node
+    SDNode *Node = NI->Node;
+    // While there are operands in the nodes of B
+    NodeGroupOpIterator NGOI(B);
+    while (!NGOI.isEnd()) {
+      SDOperand Op = NGOI.next();
+      // If a node from A defines a node in B
+      if (Node == Op.Val) return true;
+    }
+  }
+  return false;
+}
+
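isDefiner, just above, is a brute-force pairwise scan: every node of group A is tested against every operand of every node of group B. The same test over a toy graph, with plain ints standing in for nodes (all data invented):

#include <cstdio>
#include <vector>

// Does any node of A appear among the operands of any node of B?
static bool definesFor(const std::vector<int> &A, const std::vector<int> &B,
                       const std::vector<std::vector<int>> &Operands) {
  for (int a : A)
    for (int b : B)
      for (int Op : Operands[b])
        if (Op == a) return true;
  return false;
}

int main() {
  // Node 1 consumes node 0; node 2 consumes node 1.
  std::vector<std::vector<int>> Operands = {{}, {0}, {1}};
  std::printf("%d\n", definesFor({0}, {1}, Operands));  // 1: direct edge
  std::printf("%d\n", definesFor({0}, {2}, Operands));  // 0: only indirect
}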
+/// IncludeNode - Add a node to the ordering vector once all of its uses have
+/// been visited.
+void ScheduleDAGSimple::IncludeNode(NodeInfo *NI) {
+  // Get node
+  SDNode *Node = NI->Node;
+  // Ignore entry node
+  if (Node->getOpcode() == ISD::EntryToken) return;
+  // Check current count for node
+  int Count = NI->getPending();
+  // If the node is already in the list
+  if (Count < 0) return;
+  // Decrement count to indicate a visit
+  Count--;
+  // If count has gone to zero then add node to list
+  if (!Count) {
+    // Add node
+    if (NI->isInGroup()) {
+      Ordering.push_back(NI->Group->getDominator());
+    } else {
+      Ordering.push_back(NI);
+    }
+    // Indicate node has been added
+    Count--;
+  }
+  // Mark as visited with new count
+  NI->setPending(Count);
+}
+
+/// GatherSchedulingInfo - Get latency and resource information about each node.
+///
+void ScheduleDAGSimple::GatherSchedulingInfo() {
+  // Get instruction itineraries for the target
+  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+
+  // For each node
+  for (unsigned i = 0, N = NodeCount; i < N; i++) {
+    // Get node info
+    NodeInfo* NI = &Info[i];
+    SDNode *Node = NI->Node;
+
+    // If there are no itineraries (or we were told not to use them), guess a
+    // stage for each machine instruction from its result type
+    if (InstrItins.isEmpty() || NoItins) {
+      // If machine opcode
+      if (Node->isTargetOpcode()) {
+        // Get return type to guess which processing unit
+        MVT::ValueType VT = Node->getValueType(0);
+        // Get machine opcode
+        MachineOpCode TOpc = Node->getTargetOpcode();
+        NI->IsCall = TII->isCall(TOpc);
+        NI->IsLoad = TII->isLoad(TOpc);
+        NI->IsStore = TII->isStore(TOpc);
+
+        if (TII->isLoad(TOpc))              NI->StageBegin = &LoadStage;
+        else if (TII->isStore(TOpc))        NI->StageBegin = &StoreStage;
+        else if (MVT::isInteger(VT))        NI->StageBegin = &IntStage;
+        else if (MVT::isFloatingPoint(VT))  NI->StageBegin = &FloatStage;
+        if (NI->StageBegin) NI->StageEnd = NI->StageBegin + 1;
+      }
+    } else if (Node->isTargetOpcode()) {
+      // Get machine opcode
+      MachineOpCode TOpc = Node->getTargetOpcode();
+      // Check to see if it is a call
+      NI->IsCall = TII->isCall(TOpc);
+      // Get itinerary stages for instruction
+      unsigned II = TII->getSchedClass(TOpc);
+      NI->StageBegin = InstrItins.begin(II);
+      NI->StageEnd = InstrItins.end(II);
+    }
+
+    // One slot for the instruction itself
+    NI->Latency = 1;
+
+    // Add long latency for a call to push it back in time
+    if (NI->IsCall) NI->Latency += CallLatency;
+
+    // Sum up all the latencies
+    for (InstrStage *Stage = NI->StageBegin, *E = NI->StageEnd;
+         Stage != E; Stage++) {
+      NI->Latency += Stage->Cycles;
+    }
+
+    // Sum up all the latencies for max tally size
+    NSlots += NI->Latency;
+  }
+
+  // Unify metrics if in a group
+  if (HasGroups) {
+    for (unsigned i = 0, N = NodeCount; i < N; i++) {
+      NodeInfo* NI = &Info[i];
+
+      if (NI->isInGroup()) {
+        NodeGroup *Group = NI->Group;
+
+        if (!Group->getDominator()) {
+          NIIterator NGI = Group->group_begin(), NGE = Group->group_end();
+          NodeInfo *Dominator = *NGI;
+          unsigned Latency = 0;
+
+          for (NGI++; NGI != NGE; NGI++) {
+            NodeInfo* NGNI = *NGI;
+            Latency += NGNI->Latency;
+            if (Dominator->Latency < NGNI->Latency) Dominator = NGNI;
+          }
+
+          Dominator->Latency = Latency;
+          Group->setDominator(Dominator);
+        }
+      }
+    }
+  }
+}
+
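VisitAll, below, is a use-count-driven breadth-first walk: a node enters the ordering once every user that consumes it has been placed, and the whole list is reversed at the end. The same idea on a four-node toy DAG (data invented):

#include <cstdio>
#include <vector>

int main() {
  // Node 0 is the root; Operands[n] lists what node n consumes.
  std::vector<std::vector<int>> Operands = {{1, 2}, {3}, {3}, {}};
  std::vector<int> Pending = {0, 1, 1, 2};  // use counts per node
  std::vector<int> Order = {0};             // start from the root
  for (unsigned i = 0; i < Order.size(); ++i) {  // Order grows as we walk
    for (int Op : Operands[Order[i]])
      if (--Pending[Op] == 0)  // all users placed: node may be ordered
        Order.push_back(Op);
  }
  for (unsigned i = Order.size(); i-- > 0;)  // reverse for an emit order
    std::printf("%d ", Order[i]);            // prints: 3 2 1 0
  std::printf("\n");
}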
+/// VisitAll - Visit each node breadth-wise to produce an initial ordering.
+/// Note that the ordering in the Nodes vector is reversed.
+void ScheduleDAGSimple::VisitAll() {
+  // Add first element to list
+  NodeInfo *NI = getNI(DAG.getRoot().Val);
+  if (NI->isInGroup()) {
+    Ordering.push_back(NI->Group->getDominator());
+  } else {
+    Ordering.push_back(NI);
+  }
+
+  // Iterate through all nodes that have been added
+  for (unsigned i = 0; i < Ordering.size(); i++) {  // note: size() varies
+    // Visit all operands
+    NodeGroupOpIterator NGI(Ordering[i]);
+    while (!NGI.isEnd()) {
+      // Get next operand
+      SDOperand Op = NGI.next();
+      // Get node
+      SDNode *Node = Op.Val;
+      // Ignore passive nodes
+      if (isPassiveNode(Node)) continue;
+      // Check out node
+      IncludeNode(getNI(Node));
+    }
+  }
+
+  // Add entry node last (IncludeNode filters entry nodes)
+  if (DAG.getEntryNode().Val != DAG.getRoot().Val)
+    Ordering.push_back(getNI(DAG.getEntryNode().Val));
+
+  // Reverse the order
+  std::reverse(Ordering.begin(), Ordering.end());
+}
+
+/// FakeGroupDominators - Set dominators for non-scheduling.
+///
+void ScheduleDAGSimple::FakeGroupDominators() {
+  for (unsigned i = 0, N = NodeCount; i < N; i++) {
+    NodeInfo* NI = &Info[i];
+
+    if (NI->isInGroup()) {
+      NodeGroup *Group = NI->Group;
+
+      if (!Group->getDominator()) {
+        Group->setDominator(NI);
+      }
+    }
+  }
+}
+
+/// isStrongDependency - Return true if node A has results used by node B,
+/// i.e. B must wait for the latency of A.
+bool ScheduleDAGSimple::isStrongDependency(NodeInfo *A, NodeInfo *B) {
+  // If A defines for B then it's a strong dependency, or
+  // if a load follows a store (may be dependent but why take a chance?)
+  return isDefiner(A, B) || (A->IsStore && B->IsLoad);
+}
+
+/// isWeakDependency - Return true if node A produces a result that will
+/// conflict with operands of B.  It is assumed that isStrongDependency has
+/// already been called.
+bool ScheduleDAGSimple::isWeakDependency(NodeInfo *A, NodeInfo *B) {
+  // TODO check for conflicting real registers and aliases
+#if 0  // FIXME - Since we are in SSA form and not checking register aliasing
+  return A->Node->getOpcode() == ISD::EntryToken || isStrongDependency(B, A);
+#else
+  return A->Node->getOpcode() == ISD::EntryToken;
+#endif
+}
+
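Stripped of the resource tally and the in-place insertion sort, the slot computation in the two passes below reduces to the usual list-scheduling recurrence: a node's slot is the maximum, over nodes it must wait on, of their slot plus their latency. A forward-time toy version (data invented):

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Latency = {3, 1, 2};  // nodes 0, 1, 2
  std::vector<std::vector<int>> Producers = {{}, {0}, {0, 1}};
  std::vector<int> Slot(3, 0);
  for (int i = 0; i != 3; ++i)           // topological order: producers first
    for (int P : Producers[i])
      Slot[i] = std::max(Slot[i], Slot[P] + Latency[P]);
  for (int i = 0; i != 3; ++i)
    std::printf("node %d -> slot %d\n", i, Slot[i]);  // slots 0, 3, 4
}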
+/// ScheduleBackward - Schedule instructions so that any long latency
+/// instructions and the critical path get pushed back in time.  Time is run
+/// in reverse to allow code reuse of the Tally and eliminate the overhead of
+/// biasing every slot index against NSlots.
+void ScheduleDAGSimple::ScheduleBackward() {
+  // Size and clear the resource tally
+  Tally.Initialize(NSlots);
+  // Get number of nodes to schedule
+  unsigned N = Ordering.size();
+
+  // For each node being scheduled
+  for (unsigned i = N; 0 < i--;) {
+    NodeInfo *NI = Ordering[i];
+    // Track insertion
+    unsigned Slot = NotFound;
+
+    // Compare against those previously scheduled nodes
+    unsigned j = i + 1;
+    for (; j < N; j++) {
+      // Get following instruction
+      NodeInfo *Other = Ordering[j];
+
+      // Check dependency against previously inserted nodes
+      if (isStrongDependency(NI, Other)) {
+        Slot = Other->Slot + Other->Latency;
+        break;
+      } else if (isWeakDependency(NI, Other)) {
+        Slot = Other->Slot;
+        break;
+      }
+    }
+
+    // If independent of others (or first entry)
+    if (Slot == NotFound) Slot = 0;
+
+#if 0 // FIXME - measure later
+    // Find a slot where the needed resources are available
+    if (NI->StageBegin != NI->StageEnd)
+      Slot = Tally.FindAndReserve(Slot, NI->StageBegin, NI->StageEnd);
+#endif
+
+    // Set node slot
+    NI->Slot = Slot;
+
+    // Insert sort based on slot
+    j = i + 1;
+    for (; j < N; j++) {
+      // Get following instruction
+      NodeInfo *Other = Ordering[j];
+      // Should we look further (remember slots are in reverse time)
+      if (Slot >= Other->Slot) break;
+      // Shuffle other into ordering
+      Ordering[j - 1] = Other;
+    }
+    // Insert node in proper slot
+    if (j != i + 1) Ordering[j - 1] = NI;
+  }
+}
+
+/// ScheduleForward - Schedule instructions to maximize packing.
+///
+void ScheduleDAGSimple::ScheduleForward() {
+  // Size and clear the resource tally
+  Tally.Initialize(NSlots);
+  // Get number of nodes to schedule
+  unsigned N = Ordering.size();
+
+  // For each node being scheduled
+  for (unsigned i = 0; i < N; i++) {
+    NodeInfo *NI = Ordering[i];
+    // Track insertion
+    unsigned Slot = NotFound;
+
+    // Compare against those previously scheduled nodes
+    unsigned j = i;
+    for (; 0 < j--;) {
+      // Get prior instruction
+      NodeInfo *Other = Ordering[j];
+
+      // Check dependency against previously inserted nodes
+      if (isStrongDependency(Other, NI)) {
+        Slot = Other->Slot + Other->Latency;
+        break;
+      } else if (Other->IsCall || isWeakDependency(Other, NI)) {
+        Slot = Other->Slot;
+        break;
+      }
+    }
+
+    // If independent of others (or first entry)
+    if (Slot == NotFound) Slot = 0;
+
+    // Find a slot where the needed resources are available
+    if (NI->StageBegin != NI->StageEnd)
+      Slot = Tally.FindAndReserve(Slot, NI->StageBegin, NI->StageEnd);
+
+    // Set node slot
+    NI->Slot = Slot;
+
+    // Insert sort based on slot
+    j = i;
+    for (; 0 < j--;) {
+      // Get prior instruction
+      NodeInfo *Other = Ordering[j];
+      // Should we look further
+      if (Slot >= Other->Slot) break;
+      // Shuffle other into ordering
+      Ordering[j + 1] = Other;
+    }
+    // Insert node in proper slot
+    if (j != i) Ordering[j + 1] = NI;
+  }
+}
+
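The shuffle at the end of each pass above is a single insertion-sort step: the freshly slotted node slides past its neighbours until the ordering is ascending by slot again. A standalone version (invented data):

#include <cstdio>
#include <vector>

// Slide element i of Slots left until the prefix is sorted ascending.
static void insertBySlot(std::vector<int> &Slots, unsigned i) {
  int S = Slots[i];
  unsigned j = i;
  for (; j > 0 && Slots[j - 1] > S; --j)
    Slots[j] = Slots[j - 1];  // shuffle the larger neighbour up
  Slots[j] = S;
}

int main() {
  std::vector<int> Slots = {0, 2, 5, 1};  // the last node just got slot 1
  insertBySlot(Slots, 3);
  for (int S : Slots) std::printf("%d ", S);  // prints: 0 1 2 5
  std::printf("\n");
}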
+/// Schedule - Order nodes according to selected style.
+///
+void ScheduleDAGSimple::Schedule() {
+  // Number the nodes
+  NodeCount = std::distance(DAG.allnodes_begin(), DAG.allnodes_end());
+
+  // Set up minimum info for scheduling
+  PrepareNodeInfo();
+  // Construct node groups for flagged nodes
+  IdentifyGroups();
+
+  // Test to see if scheduling should occur
+  bool ShouldSchedule = NodeCount > 3 && !NoSched;
+  // Don't waste time if it is only entry and return
+  if (ShouldSchedule) {
+    // Get latency and resource requirements
+    GatherSchedulingInfo();
+  } else if (HasGroups) {
+    // Make sure all the groups have dominators
+    FakeGroupDominators();
+  }
+
+  // Breadth first walk of DAG
+  VisitAll();
+
+#ifndef NDEBUG
+  static unsigned Count = 0;
+  Count++;
+  for (unsigned i = 0, N = Ordering.size(); i < N; i++) {
+    NodeInfo *NI = Ordering[i];
+    NI->Preorder = i;
+  }
+#endif
+
+  // Don't waste time if it is only entry and return
+  if (ShouldSchedule) {
+    // Push back long instructions and critical path
+    ScheduleBackward();
+
+    // Pack instructions to maximize resource utilization
+    ScheduleForward();
+  }
+
+  DEBUG(printChanges(Count));
+
+  // Emit in scheduled order
+  EmitAll();
+}
+
+
+/// createSimpleDAGScheduler - This creates a simple two pass instruction
+/// scheduler using instruction itineraries.
+llvm::ScheduleDAG* llvm::createSimpleDAGScheduler(SelectionDAGISel *IS,
+                                                  SelectionDAG *DAG,
+                                                  MachineBasicBlock *BB) {
+  return new ScheduleDAGSimple(false, false, *DAG, BB, DAG->getTarget());
+}
+
+/// createNoItinsDAGScheduler - This creates a simple two pass instruction
+/// scheduler without using instruction itineraries.
+llvm::ScheduleDAG* llvm::createNoItinsDAGScheduler(SelectionDAGISel *IS,
+                                                   SelectionDAG *DAG,
+                                                   MachineBasicBlock *BB) {
+  return new ScheduleDAGSimple(false, true, *DAG, BB, DAG->getTarget());
+}
+
+/// createBFS_DAGScheduler - This creates a simple breadth first instruction
+/// scheduler.
+llvm::ScheduleDAG* llvm::createBFS_DAGScheduler(SelectionDAGISel *IS,
+                                                SelectionDAG *DAG,
+                                                MachineBasicBlock *BB) {
+  return new ScheduleDAGSimple(true, false, *DAG, BB, DAG->getTarget());
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
new file mode 100644
index 0000000..9803ab8
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -0,0 +1,3694 @@
+//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG class.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Constants.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Intrinsics.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/MRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include <algorithm> +#include <cmath> +using namespace llvm; + +/// makeVTList - Return an instance of the SDVTList struct initialized with the +/// specified members. +static SDVTList makeVTList(const MVT::ValueType *VTs, unsigned NumVTs) { + SDVTList Res = {VTs, NumVTs}; + return Res; +} + +//===----------------------------------------------------------------------===// +// ConstantFPSDNode Class +//===----------------------------------------------------------------------===// + +/// isExactlyValue - We don't rely on operator== working on double values, as +/// it returns true for things that are clearly not equal, like -0.0 and 0.0. +/// As such, this method can be used to do an exact bit-for-bit comparison of +/// two floating point values. +bool ConstantFPSDNode::isExactlyValue(double V) const { + return DoubleToBits(V) == DoubleToBits(Value); +} + +//===----------------------------------------------------------------------===// +// ISD Namespace +//===----------------------------------------------------------------------===// + +/// isBuildVectorAllOnes - Return true if the specified node is a +/// BUILD_VECTOR where all of the elements are ~0 or undef. +bool ISD::isBuildVectorAllOnes(const SDNode *N) { + // Look through a bit convert. + if (N->getOpcode() == ISD::BIT_CONVERT) + N = N->getOperand(0).Val; + + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; + + unsigned i = 0, e = N->getNumOperands(); + + // Skip over all of the undef values. + while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF) + ++i; + + // Do not accept an all-undef vector. + if (i == e) return false; + + // Do not accept build_vectors that aren't all constants or which have non-~0 + // elements. + SDOperand NotZero = N->getOperand(i); + if (isa<ConstantSDNode>(NotZero)) { + if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue()) + return false; + } else if (isa<ConstantFPSDNode>(NotZero)) { + MVT::ValueType VT = NotZero.getValueType(); + if (VT== MVT::f64) { + if (DoubleToBits(cast<ConstantFPSDNode>(NotZero)->getValue()) != + (uint64_t)-1) + return false; + } else { + if (FloatToBits(cast<ConstantFPSDNode>(NotZero)->getValue()) != + (uint32_t)-1) + return false; + } + } else + return false; + + // Okay, we have at least one ~0 value, check to see if the rest match or are + // undefs. + for (++i; i != e; ++i) + if (N->getOperand(i) != NotZero && + N->getOperand(i).getOpcode() != ISD::UNDEF) + return false; + return true; +} + + +/// isBuildVectorAllZeros - Return true if the specified node is a +/// BUILD_VECTOR where all of the elements are 0 or undef. +bool ISD::isBuildVectorAllZeros(const SDNode *N) { + // Look through a bit convert. 
+  if (N->getOpcode() == ISD::BIT_CONVERT)
+    N = N->getOperand(0).Val;
+
+  if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+  unsigned i = 0, e = N->getNumOperands();
+
+  // Skip over all of the undef values.
+  while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+    ++i;
+
+  // Do not accept an all-undef vector.
+  if (i == e) return false;
+
+  // Do not accept build_vectors that aren't all constants or which have
+  // non-zero elements.
+  SDOperand Zero = N->getOperand(i);
+  if (isa<ConstantSDNode>(Zero)) {
+    if (!cast<ConstantSDNode>(Zero)->isNullValue())
+      return false;
+  } else if (isa<ConstantFPSDNode>(Zero)) {
+    if (!cast<ConstantFPSDNode>(Zero)->isExactlyValue(0.0))
+      return false;
+  } else
+    return false;
+
+  // Okay, we have at least one zero value, check to see if the rest match or
+  // are undefs.
+  for (++i; i != e; ++i)
+    if (N->getOperand(i) != Zero &&
+        N->getOperand(i).getOpcode() != ISD::UNDEF)
+      return false;
+  return true;
+}
+
+/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
+/// when given the operation for (X op Y).
+ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
+  // To perform this operation, we just need to swap the L and G bits of the
+  // operation.
+  unsigned OldL = (Operation >> 2) & 1;
+  unsigned OldG = (Operation >> 1) & 1;
+  return ISD::CondCode((Operation & ~6) |  // Keep the N, U, E bits
+                       (OldL << 1) |       // New G bit
+                       (OldG << 2));       // New L bit.
+}
+
+/// getSetCCInverse - Return the operation corresponding to !(X op Y), where
+/// 'op' is a valid SetCC operation.
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
+  unsigned Operation = Op;
+  if (isInteger)
+    Operation ^= 7;   // Flip L, G, E bits, but not U.
+  else
+    Operation ^= 15;  // Flip all of the condition bits.
+  if (Operation > ISD::SETTRUE2)
+    Operation &= ~8;  // Don't let N and U bits get set.
+  return ISD::CondCode(Operation);
+}
+
+
+/// isSignedOp - For an integer comparison, return 1 if the comparison is a
+/// signed operation and 2 if it is an unsigned comparison.  Return zero if
+/// the operation does not depend on the sign of the input (setne and seteq).
+static int isSignedOp(ISD::CondCode Opcode) {
+  switch (Opcode) {
+  default: assert(0 && "Illegal integer setcc operation!");
+  case ISD::SETEQ:
+  case ISD::SETNE: return 0;
+  case ISD::SETLT:
+  case ISD::SETLE:
+  case ISD::SETGT:
+  case ISD::SETGE: return 1;
+  case ISD::SETULT:
+  case ISD::SETULE:
+  case ISD::SETUGT:
+  case ISD::SETUGE: return 2;
+  }
+}
+
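getSetCCSwappedOperands and friends treat a condition code as a small bit set; exchanging the L and G bits turns the code for `X op Y` into the code for `Y op X`. A toy encoding that mirrors just the L/G/E bit positions the code above manipulates (the enum values here are invented, not the real ISD ones):

#include <cstdio>

enum ToyCond { TSETEQ = 0x1, TSETGT = 0x2, TSETGE = 0x3,
               TSETLT = 0x4, TSETLE = 0x5 };  // bit2 = L, bit1 = G, bit0 = E

static unsigned swapOperands(unsigned Op) {
  unsigned OldL = (Op >> 2) & 1;
  unsigned OldG = (Op >> 1) & 1;
  return (Op & ~6u) | (OldL << 1) | (OldG << 2);  // exchange L and G
}

int main() {
  std::printf("%d\n", swapOperands(TSETGT) == TSETLT);  // 1: X>Y  <=> Y<X
  std::printf("%d\n", swapOperands(TSETGE) == TSETLE);  // 1: X>=Y <=> Y<=X
  std::printf("%d\n", swapOperands(TSETEQ) == TSETEQ);  // 1: symmetric
}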
+/// getSetCCOrOperation - Return the result of a logical OR between different
+/// comparisons of identical values: ((X op1 Y) | (X op2 Y)).  This function
+/// returns SETCC_INVALID if it is not possible to represent the resultant
+/// comparison.
+ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+                                       bool isInteger) {
+  if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+    // Cannot fold a signed integer setcc with an unsigned integer setcc.
+    return ISD::SETCC_INVALID;
+
+  unsigned Op = Op1 | Op2;  // Combine all of the condition bits.
+
+  // If the N and U bits get set then the resultant comparison DOES suddenly
+  // care about orderedness, and is true when ordered.
+  if (Op > ISD::SETTRUE2)
+    Op &= ~16;  // Clear the U bit if the N bit is set.
+
+  // Canonicalize illegal integer setcc's.
+  if (isInteger && Op == ISD::SETUNE)  // e.g. SETUGT | SETULT
+    Op = ISD::SETNE;
+
+  return ISD::CondCode(Op);
+}
+
+/// getSetCCAndOperation - Return the result of a logical AND between different
+/// comparisons of identical values: ((X op1 Y) & (X op2 Y)).  This function
+/// returns SETCC_INVALID if it is not possible to represent the resultant
+/// comparison.
+ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+                                        bool isInteger) {
+  if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+    // Cannot fold a signed setcc with an unsigned setcc.
+    return ISD::SETCC_INVALID;
+
+  // Combine all of the condition bits.
+  ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
+
+  // Canonicalize illegal integer setcc's.
+  if (isInteger) {
+    switch (Result) {
+    default: break;
+    case ISD::SETUO : Result = ISD::SETFALSE; break;  // SETUGT & SETULT
+    case ISD::SETUEQ: Result = ISD::SETEQ   ; break;  // SETUGE & SETULE
+    case ISD::SETOLT: Result = ISD::SETULT  ; break;  // SETULT & SETNE
+    case ISD::SETOGT: Result = ISD::SETUGT  ; break;  // SETUGT & SETNE
+    }
+  }
+
+  return Result;
+}
+
+const TargetMachine &SelectionDAG::getTarget() const {
+  return TLI.getTargetMachine();
+}
+
+//===----------------------------------------------------------------------===//
+// SDNode Profile Support
+//===----------------------------------------------------------------------===//
+
+/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
+///
+static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
+  ID.AddInteger(OpC);
+}
+
+/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent
+/// them solely with their pointer.
+static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
+  ID.AddPointer(VTList.VTs);
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID
+/// data.
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+                              const SDOperand *Ops, unsigned NumOps) {
+  for (; NumOps; --NumOps, ++Ops) {
+    ID.AddPointer(Ops->Val);
+    ID.AddInteger(Ops->ResNo);
+  }
+}
+
+static void AddNodeIDNode(FoldingSetNodeID &ID,
+                          unsigned short OpC, SDVTList VTList,
+                          const SDOperand *OpList, unsigned N) {
+  AddNodeIDOpcode(ID, OpC);
+  AddNodeIDValueTypes(ID, VTList);
+  AddNodeIDOperands(ID, OpList, N);
+}
+
+/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, SDNode *N) {
+  AddNodeIDOpcode(ID, N->getOpcode());
+  // Add the return value info.
+  AddNodeIDValueTypes(ID, N->getVTList());
+  // Add the operand info.
+  AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands());
+
+  // Handle SDNode leaves with special info.
+  switch (N->getOpcode()) {
+  default: break;  // Normal nodes don't need extra info.
+ case ISD::TargetConstant: + case ISD::Constant: + ID.AddInteger(cast<ConstantSDNode>(N)->getValue()); + break; + case ISD::TargetConstantFP: + case ISD::ConstantFP: + ID.AddDouble(cast<ConstantFPSDNode>(N)->getValue()); + break; + case ISD::TargetGlobalAddress: + case ISD::GlobalAddress: + case ISD::TargetGlobalTLSAddress: + case ISD::GlobalTLSAddress: { + GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); + ID.AddPointer(GA->getGlobal()); + ID.AddInteger(GA->getOffset()); + break; + } + case ISD::BasicBlock: + ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock()); + break; + case ISD::Register: + ID.AddInteger(cast<RegisterSDNode>(N)->getReg()); + break; + case ISD::SRCVALUE: { + SrcValueSDNode *SV = cast<SrcValueSDNode>(N); + ID.AddPointer(SV->getValue()); + ID.AddInteger(SV->getOffset()); + break; + } + case ISD::FrameIndex: + case ISD::TargetFrameIndex: + ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex()); + break; + case ISD::JumpTable: + case ISD::TargetJumpTable: + ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex()); + break; + case ISD::ConstantPool: + case ISD::TargetConstantPool: { + ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N); + ID.AddInteger(CP->getAlignment()); + ID.AddInteger(CP->getOffset()); + if (CP->isMachineConstantPoolEntry()) + CP->getMachineCPVal()->AddSelectionDAGCSEId(ID); + else + ID.AddPointer(CP->getConstVal()); + break; + } + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(N); + ID.AddInteger(LD->getAddressingMode()); + ID.AddInteger(LD->getExtensionType()); + ID.AddInteger(LD->getLoadedVT()); + ID.AddPointer(LD->getSrcValue()); + ID.AddInteger(LD->getSrcValueOffset()); + ID.AddInteger(LD->getAlignment()); + ID.AddInteger(LD->isVolatile()); + break; + } + case ISD::STORE: { + StoreSDNode *ST = cast<StoreSDNode>(N); + ID.AddInteger(ST->getAddressingMode()); + ID.AddInteger(ST->isTruncatingStore()); + ID.AddInteger(ST->getStoredVT()); + ID.AddPointer(ST->getSrcValue()); + ID.AddInteger(ST->getSrcValueOffset()); + ID.AddInteger(ST->getAlignment()); + ID.AddInteger(ST->isVolatile()); + break; + } + } +} + +//===----------------------------------------------------------------------===// +// SelectionDAG Class +//===----------------------------------------------------------------------===// + +/// RemoveDeadNodes - This method deletes all unreachable nodes in the +/// SelectionDAG. +void SelectionDAG::RemoveDeadNodes() { + // Create a dummy node (which is not added to allnodes), that adds a reference + // to the root node, preventing it from being deleted. + HandleSDNode Dummy(getRoot()); + + SmallVector<SDNode*, 128> DeadNodes; + + // Add all obviously-dead nodes to the DeadNodes worklist. + for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) + if (I->use_empty()) + DeadNodes.push_back(I); + + // Process the worklist, deleting the nodes and adding their uses to the + // worklist. + while (!DeadNodes.empty()) { + SDNode *N = DeadNodes.back(); + DeadNodes.pop_back(); + + // Take the node out of the appropriate CSE map. + RemoveNodeFromCSEMaps(N); + + // Next, brutally remove the operand list. This is safe to do, as there are + // no cycles in the graph. + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) { + SDNode *Operand = I->Val; + Operand->removeUser(N); + + // Now that we removed this operand, see if there are no uses of it left. 
+      if (Operand->use_empty())
+        DeadNodes.push_back(Operand);
+    }
+    if (N->OperandsNeedDelete)
+      delete[] N->OperandList;
+    N->OperandList = 0;
+    N->NumOperands = 0;
+
+    // Finally, remove N itself.
+    AllNodes.erase(N);
+  }
+
+  // If the root changed (e.g. it was a dead load), update the root.
+  setRoot(Dummy.getValue());
+}
+
+void SelectionDAG::RemoveDeadNode(SDNode *N, std::vector<SDNode*> &Deleted) {
+  SmallVector<SDNode*, 16> DeadNodes;
+  DeadNodes.push_back(N);
+
+  // Process the worklist, deleting the nodes and adding their uses to the
+  // worklist.
+  while (!DeadNodes.empty()) {
+    SDNode *N = DeadNodes.back();
+    DeadNodes.pop_back();
+
+    // Take the node out of the appropriate CSE map.
+    RemoveNodeFromCSEMaps(N);
+
+    // Next, brutally remove the operand list.  This is safe to do, as there
+    // are no cycles in the graph.
+    for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
+      SDNode *Operand = I->Val;
+      Operand->removeUser(N);
+
+      // Now that we removed this operand, see if there are no uses of it left.
+      if (Operand->use_empty())
+        DeadNodes.push_back(Operand);
+    }
+    if (N->OperandsNeedDelete)
+      delete[] N->OperandList;
+    N->OperandList = 0;
+    N->NumOperands = 0;
+
+    // Finally, remove N itself.
+    Deleted.push_back(N);
+    AllNodes.erase(N);
+  }
+}
+
+void SelectionDAG::DeleteNode(SDNode *N) {
+  assert(N->use_empty() && "Cannot delete a node that is not dead!");
+
+  // First take this out of the appropriate CSE map.
+  RemoveNodeFromCSEMaps(N);
+
+  // Finally, remove uses due to operands of this node, remove from the
+  // AllNodes list, and delete the node.
+  DeleteNodeNotInCSEMaps(N);
+}
+
+void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
+  // Remove it from the AllNodes list.
+  AllNodes.remove(N);
+
+  // Drop all of the operands and decrement used nodes' use counts.
+  for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I)
+    I->Val->removeUser(N);
+  if (N->OperandsNeedDelete)
+    delete[] N->OperandList;
+  N->OperandList = 0;
+  N->NumOperands = 0;
+
+  delete N;
+}
+
+/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE maps that
+/// correspond to it.  This is useful when we're about to delete or repurpose
+/// the node.  We don't want future requests for structurally identical nodes
+/// to return N anymore.
+void SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
+  bool Erased = false;
+  switch (N->getOpcode()) {
+  case ISD::HANDLENODE: return;  // noop.
+  case ISD::STRING:
+    Erased = StringNodes.erase(cast<StringSDNode>(N)->getValue());
+    break;
+  case ISD::CONDCODE:
+    assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
+           "Cond code doesn't exist!");
+    Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0;
+    CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0;
+    break;
+  case ISD::ExternalSymbol:
+    Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+    break;
+  case ISD::TargetExternalSymbol:
+    Erased =
+      TargetExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+    break;
+  case ISD::VALUETYPE:
+    Erased = ValueTypeNodes[cast<VTSDNode>(N)->getVT()] != 0;
+    ValueTypeNodes[cast<VTSDNode>(N)->getVT()] = 0;
+    break;
+  default:
+    // Remove it from the CSE Map.
+    Erased = CSEMap.RemoveNode(N);
+    break;
+  }
+#ifndef NDEBUG
+  // Verify that the node was actually in one of the CSE maps, unless it has a
+  // flag result (which cannot be CSE'd) or is one of the special cases that
+  // are not subject to CSE.
+ if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag && + !N->isTargetOpcode()) { + N->dump(this); + cerr << "\n"; + assert(0 && "Node is not in map!"); + } +#endif +} + +/// AddNonLeafNodeToCSEMaps - Add the specified node back to the CSE maps. It +/// has been taken out and modified in some way. If the specified node already +/// exists in the CSE maps, do not modify the maps, but return the existing node +/// instead. If it doesn't exist, add it and return null. +/// +SDNode *SelectionDAG::AddNonLeafNodeToCSEMaps(SDNode *N) { + assert(N->getNumOperands() && "This is a leaf node!"); + if (N->getOpcode() == ISD::HANDLENODE || N->getValueType(0) == MVT::Flag) + return 0; // Never add these nodes. + + // Check that remaining values produced are not flags. + for (unsigned i = 1, e = N->getNumValues(); i != e; ++i) + if (N->getValueType(i) == MVT::Flag) + return 0; // Never CSE anything that produces a flag. + + SDNode *New = CSEMap.GetOrInsertNode(N); + if (New != N) return New; // Node already existed. + return 0; +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDOperand Op, + void *&InsertPos) { + if (N->getOpcode() == ISD::HANDLENODE || N->getValueType(0) == MVT::Flag) + return 0; // Never add these nodes. + + // Check that remaining values produced are not flags. + for (unsigned i = 1, e = N->getNumValues(); i != e; ++i) + if (N->getValueType(i) == MVT::Flag) + return 0; // Never CSE anything that produces a flag. + + SDOperand Ops[] = { Op }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1); + return CSEMap.FindNodeOrInsertPos(ID, InsertPos); +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, + SDOperand Op1, SDOperand Op2, + void *&InsertPos) { + if (N->getOpcode() == ISD::HANDLENODE || N->getValueType(0) == MVT::Flag) + return 0; // Never add these nodes. + + // Check that remaining values produced are not flags. + for (unsigned i = 1, e = N->getNumValues(); i != e; ++i) + if (N->getValueType(i) == MVT::Flag) + return 0; // Never CSE anything that produces a flag. + + SDOperand Ops[] = { Op1, Op2 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2); + return CSEMap.FindNodeOrInsertPos(ID, InsertPos); +} + + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, + const SDOperand *Ops,unsigned NumOps, + void *&InsertPos) { + if (N->getOpcode() == ISD::HANDLENODE || N->getValueType(0) == MVT::Flag) + return 0; // Never add these nodes. + + // Check that remaining values produced are not flags. 
+ for (unsigned i = 1, e = N->getNumValues(); i != e; ++i) + if (N->getValueType(i) == MVT::Flag) + return 0; // Never CSE anything that produces a flag. + + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps); + + if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + ID.AddInteger(LD->getAddressingMode()); + ID.AddInteger(LD->getExtensionType()); + ID.AddInteger(LD->getLoadedVT()); + ID.AddPointer(LD->getSrcValue()); + ID.AddInteger(LD->getSrcValueOffset()); + ID.AddInteger(LD->getAlignment()); + ID.AddInteger(LD->isVolatile()); + } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + ID.AddInteger(ST->getAddressingMode()); + ID.AddInteger(ST->isTruncatingStore()); + ID.AddInteger(ST->getStoredVT()); + ID.AddPointer(ST->getSrcValue()); + ID.AddInteger(ST->getSrcValueOffset()); + ID.AddInteger(ST->getAlignment()); + ID.AddInteger(ST->isVolatile()); + } + + return CSEMap.FindNodeOrInsertPos(ID, InsertPos); +} + + +SelectionDAG::~SelectionDAG() { + while (!AllNodes.empty()) { + SDNode *N = AllNodes.begin(); + N->SetNextInBucket(0); + if (N->OperandsNeedDelete) + delete [] N->OperandList; + N->OperandList = 0; + N->NumOperands = 0; + AllNodes.pop_front(); + } +} + +SDOperand SelectionDAG::getZeroExtendInReg(SDOperand Op, MVT::ValueType VT) { + if (Op.getValueType() == VT) return Op; + int64_t Imm = ~0ULL >> (64-MVT::getSizeInBits(VT)); + return getNode(ISD::AND, Op.getValueType(), Op, + getConstant(Imm, Op.getValueType())); +} + +SDOperand SelectionDAG::getString(const std::string &Val) { + StringSDNode *&N = StringNodes[Val]; + if (!N) { + N = new StringSDNode(Val); + AllNodes.push_back(N); + } + return SDOperand(N, 0); +} + +SDOperand SelectionDAG::getConstant(uint64_t Val, MVT::ValueType VT, bool isT) { + assert(MVT::isInteger(VT) && "Cannot create FP integer constant!"); + assert(!MVT::isVector(VT) && "Cannot create Vector ConstantSDNodes!"); + + // Mask out any bits that are not valid for this constant. + Val &= MVT::getIntVTBitMask(VT); + + unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(Val); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDOperand(E, 0); + SDNode *N = new ConstantSDNode(isT, Val, VT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDOperand(N, 0); +} + + +SDOperand SelectionDAG::getConstantFP(double Val, MVT::ValueType VT, + bool isTarget) { + assert(MVT::isFloatingPoint(VT) && "Cannot create integer FP constant!"); + MVT::ValueType EltVT = + MVT::isVector(VT) ? MVT::getVectorElementType(VT) : VT; + if (EltVT == MVT::f32) + Val = (float)Val; // Mask out extra precision. + + // Do the map lookup using the actual bit pattern for the floating point + // value, so that we don't have problems with 0.0 comparing equal to -0.0, and + // we don't have issues with SNANs. + unsigned Opc = isTarget ? 
ISD::TargetConstantFP : ISD::ConstantFP; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); + ID.AddDouble(Val); + void *IP = 0; + SDNode *N = NULL; + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if (!MVT::isVector(VT)) + return SDOperand(N, 0); + if (!N) { + N = new ConstantFPSDNode(isTarget, Val, EltVT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + } + + SDOperand Result(N, 0); + if (MVT::isVector(VT)) { + SmallVector<SDOperand, 8> Ops; + Ops.assign(MVT::getVectorNumElements(VT), Result); + Result = getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size()); + } + return Result; +} + +SDOperand SelectionDAG::getGlobalAddress(const GlobalValue *GV, + MVT::ValueType VT, int Offset, + bool isTargetGA) { + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + unsigned Opc; + if (GVar && GVar->isThreadLocal()) + Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress; + else + Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddPointer(GV); + ID.AddInteger(Offset); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDOperand(E, 0); + SDNode *N = new GlobalAddressSDNode(isTargetGA, GV, VT, Offset); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDOperand(N, 0); +} + +SDOperand SelectionDAG::getFrameIndex(int FI, MVT::ValueType VT, + bool isTarget) { + unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(FI); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDOperand(E, 0); + SDNode *N = new FrameIndexSDNode(FI, VT, isTarget); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDOperand(N, 0); +} + +SDOperand SelectionDAG::getJumpTable(int JTI, MVT::ValueType VT, bool isTarget){ + unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(JTI); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDOperand(E, 0); + SDNode *N = new JumpTableSDNode(JTI, VT, isTarget); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDOperand(N, 0); +} + +SDOperand SelectionDAG::getConstantPool(Constant *C, MVT::ValueType VT, + unsigned Alignment, int Offset, + bool isTarget) { + unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(Alignment); + ID.AddInteger(Offset); + ID.AddPointer(C); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDOperand(E, 0); + SDNode *N = new ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDOperand(N, 0); +} + + +SDOperand SelectionDAG::getConstantPool(MachineConstantPoolValue *C, + MVT::ValueType VT, + unsigned Alignment, int Offset, + bool isTarget) { + unsigned Opc = isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(Alignment); + ID.AddInteger(Offset); + C->AddSelectionDAGCSEId(ID); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDOperand(E, 0); + SDNode *N = new ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDOperand(N, 0); +} + + +SDOperand SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); + ID.AddPointer(MBB); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDOperand(E, 0); + SDNode *N = new BasicBlockSDNode(MBB); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDOperand(N, 0); +} + +SDOperand SelectionDAG::getValueType(MVT::ValueType VT) { + if ((unsigned)VT >= ValueTypeNodes.size()) + ValueTypeNodes.resize(VT+1); + if (ValueTypeNodes[VT] == 0) { + ValueTypeNodes[VT] = new VTSDNode(VT); + AllNodes.push_back(ValueTypeNodes[VT]); + } + + return SDOperand(ValueTypeNodes[VT], 0); +} + +SDOperand SelectionDAG::getExternalSymbol(const char *Sym, MVT::ValueType VT) { + SDNode *&N = ExternalSymbols[Sym]; + if (N) return SDOperand(N, 0); + N = new ExternalSymbolSDNode(false, Sym, VT); + AllNodes.push_back(N); + return SDOperand(N, 0); +} + +SDOperand SelectionDAG::getTargetExternalSymbol(const char *Sym, + MVT::ValueType VT) { + SDNode *&N = TargetExternalSymbols[Sym]; + if (N) return SDOperand(N, 0); + N = new ExternalSymbolSDNode(true, Sym, VT); + AllNodes.push_back(N); + return SDOperand(N, 0); +} + +SDOperand SelectionDAG::getCondCode(ISD::CondCode Cond) { + if ((unsigned)Cond >= CondCodeNodes.size()) + CondCodeNodes.resize(Cond+1); + + if (CondCodeNodes[Cond] == 0) { + CondCodeNodes[Cond] = new CondCodeSDNode(Cond); + AllNodes.push_back(CondCodeNodes[Cond]); + } + return SDOperand(CondCodeNodes[Cond], 0); +} + +SDOperand SelectionDAG::getRegister(unsigned RegNo, MVT::ValueType VT) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0); + ID.AddInteger(RegNo); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDOperand(E, 0); + SDNode *N = new RegisterSDNode(RegNo, VT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDOperand(N, 0); +} + +SDOperand SelectionDAG::getSrcValue(const Value *V, int Offset) { + assert((!V || isa<PointerType>(V->getType())) && + "SrcValue is not a pointer?"); + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0); + ID.AddPointer(V); + ID.AddInteger(Offset); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDOperand(E, 0); + SDNode *N = new SrcValueSDNode(V, Offset); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDOperand(N, 0); +} + +SDOperand SelectionDAG::FoldSetCC(MVT::ValueType VT, SDOperand N1, + SDOperand N2, ISD::CondCode Cond) { + // These setcc operations always fold. 
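+  // For illustration (hypothetical operands, not part of the original patch):
+  //   FoldSetCC(MVT::i1, getConstant(3, MVT::i32),
+  //             getConstant(5, MVT::i32), ISD::SETLT)
+  // reaches the two-ConstantSDNode path below and folds straight to
+  // getConstant(1, MVT::i1), since (int64_t)3 < (int64_t)5.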
+ switch (Cond) { + default: break; + case ISD::SETFALSE: + case ISD::SETFALSE2: return getConstant(0, VT); + case ISD::SETTRUE: + case ISD::SETTRUE2: return getConstant(1, VT); + + case ISD::SETOEQ: + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETONE: + case ISD::SETO: + case ISD::SETUO: + case ISD::SETUEQ: + case ISD::SETUNE: + assert(!MVT::isInteger(N1.getValueType()) && "Illegal setcc for integer!"); + break; + } + + if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.Val)) { + uint64_t C2 = N2C->getValue(); + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val)) { + uint64_t C1 = N1C->getValue(); + + // Sign extend the operands if required + if (ISD::isSignedIntSetCC(Cond)) { + C1 = N1C->getSignExtended(); + C2 = N2C->getSignExtended(); + } + + switch (Cond) { + default: assert(0 && "Unknown integer setcc!"); + case ISD::SETEQ: return getConstant(C1 == C2, VT); + case ISD::SETNE: return getConstant(C1 != C2, VT); + case ISD::SETULT: return getConstant(C1 < C2, VT); + case ISD::SETUGT: return getConstant(C1 > C2, VT); + case ISD::SETULE: return getConstant(C1 <= C2, VT); + case ISD::SETUGE: return getConstant(C1 >= C2, VT); + case ISD::SETLT: return getConstant((int64_t)C1 < (int64_t)C2, VT); + case ISD::SETGT: return getConstant((int64_t)C1 > (int64_t)C2, VT); + case ISD::SETLE: return getConstant((int64_t)C1 <= (int64_t)C2, VT); + case ISD::SETGE: return getConstant((int64_t)C1 >= (int64_t)C2, VT); + } + } + } + if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.Val)) + if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.Val)) { + double C1 = N1C->getValue(), C2 = N2C->getValue(); + + switch (Cond) { + default: break; // FIXME: Implement the rest of these! + case ISD::SETEQ: return getConstant(C1 == C2, VT); + case ISD::SETNE: return getConstant(C1 != C2, VT); + case ISD::SETLT: return getConstant(C1 < C2, VT); + case ISD::SETGT: return getConstant(C1 > C2, VT); + case ISD::SETLE: return getConstant(C1 <= C2, VT); + case ISD::SETGE: return getConstant(C1 >= C2, VT); + } + } else { + // Ensure that the constant occurs on the RHS. + return getSetCC(VT, N2, N1, ISD::getSetCCSwappedOperands(Cond)); + } + + // Could not fold it. + return SDOperand(); +} + +/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use +/// this predicate to simplify operations downstream. Mask is known to be zero +/// for bits that V cannot have. +bool SelectionDAG::MaskedValueIsZero(SDOperand Op, uint64_t Mask, + unsigned Depth) const { + // The masks are not wide enough to represent this type! Should use APInt. + if (Op.getValueType() == MVT::i128) + return false; + + uint64_t KnownZero, KnownOne; + ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + return (KnownZero & Mask) == Mask; +} + +/// ComputeMaskedBits - Determine which of the bits specified in Mask are +/// known to be either zero or one and return them in the KnownZero/KnownOne +/// bitsets. This code only analyzes bits in Mask, in order to short-circuit +/// processing. +void SelectionDAG::ComputeMaskedBits(SDOperand Op, uint64_t Mask, + uint64_t &KnownZero, uint64_t &KnownOne, + unsigned Depth) const { + KnownZero = KnownOne = 0; // Don't know anything. + if (Depth == 6 || Mask == 0) + return; // Limit search depth. + + // The masks are not wide enough to represent this type! Should use APInt. 
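+  // (These uint64_t masks cannot represent i128, hence the bail-out just
+  // below.  As an illustrative, hypothetical example of the encoding itself:
+  // an i8 value known to match the bit pattern 0b0000?1?0 would come back as
+  // KnownZero = 0xF1 and KnownOne = 0x04, with the two '?' bits in neither
+  // mask; MaskedValueIsZero(V, 0xF0) would then report true, because 0xF0
+  // lies entirely inside KnownZero.)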
+ if (Op.getValueType() == MVT::i128) + return; + + uint64_t KnownZero2, KnownOne2; + + switch (Op.getOpcode()) { + case ISD::Constant: + // We know all of the bits for a constant! + KnownOne = cast<ConstantSDNode>(Op)->getValue() & Mask; + KnownZero = ~KnownOne & Mask; + return; + case ISD::AND: + // If either the LHS or the RHS are Zero, the result is zero. + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + Mask &= ~KnownZero; + ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + KnownZero |= KnownZero2; + return; + case ISD::OR: + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + Mask &= ~KnownOne; + ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + return; + case ISD::XOR: { + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + uint64_t KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + KnownZero = KnownZeroOut; + return; + } + case ISD::SELECT: + ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + return; + case ISD::SELECT_CC: + ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + return; + case ISD::SETCC: + // If we know the result of a setcc has the top bits zero, use this info. 
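+    // E.g. (illustrative): for an i32 setcc on a target advertising
+    // ZeroOrOneSetCCResult, the expression below evaluates to 0xFFFFFFFE --
+    // every bit except bit 0 becomes known zero.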
+ if (TLI.getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult) + KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); + return; + case ISD::SHL: + // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + ComputeMaskedBits(Op.getOperand(0), Mask >> SA->getValue(), + KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero <<= SA->getValue(); + KnownOne <<= SA->getValue(); + KnownZero |= (1ULL << SA->getValue())-1; // low bits known zero. + } + return; + case ISD::SRL: + // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + MVT::ValueType VT = Op.getValueType(); + unsigned ShAmt = SA->getValue(); + + uint64_t TypeMask = MVT::getIntVTBitMask(VT); + ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt) & TypeMask, + KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero &= TypeMask; + KnownOne &= TypeMask; + KnownZero >>= ShAmt; + KnownOne >>= ShAmt; + + uint64_t HighBits = (1ULL << ShAmt)-1; + HighBits <<= MVT::getSizeInBits(VT)-ShAmt; + KnownZero |= HighBits; // High bits known zero. + } + return; + case ISD::SRA: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + MVT::ValueType VT = Op.getValueType(); + unsigned ShAmt = SA->getValue(); + + // Compute the new bits that are at the top now. + uint64_t TypeMask = MVT::getIntVTBitMask(VT); + + uint64_t InDemandedMask = (Mask << ShAmt) & TypeMask; + // If any of the demanded bits are produced by the sign extension, we also + // demand the input sign bit. + uint64_t HighBits = (1ULL << ShAmt)-1; + HighBits <<= MVT::getSizeInBits(VT) - ShAmt; + if (HighBits & Mask) + InDemandedMask |= MVT::getIntVTSignBit(VT); + + ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero &= TypeMask; + KnownOne &= TypeMask; + KnownZero >>= ShAmt; + KnownOne >>= ShAmt; + + // Handle the sign bits. + uint64_t SignBit = MVT::getIntVTSignBit(VT); + SignBit >>= ShAmt; // Adjust to where it is now in the mask. + + if (KnownZero & SignBit) { + KnownZero |= HighBits; // New bits are known zero. + } else if (KnownOne & SignBit) { + KnownOne |= HighBits; // New bits are known one. + } + } + return; + case ISD::SIGN_EXTEND_INREG: { + MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + + // Sign extension. Compute the demanded bits in the result that are not + // present in the input. + uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & Mask; + + uint64_t InSignBit = MVT::getIntVTSignBit(EVT); + int64_t InputDemandedBits = Mask & MVT::getIntVTBitMask(EVT); + + // If the sign extended bits are demanded, we know that the sign + // bit is demanded. + if (NewBits) + InputDemandedBits |= InSignBit; + + ComputeMaskedBits(Op.getOperand(0), InputDemandedBits, + KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. 
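+    // Hypothetical trace: for a sign_extend_inreg of an i32 from i8 whose
+    // low byte is known to be 0x7F, InSignBit = 0x80 lands in KnownZero, so
+    // with a full 32-bit Mask the first branch below adds all 24 NewBits to
+    // KnownZero as well.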
+ if (KnownZero & InSignBit) { // Input sign bit known clear + KnownZero |= NewBits; + KnownOne &= ~NewBits; + } else if (KnownOne & InSignBit) { // Input sign bit known set + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Input sign bit unknown + KnownZero &= ~NewBits; + KnownOne &= ~NewBits; + } + return; + } + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: { + MVT::ValueType VT = Op.getValueType(); + unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1; + KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT); + KnownOne = 0; + return; + } + case ISD::LOAD: { + if (ISD::isZEXTLoad(Op.Val)) { + LoadSDNode *LD = cast<LoadSDNode>(Op); + MVT::ValueType VT = LD->getLoadedVT(); + KnownZero |= ~MVT::getIntVTBitMask(VT) & Mask; + } + return; + } + case ISD::ZERO_EXTEND: { + uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType()); + uint64_t NewBits = (~InMask) & Mask; + ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, + KnownOne, Depth+1); + KnownZero |= NewBits & Mask; + KnownOne &= ~NewBits; + return; + } + case ISD::SIGN_EXTEND: { + MVT::ValueType InVT = Op.getOperand(0).getValueType(); + unsigned InBits = MVT::getSizeInBits(InVT); + uint64_t InMask = MVT::getIntVTBitMask(InVT); + uint64_t InSignBit = 1ULL << (InBits-1); + uint64_t NewBits = (~InMask) & Mask; + uint64_t InDemandedBits = Mask & InMask; + + // If any of the sign extended bits are demanded, we know that the sign + // bit is demanded. + if (NewBits & Mask) + InDemandedBits |= InSignBit; + + ComputeMaskedBits(Op.getOperand(0), InDemandedBits, KnownZero, + KnownOne, Depth+1); + // If the sign bit is known zero or one, the top bits match. + if (KnownZero & InSignBit) { + KnownZero |= NewBits; + KnownOne &= ~NewBits; + } else if (KnownOne & InSignBit) { + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Otherwise, top bits aren't known. + KnownOne &= ~NewBits; + KnownZero &= ~NewBits; + } + return; + } + case ISD::ANY_EXTEND: { + MVT::ValueType VT = Op.getOperand(0).getValueType(); + ComputeMaskedBits(Op.getOperand(0), Mask & MVT::getIntVTBitMask(VT), + KnownZero, KnownOne, Depth+1); + return; + } + case ISD::TRUNCATE: { + ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType()); + KnownZero &= OutMask; + KnownOne &= OutMask; + break; + } + case ISD::AssertZext: { + MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + uint64_t InMask = MVT::getIntVTBitMask(VT); + ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, + KnownOne, Depth+1); + KnownZero |= (~InMask) & Mask; + return; + } + case ISD::ADD: { + // If either the LHS or the RHS are Zero, the result is zero. + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are known if clear or set in both the low clear bits + // common to both LHS & RHS. For example, 8+(X<<3) is known to have the + // low 3 bits clear. 
+ uint64_t KnownZeroOut = std::min(CountTrailingZeros_64(~KnownZero), + CountTrailingZeros_64(~KnownZero2)); + + KnownZero = (1ULL << KnownZeroOut) - 1; + KnownOne = 0; + return; + } + case ISD::SUB: { + ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)); + if (!CLHS) return; + + // We know that the top bits of C-X are clear if X contains less bits + // than C (i.e. no wrap-around can happen). For example, 20-X is + // positive if we can prove that X is >= 0 and < 16. + MVT::ValueType VT = CLHS->getValueType(0); + if ((CLHS->getValue() & MVT::getIntVTSignBit(VT)) == 0) { // sign bit clear + unsigned NLZ = CountLeadingZeros_64(CLHS->getValue()+1); + uint64_t MaskV = (1ULL << (63-NLZ))-1; // NLZ can't be 64 with no sign bit + MaskV = ~MaskV & MVT::getIntVTBitMask(VT); + ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero, KnownOne, Depth+1); + + // If all of the MaskV bits are known to be zero, then we know the output + // top bits are zero, because we now know that the output is from [0-C]. + if ((KnownZero & MaskV) == MaskV) { + unsigned NLZ2 = CountLeadingZeros_64(CLHS->getValue()); + KnownZero = ~((1ULL << (64-NLZ2))-1) & Mask; // Top bits known zero. + KnownOne = 0; // No one bits known. + } else { + KnownZero = KnownOne = 0; // Otherwise, nothing known. + } + } + return; + } + default: + // Allow the target to implement this method for its nodes. + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_VOID: + TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this); + } + return; + } +} + +/// ComputeNumSignBits - Return the number of times the sign bit of the +/// register is replicated into the other bits. We know that at least 1 bit +/// is always equal to the sign bit (itself), but other cases can give us +/// information. For example, immediately after an "SRA X, 2", we know that +/// the top 3 bits are all equal to each other, so we return 3. +unsigned SelectionDAG::ComputeNumSignBits(SDOperand Op, unsigned Depth) const{ + MVT::ValueType VT = Op.getValueType(); + assert(MVT::isInteger(VT) && "Invalid VT!"); + unsigned VTBits = MVT::getSizeInBits(VT); + unsigned Tmp, Tmp2; + + if (Depth == 6) + return 1; // Limit search depth. + + switch (Op.getOpcode()) { + default: break; + case ISD::AssertSext: + Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT()); + return VTBits-Tmp+1; + case ISD::AssertZext: + Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT()); + return VTBits-Tmp; + + case ISD::Constant: { + uint64_t Val = cast<ConstantSDNode>(Op)->getValue(); + // If negative, invert the bits, then look at it. + if (Val & MVT::getIntVTSignBit(VT)) + Val = ~Val; + + // Shift the bits so they are the leading bits in the int64_t. + Val <<= 64-VTBits; + + // Return # leading zeros. We use 'min' here in case Val was zero before + // shifting. We don't want to return '64' as for an i32 "0". + return std::min(VTBits, CountLeadingZeros_64(Val)); + } + + case ISD::SIGN_EXTEND: + Tmp = VTBits-MVT::getSizeInBits(Op.getOperand(0).getValueType()); + return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; + + case ISD::SIGN_EXTEND_INREG: + // Max of the input and what this extends. 
+ Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT()); + Tmp = VTBits-Tmp+1; + + Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1); + return std::max(Tmp, Tmp2); + + case ISD::SRA: + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + // SRA X, C -> adds C sign bits. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + Tmp += C->getValue(); + if (Tmp > VTBits) Tmp = VTBits; + } + return Tmp; + case ISD::SHL: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + // shl destroys sign bits. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (C->getValue() >= VTBits || // Bad shift. + C->getValue() >= Tmp) break; // Shifted all sign bits out. + return Tmp - C->getValue(); + } + break; + case ISD::AND: + case ISD::OR: + case ISD::XOR: // NOT is handled here. + // Logical binary ops preserve the number of sign bits. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + return std::min(Tmp, Tmp2); + + case ISD::SELECT: + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + return std::min(Tmp, Tmp2); + + case ISD::SETCC: + // If setcc returns 0/-1, all bits are sign bits. + if (TLI.getSetCCResultContents() == + TargetLowering::ZeroOrNegativeOneSetCCResult) + return VTBits; + break; + case ISD::ROTL: + case ISD::ROTR: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned RotAmt = C->getValue() & (VTBits-1); + + // Handle rotate right by N like a rotate left by 32-N. + if (Op.getOpcode() == ISD::ROTR) + RotAmt = (VTBits-RotAmt) & (VTBits-1); + + // If we aren't rotating out all of the known-in sign bits, return the + // number that are left. This handles rotl(sext(x), 1) for example. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp > RotAmt+1) return Tmp-RotAmt; + } + break; + case ISD::ADD: + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. + + // Special case decrementing a value (ADD X, -1): + if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) + if (CRHS->isAllOnesValue()) { + uint64_t KnownZero, KnownOne; + uint64_t Mask = MVT::getIntVTBitMask(VT); + ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero|1) == Mask) + return VTBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (KnownZero & MVT::getIntVTSignBit(VT)) + return Tmp; + } + + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp2 == 1) return 1; + return std::min(Tmp, Tmp2)-1; + break; + + case ISD::SUB: + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp2 == 1) return 1; + + // Handle NEG. + if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) + if (CLHS->getValue() == 0) { + uint64_t KnownZero, KnownOne; + uint64_t Mask = MVT::getIntVTBitMask(VT); + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. 
+ if ((KnownZero|1) == Mask) + return VTBits; + + // If the input is known to be positive (the sign bit is known clear), + // the output of the NEG has the same number of sign bits as the input. + if (KnownZero & MVT::getIntVTSignBit(VT)) + return Tmp2; + + // Otherwise, we treat this like a SUB. + } + + // Sub can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. + return std::min(Tmp, Tmp2)-1; + break; + case ISD::TRUNCATE: + // FIXME: it's tricky to do anything useful for this, but it is an important + // case for targets like X86. + break; + } + + // Handle LOADX separately here. EXTLOAD case will fallthrough. + if (Op.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(Op); + unsigned ExtType = LD->getExtensionType(); + switch (ExtType) { + default: break; + case ISD::SEXTLOAD: // '17' bits known + Tmp = MVT::getSizeInBits(LD->getLoadedVT()); + return VTBits-Tmp+1; + case ISD::ZEXTLOAD: // '16' bits known + Tmp = MVT::getSizeInBits(LD->getLoadedVT()); + return VTBits-Tmp; + } + } + + // Allow the target to implement this method for its nodes. + if (Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) { + unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth); + if (NumBits > 1) return NumBits; + } + + // Finally, if we can prove that the top bits of the result are 0's or 1's, + // use this information. + uint64_t KnownZero, KnownOne; + uint64_t Mask = MVT::getIntVTBitMask(VT); + ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + + uint64_t SignBit = MVT::getIntVTSignBit(VT); + if (KnownZero & SignBit) { // SignBit is 0 + Mask = KnownZero; + } else if (KnownOne & SignBit) { // SignBit is 1; + Mask = KnownOne; + } else { + // Nothing known. + return 1; + } + + // Okay, we know that the sign bit in Mask is set. Use CLZ to determine + // the number of identical bits in the top of the input value. + Mask ^= ~0ULL; + Mask <<= 64-VTBits; + // Return # leading zeros. We use 'min' here in case Val was zero before + // shifting. We don't want to return '64' as for an i32 "0". + return std::min(VTBits, CountLeadingZeros_64(Mask)); +} + + +/// getNode - Gets or creates the specified node. +/// +SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDOperand(E, 0); + SDNode *N = new SDNode(Opcode, SDNode::getSDVTList(VT)); + CSEMap.InsertNode(N, IP); + + AllNodes.push_back(N); + return SDOperand(N, 0); +} + +SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Operand) { + unsigned Tmp1; + // Constant fold unary operations with an integer constant operand. 
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.Val)) { + uint64_t Val = C->getValue(); + switch (Opcode) { + default: break; + case ISD::SIGN_EXTEND: return getConstant(C->getSignExtended(), VT); + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: return getConstant(Val, VT); + case ISD::TRUNCATE: return getConstant(Val, VT); + case ISD::SINT_TO_FP: return getConstantFP(C->getSignExtended(), VT); + case ISD::UINT_TO_FP: return getConstantFP(C->getValue(), VT); + case ISD::BIT_CONVERT: + if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) + return getConstantFP(BitsToFloat(Val), VT); + else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) + return getConstantFP(BitsToDouble(Val), VT); + break; + case ISD::BSWAP: + switch(VT) { + default: assert(0 && "Invalid bswap!"); break; + case MVT::i16: return getConstant(ByteSwap_16((unsigned short)Val), VT); + case MVT::i32: return getConstant(ByteSwap_32((unsigned)Val), VT); + case MVT::i64: return getConstant(ByteSwap_64(Val), VT); + } + break; + case ISD::CTPOP: + switch(VT) { + default: assert(0 && "Invalid ctpop!"); break; + case MVT::i1: return getConstant(Val != 0, VT); + case MVT::i8: + Tmp1 = (unsigned)Val & 0xFF; + return getConstant(CountPopulation_32(Tmp1), VT); + case MVT::i16: + Tmp1 = (unsigned)Val & 0xFFFF; + return getConstant(CountPopulation_32(Tmp1), VT); + case MVT::i32: + return getConstant(CountPopulation_32((unsigned)Val), VT); + case MVT::i64: + return getConstant(CountPopulation_64(Val), VT); + } + case ISD::CTLZ: + switch(VT) { + default: assert(0 && "Invalid ctlz!"); break; + case MVT::i1: return getConstant(Val == 0, VT); + case MVT::i8: + Tmp1 = (unsigned)Val & 0xFF; + return getConstant(CountLeadingZeros_32(Tmp1)-24, VT); + case MVT::i16: + Tmp1 = (unsigned)Val & 0xFFFF; + return getConstant(CountLeadingZeros_32(Tmp1)-16, VT); + case MVT::i32: + return getConstant(CountLeadingZeros_32((unsigned)Val), VT); + case MVT::i64: + return getConstant(CountLeadingZeros_64(Val), VT); + } + case ISD::CTTZ: + switch(VT) { + default: assert(0 && "Invalid cttz!"); break; + case MVT::i1: return getConstant(Val == 0, VT); + case MVT::i8: + Tmp1 = (unsigned)Val | 0x100; + return getConstant(CountTrailingZeros_32(Tmp1), VT); + case MVT::i16: + Tmp1 = (unsigned)Val | 0x10000; + return getConstant(CountTrailingZeros_32(Tmp1), VT); + case MVT::i32: + return getConstant(CountTrailingZeros_32((unsigned)Val), VT); + case MVT::i64: + return getConstant(CountTrailingZeros_64(Val), VT); + } + } + } + + // Constant fold unary operations with an floating point constant operand. + if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.Val)) + switch (Opcode) { + case ISD::FNEG: + return getConstantFP(-C->getValue(), VT); + case ISD::FABS: + return getConstantFP(fabs(C->getValue()), VT); + case ISD::FP_ROUND: + case ISD::FP_EXTEND: + return getConstantFP(C->getValue(), VT); + case ISD::FP_TO_SINT: + return getConstant((int64_t)C->getValue(), VT); + case ISD::FP_TO_UINT: + return getConstant((uint64_t)C->getValue(), VT); + case ISD::BIT_CONVERT: + if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) + return getConstant(FloatToBits(C->getValue()), VT); + else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) + return getConstant(DoubleToBits(C->getValue()), VT); + break; + } + + unsigned OpOpcode = Operand.Val->getOpcode(); + switch (Opcode) { + case ISD::TokenFactor: + return Operand; // Factor of one node? No factor. 
+ case ISD::FP_ROUND: + case ISD::FP_EXTEND: + assert(MVT::isFloatingPoint(VT) && + MVT::isFloatingPoint(Operand.getValueType()) && "Invalid FP cast!"); + break; + case ISD::SIGN_EXTEND: + assert(MVT::isInteger(VT) && MVT::isInteger(Operand.getValueType()) && + "Invalid SIGN_EXTEND!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert(Operand.getValueType() < VT && "Invalid sext node, dst < src!"); + if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) + return getNode(OpOpcode, VT, Operand.Val->getOperand(0)); + break; + case ISD::ZERO_EXTEND: + assert(MVT::isInteger(VT) && MVT::isInteger(Operand.getValueType()) && + "Invalid ZERO_EXTEND!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert(Operand.getValueType() < VT && "Invalid zext node, dst < src!"); + if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) + return getNode(ISD::ZERO_EXTEND, VT, Operand.Val->getOperand(0)); + break; + case ISD::ANY_EXTEND: + assert(MVT::isInteger(VT) && MVT::isInteger(Operand.getValueType()) && + "Invalid ANY_EXTEND!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert(Operand.getValueType() < VT && "Invalid anyext node, dst < src!"); + if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND) + // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) + return getNode(OpOpcode, VT, Operand.Val->getOperand(0)); + break; + case ISD::TRUNCATE: + assert(MVT::isInteger(VT) && MVT::isInteger(Operand.getValueType()) && + "Invalid TRUNCATE!"); + if (Operand.getValueType() == VT) return Operand; // noop truncate + assert(Operand.getValueType() > VT && "Invalid truncate node, src < dst!"); + if (OpOpcode == ISD::TRUNCATE) + return getNode(ISD::TRUNCATE, VT, Operand.Val->getOperand(0)); + else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || + OpOpcode == ISD::ANY_EXTEND) { + // If the source is smaller than the dest, we still need an extend. + if (Operand.Val->getOperand(0).getValueType() < VT) + return getNode(OpOpcode, VT, Operand.Val->getOperand(0)); + else if (Operand.Val->getOperand(0).getValueType() > VT) + return getNode(ISD::TRUNCATE, VT, Operand.Val->getOperand(0)); + else + return Operand.Val->getOperand(0); + } + break; + case ISD::BIT_CONVERT: + // Basic sanity checking. + assert(MVT::getSizeInBits(VT) == MVT::getSizeInBits(Operand.getValueType()) + && "Cannot BIT_CONVERT between types of different sizes!"); + if (VT == Operand.getValueType()) return Operand; // noop conversion. 
+ if (OpOpcode == ISD::BIT_CONVERT) // bitconv(bitconv(x)) -> bitconv(x) + return getNode(ISD::BIT_CONVERT, VT, Operand.getOperand(0)); + if (OpOpcode == ISD::UNDEF) + return getNode(ISD::UNDEF, VT); + break; + case ISD::SCALAR_TO_VECTOR: + assert(MVT::isVector(VT) && !MVT::isVector(Operand.getValueType()) && + MVT::getVectorElementType(VT) == Operand.getValueType() && + "Illegal SCALAR_TO_VECTOR node!"); + break; + case ISD::FNEG: + if (OpOpcode == ISD::FSUB) // -(X-Y) -> (Y-X) + return getNode(ISD::FSUB, VT, Operand.Val->getOperand(1), + Operand.Val->getOperand(0)); + if (OpOpcode == ISD::FNEG) // --X -> X + return Operand.Val->getOperand(0); + break; + case ISD::FABS: + if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X) + return getNode(ISD::FABS, VT, Operand.Val->getOperand(0)); + break; + } + + SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Flag) { // Don't CSE flag producing nodes + FoldingSetNodeID ID; + SDOperand Ops[1] = { Operand }; + AddNodeIDNode(ID, Opcode, VTs, Ops, 1); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDOperand(E, 0); + N = new UnarySDNode(Opcode, VTs, Operand); + CSEMap.InsertNode(N, IP); + } else { + N = new UnarySDNode(Opcode, VTs, Operand); + } + AllNodes.push_back(N); + return SDOperand(N, 0); +} + + + +SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT, + SDOperand N1, SDOperand N2) { +#ifndef NDEBUG + switch (Opcode) { + case ISD::TokenFactor: + assert(VT == MVT::Other && N1.getValueType() == MVT::Other && + N2.getValueType() == MVT::Other && "Invalid token factor!"); + break; + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::UDIV: + case ISD::UREM: + case ISD::MULHU: + case ISD::MULHS: + assert(MVT::isInteger(VT) && "This operator does not apply to FP types!"); + // fall through + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::SDIV: + case ISD::SREM: + assert(MVT::isInteger(N1.getValueType()) && "Should use F* for FP ops"); + // fall through. + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + break; + case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match. 
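+  // E.g. (illustrative): fcopysign f64 %X, f32 %Y is accepted here; the
+  // result takes its magnitude (and type) from N1, and only the sign of N2
+  // is consulted, so N2's type may differ from VT.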
+ assert(N1.getValueType() == VT && + MVT::isFloatingPoint(N1.getValueType()) && + MVT::isFloatingPoint(N2.getValueType()) && + "Invalid FCOPYSIGN!"); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + assert(VT == N1.getValueType() && + "Shift operators return type must be the same as their first arg"); + assert(MVT::isInteger(VT) && MVT::isInteger(N2.getValueType()) && + VT != MVT::i1 && "Shifts only work on integers"); + break; + case ISD::FP_ROUND_INREG: { + MVT::ValueType EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg round!"); + assert(MVT::isFloatingPoint(VT) && MVT::isFloatingPoint(EVT) && + "Cannot FP_ROUND_INREG integer types"); + assert(EVT <= VT && "Not rounding down!"); + break; + } + case ISD::AssertSext: + case ISD::AssertZext: + case ISD::SIGN_EXTEND_INREG: { + MVT::ValueType EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg extend!"); + assert(MVT::isInteger(VT) && MVT::isInteger(EVT) && + "Cannot *_EXTEND_INREG FP types"); + assert(EVT <= VT && "Not extending!"); + } + + default: break; + } +#endif + + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.Val); + if (N1C) { + if (Opcode == ISD::SIGN_EXTEND_INREG) { + int64_t Val = N1C->getValue(); + unsigned FromBits = MVT::getSizeInBits(cast<VTSDNode>(N2)->getVT()); + Val <<= 64-FromBits; + Val >>= 64-FromBits; + return getConstant(Val, VT); + } + + if (N2C) { + uint64_t C1 = N1C->getValue(), C2 = N2C->getValue(); + switch (Opcode) { + case ISD::ADD: return getConstant(C1 + C2, VT); + case ISD::SUB: return getConstant(C1 - C2, VT); + case ISD::MUL: return getConstant(C1 * C2, VT); + case ISD::UDIV: + if (C2) return getConstant(C1 / C2, VT); + break; + case ISD::UREM : + if (C2) return getConstant(C1 % C2, VT); + break; + case ISD::SDIV : + if (C2) return getConstant(N1C->getSignExtended() / + N2C->getSignExtended(), VT); + break; + case ISD::SREM : + if (C2) return getConstant(N1C->getSignExtended() % + N2C->getSignExtended(), VT); + break; + case ISD::AND : return getConstant(C1 & C2, VT); + case ISD::OR : return getConstant(C1 | C2, VT); + case ISD::XOR : return getConstant(C1 ^ C2, VT); + case ISD::SHL : return getConstant(C1 << C2, VT); + case ISD::SRL : return getConstant(C1 >> C2, VT); + case ISD::SRA : return getConstant(N1C->getSignExtended() >>(int)C2, VT); + case ISD::ROTL : + return getConstant((C1 << C2) | (C1 >> (MVT::getSizeInBits(VT) - C2)), + VT); + case ISD::ROTR : + return getConstant((C1 >> C2) | (C1 << (MVT::getSizeInBits(VT) - C2)), + VT); + default: break; + } + } else { // Cannonicalize constant to RHS if commutative + if (isCommutativeBinOp(Opcode)) { + std::swap(N1C, N2C); + std::swap(N1, N2); + } + } + } + + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.Val); + ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.Val); + if (N1CFP) { + if (N2CFP) { + double C1 = N1CFP->getValue(), C2 = N2CFP->getValue(); + switch (Opcode) { + case ISD::FADD: return getConstantFP(C1 + C2, VT); + case ISD::FSUB: return getConstantFP(C1 - C2, VT); + case ISD::FMUL: return getConstantFP(C1 * C2, VT); + case ISD::FDIV: + if (C2) return getConstantFP(C1 / C2, VT); + break; + case ISD::FREM : + if (C2) return getConstantFP(fmod(C1, C2), VT); + break; + case ISD::FCOPYSIGN: { + union { + double F; + uint64_t I; + } u1; + u1.F = C1; + if (int64_t(DoubleToBits(C2)) < 0) // Sign bit of RHS set? 
+ u1.I |= 1ULL << 63; // Set the sign bit of the LHS. + else + u1.I &= (1ULL << 63)-1; // Clear the sign bit of the LHS. + return getConstantFP(u1.F, VT); + } + default: break; + } + } else { // Cannonicalize constant to RHS if commutative + if (isCommutativeBinOp(Opcode)) { + std::swap(N1CFP, N2CFP); + std::swap(N1, N2); + } + } + } + + // Canonicalize an UNDEF to the RHS, even over a constant. + if (N1.getOpcode() == ISD::UNDEF) { + if (isCommutativeBinOp(Opcode)) { + std::swap(N1, N2); + } else { + switch (Opcode) { + case ISD::FP_ROUND_INREG: + case ISD::SIGN_EXTEND_INREG: + case ISD::SUB: + case ISD::FSUB: + case ISD::FDIV: + case ISD::FREM: + case ISD::SRA: + return N1; // fold op(undef, arg2) -> undef + case ISD::UDIV: + case ISD::SDIV: + case ISD::UREM: + case ISD::SREM: + case ISD::SRL: + case ISD::SHL: + if (!MVT::isVector(VT)) + return getConstant(0, VT); // fold op(undef, arg2) -> 0 + // For vectors, we can't easily build an all zero vector, just return + // the LHS. + return N2; + } + } + } + + // Fold a bunch of operators when the RHS is undef. + if (N2.getOpcode() == ISD::UNDEF) { + switch (Opcode) { + case ISD::ADD: + case ISD::ADDC: + case ISD::ADDE: + case ISD::SUB: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + case ISD::UDIV: + case ISD::SDIV: + case ISD::UREM: + case ISD::SREM: + case ISD::XOR: + return N2; // fold op(arg1, undef) -> undef + case ISD::MUL: + case ISD::AND: + case ISD::SRL: + case ISD::SHL: + if (!MVT::isVector(VT)) + return getConstant(0, VT); // fold op(arg1, undef) -> 0 + // For vectors, we can't easily build an all zero vector, just return + // the LHS. + return N1; + case ISD::OR: + if (!MVT::isVector(VT)) + return getConstant(MVT::getIntVTBitMask(VT), VT); + // For vectors, we can't easily build an all one vector, just return + // the LHS. + return N1; + case ISD::SRA: + return N1; + } + } + + // Fold operations. + switch (Opcode) { + case ISD::TokenFactor: + // Fold trivial token factors. + if (N1.getOpcode() == ISD::EntryToken) return N2; + if (N2.getOpcode() == ISD::EntryToken) return N1; + break; + + case ISD::AND: + // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's + // worth handling here. + if (N2C && N2C->getValue() == 0) + return N2; + break; + case ISD::OR: + case ISD::XOR: + // (X ^| 0) -> X. This commonly occurs when legalizing i64 values, so it's + // worth handling here. + if (N2C && N2C->getValue() == 0) + return N1; + break; + case ISD::FP_ROUND_INREG: + if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding. + break; + case ISD::SIGN_EXTEND_INREG: { + MVT::ValueType EVT = cast<VTSDNode>(N2)->getVT(); + if (EVT == VT) return N1; // Not actually extending + break; + } + case ISD::EXTRACT_VECTOR_ELT: + assert(N2C && "Bad EXTRACT_VECTOR_ELT!"); + + // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is + // expanding copies of large vectors from registers. + if (N1.getOpcode() == ISD::CONCAT_VECTORS && + N1.getNumOperands() > 0) { + unsigned Factor = + MVT::getVectorNumElements(N1.getOperand(0).getValueType()); + return getNode(ISD::EXTRACT_VECTOR_ELT, VT, + N1.getOperand(N2C->getValue() / Factor), + getConstant(N2C->getValue() % Factor, N2.getValueType())); + } + + // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is + // expanding large vector constants. 
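+    // Sketch of the fold (hypothetical operands): extract_vector_elt
+    // (build_vector a, b, c, d), 2 forwards the scalar c directly -- the
+    // build_vector never has to be materialized just to pull one element
+    // back out.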
+ if (N1.getOpcode() == ISD::BUILD_VECTOR) + return N1.getOperand(N2C->getValue()); + + // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector + // operations are lowered to scalars. + if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) + if (ConstantSDNode *IEC = dyn_cast<ConstantSDNode>(N1.getOperand(2))) { + if (IEC == N2C) + return N1.getOperand(1); + else + return getNode(ISD::EXTRACT_VECTOR_ELT, VT, N1.getOperand(0), N2); + } + break; + case ISD::EXTRACT_ELEMENT: + assert(N2C && (unsigned)N2C->getValue() < 2 && "Bad EXTRACT_ELEMENT!"); + + // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding + // 64-bit integers into 32-bit parts. Instead of building the extract of + // the BUILD_PAIR, only to have legalize rip it apart, just do it now. + if (N1.getOpcode() == ISD::BUILD_PAIR) + return N1.getOperand(N2C->getValue()); + + // EXTRACT_ELEMENT of a constant int is also very common. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + unsigned Shift = MVT::getSizeInBits(VT) * N2C->getValue(); + return getConstant(C->getValue() >> Shift, VT); + } + break; + + // FIXME: figure out how to safely handle things like + // int foo(int x) { return 1 << (x & 255); } + // int bar() { return foo(256); } +#if 0 + case ISD::SHL: + case ISD::SRL: + case ISD::SRA: + if (N2.getOpcode() == ISD::SIGN_EXTEND_INREG && + cast<VTSDNode>(N2.getOperand(1))->getVT() != MVT::i1) + return getNode(Opcode, VT, N1, N2.getOperand(0)); + else if (N2.getOpcode() == ISD::AND) + if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N2.getOperand(1))) { + // If the and is only masking out bits that cannot effect the shift, + // eliminate the and. + unsigned NumBits = MVT::getSizeInBits(VT); + if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1) + return getNode(Opcode, VT, N1, N2.getOperand(0)); + } + break; +#endif + } + + // Memoize this node if possible. + SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Flag) { + SDOperand Ops[] = { N1, N2 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops, 2); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDOperand(E, 0); + N = new BinarySDNode(Opcode, VTs, N1, N2); + CSEMap.InsertNode(N, IP); + } else { + N = new BinarySDNode(Opcode, VTs, N1, N2); + } + + AllNodes.push_back(N); + return SDOperand(N, 0); +} + +SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT, + SDOperand N1, SDOperand N2, SDOperand N3) { + // Perform various simplifications. + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.Val); + switch (Opcode) { + case ISD::SETCC: { + // Use FoldSetCC to simplify SETCC's. 
+ SDOperand Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get()); + if (Simp.Val) return Simp; + break; + } + case ISD::SELECT: + if (N1C) + if (N1C->getValue()) + return N2; // select true, X, Y -> X + else + return N3; // select false, X, Y -> Y + + if (N2 == N3) return N2; // select C, X, X -> X + break; + case ISD::BRCOND: + if (N2C) + if (N2C->getValue()) // Unconditional branch + return getNode(ISD::BR, MVT::Other, N1, N3); + else + return N1; // Never-taken branch + break; + case ISD::VECTOR_SHUFFLE: + assert(VT == N1.getValueType() && VT == N2.getValueType() && + MVT::isVector(VT) && MVT::isVector(N3.getValueType()) && + N3.getOpcode() == ISD::BUILD_VECTOR && + MVT::getVectorNumElements(VT) == N3.getNumOperands() && + "Illegal VECTOR_SHUFFLE node!"); + break; + case ISD::BIT_CONVERT: + // Fold bit_convert nodes from a type to themselves. + if (N1.getValueType() == VT) + return N1; + break; + } + + // Memoize node if it doesn't produce a flag. + SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Flag) { + SDOperand Ops[] = { N1, N2, N3 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops, 3); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDOperand(E, 0); + N = new TernarySDNode(Opcode, VTs, N1, N2, N3); + CSEMap.InsertNode(N, IP); + } else { + N = new TernarySDNode(Opcode, VTs, N1, N2, N3); + } + AllNodes.push_back(N); + return SDOperand(N, 0); +} + +SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT, + SDOperand N1, SDOperand N2, SDOperand N3, + SDOperand N4) { + SDOperand Ops[] = { N1, N2, N3, N4 }; + return getNode(Opcode, VT, Ops, 4); +} + +SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT, + SDOperand N1, SDOperand N2, SDOperand N3, + SDOperand N4, SDOperand N5) { + SDOperand Ops[] = { N1, N2, N3, N4, N5 }; + return getNode(Opcode, VT, Ops, 5); +} + +SDOperand SelectionDAG::getLoad(MVT::ValueType VT, + SDOperand Chain, SDOperand Ptr, + const Value *SV, int SVOffset, + bool isVolatile, unsigned Alignment) { + if (Alignment == 0) { // Ensure that codegen never sees alignment 0 + const Type *Ty = 0; + if (VT != MVT::iPTR) { + Ty = MVT::getTypeForValueType(VT); + } else if (SV) { + const PointerType *PT = dyn_cast<PointerType>(SV->getType()); + assert(PT && "Value for load must be a pointer"); + Ty = PT->getElementType(); + } + assert(Ty && "Could not get type information for load"); + Alignment = TLI.getTargetData()->getABITypeAlignment(Ty); + } + SDVTList VTs = getVTList(VT, MVT::Other); + SDOperand Undef = getNode(ISD::UNDEF, Ptr.getValueType()); + SDOperand Ops[] = { Chain, Ptr, Undef }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3); + ID.AddInteger(ISD::UNINDEXED); + ID.AddInteger(ISD::NON_EXTLOAD); + ID.AddInteger(VT); + ID.AddPointer(SV); + ID.AddInteger(SVOffset); + ID.AddInteger(Alignment); + ID.AddInteger(isVolatile); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDOperand(E, 0); + SDNode *N = new LoadSDNode(Ops, VTs, ISD::UNINDEXED, + ISD::NON_EXTLOAD, VT, SV, SVOffset, Alignment, + isVolatile); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDOperand(N, 0); +} + +SDOperand SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, MVT::ValueType VT, + SDOperand Chain, SDOperand Ptr, + const Value *SV, + int SVOffset, MVT::ValueType EVT, + bool isVolatile, unsigned Alignment) { + // If they are asking for an extending load from/to the same thing, return a + // normal load. 
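+  // E.g. (illustrative): getExtLoad(ISD::SEXTLOAD, MVT::i32, ...,
+  // /*EVT=*/MVT::i32, ...) is silently downgraded to an ordinary i32 load,
+  // so callers need not special-case the no-op extension themselves.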
+  if (VT == EVT)
+    ExtType = ISD::NON_EXTLOAD;
+
+  if (MVT::isVector(VT))
+    assert(EVT == MVT::getVectorElementType(VT) && "Invalid vector extload!");
+  else
+    assert(EVT < VT && "Should only be an extending load, not truncating!");
+  assert((ExtType == ISD::EXTLOAD || MVT::isInteger(VT)) &&
+         "Cannot sign/zero extend a FP/Vector load!");
+  assert(MVT::isInteger(VT) == MVT::isInteger(EVT) &&
+         "Cannot convert from FP to Int or Int -> FP!");
+
+  if (Alignment == 0) {  // Ensure that codegen never sees alignment 0
+    const Type *Ty = 0;
+    if (VT != MVT::iPTR) {
+      Ty = MVT::getTypeForValueType(VT);
+    } else if (SV) {
+      const PointerType *PT = dyn_cast<PointerType>(SV->getType());
+      assert(PT && "Value for load must be a pointer");
+      Ty = PT->getElementType();
+    }
+    assert(Ty && "Could not get type information for load");
+    Alignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+  }
+  SDVTList VTs = getVTList(VT, MVT::Other);
+  SDOperand Undef = getNode(ISD::UNDEF, Ptr.getValueType());
+  SDOperand Ops[] = { Chain, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+  ID.AddInteger(ISD::UNINDEXED);
+  ID.AddInteger(ExtType);
+  ID.AddInteger(EVT);
+  ID.AddPointer(SV);
+  ID.AddInteger(SVOffset);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(isVolatile);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new LoadSDNode(Ops, VTs, ISD::UNINDEXED, ExtType, EVT,
+                             SV, SVOffset, Alignment, isVolatile);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand
+SelectionDAG::getIndexedLoad(SDOperand OrigLoad, SDOperand Base,
+                             SDOperand Offset, ISD::MemIndexedMode AM) {
+  LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+  assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
+         "Load is already an indexed load!");
+  MVT::ValueType VT = OrigLoad.getValueType();
+  SDVTList VTs = getVTList(VT, Base.getValueType(), MVT::Other);
+  SDOperand Ops[] = { LD->getChain(), Base, Offset };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+  ID.AddInteger(AM);
+  ID.AddInteger(LD->getExtensionType());
+  ID.AddInteger(LD->getLoadedVT());
+  ID.AddPointer(LD->getSrcValue());
+  ID.AddInteger(LD->getSrcValueOffset());
+  ID.AddInteger(LD->getAlignment());
+  ID.AddInteger(LD->isVolatile());
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new LoadSDNode(Ops, VTs, AM,
+                             LD->getExtensionType(), LD->getLoadedVT(),
+                             LD->getSrcValue(), LD->getSrcValueOffset(),
+                             LD->getAlignment(), LD->isVolatile());
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getStore(SDOperand Chain, SDOperand Val,
+                                 SDOperand Ptr, const Value *SV, int SVOffset,
+                                 bool isVolatile, unsigned Alignment) {
+  MVT::ValueType VT = Val.getValueType();
+
+  if (Alignment == 0) {  // Ensure that codegen never sees alignment 0
+    const Type *Ty = 0;
+    if (VT != MVT::iPTR) {
+      Ty = MVT::getTypeForValueType(VT);
+    } else if (SV) {
+      const PointerType *PT = dyn_cast<PointerType>(SV->getType());
+      assert(PT && "Value for store must be a pointer");
+      Ty = PT->getElementType();
+    }
+    assert(Ty && "Could not get type information for store");
+    Alignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+  }
+  SDVTList VTs = getVTList(MVT::Other);
+  SDOperand Undef = getNode(ISD::UNDEF, Ptr.getValueType());
+  SDOperand Ops[] = { Chain, Val, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(ISD::UNINDEXED);
+  ID.AddInteger(false);
+  ID.AddInteger(VT);
+  ID.AddPointer(SV);
+  ID.AddInteger(SVOffset);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(isVolatile);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new StoreSDNode(Ops, VTs, ISD::UNINDEXED, false,
+                              VT, SV, SVOffset, Alignment, isVolatile);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getTruncStore(SDOperand Chain, SDOperand Val,
+                                      SDOperand Ptr, const Value *SV,
+                                      int SVOffset, MVT::ValueType SVT,
+                                      bool isVolatile, unsigned Alignment) {
+  MVT::ValueType VT = Val.getValueType();
+  bool isTrunc = VT != SVT;
+
+  assert(VT > SVT && "Not a truncation?");
+  assert(MVT::isInteger(VT) == MVT::isInteger(SVT) &&
+         "Can't do FP-INT conversion!");
+
+  if (Alignment == 0) {  // Ensure that codegen never sees alignment 0
+    const Type *Ty = 0;
+    if (VT != MVT::iPTR) {
+      Ty = MVT::getTypeForValueType(VT);
+    } else if (SV) {
+      const PointerType *PT = dyn_cast<PointerType>(SV->getType());
+      assert(PT && "Value for store must be a pointer");
+      Ty = PT->getElementType();
+    }
+    assert(Ty && "Could not get type information for store");
+    Alignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+  }
+  SDVTList VTs = getVTList(MVT::Other);
+  SDOperand Undef = getNode(ISD::UNDEF, Ptr.getValueType());
+  SDOperand Ops[] = { Chain, Val, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(ISD::UNINDEXED);
+  ID.AddInteger(isTrunc);
+  ID.AddInteger(SVT);
+  ID.AddPointer(SV);
+  ID.AddInteger(SVOffset);
+  ID.AddInteger(Alignment);
+  ID.AddInteger(isVolatile);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new StoreSDNode(Ops, VTs, ISD::UNINDEXED, isTrunc,
+                              SVT, SV, SVOffset, Alignment, isVolatile);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand
+SelectionDAG::getIndexedStore(SDOperand OrigStore, SDOperand Base,
+                              SDOperand Offset, ISD::MemIndexedMode AM) {
+  StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+  assert(ST->getOffset().getOpcode() == ISD::UNDEF &&
+         "Store is already an indexed store!");
+  SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+  SDOperand Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(AM);
+  ID.AddInteger(ST->isTruncatingStore());
+  ID.AddInteger(ST->getStoredVT());
+  ID.AddPointer(ST->getSrcValue());
+  ID.AddInteger(ST->getSrcValueOffset());
+  ID.AddInteger(ST->getAlignment());
+  ID.AddInteger(ST->isVolatile());
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDOperand(E, 0);
+  SDNode *N = new StoreSDNode(Ops, VTs, AM,
+                              ST->isTruncatingStore(), ST->getStoredVT(),
+                              ST->getSrcValue(), ST->getSrcValueOffset(),
+                              ST->getAlignment(), ST->isVolatile());
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDOperand(N, 0);
+}
+
+SDOperand SelectionDAG::getVAArg(MVT::ValueType VT,
+                                 SDOperand Chain, SDOperand Ptr,
+                                 SDOperand SV) {
+  SDOperand Ops[] = { Chain, Ptr, SV };
+  return getNode(ISD::VAARG, getVTList(VT, MVT::Other), Ops, 3);
+}
+
+SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
+                                const SDOperand *Ops, unsigned NumOps) {
+  switch (NumOps) {
+  case 0: return getNode(Opcode, VT);
+  case 1: return getNode(Opcode, VT, Ops[0]);
+  case 2: return getNode(Opcode, VT, Ops[0],
Ops[1]); + case 3: return getNode(Opcode, VT, Ops[0], Ops[1], Ops[2]); + default: break; + } + + switch (Opcode) { + default: break; + case ISD::SELECT_CC: { + assert(NumOps == 5 && "SELECT_CC takes 5 operands!"); + assert(Ops[0].getValueType() == Ops[1].getValueType() && + "LHS and RHS of condition must have same type!"); + assert(Ops[2].getValueType() == Ops[3].getValueType() && + "True and False arms of SelectCC must have same type!"); + assert(Ops[2].getValueType() == VT && + "select_cc node must be of same type as true and false value!"); + break; + } + case ISD::BR_CC: { + assert(NumOps == 5 && "BR_CC takes 5 operands!"); + assert(Ops[2].getValueType() == Ops[3].getValueType() && + "LHS/RHS of comparison should match types!"); + break; + } + } + + // Memoize nodes. + SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDOperand(E, 0); + N = new SDNode(Opcode, VTs, Ops, NumOps); + CSEMap.InsertNode(N, IP); + } else { + N = new SDNode(Opcode, VTs, Ops, NumOps); + } + AllNodes.push_back(N); + return SDOperand(N, 0); +} + +SDOperand SelectionDAG::getNode(unsigned Opcode, + std::vector<MVT::ValueType> &ResultTys, + const SDOperand *Ops, unsigned NumOps) { + return getNode(Opcode, getNodeValueTypes(ResultTys), ResultTys.size(), + Ops, NumOps); +} + +SDOperand SelectionDAG::getNode(unsigned Opcode, + const MVT::ValueType *VTs, unsigned NumVTs, + const SDOperand *Ops, unsigned NumOps) { + if (NumVTs == 1) + return getNode(Opcode, VTs[0], Ops, NumOps); + return getNode(Opcode, makeVTList(VTs, NumVTs), Ops, NumOps); +} + +SDOperand SelectionDAG::getNode(unsigned Opcode, SDVTList VTList, + const SDOperand *Ops, unsigned NumOps) { + if (VTList.NumVTs == 1) + return getNode(Opcode, VTList.VTs[0], Ops, NumOps); + + switch (Opcode) { + // FIXME: figure out how to safely handle things like + // int foo(int x) { return 1 << (x & 255); } + // int bar() { return foo(256); } +#if 0 + case ISD::SRA_PARTS: + case ISD::SRL_PARTS: + case ISD::SHL_PARTS: + if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG && + cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1) + return getNode(Opcode, VT, N1, N2, N3.getOperand(0)); + else if (N3.getOpcode() == ISD::AND) + if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) { + // If the and is only masking out bits that cannot effect the shift, + // eliminate the and. + unsigned NumBits = MVT::getSizeInBits(VT)*2; + if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1) + return getNode(Opcode, VT, N1, N2, N3.getOperand(0)); + } + break; +#endif + } + + // Memoize the node unless it returns a flag. 
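+  // As in the other getNode overloads, presumably because MVT::Flag results
+  // model implicit physical-register dependencies, two structurally
+  // identical flag producers must remain distinct nodes; only flag-free
+  // nodes are entered into CSEMap.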
+ SDNode *N; + if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDOperand(E, 0); + if (NumOps == 1) + N = new UnarySDNode(Opcode, VTList, Ops[0]); + else if (NumOps == 2) + N = new BinarySDNode(Opcode, VTList, Ops[0], Ops[1]); + else if (NumOps == 3) + N = new TernarySDNode(Opcode, VTList, Ops[0], Ops[1], Ops[2]); + else + N = new SDNode(Opcode, VTList, Ops, NumOps); + CSEMap.InsertNode(N, IP); + } else { + if (NumOps == 1) + N = new UnarySDNode(Opcode, VTList, Ops[0]); + else if (NumOps == 2) + N = new BinarySDNode(Opcode, VTList, Ops[0], Ops[1]); + else if (NumOps == 3) + N = new TernarySDNode(Opcode, VTList, Ops[0], Ops[1], Ops[2]); + else + N = new SDNode(Opcode, VTList, Ops, NumOps); + } + AllNodes.push_back(N); + return SDOperand(N, 0); +} + +SDVTList SelectionDAG::getVTList(MVT::ValueType VT) { + if (!MVT::isExtendedVT(VT)) + return makeVTList(SDNode::getValueTypeList(VT), 1); + + for (std::list<std::vector<MVT::ValueType> >::iterator I = VTList.begin(), + E = VTList.end(); I != E; ++I) { + if (I->size() == 1 && (*I)[0] == VT) + return makeVTList(&(*I)[0], 1); + } + std::vector<MVT::ValueType> V; + V.push_back(VT); + VTList.push_front(V); + return makeVTList(&(*VTList.begin())[0], 1); +} + +SDVTList SelectionDAG::getVTList(MVT::ValueType VT1, MVT::ValueType VT2) { + for (std::list<std::vector<MVT::ValueType> >::iterator I = VTList.begin(), + E = VTList.end(); I != E; ++I) { + if (I->size() == 2 && (*I)[0] == VT1 && (*I)[1] == VT2) + return makeVTList(&(*I)[0], 2); + } + std::vector<MVT::ValueType> V; + V.push_back(VT1); + V.push_back(VT2); + VTList.push_front(V); + return makeVTList(&(*VTList.begin())[0], 2); +} +SDVTList SelectionDAG::getVTList(MVT::ValueType VT1, MVT::ValueType VT2, + MVT::ValueType VT3) { + for (std::list<std::vector<MVT::ValueType> >::iterator I = VTList.begin(), + E = VTList.end(); I != E; ++I) { + if (I->size() == 3 && (*I)[0] == VT1 && (*I)[1] == VT2 && + (*I)[2] == VT3) + return makeVTList(&(*I)[0], 3); + } + std::vector<MVT::ValueType> V; + V.push_back(VT1); + V.push_back(VT2); + V.push_back(VT3); + VTList.push_front(V); + return makeVTList(&(*VTList.begin())[0], 3); +} + +SDVTList SelectionDAG::getVTList(const MVT::ValueType *VTs, unsigned NumVTs) { + switch (NumVTs) { + case 0: assert(0 && "Cannot have nodes without results!"); + case 1: return getVTList(VTs[0]); + case 2: return getVTList(VTs[0], VTs[1]); + case 3: return getVTList(VTs[0], VTs[1], VTs[2]); + default: break; + } + + for (std::list<std::vector<MVT::ValueType> >::iterator I = VTList.begin(), + E = VTList.end(); I != E; ++I) { + if (I->size() != NumVTs || VTs[0] != (*I)[0] || VTs[1] != (*I)[1]) continue; + + bool NoMatch = false; + for (unsigned i = 2; i != NumVTs; ++i) + if (VTs[i] != (*I)[i]) { + NoMatch = true; + break; + } + if (!NoMatch) + return makeVTList(&*I->begin(), NumVTs); + } + + VTList.push_front(std::vector<MVT::ValueType>(VTs, VTs+NumVTs)); + return makeVTList(&*VTList.begin()->begin(), NumVTs); +} + + +/// UpdateNodeOperands - *Mutate* the specified node in-place to have the +/// specified operands. If the resultant node already exists in the DAG, +/// this does not modify the specified node, instead it returns the node that +/// already exists. If the resultant node does not exist in the DAG, the +/// input node is returned. 
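+/// For example, a client such as Legalize typically rebuilds a node with
+/// legalized operands and must keep using the returned value, since a
+/// different (pre-existing) node may come back:
+///   Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);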
+/// As a degenerate case, if you specify the same input operands as the node
+/// already has, the input node is returned.
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand InN, SDOperand Op) {
+ SDNode *N = InN.Val;
+ assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op == N->getOperand(0)) return InN;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
+ return SDOperand(Existing, InN.ResNo);
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ RemoveNodeFromCSEMaps(N);
+
+ // Now we update the operands.
+ N->OperandList[0].Val->removeUser(N);
+ Op.Val->addUser(N);
+ N->OperandList[0] = Op;
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return InN;
+}
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand InN, SDOperand Op1, SDOperand Op2) {
+ SDNode *N = InN.Val;
+ assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
+ return InN; // No operands changed, just return the input node.
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
+ return SDOperand(Existing, InN.ResNo);
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ RemoveNodeFromCSEMaps(N);
+
+ // Now we update the operands.
+ if (N->OperandList[0] != Op1) {
+ N->OperandList[0].Val->removeUser(N);
+ Op1.Val->addUser(N);
+ N->OperandList[0] = Op1;
+ }
+ if (N->OperandList[1] != Op2) {
+ N->OperandList[1].Val->removeUser(N);
+ Op2.Val->addUser(N);
+ N->OperandList[1] = Op2;
+ }
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return InN;
+}
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand N, SDOperand Op1, SDOperand Op2, SDOperand Op3) {
+ SDOperand Ops[] = { Op1, Op2, Op3 };
+ return UpdateNodeOperands(N, Ops, 3);
+}
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand N, SDOperand Op1, SDOperand Op2,
+ SDOperand Op3, SDOperand Op4) {
+ SDOperand Ops[] = { Op1, Op2, Op3, Op4 };
+ return UpdateNodeOperands(N, Ops, 4);
+}
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand N, SDOperand Op1, SDOperand Op2,
+ SDOperand Op3, SDOperand Op4, SDOperand Op5) {
+ SDOperand Ops[] = { Op1, Op2, Op3, Op4, Op5 };
+ return UpdateNodeOperands(N, Ops, 5);
+}
+
+
+SDOperand SelectionDAG::
+UpdateNodeOperands(SDOperand InN, SDOperand *Ops, unsigned NumOps) {
+ SDNode *N = InN.Val;
+ assert(N->getNumOperands() == NumOps &&
+ "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ bool AnyChange = false;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (Ops[i] != N->getOperand(i)) {
+ AnyChange = true;
+ break;
+ }
+ }
+
+ // No operands changed, just return the input node.
+ if (!AnyChange) return InN;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos))
+ return SDOperand(Existing, InN.ResNo);
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ RemoveNodeFromCSEMaps(N);
+
+ // Now we update the operands.
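+ // For each operand that actually changed, keep the use lists consistent:
+ // drop this node from the old operand's user list and add it to the new
+ // operand's.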
+ for (unsigned i = 0; i != NumOps; ++i) { + if (N->OperandList[i] != Ops[i]) { + N->OperandList[i].Val->removeUser(N); + Ops[i].Val->addUser(N); + N->OperandList[i] = Ops[i]; + } + } + + // If this gets put into a CSE map, add it. + if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return InN; +} + + +/// MorphNodeTo - This frees the operands of the current node, resets the +/// opcode, types, and operands to the specified value. This should only be +/// used by the SelectionDAG class. +void SDNode::MorphNodeTo(unsigned Opc, SDVTList L, + const SDOperand *Ops, unsigned NumOps) { + NodeType = Opc; + ValueList = L.VTs; + NumValues = L.NumVTs; + + // Clear the operands list, updating used nodes to remove this from their + // use list. + for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) + I->Val->removeUser(this); + + // If NumOps is larger than the # of operands we currently have, reallocate + // the operand list. + if (NumOps > NumOperands) { + if (OperandsNeedDelete) + delete [] OperandList; + OperandList = new SDOperand[NumOps]; + OperandsNeedDelete = true; + } + + // Assign the new operands. + NumOperands = NumOps; + + for (unsigned i = 0, e = NumOps; i != e; ++i) { + OperandList[i] = Ops[i]; + SDNode *N = OperandList[i].Val; + N->Uses.push_back(this); + } +} + +/// SelectNodeTo - These are used for target selectors to *mutate* the +/// specified node to have the specified return type, Target opcode, and +/// operands. Note that target opcodes are stored as +/// ISD::BUILTIN_OP_END+TargetOpcode in the node opcode field. +/// +/// Note that SelectNodeTo returns the resultant node. If there is already a +/// node of the specified opcode and operands, it returns that node instead of +/// the current one. +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc, + MVT::ValueType VT) { + SDVTList VTs = getVTList(VT); + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, 0, 0); + void *IP = 0; + if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) + return ON; + + RemoveNodeFromCSEMaps(N); + + N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, 0, 0); + + CSEMap.InsertNode(N, IP); + return N; +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc, + MVT::ValueType VT, SDOperand Op1) { + // If an identical node already exists, use it. + SDVTList VTs = getVTList(VT); + SDOperand Ops[] = { Op1 }; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 1); + void *IP = 0; + if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) + return ON; + + RemoveNodeFromCSEMaps(N); + N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 1); + CSEMap.InsertNode(N, IP); + return N; +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc, + MVT::ValueType VT, SDOperand Op1, + SDOperand Op2) { + // If an identical node already exists, use it. + SDVTList VTs = getVTList(VT); + SDOperand Ops[] = { Op1, Op2 }; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 2); + void *IP = 0; + if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) + return ON; + + RemoveNodeFromCSEMaps(N); + + N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 2); + + CSEMap.InsertNode(N, IP); // Memoize the new node. + return N; +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc, + MVT::ValueType VT, SDOperand Op1, + SDOperand Op2, SDOperand Op3) { + // If an identical node already exists, use it. 
+ SDVTList VTs = getVTList(VT); + SDOperand Ops[] = { Op1, Op2, Op3 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 3); + void *IP = 0; + if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) + return ON; + + RemoveNodeFromCSEMaps(N); + + N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 3); + + CSEMap.InsertNode(N, IP); // Memoize the new node. + return N; +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc, + MVT::ValueType VT, const SDOperand *Ops, + unsigned NumOps) { + // If an identical node already exists, use it. + SDVTList VTs = getVTList(VT); + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, NumOps); + void *IP = 0; + if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) + return ON; + + RemoveNodeFromCSEMaps(N); + N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, NumOps); + + CSEMap.InsertNode(N, IP); // Memoize the new node. + return N; +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc, + MVT::ValueType VT1, MVT::ValueType VT2, + SDOperand Op1, SDOperand Op2) { + SDVTList VTs = getVTList(VT1, VT2); + FoldingSetNodeID ID; + SDOperand Ops[] = { Op1, Op2 }; + AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 2); + void *IP = 0; + if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) + return ON; + + RemoveNodeFromCSEMaps(N); + N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 2); + CSEMap.InsertNode(N, IP); // Memoize the new node. + return N; +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned TargetOpc, + MVT::ValueType VT1, MVT::ValueType VT2, + SDOperand Op1, SDOperand Op2, + SDOperand Op3) { + // If an identical node already exists, use it. + SDVTList VTs = getVTList(VT1, VT2); + SDOperand Ops[] = { Op1, Op2, Op3 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 3); + void *IP = 0; + if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) + return ON; + + RemoveNodeFromCSEMaps(N); + + N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, Ops, 3); + CSEMap.InsertNode(N, IP); // Memoize the new node. + return N; +} + + +/// getTargetNode - These are used for target selectors to create a new node +/// with specified return type(s), target opcode, and operands. +/// +/// Note that getTargetNode returns the resultant node. If there is already a +/// node of the specified opcode and operands, it returns that node instead of +/// the current one. 
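+/// For example (operand values illustrative), a target's selector can build a
+/// machine node directly from its generated instruction enum:
+///   SDNode *ResNode = CurDAG->getTargetNode(X86::ADD32rr, MVT::i32, LHS, RHS);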
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT) { + return getNode(ISD::BUILTIN_OP_END+Opcode, VT).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Op1) { + return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Op1, SDOperand Op2) { + return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1, Op2).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT, + SDOperand Op1, SDOperand Op2, + SDOperand Op3) { + return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Op1, Op2, Op3).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT, + const SDOperand *Ops, unsigned NumOps) { + return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Ops, NumOps).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, SDOperand Op1) { + const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2); + return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 2, &Op1, 1).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, SDOperand Op1, + SDOperand Op2) { + const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2); + SDOperand Ops[] = { Op1, Op2 }; + return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 2, Ops, 2).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, SDOperand Op1, + SDOperand Op2, SDOperand Op3) { + const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2); + SDOperand Ops[] = { Op1, Op2, Op3 }; + return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 2, Ops, 3).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, + const SDOperand *Ops, unsigned NumOps) { + const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2); + return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 2, Ops, NumOps).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, MVT::ValueType VT3, + SDOperand Op1, SDOperand Op2) { + const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2, VT3); + SDOperand Ops[] = { Op1, Op2 }; + return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 3, Ops, 2).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, MVT::ValueType VT3, + SDOperand Op1, SDOperand Op2, + SDOperand Op3) { + const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2, VT3); + SDOperand Ops[] = { Op1, Op2, Op3 }; + return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 3, Ops, 3).Val; +} +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2, MVT::ValueType VT3, + const SDOperand *Ops, unsigned NumOps) { + const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2, VT3); + return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 3, Ops, NumOps).Val; +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version assumes From/To have a single result value. +/// +void SelectionDAG::ReplaceAllUsesWith(SDOperand FromN, SDOperand ToN, + std::vector<SDNode*> *Deleted) { + SDNode *From = FromN.Val, *To = ToN.Val; + assert(From->getNumValues() == 1 && To->getNumValues() == 1 && + "Cannot replace with this method!"); + assert(From != To && "Cannot replace uses of with self"); + + while (!From->use_empty()) { + // Process users until they are all gone. 
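+ // Each iteration rewrites every operand of one user that points to From,
+ // so that user drops off From's use list and the loop makes progress.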
+ SDNode *U = *From->use_begin(); + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(U); + + for (SDOperand *I = U->OperandList, *E = U->OperandList+U->NumOperands; + I != E; ++I) + if (I->Val == From) { + From->removeUser(U); + I->Val = To; + To->addUser(U); + } + + // Now that we have modified U, add it back to the CSE maps. If it already + // exists there, recursively merge the results together. + if (SDNode *Existing = AddNonLeafNodeToCSEMaps(U)) { + ReplaceAllUsesWith(U, Existing, Deleted); + // U is now dead. + if (Deleted) Deleted->push_back(U); + DeleteNodeNotInCSEMaps(U); + } + } +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version assumes From/To have matching types and numbers of result +/// values. +/// +void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, + std::vector<SDNode*> *Deleted) { + assert(From != To && "Cannot replace uses of with self"); + assert(From->getNumValues() == To->getNumValues() && + "Cannot use this version of ReplaceAllUsesWith!"); + if (From->getNumValues() == 1) { // If possible, use the faster version. + ReplaceAllUsesWith(SDOperand(From, 0), SDOperand(To, 0), Deleted); + return; + } + + while (!From->use_empty()) { + // Process users until they are all gone. + SDNode *U = *From->use_begin(); + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(U); + + for (SDOperand *I = U->OperandList, *E = U->OperandList+U->NumOperands; + I != E; ++I) + if (I->Val == From) { + From->removeUser(U); + I->Val = To; + To->addUser(U); + } + + // Now that we have modified U, add it back to the CSE maps. If it already + // exists there, recursively merge the results together. + if (SDNode *Existing = AddNonLeafNodeToCSEMaps(U)) { + ReplaceAllUsesWith(U, Existing, Deleted); + // U is now dead. + if (Deleted) Deleted->push_back(U); + DeleteNodeNotInCSEMaps(U); + } + } +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version can replace From with any result values. To must match the +/// number and types of values returned by From. +void SelectionDAG::ReplaceAllUsesWith(SDNode *From, + const SDOperand *To, + std::vector<SDNode*> *Deleted) { + if (From->getNumValues() == 1 && To[0].Val->getNumValues() == 1) { + // Degenerate case handled above. + ReplaceAllUsesWith(SDOperand(From, 0), To[0], Deleted); + return; + } + + while (!From->use_empty()) { + // Process users until they are all gone. + SDNode *U = *From->use_begin(); + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(U); + + for (SDOperand *I = U->OperandList, *E = U->OperandList+U->NumOperands; + I != E; ++I) + if (I->Val == From) { + const SDOperand &ToOp = To[I->ResNo]; + From->removeUser(U); + *I = ToOp; + ToOp.Val->addUser(U); + } + + // Now that we have modified U, add it back to the CSE maps. If it already + // exists there, recursively merge the results together. + if (SDNode *Existing = AddNonLeafNodeToCSEMaps(U)) { + ReplaceAllUsesWith(U, Existing, Deleted); + // U is now dead. + if (Deleted) Deleted->push_back(U); + DeleteNodeNotInCSEMaps(U); + } + } +} + +/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving +/// uses of other values produced by From.Val alone. 
The Deleted vector is
+/// handled the same way as for ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDOperand From, SDOperand To,
+ std::vector<SDNode*> &Deleted) {
+ assert(From != To && "Cannot replace a value with itself");
+ // Handle the simple, trivial case efficiently.
+ if (From.Val->getNumValues() == 1 && To.Val->getNumValues() == 1) {
+ ReplaceAllUsesWith(From, To, &Deleted);
+ return;
+ }
+
+ // Get all of the users of From.Val. We want these in a nice,
+ // deterministically ordered and uniqued set, so we use a SmallSetVector.
+ SmallSetVector<SDNode*, 16> Users(From.Val->use_begin(), From.Val->use_end());
+
+ while (!Users.empty()) {
+ // We know that this user uses some value of From. If it is the right
+ // value, update it.
+ SDNode *User = Users.back();
+ Users.pop_back();
+
+ for (SDOperand *Op = User->OperandList,
+ *E = User->OperandList+User->NumOperands; Op != E; ++Op) {
+ if (*Op == From) {
+ // Okay, we know this user needs to be updated. Remove its old self
+ // from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // Update all operands that match "From".
+ for (; Op != E; ++Op) {
+ if (*Op == From) {
+ From.Val->removeUser(User);
+ *Op = To;
+ To.Val->addUser(User);
+ }
+ }
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ if (SDNode *Existing = AddNonLeafNodeToCSEMaps(User)) {
+ unsigned NumDeleted = Deleted.size();
+ ReplaceAllUsesWith(User, Existing, &Deleted);
+
+ // User is now dead.
+ Deleted.push_back(User);
+ DeleteNodeNotInCSEMaps(User);
+
+ // We have to be careful here, because ReplaceAllUsesWith could have
+ // deleted a user of From, which means there may be dangling pointers
+ // in the "Users" setvector. Scan over the deleted node pointers and
+ // remove them from the setvector.
+ for (unsigned i = NumDeleted, e = Deleted.size(); i != e; ++i)
+ Users.remove(Deleted[i]);
+ }
+ break; // Exit the operand scanning loop.
+ }
+ }
+ }
+}
+
+
+/// AssignNodeIds - Assign a unique node id for each node in the DAG based on
+/// their allnodes order. It returns the maximum id.
+unsigned SelectionDAG::AssignNodeIds() {
+ unsigned Id = 0;
+ for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) {
+ SDNode *N = I;
+ N->setNodeId(Id++);
+ }
+ return Id;
+}
+
+/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
+/// based on their topological order. It returns the maximum id and a vector
+/// of the SDNodes* in assigned order by reference.
+unsigned SelectionDAG::AssignTopologicalOrder(std::vector<SDNode*> &TopOrder) {
+ unsigned DAGSize = AllNodes.size();
+ std::vector<unsigned> InDegree(DAGSize);
+ std::vector<SDNode*> Sources;
+
+ // Use a two pass approach to avoid using a std::map which is slow.
+ unsigned Id = 0;
+ for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) {
+ SDNode *N = I;
+ N->setNodeId(Id++);
+ unsigned Degree = N->use_size();
+ InDegree[N->getNodeId()] = Degree;
+ if (Degree == 0)
+ Sources.push_back(N);
+ }
+
+ TopOrder.clear();
+ while (!Sources.empty()) {
+ SDNode *N = Sources.back();
+ Sources.pop_back();
+ TopOrder.push_back(N);
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
+ SDNode *P = I->Val;
+ unsigned Degree = --InDegree[P->getNodeId()];
+ if (Degree == 0)
+ Sources.push_back(P);
+ }
+ }
+
+ // Second pass, assign the actual topological order as node ids.
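+ // After this, every node's id is smaller than the ids of all of its
+ // operands, since the walk above visits each node before the nodes it uses.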
+ Id = 0; + for (std::vector<SDNode*>::iterator TI = TopOrder.begin(),TE = TopOrder.end(); + TI != TE; ++TI) + (*TI)->setNodeId(Id++); + + return Id; +} + + + +//===----------------------------------------------------------------------===// +// SDNode Class +//===----------------------------------------------------------------------===// + +// Out-of-line virtual method to give class a home. +void SDNode::ANCHOR() {} +void UnarySDNode::ANCHOR() {} +void BinarySDNode::ANCHOR() {} +void TernarySDNode::ANCHOR() {} +void HandleSDNode::ANCHOR() {} +void StringSDNode::ANCHOR() {} +void ConstantSDNode::ANCHOR() {} +void ConstantFPSDNode::ANCHOR() {} +void GlobalAddressSDNode::ANCHOR() {} +void FrameIndexSDNode::ANCHOR() {} +void JumpTableSDNode::ANCHOR() {} +void ConstantPoolSDNode::ANCHOR() {} +void BasicBlockSDNode::ANCHOR() {} +void SrcValueSDNode::ANCHOR() {} +void RegisterSDNode::ANCHOR() {} +void ExternalSymbolSDNode::ANCHOR() {} +void CondCodeSDNode::ANCHOR() {} +void VTSDNode::ANCHOR() {} +void LoadSDNode::ANCHOR() {} +void StoreSDNode::ANCHOR() {} + +HandleSDNode::~HandleSDNode() { + SDVTList VTs = { 0, 0 }; + MorphNodeTo(ISD::HANDLENODE, VTs, 0, 0); // Drops operand uses. +} + +GlobalAddressSDNode::GlobalAddressSDNode(bool isTarget, const GlobalValue *GA, + MVT::ValueType VT, int o) + : SDNode(isa<GlobalVariable>(GA) && + dyn_cast<GlobalVariable>(GA)->isThreadLocal() ? + // Thread Local + (isTarget ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress) : + // Non Thread Local + (isTarget ? ISD::TargetGlobalAddress : ISD::GlobalAddress), + getSDVTList(VT)), Offset(o) { + TheGlobal = const_cast<GlobalValue*>(GA); +} + +/// Profile - Gather unique data for the node. +/// +void SDNode::Profile(FoldingSetNodeID &ID) { + AddNodeIDNode(ID, this); +} + +/// getValueTypeList - Return a pointer to the specified value type. +/// +MVT::ValueType *SDNode::getValueTypeList(MVT::ValueType VT) { + static MVT::ValueType VTs[MVT::LAST_VALUETYPE]; + VTs[VT] = VT; + return &VTs[VT]; +} + +/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the +/// indicated value. This method ignores uses of other values defined by this +/// operation. +bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const { + assert(Value < getNumValues() && "Bad value!"); + + // If there is only one value, this is easy. + if (getNumValues() == 1) + return use_size() == NUses; + if (Uses.size() < NUses) return false; + + SDOperand TheValue(const_cast<SDNode *>(this), Value); + + SmallPtrSet<SDNode*, 32> UsersHandled; + + for (SDNode::use_iterator UI = Uses.begin(), E = Uses.end(); UI != E; ++UI) { + SDNode *User = *UI; + if (User->getNumOperands() == 1 || + UsersHandled.insert(User)) // First time we've seen this? + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) + if (User->getOperand(i) == TheValue) { + if (NUses == 0) + return false; // too many uses + --NUses; + } + } + + // Found exactly the right number of uses? + return NUses == 0; +} + + +/// isOnlyUse - Return true if this node is the only use of N. +/// +bool SDNode::isOnlyUse(SDNode *N) const { + bool Seen = false; + for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { + SDNode *User = *I; + if (User == this) + Seen = true; + else + return false; + } + + return Seen; +} + +/// isOperand - Return true if this node is an operand of N. 
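+/// (This checks the direct operands only; isPredecessor, below, answers the
+/// transitive version of the question.)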
+/// +bool SDOperand::isOperand(SDNode *N) const { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (*this == N->getOperand(i)) + return true; + return false; +} + +bool SDNode::isOperand(SDNode *N) const { + for (unsigned i = 0, e = N->NumOperands; i != e; ++i) + if (this == N->OperandList[i].Val) + return true; + return false; +} + +static void findPredecessor(SDNode *N, const SDNode *P, bool &found, + SmallPtrSet<SDNode *, 32> &Visited) { + if (found || !Visited.insert(N)) + return; + + for (unsigned i = 0, e = N->getNumOperands(); !found && i != e; ++i) { + SDNode *Op = N->getOperand(i).Val; + if (Op == P) { + found = true; + return; + } + findPredecessor(Op, P, found, Visited); + } +} + +/// isPredecessor - Return true if this node is a predecessor of N. This node +/// is either an operand of N or it can be reached by recursively traversing +/// up the operands. +/// NOTE: this is an expensive method. Use it carefully. +bool SDNode::isPredecessor(SDNode *N) const { + SmallPtrSet<SDNode *, 32> Visited; + bool found = false; + findPredecessor(N, this, found, Visited); + return found; +} + +uint64_t SDNode::getConstantOperandVal(unsigned Num) const { + assert(Num < NumOperands && "Invalid child # of SDNode!"); + return cast<ConstantSDNode>(OperandList[Num])->getValue(); +} + +std::string SDNode::getOperationName(const SelectionDAG *G) const { + switch (getOpcode()) { + default: + if (getOpcode() < ISD::BUILTIN_OP_END) + return "<<Unknown DAG Node>>"; + else { + if (G) { + if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) + if (getOpcode()-ISD::BUILTIN_OP_END < TII->getNumOpcodes()) + return TII->getName(getOpcode()-ISD::BUILTIN_OP_END); + + TargetLowering &TLI = G->getTargetLoweringInfo(); + const char *Name = + TLI.getTargetNodeName(getOpcode()); + if (Name) return Name; + } + + return "<<Unknown Target Node>>"; + } + + case ISD::PCMARKER: return "PCMarker"; + case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; + case ISD::SRCVALUE: return "SrcValue"; + case ISD::EntryToken: return "EntryToken"; + case ISD::TokenFactor: return "TokenFactor"; + case ISD::AssertSext: return "AssertSext"; + case ISD::AssertZext: return "AssertZext"; + + case ISD::STRING: return "String"; + case ISD::BasicBlock: return "BasicBlock"; + case ISD::VALUETYPE: return "ValueType"; + case ISD::Register: return "Register"; + + case ISD::Constant: return "Constant"; + case ISD::ConstantFP: return "ConstantFP"; + case ISD::GlobalAddress: return "GlobalAddress"; + case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; + case ISD::FrameIndex: return "FrameIndex"; + case ISD::JumpTable: return "JumpTable"; + case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; + case ISD::RETURNADDR: return "RETURNADDR"; + case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; + case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; + case ISD::EHSELECTION: return "EHSELECTION"; + case ISD::EH_RETURN: return "EH_RETURN"; + case ISD::ConstantPool: return "ConstantPool"; + case ISD::ExternalSymbol: return "ExternalSymbol"; + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IID = cast<ConstantSDNode>(getOperand(0))->getValue(); + return Intrinsic::getName((Intrinsic::ID)IID); + } + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: { + unsigned IID = cast<ConstantSDNode>(getOperand(1))->getValue(); + return Intrinsic::getName((Intrinsic::ID)IID); + } + + case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; + case ISD::TargetConstant: return "TargetConstant"; + 
case ISD::TargetConstantFP:return "TargetConstantFP"; + case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; + case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; + case ISD::TargetFrameIndex: return "TargetFrameIndex"; + case ISD::TargetJumpTable: return "TargetJumpTable"; + case ISD::TargetConstantPool: return "TargetConstantPool"; + case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; + + case ISD::CopyToReg: return "CopyToReg"; + case ISD::CopyFromReg: return "CopyFromReg"; + case ISD::UNDEF: return "undef"; + case ISD::MERGE_VALUES: return "merge_values"; + case ISD::INLINEASM: return "inlineasm"; + case ISD::LABEL: return "label"; + case ISD::HANDLENODE: return "handlenode"; + case ISD::FORMAL_ARGUMENTS: return "formal_arguments"; + case ISD::CALL: return "call"; + + // Unary operators + case ISD::FABS: return "fabs"; + case ISD::FNEG: return "fneg"; + case ISD::FSQRT: return "fsqrt"; + case ISD::FSIN: return "fsin"; + case ISD::FCOS: return "fcos"; + case ISD::FPOWI: return "fpowi"; + + // Binary operators + case ISD::ADD: return "add"; + case ISD::SUB: return "sub"; + case ISD::MUL: return "mul"; + case ISD::MULHU: return "mulhu"; + case ISD::MULHS: return "mulhs"; + case ISD::SDIV: return "sdiv"; + case ISD::UDIV: return "udiv"; + case ISD::SREM: return "srem"; + case ISD::UREM: return "urem"; + case ISD::AND: return "and"; + case ISD::OR: return "or"; + case ISD::XOR: return "xor"; + case ISD::SHL: return "shl"; + case ISD::SRA: return "sra"; + case ISD::SRL: return "srl"; + case ISD::ROTL: return "rotl"; + case ISD::ROTR: return "rotr"; + case ISD::FADD: return "fadd"; + case ISD::FSUB: return "fsub"; + case ISD::FMUL: return "fmul"; + case ISD::FDIV: return "fdiv"; + case ISD::FREM: return "frem"; + case ISD::FCOPYSIGN: return "fcopysign"; + + case ISD::SETCC: return "setcc"; + case ISD::SELECT: return "select"; + case ISD::SELECT_CC: return "select_cc"; + case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; + case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; + case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; + case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; + case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::CARRY_FALSE: return "carry_false"; + case ISD::ADDC: return "addc"; + case ISD::ADDE: return "adde"; + case ISD::SUBC: return "subc"; + case ISD::SUBE: return "sube"; + case ISD::SHL_PARTS: return "shl_parts"; + case ISD::SRA_PARTS: return "sra_parts"; + case ISD::SRL_PARTS: return "srl_parts"; + + // Conversion operators. 
+ case ISD::SIGN_EXTEND: return "sign_extend"; + case ISD::ZERO_EXTEND: return "zero_extend"; + case ISD::ANY_EXTEND: return "any_extend"; + case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::TRUNCATE: return "truncate"; + case ISD::FP_ROUND: return "fp_round"; + case ISD::FP_ROUND_INREG: return "fp_round_inreg"; + case ISD::FP_EXTEND: return "fp_extend"; + + case ISD::SINT_TO_FP: return "sint_to_fp"; + case ISD::UINT_TO_FP: return "uint_to_fp"; + case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::BIT_CONVERT: return "bit_convert"; + + // Control flow instructions + case ISD::BR: return "br"; + case ISD::BRIND: return "brind"; + case ISD::BR_JT: return "br_jt"; + case ISD::BRCOND: return "brcond"; + case ISD::BR_CC: return "br_cc"; + case ISD::RET: return "ret"; + case ISD::CALLSEQ_START: return "callseq_start"; + case ISD::CALLSEQ_END: return "callseq_end"; + + // Other operators + case ISD::LOAD: return "load"; + case ISD::STORE: return "store"; + case ISD::VAARG: return "vaarg"; + case ISD::VACOPY: return "vacopy"; + case ISD::VAEND: return "vaend"; + case ISD::VASTART: return "vastart"; + case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; + case ISD::EXTRACT_ELEMENT: return "extract_element"; + case ISD::BUILD_PAIR: return "build_pair"; + case ISD::STACKSAVE: return "stacksave"; + case ISD::STACKRESTORE: return "stackrestore"; + + // Block memory operations. + case ISD::MEMSET: return "memset"; + case ISD::MEMCPY: return "memcpy"; + case ISD::MEMMOVE: return "memmove"; + + // Bit manipulation + case ISD::BSWAP: return "bswap"; + case ISD::CTPOP: return "ctpop"; + case ISD::CTTZ: return "cttz"; + case ISD::CTLZ: return "ctlz"; + + // Debug info + case ISD::LOCATION: return "location"; + case ISD::DEBUG_LOC: return "debug_loc"; + + case ISD::CONDCODE: + switch (cast<CondCodeSDNode>(this)->get()) { + default: assert(0 && "Unknown setcc condition!"); + case ISD::SETOEQ: return "setoeq"; + case ISD::SETOGT: return "setogt"; + case ISD::SETOGE: return "setoge"; + case ISD::SETOLT: return "setolt"; + case ISD::SETOLE: return "setole"; + case ISD::SETONE: return "setone"; + + case ISD::SETO: return "seto"; + case ISD::SETUO: return "setuo"; + case ISD::SETUEQ: return "setue"; + case ISD::SETUGT: return "setugt"; + case ISD::SETUGE: return "setuge"; + case ISD::SETULT: return "setult"; + case ISD::SETULE: return "setule"; + case ISD::SETUNE: return "setune"; + + case ISD::SETEQ: return "seteq"; + case ISD::SETGT: return "setgt"; + case ISD::SETGE: return "setge"; + case ISD::SETLT: return "setlt"; + case ISD::SETLE: return "setle"; + case ISD::SETNE: return "setne"; + } + } +} + +const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { + switch (AM) { + default: + return ""; + case ISD::PRE_INC: + return "<pre-inc>"; + case ISD::PRE_DEC: + return "<pre-dec>"; + case ISD::POST_INC: + return "<post-inc>"; + case ISD::POST_DEC: + return "<post-dec>"; + } +} + +void SDNode::dump() const { dump(0); } +void SDNode::dump(const SelectionDAG *G) const { + cerr << (void*)this << ": "; + + for (unsigned i = 0, e = getNumValues(); i != e; ++i) { + if (i) cerr << ","; + if (getValueType(i) == MVT::Other) + cerr << "ch"; + else + cerr << MVT::getValueTypeString(getValueType(i)); + } + cerr << " = " << getOperationName(G); + + cerr << " "; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (i) cerr << ", "; + cerr << (void*)getOperand(i).Val; + if (unsigned RN = getOperand(i).ResNo) + cerr << ":" << RN; + } + + if 
(const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+ cerr << "<" << CSDN->getValue() << ">";
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+ cerr << "<" << CSDN->getValue() << ">";
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(this)) {
+ int offset = GADN->getOffset();
+ cerr << "<";
+ WriteAsOperand(*cerr.stream(), GADN->getGlobal()) << ">";
+ if (offset > 0)
+ cerr << " + " << offset;
+ else
+ cerr << " " << offset;
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+ cerr << "<" << FIDN->getIndex() << ">";
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+ cerr << "<" << JTDN->getIndex() << ">";
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)) {
+ int offset = CP->getOffset();
+ if (CP->isMachineConstantPoolEntry())
+ cerr << "<" << *CP->getMachineCPVal() << ">";
+ else
+ cerr << "<" << *CP->getConstVal() << ">";
+ if (offset > 0)
+ cerr << " + " << offset;
+ else
+ cerr << " " << offset;
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+ cerr << "<";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ cerr << LBB->getName() << " ";
+ cerr << (const void*)BBDN->getBasicBlock() << ">";
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+ if (G && R->getReg() && MRegisterInfo::isPhysicalRegister(R->getReg())) {
+ cerr << " " << G->getTarget().getRegisterInfo()->getName(R->getReg());
+ } else {
+ cerr << " #" << R->getReg();
+ }
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(this)) {
+ cerr << "'" << ES->getSymbol() << "'";
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+ if (M->getValue())
+ cerr << "<" << M->getValue() << ":" << M->getOffset() << ">";
+ else
+ cerr << "<null:" << M->getOffset() << ">";
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+ cerr << ":" << MVT::getValueTypeString(N->getVT());
+ } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD:
+ cerr << " <anyext ";
+ break;
+ case ISD::SEXTLOAD:
+ cerr << " <sext ";
+ break;
+ case ISD::ZEXTLOAD:
+ cerr << " <zext ";
+ break;
+ }
+ if (doExt)
+ cerr << MVT::getValueTypeString(LD->getLoadedVT()) << ">";
+
+ const char *AM = getIndexedModeName(LD->getAddressingMode());
+ if (*AM)
+ cerr << " " << AM;
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+ if (ST->isTruncatingStore())
+ cerr << " <trunc "
+ << MVT::getValueTypeString(ST->getStoredVT()) << ">";
+
+ const char *AM = getIndexedModeName(ST->getAddressingMode());
+ if (*AM)
+ cerr << " " << AM;
+ }
+}
+
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).Val->hasOneUse())
+ DumpNodes(N->getOperand(i).Val, indent+2, G);
+ else
+ cerr << "\n" << std::string(indent+2, ' ')
+ << (void*)N->getOperand(i).Val << ": <multiple use>";
+
+
+ cerr << "\n" << std::string(indent, ' ');
+ N->dump(G);
+}
+
+void SelectionDAG::dump() const {
+ cerr << "SelectionDAG has " << AllNodes.size() << " nodes:";
+ std::vector<const SDNode*> Nodes;
+ for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+ I != E; ++I)
+ Nodes.push_back(I);
+
+ std::sort(Nodes.begin(), Nodes.end());
+
+ for (unsigned
i = 0, e = Nodes.size(); i != e; ++i) { + if (!Nodes[i]->hasOneUse() && Nodes[i] != getRoot().Val) + DumpNodes(Nodes[i], 2, this); + } + + if (getRoot().Val) DumpNodes(getRoot().Val, 2, this); + + cerr << "\n\n"; +} + +const Type *ConstantPoolSDNode::getType() const { + if (isMachineConstantPoolEntry()) + return Val.MachineCPVal->getType(); + return Val.ConstVal->getType(); +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp new file mode 100644 index 0000000..9256faa --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -0,0 +1,4904 @@ +//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAGISel class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "isel" +#include "llvm/ADT/BitVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Constants.h" +#include "llvm/CallingConv.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/ParameterAttributes.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SSARegMap.h" +#include "llvm/Target/MRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include <algorithm> +using namespace llvm; + +#ifndef NDEBUG +static cl::opt<bool> +ViewISelDAGs("view-isel-dags", cl::Hidden, + cl::desc("Pop up a window to show isel dags as they are selected")); +static cl::opt<bool> +ViewSchedDAGs("view-sched-dags", cl::Hidden, + cl::desc("Pop up a window to show sched dags as they are processed")); +#else +static const bool ViewISelDAGs = 0, ViewSchedDAGs = 0; +#endif + +//===---------------------------------------------------------------------===// +/// +/// RegisterScheduler class - Track the registration of instruction schedulers. +/// +//===---------------------------------------------------------------------===// +MachinePassRegistry RegisterScheduler::Registry; + +//===---------------------------------------------------------------------===// +/// +/// ISHeuristic command line option for instruction schedulers. 
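+/// (Exposed on the llc command line as -pre-RA-sched=<name>; the "default"
+/// entry below simply forwards to createDefaultScheduler.)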
+/// +//===---------------------------------------------------------------------===// +namespace { + cl::opt<RegisterScheduler::FunctionPassCtor, false, + RegisterPassParser<RegisterScheduler> > + ISHeuristic("pre-RA-sched", + cl::init(&createDefaultScheduler), + cl::desc("Instruction schedulers available (before register allocation):")); + + static RegisterScheduler + defaultListDAGScheduler("default", " Best scheduler for the target", + createDefaultScheduler); +} // namespace + +namespace { struct AsmOperandInfo; } + +namespace { + /// RegsForValue - This struct represents the physical registers that a + /// particular value is assigned and the type information about the value. + /// This is needed because values can be promoted into larger registers and + /// expanded into multiple smaller registers than the value. + struct VISIBILITY_HIDDEN RegsForValue { + /// Regs - This list holds the register (for legal and promoted values) + /// or register set (for expanded values) that the value should be assigned + /// to. + std::vector<unsigned> Regs; + + /// RegVT - The value type of each register. + /// + MVT::ValueType RegVT; + + /// ValueVT - The value type of the LLVM value, which may be promoted from + /// RegVT or made from merging the two expanded parts. + MVT::ValueType ValueVT; + + RegsForValue() : RegVT(MVT::Other), ValueVT(MVT::Other) {} + + RegsForValue(unsigned Reg, MVT::ValueType regvt, MVT::ValueType valuevt) + : RegVT(regvt), ValueVT(valuevt) { + Regs.push_back(Reg); + } + RegsForValue(const std::vector<unsigned> ®s, + MVT::ValueType regvt, MVT::ValueType valuevt) + : Regs(regs), RegVT(regvt), ValueVT(valuevt) { + } + + /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from + /// this value and returns the result as a ValueVT value. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + SDOperand getCopyFromRegs(SelectionDAG &DAG, + SDOperand &Chain, SDOperand *Flag) const; + + /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the + /// specified value into the registers specified by this object. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + void getCopyToRegs(SDOperand Val, SelectionDAG &DAG, + SDOperand &Chain, SDOperand *Flag) const; + + /// AddInlineAsmOperands - Add this value to the specified inlineasm node + /// operand list. This adds the code marker and includes the number of + /// values added into it. + void AddInlineAsmOperands(unsigned Code, SelectionDAG &DAG, + std::vector<SDOperand> &Ops) const; + }; +} + +namespace llvm { + //===--------------------------------------------------------------------===// + /// createDefaultScheduler - This creates an instruction scheduler appropriate + /// for the target. 
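+ /// Targets that prefer scheduling for latency get the top-down list
+ /// scheduler; all others get the bottom-up register-reduction list
+ /// scheduler.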
+ ScheduleDAG* createDefaultScheduler(SelectionDAGISel *IS, + SelectionDAG *DAG, + MachineBasicBlock *BB) { + TargetLowering &TLI = IS->getTargetLowering(); + + if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency) { + return createTDListDAGScheduler(IS, DAG, BB); + } else { + assert(TLI.getSchedulingPreference() == + TargetLowering::SchedulingForRegPressure && "Unknown sched type!"); + return createBURRListDAGScheduler(IS, DAG, BB); + } + } + + + //===--------------------------------------------------------------------===// + /// FunctionLoweringInfo - This contains information that is global to a + /// function that is used when lowering a region of the function. + class FunctionLoweringInfo { + public: + TargetLowering &TLI; + Function &Fn; + MachineFunction &MF; + SSARegMap *RegMap; + + FunctionLoweringInfo(TargetLowering &TLI, Function &Fn,MachineFunction &MF); + + /// MBBMap - A mapping from LLVM basic blocks to their machine code entry. + std::map<const BasicBlock*, MachineBasicBlock *> MBBMap; + + /// ValueMap - Since we emit code for the function a basic block at a time, + /// we must remember which virtual registers hold the values for + /// cross-basic-block values. + DenseMap<const Value*, unsigned> ValueMap; + + /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in + /// the entry block. This allows the allocas to be efficiently referenced + /// anywhere in the function. + std::map<const AllocaInst*, int> StaticAllocaMap; + +#ifndef NDEBUG + SmallSet<Instruction*, 8> CatchInfoLost; + SmallSet<Instruction*, 8> CatchInfoFound; +#endif + + unsigned MakeReg(MVT::ValueType VT) { + return RegMap->createVirtualRegister(TLI.getRegClassFor(VT)); + } + + /// isExportedInst - Return true if the specified value is an instruction + /// exported from its block. + bool isExportedInst(const Value *V) { + return ValueMap.count(V); + } + + unsigned CreateRegForValue(const Value *V); + + unsigned InitializeRegForValue(const Value *V) { + unsigned &R = ValueMap[V]; + assert(R == 0 && "Already initialized this value register!"); + return R = CreateRegForValue(V); + } + }; +} + +/// isSelector - Return true if this instruction is a call to the +/// eh.selector intrinsic. +static bool isSelector(Instruction *I) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + return II->getIntrinsicID() == Intrinsic::eh_selector; + return false; +} + +/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by +/// PHI nodes or outside of the basic block that defines it, or used by a +/// switch instruction, which may expand to multiple basic blocks. +static bool isUsedOutsideOfDefiningBlock(Instruction *I) { + if (isa<PHINode>(I)) return true; + BasicBlock *BB = I->getParent(); + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) + if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI) || + // FIXME: Remove switchinst special case. + isa<SwitchInst>(*UI)) + return true; + return false; +} + +/// isOnlyUsedInEntryBlock - If the specified argument is only used in the +/// entry block, return true. This includes arguments used by switches, since +/// the switch may expand into multiple basic blocks. +static bool isOnlyUsedInEntryBlock(Argument *A) { + BasicBlock *Entry = A->getParent()->begin(); + for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI) + if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI)) + return false; // Use not in entry block. 
+ return true; +} + +FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli, + Function &fn, MachineFunction &mf) + : TLI(tli), Fn(fn), MF(mf), RegMap(MF.getSSARegMap()) { + + // Create a vreg for each argument register that is not dead and is used + // outside of the entry block for the function. + for (Function::arg_iterator AI = Fn.arg_begin(), E = Fn.arg_end(); + AI != E; ++AI) + if (!isOnlyUsedInEntryBlock(AI)) + InitializeRegForValue(AI); + + // Initialize the mapping of values to registers. This is only set up for + // instruction values that are used outside of the block that defines + // them. + Function::iterator BB = Fn.begin(), EB = Fn.end(); + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) + if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { + const Type *Ty = AI->getAllocatedType(); + uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty); + unsigned Align = + std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), + AI->getAlignment()); + + TySize *= CUI->getZExtValue(); // Get total allocated size. + if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. + StaticAllocaMap[AI] = + MF.getFrameInfo()->CreateStackObject(TySize, Align); + } + + for (; BB != EB; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I)) + if (!isa<AllocaInst>(I) || + !StaticAllocaMap.count(cast<AllocaInst>(I))) + InitializeRegForValue(I); + + // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This + // also creates the initial PHI MachineInstrs, though none of the input + // operands are populated. + for (BB = Fn.begin(), EB = Fn.end(); BB != EB; ++BB) { + MachineBasicBlock *MBB = new MachineBasicBlock(BB); + MBBMap[BB] = MBB; + MF.getBasicBlockList().push_back(MBB); + + // Create Machine PHI nodes for LLVM PHI nodes, lowering them as + // appropriate. + PHINode *PN; + for (BasicBlock::iterator I = BB->begin();(PN = dyn_cast<PHINode>(I)); ++I){ + if (PN->use_empty()) continue; + + MVT::ValueType VT = TLI.getValueType(PN->getType()); + unsigned NumRegisters = TLI.getNumRegisters(VT); + unsigned PHIReg = ValueMap[PN]; + assert(PHIReg && "PHI node does not have an assigned virtual register!"); + const TargetInstrInfo *TII = TLI.getTargetMachine().getInstrInfo(); + for (unsigned i = 0; i != NumRegisters; ++i) + BuildMI(MBB, TII->get(TargetInstrInfo::PHI), PHIReg+i); + } + } +} + +/// CreateRegForValue - Allocate the appropriate number of virtual registers of +/// the correctly promoted or expanded types. Assign these registers +/// consecutive vreg numbers and return the first assigned number. +unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { + MVT::ValueType VT = TLI.getValueType(V->getType()); + + unsigned NumRegisters = TLI.getNumRegisters(VT); + MVT::ValueType RegisterVT = TLI.getRegisterType(VT); + + unsigned R = MakeReg(RegisterVT); + for (unsigned i = 1; i != NumRegisters; ++i) + MakeReg(RegisterVT); + + return R; +} + +//===----------------------------------------------------------------------===// +/// SelectionDAGLowering - This is the common target-independent lowering +/// implementation that is parameterized by a TargetLowering object. +/// Also, targets can overload any lowering method. 
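+/// It lowers one basic block at a time, caching the SDOperand computed for
+/// each LLVM Value in NodeMap so that later uses within the block can find it.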
+/// +namespace llvm { +class SelectionDAGLowering { + MachineBasicBlock *CurMBB; + + DenseMap<const Value*, SDOperand> NodeMap; + + /// PendingLoads - Loads are not emitted to the program immediately. We bunch + /// them up and then emit token factor nodes when possible. This allows us to + /// get simple disambiguation between loads without worrying about alias + /// analysis. + std::vector<SDOperand> PendingLoads; + + /// Case - A struct to record the Value for a switch case, and the + /// case's target basic block. + struct Case { + Constant* Low; + Constant* High; + MachineBasicBlock* BB; + + Case() : Low(0), High(0), BB(0) { } + Case(Constant* low, Constant* high, MachineBasicBlock* bb) : + Low(low), High(high), BB(bb) { } + uint64_t size() const { + uint64_t rHigh = cast<ConstantInt>(High)->getSExtValue(); + uint64_t rLow = cast<ConstantInt>(Low)->getSExtValue(); + return (rHigh - rLow + 1ULL); + } + }; + + struct CaseBits { + uint64_t Mask; + MachineBasicBlock* BB; + unsigned Bits; + + CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits): + Mask(mask), BB(bb), Bits(bits) { } + }; + + typedef std::vector<Case> CaseVector; + typedef std::vector<CaseBits> CaseBitsVector; + typedef CaseVector::iterator CaseItr; + typedef std::pair<CaseItr, CaseItr> CaseRange; + + /// CaseRec - A struct with ctor used in lowering switches to a binary tree + /// of conditional branches. + struct CaseRec { + CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) : + CaseBB(bb), LT(lt), GE(ge), Range(r) {} + + /// CaseBB - The MBB in which to emit the compare and branch + MachineBasicBlock *CaseBB; + /// LT, GE - If nonzero, we know the current case value must be less-than or + /// greater-than-or-equal-to these Constants. + Constant *LT; + Constant *GE; + /// Range - A pair of iterators representing the range of case values to be + /// processed at this point in the binary search tree. + CaseRange Range; + }; + + typedef std::vector<CaseRec> CaseRecVector; + + /// The comparison function for sorting the switch case values in the vector. + /// WARNING: Case ranges should be disjoint! + struct CaseCmp { + bool operator () (const Case& C1, const Case& C2) { + assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High)); + const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); + const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); + return CI1->getValue().slt(CI2->getValue()); + } + }; + + struct CaseBitsCmp { + bool operator () (const CaseBits& C1, const CaseBits& C2) { + return C1.Bits > C2.Bits; + } + }; + + unsigned Clusterify(CaseVector& Cases, const SwitchInst &SI); + +public: + // TLI - This is information that describes the available target features we + // need for lowering. This indicates when operations are unavailable, + // implemented with a libcall, etc. + TargetLowering &TLI; + SelectionDAG &DAG; + const TargetData *TD; + + /// SwitchCases - Vector of CaseBlock structures used to communicate + /// SwitchInst code generation information. + std::vector<SelectionDAGISel::CaseBlock> SwitchCases; + /// JTCases - Vector of JumpTable structures used to communicate + /// SwitchInst code generation information. + std::vector<SelectionDAGISel::JumpTableBlock> JTCases; + std::vector<SelectionDAGISel::BitTestBlock> BitTestCases; + + /// FuncInfo - Information about the function as a whole. 
+ /// + FunctionLoweringInfo &FuncInfo; + + SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli, + FunctionLoweringInfo &funcinfo) + : TLI(tli), DAG(dag), TD(DAG.getTarget().getTargetData()), + FuncInfo(funcinfo) { + } + + /// getRoot - Return the current virtual root of the Selection DAG. + /// + SDOperand getRoot() { + if (PendingLoads.empty()) + return DAG.getRoot(); + + if (PendingLoads.size() == 1) { + SDOperand Root = PendingLoads[0]; + DAG.setRoot(Root); + PendingLoads.clear(); + return Root; + } + + // Otherwise, we have to make a token factor node. + SDOperand Root = DAG.getNode(ISD::TokenFactor, MVT::Other, + &PendingLoads[0], PendingLoads.size()); + PendingLoads.clear(); + DAG.setRoot(Root); + return Root; + } + + SDOperand CopyValueToVirtualRegister(Value *V, unsigned Reg); + + void visit(Instruction &I) { visit(I.getOpcode(), I); } + + void visit(unsigned Opcode, User &I) { + // Note: this doesn't use InstVisitor, because it has to work with + // ConstantExpr's in addition to instructions. + switch (Opcode) { + default: assert(0 && "Unknown instruction type encountered!"); + abort(); + // Build the switch statement using the Instruction.def file. +#define HANDLE_INST(NUM, OPCODE, CLASS) \ + case Instruction::OPCODE:return visit##OPCODE((CLASS&)I); +#include "llvm/Instruction.def" + } + } + + void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; } + + SDOperand getLoadFrom(const Type *Ty, SDOperand Ptr, + const Value *SV, SDOperand Root, + bool isVolatile, unsigned Alignment); + + SDOperand getIntPtrConstant(uint64_t Val) { + return DAG.getConstant(Val, TLI.getPointerTy()); + } + + SDOperand getValue(const Value *V); + + void setValue(const Value *V, SDOperand NewN) { + SDOperand &N = NodeMap[V]; + assert(N.Val == 0 && "Already set a value for this node!"); + N = NewN; + } + + void GetRegistersForValue(AsmOperandInfo &OpInfo, bool HasEarlyClobber, + std::set<unsigned> &OutputRegs, + std::set<unsigned> &InputRegs); + + void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, MachineBasicBlock *CurBB, + unsigned Opc); + bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB); + void ExportFromCurrentBlock(Value *V); + void LowerCallTo(Instruction &I, + const Type *CalledValueTy, unsigned CallingConv, + bool IsTailCall, SDOperand Callee, unsigned OpIdx, + MachineBasicBlock *LandingPad = NULL); + + // Terminator instructions. + void visitRet(ReturnInst &I); + void visitBr(BranchInst &I); + void visitSwitch(SwitchInst &I); + void visitUnreachable(UnreachableInst &I) { /* noop */ } + + // Helpers for visitSwitch + bool handleSmallSwitchRange(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); + bool handleJTSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); + bool handleBTSplitSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); + bool handleBitTestsSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); + void visitSwitchCase(SelectionDAGISel::CaseBlock &CB); + void visitBitTestHeader(SelectionDAGISel::BitTestBlock &B); + void visitBitTestCase(MachineBasicBlock* NextMBB, + unsigned Reg, + SelectionDAGISel::BitTestCase &B); + void visitJumpTable(SelectionDAGISel::JumpTable &JT); + void visitJumpTableHeader(SelectionDAGISel::JumpTable &JT, + SelectionDAGISel::JumpTableHeader &JTH); + + // These all get lowered before this pass. 
+ void visitInvoke(InvokeInst &I); + void visitUnwind(UnwindInst &I); + + void visitBinary(User &I, unsigned OpCode); + void visitShift(User &I, unsigned Opcode); + void visitAdd(User &I) { + if (I.getType()->isFPOrFPVector()) + visitBinary(I, ISD::FADD); + else + visitBinary(I, ISD::ADD); + } + void visitSub(User &I); + void visitMul(User &I) { + if (I.getType()->isFPOrFPVector()) + visitBinary(I, ISD::FMUL); + else + visitBinary(I, ISD::MUL); + } + void visitURem(User &I) { visitBinary(I, ISD::UREM); } + void visitSRem(User &I) { visitBinary(I, ISD::SREM); } + void visitFRem(User &I) { visitBinary(I, ISD::FREM); } + void visitUDiv(User &I) { visitBinary(I, ISD::UDIV); } + void visitSDiv(User &I) { visitBinary(I, ISD::SDIV); } + void visitFDiv(User &I) { visitBinary(I, ISD::FDIV); } + void visitAnd (User &I) { visitBinary(I, ISD::AND); } + void visitOr (User &I) { visitBinary(I, ISD::OR); } + void visitXor (User &I) { visitBinary(I, ISD::XOR); } + void visitShl (User &I) { visitShift(I, ISD::SHL); } + void visitLShr(User &I) { visitShift(I, ISD::SRL); } + void visitAShr(User &I) { visitShift(I, ISD::SRA); } + void visitICmp(User &I); + void visitFCmp(User &I); + // Visit the conversion instructions + void visitTrunc(User &I); + void visitZExt(User &I); + void visitSExt(User &I); + void visitFPTrunc(User &I); + void visitFPExt(User &I); + void visitFPToUI(User &I); + void visitFPToSI(User &I); + void visitUIToFP(User &I); + void visitSIToFP(User &I); + void visitPtrToInt(User &I); + void visitIntToPtr(User &I); + void visitBitCast(User &I); + + void visitExtractElement(User &I); + void visitInsertElement(User &I); + void visitShuffleVector(User &I); + + void visitGetElementPtr(User &I); + void visitSelect(User &I); + + void visitMalloc(MallocInst &I); + void visitFree(FreeInst &I); + void visitAlloca(AllocaInst &I); + void visitLoad(LoadInst &I); + void visitStore(StoreInst &I); + void visitPHI(PHINode &I) { } // PHI nodes are handled specially. + void visitCall(CallInst &I); + void visitInlineAsm(CallInst &I); + const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic); + void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic); + + void visitVAStart(CallInst &I); + void visitVAArg(VAArgInst &I); + void visitVAEnd(CallInst &I); + void visitVACopy(CallInst &I); + + void visitMemIntrinsic(CallInst &I, unsigned Op); + + void visitUserOp1(Instruction &I) { + assert(0 && "UserOp1 should not exist at instruction selection time!"); + abort(); + } + void visitUserOp2(Instruction &I) { + assert(0 && "UserOp2 should not exist at instruction selection time!"); + abort(); + } +}; +} // end namespace llvm + + +/// getCopyFromParts - Create a value that contains the +/// specified legal parts combined into the value they represent. +static SDOperand getCopyFromParts(SelectionDAG &DAG, + const SDOperand *Parts, + unsigned NumParts, + MVT::ValueType PartVT, + MVT::ValueType ValueVT, + ISD::NodeType AssertOp = ISD::DELETED_NODE) { + if (!MVT::isVector(ValueVT) || NumParts == 1) { + SDOperand Val = Parts[0]; + + // If the value was expanded, copy from the top part. + if (NumParts > 1) { + assert(NumParts == 2 && + "Cannot expand to more than 2 elts yet!"); + SDOperand Hi = Parts[1]; + if (!DAG.getTargetLoweringInfo().isLittleEndian()) + std::swap(Val, Hi); + return DAG.getNode(ISD::BUILD_PAIR, ValueVT, Val, Hi); + } + + // Otherwise, if the value was promoted or extended, truncate it to the + // appropriate type. 
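+    // For example (an illustrative sketch, not tied to any particular
+    // target): an i16 value carried in an i32 part is truncated back to
+    // i16 below; if the caller passes AssertOp = ISD::AssertSext, an
+    // AssertSext node is inserted first to record that the high bits of
+    // the part are known to be sign-extended.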
+    if (PartVT == ValueVT)
+      return Val;
+
+    if (MVT::isVector(PartVT)) {
+      assert(MVT::isVector(ValueVT) && "Unknown vector conversion!");
+      return DAG.getNode(ISD::BIT_CONVERT, ValueVT, Val);
+    }
+
+    if (MVT::isInteger(PartVT) &&
+        MVT::isInteger(ValueVT)) {
+      if (ValueVT < PartVT) {
+        // For a truncate, see if we have any information to
+        // indicate whether the truncated bits will always be
+        // zero- or sign-extended.
+        if (AssertOp != ISD::DELETED_NODE)
+          Val = DAG.getNode(AssertOp, PartVT, Val,
+                            DAG.getValueType(ValueVT));
+        return DAG.getNode(ISD::TRUNCATE, ValueVT, Val);
+      } else {
+        return DAG.getNode(ISD::ANY_EXTEND, ValueVT, Val);
+      }
+    }
+
+    if (MVT::isFloatingPoint(PartVT) &&
+        MVT::isFloatingPoint(ValueVT))
+      return DAG.getNode(ISD::FP_ROUND, ValueVT, Val);
+
+    if (MVT::getSizeInBits(PartVT) ==
+        MVT::getSizeInBits(ValueVT))
+      return DAG.getNode(ISD::BIT_CONVERT, ValueVT, Val);
+
+    assert(0 && "Unknown mismatch!");
+  }
+
+  // Handle a multi-element vector.
+  MVT::ValueType IntermediateVT, RegisterVT;
+  unsigned NumIntermediates;
+  unsigned NumRegs =
+    DAG.getTargetLoweringInfo()
+      .getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates,
+                              RegisterVT);
+
+  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+  assert(RegisterVT == Parts[0].getValueType() &&
+         "Part type doesn't match part!");
+
+  // Assemble the parts into intermediate operands.
+  SmallVector<SDOperand, 8> Ops(NumIntermediates);
+  if (NumIntermediates == NumParts) {
+    // If the register was not expanded, truncate or copy the value,
+    // as appropriate.
+    for (unsigned i = 0; i != NumParts; ++i)
+      Ops[i] = getCopyFromParts(DAG, &Parts[i], 1,
+                                PartVT, IntermediateVT);
+  } else if (NumParts > 0) {
+    // If the intermediate type was expanded, build the intermediate operands
+    // from the parts.
+    assert(NumParts % NumIntermediates == 0 &&
+           "Must expand into a divisible number of parts!");
+    unsigned Factor = NumParts / NumIntermediates;
+    for (unsigned i = 0; i != NumIntermediates; ++i)
+      Ops[i] = getCopyFromParts(DAG, &Parts[i * Factor], Factor,
+                                PartVT, IntermediateVT);
+  }
+
+  // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the intermediate
+  // operands.
+  return DAG.getNode(MVT::isVector(IntermediateVT) ?
+                       ISD::CONCAT_VECTORS :
+                       ISD::BUILD_VECTOR,
+                     ValueVT, &Ops[0], NumIntermediates);
+}
+
+/// getCopyToParts - Create a series of nodes that contain the
+/// specified value split into legal parts.
+static void getCopyToParts(SelectionDAG &DAG,
+                           SDOperand Val,
+                           SDOperand *Parts,
+                           unsigned NumParts,
+                           MVT::ValueType PartVT) {
+  MVT::ValueType ValueVT = Val.getValueType();
+
+  if (!MVT::isVector(ValueVT) || NumParts == 1) {
+    // If the value was expanded, copy from the parts.
+    if (NumParts > 1) {
+      for (unsigned i = 0; i != NumParts; ++i)
+        Parts[i] = DAG.getNode(ISD::EXTRACT_ELEMENT, PartVT, Val,
+                               DAG.getConstant(i, MVT::i32));
+      if (!DAG.getTargetLoweringInfo().isLittleEndian())
+        std::reverse(Parts, Parts + NumParts);
+      return;
+    }
+
+    // If there is a single part and the types differ, this must be
+    // a promotion.
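+    // For example (illustrative): an i8 value placed in an i32 part is
+    // ANY_EXTENDed below, an f32 value placed in an f64 part is
+    // FP_EXTENDed, and a same-sized mismatch becomes a BIT_CONVERT.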
+    if (PartVT != ValueVT) {
+      if (MVT::isVector(PartVT)) {
+        assert(MVT::isVector(ValueVT) &&
+               "Not a vector-vector cast?");
+        Val = DAG.getNode(ISD::BIT_CONVERT, PartVT, Val);
+      } else if (MVT::isInteger(PartVT) && MVT::isInteger(ValueVT)) {
+        if (PartVT < ValueVT)
+          Val = DAG.getNode(ISD::TRUNCATE, PartVT, Val);
+        else
+          Val = DAG.getNode(ISD::ANY_EXTEND, PartVT, Val);
+      } else if (MVT::isFloatingPoint(PartVT) &&
+                 MVT::isFloatingPoint(ValueVT)) {
+        Val = DAG.getNode(ISD::FP_EXTEND, PartVT, Val);
+      } else if (MVT::getSizeInBits(PartVT) ==
+                 MVT::getSizeInBits(ValueVT)) {
+        Val = DAG.getNode(ISD::BIT_CONVERT, PartVT, Val);
+      } else {
+        assert(0 && "Unknown mismatch!");
+      }
+    }
+    Parts[0] = Val;
+    return;
+  }
+
+  // Handle a multi-element vector.
+  MVT::ValueType IntermediateVT, RegisterVT;
+  unsigned NumIntermediates;
+  unsigned NumRegs =
+    DAG.getTargetLoweringInfo()
+      .getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates,
+                              RegisterVT);
+  unsigned NumElements = MVT::getVectorNumElements(ValueVT);
+
+  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+
+  // Split the vector into intermediate operands.
+  SmallVector<SDOperand, 8> Ops(NumIntermediates);
+  for (unsigned i = 0; i != NumIntermediates; ++i)
+    if (MVT::isVector(IntermediateVT))
+      Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR,
+                           IntermediateVT, Val,
+                           DAG.getConstant(i * (NumElements / NumIntermediates),
+                                           MVT::i32));
+    else
+      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                           IntermediateVT, Val,
+                           DAG.getConstant(i, MVT::i32));
+
+  // Split the intermediate operands into legal parts.
+  if (NumParts == NumIntermediates) {
+    // If the register was not expanded, promote or copy the value,
+    // as appropriate.
+    for (unsigned i = 0; i != NumParts; ++i)
+      getCopyToParts(DAG, Ops[i], &Parts[i], 1, PartVT);
+  } else if (NumParts > 0) {
+    // If the intermediate type was expanded, split each value into
+    // legal parts.
+    assert(NumParts % NumIntermediates == 0 &&
+           "Must expand into a divisible number of parts!");
+    unsigned Factor = NumParts / NumIntermediates;
+    for (unsigned i = 0; i != NumIntermediates; ++i)
+      getCopyToParts(DAG, Ops[i], &Parts[i * Factor], Factor, PartVT);
+  }
+}
+
+
+SDOperand SelectionDAGLowering::getValue(const Value *V) {
+  SDOperand &N = NodeMap[V];
+  if (N.Val) return N;
+
+  const Type *VTy = V->getType();
+  MVT::ValueType VT = TLI.getValueType(VTy);
+  if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+      visit(CE->getOpcode(), *CE);
+      SDOperand N1 = NodeMap[V];
+      assert(N1.Val && "visit didn't populate the ValueMap!");
+      return N1;
+    } else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
+      return N = DAG.getGlobalAddress(GV, VT);
+    } else if (isa<ConstantPointerNull>(C)) {
+      return N = DAG.getConstant(0, TLI.getPointerTy());
+    } else if (isa<UndefValue>(C)) {
+      if (!isa<VectorType>(VTy))
+        return N = DAG.getNode(ISD::UNDEF, VT);
+
+      // Create a BUILD_VECTOR of undef nodes.
+      const VectorType *PTy = cast<VectorType>(VTy);
+      unsigned NumElements = PTy->getNumElements();
+      MVT::ValueType PVT = TLI.getValueType(PTy->getElementType());
+
+      SmallVector<SDOperand, 8> Ops;
+      Ops.assign(NumElements, DAG.getNode(ISD::UNDEF, PVT));
+
+      // Create a BUILD_VECTOR node with the generic vector type.
+ MVT::ValueType VT = MVT::getVectorType(PVT, NumElements); + return N = DAG.getNode(ISD::BUILD_VECTOR, VT, + &Ops[0], Ops.size()); + } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { + return N = DAG.getConstantFP(CFP->getValue(), VT); + } else if (const VectorType *PTy = dyn_cast<VectorType>(VTy)) { + unsigned NumElements = PTy->getNumElements(); + MVT::ValueType PVT = TLI.getValueType(PTy->getElementType()); + + // Now that we know the number and type of the elements, push a + // Constant or ConstantFP node onto the ops list for each element of + // the vector constant. + SmallVector<SDOperand, 8> Ops; + if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) { + for (unsigned i = 0; i != NumElements; ++i) + Ops.push_back(getValue(CP->getOperand(i))); + } else { + assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); + SDOperand Op; + if (MVT::isFloatingPoint(PVT)) + Op = DAG.getConstantFP(0, PVT); + else + Op = DAG.getConstant(0, PVT); + Ops.assign(NumElements, Op); + } + + // Create a BUILD_VECTOR node. + MVT::ValueType VT = MVT::getVectorType(PVT, NumElements); + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], + Ops.size()); + } else { + // Canonicalize all constant ints to be unsigned. + return N = DAG.getConstant(cast<ConstantInt>(C)->getZExtValue(),VT); + } + } + + if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + std::map<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) + return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); + } + + unsigned InReg = FuncInfo.ValueMap[V]; + assert(InReg && "Value not in map!"); + + MVT::ValueType RegisterVT = TLI.getRegisterType(VT); + unsigned NumRegs = TLI.getNumRegisters(VT); + + std::vector<unsigned> Regs(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) + Regs[i] = InReg + i; + + RegsForValue RFV(Regs, RegisterVT, VT); + SDOperand Chain = DAG.getEntryNode(); + + return RFV.getCopyFromRegs(DAG, Chain, NULL); +} + + +void SelectionDAGLowering::visitRet(ReturnInst &I) { + if (I.getNumOperands() == 0) { + DAG.setRoot(DAG.getNode(ISD::RET, MVT::Other, getRoot())); + return; + } + SmallVector<SDOperand, 8> NewValues; + NewValues.push_back(getRoot()); + for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { + SDOperand RetOp = getValue(I.getOperand(i)); + + // If this is an integer return value, we need to promote it ourselves to + // the full width of a register, since getCopyToParts and Legalize will use + // ANY_EXTEND rather than sign/zero. + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling conventions. 
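+    // For example (illustrative): returning an i8 from a function whose
+    // zeroth parameter attribute is 'sext' yields a SIGN_EXTEND to i32
+    // below; with no attribute the value is ANY_EXTENDed instead.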
+    if (MVT::isInteger(RetOp.getValueType()) &&
+        RetOp.getValueType() < MVT::i64) {
+      MVT::ValueType TmpVT;
+      if (TLI.getTypeAction(MVT::i32) == TargetLowering::Promote)
+        TmpVT = TLI.getTypeToTransformTo(MVT::i32);
+      else
+        TmpVT = MVT::i32;
+      const FunctionType *FTy = I.getParent()->getParent()->getFunctionType();
+      const ParamAttrsList *Attrs = FTy->getParamAttrs();
+      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+      if (Attrs && Attrs->paramHasAttr(0, ParamAttr::SExt))
+        ExtendKind = ISD::SIGN_EXTEND;
+      if (Attrs && Attrs->paramHasAttr(0, ParamAttr::ZExt))
+        ExtendKind = ISD::ZERO_EXTEND;
+      RetOp = DAG.getNode(ExtendKind, TmpVT, RetOp);
+      NewValues.push_back(RetOp);
+      NewValues.push_back(DAG.getConstant(false, MVT::i32));
+    } else {
+      MVT::ValueType VT = RetOp.getValueType();
+      unsigned NumParts = TLI.getNumRegisters(VT);
+      MVT::ValueType PartVT = TLI.getRegisterType(VT);
+      SmallVector<SDOperand, 4> Parts(NumParts);
+      getCopyToParts(DAG, RetOp, &Parts[0], NumParts, PartVT);
+      for (unsigned i = 0; i < NumParts; ++i) {
+        NewValues.push_back(Parts[i]);
+        NewValues.push_back(DAG.getConstant(false, MVT::i32));
+      }
+    }
+  }
+  DAG.setRoot(DAG.getNode(ISD::RET, MVT::Other,
+                          &NewValues[0], NewValues.size()));
+}
+
+/// ExportFromCurrentBlock - If this condition isn't known to be exported from
+/// the current basic block, add it to ValueMap now so that we'll get a
+/// CopyTo/FromReg.
+void SelectionDAGLowering::ExportFromCurrentBlock(Value *V) {
+  // No need to export constants.
+  if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
+
+  // Already exported?
+  if (FuncInfo.isExportedInst(V)) return;
+
+  unsigned Reg = FuncInfo.InitializeRegForValue(V);
+  PendingLoads.push_back(CopyValueToVirtualRegister(V, Reg));
+}
+
+bool SelectionDAGLowering::isExportableFromCurrentBlock(Value *V,
+                                                    const BasicBlock *FromBB) {
+  // The operands of the setcc have to be in this block.  We don't know
+  // how to export them from some other block.
+  if (Instruction *VI = dyn_cast<Instruction>(V)) {
+    // Can export from current BB.
+    if (VI->getParent() == FromBB)
+      return true;
+
+    // It is already exported; this is a no-op.
+    return FuncInfo.isExportedInst(V);
+  }
+
+  // If this is an argument, we can export it if the BB is the entry block or
+  // if it is already exported.
+  if (isa<Argument>(V)) {
+    if (FromBB == &FromBB->getParent()->getEntryBlock())
+      return true;
+
+    // Otherwise, we can only export it if it is already exported.
+    return FuncInfo.isExportedInst(V);
+  }
+
+  // Otherwise, constants can always be exported.
+  return true;
+}
+
+static bool InBlock(const Value *V, const BasicBlock *BB) {
+  if (const Instruction *I = dyn_cast<Instruction>(V))
+    return I->getParent() == BB;
+  return true;
+}
+
+/// FindMergedConditions - If Cond is an expression like (X && Y) or (X || Y),
+/// emit a sequence of conditional branches for the subexpressions instead of
+/// materializing the boolean value with setcc and and/or instructions.
+void SelectionDAGLowering::FindMergedConditions(Value *Cond,
+                                                MachineBasicBlock *TBB,
+                                                MachineBasicBlock *FBB,
+                                                MachineBasicBlock *CurBB,
+                                                unsigned Opc) {
+  // If this node is not part of the or/and tree, emit it as a branch.
+  Instruction *BOp = dyn_cast<Instruction>(Cond);
+
+  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
+      (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+      BOp->getParent() != CurBB->getBasicBlock() ||
+      !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+      !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+    const BasicBlock *BB = CurBB->getBasicBlock();
+
+    // If the leaf of the tree is a comparison, merge the condition into
+    // the caseblock.
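+    // For example (illustrative): when lowering
+    //   br (and (icmp slt X, Y), C), TBB, FBB
+    // the icmp leaf below becomes a CaseBlock with condition ISD::SETLT on
+    // (X, Y) instead of materializing the i1 value in a register.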
+ if ((isa<ICmpInst>(Cond) || isa<FCmpInst>(Cond)) && + // The operands of the cmp have to be in this block. We don't know + // how to export them from some other block. If this is the first block + // of the sequence, no exporting is needed. + (CurBB == CurMBB || + (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && + isExportableFromCurrentBlock(BOp->getOperand(1), BB)))) { + BOp = cast<Instruction>(Cond); + ISD::CondCode Condition; + if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { + switch (IC->getPredicate()) { + default: assert(0 && "Unknown icmp predicate opcode!"); + case ICmpInst::ICMP_EQ: Condition = ISD::SETEQ; break; + case ICmpInst::ICMP_NE: Condition = ISD::SETNE; break; + case ICmpInst::ICMP_SLE: Condition = ISD::SETLE; break; + case ICmpInst::ICMP_ULE: Condition = ISD::SETULE; break; + case ICmpInst::ICMP_SGE: Condition = ISD::SETGE; break; + case ICmpInst::ICMP_UGE: Condition = ISD::SETUGE; break; + case ICmpInst::ICMP_SLT: Condition = ISD::SETLT; break; + case ICmpInst::ICMP_ULT: Condition = ISD::SETULT; break; + case ICmpInst::ICMP_SGT: Condition = ISD::SETGT; break; + case ICmpInst::ICMP_UGT: Condition = ISD::SETUGT; break; + } + } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { + ISD::CondCode FPC, FOC; + switch (FC->getPredicate()) { + default: assert(0 && "Unknown fcmp predicate opcode!"); + case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break; + case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break; + case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break; + case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break; + case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break; + case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break; + case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break; + case FCmpInst::FCMP_ORD: FOC = ISD::SETEQ; FPC = ISD::SETO; break; + case FCmpInst::FCMP_UNO: FOC = ISD::SETNE; FPC = ISD::SETUO; break; + case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break; + case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break; + case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break; + case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break; + case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break; + case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; + case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; + } + if (FiniteOnlyFPMath()) + Condition = FOC; + else + Condition = FPC; + } else { + Condition = ISD::SETEQ; // silence warning. + assert(0 && "Unknown compare instruction"); + } + + SelectionDAGISel::CaseBlock CB(Condition, BOp->getOperand(0), + BOp->getOperand(1), NULL, TBB, FBB, CurBB); + SwitchCases.push_back(CB); + return; + } + + // Create a CaseBlock record representing this branch. + SelectionDAGISel::CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(), + NULL, TBB, FBB, CurBB); + SwitchCases.push_back(CB); + return; + } + + + // Create TmpBB after CurBB. + MachineFunction::iterator BBI = CurBB; + MachineBasicBlock *TmpBB = new MachineBasicBlock(CurBB->getBasicBlock()); + CurBB->getParent()->getBasicBlockList().insert(++BBI, TmpBB); + + if (Opc == Instruction::Or) { + // Codegen X | Y as: + // jmp_if_X TBB + // jmp TmpBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc); + + // Emit the RHS condition into TmpBB. 
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc); + } else { + assert(Opc == Instruction::And && "Unknown merge op!"); + // Codegen X & Y as: + // jmp_if_X TmpBB + // jmp FBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + // This requires creation of TmpBB after CurBB. + + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc); + + // Emit the RHS condition into TmpBB. + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc); + } +} + +/// If the set of cases should be emitted as a series of branches, return true. +/// If we should emit this as a bunch of and/or'd together conditions, return +/// false. +static bool +ShouldEmitAsBranches(const std::vector<SelectionDAGISel::CaseBlock> &Cases) { + if (Cases.size() != 2) return true; + + // If this is two comparisons of the same values or'd or and'd together, they + // will get folded into a single comparison, so don't emit two blocks. + if ((Cases[0].CmpLHS == Cases[1].CmpLHS && + Cases[0].CmpRHS == Cases[1].CmpRHS) || + (Cases[0].CmpRHS == Cases[1].CmpLHS && + Cases[0].CmpLHS == Cases[1].CmpRHS)) { + return false; + } + + return true; +} + +void SelectionDAGLowering::visitBr(BranchInst &I) { + // Update machine-CFG edges. + MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; + + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CurMBB; + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + if (I.isUnconditional()) { + // If this is not a fall-through branch, emit the branch. + if (Succ0MBB != NextBlock) + DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, getRoot(), + DAG.getBasicBlock(Succ0MBB))); + + // Update machine-CFG edges. + CurMBB->addSuccessor(Succ0MBB); + + return; + } + + // If this condition is one of the special cases we handle, do special stuff + // now. + Value *CondVal = I.getCondition(); + MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; + + // If this is a series of conditions that are or'd or and'd together, emit + // this as a sequence of branches instead of setcc's with and/or operations. + // For example, instead of something like: + // cmp A, B + // C = seteq + // cmp D, E + // F = setle + // or C, F + // jnz foo + // Emit: + // cmp A, B + // je foo + // cmp D, E + // jle foo + // + if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { + if (BOp->hasOneUse() && + (BOp->getOpcode() == Instruction::And || + BOp->getOpcode() == Instruction::Or)) { + FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode()); + // If the compares in later blocks need to use values not currently + // exported from this block, export them now. This block should always + // be the first entry. + assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!"); + + // Allow some cases to be rejected. + if (ShouldEmitAsBranches(SwitchCases)) { + for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) { + ExportFromCurrentBlock(SwitchCases[i].CmpLHS); + ExportFromCurrentBlock(SwitchCases[i].CmpRHS); + } + + // Emit the branch for this block. + visitSwitchCase(SwitchCases[0]); + SwitchCases.erase(SwitchCases.begin()); + return; + } + + // Okay, we decided not to do this, remove any inserted MBB's and clear + // SwitchCases. 
+      for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
+        CurMBB->getParent()->getBasicBlockList().erase(SwitchCases[i].ThisBB);
+
+      SwitchCases.clear();
+    }
+  }
+
+  // Create a CaseBlock record representing this branch.
+  SelectionDAGISel::CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(),
+                                 NULL, Succ0MBB, Succ1MBB, CurMBB);
+  // Use visitSwitchCase to actually insert the fast branch sequence for this
+  // cond branch.
+  visitSwitchCase(CB);
+}
+
+/// visitSwitchCase - Emits the necessary code to represent a single node in
+/// the binary search tree resulting from lowering a switch instruction.
+void SelectionDAGLowering::visitSwitchCase(SelectionDAGISel::CaseBlock &CB) {
+  SDOperand Cond;
+  SDOperand CondLHS = getValue(CB.CmpLHS);
+
+  // Build the setcc now.
+  if (CB.CmpMHS == NULL) {
+    // Fold "(X == true)" to X and "(X == false)" to !X to
+    // handle common cases produced by branch lowering.
+    if (CB.CmpRHS == ConstantInt::getTrue() && CB.CC == ISD::SETEQ)
+      Cond = CondLHS;
+    else if (CB.CmpRHS == ConstantInt::getFalse() && CB.CC == ISD::SETEQ) {
+      SDOperand True = DAG.getConstant(1, CondLHS.getValueType());
+      Cond = DAG.getNode(ISD::XOR, CondLHS.getValueType(), CondLHS, True);
+    } else
+      Cond = DAG.getSetCC(MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+  } else {
+    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
+
+    uint64_t Low = cast<ConstantInt>(CB.CmpLHS)->getSExtValue();
+    uint64_t High = cast<ConstantInt>(CB.CmpRHS)->getSExtValue();
+
+    SDOperand CmpOp = getValue(CB.CmpMHS);
+    MVT::ValueType VT = CmpOp.getValueType();
+
+    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+      Cond = DAG.getSetCC(MVT::i1, CmpOp, DAG.getConstant(High, VT),
+                          ISD::SETLE);
+    } else {
+      SDOperand SUB = DAG.getNode(ISD::SUB, VT, CmpOp,
+                                  DAG.getConstant(Low, VT));
+      Cond = DAG.getSetCC(MVT::i1, SUB,
+                          DAG.getConstant(High-Low, VT), ISD::SETULE);
+    }
+  }
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  // If the lhs block is the next block, invert the condition so that we can
+  // fall through to the lhs instead of the rhs block.
+  if (CB.TrueBB == NextBlock) {
+    std::swap(CB.TrueBB, CB.FalseBB);
+    SDOperand True = DAG.getConstant(1, Cond.getValueType());
+    Cond = DAG.getNode(ISD::XOR, Cond.getValueType(), Cond, True);
+  }
+  SDOperand BrCond = DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(), Cond,
+                                 DAG.getBasicBlock(CB.TrueBB));
+  if (CB.FalseBB == NextBlock)
+    DAG.setRoot(BrCond);
+  else
+    DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, BrCond,
+                            DAG.getBasicBlock(CB.FalseBB)));
+  // Update successor info.
+  CurMBB->addSuccessor(CB.TrueBB);
+  CurMBB->addSuccessor(CB.FalseBB);
+}
+
+/// visitJumpTable - Emit the JumpTable node in the current MBB.
+void SelectionDAGLowering::visitJumpTable(SelectionDAGISel::JumpTable &JT) {
+  // Emit the code for the jump table.
+  assert(JT.Reg != -1U && "Should lower JT Header first!");
+  MVT::ValueType PTy = TLI.getPointerTy();
+  SDOperand Index = DAG.getCopyFromReg(getRoot(), JT.Reg, PTy);
+  SDOperand Table = DAG.getJumpTable(JT.JTI, PTy);
+  DAG.setRoot(DAG.getNode(ISD::BR_JT, MVT::Other, Index.getValue(1),
+                          Table, Index));
+  return;
+}
+
+/// visitJumpTableHeader - This function emits the code that computes the
+/// index into the jump table from the switch case value.
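+/// For example (illustrative): for a jump table covering case values 10..15,
+/// the header emits Sub = SwitchVal - 10, copies Sub into the jump-table
+/// index register, and branches to the default block if Sub ugt 5.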
+void SelectionDAGLowering::visitJumpTableHeader(SelectionDAGISel::JumpTable &JT,
+                                                SelectionDAGISel::JumpTableHeader &JTH) {
+  // Subtract the lowest switch case value from the value being switched on
+  // and conditionally branch to the default MBB if the result is greater
+  // than the difference between the smallest and largest cases.
+  SDOperand SwitchOp = getValue(JTH.SValue);
+  MVT::ValueType VT = SwitchOp.getValueType();
+  SDOperand SUB = DAG.getNode(ISD::SUB, VT, SwitchOp,
+                              DAG.getConstant(JTH.First, VT));
+
+  // The SDNode we just created, which holds the value being switched on
+  // minus the smallest case value, needs to be copied to a virtual
+  // register so it can be used as an index into the jump table in a
+  // subsequent basic block. This value may be smaller or larger than the
+  // target's pointer type, and may therefore require extension or
+  // truncation.
+  if (MVT::getSizeInBits(VT) > MVT::getSizeInBits(TLI.getPointerTy()))
+    SwitchOp = DAG.getNode(ISD::TRUNCATE, TLI.getPointerTy(), SUB);
+  else
+    SwitchOp = DAG.getNode(ISD::ZERO_EXTEND, TLI.getPointerTy(), SUB);
+
+  unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy());
+  SDOperand CopyTo = DAG.getCopyToReg(getRoot(), JumpTableReg, SwitchOp);
+  JT.Reg = JumpTableReg;
+
+  // Emit the range check for the jump table, and branch to the default
+  // block for the switch statement if the value being switched on exceeds
+  // the largest case in the switch.
+  SDOperand CMP = DAG.getSetCC(TLI.getSetCCResultTy(), SUB,
+                               DAG.getConstant(JTH.Last-JTH.First, VT),
+                               ISD::SETUGT);
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  SDOperand BrCond = DAG.getNode(ISD::BRCOND, MVT::Other, CopyTo, CMP,
+                                 DAG.getBasicBlock(JT.Default));
+
+  if (JT.MBB == NextBlock)
+    DAG.setRoot(BrCond);
+  else
+    DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, BrCond,
+                            DAG.getBasicBlock(JT.MBB)));
+
+  return;
+}
+
+/// visitBitTestHeader - This function emits the code that computes the value
+/// to be tested by the "bit tests" below, together with the range check
+/// guarding them.
+void SelectionDAGLowering::visitBitTestHeader(SelectionDAGISel::BitTestBlock &B) {
+  // Subtract the minimum value.
+  SDOperand SwitchOp = getValue(B.SValue);
+  MVT::ValueType VT = SwitchOp.getValueType();
+  SDOperand SUB = DAG.getNode(ISD::SUB, VT, SwitchOp,
+                              DAG.getConstant(B.First, VT));
+
+  // Check the range.
+  SDOperand RangeCmp = DAG.getSetCC(TLI.getSetCCResultTy(), SUB,
+                                    DAG.getConstant(B.Range, VT),
+                                    ISD::SETUGT);
+
+  SDOperand ShiftOp;
+  if (MVT::getSizeInBits(VT) > MVT::getSizeInBits(TLI.getShiftAmountTy()))
+    ShiftOp = DAG.getNode(ISD::TRUNCATE, TLI.getShiftAmountTy(), SUB);
+  else
+    ShiftOp = DAG.getNode(ISD::ZERO_EXTEND, TLI.getShiftAmountTy(), SUB);
+
+  // Make the desired shift: 1 << (SValue - First).
+  SDOperand SwitchVal = DAG.getNode(ISD::SHL, TLI.getPointerTy(),
+                                    DAG.getConstant(1, TLI.getPointerTy()),
+                                    ShiftOp);
+
+  unsigned SwitchReg = FuncInfo.MakeReg(TLI.getPointerTy());
+  SDOperand CopyTo = DAG.getCopyToReg(getRoot(), SwitchReg, SwitchVal);
+  B.Reg = SwitchReg;
+
+  SDOperand BrRange = DAG.getNode(ISD::BRCOND, MVT::Other, CopyTo, RangeCmp,
+                                  DAG.getBasicBlock(B.Default));
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  MachineBasicBlock* MBB = B.Cases[0].ThisBB;
+  if (MBB == NextBlock)
+    DAG.setRoot(BrRange);
+  else
+    // Chain the unconditional branch after the range check so that the
+    // branch to the default block stays on the chain.
+    DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, BrRange,
+                            DAG.getBasicBlock(MBB)));
+
+  CurMBB->addSuccessor(B.Default);
+  CurMBB->addSuccessor(MBB);
+
+  return;
+}
+
+/// visitBitTestCase - This function emits one "bit test", branching to the
+/// destination block on a match and to the next test block otherwise.
+void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB,
+                                            unsigned Reg,
+                                            SelectionDAGISel::BitTestCase &B) {
+  // Emit the bit test and the jumps.
+  SDOperand SwitchVal = DAG.getCopyFromReg(getRoot(), Reg, TLI.getPointerTy());
+
+  SDOperand AndOp = DAG.getNode(ISD::AND, TLI.getPointerTy(),
+                                SwitchVal,
+                                DAG.getConstant(B.Mask,
+                                                TLI.getPointerTy()));
+  SDOperand AndCmp = DAG.getSetCC(TLI.getSetCCResultTy(), AndOp,
+                                  DAG.getConstant(0, TLI.getPointerTy()),
+                                  ISD::SETNE);
+  SDOperand BrAnd = DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(),
+                                AndCmp, DAG.getBasicBlock(B.TargetBB));
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  if (NextMBB == NextBlock)
+    DAG.setRoot(BrAnd);
+  else
+    DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, BrAnd,
+                            DAG.getBasicBlock(NextMBB)));
+
+  CurMBB->addSuccessor(B.TargetBB);
+  CurMBB->addSuccessor(NextMBB);
+
+  return;
+}
+
+void SelectionDAGLowering::visitInvoke(InvokeInst &I) {
+  // Retrieve successors.
+  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
+  MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+  LowerCallTo(I, I.getCalledValue()->getType(),
+              I.getCallingConv(),
+              false,
+              getValue(I.getOperand(0)),
+              3, LandingPad);
+
+  // If the value of the invoke is used outside of its defining block, make it
+  // available as a virtual register.
+  if (!I.use_empty()) {
+    DenseMap<const Value*, unsigned>::iterator VMI = FuncInfo.ValueMap.find(&I);
+    if (VMI != FuncInfo.ValueMap.end())
+      DAG.setRoot(CopyValueToVirtualRegister(&I, VMI->second));
+  }
+
+  // Drop into the normal successor.
+  DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, getRoot(),
+                          DAG.getBasicBlock(Return)));
+
+  // Update successor info.
+  CurMBB->addSuccessor(Return);
+  CurMBB->addSuccessor(LandingPad);
+}
+
+void SelectionDAGLowering::visitUnwind(UnwindInst &I) {
+}
+
+/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
+/// small case ranges).
+bool SelectionDAGLowering::handleSmallSwitchRange(CaseRec& CR,
+                                                  CaseRecVector& WorkList,
+                                                  Value* SV,
+                                                  MachineBasicBlock* Default) {
+  Case& BackCase = *(CR.Range.second-1);
+
+  // Size is the number of Cases represented by this range.
+  unsigned Size = CR.Range.second - CR.Range.first;
+  if (Size > 3)
+    return false;
+
+  // Get the MachineFunction which holds the current MBB. This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = CurMBB->getParent();
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CR.CaseBB;
+
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  // TODO: If any two of the cases have the same destination, and if one value
+  // is the same as the other, but has one bit unset that the other has set,
+  // use bit manipulation to do two compares at once. For example:
+  // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+
+  // Rearrange the case blocks so that the last one falls through if possible.
+  if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
+    // The last case block won't fall through into 'NextBlock' if we emit the
+    // branches in this order. See if rearranging a case value would help.
+    for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
+      if (I->BB == NextBlock) {
+        std::swap(*I, BackCase);
+        break;
+      }
+    }
+  }
+
+  // Create a CaseBlock record representing a conditional branch to
+  // the Case's target MBB if the value being switched on SV is equal
+  // to C.
+  MachineBasicBlock *CurBlock = CR.CaseBB;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+    MachineBasicBlock *FallThrough;
+    if (I != E-1) {
+      FallThrough = new MachineBasicBlock(CurBlock->getBasicBlock());
+      CurMF->getBasicBlockList().insert(BBI, FallThrough);
+    } else {
+      // If the last case doesn't match, go to the default block.
+      FallThrough = Default;
+    }
+
+    Value *RHS, *LHS, *MHS;
+    ISD::CondCode CC;
+    if (I->High == I->Low) {
+      // This is just a small case range containing exactly one case.
+      CC = ISD::SETEQ;
+      LHS = SV; RHS = I->High; MHS = NULL;
+    } else {
+      CC = ISD::SETLE;
+      LHS = I->Low; MHS = SV; RHS = I->High;
+    }
+    SelectionDAGISel::CaseBlock CB(CC, LHS, RHS, MHS,
+                                   I->BB, FallThrough, CurBlock);
+
+    // If emitting the first comparison, just call visitSwitchCase to emit the
+    // code into the current block. Otherwise, push the CaseBlock onto the
+    // vector to be later processed by SDISel, and insert the node's MBB
+    // before the next MBB.
+    if (CurBlock == CurMBB)
+      visitSwitchCase(CB);
+    else
+      SwitchCases.push_back(CB);
+
+    CurBlock = FallThrough;
+  }
+
+  return true;
+}
+
+static inline bool areJTsAllowed(const TargetLowering &TLI) {
+  return (TLI.isOperationLegal(ISD::BR_JT, MVT::Other) ||
+          TLI.isOperationLegal(ISD::BRIND, MVT::Other));
+}
+
+/// handleJTSwitchCase - Emit a jump table for the current switch case range.
+bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR,
+                                              CaseRecVector& WorkList,
+                                              Value* SV,
+                                              MachineBasicBlock* Default) {
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase = *(CR.Range.second-1);
+
+  int64_t First = cast<ConstantInt>(FrontCase.Low)->getSExtValue();
+  int64_t Last = cast<ConstantInt>(BackCase.High)->getSExtValue();
+
+  uint64_t TSize = 0;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I != E; ++I)
+    TSize += I->size();
+
+  if (!areJTsAllowed(TLI) || TSize <= 3)
+    return false;
+
+  double Density = (double)TSize / (double)((Last - First) + 1ULL);
+  if (Density < 0.4)
+    return false;
+
+  DOUT << "Lowering jump table\n"
+       << "First entry: " << First << ". Last entry: " << Last << "\n"
+       << "Size: " << TSize << ". Density: " << Density << "\n\n";
+
+  // Get the MachineFunction which holds the current MBB. This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = CurMBB->getParent();
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CR.CaseBB;
+
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  // Create a new basic block to hold the code for loading the address
+  // of the jump table, and jumping to it. Update successor information;
+  // we will either branch to the default case for the switch, or the jump
+  // table.
+  MachineBasicBlock *JumpTableBB = new MachineBasicBlock(LLVMBB);
+  CurMF->getBasicBlockList().insert(BBI, JumpTableBB);
+  CR.CaseBB->addSuccessor(Default);
+  CR.CaseBB->addSuccessor(JumpTableBB);
+
+  // Build a vector of destination BBs, corresponding to each target
+  // of the jump table. If the value of the jump table slot corresponds to
+  // a case statement, push the case's BB onto the vector, otherwise, push
+  // the default BB.
+  std::vector<MachineBasicBlock*> DestBBs;
+  int64_t TEI = First;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
+    int64_t Low = cast<ConstantInt>(I->Low)->getSExtValue();
+    int64_t High = cast<ConstantInt>(I->High)->getSExtValue();
+
+    if ((Low <= TEI) && (TEI <= High)) {
+      DestBBs.push_back(I->BB);
+      if (TEI == High)
+        ++I;
+    } else {
+      DestBBs.push_back(Default);
+    }
+  }
+
+  // Update successor info. Add one edge to each unique successor.
+  BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
+  for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
+         E = DestBBs.end(); I != E; ++I) {
+    if (!SuccsHandled[(*I)->getNumber()]) {
+      SuccsHandled[(*I)->getNumber()] = true;
+      JumpTableBB->addSuccessor(*I);
+    }
+  }
+
+  // Create a jump table index for this jump table, or return an existing
+  // one.
+  unsigned JTI = CurMF->getJumpTableInfo()->getJumpTableIndex(DestBBs);
+
+  // Set the jump table information so that we can codegen it as a second
+  // MachineBasicBlock.
+  SelectionDAGISel::JumpTable JT(-1U, JTI, JumpTableBB, Default);
+  SelectionDAGISel::JumpTableHeader JTH(First, Last, SV, CR.CaseBB,
+                                        (CR.CaseBB == CurMBB));
+  if (CR.CaseBB == CurMBB)
+    visitJumpTableHeader(JT, JTH);
+
+  JTCases.push_back(SelectionDAGISel::JumpTableBlock(JTH, JT));
+
+  return true;
+}
+
+/// handleBTSplitSwitchCase - Emit a comparison and split the binary search
+/// tree into two subtrees.
+bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR,
+                                                   CaseRecVector& WorkList,
+                                                   Value* SV,
+                                                   MachineBasicBlock* Default) {
+  // Get the MachineFunction which holds the current MBB. This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = CurMBB->getParent();
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CR.CaseBB;
+
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase = *(CR.Range.second-1);
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  // Size is the number of Cases represented by this range.
+  unsigned Size = CR.Range.second - CR.Range.first;
+
+  int64_t First = cast<ConstantInt>(FrontCase.Low)->getSExtValue();
+  int64_t Last = cast<ConstantInt>(BackCase.High)->getSExtValue();
+  double FMetric = 0;
+  CaseItr Pivot = CR.Range.first + Size/2;
+
+  // Select the optimal pivot, maximizing the sum density of the LHS and RHS.
+  // This will (heuristically) allow us to emit jump tables later.
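+  // For example (illustrative, hypothetical values): for the case values
+  // {0, 1, 2, 100} the gap between 2 and 100 scores best, since
+  // Metric = Log2_64(RBegin - LEnd) * (LDensity + RDensity) rewards both a
+  // wide gap and dense subranges, so the pivot lands on the case for 100.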
+  uint64_t TSize = 0;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I != E; ++I)
+    TSize += I->size();
+
+  uint64_t LSize = FrontCase.size();
+  uint64_t RSize = TSize-LSize;
+  DOUT << "Selecting best pivot: \n"
+       << "First: " << First << ", Last: " << Last << "\n"
+       << "LSize: " << LSize << ", RSize: " << RSize << "\n";
+  for (CaseItr I = CR.Range.first, J = I+1, E = CR.Range.second;
+       J != E; ++I, ++J) {
+    int64_t LEnd = cast<ConstantInt>(I->High)->getSExtValue();
+    int64_t RBegin = cast<ConstantInt>(J->Low)->getSExtValue();
+    assert((RBegin-LEnd >= 1) && "Invalid case distance");
+    double LDensity = (double)LSize / (double)((LEnd - First) + 1ULL);
+    double RDensity = (double)RSize / (double)((Last - RBegin) + 1ULL);
+    double Metric = Log2_64(RBegin-LEnd)*(LDensity+RDensity);
+    // Should always split in some non-trivial place.
+    DOUT << "=>Step\n"
+         << "LEnd: " << LEnd << ", RBegin: " << RBegin << "\n"
+         << "LDensity: " << LDensity << ", RDensity: " << RDensity << "\n"
+         << "Metric: " << Metric << "\n";
+    if (FMetric < Metric) {
+      Pivot = J;
+      FMetric = Metric;
+      DOUT << "Current metric set to: " << FMetric << "\n";
+    }
+
+    LSize += J->size();
+    RSize -= J->size();
+  }
+  if (areJTsAllowed(TLI)) {
+    // If our case is dense we *really* should handle it earlier!
+    assert((FMetric > 0) && "Should handle dense range earlier!");
+  } else {
+    Pivot = CR.Range.first + Size/2;
+  }
+
+  CaseRange LHSR(CR.Range.first, Pivot);
+  CaseRange RHSR(Pivot, CR.Range.second);
+  Constant *C = Pivot->Low;
+  MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
+
+  // We know that we branch to the LHS if the Value being switched on is
+  // less than the Pivot value, C. We use this to optimize our binary
+  // tree a bit, by recognizing that if SV is greater than or equal to the
+  // LHS's Case Value, and that Case Value is exactly one less than the
+  // Pivot's Value, then we can branch directly to the LHS's Target,
+  // rather than creating a leaf node for it.
+  if ((LHSR.second - LHSR.first) == 1 &&
+      LHSR.first->High == CR.GE &&
+      cast<ConstantInt>(C)->getSExtValue() ==
+      (cast<ConstantInt>(CR.GE)->getSExtValue() + 1LL)) {
+    TrueBB = LHSR.first->BB;
+  } else {
+    TrueBB = new MachineBasicBlock(LLVMBB);
+    CurMF->getBasicBlockList().insert(BBI, TrueBB);
+    WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
+  }
+
+  // Similar to the optimization above, if the Value being switched on is
+  // known to be less than the Constant CR.LT, and the current Case Value
+  // is CR.LT - 1, then we can branch directly to the target block for
+  // the current Case Value, rather than emitting a RHS leaf node for it.
+  if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
+      cast<ConstantInt>(RHSR.first->Low)->getSExtValue() ==
+      (cast<ConstantInt>(CR.LT)->getSExtValue() - 1LL)) {
+    FalseBB = RHSR.first->BB;
+  } else {
+    FalseBB = new MachineBasicBlock(LLVMBB);
+    CurMF->getBasicBlockList().insert(BBI, FalseBB);
+    WorkList.push_back(CaseRec(FalseBB, CR.LT, C, RHSR));
+  }
+
+  // Create a CaseBlock record representing a conditional branch to
+  // the LHS node if the value being switched on SV is less than C.
+  // Otherwise, branch to the RHS node.
+  SelectionDAGISel::CaseBlock CB(ISD::SETLT, SV, C, NULL,
+                                 TrueBB, FalseBB, CR.CaseBB);
+
+  if (CR.CaseBB == CurMBB)
+    visitSwitchCase(CB);
+  else
+    SwitchCases.push_back(CB);
+
+  return true;
+}
+
+/// handleBitTestsSwitchCase - If the current case range has few destinations
+/// and spans fewer values than the machine word bitwidth, encode the case
+/// range as a series of masks and emit bit tests against those masks.
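+/// For example (illustrative): the cases {0,2,5}->BB1 and {1}->BB2 with a
+/// low bound of 0 become the masks 0b100101 and 0b000010, each tested by
+/// ANDing (1 << (SValue - lowBound)) with the mask and branching to the
+/// destination if the result is nonzero.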
+bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR,
+                                                    CaseRecVector& WorkList,
+                                                    Value* SV,
+                                                    MachineBasicBlock* Default) {
+  unsigned IntPtrBits = MVT::getSizeInBits(TLI.getPointerTy());
+
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase = *(CR.Range.second-1);
+
+  // Get the MachineFunction which holds the current MBB. This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = CurMBB->getParent();
+
+  unsigned numCmps = 0;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I != E; ++I) {
+    // A single case counts as one comparison; a case range counts as two.
+    if (I->Low == I->High)
+      numCmps += 1;
+    else
+      numCmps += 2;
+  }
+
+  // Count unique destinations.
+  SmallSet<MachineBasicBlock*, 4> Dests;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+    Dests.insert(I->BB);
+    if (Dests.size() > 3)
+      // Don't bother with the code below if there are too many unique
+      // destinations.
+      return false;
+  }
+  DOUT << "Total number of unique destinations: " << Dests.size() << "\n"
+       << "Total number of comparisons: " << numCmps << "\n";
+
+  // Compute the span of values.
+  Constant* minValue = FrontCase.Low;
+  Constant* maxValue = BackCase.High;
+  uint64_t range = cast<ConstantInt>(maxValue)->getSExtValue() -
+                   cast<ConstantInt>(minValue)->getSExtValue();
+  DOUT << "Compare range: " << range << "\n"
+       << "Low bound: " << cast<ConstantInt>(minValue)->getSExtValue() << "\n"
+       << "High bound: " << cast<ConstantInt>(maxValue)->getSExtValue() << "\n";
+
+  if (range >= IntPtrBits ||
+      (!(Dests.size() == 1 && numCmps >= 3) &&
+       !(Dests.size() == 2 && numCmps >= 5) &&
+       !(Dests.size() >= 3 && numCmps >= 6)))
+    return false;
+
+  DOUT << "Emitting bit tests\n";
+  int64_t lowBound = 0;
+
+  // If all the case values fit in a machine word without subtracting
+  // minValue, we can omit the subtraction.
+  if (cast<ConstantInt>(minValue)->getSExtValue() >= 0 &&
+      cast<ConstantInt>(maxValue)->getSExtValue() < IntPtrBits) {
+    range = cast<ConstantInt>(maxValue)->getSExtValue();
+  } else {
+    lowBound = cast<ConstantInt>(minValue)->getSExtValue();
+  }
+
+  CaseBitsVector CasesBits;
+  unsigned i, count = 0;
+
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+    MachineBasicBlock* Dest = I->BB;
+    for (i = 0; i < count; ++i)
+      if (Dest == CasesBits[i].BB)
+        break;
+
+    if (i == count) {
+      assert((count < 3) && "Too many destinations to test!");
+      CasesBits.push_back(CaseBits(0, Dest, 0));
+      count++;
+    }
+
+    uint64_t lo = cast<ConstantInt>(I->Low)->getSExtValue() - lowBound;
+    uint64_t hi = cast<ConstantInt>(I->High)->getSExtValue() - lowBound;
+
+    for (uint64_t j = lo; j <= hi; j++) {
+      CasesBits[i].Mask |= 1ULL << j;
+      CasesBits[i].Bits++;
+    }
+  }
+  std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
+
+  SelectionDAGISel::BitTestInfo BTC;
+
+  // Figure out which block is immediately after the current one.
+  MachineFunction::iterator BBI = CR.CaseBB;
+  ++BBI;
+
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  DOUT << "Cases:\n";
+  for (unsigned i = 0, e = CasesBits.size(); i != e; ++i) {
+    DOUT << "Mask: " << CasesBits[i].Mask << ", Bits: " << CasesBits[i].Bits
+         << ", BB: " << CasesBits[i].BB << "\n";
+
+    MachineBasicBlock *CaseBB = new MachineBasicBlock(LLVMBB);
+    CurMF->getBasicBlockList().insert(BBI, CaseBB);
+    BTC.push_back(SelectionDAGISel::BitTestCase(CasesBits[i].Mask,
+                                                CaseBB,
+                                                CasesBits[i].BB));
+  }
+
+  SelectionDAGISel::BitTestBlock BTB(lowBound, range, SV,
+                                     -1U, (CR.CaseBB == CurMBB),
+                                     CR.CaseBB, Default, BTC);
+
+  if (CR.CaseBB == CurMBB)
+    visitBitTestHeader(BTB);
+
+  BitTestCases.push_back(BTB);
+
+  return true;
+}
+
+
+// Clusterify - Transform a simple list of Cases into a list of clustered
+// case ranges.
+unsigned SelectionDAGLowering::Clusterify(CaseVector& Cases,
+                                          const SwitchInst& SI) {
+  unsigned numCmps = 0;
+
+  // Start with the "simple" cases.
+  for (unsigned i = 1; i < SI.getNumSuccessors(); ++i) {
+    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
+    Cases.push_back(Case(SI.getSuccessorValue(i),
+                         SI.getSuccessorValue(i),
+                         SMBB));
+  }
+  std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+  // Merge cases into clusters.
+  if (Cases.size() >= 2)
+    // Must recompute end() each iteration because it may be
+    // invalidated by erase if we hold on to it.
+    for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
+      int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
+      int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
+      MachineBasicBlock* nextBB = J->BB;
+      MachineBasicBlock* currentBB = I->BB;
+
+      // If the two neighboring cases go to the same destination, merge them
+      // into a single case.
+      if ((nextValue-currentValue == 1) && (currentBB == nextBB)) {
+        I->High = J->High;
+        J = Cases.erase(J);
+      } else {
+        I = J++;
+      }
+    }
+
+  for (CaseItr I = Cases.begin(), E = Cases.end(); I != E; ++I, ++numCmps) {
+    if (I->Low != I->High)
+      // A range counts double, since it requires two compares.
+      ++numCmps;
+  }
+
+  return numCmps;
+}
+
+void SelectionDAGLowering::visitSwitch(SwitchInst &SI) {
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != CurMBB->getParent()->end())
+    NextBlock = BBI;
+
+  MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+  // If there is only the default destination, branch to it if it is not the
+  // next basic block. Otherwise, just fall through.
+  if (SI.getNumOperands() == 2) {
+    // Update machine-CFG edges.
+
+    // If this is not a fall-through branch, emit the branch.
+    if (Default != NextBlock)
+      DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, getRoot(),
+                              DAG.getBasicBlock(Default)));
+
+    CurMBB->addSuccessor(Default);
+    return;
+  }
+
+  // If there are any non-default case statements, create a vector of Cases
+  // representing each one, and sort the vector so that we can efficiently
+  // create a binary search tree from them.
+  CaseVector Cases;
+  unsigned numCmps = Clusterify(Cases, SI);
+  DOUT << "Clusterify finished. Total clusters: " << Cases.size()
+       << ". Total compares: " << numCmps << "\n";
+
+  // Get the Value to be switched on and the default basic block; these will
+  // be inserted into CaseBlock records representing nodes of the binary
+  // search tree.
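+  // The worklist loop below tries the lowering strategies in order: bit
+  // tests first, then a short series of compares, then a jump table, and
+  // finally a binary-tree split that pushes two subranges back onto the
+  // worklist.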
+  Value *SV = SI.getOperand(0);
+
+  // Push the initial CaseRec onto the worklist.
+  CaseRecVector WorkList;
+  WorkList.push_back(CaseRec(CurMBB, 0, 0, CaseRange(Cases.begin(), Cases.end())));
+
+  while (!WorkList.empty()) {
+    // Grab a record representing a case range to process off the worklist.
+    CaseRec CR = WorkList.back();
+    WorkList.pop_back();
+
+    if (handleBitTestsSwitchCase(CR, WorkList, SV, Default))
+      continue;
+
+    // If the range has few cases (three or fewer), emit a series of specific
+    // tests.
+    if (handleSmallSwitchRange(CR, WorkList, SV, Default))
+      continue;
+
+    // If the switch has more than three cases, is at least 40% dense, and the
+    // target supports indirect branches, then emit a jump table rather than
+    // lowering the switch to a binary tree of conditional branches.
+    if (handleJTSwitchCase(CR, WorkList, SV, Default))
+      continue;
+
+    // Emit a binary tree. We need to pick a pivot, and push the left and
+    // right ranges onto the worklist. Leaves are handled via the
+    // handleSmallSwitchRange() call.
+    handleBTSplitSwitchCase(CR, WorkList, SV, Default);
+  }
+}
+
+
+void SelectionDAGLowering::visitSub(User &I) {
+  // -0.0 - X --> fneg
+  const Type *Ty = I.getType();
+  if (isa<VectorType>(Ty)) {
+    if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
+      const VectorType *DestTy = cast<VectorType>(I.getType());
+      const Type *ElTy = DestTy->getElementType();
+      if (ElTy->isFloatingPoint()) {
+        unsigned VL = DestTy->getNumElements();
+        std::vector<Constant*> NZ(VL, ConstantFP::get(ElTy, -0.0));
+        Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
+        if (CV == CNZ) {
+          SDOperand Op2 = getValue(I.getOperand(1));
+          setValue(&I, DAG.getNode(ISD::FNEG, Op2.getValueType(), Op2));
+          return;
+        }
+      }
+    }
+  }
+  if (Ty->isFloatingPoint()) {
+    if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
+      if (CFP->isExactlyValue(-0.0)) {
+        SDOperand Op2 = getValue(I.getOperand(1));
+        setValue(&I, DAG.getNode(ISD::FNEG, Op2.getValueType(), Op2));
+        return;
+      }
+  }
+
+  visitBinary(I, Ty->isFPOrFPVector() ?
ISD::FSUB : ISD::SUB); +} + +void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) { + SDOperand Op1 = getValue(I.getOperand(0)); + SDOperand Op2 = getValue(I.getOperand(1)); + + setValue(&I, DAG.getNode(OpCode, Op1.getValueType(), Op1, Op2)); +} + +void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) { + SDOperand Op1 = getValue(I.getOperand(0)); + SDOperand Op2 = getValue(I.getOperand(1)); + + if (MVT::getSizeInBits(TLI.getShiftAmountTy()) < + MVT::getSizeInBits(Op2.getValueType())) + Op2 = DAG.getNode(ISD::TRUNCATE, TLI.getShiftAmountTy(), Op2); + else if (TLI.getShiftAmountTy() > Op2.getValueType()) + Op2 = DAG.getNode(ISD::ANY_EXTEND, TLI.getShiftAmountTy(), Op2); + + setValue(&I, DAG.getNode(Opcode, Op1.getValueType(), Op1, Op2)); +} + +void SelectionDAGLowering::visitICmp(User &I) { + ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; + if (ICmpInst *IC = dyn_cast<ICmpInst>(&I)) + predicate = IC->getPredicate(); + else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) + predicate = ICmpInst::Predicate(IC->getPredicate()); + SDOperand Op1 = getValue(I.getOperand(0)); + SDOperand Op2 = getValue(I.getOperand(1)); + ISD::CondCode Opcode; + switch (predicate) { + case ICmpInst::ICMP_EQ : Opcode = ISD::SETEQ; break; + case ICmpInst::ICMP_NE : Opcode = ISD::SETNE; break; + case ICmpInst::ICMP_UGT : Opcode = ISD::SETUGT; break; + case ICmpInst::ICMP_UGE : Opcode = ISD::SETUGE; break; + case ICmpInst::ICMP_ULT : Opcode = ISD::SETULT; break; + case ICmpInst::ICMP_ULE : Opcode = ISD::SETULE; break; + case ICmpInst::ICMP_SGT : Opcode = ISD::SETGT; break; + case ICmpInst::ICMP_SGE : Opcode = ISD::SETGE; break; + case ICmpInst::ICMP_SLT : Opcode = ISD::SETLT; break; + case ICmpInst::ICMP_SLE : Opcode = ISD::SETLE; break; + default: + assert(!"Invalid ICmp predicate value"); + Opcode = ISD::SETEQ; + break; + } + setValue(&I, DAG.getSetCC(MVT::i1, Op1, Op2, Opcode)); +} + +void SelectionDAGLowering::visitFCmp(User &I) { + FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; + if (FCmpInst *FC = dyn_cast<FCmpInst>(&I)) + predicate = FC->getPredicate(); + else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) + predicate = FCmpInst::Predicate(FC->getPredicate()); + SDOperand Op1 = getValue(I.getOperand(0)); + SDOperand Op2 = getValue(I.getOperand(1)); + ISD::CondCode Condition, FOC, FPC; + switch (predicate) { + case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break; + case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break; + case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break; + case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break; + case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break; + case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break; + case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break; + case FCmpInst::FCMP_ORD: FOC = ISD::SETEQ; FPC = ISD::SETO; break; + case FCmpInst::FCMP_UNO: FOC = ISD::SETNE; FPC = ISD::SETUO; break; + case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break; + case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break; + case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break; + case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break; + case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break; + case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; + case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; + default: + assert(!"Invalid FCmp predicate value"); 
+    FOC = FPC = ISD::SETFALSE;
+    break;
+  }
+  if (FiniteOnlyFPMath())
+    Condition = FOC;
+  else
+    Condition = FPC;
+  setValue(&I, DAG.getSetCC(MVT::i1, Op1, Op2, Condition));
+}
+
+void SelectionDAGLowering::visitSelect(User &I) {
+  SDOperand Cond = getValue(I.getOperand(0));
+  SDOperand TrueVal = getValue(I.getOperand(1));
+  SDOperand FalseVal = getValue(I.getOperand(2));
+  setValue(&I, DAG.getNode(ISD::SELECT, TrueVal.getValueType(), Cond,
+                           TrueVal, FalseVal));
+}
+
+
+void SelectionDAGLowering::visitTrunc(User &I) {
+  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::TRUNCATE, DestVT, N));
+}
+
+void SelectionDAGLowering::visitZExt(User &I) {
+  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // ZExt also can't be a cast to bool for the same reason, so there is
+  // nothing much to do here.
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, DestVT, N));
+}
+
+void SelectionDAGLowering::visitSExt(User &I) {
+  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // SExt also can't be a cast to bool for the same reason, so there is
+  // nothing much to do here.
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPTrunc(User &I) {
+  // FPTrunc is never a no-op cast, no need to check.
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_ROUND, DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPExt(User &I){
+  // FPExt is never a no-op cast, no need to check.
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_EXTEND, DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPToUI(User &I) {
+  // FPToUI is never a no-op cast, no need to check.
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, DestVT, N));
+}
+
+void SelectionDAGLowering::visitFPToSI(User &I) {
+  // FPToSI is never a no-op cast, no need to check.
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, DestVT, N));
+}
+
+void SelectionDAGLowering::visitUIToFP(User &I) {
+  // UIToFP is never a no-op cast, no need to check.
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, DestVT, N));
+}
+
+void SelectionDAGLowering::visitSIToFP(User &I){
+  // SIToFP is never a no-op cast, no need to check.
+  SDOperand N = getValue(I.getOperand(0));
+  MVT::ValueType DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, DestVT, N));
+}
+
+void SelectionDAGLowering::visitPtrToInt(User &I) {
+  // What to do depends on the size of the integer and the size of the
+  // pointer. We can either truncate, zero extend, or no-op, accordingly.
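+  // For example (illustrative): on a target with 64-bit pointers, ptrtoint
+  // to i32 becomes a TRUNCATE below, while ptrtoint to i64 takes the
+  // ZERO_EXTEND path, which degenerates to a no-op for equal sizes.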
+ SDOperand N = getValue(I.getOperand(0)); + MVT::ValueType SrcVT = N.getValueType(); + MVT::ValueType DestVT = TLI.getValueType(I.getType()); + SDOperand Result; + if (MVT::getSizeInBits(DestVT) < MVT::getSizeInBits(SrcVT)) + Result = DAG.getNode(ISD::TRUNCATE, DestVT, N); + else + // Note: ZERO_EXTEND can handle cases where the sizes are equal too + Result = DAG.getNode(ISD::ZERO_EXTEND, DestVT, N); + setValue(&I, Result); +} + +void SelectionDAGLowering::visitIntToPtr(User &I) { + // What to do depends on the size of the integer and the size of the pointer. + // We can either truncate, zero extend, or no-op, accordingly. + SDOperand N = getValue(I.getOperand(0)); + MVT::ValueType SrcVT = N.getValueType(); + MVT::ValueType DestVT = TLI.getValueType(I.getType()); + if (MVT::getSizeInBits(DestVT) < MVT::getSizeInBits(SrcVT)) + setValue(&I, DAG.getNode(ISD::TRUNCATE, DestVT, N)); + else + // Note: ZERO_EXTEND can handle cases where the sizes are equal too + setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, DestVT, N)); +} + +void SelectionDAGLowering::visitBitCast(User &I) { + SDOperand N = getValue(I.getOperand(0)); + MVT::ValueType DestVT = TLI.getValueType(I.getType()); + + // BitCast assures us that source and destination are the same size so this + // is either a BIT_CONVERT or a no-op. + if (DestVT != N.getValueType()) + setValue(&I, DAG.getNode(ISD::BIT_CONVERT, DestVT, N)); // convert types + else + setValue(&I, N); // noop cast. +} + +void SelectionDAGLowering::visitInsertElement(User &I) { + SDOperand InVec = getValue(I.getOperand(0)); + SDOperand InVal = getValue(I.getOperand(1)); + SDOperand InIdx = DAG.getNode(ISD::ZERO_EXTEND, TLI.getPointerTy(), + getValue(I.getOperand(2))); + + setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, + TLI.getValueType(I.getType()), + InVec, InVal, InIdx)); +} + +void SelectionDAGLowering::visitExtractElement(User &I) { + SDOperand InVec = getValue(I.getOperand(0)); + SDOperand InIdx = DAG.getNode(ISD::ZERO_EXTEND, TLI.getPointerTy(), + getValue(I.getOperand(1))); + setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + TLI.getValueType(I.getType()), InVec, InIdx)); +} + +void SelectionDAGLowering::visitShuffleVector(User &I) { + SDOperand V1 = getValue(I.getOperand(0)); + SDOperand V2 = getValue(I.getOperand(1)); + SDOperand Mask = getValue(I.getOperand(2)); + + setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, + TLI.getValueType(I.getType()), + V1, V2, Mask)); +} + + +void SelectionDAGLowering::visitGetElementPtr(User &I) { + SDOperand N = getValue(I.getOperand(0)); + const Type *Ty = I.getOperand(0)->getType(); + + for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end(); + OI != E; ++OI) { + Value *Idx = *OI; + if (const StructType *StTy = dyn_cast<StructType>(Ty)) { + unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); + if (Field) { + // N = N + Offset + uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); + N = DAG.getNode(ISD::ADD, N.getValueType(), N, + getIntPtrConstant(Offset)); + } + Ty = StTy->getElementType(Field); + } else { + Ty = cast<SequentialType>(Ty)->getElementType(); + + // If this is a constant subscript, handle it quickly. 
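+      // For example, a constant index of 4 into an array of i32 folds to an
+      // immediate byte offset of 4 * 4 = 16 below.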
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+        if (CI->getZExtValue() == 0) continue;
+        uint64_t Offs = TD->getTypeSize(Ty)*CI->getSExtValue();
+        N = DAG.getNode(ISD::ADD, N.getValueType(), N, getIntPtrConstant(Offs));
+        continue;
+      }
+
+      // N = N + Idx * ElementSize;
+      uint64_t ElementSize = TD->getTypeSize(Ty);
+      SDOperand IdxN = getValue(Idx);
+
+      // If the index is smaller or larger than intptr_t, truncate or extend
+      // it.
+      if (IdxN.getValueType() < N.getValueType()) {
+        IdxN = DAG.getNode(ISD::SIGN_EXTEND, N.getValueType(), IdxN);
+      } else if (IdxN.getValueType() > N.getValueType())
+        IdxN = DAG.getNode(ISD::TRUNCATE, N.getValueType(), IdxN);
+
+      // If this is a multiply by a power of two, turn it into a shl
+      // immediately.  This is a very common case.
+      if (isPowerOf2_64(ElementSize)) {
+        unsigned Amt = Log2_64(ElementSize);
+        IdxN = DAG.getNode(ISD::SHL, N.getValueType(), IdxN,
+                           DAG.getConstant(Amt, TLI.getShiftAmountTy()));
+        N = DAG.getNode(ISD::ADD, N.getValueType(), N, IdxN);
+        continue;
+      }
+
+      SDOperand Scale = getIntPtrConstant(ElementSize);
+      IdxN = DAG.getNode(ISD::MUL, N.getValueType(), IdxN, Scale);
+      N = DAG.getNode(ISD::ADD, N.getValueType(), N, IdxN);
+    }
+  }
+  setValue(&I, N);
+}
+
+void SelectionDAGLowering::visitAlloca(AllocaInst &I) {
+  // If this is a fixed sized alloca in the entry block of the function,
+  // allocate it statically on the stack.
+  if (FuncInfo.StaticAllocaMap.count(&I))
+    return;   // getValue will auto-populate this.
+
+  const Type *Ty = I.getAllocatedType();
+  uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty);
+  unsigned Align =
+    std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+             I.getAlignment());
+
+  SDOperand AllocSize = getValue(I.getArraySize());
+  MVT::ValueType IntPtr = TLI.getPointerTy();
+  if (IntPtr < AllocSize.getValueType())
+    AllocSize = DAG.getNode(ISD::TRUNCATE, IntPtr, AllocSize);
+  else if (IntPtr > AllocSize.getValueType())
+    AllocSize = DAG.getNode(ISD::ZERO_EXTEND, IntPtr, AllocSize);
+
+  AllocSize = DAG.getNode(ISD::MUL, IntPtr, AllocSize,
+                          getIntPtrConstant(TySize));
+
+  // Handle alignment.  If the requested alignment is less than the stack
+  // alignment, ignore it and round the size of the allocation up to the stack
+  // alignment size.  If the requested alignment is greater than or equal to
+  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
+  unsigned StackAlign =
+    TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+  if (Align < StackAlign) {
+    Align = 0;
+    // Add SA-1 to the size.
+    AllocSize = DAG.getNode(ISD::ADD, AllocSize.getValueType(), AllocSize,
+                            getIntPtrConstant(StackAlign-1));
+    // Mask out the low bits for alignment purposes, rounding the size up to
+    // a multiple of the stack alignment (e.g. (Size + 15) & ~15 for a
+    // 16-byte stack alignment).
+    AllocSize = DAG.getNode(ISD::AND, AllocSize.getValueType(), AllocSize,
+                            getIntPtrConstant(~(uint64_t)(StackAlign-1)));
+  }
+
+  SDOperand Ops[] = { getRoot(), AllocSize, getIntPtrConstant(Align) };
+  const MVT::ValueType *VTs = DAG.getNodeValueTypes(AllocSize.getValueType(),
+                                                    MVT::Other);
+  SDOperand DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, VTs, 2, Ops, 3);
+  setValue(&I, DSA);
+  DAG.setRoot(DSA.getValue(1));
+
+  // Inform the Frame Information that we have just allocated a variable-sized
+  // object.
+  CurMBB->getParent()->getFrameInfo()->CreateVariableSizedObject();
+}
+
+void SelectionDAGLowering::visitLoad(LoadInst &I) {
+  SDOperand Ptr = getValue(I.getOperand(0));
+
+  SDOperand Root;
+  if (I.isVolatile())
+    Root = getRoot();
+  else {
+    // Do not serialize non-volatile loads against each other.
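+    // The load's output chain is instead remembered in PendingLoads (see
+    // getLoadFrom below), so independent loads can be reordered freely.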
+ Root = DAG.getRoot(); + } + + setValue(&I, getLoadFrom(I.getType(), Ptr, I.getOperand(0), + Root, I.isVolatile(), I.getAlignment())); +} + +SDOperand SelectionDAGLowering::getLoadFrom(const Type *Ty, SDOperand Ptr, + const Value *SV, SDOperand Root, + bool isVolatile, + unsigned Alignment) { + SDOperand L = + DAG.getLoad(TLI.getValueType(Ty), Root, Ptr, SV, 0, + isVolatile, Alignment); + + if (isVolatile) + DAG.setRoot(L.getValue(1)); + else + PendingLoads.push_back(L.getValue(1)); + + return L; +} + + +void SelectionDAGLowering::visitStore(StoreInst &I) { + Value *SrcV = I.getOperand(0); + SDOperand Src = getValue(SrcV); + SDOperand Ptr = getValue(I.getOperand(1)); + DAG.setRoot(DAG.getStore(getRoot(), Src, Ptr, I.getOperand(1), 0, + I.isVolatile(), I.getAlignment())); +} + +/// IntrinsicCannotAccessMemory - Return true if the specified intrinsic cannot +/// access memory and has no other side effects at all. +static bool IntrinsicCannotAccessMemory(unsigned IntrinsicID) { +#define GET_NO_MEMORY_INTRINSICS +#include "llvm/Intrinsics.gen" +#undef GET_NO_MEMORY_INTRINSICS + return false; +} + +// IntrinsicOnlyReadsMemory - Return true if the specified intrinsic doesn't +// have any side-effects or if it only reads memory. +static bool IntrinsicOnlyReadsMemory(unsigned IntrinsicID) { +#define GET_SIDE_EFFECT_INFO +#include "llvm/Intrinsics.gen" +#undef GET_SIDE_EFFECT_INFO + return false; +} + +/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC +/// node. +void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I, + unsigned Intrinsic) { + bool HasChain = !IntrinsicCannotAccessMemory(Intrinsic); + bool OnlyLoad = HasChain && IntrinsicOnlyReadsMemory(Intrinsic); + + // Build the operand list. + SmallVector<SDOperand, 8> Ops; + if (HasChain) { // If this intrinsic has side-effects, chainify it. + if (OnlyLoad) { + // We don't need to serialize loads against other loads. + Ops.push_back(DAG.getRoot()); + } else { + Ops.push_back(getRoot()); + } + } + + // Add the intrinsic ID as an integer operand. + Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); + + // Add all operands of the call to the operand list. + for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) { + SDOperand Op = getValue(I.getOperand(i)); + assert(TLI.isTypeLegal(Op.getValueType()) && + "Intrinsic uses a non-legal type?"); + Ops.push_back(Op); + } + + std::vector<MVT::ValueType> VTs; + if (I.getType() != Type::VoidTy) { + MVT::ValueType VT = TLI.getValueType(I.getType()); + if (MVT::isVector(VT)) { + const VectorType *DestTy = cast<VectorType>(I.getType()); + MVT::ValueType EltVT = TLI.getValueType(DestTy->getElementType()); + + VT = MVT::getVectorType(EltVT, DestTy->getNumElements()); + assert(VT != MVT::Other && "Intrinsic uses a non-legal type?"); + } + + assert(TLI.isTypeLegal(VT) && "Intrinsic uses a non-legal type?"); + VTs.push_back(VT); + } + if (HasChain) + VTs.push_back(MVT::Other); + + const MVT::ValueType *VTList = DAG.getNodeValueTypes(VTs); + + // Create the node. 
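+  // Chainless intrinsics become INTRINSIC_WO_CHAIN; chained intrinsics that
+  // produce a result become INTRINSIC_W_CHAIN; chained void intrinsics become
+  // INTRINSIC_VOID.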
+  SDOperand Result;
+  if (!HasChain)
+    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VTList, VTs.size(),
+                         &Ops[0], Ops.size());
+  else if (I.getType() != Type::VoidTy)
+    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, VTList, VTs.size(),
+                         &Ops[0], Ops.size());
+  else
+    Result = DAG.getNode(ISD::INTRINSIC_VOID, VTList, VTs.size(),
+                         &Ops[0], Ops.size());
+
+  if (HasChain) {
+    SDOperand Chain = Result.getValue(Result.Val->getNumValues()-1);
+    if (OnlyLoad)
+      PendingLoads.push_back(Chain);
+    else
+      DAG.setRoot(Chain);
+  }
+  if (I.getType() != Type::VoidTy) {
+    if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+      MVT::ValueType VT = TLI.getValueType(PTy);
+      Result = DAG.getNode(ISD::BIT_CONVERT, VT, Result);
+    }
+    setValue(&I, Result);
+  }
+}
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+static GlobalVariable *ExtractTypeInfo(Value *V) {
+  V = IntrinsicInst::StripPointerCasts(V);
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+  assert((GV || isa<ConstantPointerNull>(V)) &&
+         "TypeInfo must be a global variable or NULL");
+  return GV;
+}
+
+/// addCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+static void addCatchInfo(CallInst &I, MachineModuleInfo *MMI,
+                         MachineBasicBlock *MBB) {
+  // Inform the MachineModuleInfo of the personality for this landing pad.
+  ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2));
+  assert(CE->getOpcode() == Instruction::BitCast &&
+         isa<Function>(CE->getOperand(0)) &&
+         "Personality should be a function");
+  MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
+
+  // Gather all the type infos for this landing pad and pass them along to
+  // MachineModuleInfo.
+  std::vector<GlobalVariable *> TyInfo;
+  unsigned N = I.getNumOperands();
+
+  for (unsigned i = N - 1; i > 2; --i) {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) {
+      unsigned FilterLength = CI->getZExtValue();
+      unsigned FirstCatch = i + FilterLength + 1;
+      assert(FirstCatch <= N && "Invalid filter length");
+
+      if (FirstCatch < N) {
+        TyInfo.reserve(N - FirstCatch);
+        for (unsigned j = FirstCatch; j < N; ++j)
+          TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+        MMI->addCatchTypeInfo(MBB, TyInfo);
+        TyInfo.clear();
+      }
+
+      TyInfo.reserve(FilterLength);
+      for (unsigned j = i + 1; j < FirstCatch; ++j)
+        TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+      MMI->addFilterTypeInfo(MBB, TyInfo);
+      TyInfo.clear();
+
+      N = i;
+    }
+  }
+
+  if (N > 3) {
+    TyInfo.reserve(N - 3);
+    for (unsigned j = 3; j < N; ++j)
+      TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+    MMI->addCatchTypeInfo(MBB, TyInfo);
+  }
+}
+
+/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
+/// we want to emit this as a call to a named external function, return the
+/// name; otherwise lower it and return null.
+const char *
+SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
+  switch (Intrinsic) {
+  default:
+    // By default, turn this into a target intrinsic node.
+ visitTargetIntrinsic(I, Intrinsic); + return 0; + case Intrinsic::vastart: visitVAStart(I); return 0; + case Intrinsic::vaend: visitVAEnd(I); return 0; + case Intrinsic::vacopy: visitVACopy(I); return 0; + case Intrinsic::returnaddress: + setValue(&I, DAG.getNode(ISD::RETURNADDR, TLI.getPointerTy(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::frameaddress: + setValue(&I, DAG.getNode(ISD::FRAMEADDR, TLI.getPointerTy(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::setjmp: + return "_setjmp"+!TLI.usesUnderscoreSetJmp(); + break; + case Intrinsic::longjmp: + return "_longjmp"+!TLI.usesUnderscoreLongJmp(); + break; + case Intrinsic::memcpy_i32: + case Intrinsic::memcpy_i64: + visitMemIntrinsic(I, ISD::MEMCPY); + return 0; + case Intrinsic::memset_i32: + case Intrinsic::memset_i64: + visitMemIntrinsic(I, ISD::MEMSET); + return 0; + case Intrinsic::memmove_i32: + case Intrinsic::memmove_i64: + visitMemIntrinsic(I, ISD::MEMMOVE); + return 0; + + case Intrinsic::dbg_stoppoint: { + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + DbgStopPointInst &SPI = cast<DbgStopPointInst>(I); + if (MMI && SPI.getContext() && MMI->Verify(SPI.getContext())) { + SDOperand Ops[5]; + + Ops[0] = getRoot(); + Ops[1] = getValue(SPI.getLineValue()); + Ops[2] = getValue(SPI.getColumnValue()); + + DebugInfoDesc *DD = MMI->getDescFor(SPI.getContext()); + assert(DD && "Not a debug information descriptor"); + CompileUnitDesc *CompileUnit = cast<CompileUnitDesc>(DD); + + Ops[3] = DAG.getString(CompileUnit->getFileName()); + Ops[4] = DAG.getString(CompileUnit->getDirectory()); + + DAG.setRoot(DAG.getNode(ISD::LOCATION, MVT::Other, Ops, 5)); + } + + return 0; + } + case Intrinsic::dbg_region_start: { + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + DbgRegionStartInst &RSI = cast<DbgRegionStartInst>(I); + if (MMI && RSI.getContext() && MMI->Verify(RSI.getContext())) { + unsigned LabelID = MMI->RecordRegionStart(RSI.getContext()); + DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other, getRoot(), + DAG.getConstant(LabelID, MVT::i32))); + } + + return 0; + } + case Intrinsic::dbg_region_end: { + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + DbgRegionEndInst &REI = cast<DbgRegionEndInst>(I); + if (MMI && REI.getContext() && MMI->Verify(REI.getContext())) { + unsigned LabelID = MMI->RecordRegionEnd(REI.getContext()); + DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other, + getRoot(), DAG.getConstant(LabelID, MVT::i32))); + } + + return 0; + } + case Intrinsic::dbg_func_start: { + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I); + if (MMI && FSI.getSubprogram() && + MMI->Verify(FSI.getSubprogram())) { + unsigned LabelID = MMI->RecordRegionStart(FSI.getSubprogram()); + DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other, + getRoot(), DAG.getConstant(LabelID, MVT::i32))); + } + + return 0; + } + case Intrinsic::dbg_declare: { + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + DbgDeclareInst &DI = cast<DbgDeclareInst>(I); + if (MMI && DI.getVariable() && MMI->Verify(DI.getVariable())) { + SDOperand AddressOp = getValue(DI.getAddress()); + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(AddressOp)) + MMI->RecordVariable(DI.getVariable(), FI->getIndex()); + } + + return 0; + } + + case Intrinsic::eh_exception: { + if (ExceptionHandling) { + if (!CurMBB->isLandingPad()) { + // FIXME: Mark exception register as live in. Hack for PR1508. 
+ unsigned Reg = TLI.getExceptionAddressRegister(); + if (Reg) CurMBB->addLiveIn(Reg); + } + // Insert the EXCEPTIONADDR instruction. + SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); + SDOperand Ops[1]; + Ops[0] = DAG.getRoot(); + SDOperand Op = DAG.getNode(ISD::EXCEPTIONADDR, VTs, Ops, 1); + setValue(&I, Op); + DAG.setRoot(Op.getValue(1)); + } else { + setValue(&I, DAG.getConstant(0, TLI.getPointerTy())); + } + return 0; + } + + case Intrinsic::eh_selector:{ + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + + if (ExceptionHandling && MMI) { + if (CurMBB->isLandingPad()) + addCatchInfo(I, MMI, CurMBB); + else { +#ifndef NDEBUG + FuncInfo.CatchInfoLost.insert(&I); +#endif + // FIXME: Mark exception selector register as live in. Hack for PR1508. + unsigned Reg = TLI.getExceptionSelectorRegister(); + if (Reg) CurMBB->addLiveIn(Reg); + } + + // Insert the EHSELECTION instruction. + SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); + SDOperand Ops[2]; + Ops[0] = getValue(I.getOperand(1)); + Ops[1] = getRoot(); + SDOperand Op = DAG.getNode(ISD::EHSELECTION, VTs, Ops, 2); + setValue(&I, Op); + DAG.setRoot(Op.getValue(1)); + } else { + setValue(&I, DAG.getConstant(0, TLI.getPointerTy())); + } + + return 0; + } + + case Intrinsic::eh_typeid_for: { + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + + if (MMI) { + // Find the type id for the given typeinfo. + GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1)); + + unsigned TypeID = MMI->getTypeIDFor(GV); + setValue(&I, DAG.getConstant(TypeID, MVT::i32)); + } else { + // Return something different to eh_selector. + setValue(&I, DAG.getConstant(1, MVT::i32)); + } + + return 0; + } + + case Intrinsic::eh_return: { + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + + if (MMI && ExceptionHandling) { + MMI->setCallsEHReturn(true); + DAG.setRoot(DAG.getNode(ISD::EH_RETURN, + MVT::Other, + getRoot(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2)))); + } else { + setValue(&I, DAG.getConstant(0, TLI.getPointerTy())); + } + + return 0; + } + + case Intrinsic::eh_unwind_init: { + if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) { + MMI->setCallsUnwindInit(true); + } + + return 0; + } + + case Intrinsic::eh_dwarf_cfa: { + if (ExceptionHandling) { + MVT::ValueType VT = getValue(I.getOperand(1)).getValueType(); + SDOperand Offset = DAG.getNode(ISD::ADD, + TLI.getPointerTy(), + DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, + VT), + getValue(I.getOperand(1))); + setValue(&I, DAG.getNode(ISD::ADD, + TLI.getPointerTy(), + DAG.getNode(ISD::FRAMEADDR, + TLI.getPointerTy(), + DAG.getConstant(0, + TLI.getPointerTy())), + Offset)); + } else { + setValue(&I, DAG.getConstant(0, TLI.getPointerTy())); + } + + return 0; + } + + case Intrinsic::sqrt_f32: + case Intrinsic::sqrt_f64: + setValue(&I, DAG.getNode(ISD::FSQRT, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::powi_f32: + case Intrinsic::powi_f64: + setValue(&I, DAG.getNode(ISD::FPOWI, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2)))); + return 0; + case Intrinsic::pcmarker: { + SDOperand Tmp = getValue(I.getOperand(1)); + DAG.setRoot(DAG.getNode(ISD::PCMARKER, MVT::Other, getRoot(), Tmp)); + return 0; + } + case Intrinsic::readcyclecounter: { + SDOperand Op = getRoot(); + SDOperand Tmp = DAG.getNode(ISD::READCYCLECOUNTER, + DAG.getNodeValueTypes(MVT::i64, MVT::Other), 2, + &Op, 1); + setValue(&I, Tmp); + DAG.setRoot(Tmp.getValue(1)); + return 0; + } + 
case Intrinsic::part_select: { + // Currently not implemented: just abort + assert(0 && "part_select intrinsic not implemented"); + abort(); + } + case Intrinsic::part_set: { + // Currently not implemented: just abort + assert(0 && "part_set intrinsic not implemented"); + abort(); + } + case Intrinsic::bswap: + setValue(&I, DAG.getNode(ISD::BSWAP, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::cttz: { + SDOperand Arg = getValue(I.getOperand(1)); + MVT::ValueType Ty = Arg.getValueType(); + SDOperand result = DAG.getNode(ISD::CTTZ, Ty, Arg); + if (Ty < MVT::i32) + result = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, result); + else if (Ty > MVT::i32) + result = DAG.getNode(ISD::TRUNCATE, MVT::i32, result); + setValue(&I, result); + return 0; + } + case Intrinsic::ctlz: { + SDOperand Arg = getValue(I.getOperand(1)); + MVT::ValueType Ty = Arg.getValueType(); + SDOperand result = DAG.getNode(ISD::CTLZ, Ty, Arg); + if (Ty < MVT::i32) + result = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, result); + else if (Ty > MVT::i32) + result = DAG.getNode(ISD::TRUNCATE, MVT::i32, result); + setValue(&I, result); + return 0; + } + case Intrinsic::ctpop: { + SDOperand Arg = getValue(I.getOperand(1)); + MVT::ValueType Ty = Arg.getValueType(); + SDOperand result = DAG.getNode(ISD::CTPOP, Ty, Arg); + if (Ty < MVT::i32) + result = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, result); + else if (Ty > MVT::i32) + result = DAG.getNode(ISD::TRUNCATE, MVT::i32, result); + setValue(&I, result); + return 0; + } + case Intrinsic::stacksave: { + SDOperand Op = getRoot(); + SDOperand Tmp = DAG.getNode(ISD::STACKSAVE, + DAG.getNodeValueTypes(TLI.getPointerTy(), MVT::Other), 2, &Op, 1); + setValue(&I, Tmp); + DAG.setRoot(Tmp.getValue(1)); + return 0; + } + case Intrinsic::stackrestore: { + SDOperand Tmp = getValue(I.getOperand(1)); + DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, MVT::Other, getRoot(), Tmp)); + return 0; + } + case Intrinsic::prefetch: + // FIXME: Currently discarding prefetches. + return 0; + + case Intrinsic::var_annotation: + // Discard annotate attributes + return 0; + } +} + + +void SelectionDAGLowering::LowerCallTo(Instruction &I, + const Type *CalledValueTy, + unsigned CallingConv, + bool IsTailCall, + SDOperand Callee, unsigned OpIdx, + MachineBasicBlock *LandingPad) { + const PointerType *PT = cast<PointerType>(CalledValueTy); + const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + const ParamAttrsList *Attrs = FTy->getParamAttrs(); + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + unsigned BeginLabel = 0, EndLabel = 0; + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Args.reserve(I.getNumOperands()); + for (unsigned i = OpIdx, e = I.getNumOperands(); i != e; ++i) { + Value *Arg = I.getOperand(i); + SDOperand ArgNode = getValue(Arg); + Entry.Node = ArgNode; Entry.Ty = Arg->getType(); + + unsigned attrInd = i - OpIdx + 1; + Entry.isSExt = Attrs && Attrs->paramHasAttr(attrInd, ParamAttr::SExt); + Entry.isZExt = Attrs && Attrs->paramHasAttr(attrInd, ParamAttr::ZExt); + Entry.isInReg = Attrs && Attrs->paramHasAttr(attrInd, ParamAttr::InReg); + Entry.isSRet = Attrs && Attrs->paramHasAttr(attrInd, ParamAttr::StructRet); + Args.push_back(Entry); + } + + if (ExceptionHandling && MMI) { + // Insert a label before the invoke call to mark the try range. This can be + // used to detect deletion of the invoke via the MachineModuleInfo. 
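+    // The matching end label is emitted after the call below, and the pair is
+    // reported to MachineModuleInfo as the invoke's range.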
+ BeginLabel = MMI->NextLabelID(); + DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other, getRoot(), + DAG.getConstant(BeginLabel, MVT::i32))); + } + + std::pair<SDOperand,SDOperand> Result = + TLI.LowerCallTo(getRoot(), I.getType(), + Attrs && Attrs->paramHasAttr(0, ParamAttr::SExt), + FTy->isVarArg(), CallingConv, IsTailCall, + Callee, Args, DAG); + if (I.getType() != Type::VoidTy) + setValue(&I, Result.first); + DAG.setRoot(Result.second); + + if (ExceptionHandling && MMI) { + // Insert a label at the end of the invoke call to mark the try range. This + // can be used to detect deletion of the invoke via the MachineModuleInfo. + EndLabel = MMI->NextLabelID(); + DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other, getRoot(), + DAG.getConstant(EndLabel, MVT::i32))); + + // Inform MachineModuleInfo of range. + MMI->addInvoke(LandingPad, BeginLabel, EndLabel); + } +} + + +void SelectionDAGLowering::visitCall(CallInst &I) { + const char *RenameFn = 0; + if (Function *F = I.getCalledFunction()) { + if (F->isDeclaration()) + if (unsigned IID = F->getIntrinsicID()) { + RenameFn = visitIntrinsicCall(I, IID); + if (!RenameFn) + return; + } else { // Not an LLVM intrinsic. + const std::string &Name = F->getName(); + if (Name[0] == 'c' && (Name == "copysign" || Name == "copysignf")) { + if (I.getNumOperands() == 3 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPoint() && + I.getType() == I.getOperand(1)->getType() && + I.getType() == I.getOperand(2)->getType()) { + SDOperand LHS = getValue(I.getOperand(1)); + SDOperand RHS = getValue(I.getOperand(2)); + setValue(&I, DAG.getNode(ISD::FCOPYSIGN, LHS.getValueType(), + LHS, RHS)); + return; + } + } else if (Name[0] == 'f' && (Name == "fabs" || Name == "fabsf")) { + if (I.getNumOperands() == 2 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPoint() && + I.getType() == I.getOperand(1)->getType()) { + SDOperand Tmp = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FABS, Tmp.getValueType(), Tmp)); + return; + } + } else if (Name[0] == 's' && (Name == "sin" || Name == "sinf")) { + if (I.getNumOperands() == 2 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPoint() && + I.getType() == I.getOperand(1)->getType()) { + SDOperand Tmp = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FSIN, Tmp.getValueType(), Tmp)); + return; + } + } else if (Name[0] == 'c' && (Name == "cos" || Name == "cosf")) { + if (I.getNumOperands() == 2 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPoint() && + I.getType() == I.getOperand(1)->getType()) { + SDOperand Tmp = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FCOS, Tmp.getValueType(), Tmp)); + return; + } + } + } + } else if (isa<InlineAsm>(I.getOperand(0))) { + visitInlineAsm(I); + return; + } + + SDOperand Callee; + if (!RenameFn) + Callee = getValue(I.getOperand(0)); + else + Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); + + LowerCallTo(I, I.getCalledValue()->getType(), + I.getCallingConv(), + I.isTailCall(), + Callee, + 1); +} + + +/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from +/// this value and returns the result as a ValueVT value. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +SDOperand RegsForValue::getCopyFromRegs(SelectionDAG &DAG, + SDOperand &Chain, SDOperand *Flag)const{ + // Copy the legal parts from the registers. 
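+  // When a flag is used, each copy is glued to the previous one so that the
+  // register copies stay adjacent during scheduling.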
+  unsigned NumParts = Regs.size();
+  SmallVector<SDOperand, 8> Parts(NumParts);
+  for (unsigned i = 0; i != NumParts; ++i) {
+    SDOperand Part = Flag ?
+                     DAG.getCopyFromReg(Chain, Regs[i], RegVT, *Flag) :
+                     DAG.getCopyFromReg(Chain, Regs[i], RegVT);
+    Chain = Part.getValue(1);
+    if (Flag)
+      *Flag = Part.getValue(2);
+    Parts[i] = Part;
+  }
+
+  // Assemble the legal parts into the final value.
+  return getCopyFromParts(DAG, &Parts[0], NumParts, RegVT, ValueVT);
+}
+
+/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+/// specified value into the registers specified by this object.  This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+void RegsForValue::getCopyToRegs(SDOperand Val, SelectionDAG &DAG,
+                                 SDOperand &Chain, SDOperand *Flag) const {
+  // Get the list of the value's legal parts.
+  unsigned NumParts = Regs.size();
+  SmallVector<SDOperand, 8> Parts(NumParts);
+  getCopyToParts(DAG, Val, &Parts[0], NumParts, RegVT);
+
+  // Copy the parts into the registers.
+  for (unsigned i = 0; i != NumParts; ++i) {
+    SDOperand Part = Flag ?
+                     DAG.getCopyToReg(Chain, Regs[i], Parts[i], *Flag) :
+                     DAG.getCopyToReg(Chain, Regs[i], Parts[i]);
+    Chain = Part.getValue(0);
+    if (Flag)
+      *Flag = Part.getValue(1);
+  }
+}
+
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list.  This adds the code marker and includes the number of
+/// values added into it.
+void RegsForValue::AddInlineAsmOperands(unsigned Code, SelectionDAG &DAG,
+                                        std::vector<SDOperand> &Ops) const {
+  MVT::ValueType IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy();
+  Ops.push_back(DAG.getTargetConstant(Code | (Regs.size() << 3), IntPtrTy));
+  for (unsigned i = 0, e = Regs.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(Regs[i], RegVT));
+}
+
+/// isAllocatableRegister - If the specified register is safe to allocate,
+/// i.e. it isn't a stack pointer or some other special register, return the
+/// register class for the register.  Otherwise, return null.
+static const TargetRegisterClass *
+isAllocatableRegister(unsigned Reg, MachineFunction &MF,
+                      const TargetLowering &TLI, const MRegisterInfo *MRI) {
+  MVT::ValueType FoundVT = MVT::Other;
+  const TargetRegisterClass *FoundRC = 0;
+  for (MRegisterInfo::regclass_iterator RCI = MRI->regclass_begin(),
+       E = MRI->regclass_end(); RCI != E; ++RCI) {
+    MVT::ValueType ThisVT = MVT::Other;
+
+    const TargetRegisterClass *RC = *RCI;
+    // If none of the value types for this register class are valid, we
+    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
+    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+         I != E; ++I) {
+      if (TLI.isTypeLegal(*I)) {
+        // If we have already found this register in a different register
+        // class, choose the one with the largest VT specified.  For example,
+        // on PowerPC, we favor f64 register classes over f32.
+        if (FoundVT == MVT::Other ||
+            MVT::getSizeInBits(FoundVT) < MVT::getSizeInBits(*I)) {
+          ThisVT = *I;
+          break;
+        }
+      }
+    }
+
+    if (ThisVT == MVT::Other) continue;
+
+    // NOTE: This isn't ideal.  In particular, this might allocate the
+    // frame pointer in functions that need it (due to them not being taken
+    // out of allocation, because a variable sized allocation hasn't been seen
+    // yet).  This is a slight code pessimization, but should still work.
+    for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
+         E = RC->allocation_order_end(MF); I != E; ++I)
+      if (*I == Reg) {
+        // We found a matching register class.  Keep looking at others in case
+        // we find one with larger registers that this physreg is also in.
+        FoundRC = RC;
+        FoundVT = ThisVT;
+        break;
+      }
+  }
+  return FoundRC;
+}
+
+
+namespace {
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+struct AsmOperandInfo : public InlineAsm::ConstraintInfo {
+  /// ConstraintCode - This contains the actual string for the code, like "m".
+  std::string ConstraintCode;
+
+  /// ConstraintType - Information about the constraint code, e.g. Register,
+  /// RegisterClass, Memory, Other, Unknown.
+  TargetLowering::ConstraintType ConstraintType;
+
+  /// CallOperand/CallOperandVal - If this is the result output operand or a
+  /// clobber, this is null, otherwise it is the incoming operand to the
+  /// CallInst.  This gets modified as the asm is processed.
+  SDOperand CallOperand;
+  Value *CallOperandVal;
+
+  /// ConstraintVT - The ValueType for the operand value.
+  MVT::ValueType ConstraintVT;
+
+  /// AssignedRegs - If this is a register or register class operand, this
+  /// contains the set of registers corresponding to the operand.
+  RegsForValue AssignedRegs;
+
+  AsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+    : InlineAsm::ConstraintInfo(info),
+      ConstraintType(TargetLowering::C_Unknown),
+      CallOperand(0,0), CallOperandVal(0), ConstraintVT(MVT::Other) {
+  }
+
+  void ComputeConstraintToUse(const TargetLowering &TLI);
+
+  /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
+  /// busy in OutputRegs/InputRegs.
+  void MarkAllocatedRegs(bool isOutReg, bool isInReg,
+                         std::set<unsigned> &OutputRegs,
+                         std::set<unsigned> &InputRegs) const {
+    if (isOutReg)
+      OutputRegs.insert(AssignedRegs.Regs.begin(), AssignedRegs.Regs.end());
+    if (isInReg)
+      InputRegs.insert(AssignedRegs.Regs.begin(), AssignedRegs.Regs.end());
+  }
+};
+} // end anon namespace.
+
+/// getConstraintGenerality - Return an integer indicating how general CT is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+  switch (CT) {
+  default: assert(0 && "Unknown constraint type!");
+  case TargetLowering::C_Other:
+  case TargetLowering::C_Unknown:
+    return 0;
+  case TargetLowering::C_Register:
+    return 1;
+  case TargetLowering::C_RegisterClass:
+    return 2;
+  case TargetLowering::C_Memory:
+    return 3;
+  }
+}
+
+void AsmOperandInfo::ComputeConstraintToUse(const TargetLowering &TLI) {
+  assert(!Codes.empty() && "Must have at least one constraint");
+
+  std::string *Current = &Codes[0];
+  TargetLowering::ConstraintType CurType = TLI.getConstraintType(*Current);
+  if (Codes.size() == 1) {   // Single-letter constraints ('r') are very common.
+    ConstraintCode = *Current;
+    ConstraintType = CurType;
+    return;
+  }
+
+  unsigned CurGenerality = getConstraintGenerality(CurType);
+
+  // If we have multiple constraints, try to pick the most general one ahead
+  // of time.  This isn't a wonderful solution, but handles common cases.
+  for (unsigned j = 1, e = Codes.size(); j != e; ++j) {
+    TargetLowering::ConstraintType ThisType = TLI.getConstraintType(Codes[j]);
+    unsigned ThisGenerality = getConstraintGenerality(ThisType);
+    if (ThisGenerality > CurGenerality) {
+      // This constraint letter is more general than the previous one,
+      // use it.
+      CurType = ThisType;
+      Current = &Codes[j];
+      CurGenerality = ThisGenerality;
+    }
+  }
+
+  ConstraintCode = *Current;
+  ConstraintType = CurType;
+}
+
+
+void SelectionDAGLowering::
+GetRegistersForValue(AsmOperandInfo &OpInfo, bool HasEarlyClobber,
+                     std::set<unsigned> &OutputRegs,
+                     std::set<unsigned> &InputRegs) {
+  // Compute whether this value requires an input register, an output register,
+  // or both.
+  bool isOutReg = false;
+  bool isInReg = false;
+  switch (OpInfo.Type) {
+  case InlineAsm::isOutput:
+    isOutReg = true;
+
+    // If this is an early-clobber output, or if there is an input
+    // constraint that matches this, we need to reserve the input register
+    // so no other inputs allocate to it.
+    isInReg = OpInfo.isEarlyClobber || OpInfo.hasMatchingInput;
+    break;
+  case InlineAsm::isInput:
+    isInReg = true;
+    isOutReg = false;
+    break;
+  case InlineAsm::isClobber:
+    isOutReg = true;
+    isInReg = true;
+    break;
+  }
+
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  std::vector<unsigned> Regs;
+
+  // If this is a constraint for a single physreg, or a constraint for a
+  // register class, find it.
+  std::pair<unsigned, const TargetRegisterClass*> PhysReg =
+    TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+                                     OpInfo.ConstraintVT);
+
+  unsigned NumRegs = 1;
+  if (OpInfo.ConstraintVT != MVT::Other)
+    NumRegs = TLI.getNumRegisters(OpInfo.ConstraintVT);
+  MVT::ValueType RegVT;
+  MVT::ValueType ValueVT = OpInfo.ConstraintVT;
+
+
+  // If this is a constraint for a specific physical register, like {r17},
+  // assign it now.
+  if (PhysReg.first) {
+    if (OpInfo.ConstraintVT == MVT::Other)
+      ValueVT = *PhysReg.second->vt_begin();
+
+    // Get the actual register value type.  This is important, because the user
+    // may have asked for (e.g.) the AX register in i32 type.  We need to
+    // remember that AX is actually i16 to get the right extension.
+    RegVT = *PhysReg.second->vt_begin();
+
+    // This is an explicit reference to a physical register.
+    Regs.push_back(PhysReg.first);
+
+    // If this is an expanded reference, add the rest of the regs to Regs.
+    if (NumRegs != 1) {
+      TargetRegisterClass::iterator I = PhysReg.second->begin();
+      TargetRegisterClass::iterator E = PhysReg.second->end();
+      for (; *I != PhysReg.first; ++I)
+        assert(I != E && "Didn't find reg!");
+
+      // Already added the first reg.
+      --NumRegs; ++I;
+      for (; NumRegs; --NumRegs, ++I) {
+        assert(I != E && "Ran out of registers to allocate!");
+        Regs.push_back(*I);
+      }
+    }
+    OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+    OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs);
+    return;
+  }
+
+  // Otherwise, if this was a reference to an LLVM register class, create vregs
+  // for this reference.
+  std::vector<unsigned> RegClassRegs;
+  const TargetRegisterClass *RC = PhysReg.second;
+  if (RC) {
+    // If this is an early clobber or tied register, our regalloc doesn't know
+    // how to maintain the constraint.  If it isn't, go ahead and create vregs
+    // and let the regalloc do the right thing.
+    if (!OpInfo.hasMatchingInput && !OpInfo.isEarlyClobber &&
+        // If there is some other early clobber and this is an input register,
+        // then we are forced to pre-allocate the input reg so it doesn't
+        // conflict with the earlyclobber.
+        !(OpInfo.Type == InlineAsm::isInput && HasEarlyClobber)) {
+      RegVT = *PhysReg.second->vt_begin();
+
+      if (OpInfo.ConstraintVT == MVT::Other)
+        ValueVT = RegVT;
+
+      // Create the appropriate number of virtual registers.
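+      // NumRegs was computed above via getNumRegisters(ConstraintVT); e.g. an
+      // i64 operand on a 32-bit target needs two i32 virtual registers.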
+ SSARegMap *RegMap = MF.getSSARegMap(); + for (; NumRegs; --NumRegs) + Regs.push_back(RegMap->createVirtualRegister(PhysReg.second)); + + OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); + OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs); + return; + } + + // Otherwise, we can't allocate it. Let the code below figure out how to + // maintain these constraints. + RegClassRegs.assign(PhysReg.second->begin(), PhysReg.second->end()); + + } else { + // This is a reference to a register class that doesn't directly correspond + // to an LLVM register class. Allocate NumRegs consecutive, available, + // registers from the class. + RegClassRegs = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + } + + const MRegisterInfo *MRI = DAG.getTarget().getRegisterInfo(); + unsigned NumAllocated = 0; + for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) { + unsigned Reg = RegClassRegs[i]; + // See if this register is available. + if ((isOutReg && OutputRegs.count(Reg)) || // Already used. + (isInReg && InputRegs.count(Reg))) { // Already used. + // Make sure we find consecutive registers. + NumAllocated = 0; + continue; + } + + // Check to see if this register is allocatable (i.e. don't give out the + // stack pointer). + if (RC == 0) { + RC = isAllocatableRegister(Reg, MF, TLI, MRI); + if (!RC) { // Couldn't allocate this register. + // Reset NumAllocated to make sure we return consecutive registers. + NumAllocated = 0; + continue; + } + } + + // Okay, this register is good, we can use it. + ++NumAllocated; + + // If we allocated enough consecutive registers, succeed. + if (NumAllocated == NumRegs) { + unsigned RegStart = (i-NumAllocated)+1; + unsigned RegEnd = i+1; + // Mark all of the allocated registers used. + for (unsigned i = RegStart; i != RegEnd; ++i) + Regs.push_back(RegClassRegs[i]); + + OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(), + OpInfo.ConstraintVT); + OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs); + return; + } + } + + // Otherwise, we couldn't allocate enough registers for this. + return; +} + + +/// visitInlineAsm - Handle a call to an InlineAsm object. +/// +void SelectionDAGLowering::visitInlineAsm(CallInst &I) { + InlineAsm *IA = cast<InlineAsm>(I.getOperand(0)); + + /// ConstraintOperands - Information about all of the constraints. + std::vector<AsmOperandInfo> ConstraintOperands; + + SDOperand Chain = getRoot(); + SDOperand Flag; + + std::set<unsigned> OutputRegs, InputRegs; + + // Do a prepass over the constraints, canonicalizing them, and building up the + // ConstraintOperands list. + std::vector<InlineAsm::ConstraintInfo> + ConstraintInfos = IA->ParseConstraints(); + + // SawEarlyClobber - Keep track of whether we saw an earlyclobber output + // constraint. If so, we can't let the register allocator allocate any input + // registers, because it will not know to avoid the earlyclobbered output reg. + bool SawEarlyClobber = false; + + unsigned OpNo = 1; // OpNo - The operand of the CallInst. + for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { + ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i])); + AsmOperandInfo &OpInfo = ConstraintOperands.back(); + + MVT::ValueType OpVT = MVT::Other; + + // Compute the value type for each operand. + switch (OpInfo.Type) { + case InlineAsm::isOutput: + if (!OpInfo.isIndirect) { + // The return value of the call is this value. As such, there is no + // corresponding argument. 
+        assert(I.getType() != Type::VoidTy && "Bad inline asm!");
+        OpVT = TLI.getValueType(I.getType());
+      } else {
+        OpInfo.CallOperandVal = I.getOperand(OpNo++);
+      }
+      break;
+    case InlineAsm::isInput:
+      OpInfo.CallOperandVal = I.getOperand(OpNo++);
+      break;
+    case InlineAsm::isClobber:
+      // Nothing to do.
+      break;
+    }
+
+    // If this is an input or an indirect output, process the call argument.
+    if (OpInfo.CallOperandVal) {
+      OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+      const Type *OpTy = OpInfo.CallOperandVal->getType();
+      // If this is an indirect operand, the operand is a pointer to the
+      // accessed type.
+      if (OpInfo.isIndirect)
+        OpTy = cast<PointerType>(OpTy)->getElementType();
+
+      // If OpTy is not a first-class value, it may be a struct/union that we
+      // can tile with integers.
+      if (!OpTy->isFirstClassType() && OpTy->isSized()) {
+        unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+        switch (BitSize) {
+        default: break;
+        case 1:
+        case 8:
+        case 16:
+        case 32:
+        case 64:
+          OpTy = IntegerType::get(BitSize);
+          break;
+        }
+      }
+
+      OpVT = TLI.getValueType(OpTy, true);
+    }
+
+    OpInfo.ConstraintVT = OpVT;
+
+    // Compute the constraint code and ConstraintType to use.
+    OpInfo.ComputeConstraintToUse(TLI);
+
+    // Keep track of whether we see an earlyclobber.
+    SawEarlyClobber |= OpInfo.isEarlyClobber;
+
+    // If this is a memory input, and if the operand is not indirect, do what
+    // we need to do to provide an address for the memory input.
+    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+        !OpInfo.isIndirect) {
+      assert(OpInfo.Type == InlineAsm::isInput &&
+             "Can only indirectify direct input operands!");
+
+      // Memory operands really want the address of the value.  If we don't
+      // have an indirect input, put it in the constpool if we can, otherwise
+      // spill it to a stack slot.
+
+      // If the operand is a float, integer, or vector constant, spill to a
+      // constant pool entry to get its address.
+      Value *OpVal = OpInfo.CallOperandVal;
+      if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+          isa<ConstantVector>(OpVal)) {
+        OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
+                                                 TLI.getPointerTy());
+      } else {
+        // Otherwise, create a stack slot and emit a store to it before the
+        // asm.
+        const Type *Ty = OpVal->getType();
+        uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty);
+        unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+        MachineFunction &MF = DAG.getMachineFunction();
+        int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align);
+        SDOperand StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+        Chain = DAG.getStore(Chain, OpInfo.CallOperand, StackSlot, NULL, 0);
+        OpInfo.CallOperand = StackSlot;
+      }
+
+      // There is no longer a Value* corresponding to this operand.
+      OpInfo.CallOperandVal = 0;
+      // It is now an indirect operand.
+      OpInfo.isIndirect = true;
+    }
+
+    // If this constraint is for a specific register, allocate it before
+    // anything else.
+    if (OpInfo.ConstraintType == TargetLowering::C_Register)
+      GetRegistersForValue(OpInfo, SawEarlyClobber, OutputRegs, InputRegs);
+  }
+  ConstraintInfos.clear();
+
+
+  // Second pass - Loop over all of the operands, assigning virtual or physregs
+  // to registerclass operands.
+  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+    AsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    // C_Register operands have already been allocated, Other/Memory don't need
+    // to be.
+    if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+      GetRegistersForValue(OpInfo, SawEarlyClobber, OutputRegs, InputRegs);
+  }
+
+  // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+  std::vector<SDOperand> AsmNodeOperands;
+  AsmNodeOperands.push_back(SDOperand());  // reserve space for input chain
+  AsmNodeOperands.push_back(
+          DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other));
+
+
+  // Loop over all of the inputs, copying the operand values into the
+  // appropriate registers and processing the output regs.
+  RegsForValue RetValRegs;
+
+  // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
+  std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+
+  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+    AsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput: {
+      if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
+          OpInfo.ConstraintType != TargetLowering::C_Register) {
+        // Memory output, or 'other' output (e.g. 'X' constraint).
+        assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+
+        // Add information to the INLINEASM node to know about this output.
+        unsigned ResOpType = 4/*MEM*/ | (1 << 3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(OpInfo.CallOperand);
+        break;
+      }
+
+      // Otherwise, this is a register or register class output.
+
+      // Copy the output from the appropriate register.  Find a register that
+      // we can use.
+      if (OpInfo.AssignedRegs.Regs.empty()) {
+        cerr << "Couldn't allocate output reg for constraint '"
+             << OpInfo.ConstraintCode << "'!\n";
+        exit(1);
+      }
+
+      if (!OpInfo.isIndirect) {
+        // This is the result value of the call.
+        assert(RetValRegs.Regs.empty() &&
+               "Cannot have multiple output constraints yet!");
+        assert(I.getType() != Type::VoidTy && "Bad inline asm!");
+        RetValRegs = OpInfo.AssignedRegs;
+      } else {
+        IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
+                                                      OpInfo.CallOperandVal));
+      }
+
+      // Add information to the INLINEASM node to know that this register is
+      // set.
+      OpInfo.AssignedRegs.AddInlineAsmOperands(2 /*REGDEF*/, DAG,
+                                               AsmNodeOperands);
+      break;
+    }
+    case InlineAsm::isInput: {
+      SDOperand InOperandVal = OpInfo.CallOperand;
+
+      if (isdigit(OpInfo.ConstraintCode[0])) {    // Matching constraint?
+        // If this is required to match an output register we have already set,
+        // just use its register.
+        unsigned OperandNo = atoi(OpInfo.ConstraintCode.c_str());
+
+        // Scan until we find the definition of this operand that we already
+        // emitted.  When we find it, create a RegsForValue operand.
+        unsigned CurOp = 2;  // The first operand.
+        for (; OperandNo; --OperandNo) {
+          // Advance to the next operand.
+          unsigned NumOps =
+            cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getValue();
+          assert(((NumOps & 7) == 2 /*REGDEF*/ ||
+                  (NumOps & 7) == 4 /*MEM*/) &&
+                 "Skipped past definitions?");
+          CurOp += (NumOps>>3)+1;
+        }
+
+        unsigned NumOps =
+          cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getValue();
+        if ((NumOps & 7) == 2 /*REGDEF*/) {
+          // Add NumOps>>3 registers to MatchedRegs.
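+          // Each operand descriptor packs the operand class into its low 3
+          // bits and the register count into the remaining bits, matching
+          // the encoding produced by AddInlineAsmOperands above: e.g.
+          // 2/*REGDEF*/ | (2 << 3) describes two defined registers.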
+          RegsForValue MatchedRegs;
+          MatchedRegs.ValueVT = InOperandVal.getValueType();
+          MatchedRegs.RegVT = AsmNodeOperands[CurOp+1].getValueType();
+          for (unsigned i = 0, e = NumOps>>3; i != e; ++i) {
+            unsigned Reg =
+              cast<RegisterSDNode>(AsmNodeOperands[++CurOp])->getReg();
+            MatchedRegs.Regs.push_back(Reg);
+          }
+
+          // Use the produced MatchedRegs object to copy the input value into
+          // the matched registers.
+          MatchedRegs.getCopyToRegs(InOperandVal, DAG, Chain, &Flag);
+          MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/, DAG, AsmNodeOperands);
+          break;
+        } else {
+          assert((NumOps & 7) == 4/*MEM*/ && "Unknown matching constraint!");
+          assert(0 && "matching constraints for memory operands unimp");
+        }
+      }
+
+      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+        assert(!OpInfo.isIndirect &&
+               "Don't know how to handle indirect other inputs yet!");
+
+        InOperandVal = TLI.isOperandValidForConstraint(InOperandVal,
+                                                       OpInfo.ConstraintCode[0],
+                                                       DAG);
+        if (!InOperandVal.Val) {
+          cerr << "Invalid operand for inline asm constraint '"
+               << OpInfo.ConstraintCode << "'!\n";
+          exit(1);
+        }
+
+        // Add information to the INLINEASM node to know about this input.
+        unsigned ResOpType = 3 /*IMM*/ | (1 << 3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(InOperandVal);
+        break;
+      } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+        assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+        assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+               "Memory operands expect pointer values");
+
+        // Add information to the INLINEASM node to know about this input.
+        unsigned ResOpType = 4/*MEM*/ | (1 << 3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(InOperandVal);
+        break;
+      }
+
+      assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+              OpInfo.ConstraintType == TargetLowering::C_Register) &&
+             "Unknown constraint type!");
+      assert(!OpInfo.isIndirect &&
+             "Don't know how to handle indirect register inputs yet!");
+
+      // Copy the input into the appropriate registers.
+      assert(!OpInfo.AssignedRegs.Regs.empty() &&
+             "Couldn't allocate input reg!");
+
+      OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, Chain, &Flag);
+
+      OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, DAG,
+                                               AsmNodeOperands);
+      break;
+    }
+    case InlineAsm::isClobber: {
+      // Add the clobbered value to the operand list, so that the register
+      // allocator is aware that the physreg got clobbered.
+      if (!OpInfo.AssignedRegs.Regs.empty())
+        OpInfo.AssignedRegs.AddInlineAsmOperands(2/*REGDEF*/, DAG,
+                                                 AsmNodeOperands);
+      break;
+    }
+    }
+  }
+
+  // Finish up input operands.
+  AsmNodeOperands[0] = Chain;
+  if (Flag.Val) AsmNodeOperands.push_back(Flag);
+
+  Chain = DAG.getNode(ISD::INLINEASM,
+                      DAG.getNodeValueTypes(MVT::Other, MVT::Flag), 2,
+                      &AsmNodeOperands[0], AsmNodeOperands.size());
+  Flag = Chain.getValue(1);
+
+  // If this asm returns a register value, copy the result from that register
+  // and set it as the value of the call.
+  if (!RetValRegs.Regs.empty()) {
+    SDOperand Val = RetValRegs.getCopyFromRegs(DAG, Chain, &Flag);
+
+    // If the result of the inline asm is a vector, it may have the wrong
+    // width/num elts.  Make sure to convert it to the right type with
+    // bit_convert.
+ if (MVT::isVector(Val.getValueType())) { + const VectorType *VTy = cast<VectorType>(I.getType()); + MVT::ValueType DesiredVT = TLI.getValueType(VTy); + + Val = DAG.getNode(ISD::BIT_CONVERT, DesiredVT, Val); + } + + setValue(&I, Val); + } + + std::vector<std::pair<SDOperand, Value*> > StoresToEmit; + + // Process indirect outputs, first output all of the flagged copies out of + // physregs. + for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { + RegsForValue &OutRegs = IndirectStoresToEmit[i].first; + Value *Ptr = IndirectStoresToEmit[i].second; + SDOperand OutVal = OutRegs.getCopyFromRegs(DAG, Chain, &Flag); + StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); + } + + // Emit the non-flagged stores from the physregs. + SmallVector<SDOperand, 8> OutChains; + for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) + OutChains.push_back(DAG.getStore(Chain, StoresToEmit[i].first, + getValue(StoresToEmit[i].second), + StoresToEmit[i].second, 0)); + if (!OutChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, + &OutChains[0], OutChains.size()); + DAG.setRoot(Chain); +} + + +void SelectionDAGLowering::visitMalloc(MallocInst &I) { + SDOperand Src = getValue(I.getOperand(0)); + + MVT::ValueType IntPtr = TLI.getPointerTy(); + + if (IntPtr < Src.getValueType()) + Src = DAG.getNode(ISD::TRUNCATE, IntPtr, Src); + else if (IntPtr > Src.getValueType()) + Src = DAG.getNode(ISD::ZERO_EXTEND, IntPtr, Src); + + // Scale the source by the type size. + uint64_t ElementSize = TD->getTypeSize(I.getType()->getElementType()); + Src = DAG.getNode(ISD::MUL, Src.getValueType(), + Src, getIntPtrConstant(ElementSize)); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Src; + Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Args.push_back(Entry); + + std::pair<SDOperand,SDOperand> Result = + TLI.LowerCallTo(getRoot(), I.getType(), false, false, CallingConv::C, true, + DAG.getExternalSymbol("malloc", IntPtr), + Args, DAG); + setValue(&I, Result.first); // Pointers always fit in registers + DAG.setRoot(Result.second); +} + +void SelectionDAGLowering::visitFree(FreeInst &I) { + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = getValue(I.getOperand(0)); + Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Args.push_back(Entry); + MVT::ValueType IntPtr = TLI.getPointerTy(); + std::pair<SDOperand,SDOperand> Result = + TLI.LowerCallTo(getRoot(), Type::VoidTy, false, false, CallingConv::C, true, + DAG.getExternalSymbol("free", IntPtr), Args, DAG); + DAG.setRoot(Result.second); +} + +// InsertAtEndOfBasicBlock - This method should be implemented by targets that +// mark instructions with the 'usesCustomDAGSchedInserter' flag. These +// instructions are special in various ways, which require special support to +// insert. The specified MachineInstr is created but not inserted into any +// basic blocks, and the scheduler passes ownership of it to this method. 
+MachineBasicBlock *TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, + MachineBasicBlock *MBB) { + cerr << "If a target marks an instruction with " + << "'usesCustomDAGSchedInserter', it must implement " + << "TargetLowering::InsertAtEndOfBasicBlock!\n"; + abort(); + return 0; +} + +void SelectionDAGLowering::visitVAStart(CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VASTART, MVT::Other, getRoot(), + getValue(I.getOperand(1)), + DAG.getSrcValue(I.getOperand(1)))); +} + +void SelectionDAGLowering::visitVAArg(VAArgInst &I) { + SDOperand V = DAG.getVAArg(TLI.getValueType(I.getType()), getRoot(), + getValue(I.getOperand(0)), + DAG.getSrcValue(I.getOperand(0))); + setValue(&I, V); + DAG.setRoot(V.getValue(1)); +} + +void SelectionDAGLowering::visitVAEnd(CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VAEND, MVT::Other, getRoot(), + getValue(I.getOperand(1)), + DAG.getSrcValue(I.getOperand(1)))); +} + +void SelectionDAGLowering::visitVACopy(CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VACOPY, MVT::Other, getRoot(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2)), + DAG.getSrcValue(I.getOperand(1)), + DAG.getSrcValue(I.getOperand(2)))); +} + +/// TargetLowering::LowerArguments - This is the default LowerArguments +/// implementation, which just inserts a FORMAL_ARGUMENTS node. FIXME: When all +/// targets are migrated to using FORMAL_ARGUMENTS, this hook should be +/// integrated into SDISel. +std::vector<SDOperand> +TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { + const FunctionType *FTy = F.getFunctionType(); + const ParamAttrsList *Attrs = FTy->getParamAttrs(); + // Add CC# and isVararg as operands to the FORMAL_ARGUMENTS node. + std::vector<SDOperand> Ops; + Ops.push_back(DAG.getRoot()); + Ops.push_back(DAG.getConstant(F.getCallingConv(), getPointerTy())); + Ops.push_back(DAG.getConstant(F.isVarArg(), getPointerTy())); + + // Add one result value for each formal argument. + std::vector<MVT::ValueType> RetVals; + unsigned j = 1; + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); + I != E; ++I, ++j) { + MVT::ValueType VT = getValueType(I->getType()); + unsigned Flags = ISD::ParamFlags::NoFlagSet; + unsigned OriginalAlignment = + getTargetData()->getABITypeAlignment(I->getType()); + + // FIXME: Distinguish between a formal with no [sz]ext attribute from one + // that is zero extended! + if (Attrs && Attrs->paramHasAttr(j, ParamAttr::ZExt)) + Flags &= ~(ISD::ParamFlags::SExt); + if (Attrs && Attrs->paramHasAttr(j, ParamAttr::SExt)) + Flags |= ISD::ParamFlags::SExt; + if (Attrs && Attrs->paramHasAttr(j, ParamAttr::InReg)) + Flags |= ISD::ParamFlags::InReg; + if (Attrs && Attrs->paramHasAttr(j, ParamAttr::StructRet)) + Flags |= ISD::ParamFlags::StructReturn; + if (Attrs && Attrs->paramHasAttr(j, ParamAttr::ByVal)) + Flags |= ISD::ParamFlags::ByVal; + Flags |= (OriginalAlignment << ISD::ParamFlags::OrigAlignmentOffs); + + switch (getTypeAction(VT)) { + default: assert(0 && "Unknown type action!"); + case Legal: + RetVals.push_back(VT); + Ops.push_back(DAG.getConstant(Flags, MVT::i32)); + break; + case Promote: + RetVals.push_back(getTypeToTransformTo(VT)); + Ops.push_back(DAG.getConstant(Flags, MVT::i32)); + break; + case Expand: { + // If this is an illegal type, it needs to be broken up to fit into + // registers. 
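+      // For example, an i64 formal argument on a 32-bit target expands into
+      // two i32 register values below.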
+ MVT::ValueType RegisterVT = getRegisterType(VT); + unsigned NumRegs = getNumRegisters(VT); + for (unsigned i = 0; i != NumRegs; ++i) { + RetVals.push_back(RegisterVT); + // if it isn't first piece, alignment must be 1 + if (i > 0) + Flags = (Flags & (~ISD::ParamFlags::OrigAlignment)) | + (1 << ISD::ParamFlags::OrigAlignmentOffs); + Ops.push_back(DAG.getConstant(Flags, MVT::i32)); + } + break; + } + } + } + + RetVals.push_back(MVT::Other); + + // Create the node. + SDNode *Result = DAG.getNode(ISD::FORMAL_ARGUMENTS, + DAG.getNodeValueTypes(RetVals), RetVals.size(), + &Ops[0], Ops.size()).Val; + unsigned NumArgRegs = Result->getNumValues() - 1; + DAG.setRoot(SDOperand(Result, NumArgRegs)); + + // Set up the return result vector. + Ops.clear(); + unsigned i = 0; + unsigned Idx = 1; + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; + ++I, ++Idx) { + MVT::ValueType VT = getValueType(I->getType()); + + switch (getTypeAction(VT)) { + default: assert(0 && "Unknown type action!"); + case Legal: + Ops.push_back(SDOperand(Result, i++)); + break; + case Promote: { + SDOperand Op(Result, i++); + if (MVT::isInteger(VT)) { + if (Attrs && Attrs->paramHasAttr(Idx, ParamAttr::SExt)) + Op = DAG.getNode(ISD::AssertSext, Op.getValueType(), Op, + DAG.getValueType(VT)); + else if (Attrs && Attrs->paramHasAttr(Idx, ParamAttr::ZExt)) + Op = DAG.getNode(ISD::AssertZext, Op.getValueType(), Op, + DAG.getValueType(VT)); + Op = DAG.getNode(ISD::TRUNCATE, VT, Op); + } else { + assert(MVT::isFloatingPoint(VT) && "Not int or FP?"); + Op = DAG.getNode(ISD::FP_ROUND, VT, Op); + } + Ops.push_back(Op); + break; + } + case Expand: { + MVT::ValueType PartVT = getRegisterType(VT); + unsigned NumParts = getNumRegisters(VT); + SmallVector<SDOperand, 4> Parts(NumParts); + for (unsigned j = 0; j != NumParts; ++j) + Parts[j] = SDOperand(Result, i++); + Ops.push_back(getCopyFromParts(DAG, &Parts[0], NumParts, PartVT, VT)); + break; + } + } + } + assert(i == NumArgRegs && "Argument register count mismatch!"); + return Ops; +} + + +/// TargetLowering::LowerCallTo - This is the default LowerCallTo +/// implementation, which just inserts an ISD::CALL node, which is later custom +/// lowered by the target to something concrete. FIXME: When all targets are +/// migrated to using ISD::CALL, this hook should be integrated into SDISel. +std::pair<SDOperand, SDOperand> +TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy, + bool RetTyIsSigned, bool isVarArg, + unsigned CallingConv, bool isTailCall, + SDOperand Callee, + ArgListTy &Args, SelectionDAG &DAG) { + SmallVector<SDOperand, 32> Ops; + Ops.push_back(Chain); // Op#0 - Chain + Ops.push_back(DAG.getConstant(CallingConv, getPointerTy())); // Op#1 - CC + Ops.push_back(DAG.getConstant(isVarArg, getPointerTy())); // Op#2 - VarArg + Ops.push_back(DAG.getConstant(isTailCall, getPointerTy())); // Op#3 - Tail + Ops.push_back(Callee); + + // Handle all of the outgoing arguments. 
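+  // Editor's note (illustrative, not part of the original patch): after the
+  // loop below runs, the ISD::CALL operand list has the shape
+  //   Chain, CC#, isVarArg, isTailCall, Callee, Arg0, Flags0, Arg1, Flags1, ...
+  // where an argument that had to be expanded contributes one (part, flags)
+  // pair per register-sized piece.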
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) { + MVT::ValueType VT = getValueType(Args[i].Ty); + SDOperand Op = Args[i].Node; + unsigned Flags = ISD::ParamFlags::NoFlagSet; + unsigned OriginalAlignment = + getTargetData()->getABITypeAlignment(Args[i].Ty); + + if (Args[i].isSExt) + Flags |= ISD::ParamFlags::SExt; + if (Args[i].isZExt) + Flags |= ISD::ParamFlags::ZExt; + if (Args[i].isInReg) + Flags |= ISD::ParamFlags::InReg; + if (Args[i].isSRet) + Flags |= ISD::ParamFlags::StructReturn; + Flags |= OriginalAlignment << ISD::ParamFlags::OrigAlignmentOffs; + + switch (getTypeAction(VT)) { + default: assert(0 && "Unknown type action!"); + case Legal: + Ops.push_back(Op); + Ops.push_back(DAG.getConstant(Flags, MVT::i32)); + break; + case Promote: + if (MVT::isInteger(VT)) { + unsigned ExtOp; + if (Args[i].isSExt) + ExtOp = ISD::SIGN_EXTEND; + else if (Args[i].isZExt) + ExtOp = ISD::ZERO_EXTEND; + else + ExtOp = ISD::ANY_EXTEND; + Op = DAG.getNode(ExtOp, getTypeToTransformTo(VT), Op); + } else { + assert(MVT::isFloatingPoint(VT) && "Not int or FP?"); + Op = DAG.getNode(ISD::FP_EXTEND, getTypeToTransformTo(VT), Op); + } + Ops.push_back(Op); + Ops.push_back(DAG.getConstant(Flags, MVT::i32)); + break; + case Expand: { + MVT::ValueType PartVT = getRegisterType(VT); + unsigned NumParts = getNumRegisters(VT); + SmallVector<SDOperand, 4> Parts(NumParts); + getCopyToParts(DAG, Op, &Parts[0], NumParts, PartVT); + for (unsigned i = 0; i != NumParts; ++i) { + // if it isn't first piece, alignment must be 1 + unsigned MyFlags = Flags; + if (i != 0) + MyFlags = (MyFlags & (~ISD::ParamFlags::OrigAlignment)) | + (1 << ISD::ParamFlags::OrigAlignmentOffs); + + Ops.push_back(Parts[i]); + Ops.push_back(DAG.getConstant(MyFlags, MVT::i32)); + } + break; + } + } + } + + // Figure out the result value types. + MVT::ValueType VT = getValueType(RetTy); + MVT::ValueType RegisterVT = getRegisterType(VT); + unsigned NumRegs = getNumRegisters(VT); + SmallVector<MVT::ValueType, 4> RetTys(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) + RetTys[i] = RegisterVT; + + RetTys.push_back(MVT::Other); // Always has a chain. + + // Create the CALL node. + SDOperand Res = DAG.getNode(ISD::CALL, + DAG.getVTList(&RetTys[0], NumRegs + 1), + &Ops[0], Ops.size()); + SDOperand Chain = Res.getValue(NumRegs); + + // Gather up the call result into a single value. + if (RetTy != Type::VoidTy) { + ISD::NodeType AssertOp = ISD::AssertSext; + if (!RetTyIsSigned) + AssertOp = ISD::AssertZext; + SmallVector<SDOperand, 4> Results(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) + Results[i] = Res.getValue(i); + Res = getCopyFromParts(DAG, &Results[0], NumRegs, RegisterVT, VT, AssertOp); + } + + return std::make_pair(Res, Chain); +} + +SDOperand TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { + assert(0 && "LowerOperation not implemented for this target!"); + abort(); + return SDOperand(); +} + +SDOperand TargetLowering::CustomPromoteOperation(SDOperand Op, + SelectionDAG &DAG) { + assert(0 && "CustomPromoteOperation not implemented for this target!"); + abort(); + return SDOperand(); +} + +/// getMemsetValue - Vectorized representation of the memset value +/// operand. 
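+/// Editor's note (illustrative, not part of the original patch): "vectorized"
+/// here means the i8 fill value replicated across the wider store type. For
+/// VT == MVT::i32 and a constant fill byte 0xAB the result is the constant
+/// 0xABABABAB; for a non-constant byte the same pattern is built up with
+/// (x << 8) | x, then (v << 16) | v, doubling the shift each step.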
+static SDOperand getMemsetValue(SDOperand Value, MVT::ValueType VT,
+                                SelectionDAG &DAG) {
+  MVT::ValueType CurVT = VT;
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+    uint64_t Val   = C->getValue() & 255;
+    unsigned Shift = 8;
+    while (CurVT != MVT::i8) {
+      Val = (Val << Shift) | Val;
+      Shift <<= 1;
+      CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
+    }
+    return DAG.getConstant(Val, VT);
+  } else {
+    Value = DAG.getNode(ISD::ZERO_EXTEND, VT, Value);
+    unsigned Shift = 8;
+    while (CurVT != MVT::i8) {
+      Value =
+        DAG.getNode(ISD::OR, VT,
+                    DAG.getNode(ISD::SHL, VT, Value,
+                                DAG.getConstant(Shift, MVT::i8)), Value);
+      Shift <<= 1;
+      CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
+    }
+
+    return Value;
+  }
+}
+
+/// getMemsetStringVal - Similar to getMemsetValue, except this is only used
+/// when a memcpy is lowered into inline stores of constants because the
+/// source is a constant string pointer.
+static SDOperand getMemsetStringVal(MVT::ValueType VT,
+                                    SelectionDAG &DAG, TargetLowering &TLI,
+                                    std::string &Str, unsigned Offset) {
+  uint64_t Val = 0;
+  unsigned MSB = MVT::getSizeInBits(VT) / 8;
+  if (TLI.isLittleEndian())
+    Offset = Offset + MSB - 1;
+  for (unsigned i = 0; i != MSB; ++i) {
+    Val = (Val << 8) | (unsigned char)Str[Offset];
+    Offset += TLI.isLittleEndian() ? -1 : 1;
+  }
+  return DAG.getConstant(Val, VT);
+}
+
+/// getMemBasePlusOffset - Returns the given base pointer incremented by the
+/// given constant offset.
+static SDOperand getMemBasePlusOffset(SDOperand Base, unsigned Offset,
+                                      SelectionDAG &DAG, TargetLowering &TLI) {
+  MVT::ValueType VT = Base.getValueType();
+  return DAG.getNode(ISD::ADD, VT, Base, DAG.getConstant(Offset, VT));
+}
+
+/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
+/// to replace the memset / memcpy is below the threshold. It also returns the
+/// types of the sequence of memory ops to perform memset / memcpy.
+static bool MeetsMaxMemopRequirement(std::vector<MVT::ValueType> &MemOps,
+                                     unsigned Limit, uint64_t Size,
+                                     unsigned Align, TargetLowering &TLI) {
+  MVT::ValueType VT;
+
+  if (TLI.allowsUnalignedMemoryAccesses()) {
+    VT = MVT::i64;
+  } else {
+    switch (Align & 7) {
+    case 0:
+      VT = MVT::i64;
+      break;
+    case 4:
+      VT = MVT::i32;
+      break;
+    case 2:
+      VT = MVT::i16;
+      break;
+    default:
+      VT = MVT::i8;
+      break;
+    }
+  }
+
+  MVT::ValueType LVT = MVT::i64;
+  while (!TLI.isTypeLegal(LVT))
+    LVT = (MVT::ValueType)((unsigned)LVT - 1);
+  assert(MVT::isInteger(LVT));
+
+  if (VT > LVT)
+    VT = LVT;
+
+  unsigned NumMemOps = 0;
+  while (Size != 0) {
+    unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+    while (VTSize > Size) {
+      VT = (MVT::ValueType)((unsigned)VT - 1);
+      VTSize >>= 1;
+    }
+    assert(MVT::isInteger(VT));
+
+    if (++NumMemOps > Limit)
+      return false;
+    MemOps.push_back(VT);
+    Size -= VTSize;
+  }
+
+  return true;
+}
+
+void SelectionDAGLowering::visitMemIntrinsic(CallInst &I, unsigned Op) {
+  SDOperand Op1 = getValue(I.getOperand(1));
+  SDOperand Op2 = getValue(I.getOperand(2));
+  SDOperand Op3 = getValue(I.getOperand(3));
+  SDOperand Op4 = getValue(I.getOperand(4));
+  unsigned Align = (unsigned)cast<ConstantSDNode>(Op4)->getValue();
+  if (Align == 0) Align = 1;
+
+  if (ConstantSDNode *Size = dyn_cast<ConstantSDNode>(Op3)) {
+    std::vector<MVT::ValueType> MemOps;
+
+    // Expand memset / memcpy to a series of load / store ops
+    // if the size operand falls below a certain threshold.
+    SmallVector<SDOperand, 8> OutChains;
+    switch (Op) {
+    default: break;  // Do nothing for now.
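+    // Editor's note (worked example, not part of the original patch): a
+    // memset of 13 bytes at 4-byte alignment, with i32 the widest usable
+    // type, yields MemOps == {i32, i32, i32, i8}: the store type is halved
+    // whenever it no longer fits the remaining size, and four ops is under
+    // the default limit of 8 set by maxStoresPerMemset.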
+ case ISD::MEMSET: { + if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemset(), + Size->getValue(), Align, TLI)) { + unsigned NumMemOps = MemOps.size(); + unsigned Offset = 0; + for (unsigned i = 0; i < NumMemOps; i++) { + MVT::ValueType VT = MemOps[i]; + unsigned VTSize = MVT::getSizeInBits(VT) / 8; + SDOperand Value = getMemsetValue(Op2, VT, DAG); + SDOperand Store = DAG.getStore(getRoot(), Value, + getMemBasePlusOffset(Op1, Offset, DAG, TLI), + I.getOperand(1), Offset); + OutChains.push_back(Store); + Offset += VTSize; + } + } + break; + } + case ISD::MEMCPY: { + if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemcpy(), + Size->getValue(), Align, TLI)) { + unsigned NumMemOps = MemOps.size(); + unsigned SrcOff = 0, DstOff = 0, SrcDelta = 0; + GlobalAddressSDNode *G = NULL; + std::string Str; + bool CopyFromStr = false; + + if (Op2.getOpcode() == ISD::GlobalAddress) + G = cast<GlobalAddressSDNode>(Op2); + else if (Op2.getOpcode() == ISD::ADD && + Op2.getOperand(0).getOpcode() == ISD::GlobalAddress && + Op2.getOperand(1).getOpcode() == ISD::Constant) { + G = cast<GlobalAddressSDNode>(Op2.getOperand(0)); + SrcDelta = cast<ConstantSDNode>(Op2.getOperand(1))->getValue(); + } + if (G) { + GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal()); + if (GV && GV->isConstant()) { + Str = GV->getStringValue(false); + if (!Str.empty()) { + CopyFromStr = true; + SrcOff += SrcDelta; + } + } + } + + for (unsigned i = 0; i < NumMemOps; i++) { + MVT::ValueType VT = MemOps[i]; + unsigned VTSize = MVT::getSizeInBits(VT) / 8; + SDOperand Value, Chain, Store; + + if (CopyFromStr) { + Value = getMemsetStringVal(VT, DAG, TLI, Str, SrcOff); + Chain = getRoot(); + Store = + DAG.getStore(Chain, Value, + getMemBasePlusOffset(Op1, DstOff, DAG, TLI), + I.getOperand(1), DstOff); + } else { + Value = DAG.getLoad(VT, getRoot(), + getMemBasePlusOffset(Op2, SrcOff, DAG, TLI), + I.getOperand(2), SrcOff); + Chain = Value.getValue(1); + Store = + DAG.getStore(Chain, Value, + getMemBasePlusOffset(Op1, DstOff, DAG, TLI), + I.getOperand(1), DstOff); + } + OutChains.push_back(Store); + SrcOff += VTSize; + DstOff += VTSize; + } + } + break; + } + } + + if (!OutChains.empty()) { + DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, + &OutChains[0], OutChains.size())); + return; + } + } + + DAG.setRoot(DAG.getNode(Op, MVT::Other, getRoot(), Op1, Op2, Op3, Op4)); +} + +//===----------------------------------------------------------------------===// +// SelectionDAGISel code +//===----------------------------------------------------------------------===// + +unsigned SelectionDAGISel::MakeReg(MVT::ValueType VT) { + return RegMap->createVirtualRegister(TLI.getRegClassFor(VT)); +} + +void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); + AU.setPreservesAll(); +} + + + +bool SelectionDAGISel::runOnFunction(Function &Fn) { + MachineFunction &MF = MachineFunction::construct(&Fn, TLI.getTargetMachine()); + RegMap = MF.getSSARegMap(); + DOUT << "\n\n\n=== " << Fn.getName() << "\n"; + + FunctionLoweringInfo FuncInfo(TLI, Fn, MF); + + if (ExceptionHandling) + for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + if (InvokeInst *Invoke = dyn_cast<InvokeInst>(I->getTerminator())) + // Mark landing pad. + FuncInfo.MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); + + for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + SelectBasicBlock(I, MF, FuncInfo); + + // Add function live-ins to entry block live-in set. 
+  BasicBlock *EntryBB = &Fn.getEntryBlock();
+  BB = FuncInfo.MBBMap[EntryBB];
+  if (!MF.livein_empty())
+    for (MachineFunction::livein_iterator I = MF.livein_begin(),
+           E = MF.livein_end(); I != E; ++I)
+      BB->addLiveIn(I->first);
+
+#ifndef NDEBUG
+  assert(FuncInfo.CatchInfoFound.size() == FuncInfo.CatchInfoLost.size() &&
+         "Not all catch info was assigned to a landing pad!");
+#endif
+
+  return true;
+}
+
+SDOperand SelectionDAGLowering::CopyValueToVirtualRegister(Value *V,
+                                                           unsigned Reg) {
+  SDOperand Op = getValue(V);
+  assert((Op.getOpcode() != ISD::CopyFromReg ||
+          cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+         "Copy from a reg to the same reg!");
+
+  MVT::ValueType SrcVT = Op.getValueType();
+  MVT::ValueType RegisterVT = TLI.getRegisterType(SrcVT);
+  unsigned NumRegs = TLI.getNumRegisters(SrcVT);
+  SmallVector<SDOperand, 8> Regs(NumRegs);
+  SmallVector<SDOperand, 8> Chains(NumRegs);
+
+  // Copy the value by legal parts into sequential virtual registers.
+  getCopyToParts(DAG, Op, &Regs[0], NumRegs, RegisterVT);
+  for (unsigned i = 0; i != NumRegs; ++i)
+    Chains[i] = DAG.getCopyToReg(getRoot(), Reg + i, Regs[i]);
+  return DAG.getNode(ISD::TokenFactor, MVT::Other, &Chains[0], NumRegs);
+}
+
+void SelectionDAGISel::
+LowerArguments(BasicBlock *LLVMBB, SelectionDAGLowering &SDL,
+               std::vector<SDOperand> &UnorderedChains) {
+  // If this is the entry block, emit arguments.
+  Function &F = *LLVMBB->getParent();
+  FunctionLoweringInfo &FuncInfo = SDL.FuncInfo;
+  SDOperand OldRoot = SDL.DAG.getRoot();
+  std::vector<SDOperand> Args = TLI.LowerArguments(F, SDL.DAG);
+
+  unsigned a = 0;
+  for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
+       AI != E; ++AI, ++a)
+    if (!AI->use_empty()) {
+      SDL.setValue(AI, Args[a]);
+
+      // If this argument is live outside of the entry block, insert a copy
+      // from wherever we got it to the vreg that other BBs will reference it
+      // by.
+      DenseMap<const Value*, unsigned>::iterator VMI = FuncInfo.ValueMap.find(AI);
+      if (VMI != FuncInfo.ValueMap.end()) {
+        SDOperand Copy = SDL.CopyValueToVirtualRegister(AI, VMI->second);
+        UnorderedChains.push_back(Copy);
+      }
+    }
+
+  // Finally, if the target has anything special to do, allow it to do so.
+  // FIXME: this should insert code into the DAG!
+  EmitFunctionEntryCode(F, SDL.DAG.getMachineFunction());
+}
+
+static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
+                          MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
+  assert(!FLI.MBBMap[SrcBB]->isLandingPad() &&
+         "Copying catch info out of a landing pad!");
+  for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I)
+    if (isSelector(I)) {
+      // Apply the catch info to DestBB.
+      addCatchInfo(cast<CallInst>(*I), MMI, FLI.MBBMap[DestBB]);
+#ifndef NDEBUG
+      FLI.CatchInfoFound.insert(I);
+#endif
+    }
+}
+
+void SelectionDAGISel::BuildSelectionDAG(SelectionDAG &DAG, BasicBlock *LLVMBB,
+       std::vector<std::pair<MachineInstr*, unsigned> > &PHINodesToUpdate,
+       FunctionLoweringInfo &FuncInfo) {
+  SelectionDAGLowering SDL(DAG, TLI, FuncInfo);
+
+  std::vector<SDOperand> UnorderedChains;
+
+  // Lower any arguments needed in this block if this is the entry block.
+  if (LLVMBB == &LLVMBB->getParent()->getEntryBlock())
+    LowerArguments(LLVMBB, SDL, UnorderedChains);
+
+  BB = FuncInfo.MBBMap[LLVMBB];
+  SDL.setCurrentBasicBlock(BB);
+
+  MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+
+  if (ExceptionHandling && MMI && BB->isLandingPad()) {
+    // Add a label to mark the beginning of the landing pad.
Deletion of the + // landing pad can thus be detected via the MachineModuleInfo. + unsigned LabelID = MMI->addLandingPad(BB); + DAG.setRoot(DAG.getNode(ISD::LABEL, MVT::Other, DAG.getEntryNode(), + DAG.getConstant(LabelID, MVT::i32))); + + // Mark exception register as live in. + unsigned Reg = TLI.getExceptionAddressRegister(); + if (Reg) BB->addLiveIn(Reg); + + // Mark exception selector register as live in. + Reg = TLI.getExceptionSelectorRegister(); + if (Reg) BB->addLiveIn(Reg); + + // FIXME: Hack around an exception handling flaw (PR1508): the personality + // function and list of typeids logically belong to the invoke (or, if you + // like, the basic block containing the invoke), and need to be associated + // with it in the dwarf exception handling tables. Currently however the + // information is provided by an intrinsic (eh.selector) that can be moved + // to unexpected places by the optimizers: if the unwind edge is critical, + // then breaking it can result in the intrinsics being in the successor of + // the landing pad, not the landing pad itself. This results in exceptions + // not being caught because no typeids are associated with the invoke. + // This may not be the only way things can go wrong, but it is the only way + // we try to work around for the moment. + BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator()); + + if (Br && Br->isUnconditional()) { // Critical edge? + BasicBlock::iterator I, E; + for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I) + if (isSelector(I)) + break; + + if (I == E) + // No catch info found - try to extract some from the successor. + copyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, FuncInfo); + } + } + + // Lower all of the non-terminator instructions. + for (BasicBlock::iterator I = LLVMBB->begin(), E = --LLVMBB->end(); + I != E; ++I) + SDL.visit(*I); + + // Ensure that all instructions which are used outside of their defining + // blocks are available as virtual registers. Invoke is handled elsewhere. + for (BasicBlock::iterator I = LLVMBB->begin(), E = LLVMBB->end(); I != E;++I) + if (!I->use_empty() && !isa<PHINode>(I) && !isa<InvokeInst>(I)) { + DenseMap<const Value*, unsigned>::iterator VMI =FuncInfo.ValueMap.find(I); + if (VMI != FuncInfo.ValueMap.end()) + UnorderedChains.push_back( + SDL.CopyValueToVirtualRegister(I, VMI->second)); + } + + // Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to + // ensure constants are generated when needed. Remember the virtual registers + // that need to be added to the Machine PHI nodes as input. We cannot just + // directly add them, because expansion might result in multiple MBB's for one + // BB. As such, the start of the BB might correspond to a different MBB than + // the end. + // + TerminatorInst *TI = LLVMBB->getTerminator(); + + // Emit constants only once even if used by multiple PHI nodes. + std::map<Constant*, unsigned> ConstantsOut; + + // Vector bool would be better, but vector<bool> is really slow. + std::vector<unsigned char> SuccsHandled; + if (TI->getNumSuccessors()) + SuccsHandled.resize(BB->getParent()->getNumBlockIDs()); + + // Check successor nodes' PHI nodes that expect a constant to be available + // from this block. 
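+  // Editor's note (illustrative, not part of the original patch): e.g. given
+  //   br label %succ          ; succ:  %p = phi i32 [ 7, %this ], ...
+  // the constant 7 must be materialized into a virtual register in this
+  // block; ConstantsOut ensures that happens only once even when several
+  // successor PHI nodes use the same constant.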
+  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+    BasicBlock *SuccBB = TI->getSuccessor(succ);
+    if (!isa<PHINode>(SuccBB->begin())) continue;
+    MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+    // If this terminator has multiple identical successors (common for
+    // switches), only handle each succ once.
+    unsigned SuccMBBNo = SuccMBB->getNumber();
+    if (SuccsHandled[SuccMBBNo]) continue;
+    SuccsHandled[SuccMBBNo] = true;
+
+    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+    PHINode *PN;
+
+    // At this point we know that there is a 1-1 correspondence between LLVM
+    // PHI nodes and Machine PHI nodes, but the incoming operands have not
+    // been emitted yet.
+    for (BasicBlock::iterator I = SuccBB->begin();
+         (PN = dyn_cast<PHINode>(I)); ++I) {
+      // Ignore dead PHIs.
+      if (PN->use_empty()) continue;
+
+      unsigned Reg;
+      Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+      if (Constant *C = dyn_cast<Constant>(PHIOp)) {
+        unsigned &RegOut = ConstantsOut[C];
+        if (RegOut == 0) {
+          RegOut = FuncInfo.CreateRegForValue(C);
+          UnorderedChains.push_back(
+            SDL.CopyValueToVirtualRegister(C, RegOut));
+        }
+        Reg = RegOut;
+      } else {
+        Reg = FuncInfo.ValueMap[PHIOp];
+        if (Reg == 0) {
+          assert(isa<AllocaInst>(PHIOp) &&
+                 FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+                 "Didn't codegen value into a register!??");
+          Reg = FuncInfo.CreateRegForValue(PHIOp);
+          UnorderedChains.push_back(
+            SDL.CopyValueToVirtualRegister(PHIOp, Reg));
+        }
+      }
+
+      // Remember that this register needs to be added to the machine PHI node
+      // as the input for this MBB.
+      MVT::ValueType VT = TLI.getValueType(PN->getType());
+      unsigned NumRegisters = TLI.getNumRegisters(VT);
+      for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+        PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+    }
+  }
+  ConstantsOut.clear();
+
+  // Turn all of the unordered chains into one factored node.
+  if (!UnorderedChains.empty()) {
+    SDOperand Root = SDL.getRoot();
+    if (Root.getOpcode() != ISD::EntryToken) {
+      unsigned i = 0, e = UnorderedChains.size();
+      for (; i != e; ++i) {
+        assert(UnorderedChains[i].Val->getNumOperands() > 1);
+        if (UnorderedChains[i].Val->getOperand(0) == Root)
+          break;  // Don't add the root if we already indirectly depend on it.
+      }
+
+      if (i == e)
+        UnorderedChains.push_back(Root);
+    }
+    DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other,
+                            &UnorderedChains[0], UnorderedChains.size()));
+  }
+
+  // Lower the terminator after the copies are emitted.
+  SDL.visit(*LLVMBB->getTerminator());
+
+  // Copy over any CaseBlock records that may now exist due to SwitchInst
+  // lowering, as well as any jump table information.
+  SwitchCases.clear();
+  SwitchCases = SDL.SwitchCases;
+  JTCases.clear();
+  JTCases = SDL.JTCases;
+  BitTestCases.clear();
+  BitTestCases = SDL.BitTestCases;
+
+  // Make sure the root of the DAG is up-to-date.
+  DAG.setRoot(SDL.getRoot());
+}
+
+void SelectionDAGISel::CodeGenAndEmitDAG(SelectionDAG &DAG) {
+  // Get alias analysis for load/store combining.
+  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+  // Run the DAG combiner in pre-legalize mode.
+  DAG.Combine(false, AA);
+
+  DOUT << "Lowered selection DAG:\n";
+  DEBUG(DAG.dump());
+
+  // Second step, hack on the DAG until it only uses operations and types that
+  // the target supports.
+  DAG.Legalize();
+
+  DOUT << "Legalized selection DAG:\n";
+  DEBUG(DAG.dump());
+
+  // Run the DAG combiner in post-legalize mode.
+ DAG.Combine(true, AA); + + if (ViewISelDAGs) DAG.viewGraph(); + + // Third, instruction select all of the operations to machine code, adding the + // code to the MachineBasicBlock. + InstructionSelectBasicBlock(DAG); + + DOUT << "Selected machine code:\n"; + DEBUG(BB->dump()); +} + +void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, MachineFunction &MF, + FunctionLoweringInfo &FuncInfo) { + std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate; + { + SelectionDAG DAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>()); + CurDAG = &DAG; + + // First step, lower LLVM code to some DAG. This DAG may use operations and + // types that are not supported by the target. + BuildSelectionDAG(DAG, LLVMBB, PHINodesToUpdate, FuncInfo); + + // Second step, emit the lowered DAG as machine code. + CodeGenAndEmitDAG(DAG); + } + + DOUT << "Total amount of phi nodes to update: " + << PHINodesToUpdate.size() << "\n"; + DEBUG(for (unsigned i = 0, e = PHINodesToUpdate.size(); i != e; ++i) + DOUT << "Node " << i << " : (" << PHINodesToUpdate[i].first + << ", " << PHINodesToUpdate[i].second << ")\n";); + + // Next, now that we know what the last MBB the LLVM BB expanded is, update + // PHI nodes in successors. + if (SwitchCases.empty() && JTCases.empty() && BitTestCases.empty()) { + for (unsigned i = 0, e = PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = PHINodesToUpdate[i].first; + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + PHI->addRegOperand(PHINodesToUpdate[i].second, false); + PHI->addMachineBasicBlockOperand(BB); + } + return; + } + + for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i) { + // Lower header first, if it wasn't already lowered + if (!BitTestCases[i].Emitted) { + SelectionDAG HSDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>()); + CurDAG = &HSDAG; + SelectionDAGLowering HSDL(HSDAG, TLI, FuncInfo); + // Set the current basic block to the mbb we wish to insert the code into + BB = BitTestCases[i].Parent; + HSDL.setCurrentBasicBlock(BB); + // Emit the code + HSDL.visitBitTestHeader(BitTestCases[i]); + HSDAG.setRoot(HSDL.getRoot()); + CodeGenAndEmitDAG(HSDAG); + } + + for (unsigned j = 0, ej = BitTestCases[i].Cases.size(); j != ej; ++j) { + SelectionDAG BSDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>()); + CurDAG = &BSDAG; + SelectionDAGLowering BSDL(BSDAG, TLI, FuncInfo); + // Set the current basic block to the mbb we wish to insert the code into + BB = BitTestCases[i].Cases[j].ThisBB; + BSDL.setCurrentBasicBlock(BB); + // Emit the code + if (j+1 != ej) + BSDL.visitBitTestCase(BitTestCases[i].Cases[j+1].ThisBB, + BitTestCases[i].Reg, + BitTestCases[i].Cases[j]); + else + BSDL.visitBitTestCase(BitTestCases[i].Default, + BitTestCases[i].Reg, + BitTestCases[i].Cases[j]); + + + BSDAG.setRoot(BSDL.getRoot()); + CodeGenAndEmitDAG(BSDAG); + } + + // Update PHI Nodes + for (unsigned pi = 0, pe = PHINodesToUpdate.size(); pi != pe; ++pi) { + MachineInstr *PHI = PHINodesToUpdate[pi].first; + MachineBasicBlock *PHIBB = PHI->getParent(); + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + // This is "default" BB. We have two jumps to it. From "header" BB and + // from last "case" BB. 
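+      // Editor's note (illustrative, not part of the original patch): in
+      // bit-test lowering a switch becomes "shift 1 left by the switch value
+      // and AND with a per-destination mask" tests. The header block does the
+      // range check, each case block tests one mask, and the default block is
+      // reached either from the header (value out of range) or from the last
+      // case block (no mask matched), hence the two operand pairs added to
+      // its PHI nodes below.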
+ if (PHIBB == BitTestCases[i].Default) { + PHI->addRegOperand(PHINodesToUpdate[pi].second, false); + PHI->addMachineBasicBlockOperand(BitTestCases[i].Parent); + PHI->addRegOperand(PHINodesToUpdate[pi].second, false); + PHI->addMachineBasicBlockOperand(BitTestCases[i].Cases.back().ThisBB); + } + // One of "cases" BB. + for (unsigned j = 0, ej = BitTestCases[i].Cases.size(); j != ej; ++j) { + MachineBasicBlock* cBB = BitTestCases[i].Cases[j].ThisBB; + if (cBB->succ_end() != + std::find(cBB->succ_begin(),cBB->succ_end(), PHIBB)) { + PHI->addRegOperand(PHINodesToUpdate[pi].second, false); + PHI->addMachineBasicBlockOperand(cBB); + } + } + } + } + + // If the JumpTable record is filled in, then we need to emit a jump table. + // Updating the PHI nodes is tricky in this case, since we need to determine + // whether the PHI is a successor of the range check MBB or the jump table MBB + for (unsigned i = 0, e = JTCases.size(); i != e; ++i) { + // Lower header first, if it wasn't already lowered + if (!JTCases[i].first.Emitted) { + SelectionDAG HSDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>()); + CurDAG = &HSDAG; + SelectionDAGLowering HSDL(HSDAG, TLI, FuncInfo); + // Set the current basic block to the mbb we wish to insert the code into + BB = JTCases[i].first.HeaderBB; + HSDL.setCurrentBasicBlock(BB); + // Emit the code + HSDL.visitJumpTableHeader(JTCases[i].second, JTCases[i].first); + HSDAG.setRoot(HSDL.getRoot()); + CodeGenAndEmitDAG(HSDAG); + } + + SelectionDAG JSDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>()); + CurDAG = &JSDAG; + SelectionDAGLowering JSDL(JSDAG, TLI, FuncInfo); + // Set the current basic block to the mbb we wish to insert the code into + BB = JTCases[i].second.MBB; + JSDL.setCurrentBasicBlock(BB); + // Emit the code + JSDL.visitJumpTable(JTCases[i].second); + JSDAG.setRoot(JSDL.getRoot()); + CodeGenAndEmitDAG(JSDAG); + + // Update PHI Nodes + for (unsigned pi = 0, pe = PHINodesToUpdate.size(); pi != pe; ++pi) { + MachineInstr *PHI = PHINodesToUpdate[pi].first; + MachineBasicBlock *PHIBB = PHI->getParent(); + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + // "default" BB. We can go there only from header BB. + if (PHIBB == JTCases[i].second.Default) { + PHI->addRegOperand(PHINodesToUpdate[pi].second, false); + PHI->addMachineBasicBlockOperand(JTCases[i].first.HeaderBB); + } + // JT BB. Just iterate over successors here + if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) { + PHI->addRegOperand(PHINodesToUpdate[pi].second, false); + PHI->addMachineBasicBlockOperand(BB); + } + } + } + + // If the switch block involved a branch to one of the actual successors, we + // need to update PHI nodes in that block. + for (unsigned i = 0, e = PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = PHINodesToUpdate[i].first; + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + if (BB->isSuccessor(PHI->getParent())) { + PHI->addRegOperand(PHINodesToUpdate[i].second, false); + PHI->addMachineBasicBlockOperand(BB); + } + } + + // If we generated any switch lowering information, build and codegen any + // additional DAGs necessary. 
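+  // Editor's note (illustrative, not part of the original patch): SwitchCases
+  // holds the compare-and-branch blocks created when a switch is lowered as a
+  // tree of comparisons instead of a jump table; each entry lives in its own
+  // MachineBasicBlock, so each gets its own SelectionDAG below.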
+ for (unsigned i = 0, e = SwitchCases.size(); i != e; ++i) { + SelectionDAG SDAG(TLI, MF, getAnalysisToUpdate<MachineModuleInfo>()); + CurDAG = &SDAG; + SelectionDAGLowering SDL(SDAG, TLI, FuncInfo); + + // Set the current basic block to the mbb we wish to insert the code into + BB = SwitchCases[i].ThisBB; + SDL.setCurrentBasicBlock(BB); + + // Emit the code + SDL.visitSwitchCase(SwitchCases[i]); + SDAG.setRoot(SDL.getRoot()); + CodeGenAndEmitDAG(SDAG); + + // Handle any PHI nodes in successors of this chunk, as if we were coming + // from the original BB before switch expansion. Note that PHI nodes can + // occur multiple times in PHINodesToUpdate. We have to be very careful to + // handle them the right number of times. + while ((BB = SwitchCases[i].TrueBB)) { // Handle LHS and RHS. + for (MachineBasicBlock::iterator Phi = BB->begin(); + Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){ + // This value for this PHI node is recorded in PHINodesToUpdate, get it. + for (unsigned pn = 0; ; ++pn) { + assert(pn != PHINodesToUpdate.size() && "Didn't find PHI entry!"); + if (PHINodesToUpdate[pn].first == Phi) { + Phi->addRegOperand(PHINodesToUpdate[pn].second, false); + Phi->addMachineBasicBlockOperand(SwitchCases[i].ThisBB); + break; + } + } + } + + // Don't process RHS if same block as LHS. + if (BB == SwitchCases[i].FalseBB) + SwitchCases[i].FalseBB = 0; + + // If we haven't handled the RHS, do so now. Otherwise, we're done. + SwitchCases[i].TrueBB = SwitchCases[i].FalseBB; + SwitchCases[i].FalseBB = 0; + } + assert(SwitchCases[i].TrueBB == 0 && SwitchCases[i].FalseBB == 0); + } +} + + +//===----------------------------------------------------------------------===// +/// ScheduleAndEmitDAG - Pick a safe ordering and emit instructions for each +/// target node in the graph. +void SelectionDAGISel::ScheduleAndEmitDAG(SelectionDAG &DAG) { + if (ViewSchedDAGs) DAG.viewGraph(); + + RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault(); + + if (!Ctor) { + Ctor = ISHeuristic; + RegisterScheduler::setDefault(Ctor); + } + + ScheduleDAG *SL = Ctor(this, &DAG, BB); + BB = SL->Run(); + delete SL; +} + + +HazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() { + return new HazardRecognizer(); +} + +//===----------------------------------------------------------------------===// +// Helper functions used by the generated instruction selector. +//===----------------------------------------------------------------------===// +// Calls to these methods are generated by tblgen. + +/// CheckAndMask - The isel is trying to match something like (and X, 255). If +/// the dag combiner simplified the 255, we still want to match. RHS is the +/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value +/// specified in the .td file (e.g. 255). +bool SelectionDAGISel::CheckAndMask(SDOperand LHS, ConstantSDNode *RHS, + int64_t DesiredMaskS) { + uint64_t ActualMask = RHS->getValue(); + uint64_t DesiredMask =DesiredMaskS & MVT::getIntVTBitMask(LHS.getValueType()); + + // If the actual mask exactly matches, success! + if (ActualMask == DesiredMask) + return true; + + // If the actual AND mask is allowing unallowed bits, this doesn't match. + if (ActualMask & ~DesiredMask) + return false; + + // Otherwise, the DAG Combiner may have proven that the value coming in is + // either already zero or is not demanded. Check for known zero input bits. 
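+  // Editor's note (worked example, not part of the original patch): matching
+  // a pattern written as (and X, 255) after the combiner shrank the constant
+  // to 15: DesiredMask == 0xFF, ActualMask == 0x0F, so NeededMask == 0xF0.
+  // If the top nibble of X is known zero, the shrunken AND behaves exactly
+  // like (and X, 255) and the pattern may still match.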
+  uint64_t NeededMask = DesiredMask & ~ActualMask;
+  if (CurDAG->MaskedValueIsZero(LHS, NeededMask))
+    return true;
+
+  // TODO: check to see if missing bits are just not demanded.
+
+  // Otherwise, this pattern doesn't match.
+  return false;
+}
+
+/// CheckOrMask - The isel is trying to match something like (or X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
+bool SelectionDAGISel::CheckOrMask(SDOperand LHS, ConstantSDNode *RHS,
+                                   int64_t DesiredMaskS) {
+  uint64_t ActualMask = RHS->getValue();
+  uint64_t DesiredMask = DesiredMaskS & MVT::getIntVTBitMask(LHS.getValueType());
+
+  // If the actual mask exactly matches, success!
+  if (ActualMask == DesiredMask)
+    return true;
+
+  // If the actual OR mask sets bits that are not allowed, this doesn't match.
+  if (ActualMask & ~DesiredMask)
+    return false;
+
+  // Otherwise, the DAG Combiner may have proven that the value coming in is
+  // either already zero or is not demanded. Check for known zero input bits.
+  uint64_t NeededMask = DesiredMask & ~ActualMask;
+
+  uint64_t KnownZero, KnownOne;
+  CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne);
+
+  // If all the missing bits in the or are already known to be set, match!
+  if ((NeededMask & KnownOne) == NeededMask)
+    return true;
+
+  // TODO: check to see if missing bits are just not demanded.
+
+  // Otherwise, this pattern doesn't match.
+  return false;
+}
+
+
+/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+/// by tblgen. Others should not call it.
+void SelectionDAGISel::
+SelectInlineAsmMemoryOperands(std::vector<SDOperand> &Ops, SelectionDAG &DAG) {
+  std::vector<SDOperand> InOps;
+  std::swap(InOps, Ops);
+
+  Ops.push_back(InOps[0]);  // input chain.
+  Ops.push_back(InOps[1]);  // input asm string.
+
+  unsigned i = 2, e = InOps.size();
+  if (InOps[e-1].getValueType() == MVT::Flag)
+    --e;  // Don't process a flag operand if it is here.
+
+  while (i != e) {
+    unsigned Flags = cast<ConstantSDNode>(InOps[i])->getValue();
+    if ((Flags & 7) != 4 /*MEM*/) {
+      // Just skip over this operand, copying the operands verbatim.
+      Ops.insert(Ops.end(), InOps.begin()+i, InOps.begin()+i+(Flags >> 3) + 1);
+      i += (Flags >> 3) + 1;
+    } else {
+      assert((Flags >> 3) == 1 && "Memory operand with multiple values?");
+      // Otherwise, this is a memory operand. Ask the target to select it.
+      std::vector<SDOperand> SelOps;
+      if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps, DAG)) {
+        cerr << "Could not match memory address. Inline asm failure!\n";
+        exit(1);
+      }
+
+      // Add this to the output node.
+      MVT::ValueType IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy();
+      Ops.push_back(DAG.getTargetConstant(4/*MEM*/ | (SelOps.size() << 3),
+                                          IntPtrTy));
+      Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+      i += 2;
+    }
+  }
+
+  // Add the flag input back if present.
+ if (e != InOps.size()) + Ops.push_back(InOps.back()); +} + +char SelectionDAGISel::ID = 0; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp new file mode 100644 index 0000000..12b5682 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -0,0 +1,245 @@ +//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG::viewGraph method. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/MRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Config/config.h" +#include <fstream> +#include <sstream> +using namespace llvm; + +namespace llvm { + template<> + struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits { + static std::string getGraphName(const SelectionDAG *G) { + return G->getMachineFunction().getFunction()->getName(); + } + + static bool renderGraphFromBottomUp() { + return true; + } + + static bool hasNodeAddressLabel(const SDNode *Node, + const SelectionDAG *Graph) { + return true; + } + + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. 
+ template<typename EdgeIter> + static std::string getEdgeAttributes(const void *Node, EdgeIter EI) { + SDOperand Op = EI.getNode()->getOperand(EI.getOperand()); + MVT::ValueType VT = Op.getValueType(); + if (VT == MVT::Flag) + return "color=red,style=bold"; + else if (VT == MVT::Other) + return "color=blue,style=dashed"; + return ""; + } + + + static std::string getNodeLabel(const SDNode *Node, + const SelectionDAG *Graph); + static std::string getNodeAttributes(const SDNode *N, + const SelectionDAG *Graph) { +#ifndef NDEBUG + const std::string &Attrs = Graph->getGraphAttrs(N); + if (!Attrs.empty()) { + if (Attrs.find("shape=") == std::string::npos) + return std::string("shape=Mrecord,") + Attrs; + else + return Attrs; + } +#endif + return "shape=Mrecord"; + } + + static void addCustomGraphFeatures(SelectionDAG *G, + GraphWriter<SelectionDAG*> &GW) { + GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot"); + if (G->getRoot().Val) + GW.emitEdge(0, -1, G->getRoot().Val, -1, ""); + } + }; +} + +std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, + const SelectionDAG *G) { + std::string Op = Node->getOperationName(G); + + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + if (Node->getValueType(i) == MVT::Other) + Op += ":ch"; + else + Op = Op + ":" + MVT::getValueTypeString(Node->getValueType(i)); + + if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Node)) { + Op += ": " + utostr(CSDN->getValue()); + } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(Node)) { + Op += ": " + ftostr(CSDN->getValue()); + } else if (const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(Node)) { + int offset = GADN->getOffset(); + Op += ": " + GADN->getGlobal()->getName(); + if (offset > 0) + Op += "+" + itostr(offset); + else + Op += itostr(offset); + } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(Node)) { + Op += " " + itostr(FIDN->getIndex()); + } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(Node)) { + Op += " " + itostr(JTDN->getIndex()); + } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Node)){ + if (CP->isMachineConstantPoolEntry()) { + std::ostringstream SS; + CP->getMachineCPVal()->print(SS); + Op += "<" + SS.str() + ">"; + } else { + if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) + Op += "<" + ftostr(CFP->getValue()) + ">"; + else if (ConstantInt *CI = dyn_cast<ConstantInt>(CP->getConstVal())) + Op += "<" + utostr(CI->getZExtValue()) + ">"; + else { + std::ostringstream SS; + WriteAsOperand(SS, CP->getConstVal(), false); + Op += "<" + SS.str() + ">"; + } + } + } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(Node)) { + Op = "BB: "; + const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); + if (LBB) + Op += LBB->getName(); + //Op += " " + (const void*)BBDN->getBasicBlock(); + } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node)) { + if (G && R->getReg() != 0 && + MRegisterInfo::isPhysicalRegister(R->getReg())) { + Op = Op + " " + G->getTarget().getRegisterInfo()->getName(R->getReg()); + } else { + Op += " #" + utostr(R->getReg()); + } + } else if (const ExternalSymbolSDNode *ES = + dyn_cast<ExternalSymbolSDNode>(Node)) { + Op += "'" + std::string(ES->getSymbol()) + "'"; + } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(Node)) { + if (M->getValue()) + Op += "<" + M->getValue()->getName() + ":" + itostr(M->getOffset()) + ">"; + else + Op += "<null:" + itostr(M->getOffset()) + ">"; + } else 
if (const VTSDNode *N = dyn_cast<VTSDNode>(Node)) { + Op = Op + " VT=" + MVT::getValueTypeString(N->getVT()); + } else if (const StringSDNode *N = dyn_cast<StringSDNode>(Node)) { + Op = Op + "\"" + N->getValue() + "\""; + } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(Node)) { + bool doExt = true; + switch (LD->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: + Op = Op + "<anyext "; + break; + case ISD::SEXTLOAD: + Op = Op + " <sext "; + break; + case ISD::ZEXTLOAD: + Op = Op + " <zext "; + break; + } + if (doExt) + Op = Op + MVT::getValueTypeString(LD->getLoadedVT()) + ">"; + + Op += LD->getIndexedModeName(LD->getAddressingMode()); + } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(Node)) { + if (ST->isTruncatingStore()) + Op = Op + "<trunc " + MVT::getValueTypeString(ST->getStoredVT()) + ">"; + Op += ST->getIndexedModeName(ST->getAddressingMode()); + } + + return Op; +} + + +/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG +/// rendered using 'dot'. +/// +void SelectionDAG::viewGraph() { +// This code is only for debugging! +#ifndef NDEBUG + ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName()); +#else + cerr << "SelectionDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + + +/// clearGraphAttrs - Clear all previously defined node graph attributes. +/// Intended to be used from a debugging tool (eg. gdb). +void SelectionDAG::clearGraphAttrs() { +#ifndef NDEBUG + NodeGraphAttrs.clear(); +#else + cerr << "SelectionDAG::clearGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + + +/// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".) +/// +void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) { +#ifndef NDEBUG + NodeGraphAttrs[N] = Attrs; +#else + cerr << "SelectionDAG::setGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + + +/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".) +/// Used from getNodeAttributes. +const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { +#ifndef NDEBUG + std::map<const SDNode *, std::string>::const_iterator I = + NodeGraphAttrs.find(N); + + if (I != NodeGraphAttrs.end()) + return I->second; + else + return ""; +#else + cerr << "SelectionDAG::getGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; + return std::string(""); +#endif +} + +/// setGraphColor - Convenience for setting node color attribute. +/// +void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) { +#ifndef NDEBUG + NodeGraphAttrs[N] = std::string("color=") + Color; +#else + cerr << "SelectionDAG::setGraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp new file mode 100644 index 0000000..1b7b436 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -0,0 +1,1753 @@ +//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This implements the TargetLowering class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/MRegisterInfo.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +/// InitLibcallNames - Set default libcall names. +/// +static void InitLibcallNames(const char **Names) { + Names[RTLIB::SHL_I32] = "__ashlsi3"; + Names[RTLIB::SHL_I64] = "__ashldi3"; + Names[RTLIB::SRL_I32] = "__lshrsi3"; + Names[RTLIB::SRL_I64] = "__lshrdi3"; + Names[RTLIB::SRA_I32] = "__ashrsi3"; + Names[RTLIB::SRA_I64] = "__ashrdi3"; + Names[RTLIB::MUL_I32] = "__mulsi3"; + Names[RTLIB::MUL_I64] = "__muldi3"; + Names[RTLIB::SDIV_I32] = "__divsi3"; + Names[RTLIB::SDIV_I64] = "__divdi3"; + Names[RTLIB::UDIV_I32] = "__udivsi3"; + Names[RTLIB::UDIV_I64] = "__udivdi3"; + Names[RTLIB::SREM_I32] = "__modsi3"; + Names[RTLIB::SREM_I64] = "__moddi3"; + Names[RTLIB::UREM_I32] = "__umodsi3"; + Names[RTLIB::UREM_I64] = "__umoddi3"; + Names[RTLIB::NEG_I32] = "__negsi2"; + Names[RTLIB::NEG_I64] = "__negdi2"; + Names[RTLIB::ADD_F32] = "__addsf3"; + Names[RTLIB::ADD_F64] = "__adddf3"; + Names[RTLIB::SUB_F32] = "__subsf3"; + Names[RTLIB::SUB_F64] = "__subdf3"; + Names[RTLIB::MUL_F32] = "__mulsf3"; + Names[RTLIB::MUL_F64] = "__muldf3"; + Names[RTLIB::DIV_F32] = "__divsf3"; + Names[RTLIB::DIV_F64] = "__divdf3"; + Names[RTLIB::REM_F32] = "fmodf"; + Names[RTLIB::REM_F64] = "fmod"; + Names[RTLIB::NEG_F32] = "__negsf2"; + Names[RTLIB::NEG_F64] = "__negdf2"; + Names[RTLIB::POWI_F32] = "__powisf2"; + Names[RTLIB::POWI_F64] = "__powidf2"; + Names[RTLIB::SQRT_F32] = "sqrtf"; + Names[RTLIB::SQRT_F64] = "sqrt"; + Names[RTLIB::SIN_F32] = "sinf"; + Names[RTLIB::SIN_F64] = "sin"; + Names[RTLIB::COS_F32] = "cosf"; + Names[RTLIB::COS_F64] = "cos"; + Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; + Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; + Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; + Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; + Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; + Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; + Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; + Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; + Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; + Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; + Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; + Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; + Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; + Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; + Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; + Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; + Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; + Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; + Names[RTLIB::OEQ_F32] = "__eqsf2"; + Names[RTLIB::OEQ_F64] = "__eqdf2"; + Names[RTLIB::UNE_F32] = "__nesf2"; + Names[RTLIB::UNE_F64] = "__nedf2"; + Names[RTLIB::OGE_F32] = "__gesf2"; + Names[RTLIB::OGE_F64] = "__gedf2"; + Names[RTLIB::OLT_F32] = "__ltsf2"; + Names[RTLIB::OLT_F64] = "__ltdf2"; + Names[RTLIB::OLE_F32] = "__lesf2"; + Names[RTLIB::OLE_F64] = "__ledf2"; + Names[RTLIB::OGT_F32] = "__gtsf2"; + Names[RTLIB::OGT_F64] = "__gtdf2"; + Names[RTLIB::UO_F32] = "__unordsf2"; + Names[RTLIB::UO_F64] = "__unorddf2"; + Names[RTLIB::O_F32] = 
"__unordsf2"; + Names[RTLIB::O_F64] = "__unorddf2"; +} + +/// InitCmpLibcallCCs - Set default comparison libcall CC. +/// +static void InitCmpLibcallCCs(ISD::CondCode *CCs) { + memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL); + CCs[RTLIB::OEQ_F32] = ISD::SETEQ; + CCs[RTLIB::OEQ_F64] = ISD::SETEQ; + CCs[RTLIB::UNE_F32] = ISD::SETNE; + CCs[RTLIB::UNE_F64] = ISD::SETNE; + CCs[RTLIB::OGE_F32] = ISD::SETGE; + CCs[RTLIB::OGE_F64] = ISD::SETGE; + CCs[RTLIB::OLT_F32] = ISD::SETLT; + CCs[RTLIB::OLT_F64] = ISD::SETLT; + CCs[RTLIB::OLE_F32] = ISD::SETLE; + CCs[RTLIB::OLE_F64] = ISD::SETLE; + CCs[RTLIB::OGT_F32] = ISD::SETGT; + CCs[RTLIB::OGT_F64] = ISD::SETGT; + CCs[RTLIB::UO_F32] = ISD::SETNE; + CCs[RTLIB::UO_F64] = ISD::SETNE; + CCs[RTLIB::O_F32] = ISD::SETEQ; + CCs[RTLIB::O_F64] = ISD::SETEQ; +} + +TargetLowering::TargetLowering(TargetMachine &tm) + : TM(tm), TD(TM.getTargetData()) { + assert(ISD::BUILTIN_OP_END <= 156 && + "Fixed size array in TargetLowering is not large enough!"); + // All operations default to being supported. + memset(OpActions, 0, sizeof(OpActions)); + memset(LoadXActions, 0, sizeof(LoadXActions)); + memset(&StoreXActions, 0, sizeof(StoreXActions)); + memset(&IndexedModeActions, 0, sizeof(IndexedModeActions)); + + // Set all indexed load / store to expand. + for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { + for (unsigned IM = (unsigned)ISD::PRE_INC; + IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { + setIndexedLoadAction(IM, (MVT::ValueType)VT, Expand); + setIndexedStoreAction(IM, (MVT::ValueType)VT, Expand); + } + } + + IsLittleEndian = TD->isLittleEndian(); + UsesGlobalOffsetTable = false; + ShiftAmountTy = SetCCResultTy = PointerTy = getValueType(TD->getIntPtrType()); + ShiftAmtHandling = Undefined; + memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); + memset(TargetDAGCombineArray, 0, + sizeof(TargetDAGCombineArray)/sizeof(TargetDAGCombineArray[0])); + maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; + allowUnalignedMemoryAccesses = false; + UseUnderscoreSetJmp = false; + UseUnderscoreLongJmp = false; + SelectIsExpensive = false; + IntDivIsCheap = false; + Pow2DivIsCheap = false; + StackPointerRegisterToSaveRestore = 0; + ExceptionPointerRegister = 0; + ExceptionSelectorRegister = 0; + SchedPreferenceInfo = SchedulingForLatency; + JumpBufSize = 0; + JumpBufAlignment = 0; + IfCvtBlockSizeLimit = 2; + + InitLibcallNames(LibcallRoutineNames); + InitCmpLibcallCCs(CmpLibcallCCs); +} + +TargetLowering::~TargetLowering() {} + +/// computeRegisterProperties - Once all of the register classes are added, +/// this allows us to compute derived properties we expose. +void TargetLowering::computeRegisterProperties() { + assert(MVT::LAST_VALUETYPE <= 32 && + "Too many value types for ValueTypeActions to hold!"); + + // Everything defaults to needing one register. + for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { + NumRegistersForVT[i] = 1; + RegisterTypeForVT[i] = TransformToType[i] = i; + } + // ...except isVoid, which doesn't need any registers. + NumRegistersForVT[MVT::isVoid] = 0; + + // Find the largest integer register class. + unsigned LargestIntReg = MVT::i128; + for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg) + assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); + + // Every integer value type larger than this largest register takes twice as + // many registers to represent as the previous ValueType. 
+ for (MVT::ValueType ExpandedReg = LargestIntReg + 1; + MVT::isInteger(ExpandedReg); ++ExpandedReg) { + NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; + RegisterTypeForVT[ExpandedReg] = LargestIntReg; + TransformToType[ExpandedReg] = ExpandedReg - 1; + ValueTypeActions.setTypeAction(ExpandedReg, Expand); + } + + // Inspect all of the ValueType's smaller than the largest integer + // register to see which ones need promotion. + MVT::ValueType LegalIntReg = LargestIntReg; + for (MVT::ValueType IntReg = LargestIntReg - 1; + IntReg >= MVT::i1; --IntReg) { + if (isTypeLegal(IntReg)) { + LegalIntReg = IntReg; + } else { + RegisterTypeForVT[IntReg] = TransformToType[IntReg] = LegalIntReg; + ValueTypeActions.setTypeAction(IntReg, Promote); + } + } + + // Decide how to handle f64. If the target does not have native f64 support, + // expand it to i64 and we will be generating soft float library calls. + if (!isTypeLegal(MVT::f64)) { + NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; + RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; + TransformToType[MVT::f64] = MVT::i64; + ValueTypeActions.setTypeAction(MVT::f64, Expand); + } + + // Decide how to handle f32. If the target does not have native support for + // f32, promote it to f64 if it is legal. Otherwise, expand it to i32. + if (!isTypeLegal(MVT::f32)) { + if (isTypeLegal(MVT::f64)) { + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; + TransformToType[MVT::f32] = MVT::f64; + ValueTypeActions.setTypeAction(MVT::f32, Promote); + } else { + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; + TransformToType[MVT::f32] = MVT::i32; + ValueTypeActions.setTypeAction(MVT::f32, Expand); + } + } + + // Loop over all of the vector value types to see which need transformations. + for (MVT::ValueType i = MVT::FIRST_VECTOR_VALUETYPE; + i <= MVT::LAST_VECTOR_VALUETYPE; ++i) { + if (!isTypeLegal(i)) { + MVT::ValueType IntermediateVT, RegisterVT; + unsigned NumIntermediates; + NumRegistersForVT[i] = + getVectorTypeBreakdown(i, + IntermediateVT, NumIntermediates, + RegisterVT); + RegisterTypeForVT[i] = RegisterVT; + TransformToType[i] = MVT::Other; // this isn't actually used + ValueTypeActions.setTypeAction(i, Expand); + } + } +} + +const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { + return NULL; +} + +/// getVectorTypeBreakdown - Vector types are broken down into some number of +/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 +/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. +/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. +/// +/// This method returns the number of registers needed, and the VT for each +/// register. It also returns the VT and quantity of the intermediate values +/// before they are promoted/expanded. +/// +unsigned TargetLowering::getVectorTypeBreakdown(MVT::ValueType VT, + MVT::ValueType &IntermediateVT, + unsigned &NumIntermediates, + MVT::ValueType &RegisterVT) const { + // Figure out the right, legal destination reg to copy into. + unsigned NumElts = MVT::getVectorNumElements(VT); + MVT::ValueType EltTy = MVT::getVectorElementType(VT); + + unsigned NumVectorRegs = 1; + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. 
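+  // Editor's note (worked example, not part of the original patch): for
+  // VT == v8f32 on a target whose widest legal vector type is v4f32, the
+  // loop below halves NumElts 8 -> 4 and doubles NumVectorRegs 1 -> 2,
+  // giving NumIntermediates == 2 and IntermediateVT == v4f32, matching the
+  // v8f32 example in the comment on getVectorTypeBreakdown above.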
+ while (NumElts > 1 && + !isTypeLegal(MVT::getVectorType(EltTy, NumElts))) { + NumElts >>= 1; + NumVectorRegs <<= 1; + } + + NumIntermediates = NumVectorRegs; + + MVT::ValueType NewVT = MVT::getVectorType(EltTy, NumElts); + if (!isTypeLegal(NewVT)) + NewVT = EltTy; + IntermediateVT = NewVT; + + MVT::ValueType DestVT = getTypeToTransformTo(NewVT); + RegisterVT = DestVT; + if (DestVT < NewVT) { + // Value is expanded, e.g. i64 -> i16. + return NumVectorRegs*(MVT::getSizeInBits(NewVT)/MVT::getSizeInBits(DestVT)); + } else { + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; + } + + return 1; +} + +//===----------------------------------------------------------------------===// +// Optimization Methods +//===----------------------------------------------------------------------===// + +/// ShrinkDemandedConstant - Check to see if the specified operand of the +/// specified instruction is a constant integer. If so, check to see if there +/// are any bits set in the constant that are not demanded. If so, shrink the +/// constant and return true. +bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDOperand Op, + uint64_t Demanded) { + // FIXME: ISD::SELECT, ISD::SELECT_CC + switch(Op.getOpcode()) { + default: break; + case ISD::AND: + case ISD::OR: + case ISD::XOR: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + if ((~Demanded & C->getValue()) != 0) { + MVT::ValueType VT = Op.getValueType(); + SDOperand New = DAG.getNode(Op.getOpcode(), VT, Op.getOperand(0), + DAG.getConstant(Demanded & C->getValue(), + VT)); + return CombineTo(Op, New); + } + break; + } + return false; +} + +/// SimplifyDemandedBits - Look at Op. At this point, we know that only the +/// DemandedMask bits of the result of Op are ever used downstream. If we can +/// use this information to simplify Op, create a new simplified DAG node and +/// return true, returning the original and new nodes in Old and New. Otherwise, +/// analyze the expression and return a mask of KnownOne and KnownZero bits for +/// the expression (used to simplify the caller). The KnownZero/One bits may +/// only be accurate for those bits in the DemandedMask. +bool TargetLowering::SimplifyDemandedBits(SDOperand Op, uint64_t DemandedMask, + uint64_t &KnownZero, + uint64_t &KnownOne, + TargetLoweringOpt &TLO, + unsigned Depth) const { + KnownZero = KnownOne = 0; // Don't know anything. + + // The masks are not wide enough to represent this type! Should use APInt. + if (Op.getValueType() == MVT::i128) + return false; + + // Other users may use these bits. + if (!Op.Val->hasOneUse()) { + if (Depth != 0) { + // If not at the root, Just compute the KnownZero/KnownOne bits to + // simplify things downstream. + TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); + return false; + } + // If this is the root being simplified, allow it to have multiple uses, + // just set the DemandedMask to all bits. + DemandedMask = MVT::getIntVTBitMask(Op.getValueType()); + } else if (DemandedMask == 0) { + // Not demanding any bits from Op. + if (Op.getOpcode() != ISD::UNDEF) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::UNDEF, Op.getValueType())); + return false; + } else if (Depth == 6) { // Limit search depth. + return false; + } + + uint64_t KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut; + switch (Op.getOpcode()) { + case ISD::Constant: + // We know all of the bits for a constant! 
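+    // Editor's note (worked example, not part of the original patch): for the
+    // constant 0b1010 with DemandedMask 0b1111, the lines below compute
+    // KnownOne == 0b1010 and KnownZero == 0b0101; every demanded bit of a
+    // constant is known to be exactly zero or one.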
+ KnownOne = cast<ConstantSDNode>(Op)->getValue() & DemandedMask; + KnownZero = ~KnownOne & DemandedMask; + return false; // Don't fall through, will infinitely loop. + case ISD::AND: + // If the RHS is a constant, check to see if the LHS would be zero without + // using the bits from the RHS. Below, we use knowledge about the RHS to + // simplify the LHS, here we're using information from the LHS to simplify + // the RHS. + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + uint64_t LHSZero, LHSOne; + TLO.DAG.ComputeMaskedBits(Op.getOperand(0), DemandedMask, + LHSZero, LHSOne, Depth+1); + // If the LHS already has zeros where RHSC does, this and is dead. + if ((LHSZero & DemandedMask) == (~RHSC->getValue() & DemandedMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + // If any of the set bits in the RHS are known zero on the LHS, shrink + // the constant. + if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & DemandedMask)) + return true; + } + + if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & ~KnownZero, + KnownZero2, KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known one on one side, return the other. + // These bits cannot contribute to the result of the 'and'. + if ((DemandedMask & ~KnownZero2 & KnownOne)==(DemandedMask & ~KnownZero2)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((DemandedMask & ~KnownZero & KnownOne2)==(DemandedMask & ~KnownZero)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If all of the demanded bits in the inputs are known zeros, return zero. + if ((DemandedMask & (KnownZero|KnownZero2)) == DemandedMask) + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType())); + // If the RHS is a constant, see if we can simplify it. + if (TLO.ShrinkDemandedConstant(Op, DemandedMask & ~KnownZero2)) + return true; + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + KnownZero |= KnownZero2; + break; + case ISD::OR: + if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & ~KnownOne, + KnownZero2, KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'or'. + if ((DemandedMask & ~KnownOne2 & KnownZero) == (DemandedMask & ~KnownOne2)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((DemandedMask & ~KnownOne & KnownZero2) == (DemandedMask & ~KnownOne)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If all of the potentially set bits on one side are known to be set on + // the other side, just use the 'other' side. + if ((DemandedMask & (~KnownZero) & KnownOne2) == + (DemandedMask & (~KnownZero))) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((DemandedMask & (~KnownZero2) & KnownOne) == + (DemandedMask & (~KnownZero2))) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If the RHS is a constant, see if we can simplify it. 
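A quick numeric spot-check of the AND case above, as a standalone sketch; the demanded mask and the AND constant are arbitrary picks, not values from this code. When every demanded bit is set in the AND mask, the AND cannot change any demanded bit, so the other operand can be used directly.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t Demanded = 0xFF; // only the low byte is used downstream
      // 0x1FF covers every demanded bit, so (X & 0x1FF) and X agree on all
      // demanded bits and the AND is dead for this user.
      for (uint64_t X = 0; X < 8192; ++X)
        assert(((X & 0x1FF) & Demanded) == (X & Demanded));
      return 0;
    }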
+ if (TLO.ShrinkDemandedConstant(Op, DemandedMask)) + return true; + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + break; + case ISD::XOR: + if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'xor'. + if ((DemandedMask & KnownZero) == DemandedMask) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((DemandedMask & KnownZero2) == DemandedMask) + return TLO.CombineTo(Op, Op.getOperand(1)); + + // If all of the unknown bits are known to be zero on one side or the other + // (but not both) turn this into an *inclusive* or. + // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 + if ((DemandedMask & ~KnownZero & ~KnownZero2) == 0) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, Op.getValueType(), + Op.getOperand(0), + Op.getOperand(1))); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + + // If all of the demanded bits on one side are known, and all of the set + // bits on that side are also known to be set on the other side, turn this + // into an AND, as we know the bits will be cleared. + // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 + if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) { // all known + if ((KnownOne & KnownOne2) == KnownOne) { + MVT::ValueType VT = Op.getValueType(); + SDOperand ANDC = TLO.DAG.getConstant(~KnownOne & DemandedMask, VT); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, VT, Op.getOperand(0), + ANDC)); + } + } + + // If the RHS is a constant, see if we can simplify it. + // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. + if (TLO.ShrinkDemandedConstant(Op, DemandedMask)) + return true; + + KnownZero = KnownZeroOut; + KnownOne = KnownOneOut; + break; + case ISD::SETCC: + // If we know the result of a setcc has the top bits zero, use this info. + if (getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult) + KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); + break; + case ISD::SELECT: + if (SimplifyDemandedBits(Op.getOperand(2), DemandedMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (TLO.ShrinkDemandedConstant(Op, DemandedMask)) + return true; + + // Only known if known in both the LHS and RHS. 
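The XOR-to-inclusive-OR rewrite above can be checked exhaustively on 8-bit values; a standalone sketch with arbitrarily chosen disjoint masks:

    #include <cassert>
    #include <cstdint>

    int main() {
      // C1 & C2 == 0, so (A & C1) and (B & C2) never have a common set bit;
      // XOR and OR therefore agree on every bit.
      const uint8_t C1 = 0xF0, C2 = 0x0F;
      for (unsigned A = 0; A < 256; ++A)
        for (unsigned B = 0; B < 256; ++B)
          assert(((A & C1) ^ (B & C2)) == ((A & C1) | (B & C2)));
      return 0;
    }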
+ KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + case ISD::SELECT_CC: + if (SimplifyDemandedBits(Op.getOperand(3), DemandedMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(2), DemandedMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (TLO.ShrinkDemandedConstant(Op, DemandedMask)) + return true; + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + case ISD::SHL: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getValue(); + SDOperand InOp = Op.getOperand(0); + + // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a + // single shift. We can do this if the bottom bits (which are shifted + // out) are never demanded. + if (InOp.getOpcode() == ISD::SRL && + isa<ConstantSDNode>(InOp.getOperand(1))) { + if (ShAmt && (DemandedMask & ((1ULL << ShAmt)-1)) == 0) { + unsigned C1 = cast<ConstantSDNode>(InOp.getOperand(1))->getValue(); + unsigned Opc = ISD::SHL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SRL; + } + + SDOperand NewSA = + TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + MVT::ValueType VT = Op.getValueType(); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, VT, + InOp.getOperand(0), NewSA)); + } + } + + if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask >> ShAmt, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + KnownZero <<= SA->getValue(); + KnownOne <<= SA->getValue(); + KnownZero |= (1ULL << SA->getValue())-1; // low bits known zero. + } + break; + case ISD::SRL: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + MVT::ValueType VT = Op.getValueType(); + unsigned ShAmt = SA->getValue(); + uint64_t TypeMask = MVT::getIntVTBitMask(VT); + unsigned VTSize = MVT::getSizeInBits(VT); + SDOperand InOp = Op.getOperand(0); + + // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a + // single shift. We can do this if the top bits (which are shifted out) + // are never demanded. + if (InOp.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(InOp.getOperand(1))) { + if (ShAmt && (DemandedMask & (~0ULL << (VTSize-ShAmt))) == 0) { + unsigned C1 = cast<ConstantSDNode>(InOp.getOperand(1))->getValue(); + unsigned Opc = ISD::SRL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SHL; + } + + SDOperand NewSA = + TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, VT, + InOp.getOperand(0), NewSA)); + } + } + + // Compute the new bits that are at the top now. + if (SimplifyDemandedBits(InOp, (DemandedMask << ShAmt) & TypeMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero &= TypeMask; + KnownOne &= TypeMask; + KnownZero >>= ShAmt; + KnownOne >>= ShAmt; + + uint64_t HighBits = (1ULL << ShAmt)-1; + HighBits <<= VTSize - ShAmt; + KnownZero |= HighBits; // High bits known zero. + } + break; + case ISD::SRA: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + MVT::ValueType VT = Op.getValueType(); + unsigned ShAmt = SA->getValue(); + + // Compute the new bits that are at the top now. 
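The shift-combining in the SHL/SRL cases above rests on an identity that is easy to verify by brute force; a standalone 8-bit sketch, with the shift amounts 3 and 5 chosen arbitrarily:

    #include <cassert>
    #include <cstdint>

    int main() {
      // ((X << 3) >> 5) and (X >> 2) agree wherever the top 5 bits (the bits
      // shifted out and zero-filled) are not demanded.
      const uint8_t Demanded = 0x07; // low three bits only
      for (unsigned X = 0; X < 256; ++X) {
        uint8_t A = (uint8_t)((uint8_t)(X << 3) >> 5);
        uint8_t B = (uint8_t)(X >> 2);
        assert((A & Demanded) == (B & Demanded));
      }
      return 0;
    }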
+ uint64_t TypeMask = MVT::getIntVTBitMask(VT); + + uint64_t InDemandedMask = (DemandedMask << ShAmt) & TypeMask; + + // If any of the demanded bits are produced by the sign extension, we also + // demand the input sign bit. + uint64_t HighBits = (1ULL << ShAmt)-1; + HighBits <<= MVT::getSizeInBits(VT) - ShAmt; + if (HighBits & DemandedMask) + InDemandedMask |= MVT::getIntVTSignBit(VT); + + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero &= TypeMask; + KnownOne &= TypeMask; + KnownZero >>= ShAmt; + KnownOne >>= ShAmt; + + // Handle the sign bits. + uint64_t SignBit = MVT::getIntVTSignBit(VT); + SignBit >>= ShAmt; // Adjust to where it is now in the mask. + + // If the input sign bit is known to be zero, or if none of the top bits + // are demanded, turn this into an unsigned shift right. + if ((KnownZero & SignBit) || (HighBits & ~DemandedMask) == HighBits) { + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, VT, Op.getOperand(0), + Op.getOperand(1))); + } else if (KnownOne & SignBit) { // New bits are known one. + KnownOne |= HighBits; + } + } + break; + case ISD::SIGN_EXTEND_INREG: { + MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + + // Sign extension. Compute the demanded bits in the result that are not + // present in the input. + uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & DemandedMask; + + // If none of the extended bits are demanded, eliminate the sextinreg. + if (NewBits == 0) + return TLO.CombineTo(Op, Op.getOperand(0)); + + uint64_t InSignBit = MVT::getIntVTSignBit(EVT); + int64_t InputDemandedBits = DemandedMask & MVT::getIntVTBitMask(EVT); + + // Since the sign extended bits are demanded, we know that the sign + // bit is demanded. + InputDemandedBits |= InSignBit; + + if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + + // If the input sign bit is known zero, convert this into a zero extension. + if (KnownZero & InSignBit) + return TLO.CombineTo(Op, + TLO.DAG.getZeroExtendInReg(Op.getOperand(0), EVT)); + + if (KnownOne & InSignBit) { // Input sign bit known set + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Input sign bit unknown + KnownZero &= ~NewBits; + KnownOne &= ~NewBits; + } + break; + } + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: { + MVT::ValueType VT = Op.getValueType(); + unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1; + KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT); + KnownOne = 0; + break; + } + case ISD::LOAD: { + if (ISD::isZEXTLoad(Op.Val)) { + LoadSDNode *LD = cast<LoadSDNode>(Op); + MVT::ValueType VT = LD->getLoadedVT(); + KnownZero |= ~MVT::getIntVTBitMask(VT) & DemandedMask; + } + break; + } + case ISD::ZERO_EXTEND: { + uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType()); + + // If none of the top bits are demanded, convert this into an any_extend. 
+ uint64_t NewBits = (~InMask) & DemandedMask; + if (NewBits == 0) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, + Op.getValueType(), + Op.getOperand(0))); + + if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero |= NewBits; + break; + } + case ISD::SIGN_EXTEND: { + MVT::ValueType InVT = Op.getOperand(0).getValueType(); + uint64_t InMask = MVT::getIntVTBitMask(InVT); + uint64_t InSignBit = MVT::getIntVTSignBit(InVT); + uint64_t NewBits = (~InMask) & DemandedMask; + + // If none of the top bits are demanded, convert this into an any_extend. + if (NewBits == 0) + return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND,Op.getValueType(), + Op.getOperand(0))); + + // Since some of the sign extended bits are demanded, we know that the sign + // bit is demanded. + uint64_t InDemandedBits = DemandedMask & InMask; + InDemandedBits |= InSignBit; + + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + + // If the sign bit is known zero, convert this to a zero extend. + if (KnownZero & InSignBit) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, + Op.getValueType(), + Op.getOperand(0))); + + // If the sign bit is known one, the top bits match. + if (KnownOne & InSignBit) { + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Otherwise, top bits aren't known. + KnownOne &= ~NewBits; + KnownZero &= ~NewBits; + } + break; + } + case ISD::ANY_EXTEND: { + uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType()); + if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + break; + } + case ISD::TRUNCATE: { + // Simplify the input, using demanded bit information, and compute the known + // zero/one bits live out. + if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + + // If the input is only used by this truncate, see if we can shrink it based + // on the known demanded bits. + if (Op.getOperand(0).Val->hasOneUse()) { + SDOperand In = Op.getOperand(0); + switch (In.getOpcode()) { + default: break; + case ISD::SRL: + // Shrink SRL by a constant if none of the high bits shifted in are + // demanded. + if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){ + uint64_t HighBits = MVT::getIntVTBitMask(In.getValueType()); + HighBits &= ~MVT::getIntVTBitMask(Op.getValueType()); + HighBits >>= ShAmt->getValue(); + + if (ShAmt->getValue() < MVT::getSizeInBits(Op.getValueType()) && + (DemandedMask & HighBits) == 0) { + // None of the shifted in bits are needed. Add a truncate of the + // shift input, then shift it. 
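The legality condition for this truncate-of-shift shrink can likewise be brute-forced; a standalone sketch for a 32-to-16-bit truncate with shift amount 4 (all values invented). Bits 12-15 of the result are where bits 16-19 of the wide input would land, so the rewrite is safe exactly when those bits are not demanded.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint16_t Demanded = 0x0FFF; // bits 12-15 are not demanded
      for (uint32_t X = 0; X < (1u << 20); X += 97) { // sparse sweep
        uint16_t A = (uint16_t)(X >> 4);              // trunc(srl X, 4)
        uint16_t B = (uint16_t)((uint16_t)X >> 4);    // srl(trunc X), 4
        assert((A & Demanded) == (B & Demanded));
      }
      return 0;
    }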
+ SDOperand NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, + Op.getValueType(), + In.getOperand(0)); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL,Op.getValueType(), + NewTrunc, In.getOperand(1))); + } + } + break; + } + } + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType()); + KnownZero &= OutMask; + KnownOne &= OutMask; + break; + } + case ISD::AssertZext: { + MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + uint64_t InMask = MVT::getIntVTBitMask(VT); + if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero |= ~InMask & DemandedMask; + break; + } + case ISD::ADD: + case ISD::SUB: + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_VOID: + // Just use ComputeMaskedBits to compute output bits. + TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); + break; + } + + // If we know the value of all of the demanded bits, return this as a + // constant. + if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) + return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType())); + + return false; +} + +/// computeMaskedBitsForTargetNode - Determine which of the bits specified +/// in Mask are known to be either zero or one and return them in the +/// KnownZero/KnownOne bitsets. +void TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, + uint64_t Mask, + uint64_t &KnownZero, + uint64_t &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use MaskedValueIsZero if you don't know whether Op" + " is a target node!"); + KnownZero = 0; + KnownOne = 0; +} + +/// ComputeNumSignBitsForTargetNode - This method can be implemented by +/// targets that want to expose additional information about sign bits to the +/// DAG Combiner. +unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDOperand Op, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use ComputeNumSignBits if you don't know whether Op" + " is a target node!"); + return 1; +} + + +/// SimplifySetCC - Try to simplify a setcc built with the specified operands +/// and cc. If it is unable to simplify it, return a null SDOperand. +SDOperand +TargetLowering::SimplifySetCC(MVT::ValueType VT, SDOperand N0, SDOperand N1, + ISD::CondCode Cond, bool foldBooleans, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + + // These setcc operations always fold. + switch (Cond) { + default: break; + case ISD::SETFALSE: + case ISD::SETFALSE2: return DAG.getConstant(0, VT); + case ISD::SETTRUE: + case ISD::SETTRUE2: return DAG.getConstant(1, VT); + } + + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val)) { + uint64_t C1 = N1C->getValue(); + if (isa<ConstantSDNode>(N0.Val)) { + return DAG.FoldSetCC(VT, N0, N1, Cond); + } else { + // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an + // equality comparison, then we're just comparing whether X itself is + // zero. 
+ if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && + N0.getOperand(0).getOpcode() == ISD::CTLZ && + N0.getOperand(1).getOpcode() == ISD::Constant) { + unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getValue(); + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + ShAmt == Log2_32(MVT::getSizeInBits(N0.getValueType()))) { + if ((C1 == 0) == (Cond == ISD::SETEQ)) { + // (srl (ctlz x), 5) == 0 -> X != 0 + // (srl (ctlz x), 5) != 1 -> X != 0 + Cond = ISD::SETNE; + } else { + // (srl (ctlz x), 5) != 0 -> X == 0 + // (srl (ctlz x), 5) == 1 -> X == 0 + Cond = ISD::SETEQ; + } + SDOperand Zero = DAG.getConstant(0, N0.getValueType()); + return DAG.getSetCC(VT, N0.getOperand(0).getOperand(0), + Zero, Cond); + } + } + + // If the LHS is a ZERO_EXTEND, perform the comparison on the input. + if (N0.getOpcode() == ISD::ZERO_EXTEND) { + unsigned InSize = MVT::getSizeInBits(N0.getOperand(0).getValueType()); + + // If the comparison constant has bits in the upper part, the + // zero-extended value could never match. + if (C1 & (~0ULL << InSize)) { + unsigned VSize = MVT::getSizeInBits(N0.getValueType()); + switch (Cond) { + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETEQ: return DAG.getConstant(0, VT); + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETNE: return DAG.getConstant(1, VT); + case ISD::SETGT: + case ISD::SETGE: + // True if the sign bit of C1 is set. + return DAG.getConstant((C1 & (1ULL << (VSize-1))) != 0, VT); + case ISD::SETLT: + case ISD::SETLE: + // True if the sign bit of C1 isn't set. + return DAG.getConstant((C1 & (1ULL << (VSize-1))) == 0, VT); + default: + break; + } + } + + // Otherwise, we can perform the comparison with the low bits. + switch (Cond) { + case ISD::SETEQ: + case ISD::SETNE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: + case ISD::SETULE: + return DAG.getSetCC(VT, N0.getOperand(0), + DAG.getConstant(C1, N0.getOperand(0).getValueType()), + Cond); + default: + break; // todo, be more careful with signed comparisons + } + } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + MVT::ValueType ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); + unsigned ExtSrcTyBits = MVT::getSizeInBits(ExtSrcTy); + MVT::ValueType ExtDstTy = N0.getValueType(); + unsigned ExtDstTyBits = MVT::getSizeInBits(ExtDstTy); + + // If the extended part has any inconsistent bits, it cannot ever + // compare equal. In other words, they have to be all ones or all + // zeros. + uint64_t ExtBits = + (~0ULL >> (64-ExtSrcTyBits)) & (~0ULL << (ExtDstTyBits-1)); + if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits) + return DAG.getConstant(Cond == ISD::SETNE, VT); + + SDOperand ZextOp; + MVT::ValueType Op0Ty = N0.getOperand(0).getValueType(); + if (Op0Ty == ExtSrcTy) { + ZextOp = N0.getOperand(0); + } else { + int64_t Imm = ~0ULL >> (64-ExtSrcTyBits); + ZextOp = DAG.getNode(ISD::AND, Op0Ty, N0.getOperand(0), + DAG.getConstant(Imm, Op0Ty)); + } + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(ZextOp.Val); + // Otherwise, make this a use of a zext. + return DAG.getSetCC(VT, ZextOp, + DAG.getConstant(C1 & (~0ULL>>(64-ExtSrcTyBits)), + ExtDstTy), + Cond); + } else if ((N1C->getValue() == 0 || N1C->getValue() == 1) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + + // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC + if (N0.getOpcode() == ISD::SETCC) { + bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getValue() != 1); + if (TrueWhenTrue) + return N0; + + // Invert the condition. 
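The ctlz trick above is worth seeing numerically; a standalone sketch with a portable count-leading-zeros stand-in (clz32 is invented here, not an LLVM API). ctlz of an i32 yields 32 (binary 100000) only for zero, so bit 5 of the result distinguishes zero from nonzero.

    #include <cassert>
    #include <cstdint>

    // Portable count-leading-zeros for the demonstration (returns 32 for 0).
    static unsigned clz32(uint32_t X) {
      unsigned N = 0;
      for (uint32_t Bit = 0x80000000u; Bit && !(X & Bit); Bit >>= 1) ++N;
      return N;
    }

    int main() {
      // (srl (ctlz x), 5) == 0  <=>  x != 0, checked on a sweep including 0.
      for (uint64_t X = 0; X <= 0xFFFFFFFFull; X += 0x10001ull)
        assert(((clz32((uint32_t)X) >> 5) == 0) == ((uint32_t)X != 0));
      return 0;
    }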
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + CC = ISD::getSetCCInverse(CC, + MVT::isInteger(N0.getOperand(0).getValueType())); + return DAG.getSetCC(VT, N0.getOperand(0), N0.getOperand(1), CC); + } + + if ((N0.getOpcode() == ISD::XOR || + (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR && + N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && + isa<ConstantSDNode>(N0.getOperand(1)) && + cast<ConstantSDNode>(N0.getOperand(1))->getValue() == 1) { + // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We + // can only do this if the top bits are known zero. + if (DAG.MaskedValueIsZero(N0, + MVT::getIntVTBitMask(N0.getValueType())-1)){ + // Okay, get the un-inverted input value. + SDOperand Val; + if (N0.getOpcode() == ISD::XOR) + Val = N0.getOperand(0); + else { + assert(N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR); + // ((X^1)&1)^1 -> X & 1 + Val = DAG.getNode(ISD::AND, N0.getValueType(), + N0.getOperand(0).getOperand(0), + N0.getOperand(1)); + } + return DAG.getSetCC(VT, Val, N1, + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); + } + } + } + + uint64_t MinVal, MaxVal; + unsigned OperandBitSize = MVT::getSizeInBits(N1C->getValueType(0)); + if (ISD::isSignedIntSetCC(Cond)) { + MinVal = 1ULL << (OperandBitSize-1); + if (OperandBitSize != 1) // Avoid X >> 64, which is undefined. + MaxVal = ~0ULL >> (65-OperandBitSize); + else + MaxVal = 0; + } else { + MinVal = 0; + MaxVal = ~0ULL >> (64-OperandBitSize); + } + + // Canonicalize GE/LE comparisons to use GT/LT comparisons. + if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { + if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true + --C1; // X >= C0 --> X > (C0-1) + return DAG.getSetCC(VT, N0, DAG.getConstant(C1, N1.getValueType()), + (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT); + } + + if (Cond == ISD::SETLE || Cond == ISD::SETULE) { + if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true + ++C1; // X <= C0 --> X < (C0+1) + return DAG.getSetCC(VT, N0, DAG.getConstant(C1, N1.getValueType()), + (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT); + } + + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) + return DAG.getConstant(0, VT); // X < MIN --> false + if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) + return DAG.getConstant(1, VT); // X >= MIN --> true + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) + return DAG.getConstant(0, VT); // X > MAX --> false + if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) + return DAG.getConstant(1, VT); // X <= MAX --> true + + // Canonicalize setgt X, Min --> setne X, Min + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) + return DAG.getSetCC(VT, N0, N1, ISD::SETNE); + // Canonicalize setlt X, Max --> setne X, Max + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal) + return DAG.getSetCC(VT, N0, N1, ISD::SETNE); + + // If we have setult X, 1, turn it into seteq X, 0 + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) + return DAG.getSetCC(VT, N0, DAG.getConstant(MinVal, N0.getValueType()), + ISD::SETEQ); + // If we have setugt X, Max-1, turn it into seteq X, Max + else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) + return DAG.getSetCC(VT, N0, DAG.getConstant(MaxVal, N0.getValueType()), + ISD::SETEQ); + + // If we have "setcc X, C0", check to see if we can shrink the immediate + // by changing cc. 
+ + // SETUGT X, SINTMAX -> SETLT X, 0 + if (Cond == ISD::SETUGT && OperandBitSize != 1 && + C1 == (~0ULL >> (65-OperandBitSize))) + return DAG.getSetCC(VT, N0, DAG.getConstant(0, N1.getValueType()), + ISD::SETLT); + + // FIXME: Implement the rest of these. + + // Fold bit comparisons when we can. + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + VT == N0.getValueType() && N0.getOpcode() == ISD::AND) + if (ConstantSDNode *AndRHS = + dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 + // Perform the xform if the AND RHS is a single bit. + if (isPowerOf2_64(AndRHS->getValue())) { + return DAG.getNode(ISD::SRL, VT, N0, + DAG.getConstant(Log2_64(AndRHS->getValue()), + getShiftAmountTy())); + } + } else if (Cond == ISD::SETEQ && C1 == AndRHS->getValue()) { + // (X & 8) == 8 --> (X & 8) >> 3 + // Perform the xform if C1 is a single bit. + if (isPowerOf2_64(C1)) { + return DAG.getNode(ISD::SRL, VT, N0, + DAG.getConstant(Log2_64(C1), getShiftAmountTy())); + } + } + } + } + } else if (isa<ConstantSDNode>(N0.Val)) { + // Ensure that the constant occurs on the RHS. + return DAG.getSetCC(VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); + } + + if (isa<ConstantFPSDNode>(N0.Val)) { + // Constant fold or commute setcc. + SDOperand O = DAG.FoldSetCC(VT, N0, N1, Cond); + if (O.Val) return O; + } + + if (N0 == N1) { + // We can always fold X == X for integer setcc's. + if (MVT::isInteger(N0.getValueType())) + return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + unsigned UOF = ISD::getUnorderedFlavor(Cond); + if (UOF == 2) // FP operators that are undefined on NaNs. + return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + if (UOF == unsigned(ISD::isTrueWhenEqual(Cond))) + return DAG.getConstant(UOF, VT); + // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO + // if it is not already. + ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; + if (NewCond != Cond) + return DAG.getSetCC(VT, N0, N1, NewCond); + } + + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + MVT::isInteger(N0.getValueType())) { + if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB || + N0.getOpcode() == ISD::XOR) { + // Simplify (X+Y) == (X+Z) --> Y == Z + if (N0.getOpcode() == N1.getOpcode()) { + if (N0.getOperand(0) == N1.getOperand(0)) + return DAG.getSetCC(VT, N0.getOperand(1), N1.getOperand(1), Cond); + if (N0.getOperand(1) == N1.getOperand(1)) + return DAG.getSetCC(VT, N0.getOperand(0), N1.getOperand(0), Cond); + if (DAG.isCommutativeBinOp(N0.getOpcode())) { + // If X op Y == Y op X, try other combinations. + if (N0.getOperand(0) == N1.getOperand(1)) + return DAG.getSetCC(VT, N0.getOperand(1), N1.getOperand(0), Cond); + if (N0.getOperand(1) == N1.getOperand(0)) + return DAG.getSetCC(VT, N0.getOperand(0), N1.getOperand(1), Cond); + } + } + + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) { + if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + // Turn (X+C1) == C2 --> X == C2-C1 + if (N0.getOpcode() == ISD::ADD && N0.Val->hasOneUse()) { + return DAG.getSetCC(VT, N0.getOperand(0), + DAG.getConstant(RHSC->getValue()-LHSR->getValue(), + N0.getValueType()), Cond); + } + + // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0. + if (N0.getOpcode() == ISD::XOR) + // If we know that all of the inverted bits are zero, don't bother + // performing the inversion. 
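The power-of-two bit-test fold above, (X & 8) != 0 becoming (X & 8) >> 3, checks out exhaustively; a standalone sketch:

    #include <cassert>

    int main() {
      // With a power-of-two mask, the compare collapses into a shift that
      // produces the same 0/1 value directly.
      for (unsigned X = 0; X < 256; ++X)
        assert(((X & 8) != 0 ? 1u : 0u) == ((X & 8) >> 3));
      return 0;
    }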
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getValue())) + return DAG.getSetCC(VT, N0.getOperand(0), + DAG.getConstant(LHSR->getValue()^RHSC->getValue(), + N0.getValueType()), Cond); + } + + // Turn (C1-X) == C2 --> X == C1-C2 + if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { + if (N0.getOpcode() == ISD::SUB && N0.Val->hasOneUse()) { + return DAG.getSetCC(VT, N0.getOperand(1), + DAG.getConstant(SUBC->getValue()-RHSC->getValue(), + N0.getValueType()), Cond); + } + } + } + + // Simplify (X+Z) == X --> Z == 0 + if (N0.getOperand(0) == N1) + return DAG.getSetCC(VT, N0.getOperand(1), + DAG.getConstant(0, N0.getValueType()), Cond); + if (N0.getOperand(1) == N1) { + if (DAG.isCommutativeBinOp(N0.getOpcode())) + return DAG.getSetCC(VT, N0.getOperand(0), + DAG.getConstant(0, N0.getValueType()), Cond); + else if (N0.Val->hasOneUse()) { + assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); + // (Z-X) == X --> Z == X<<1 + SDOperand SH = DAG.getNode(ISD::SHL, N1.getValueType(), + N1, + DAG.getConstant(1, getShiftAmountTy())); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(SH.Val); + return DAG.getSetCC(VT, N0.getOperand(0), SH, Cond); + } + } + } + + if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB || + N1.getOpcode() == ISD::XOR) { + // Simplify X == (X+Z) --> Z == 0 + if (N1.getOperand(0) == N0) { + return DAG.getSetCC(VT, N1.getOperand(1), + DAG.getConstant(0, N1.getValueType()), Cond); + } else if (N1.getOperand(1) == N0) { + if (DAG.isCommutativeBinOp(N1.getOpcode())) { + return DAG.getSetCC(VT, N1.getOperand(0), + DAG.getConstant(0, N1.getValueType()), Cond); + } else if (N1.Val->hasOneUse()) { + assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); + // X == (Z-X) --> X<<1 == Z + SDOperand SH = DAG.getNode(ISD::SHL, N1.getValueType(), N0, + DAG.getConstant(1, getShiftAmountTy())); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(SH.Val); + return DAG.getSetCC(VT, SH, N1.getOperand(0), Cond); + } + } + } + } + + // Fold away ALL boolean setcc's. 
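The (Z-X) == X rewrite above relies on modular arithmetic; a standalone 8-bit sketch verifying it exhaustively:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Unsigned wraparound makes the rewrite exact: Z - X == X <=> Z == 2*X.
      for (unsigned ZI = 0; ZI < 256; ++ZI)
        for (unsigned XI = 0; XI < 256; ++XI) {
          uint8_t Z = (uint8_t)ZI, X = (uint8_t)XI;
          assert(((uint8_t)(Z - X) == X) == (Z == (uint8_t)(X << 1)));
        }
      return 0;
    }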
+ SDOperand Temp; + if (N0.getValueType() == MVT::i1 && foldBooleans) { + switch (Cond) { + default: assert(0 && "Unknown integer setcc!"); + case ISD::SETEQ: // X == Y -> (X^Y)^1 + Temp = DAG.getNode(ISD::XOR, MVT::i1, N0, N1); + N0 = DAG.getNode(ISD::XOR, MVT::i1, Temp, DAG.getConstant(1, MVT::i1)); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.Val); + break; + case ISD::SETNE: // X != Y --> (X^Y) + N0 = DAG.getNode(ISD::XOR, MVT::i1, N0, N1); + break; + case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> X^1 & Y + case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> X^1 & Y + Temp = DAG.getNode(ISD::XOR, MVT::i1, N0, DAG.getConstant(1, MVT::i1)); + N0 = DAG.getNode(ISD::AND, MVT::i1, N1, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.Val); + break; + case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> Y^1 & X + case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> Y^1 & X + Temp = DAG.getNode(ISD::XOR, MVT::i1, N1, DAG.getConstant(1, MVT::i1)); + N0 = DAG.getNode(ISD::AND, MVT::i1, N0, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.Val); + break; + case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> X^1 | Y + case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> X^1 | Y + Temp = DAG.getNode(ISD::XOR, MVT::i1, N0, DAG.getConstant(1, MVT::i1)); + N0 = DAG.getNode(ISD::OR, MVT::i1, N1, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.Val); + break; + case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> Y^1 | X + case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> Y^1 | X + Temp = DAG.getNode(ISD::XOR, MVT::i1, N1, DAG.getConstant(1, MVT::i1)); + N0 = DAG.getNode(ISD::OR, MVT::i1, N0, Temp); + break; + } + if (VT != MVT::i1) { + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(N0.Val); + // FIXME: If running after legalize, we probably can't do this. + N0 = DAG.getNode(ISD::ZERO_EXTEND, VT, N0); + } + return N0; + } + + // Could not fold it. + return SDOperand(); +} + +SDOperand TargetLowering:: +PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { + // Default implementation: no optimization. + return SDOperand(); +} + +//===----------------------------------------------------------------------===// +// Inline Assembler Implementation Methods +//===----------------------------------------------------------------------===// + +TargetLowering::ConstraintType +TargetLowering::getConstraintType(const std::string &Constraint) const { + // FIXME: lots more standard ones to handle. + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: break; + case 'r': return C_RegisterClass; + case 'm': // memory + case 'o': // offsetable + case 'V': // not offsetable + return C_Memory; + case 'i': // Simple Integer or Relocatable Constant + case 'n': // Simple Integer + case 's': // Relocatable Constant + case 'X': // Allow ANY value. + case 'I': // Target registers. + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + return C_Other; + } + } + + if (Constraint.size() > 1 && Constraint[0] == '{' && + Constraint[Constraint.size()-1] == '}') + return C_Register; + return C_Unknown; +} + +/// isOperandValidForConstraint - Return the specified operand (possibly +/// modified) if the specified SDOperand is valid for the specified target +/// constraint letter, otherwise return null. 
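The i1 lowering table above encodes small boolean identities; a standalone sketch checking the SETULT row (the other rows follow the same pattern):

    #include <cassert>

    int main() {
      // On i1 operands, unsigned-less-than means "X is 0 and Y is 1",
      // i.e. (X ^ 1) & Y, matching the ISD::SETULT lowering above.
      for (unsigned X = 0; X < 2; ++X)
        for (unsigned Y = 0; Y < 2; ++Y)
          assert((unsigned)(X < Y) == ((X ^ 1) & Y));
      return 0;
    }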
+SDOperand TargetLowering::isOperandValidForConstraint(SDOperand Op,
+ char ConstraintLetter,
+ SelectionDAG &DAG) {
+ switch (ConstraintLetter) {
+ default: break;
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 's': // Relocatable Constant
+ case 'X': { // Allows any operand.
+ // These operands are interested in values of the form (GV+C), where C may
+ // be folded in as an offset of GV, or it may be explicitly added. Also, it
+ // is possible and fine if either GV or C is missing.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+
+ // If we have "(add GV, C)", pull out GV/C
+ if (Op.getOpcode() == ISD::ADD) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+ if (C == 0 || GA == 0) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
+ }
+ if (C == 0 || GA == 0)
+ C = 0, GA = 0;
+ }
+
+ // If we find a valid operand, map to the TargetXXX version so that the
+ // value itself doesn't get selected.
+ if (GA) { // Either &GV or &GV+C
+ if (ConstraintLetter != 'n') {
+ int64_t Offs = GA->getOffset();
+ if (C) Offs += C->getValue();
+ return DAG.getTargetGlobalAddress(GA->getGlobal(), Op.getValueType(),
+ Offs);
+ }
+ }
+ if (C) { // just C, no GV.
+ // Simple constants are not allowed for 's'.
+ if (ConstraintLetter != 's')
+ return DAG.getTargetConstant(C->getValue(), Op.getValueType());
+ }
+ break;
+ }
+ }
+ return SDOperand(0,0);
+}
+
+std::vector<unsigned> TargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const {
+ return std::vector<unsigned>();
+}
+
+
+std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const {
+ if (Constraint[0] != '{')
+ return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+ assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+
+ // Remove the braces from around the name.
+ std::string RegName(Constraint.begin()+1, Constraint.end()-1);
+
+ // Figure out which register class contains this reg.
+ const MRegisterInfo *RI = TM.getRegisterInfo();
+ for (MRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
+ E = RI->regclass_end(); RCI != E; ++RCI) {
+ const TargetRegisterClass *RC = *RCI;
+
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ bool isLegal = false;
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (isTypeLegal(*I)) {
+ isLegal = true;
+ break;
+ }
+ }
+
+ if (!isLegal) continue;
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ if (StringsEqualNoCase(RegName, RI->get(*I).Name))
+ return std::make_pair(*I, RC);
+ }
+ }
+
+ return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, + const Type *Ty) const { + // The default implementation of this implements a conservative RISCy, r+r and + // r+i addr mode. + + // Allows a sign-extended 16-bit immediate field. + if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) + return false; + + // No global is ever allowed as a base. + if (AM.BaseGV) + return false; + + // Only support r+r, + switch (AM.Scale) { + case 0: // "r+i" or just "i", depending on HasBaseReg. + break; + case 1: + if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. + return false; + // Otherwise we have r+r or r+i. + break; + case 2: + if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. + return false; + // Allow 2*r as r+r. + break; + } + + return true; +} + +// Magic for divide replacement + +struct ms { + int64_t m; // magic number + int64_t s; // shift amount +}; + +struct mu { + uint64_t m; // magic number + int64_t a; // add indicator + int64_t s; // shift amount +}; + +/// magic - calculate the magic numbers required to codegen an integer sdiv as +/// a sequence of multiply and shifts. Requires that the divisor not be 0, 1, +/// or -1. +static ms magic32(int32_t d) { + int32_t p; + uint32_t ad, anc, delta, q1, r1, q2, r2, t; + const uint32_t two31 = 0x80000000U; + struct ms mag; + + ad = abs(d); + t = two31 + ((uint32_t)d >> 31); + anc = t - 1 - t%ad; // absolute value of nc + p = 31; // initialize p + q1 = two31/anc; // initialize q1 = 2p/abs(nc) + r1 = two31 - q1*anc; // initialize r1 = rem(2p,abs(nc)) + q2 = two31/ad; // initialize q2 = 2p/abs(d) + r2 = two31 - q2*ad; // initialize r2 = rem(2p,abs(d)) + do { + p = p + 1; + q1 = 2*q1; // update q1 = 2p/abs(nc) + r1 = 2*r1; // update r1 = rem(2p/abs(nc)) + if (r1 >= anc) { // must be unsigned comparison + q1 = q1 + 1; + r1 = r1 - anc; + } + q2 = 2*q2; // update q2 = 2p/abs(d) + r2 = 2*r2; // update r2 = rem(2p/abs(d)) + if (r2 >= ad) { // must be unsigned comparison + q2 = q2 + 1; + r2 = r2 - ad; + } + delta = ad - r2; + } while (q1 < delta || (q1 == delta && r1 == 0)); + + mag.m = (int32_t)(q2 + 1); // make sure to sign extend + if (d < 0) mag.m = -mag.m; // resulting magic number + mag.s = p - 32; // resulting shift + return mag; +} + +/// magicu - calculate the magic numbers required to codegen an integer udiv as +/// a sequence of multiply, add and shifts. Requires that the divisor not be 0. +static mu magicu32(uint32_t d) { + int32_t p; + uint32_t nc, delta, q1, r1, q2, r2; + struct mu magu; + magu.a = 0; // initialize "add" indicator + nc = - 1 - (-d)%d; + p = 31; // initialize p + q1 = 0x80000000/nc; // initialize q1 = 2p/nc + r1 = 0x80000000 - q1*nc; // initialize r1 = rem(2p,nc) + q2 = 0x7FFFFFFF/d; // initialize q2 = (2p-1)/d + r2 = 0x7FFFFFFF - q2*d; // initialize r2 = rem((2p-1),d) + do { + p = p + 1; + if (r1 >= nc - r1 ) { + q1 = 2*q1 + 1; // update q1 + r1 = 2*r1 - nc; // update r1 + } + else { + q1 = 2*q1; // update q1 + r1 = 2*r1; // update r1 + } + if (r2 + 1 >= d - r2) { + if (q2 >= 0x7FFFFFFF) magu.a = 1; + q2 = 2*q2 + 1; // update q2 + r2 = 2*r2 + 1 - d; // update r2 + } + else { + if (q2 >= 0x80000000) magu.a = 1; + q2 = 2*q2; // update q2 + r2 = 2*r2 + 1; // update r2 + } + delta = d - 1 - r2; + } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0))); + magu.m = q2 + 1; // resulting magic number + magu.s = p - 32; // resulting shift + return magu; +} + +/// magic - calculate the magic numbers required to codegen an integer sdiv as +/// a sequence of multiply and shifts. 
Requires that the divisor not be 0, 1, +/// or -1. +static ms magic64(int64_t d) { + int64_t p; + uint64_t ad, anc, delta, q1, r1, q2, r2, t; + const uint64_t two63 = 9223372036854775808ULL; // 2^63 + struct ms mag; + + ad = d >= 0 ? d : -d; + t = two63 + ((uint64_t)d >> 63); + anc = t - 1 - t%ad; // absolute value of nc + p = 63; // initialize p + q1 = two63/anc; // initialize q1 = 2p/abs(nc) + r1 = two63 - q1*anc; // initialize r1 = rem(2p,abs(nc)) + q2 = two63/ad; // initialize q2 = 2p/abs(d) + r2 = two63 - q2*ad; // initialize r2 = rem(2p,abs(d)) + do { + p = p + 1; + q1 = 2*q1; // update q1 = 2p/abs(nc) + r1 = 2*r1; // update r1 = rem(2p/abs(nc)) + if (r1 >= anc) { // must be unsigned comparison + q1 = q1 + 1; + r1 = r1 - anc; + } + q2 = 2*q2; // update q2 = 2p/abs(d) + r2 = 2*r2; // update r2 = rem(2p/abs(d)) + if (r2 >= ad) { // must be unsigned comparison + q2 = q2 + 1; + r2 = r2 - ad; + } + delta = ad - r2; + } while (q1 < delta || (q1 == delta && r1 == 0)); + + mag.m = q2 + 1; + if (d < 0) mag.m = -mag.m; // resulting magic number + mag.s = p - 64; // resulting shift + return mag; +} + +/// magicu - calculate the magic numbers required to codegen an integer udiv as +/// a sequence of multiply, add and shifts. Requires that the divisor not be 0. +static mu magicu64(uint64_t d) +{ + int64_t p; + uint64_t nc, delta, q1, r1, q2, r2; + struct mu magu; + magu.a = 0; // initialize "add" indicator + nc = - 1 - (-d)%d; + p = 63; // initialize p + q1 = 0x8000000000000000ull/nc; // initialize q1 = 2p/nc + r1 = 0x8000000000000000ull - q1*nc; // initialize r1 = rem(2p,nc) + q2 = 0x7FFFFFFFFFFFFFFFull/d; // initialize q2 = (2p-1)/d + r2 = 0x7FFFFFFFFFFFFFFFull - q2*d; // initialize r2 = rem((2p-1),d) + do { + p = p + 1; + if (r1 >= nc - r1 ) { + q1 = 2*q1 + 1; // update q1 + r1 = 2*r1 - nc; // update r1 + } + else { + q1 = 2*q1; // update q1 + r1 = 2*r1; // update r1 + } + if (r2 + 1 >= d - r2) { + if (q2 >= 0x7FFFFFFFFFFFFFFFull) magu.a = 1; + q2 = 2*q2 + 1; // update q2 + r2 = 2*r2 + 1 - d; // update r2 + } + else { + if (q2 >= 0x8000000000000000ull) magu.a = 1; + q2 = 2*q2; // update q2 + r2 = 2*r2 + 1; // update r2 + } + delta = d - 1 - r2; + } while (p < 128 && (q1 < delta || (q1 == delta && r1 == 0))); + magu.m = q2 + 1; // resulting magic number + magu.s = p - 64; // resulting shift + return magu; +} + +/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, +/// return a DAG expression to select that will generate the same value by +/// multiplying by a magic number. See: +/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> +SDOperand TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, + std::vector<SDNode*>* Created) const { + MVT::ValueType VT = N->getValueType(0); + + // Check to see if we can do this. + if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64)) + return SDOperand(); // BuildSDIV only operates on i32 or i64 + if (!isOperationLegal(ISD::MULHS, VT)) + return SDOperand(); // Make sure the target supports MULHS. + + int64_t d = cast<ConstantSDNode>(N->getOperand(1))->getSignExtended(); + ms magics = (VT == MVT::i32) ? magic32(d) : magic64(d); + + // Multiply the numerator (operand 0) by the magic value + SDOperand Q = DAG.getNode(ISD::MULHS, VT, N->getOperand(0), + DAG.getConstant(magics.m, VT)); + // If d > 0 and m < 0, add the numerator + if (d > 0 && magics.m < 0) { + Q = DAG.getNode(ISD::ADD, VT, Q, N->getOperand(0)); + if (Created) + Created->push_back(Q.Val); + } + // If d < 0 and m > 0, subtract the numerator. 
+ if (d < 0 && magics.m > 0) { + Q = DAG.getNode(ISD::SUB, VT, Q, N->getOperand(0)); + if (Created) + Created->push_back(Q.Val); + } + // Shift right algebraic if shift value is nonzero + if (magics.s > 0) { + Q = DAG.getNode(ISD::SRA, VT, Q, + DAG.getConstant(magics.s, getShiftAmountTy())); + if (Created) + Created->push_back(Q.Val); + } + // Extract the sign bit and add it to the quotient + SDOperand T = + DAG.getNode(ISD::SRL, VT, Q, DAG.getConstant(MVT::getSizeInBits(VT)-1, + getShiftAmountTy())); + if (Created) + Created->push_back(T.Val); + return DAG.getNode(ISD::ADD, VT, Q, T); +} + +/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant, +/// return a DAG expression to select that will generate the same value by +/// multiplying by a magic number. See: +/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> +SDOperand TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, + std::vector<SDNode*>* Created) const { + MVT::ValueType VT = N->getValueType(0); + + // Check to see if we can do this. + if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64)) + return SDOperand(); // BuildUDIV only operates on i32 or i64 + if (!isOperationLegal(ISD::MULHU, VT)) + return SDOperand(); // Make sure the target supports MULHU. + + uint64_t d = cast<ConstantSDNode>(N->getOperand(1))->getValue(); + mu magics = (VT == MVT::i32) ? magicu32(d) : magicu64(d); + + // Multiply the numerator (operand 0) by the magic value + SDOperand Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0), + DAG.getConstant(magics.m, VT)); + if (Created) + Created->push_back(Q.Val); + + if (magics.a == 0) { + return DAG.getNode(ISD::SRL, VT, Q, + DAG.getConstant(magics.s, getShiftAmountTy())); + } else { + SDOperand NPQ = DAG.getNode(ISD::SUB, VT, N->getOperand(0), Q); + if (Created) + Created->push_back(NPQ.Val); + NPQ = DAG.getNode(ISD::SRL, VT, NPQ, + DAG.getConstant(1, getShiftAmountTy())); + if (Created) + Created->push_back(NPQ.Val); + NPQ = DAG.getNode(ISD::ADD, VT, NPQ, Q); + if (Created) + Created->push_back(NPQ.Val); + return DAG.getNode(ISD::SRL, VT, NPQ, + DAG.getConstant(magics.s-1, getShiftAmountTy())); + } +} diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp new file mode 100644 index 0000000..3d8618f --- /dev/null +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -0,0 +1,1138 @@ +//===-- SimpleRegisterCoalescing.cpp - Register Coalescing ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple register coalescing pass that attempts to +// aggressively coalesce every register copy that it can. 
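Before moving on to the coalescer below, the BuildSDIV recipe above can be spot-checked with a published magic pair; a standalone sketch for division by 7 on i32. The constant 0x92492493 with shift 2 is the pair given in the reference cited above for d = 7; sdiv7 is an invented name, and the sketch assumes arithmetic right shift of negative values, which every mainstream compiler provides.

    #include <cassert>
    #include <cstdint>

    // Reference expansion of sdiv-by-7 for i32, mirroring BuildSDIV above.
    static int32_t sdiv7(int32_t X) {
      int32_t M = (int32_t)0x92492493;               // magics.m (negative)
      int32_t Q = (int32_t)(((int64_t)M * X) >> 32); // MULHS
      Q += X;                                        // d > 0, m < 0: add numerator
      Q >>= 2;                                       // magics.s
      Q += (int32_t)((uint32_t)Q >> 31);             // add the sign bit
      return Q;
    }

    int main() {
      for (int32_t X = -1000; X <= 1000; ++X)
        assert(sdiv7(X) == X / 7);
      return 0;
    }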
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simpleregistercoalescing"
+#include "llvm/CodeGen/SimpleRegisterCoalescing.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "VirtRegMap.h"
+#include "llvm/Value.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+STATISTIC(numJoins , "Number of interval joins performed");
+STATISTIC(numPeep , "Number of identity moves eliminated after coalescing");
+STATISTIC(numAborts , "Number of times interval joining aborted");
+
+char SimpleRegisterCoalescing::ID = 0;
+namespace {
+ static cl::opt<bool>
+ EnableJoining("join-liveintervals",
+ cl::desc("Coalesce copies (default=true)"),
+ cl::init(true));
+
+ RegisterPass<SimpleRegisterCoalescing>
+ X("simple-register-coalescing",
+ "Simple register coalescing to eliminate all possible register copies");
+}
+
+const PassInfo *llvm::SimpleRegisterCoalescingID = X.getPassInfo();
+
+void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
+ //AU.addPreserved<LiveVariables>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreservedID(PHIEliminationID);
+ AU.addPreservedID(TwoAddressInstructionPassID);
+ AU.addRequired<LiveVariables>();
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<LoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// being the source and IntB being the dest, thus this defines a value number
+/// in IntB. If the source value number (in IntA) is defined by a copy from B,
+/// see if we can merge these two pieces of B into a single value number,
+/// eliminating a copy. For example:
+///
+/// A3 = B0
+/// ...
+/// B1 = A3 <- this copy
+///
+/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
+/// value number to be replaced with B0 (which simplifies the B liveinterval).
+///
+/// This returns true if an interval was modified.
+///
+bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, LiveInterval &IntB,
+ MachineInstr *CopyMI) {
+ unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI));
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+ unsigned BValNo = BLR->ValId;
+
+ // Get the location that B is defined at. Two options: either this value has
+ // an unknown definition point or it is defined at CopyIdx. If unknown, we
+ // can't process it.
+ unsigned BValNoDefIdx = IntB.getInstForValNum(BValNo);
+ if (BValNoDefIdx == ~0U) return false;
+ assert(BValNoDefIdx == CopyIdx &&
+ "Copy doesn't define the value?");
+
+ // AValNo is the value number in A that defines the copy, 'A3' in the
+ // example above.
+ LiveInterval::iterator AValLR = IntA.FindLiveRangeContaining(CopyIdx-1);
+ unsigned AValNo = AValLR->ValId;
+
+ // If AValNo is defined as a copy from IntB, we can potentially process this.
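The transformation described above can be pictured on plain index ranges; a toy standalone sketch with invented indices, using bare pairs as stand-ins for LiveRanges:

    #include <cassert>
    #include <utility>
    #include <vector>

    int main() {
      typedef std::pair<unsigned, unsigned> Range; // [start, end) indices
      // B0 live over [4,12); B1, defined by the copy at index 20, over [20,30).
      std::vector<Range> B;
      B.push_back(Range(4, 12));
      B.push_back(Range(20, 30));
      // Insert the filler [12,20) and merge B1's number into B0: the register
      // is now live across one contiguous range and the copy is an identity.
      B[0].second = B[1].second;
      B.pop_back();
      assert(B.size() == 1 && B[0].first == 4 && B[0].second == 30);
      return 0;
    }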
+
+ // Get the instruction that defines this value number.
+ unsigned SrcReg = IntA.getSrcRegForValNum(AValNo);
+ if (!SrcReg) return false; // Not defined by a copy.
+
+ // If the source register comes from an interval other than IntB, we can't
+ // handle this.
+ if (rep(SrcReg) != IntB.reg) return false;
+
+ // Get the LiveRange in IntB that this value number starts with.
+ unsigned AValNoInstIdx = IntA.getInstForValNum(AValNo);
+ LiveInterval::iterator ValLR = IntB.FindLiveRangeContaining(AValNoInstIdx-1);
+
+ // Make sure that the end of the live range is inside the same block as
+ // CopyMI.
+ MachineInstr *ValLREndInst = li_->getInstructionFromIndex(ValLR->end-1);
+ if (!ValLREndInst ||
+ ValLREndInst->getParent() != CopyMI->getParent()) return false;
+
+ // Okay, we now know that ValLR ends in the same block where the CopyMI
+ // live-range starts. If there are no intervening live ranges between them in
+ // IntB, we can merge them.
+ if (ValLR+1 != BLR) return false;
+
+ DOUT << "\nExtending: "; IntB.print(DOUT, mri_);
+
+ // We are about to delete CopyMI, so we need to remove it as the 'instruction
+ // that defines this value #'.
+ IntB.setValueNumberInfo(BValNo, std::make_pair(~0U, 0));
+
+ // Okay, we can merge them. We need to insert a new liverange:
+ // [ValLR.end, BLR.begin) of either value number, then we merge the
+ // two value numbers.
+ unsigned FillerStart = ValLR->end, FillerEnd = BLR->start;
+ IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
+
+ // If the IntB live range is assigned to a physical register, and if that
+ // physreg has aliases, extend the live intervals of those aliases as well.
+ if (MRegisterInfo::isPhysicalRegister(IntB.reg)) {
+ // Update the liveintervals of sub-registers.
+ for (const unsigned *AS = mri_->getSubRegisters(IntB.reg); *AS; ++AS) {
+ LiveInterval &AliasLI = li_->getInterval(*AS);
+ AliasLI.addRange(LiveRange(FillerStart, FillerEnd,
+ AliasLI.getNextValue(~0U, 0)));
+ }
+ }
+
+ // Okay, merge "B1" into the same value number as "B0".
+ if (BValNo != ValLR->ValId)
+ IntB.MergeValueNumberInto(BValNo, ValLR->ValId);
+ DOUT << " result = "; IntB.print(DOUT, mri_);
+ DOUT << "\n";
+
+ // If the source instruction was killing the source register before the
+ // merge, unset the isKill marker given the live range has been extended.
+ int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
+ if (UIdx != -1)
+ ValLREndInst->getOperand(UIdx).unsetIsKill();
+
+ // Finally, delete the copy instruction.
+ li_->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
+ ++numPeep;
+ return true;
+}
+
+/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+/// which are the src/dst of the copy instruction CopyMI. This returns true
+/// if the copy was successfully coalesced away, or if it is never possible
+/// to coalesce this copy, due to register constraints. It returns
+/// false if it is not currently possible to coalesce this interval, but
+/// it may be possible if other things get coalesced.
+bool SimpleRegisterCoalescing::JoinCopy(MachineInstr *CopyMI,
+ unsigned SrcReg, unsigned DstReg, bool PhysOnly) {
+ DOUT << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI;
+
+ // Get representative registers.
+ unsigned repSrcReg = rep(SrcReg);
+ unsigned repDstReg = rep(DstReg);
+
+ // If they are already joined we continue.
+ if (repSrcReg == repDstReg) {
+ DOUT << "\tCopy already coalesced.\n";
+ return true; // Not coalescable.
+ }
+
+ bool SrcIsPhys = MRegisterInfo::isPhysicalRegister(repSrcReg);
+ bool DstIsPhys = MRegisterInfo::isPhysicalRegister(repDstReg);
+ if (PhysOnly && !SrcIsPhys && !DstIsPhys)
+ // Only joining physical registers with virtual registers in this round.
+ return true;
+
+ // If they are both physical registers, we cannot join them.
+ if (SrcIsPhys && DstIsPhys) {
+ DOUT << "\tCan not coalesce physregs.\n";
+ return true; // Not coalescable.
+ }
+
+ // We only join virtual registers with allocatable physical registers.
+ if (SrcIsPhys && !allocatableRegs_[repSrcReg]) {
+ DOUT << "\tSrc reg is unallocatable physreg.\n";
+ return true; // Not coalescable.
+ }
+ if (DstIsPhys && !allocatableRegs_[repDstReg]) {
+ DOUT << "\tDst reg is unallocatable physreg.\n";
+ return true; // Not coalescable.
+ }
+
+ // If they are not of the same register class, we cannot join them.
+ if (differingRegisterClasses(repSrcReg, repDstReg)) {
+ DOUT << "\tSrc/Dest are different register classes.\n";
+ return true; // Not coalescable.
+ }
+
+ LiveInterval &SrcInt = li_->getInterval(repSrcReg);
+ LiveInterval &DstInt = li_->getInterval(repDstReg);
+ assert(SrcInt.reg == repSrcReg && DstInt.reg == repDstReg &&
+ "Register mapping is horribly broken!");
+
+ DOUT << "\t\tInspecting "; SrcInt.print(DOUT, mri_);
+ DOUT << " and "; DstInt.print(DOUT, mri_);
+ DOUT << ": ";
+
+ // Check if it is necessary to propagate "isDead" property before intervals
+ // are joined.
+ MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg);
+ bool isDead = mopd->isDead();
+ bool isShorten = false;
+ unsigned SrcStart = 0, RemoveStart = 0;
+ unsigned SrcEnd = 0, RemoveEnd = 0;
+ if (isDead) {
+ unsigned CopyIdx = li_->getInstructionIndex(CopyMI);
+ LiveInterval::iterator SrcLR =
+ SrcInt.FindLiveRangeContaining(li_->getUseIndex(CopyIdx));
+ RemoveStart = SrcStart = SrcLR->start;
+ RemoveEnd = SrcEnd = SrcLR->end;
+ // The instruction which defines the src is only truly dead if there are
+ // no intermediate uses and there isn't a use beyond the copy.
+ // FIXME: find the last use, mark it as a kill and shorten the live range.
+ if (SrcEnd > li_->getDefIndex(CopyIdx)) {
+ isDead = false;
+ } else {
+ MachineOperand *MOU;
+ MachineInstr *LastUse = lastRegisterUse(SrcStart, CopyIdx, repSrcReg, MOU);
+ if (LastUse) {
+ // Shorten the liveinterval to the end of the last use.
+ MOU->setIsKill();
+ isDead = false;
+ isShorten = true;
+ RemoveStart = li_->getDefIndex(li_->getInstructionIndex(LastUse));
+ RemoveEnd = SrcEnd;
+ } else {
+ MachineInstr *SrcMI = li_->getInstructionFromIndex(SrcStart);
+ if (SrcMI) {
+ MachineOperand *mops = findDefOperand(SrcMI, repSrcReg);
+ if (mops)
+ // A dead def should have a single cycle interval.
+ ++RemoveStart;
+ }
+ }
+ }
+ }
+
+ // We need to be careful about coalescing a source physical register with a
+ // virtual register. Once the coalescing is done, it cannot be broken and
+ // these are not spillable! If the destination interval uses are far away,
+ // think twice about coalescing them!
+ if (!mopd->isDead() && (SrcIsPhys || DstIsPhys)) {
+ LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt;
+ unsigned JoinVReg = SrcIsPhys ? repDstReg : repSrcReg;
+ unsigned JoinPReg = SrcIsPhys ?
repSrcReg : repDstReg; + const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(JoinVReg); + unsigned Threshold = allocatableRCRegs_[RC].count(); + + // If the virtual register live interval is long has it has low use desity, + // do not join them, instead mark the physical register as its allocation + // preference. + unsigned Length = JoinVInt.getSize() / InstrSlots::NUM; + LiveVariables::VarInfo &vi = lv_->getVarInfo(JoinVReg); + if (Length > Threshold && + (((float)vi.NumUses / Length) < (1.0 / Threshold))) { + JoinVInt.preference = JoinPReg; + ++numAborts; + DOUT << "\tMay tie down a physical register, abort!\n"; + return false; + } + } + + // Okay, attempt to join these two intervals. On failure, this returns false. + // Otherwise, if one of the intervals being joined is a physreg, this method + // always canonicalizes DstInt to be it. The output "SrcInt" will not have + // been modified, so we can use this information below to update aliases. + if (JoinIntervals(DstInt, SrcInt)) { + if (isDead) { + // Result of the copy is dead. Propagate this property. + if (SrcStart == 0) { + assert(MRegisterInfo::isPhysicalRegister(repSrcReg) && + "Live-in must be a physical register!"); + // Live-in to the function but dead. Remove it from entry live-in set. + // JoinIntervals may end up swapping the two intervals. + mf_->begin()->removeLiveIn(repSrcReg); + } else { + MachineInstr *SrcMI = li_->getInstructionFromIndex(SrcStart); + if (SrcMI) { + MachineOperand *mops = findDefOperand(SrcMI, repSrcReg); + if (mops) + mops->setIsDead(); + } + } + } + + if (isShorten || isDead) { + // Shorten the live interval. + LiveInterval &LiveInInt = (repSrcReg == DstInt.reg) ? DstInt : SrcInt; + LiveInInt.removeRange(RemoveStart, RemoveEnd); + } + } else { + // Coalescing failed. + + // If we can eliminate the copy without merging the live ranges, do so now. + if (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI)) + return true; + + // Otherwise, we are unable to join the intervals. + DOUT << "Interference!\n"; + return false; + } + + bool Swapped = repSrcReg == DstInt.reg; + if (Swapped) + std::swap(repSrcReg, repDstReg); + assert(MRegisterInfo::isVirtualRegister(repSrcReg) && + "LiveInterval::join didn't work right!"); + + // If we're about to merge live ranges into a physical register live range, + // we have to update any aliased register's live ranges to indicate that they + // have clobbered values for this range. + if (MRegisterInfo::isPhysicalRegister(repDstReg)) { + // Unset unnecessary kills. + if (!DstInt.containsOneValue()) { + for (LiveInterval::Ranges::const_iterator I = SrcInt.begin(), + E = SrcInt.end(); I != E; ++I) + unsetRegisterKills(I->start, I->end, repDstReg); + } + + // Update the liveintervals of sub-registers. + for (const unsigned *AS = mri_->getSubRegisters(repDstReg); *AS; ++AS) + li_->getInterval(*AS).MergeInClobberRanges(SrcInt); + } else { + // Merge use info if the destination is a virtual register. + LiveVariables::VarInfo& dVI = lv_->getVarInfo(repDstReg); + LiveVariables::VarInfo& sVI = lv_->getVarInfo(repSrcReg); + dVI.NumUses += sVI.NumUses; + } + + DOUT << "\n\t\tJoined. Result = "; DstInt.print(DOUT, mri_); + DOUT << "\n"; + + // Remember these liveintervals have been joined. 
+ JoinedLIs.set(repSrcReg - MRegisterInfo::FirstVirtualRegister); + if (MRegisterInfo::isVirtualRegister(repDstReg)) + JoinedLIs.set(repDstReg - MRegisterInfo::FirstVirtualRegister); + + // If the intervals were swapped by Join, swap them back so that the register + // mapping (in the r2i map) is correct. + if (Swapped) SrcInt.swap(DstInt); + li_->removeInterval(repSrcReg); + r2rMap_[repSrcReg] = repDstReg; + + // Finally, delete the copy instruction. + li_->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); + ++numPeep; + ++numJoins; + return true; +} + +/// ComputeUltimateVN - Assuming we are going to join two live intervals, +/// compute what the resultant value numbers for each value in the input two +/// ranges will be. This is complicated by copies between the two which can +/// and will commonly cause multiple value numbers to be merged into one. +/// +/// VN is the value number that we're trying to resolve. InstDefiningValue +/// keeps track of the new InstDefiningValue assignment for the result +/// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of +/// whether a value in this or other is a copy from the opposite set. +/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have +/// already been assigned. +/// +/// ThisFromOther[x] - If x is defined as a copy from the other interval, this +/// contains the value number the copy is from. +/// +static unsigned ComputeUltimateVN(unsigned VN, + SmallVector<std::pair<unsigned, + unsigned>, 16> &ValueNumberInfo, + SmallVector<int, 16> &ThisFromOther, + SmallVector<int, 16> &OtherFromThis, + SmallVector<int, 16> &ThisValNoAssignments, + SmallVector<int, 16> &OtherValNoAssignments, + LiveInterval &ThisLI, LiveInterval &OtherLI) { + // If the VN has already been computed, just return it. + if (ThisValNoAssignments[VN] >= 0) + return ThisValNoAssignments[VN]; +// assert(ThisValNoAssignments[VN] != -2 && "Cyclic case?"); + + // If this val is not a copy from the other val, then it must be a new value + // number in the destination. + int OtherValNo = ThisFromOther[VN]; + if (OtherValNo == -1) { + ValueNumberInfo.push_back(ThisLI.getValNumInfo(VN)); + return ThisValNoAssignments[VN] = ValueNumberInfo.size()-1; + } + + // Otherwise, this *is* a copy from the RHS. If the other side has already + // been computed, return it. + if (OtherValNoAssignments[OtherValNo] >= 0) + return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo]; + + // Mark this value number as currently being computed, then ask what the + // ultimate value # of the other value is. + ThisValNoAssignments[VN] = -2; + unsigned UltimateVN = + ComputeUltimateVN(OtherValNo, ValueNumberInfo, + OtherFromThis, ThisFromOther, + OtherValNoAssignments, ThisValNoAssignments, + OtherLI, ThisLI); + return ThisValNoAssignments[VN] = UltimateVN; +} + +static bool InVector(unsigned Val, const SmallVector<unsigned, 8> &V) { + return std::find(V.begin(), V.end(), Val) != V.end(); +} + +/// SimpleJoin - Attempt to joint the specified interval into this one. The +/// caller of this method must guarantee that the RHS only contains a single +/// value number and that the RHS is not defined by a copy from this +/// interval. This returns false if the intervals are not joinable, or it +/// joins them and returns true. 
+bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS) { + assert(RHS.containsOneValue()); + + // Some number (potentially more than one) value numbers in the current + // interval may be defined as copies from the RHS. Scan the overlapping + // portions of the LHS and RHS, keeping track of this and looking for + // overlapping live ranges that are NOT defined as copies. If these exist, we + // cannot coalesce. + + LiveInterval::iterator LHSIt = LHS.begin(), LHSEnd = LHS.end(); + LiveInterval::iterator RHSIt = RHS.begin(), RHSEnd = RHS.end(); + + if (LHSIt->start < RHSIt->start) { + LHSIt = std::upper_bound(LHSIt, LHSEnd, RHSIt->start); + if (LHSIt != LHS.begin()) --LHSIt; + } else if (RHSIt->start < LHSIt->start) { + RHSIt = std::upper_bound(RHSIt, RHSEnd, LHSIt->start); + if (RHSIt != RHS.begin()) --RHSIt; + } + + SmallVector<unsigned, 8> EliminatedLHSVals; + + while (1) { + // Determine if these live intervals overlap. + bool Overlaps = false; + if (LHSIt->start <= RHSIt->start) + Overlaps = LHSIt->end > RHSIt->start; + else + Overlaps = RHSIt->end > LHSIt->start; + + // If the live intervals overlap, there are two interesting cases: if the + // LHS interval is defined by a copy from the RHS, it's ok and we record + // that the LHS value # is the same as the RHS. If it's not, then we cannot + // coalesce these live ranges and we bail out. + if (Overlaps) { + // If we haven't already recorded that this value # is safe, check it. + if (!InVector(LHSIt->ValId, EliminatedLHSVals)) { + // Copy from the RHS? + unsigned SrcReg = LHS.getSrcRegForValNum(LHSIt->ValId); + if (rep(SrcReg) != RHS.reg) + return false; // Nope, bail out. + + EliminatedLHSVals.push_back(LHSIt->ValId); + } + + // We know this entire LHS live range is okay, so skip it now. + if (++LHSIt == LHSEnd) break; + continue; + } + + if (LHSIt->end < RHSIt->end) { + if (++LHSIt == LHSEnd) break; + } else { + // One interesting case to check here. It's possible that we have + // something like "X3 = Y" which defines a new value number in the LHS, + // and is the last use of this liverange of the RHS. In this case, we + // want to notice this copy (so that it gets coalesced away) even though + // the live ranges don't actually overlap. + if (LHSIt->start == RHSIt->end) { + if (InVector(LHSIt->ValId, EliminatedLHSVals)) { + // We already know that this value number is going to be merged in + // if coalescing succeeds. Just skip the liverange. + if (++LHSIt == LHSEnd) break; + } else { + // Otherwise, if this is a copy from the RHS, mark it as being merged + // in. + if (rep(LHS.getSrcRegForValNum(LHSIt->ValId)) == RHS.reg) { + EliminatedLHSVals.push_back(LHSIt->ValId); + + // We know this entire LHS live range is okay, so skip it now. + if (++LHSIt == LHSEnd) break; + } + } + } + + if (++RHSIt == RHSEnd) break; + } + } + + // If we got here, we know that the coalescing will be successful and that + // the value numbers in EliminatedLHSVals will all be merged together. Since + // the most common case is that EliminatedLHSVals has a single number, we + // optimize for it: if there is more than one value, we merge them all into + // the lowest numbered one, then handle the interval as if we were merging + // with one value number. + unsigned LHSValNo; + if (EliminatedLHSVals.size() > 1) { + // Loop through all the equal value numbers merging them into the smallest + // one. 
+ unsigned Smallest = EliminatedLHSVals[0]; + for (unsigned i = 1, e = EliminatedLHSVals.size(); i != e; ++i) { + if (EliminatedLHSVals[i] < Smallest) { + // Merge the current notion of the smallest into the smaller one. + LHS.MergeValueNumberInto(Smallest, EliminatedLHSVals[i]); + Smallest = EliminatedLHSVals[i]; + } else { + // Merge into the smallest. + LHS.MergeValueNumberInto(EliminatedLHSVals[i], Smallest); + } + } + LHSValNo = Smallest; + } else { + assert(!EliminatedLHSVals.empty() && "No copies from the RHS?"); + LHSValNo = EliminatedLHSVals[0]; + } + + // Okay, now that there is a single LHS value number that we're merging the + // RHS into, update the value number info for the LHS to indicate that the + // value number is defined where the RHS value number was. + LHS.setValueNumberInfo(LHSValNo, RHS.getValNumInfo(0)); + + // Okay, the final step is to loop over the RHS live intervals, adding them to + // the LHS. + LHS.MergeRangesInAsValue(RHS, LHSValNo); + LHS.weight += RHS.weight; + if (RHS.preference && !LHS.preference) + LHS.preference = RHS.preference; + + return true; +} + +/// JoinIntervals - Attempt to join these two intervals. On failure, this +/// returns false. Otherwise, if one of the intervals being joined is a +/// physreg, this method always canonicalizes LHS to be it. The output +/// "RHS" will not have been modified, so we can use this information +/// below to update aliases. +bool SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS) { + // Compute the final value assignment, assuming that the live ranges can be + // coalesced. + SmallVector<int, 16> LHSValNoAssignments; + SmallVector<int, 16> RHSValNoAssignments; + SmallVector<std::pair<unsigned,unsigned>, 16> ValueNumberInfo; + + // If a live interval is a physical register, conservatively check if any + // of its sub-registers is overlapping the live interval of the virtual + // register. If so, do not coalesce. + if (MRegisterInfo::isPhysicalRegister(LHS.reg) && + *mri_->getSubRegisters(LHS.reg)) { + for (const unsigned* SR = mri_->getSubRegisters(LHS.reg); *SR; ++SR) + if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { + DOUT << "Interfere with sub-register "; + DEBUG(li_->getInterval(*SR).print(DOUT, mri_)); + return false; + } + } else if (MRegisterInfo::isPhysicalRegister(RHS.reg) && + *mri_->getSubRegisters(RHS.reg)) { + for (const unsigned* SR = mri_->getSubRegisters(RHS.reg); *SR; ++SR) + if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) { + DOUT << "Interfere with sub-register "; + DEBUG(li_->getInterval(*SR).print(DOUT, mri_)); + return false; + } + } + + // Compute ultimate value numbers for the LHS and RHS values. + if (RHS.containsOneValue()) { + // Copies from a liveinterval with a single value are simple to handle and + // very common, handle the special case here. This is important, because + // often RHS is small and LHS is large (e.g. a physreg). + + // Find out if the RHS is defined as a copy from some value in the LHS. + int RHSValID = -1; + std::pair<unsigned,unsigned> RHSValNoInfo; + unsigned RHSSrcReg = RHS.getSrcRegForValNum(0); + if ((RHSSrcReg == 0 || rep(RHSSrcReg) != LHS.reg)) { + // If RHS is not defined as a copy from the LHS, we can use simpler and + // faster checks to see if the live ranges are coalescable. This joiner + // can't swap the LHS/RHS intervals though. 
+ if (!MRegisterInfo::isPhysicalRegister(RHS.reg)) { + return SimpleJoin(LHS, RHS); + } else { + RHSValNoInfo = RHS.getValNumInfo(0); + } + } else { + // It was defined as a copy from the LHS, find out what value # it is. + unsigned ValInst = RHS.getInstForValNum(0); + RHSValID = LHS.getLiveRangeContaining(ValInst-1)->ValId; + RHSValNoInfo = LHS.getValNumInfo(RHSValID); + } + + LHSValNoAssignments.resize(LHS.getNumValNums(), -1); + RHSValNoAssignments.resize(RHS.getNumValNums(), -1); + ValueNumberInfo.resize(LHS.getNumValNums()); + + // Okay, *all* of the values in LHS that are defined as a copy from RHS + // should now get updated. + for (unsigned VN = 0, e = LHS.getNumValNums(); VN != e; ++VN) { + if (unsigned LHSSrcReg = LHS.getSrcRegForValNum(VN)) { + if (rep(LHSSrcReg) != RHS.reg) { + // If this is not a copy from the RHS, its value number will be + // unmodified by the coalescing. + ValueNumberInfo[VN] = LHS.getValNumInfo(VN); + LHSValNoAssignments[VN] = VN; + } else if (RHSValID == -1) { + // Otherwise, it is a copy from the RHS, and we don't already have a + // value# for it. Keep the current value number, but remember it. + LHSValNoAssignments[VN] = RHSValID = VN; + ValueNumberInfo[VN] = RHSValNoInfo; + } else { + // Otherwise, use the specified value #. + LHSValNoAssignments[VN] = RHSValID; + if (VN != (unsigned)RHSValID) + ValueNumberInfo[VN].first = ~1U; + else + ValueNumberInfo[VN] = RHSValNoInfo; + } + } else { + ValueNumberInfo[VN] = LHS.getValNumInfo(VN); + LHSValNoAssignments[VN] = VN; + } + } + + assert(RHSValID != -1 && "Didn't find value #?"); + RHSValNoAssignments[0] = RHSValID; + + } else { + // Loop over the value numbers of the LHS, seeing if any are defined from + // the RHS. + SmallVector<int, 16> LHSValsDefinedFromRHS; + LHSValsDefinedFromRHS.resize(LHS.getNumValNums(), -1); + for (unsigned VN = 0, e = LHS.getNumValNums(); VN != e; ++VN) { + unsigned ValSrcReg = LHS.getSrcRegForValNum(VN); + if (ValSrcReg == 0) // Src not defined by a copy? + continue; + + // DstReg is known to be a register in the LHS interval. If the src is + // from the RHS interval, we can use its value #. + if (rep(ValSrcReg) != RHS.reg) + continue; + + // Figure out the value # from the RHS. + unsigned ValInst = LHS.getInstForValNum(VN); + LHSValsDefinedFromRHS[VN] = RHS.getLiveRangeContaining(ValInst-1)->ValId; + } + + // Loop over the value numbers of the RHS, seeing if any are defined from + // the LHS. + SmallVector<int, 16> RHSValsDefinedFromLHS; + RHSValsDefinedFromLHS.resize(RHS.getNumValNums(), -1); + for (unsigned VN = 0, e = RHS.getNumValNums(); VN != e; ++VN) { + unsigned ValSrcReg = RHS.getSrcRegForValNum(VN); + if (ValSrcReg == 0) // Src not defined by a copy? + continue; + + // DstReg is known to be a register in the RHS interval. If the src is + // from the LHS interval, we can use its value #. + if (rep(ValSrcReg) != LHS.reg) + continue; + + // Figure out the value # from the LHS. 
+ unsigned ValInst = RHS.getInstForValNum(VN); + RHSValsDefinedFromLHS[VN] = LHS.getLiveRangeContaining(ValInst-1)->ValId; + } + + LHSValNoAssignments.resize(LHS.getNumValNums(), -1); + RHSValNoAssignments.resize(RHS.getNumValNums(), -1); + ValueNumberInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); + + for (unsigned VN = 0, e = LHS.getNumValNums(); VN != e; ++VN) { + if (LHSValNoAssignments[VN] >= 0 || LHS.getInstForValNum(VN) == ~2U) + continue; + ComputeUltimateVN(VN, ValueNumberInfo, + LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, + LHSValNoAssignments, RHSValNoAssignments, LHS, RHS); + } + for (unsigned VN = 0, e = RHS.getNumValNums(); VN != e; ++VN) { + if (RHSValNoAssignments[VN] >= 0 || RHS.getInstForValNum(VN) == ~2U) + continue; + // If this value number isn't a copy from the LHS, it's a new number. + if (RHSValsDefinedFromLHS[VN] == -1) { + ValueNumberInfo.push_back(RHS.getValNumInfo(VN)); + RHSValNoAssignments[VN] = ValueNumberInfo.size()-1; + continue; + } + + ComputeUltimateVN(VN, ValueNumberInfo, + RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, + RHSValNoAssignments, LHSValNoAssignments, RHS, LHS); + } + } + + // Armed with the mappings of LHS/RHS values to ultimate values, walk the + // interval lists to see if these intervals are coalescable. + LiveInterval::const_iterator I = LHS.begin(); + LiveInterval::const_iterator IE = LHS.end(); + LiveInterval::const_iterator J = RHS.begin(); + LiveInterval::const_iterator JE = RHS.end(); + + // Skip ahead until the first place of potential sharing. + if (I->start < J->start) { + I = std::upper_bound(I, IE, J->start); + if (I != LHS.begin()) --I; + } else if (J->start < I->start) { + J = std::upper_bound(J, JE, I->start); + if (J != RHS.begin()) --J; + } + + while (1) { + // Determine if these two live ranges overlap. + bool Overlaps; + if (I->start < J->start) { + Overlaps = I->end > J->start; + } else { + Overlaps = J->end > I->start; + } + + // If so, check value # info to determine if they are really different. + if (Overlaps) { + // If the live range overlap will map to the same value number in the + // result liverange, we can still coalesce them. If not, we can't. + if (LHSValNoAssignments[I->ValId] != RHSValNoAssignments[J->ValId]) + return false; + } + + if (I->end < J->end) { + ++I; + if (I == IE) break; + } else { + ++J; + if (J == JE) break; + } + } + + // If we get here, we know that we can coalesce the live ranges. Ask the + // intervals to coalesce themselves now. + LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], + ValueNumberInfo); + return true; +} + +namespace { + // DepthMBBCompare - Comparison predicate that sort first based on the loop + // depth of the basic block (the unsigned), and then on the MBB number. + struct DepthMBBCompare { + typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair; + bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const { + if (LHS.first > RHS.first) return true; // Deeper loops first + return LHS.first == RHS.first && + LHS.second->getNumber() < RHS.second->getNumber(); + } + }; +} + +void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, + std::vector<CopyRec> *TryAgain, bool PhysOnly) { + DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n"; + + for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); + MII != E;) { + MachineInstr *Inst = MII++; + + // If this isn't a copy, we can't join intervals. 
+ unsigned SrcReg, DstReg; + if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg)) continue; + + if (TryAgain && !JoinCopy(Inst, SrcReg, DstReg, PhysOnly)) + TryAgain->push_back(getCopyRec(Inst, SrcReg, DstReg)); + } +} + +void SimpleRegisterCoalescing::joinIntervals() { + DOUT << "********** JOINING INTERVALS ***********\n"; + + JoinedLIs.resize(li_->getNumIntervals()); + JoinedLIs.reset(); + + std::vector<CopyRec> TryAgainList; + const LoopInfo &LI = getAnalysis<LoopInfo>(); + if (LI.begin() == LI.end()) { + // If there are no loops in the function, join intervals in function order. + for (MachineFunction::iterator I = mf_->begin(), E = mf_->end(); + I != E; ++I) + CopyCoalesceInMBB(I, &TryAgainList); + } else { + // Otherwise, join intervals in inner loops before other intervals. + // Unfortunately we can't just iterate over loop hierarchy here because + // there may be more MBB's than BB's. Collect MBB's for sorting. + + // Join intervals in the function prolog first. We want to join physical + // registers with virtual registers before the intervals got too long. + std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs; + for (MachineFunction::iterator I = mf_->begin(), E = mf_->end(); I != E;++I) + MBBs.push_back(std::make_pair(LI.getLoopDepth(I->getBasicBlock()), I)); + + // Sort by loop depth. + std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare()); + + // Finally, join intervals in loop nest order. + for (unsigned i = 0, e = MBBs.size(); i != e; ++i) + CopyCoalesceInMBB(MBBs[i].second, NULL, true); + for (unsigned i = 0, e = MBBs.size(); i != e; ++i) + CopyCoalesceInMBB(MBBs[i].second, &TryAgainList, false); + } + + // Joining intervals can allow other intervals to be joined. Iteratively join + // until we make no progress. + bool ProgressMade = true; + while (ProgressMade) { + ProgressMade = false; + + for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) { + CopyRec &TheCopy = TryAgainList[i]; + if (TheCopy.MI && + JoinCopy(TheCopy.MI, TheCopy.SrcReg, TheCopy.DstReg)) { + TheCopy.MI = 0; // Mark this one as done. + ProgressMade = true; + } + } + } + + // Some live range has been lengthened due to colaescing, eliminate the + // unnecessary kills. + int RegNum = JoinedLIs.find_first(); + while (RegNum != -1) { + unsigned Reg = RegNum + MRegisterInfo::FirstVirtualRegister; + unsigned repReg = rep(Reg); + LiveInterval &LI = li_->getInterval(repReg); + LiveVariables::VarInfo& svi = lv_->getVarInfo(Reg); + for (unsigned i = 0, e = svi.Kills.size(); i != e; ++i) { + MachineInstr *Kill = svi.Kills[i]; + // Suppose vr1 = op vr2, x + // and vr1 and vr2 are coalesced. vr2 should still be marked kill + // unless it is a two-address operand. + if (li_->isRemoved(Kill) || hasRegisterDef(Kill, repReg)) + continue; + if (LI.liveAt(li_->getInstructionIndex(Kill) + InstrSlots::NUM)) + unsetRegisterKill(Kill, repReg); + } + RegNum = JoinedLIs.find_next(RegNum); + } + + DOUT << "*** Register mapping ***\n"; + for (int i = 0, e = r2rMap_.size(); i != e; ++i) + if (r2rMap_[i]) { + DOUT << " reg " << i << " -> "; + DEBUG(printRegName(r2rMap_[i])); + DOUT << "\n"; + } +} + +/// Return true if the two specified registers belong to different register +/// classes. The registers may be either phys or virt regs. +bool SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA, + unsigned RegB) const { + + // Get the register classes for the first reg. 
+ if (MRegisterInfo::isPhysicalRegister(RegA)) { + assert(MRegisterInfo::isVirtualRegister(RegB) && + "Shouldn't consider two physregs!"); + return !mf_->getSSARegMap()->getRegClass(RegB)->contains(RegA); + } + + // Compare against the regclass for the second reg. + const TargetRegisterClass *RegClass = mf_->getSSARegMap()->getRegClass(RegA); + if (MRegisterInfo::isVirtualRegister(RegB)) + return RegClass != mf_->getSSARegMap()->getRegClass(RegB); + else + return !RegClass->contains(RegB); +} + +/// lastRegisterUse - Returns the last use of the specific register between +/// cycles Start and End. It also returns the use operand by reference. It +/// returns NULL if there are no uses. +MachineInstr * +SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End, unsigned Reg, + MachineOperand *&MOU) { + int e = (End-1) / InstrSlots::NUM * InstrSlots::NUM; + int s = Start; + while (e >= s) { + // Skip deleted instructions + MachineInstr *MI = li_->getInstructionFromIndex(e); + while ((e - InstrSlots::NUM) >= s && !MI) { + e -= InstrSlots::NUM; + MI = li_->getInstructionFromIndex(e); + } + if (e < s || MI == NULL) + return NULL; + + for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isUse() && MO.getReg() && + mri_->regsOverlap(rep(MO.getReg()), Reg)) { + MOU = &MO; + return MI; + } + } + + e -= InstrSlots::NUM; + } + + return NULL; +} + + +/// findDefOperand - Returns the MachineOperand that is a def of the specific +/// register. It returns NULL if the def is not found. +MachineOperand *SimpleRegisterCoalescing::findDefOperand(MachineInstr *MI, unsigned Reg) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef() && + mri_->regsOverlap(rep(MO.getReg()), Reg)) + return &MO; + } + return NULL; +} + +/// unsetRegisterKill - Unset IsKill property of all uses of specific register +/// of the specific instruction. +void SimpleRegisterCoalescing::unsetRegisterKill(MachineInstr *MI, unsigned Reg) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isUse() && MO.isKill() && MO.getReg() && + mri_->regsOverlap(rep(MO.getReg()), Reg)) + MO.unsetIsKill(); + } +} + +/// unsetRegisterKills - Unset IsKill property of all uses of specific register +/// between cycles Start and End. +void SimpleRegisterCoalescing::unsetRegisterKills(unsigned Start, unsigned End, + unsigned Reg) { + int e = (End-1) / InstrSlots::NUM * InstrSlots::NUM; + int s = Start; + while (e >= s) { + // Skip deleted instructions + MachineInstr *MI = li_->getInstructionFromIndex(e); + while ((e - InstrSlots::NUM) >= s && !MI) { + e -= InstrSlots::NUM; + MI = li_->getInstructionFromIndex(e); + } + if (e < s || MI == NULL) + return; + + for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isUse() && MO.isKill() && MO.getReg() && + mri_->regsOverlap(rep(MO.getReg()), Reg)) { + MO.unsetIsKill(); + } + } + + e -= InstrSlots::NUM; + } +} + +/// hasRegisterDef - True if the instruction defines the specific register. 
+/// +bool SimpleRegisterCoalescing::hasRegisterDef(MachineInstr *MI, unsigned Reg) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef() && + mri_->regsOverlap(rep(MO.getReg()), Reg)) + return true; + } + return false; +} + +void SimpleRegisterCoalescing::printRegName(unsigned reg) const { + if (MRegisterInfo::isPhysicalRegister(reg)) + cerr << mri_->getName(reg); + else + cerr << "%reg" << reg; +} + +void SimpleRegisterCoalescing::releaseMemory() { + r2rMap_.clear(); + JoinedLIs.clear(); +} + +static bool isZeroLengthInterval(LiveInterval *li) { + for (LiveInterval::Ranges::const_iterator + i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i) + if (i->end - i->start > LiveIntervals::InstrSlots::NUM) + return false; + return true; +} + +bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { + mf_ = &fn; + tm_ = &fn.getTarget(); + mri_ = tm_->getRegisterInfo(); + tii_ = tm_->getInstrInfo(); + li_ = &getAnalysis<LiveIntervals>(); + lv_ = &getAnalysis<LiveVariables>(); + + DOUT << "********** SIMPLE REGISTER COALESCING **********\n" + << "********** Function: " + << ((Value*)mf_->getFunction())->getName() << '\n'; + + allocatableRegs_ = mri_->getAllocatableSet(fn); + for (MRegisterInfo::regclass_iterator I = mri_->regclass_begin(), + E = mri_->regclass_end(); I != E; ++I) + allocatableRCRegs_.insert(std::make_pair(*I,mri_->getAllocatableSet(fn, *I))); + + r2rMap_.grow(mf_->getSSARegMap()->getLastVirtReg()); + + // Join (coalesce) intervals if requested. + if (EnableJoining) { + joinIntervals(); + DOUT << "********** INTERVALS POST JOINING **********\n"; + for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) { + I->second.print(DOUT, mri_); + DOUT << "\n"; + } + } + + // perform a final pass over the instructions and compute spill + // weights, coalesce virtual registers and remove identity moves. + const LoopInfo &loopInfo = getAnalysis<LoopInfo>(); + + for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); + mbbi != mbbe; ++mbbi) { + MachineBasicBlock* mbb = mbbi; + unsigned loopDepth = loopInfo.getLoopDepth(mbb->getBasicBlock()); + + for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); + mii != mie; ) { + // if the move will be an identity move delete it + unsigned srcReg, dstReg, RegRep; + if (tii_->isMoveInstr(*mii, srcReg, dstReg) && + (RegRep = rep(srcReg)) == rep(dstReg)) { + // remove from def list + LiveInterval &RegInt = li_->getOrCreateInterval(RegRep); + MachineOperand *MO = mii->findRegisterDefOperand(dstReg); + // If def of this move instruction is dead, remove its live range from + // the dstination register's live interval. + if (MO->isDead()) { + unsigned MoveIdx = li_->getDefIndex(li_->getInstructionIndex(mii)); + LiveInterval::iterator MLR = RegInt.FindLiveRangeContaining(MoveIdx); + RegInt.removeRange(MLR->start, MoveIdx+1); + if (RegInt.empty()) + li_->removeInterval(RegRep); + } + li_->RemoveMachineInstrFromMaps(mii); + mii = mbbi->erase(mii); + ++numPeep; + } else { + SmallSet<unsigned, 4> UniqueUses; + for (unsigned i = 0, e = mii->getNumOperands(); i != e; ++i) { + const MachineOperand &mop = mii->getOperand(i); + if (mop.isRegister() && mop.getReg() && + MRegisterInfo::isVirtualRegister(mop.getReg())) { + // replace register with representative register + unsigned reg = rep(mop.getReg()); + mii->getOperand(i).setReg(reg); + + // Multiple uses of reg by the same instruction. 
It should not + // contribute to spill weight again. + if (UniqueUses.count(reg) != 0) + continue; + LiveInterval &RegInt = li_->getInterval(reg); + float w = (mop.isUse()+mop.isDef()) * powf(10.0F, (float)loopDepth); + // If the definition instruction is re-materializable, its spill + // weight is half of what it would have been normally unless it's + // a load from fixed stack slot. + int Dummy; + if (RegInt.remat && !tii_->isLoadFromStackSlot(RegInt.remat, Dummy)) + w /= 2; + RegInt.weight += w; + UniqueUses.insert(reg); + } + } + ++mii; + } + } + } + + for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) { + LiveInterval &LI = I->second; + if (MRegisterInfo::isVirtualRegister(LI.reg)) { + // If the live interval length is essentially zero, i.e. in every live + // range the use follows def immediately, it doesn't make sense to spill + // it and hope it will be easier to allocate for this li. + if (isZeroLengthInterval(&LI)) + LI.weight = HUGE_VALF; + + // Slightly prefer live interval that has been assigned a preferred reg. + if (LI.preference) + LI.weight *= 1.01F; + + // Divide the weight of the interval by its size. This encourages + // spilling of intervals that are large and have few uses, and + // discourages spilling of small intervals with many uses. + LI.weight /= LI.getSize(); + } + } + + DEBUG(dump()); + return true; +} + +/// print - Implement the dump method. +void SimpleRegisterCoalescing::print(std::ostream &O, const Module* m) const { + li_->print(O, m); +} diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp new file mode 100644 index 0000000..372b1b3 --- /dev/null +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -0,0 +1,228 @@ +//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TwoAddress instruction pass which is used +// by most register allocators. Two-Address instructions are rewritten +// from: +// +// A = B op C +// +// to: +// +// A = B +// A op= C +// +// Note that if a register allocator chooses to use this pass, that it +// has to be capable of handling the non-SSA nature of these rewritten +// virtual registers. +// +// It is also worth noting that the duplicate operand of the two +// address instruction is removed. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "twoaddrinstr" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/SSARegMap.h" +#include "llvm/Target/MRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions"); +STATISTIC(NumCommuted , "Number of instructions commuted to coalesce"); +STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address"); + +namespace { + struct VISIBILITY_HIDDEN TwoAddressInstructionPass + : public MachineFunctionPass { + static char ID; // Pass identification, replacement for typeid + TwoAddressInstructionPass() : MachineFunctionPass((intptr_t)&ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + /// runOnMachineFunction - pass entry point + bool runOnMachineFunction(MachineFunction&); + }; + + char TwoAddressInstructionPass::ID = 0; + RegisterPass<TwoAddressInstructionPass> + X("twoaddressinstruction", "Two-Address instruction pass"); +} + +const PassInfo *llvm::TwoAddressInstructionPassID = X.getPassInfo(); + +void TwoAddressInstructionPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LiveVariables>(); + AU.addPreserved<LiveVariables>(); + AU.addPreservedID(PHIEliminationID); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// runOnMachineFunction - Reduce two-address instructions to two +/// operands. +/// +bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { + DOUT << "Machine Function\n"; + const TargetMachine &TM = MF.getTarget(); + const TargetInstrInfo &TII = *TM.getInstrInfo(); + const MRegisterInfo &MRI = *TM.getRegisterInfo(); + LiveVariables &LV = getAnalysis<LiveVariables>(); + + bool MadeChange = false; + + DOUT << "********** REWRITING TWO-ADDR INSTRS **********\n"; + DOUT << "********** Function: " << MF.getFunction()->getName() << '\n'; + + for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); + mbbi != mbbe; ++mbbi) { + for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); + mi != me; ++mi) { + const TargetInstrDescriptor *TID = mi->getInstrDescriptor(); + + bool FirstTied = true; + for (unsigned si = 1, e = TID->numOperands; si < e; ++si) { + int ti = TID->getOperandConstraint(si, TOI::TIED_TO); + if (ti == -1) + continue; + + if (FirstTied) { + ++NumTwoAddressInstrs; + DOUT << '\t'; DEBUG(mi->print(*cerr.stream(), &TM)); + } + FirstTied = false; + + assert(mi->getOperand(si).isRegister() && mi->getOperand(si).getReg() && + mi->getOperand(si).isUse() && "two address instruction invalid"); + + // if the two operands are the same we just remove the use + // and mark the def as def&use, otherwise we have to insert a copy. 
+ if (mi->getOperand(ti).getReg() != mi->getOperand(si).getReg()) { + // rewrite: + // a = b op c + // to: + // a = b + // a = a op c + unsigned regA = mi->getOperand(ti).getReg(); + unsigned regB = mi->getOperand(si).getReg(); + + assert(MRegisterInfo::isVirtualRegister(regA) && + MRegisterInfo::isVirtualRegister(regB) && + "cannot update physical register live information"); + +#ifndef NDEBUG + // First, verify that we don't have a use of a in the instruction (a = + // b + a for example) because our transformation will not work. This + // should never occur because we are in SSA form. + for (unsigned i = 0; i != mi->getNumOperands(); ++i) + assert((int)i == ti || + !mi->getOperand(i).isRegister() || + mi->getOperand(i).getReg() != regA); +#endif + + // If this instruction is not the killing user of B, see if we can + // rearrange the code to make it so. Making it the killing user will + // allow us to coalesce A and B together, eliminating the copy we are + // about to insert. + if (!LV.KillsRegister(mi, regB)) { + // If this instruction is commutative, check to see if C dies. If + // so, swap the B and C operands. This makes the live ranges of A + // and C joinable. + // FIXME: This code also works for A := B op C instructions. + if ((TID->Flags & M_COMMUTABLE) && mi->getNumOperands() == 3) { + assert(mi->getOperand(3-si).isRegister() && + "Not a proper commutative instruction!"); + unsigned regC = mi->getOperand(3-si).getReg(); + if (LV.KillsRegister(mi, regC)) { + DOUT << "2addr: COMMUTING : " << *mi; + MachineInstr *NewMI = TII.commuteInstruction(mi); + if (NewMI == 0) { + DOUT << "2addr: COMMUTING FAILED!\n"; + } else { + DOUT << "2addr: COMMUTED TO: " << *NewMI; + // If the instruction changed to commute it, update livevar. + if (NewMI != mi) { + LV.instructionChanged(mi, NewMI); // Update live variables + mbbi->insert(mi, NewMI); // Insert the new inst + mbbi->erase(mi); // Nuke the old inst. + mi = NewMI; + } + + ++NumCommuted; + regB = regC; + goto InstructionRearranged; + } + } + } + + // If this instruction is potentially convertible to a true + // three-address instruction, + if (TID->Flags & M_CONVERTIBLE_TO_3_ADDR) + // FIXME: This assumes there are no more operands which are tied + // to another register. +#ifndef NDEBUG + for (unsigned i = si+1, e = TID->numOperands; i < e; ++i) + assert(TID->getOperandConstraint(i, TOI::TIED_TO) == -1); +#endif + + if (MachineInstr *New = TII.convertToThreeAddress(mbbi, mi, LV)) { + DOUT << "2addr: CONVERTING 2-ADDR: " << *mi; + DOUT << "2addr: TO 3-ADDR: " << *New; + mbbi->erase(mi); // Nuke the old inst. + mi = New; + ++NumConvertedTo3Addr; + // Done with this instruction. 
+ break; + } + } + + InstructionRearranged: + const TargetRegisterClass* rc = MF.getSSARegMap()->getRegClass(regA); + MRI.copyRegToReg(*mbbi, mi, regA, regB, rc); + + MachineBasicBlock::iterator prevMi = prior(mi); + DOUT << "\t\tprepend:\t"; DEBUG(prevMi->print(*cerr.stream(), &TM)); + + // Update live variables for regA + LiveVariables::VarInfo& varInfo = LV.getVarInfo(regA); + varInfo.DefInst = prevMi; + + if (LV.removeVirtualRegisterKilled(regB, mbbi, mi)) + LV.addVirtualRegisterKilled(regB, prevMi); + + if (LV.removeVirtualRegisterDead(regB, mbbi, mi)) + LV.addVirtualRegisterDead(regB, prevMi); + + // replace all occurences of regB with regA + for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { + if (mi->getOperand(i).isRegister() && + mi->getOperand(i).getReg() == regB) + mi->getOperand(i).setReg(regA); + } + } + + assert(mi->getOperand(ti).isDef() && mi->getOperand(si).isUse()); + mi->getOperand(ti).setReg(mi->getOperand(si).getReg()); + MadeChange = true; + + DOUT << "\t\trewrite to:\t"; DEBUG(mi->print(*cerr.stream(), &TM)); + } + } + } + + return MadeChange; +} diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp new file mode 100644 index 0000000..0a611ab --- /dev/null +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -0,0 +1,81 @@ +//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is an extremely simple version of the SimplifyCFG pass. Its sole +// job is to delete LLVM basic blocks that are not reachable from the entry +// node. To do this, it performs a simple depth first traversal of the CFG, +// then deletes any unvisited nodes. +// +// Note that this pass is really a hack. In particular, the instruction +// selectors for various targets should just not generate code for unreachable +// blocks. Until LLVM has a more systematic way of defining instruction +// selectors, however, we cannot really expect them to handle additional +// complexity. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/Constant.h" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/DepthFirstIterator.h" +using namespace llvm; + +namespace { + class VISIBILITY_HIDDEN UnreachableBlockElim : public FunctionPass { + virtual bool runOnFunction(Function &F); + public: + static char ID; // Pass identification, replacement for typeid + UnreachableBlockElim() : FunctionPass((intptr_t)&ID) {} + }; + char UnreachableBlockElim::ID = 0; + RegisterPass<UnreachableBlockElim> + X("unreachableblockelim", "Remove unreachable blocks from the CFG"); +} + +FunctionPass *llvm::createUnreachableBlockEliminationPass() { + return new UnreachableBlockElim(); +} + +bool UnreachableBlockElim::runOnFunction(Function &F) { + std::set<BasicBlock*> Reachable; + + // Mark all reachable blocks. + for (df_ext_iterator<Function*> I = df_ext_begin(&F, Reachable), + E = df_ext_end(&F, Reachable); I != E; ++I) + /* Mark all reachable blocks */; + + // Loop over all dead blocks, remembering them and deleting all instructions + // in them. 
+ std::vector<BasicBlock*> DeadBlocks; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) + if (!Reachable.count(I)) { + BasicBlock *BB = I; + DeadBlocks.push_back(BB); + while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { + PN->replaceAllUsesWith(Constant::getNullValue(PN->getType())); + BB->getInstList().pop_front(); + } + for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) + (*SI)->removePredecessor(BB); + BB->dropAllReferences(); + } + + if (DeadBlocks.empty()) return false; + + // Actually remove the blocks now. + for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) + F.getBasicBlockList().erase(DeadBlocks[i]); + + return true; +} diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp new file mode 100644 index 0000000..196e849 --- /dev/null +++ b/lib/CodeGen/VirtRegMap.cpp @@ -0,0 +1,1118 @@ +//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the VirtRegMap class. +// +// It also contains implementations of the the Spiller interface, which, given a +// virtual register map and a machine function, eliminates all virtual +// references by replacing them with physical register references - adding spill +// code as necessary. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "spiller" +#include "VirtRegMap.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SSARegMap.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumSpills, "Number of register spills"); +STATISTIC(NumReMats, "Number of re-materialization"); +STATISTIC(NumStores, "Number of stores added"); +STATISTIC(NumLoads , "Number of loads added"); +STATISTIC(NumReused, "Number of values reused"); +STATISTIC(NumDSE , "Number of dead stores elided"); +STATISTIC(NumDCE , "Number of copies elided"); + +namespace { + enum SpillerName { simple, local }; + + static cl::opt<SpillerName> + SpillerOpt("spiller", + cl::desc("Spiller to use: (default: local)"), + cl::Prefix, + cl::values(clEnumVal(simple, " simple spiller"), + clEnumVal(local, " local spiller"), + clEnumValEnd), + cl::init(local)); +} + +//===----------------------------------------------------------------------===// +// VirtRegMap implementation +//===----------------------------------------------------------------------===// + +VirtRegMap::VirtRegMap(MachineFunction &mf) + : TII(*mf.getTarget().getInstrInfo()), MF(mf), + Virt2PhysMap(NO_PHYS_REG), Virt2StackSlotMap(NO_STACK_SLOT), + ReMatId(MAX_STACK_SLOT+1) { + grow(); +} + +void VirtRegMap::grow() { + Virt2PhysMap.grow(MF.getSSARegMap()->getLastVirtReg()); + Virt2StackSlotMap.grow(MF.getSSARegMap()->getLastVirtReg()); +} + +int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) { + assert(MRegisterInfo::isVirtualRegister(virtReg)); + 
assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && + "attempt to assign stack slot to already spilled register"); + const TargetRegisterClass* RC = MF.getSSARegMap()->getRegClass(virtReg); + int frameIndex = MF.getFrameInfo()->CreateStackObject(RC->getSize(), + RC->getAlignment()); + Virt2StackSlotMap[virtReg] = frameIndex; + ++NumSpills; + return frameIndex; +} + +void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int frameIndex) { + assert(MRegisterInfo::isVirtualRegister(virtReg)); + assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && + "attempt to assign stack slot to already spilled register"); + assert((frameIndex >= 0 || + (frameIndex >= MF.getFrameInfo()->getObjectIndexBegin())) && + "illegal fixed frame index"); + Virt2StackSlotMap[virtReg] = frameIndex; +} + +int VirtRegMap::assignVirtReMatId(unsigned virtReg) { + assert(MRegisterInfo::isVirtualRegister(virtReg)); + assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && + "attempt to assign re-mat id to already spilled register"); + const MachineInstr *DefMI = getReMaterializedMI(virtReg); + int FrameIdx; + if (TII.isLoadFromStackSlot((MachineInstr*)DefMI, FrameIdx)) { + // Load from stack slot is re-materialize as reload from the stack slot! + Virt2StackSlotMap[virtReg] = FrameIdx; + return FrameIdx; + } + Virt2StackSlotMap[virtReg] = ReMatId; + return ReMatId++; +} + +void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI, + unsigned OpNo, MachineInstr *NewMI) { + // Move previous memory references folded to new instruction. + MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI); + for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI), + E = MI2VirtMap.end(); I != E && I->first == OldMI; ) { + MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second)); + MI2VirtMap.erase(I++); + } + + ModRef MRInfo; + const TargetInstrDescriptor *TID = OldMI->getInstrDescriptor(); + if (TID->getOperandConstraint(OpNo, TOI::TIED_TO) != -1 || + TID->findTiedToSrcOperand(OpNo) != -1) { + // Folded a two-address operand. 
+ MRInfo = isModRef; + } else if (OldMI->getOperand(OpNo).isDef()) { + MRInfo = isMod; + } else { + MRInfo = isRef; + } + + // add new memory reference + MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo))); +} + +void VirtRegMap::print(std::ostream &OS) const { + const MRegisterInfo* MRI = MF.getTarget().getRegisterInfo(); + + OS << "********** REGISTER MAP **********\n"; + for (unsigned i = MRegisterInfo::FirstVirtualRegister, + e = MF.getSSARegMap()->getLastVirtReg(); i <= e; ++i) { + if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG) + OS << "[reg" << i << " -> " << MRI->getName(Virt2PhysMap[i]) << "]\n"; + + } + + for (unsigned i = MRegisterInfo::FirstVirtualRegister, + e = MF.getSSARegMap()->getLastVirtReg(); i <= e; ++i) + if (Virt2StackSlotMap[i] != VirtRegMap::NO_STACK_SLOT) + OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i] << "]\n"; + OS << '\n'; +} + +void VirtRegMap::dump() const { + print(DOUT); +} + + +//===----------------------------------------------------------------------===// +// Simple Spiller Implementation +//===----------------------------------------------------------------------===// + +Spiller::~Spiller() {} + +namespace { + struct VISIBILITY_HIDDEN SimpleSpiller : public Spiller { + bool runOnMachineFunction(MachineFunction& mf, VirtRegMap &VRM); + }; +} + +bool SimpleSpiller::runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) { + DOUT << "********** REWRITE MACHINE CODE **********\n"; + DOUT << "********** Function: " << MF.getFunction()->getName() << '\n'; + const TargetMachine &TM = MF.getTarget(); + const MRegisterInfo &MRI = *TM.getRegisterInfo(); + + // LoadedRegs - Keep track of which vregs are loaded, so that we only load + // each vreg once (in the case where a spilled vreg is used by multiple + // operands). This is always smaller than the number of operands to the + // current machine instr, so it should be small. 
+ std::vector<unsigned> LoadedRegs; + + for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + DOUT << MBBI->getBasicBlock()->getName() << ":\n"; + MachineBasicBlock &MBB = *MBBI; + for (MachineBasicBlock::iterator MII = MBB.begin(), + E = MBB.end(); MII != E; ++MII) { + MachineInstr &MI = *MII; + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (MO.isRegister() && MO.getReg()) + if (MRegisterInfo::isVirtualRegister(MO.getReg())) { + unsigned VirtReg = MO.getReg(); + unsigned PhysReg = VRM.getPhys(VirtReg); + if (VRM.hasStackSlot(VirtReg)) { + int StackSlot = VRM.getStackSlot(VirtReg); + const TargetRegisterClass* RC = + MF.getSSARegMap()->getRegClass(VirtReg); + + if (MO.isUse() && + std::find(LoadedRegs.begin(), LoadedRegs.end(), VirtReg) + == LoadedRegs.end()) { + MRI.loadRegFromStackSlot(MBB, &MI, PhysReg, StackSlot, RC); + LoadedRegs.push_back(VirtReg); + ++NumLoads; + DOUT << '\t' << *prior(MII); + } + + if (MO.isDef()) { + MRI.storeRegToStackSlot(MBB, next(MII), PhysReg, StackSlot, RC); + ++NumStores; + } + } + MF.setPhysRegUsed(PhysReg); + MI.getOperand(i).setReg(PhysReg); + } else { + MF.setPhysRegUsed(MO.getReg()); + } + } + + DOUT << '\t' << MI; + LoadedRegs.clear(); + } + } + return true; +} + +//===----------------------------------------------------------------------===// +// Local Spiller Implementation +//===----------------------------------------------------------------------===// + +namespace { + /// LocalSpiller - This spiller does a simple pass over the machine basic + /// block to attempt to keep spills in registers as much as possible for + /// blocks that have low register pressure (the vreg may be spilled due to + /// register pressure in other blocks). + class VISIBILITY_HIDDEN LocalSpiller : public Spiller { + const MRegisterInfo *MRI; + const TargetInstrInfo *TII; + public: + bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) { + MRI = MF.getTarget().getRegisterInfo(); + TII = MF.getTarget().getInstrInfo(); + DOUT << "\n**** Local spiller rewriting function '" + << MF.getFunction()->getName() << "':\n"; + + std::vector<MachineInstr *> ReMatedMIs; + for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); + MBB != E; ++MBB) + RewriteMBB(*MBB, VRM, ReMatedMIs); + for (unsigned i = 0, e = ReMatedMIs.size(); i != e; ++i) + delete ReMatedMIs[i]; + return true; + } + private: + void RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM, + std::vector<MachineInstr*> &ReMatedMIs); + }; +} + +/// AvailableSpills - As the local spiller is scanning and rewriting an MBB from +/// top down, keep track of which spills slots are available in each register. +/// +/// Note that not all physregs are created equal here. In particular, some +/// physregs are reloads that we are allowed to clobber or ignore at any time. +/// Other physregs are values that the register allocated program is using that +/// we cannot CHANGE, but we can read if we like. We keep track of this on a +/// per-stack-slot basis as the low bit in the value of the SpillSlotsAvailable +/// entries. The predicate 'canClobberPhysReg()' checks this bit and +/// addAvailable sets it if. +namespace { +class VISIBILITY_HIDDEN AvailableSpills { + const MRegisterInfo *MRI; + const TargetInstrInfo *TII; + + // SpillSlotsAvailable - This map keeps track of all of the spilled virtual + // register values that are still available, due to being loaded or stored to, + // but not invalidated yet. 
+ std::map<int, unsigned> SpillSlotsAvailable; + + // PhysRegsAvailable - This is the inverse of SpillSlotsAvailable, indicating + // which stack slot values are currently held by a physreg. This is used to + // invalidate entries in SpillSlotsAvailable when a physreg is modified. + std::multimap<unsigned, int> PhysRegsAvailable; + + void disallowClobberPhysRegOnly(unsigned PhysReg); + + void ClobberPhysRegOnly(unsigned PhysReg); +public: + AvailableSpills(const MRegisterInfo *mri, const TargetInstrInfo *tii) + : MRI(mri), TII(tii) { + } + + const MRegisterInfo *getRegInfo() const { return MRI; } + + /// getSpillSlotPhysReg - If the specified stack slot is available in a + /// physical register, return that PhysReg, otherwise return 0. + unsigned getSpillSlotPhysReg(int Slot) const { + std::map<int, unsigned>::const_iterator I = SpillSlotsAvailable.find(Slot); + if (I != SpillSlotsAvailable.end()) { + return I->second >> 1; // Remove the CanClobber bit. + } + return 0; + } + + /// addAvailable - Mark that the specified stack slot is available in the + /// specified physreg. If CanClobber is true, the physreg can be modified at + /// any time without changing the semantics of the program. + void addAvailable(int Slot, MachineInstr *MI, unsigned Reg, + bool CanClobber = true) { + // If this stack slot is thought to be available in some other physreg, + // remove its record. + ModifyStackSlot(Slot); + + PhysRegsAvailable.insert(std::make_pair(Reg, Slot)); + SpillSlotsAvailable[Slot] = (Reg << 1) | (unsigned)CanClobber; + + if (Slot > VirtRegMap::MAX_STACK_SLOT) + DOUT << "Remembering RM#" << Slot-VirtRegMap::MAX_STACK_SLOT-1; + else + DOUT << "Remembering SS#" << Slot; + DOUT << " in physreg " << MRI->getName(Reg) << "\n"; + } + + /// canClobberPhysReg - Return true if the spiller is allowed to change the + /// value of the specified stackslot register if it desires. The specified + /// stack slot must be available in a physreg for this query to make sense. + bool canClobberPhysReg(int Slot) const { + assert(SpillSlotsAvailable.count(Slot) && "Slot not available!"); + return SpillSlotsAvailable.find(Slot)->second & 1; + } + + /// disallowClobberPhysReg - Unset the CanClobber bit of the specified + /// stackslot register. The register is still available but is no longer + /// allowed to be modifed. + void disallowClobberPhysReg(unsigned PhysReg); + + /// ClobberPhysReg - This is called when the specified physreg changes + /// value. We use this to invalidate any info about stuff we thing lives in + /// it and any of its aliases. + void ClobberPhysReg(unsigned PhysReg); + + /// ModifyStackSlot - This method is called when the value in a stack slot + /// changes. This removes information about which register the previous value + /// for this slot lives in (as the previous value is dead now). + void ModifyStackSlot(int Slot); +}; +} + +/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified +/// stackslot register. The register is still available but is no longer +/// allowed to be modifed. 
+void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) { + std::multimap<unsigned, int>::iterator I = + PhysRegsAvailable.lower_bound(PhysReg); + while (I != PhysRegsAvailable.end() && I->first == PhysReg) { + int Slot = I->second; + I++; + assert((SpillSlotsAvailable[Slot] >> 1) == PhysReg && + "Bidirectional map mismatch!"); + SpillSlotsAvailable[Slot] &= ~1; + DOUT << "PhysReg " << MRI->getName(PhysReg) + << " copied, it is available for use but can no longer be modified\n"; + } +} + +/// disallowClobberPhysReg - Unset the CanClobber bit of the specified +/// stackslot register and its aliases. The register and its aliases may +/// still available but is no longer allowed to be modifed. +void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) { + for (const unsigned *AS = MRI->getAliasSet(PhysReg); *AS; ++AS) + disallowClobberPhysRegOnly(*AS); + disallowClobberPhysRegOnly(PhysReg); +} + +/// ClobberPhysRegOnly - This is called when the specified physreg changes +/// value. We use this to invalidate any info about stuff we thing lives in it. +void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) { + std::multimap<unsigned, int>::iterator I = + PhysRegsAvailable.lower_bound(PhysReg); + while (I != PhysRegsAvailable.end() && I->first == PhysReg) { + int Slot = I->second; + PhysRegsAvailable.erase(I++); + assert((SpillSlotsAvailable[Slot] >> 1) == PhysReg && + "Bidirectional map mismatch!"); + SpillSlotsAvailable.erase(Slot); + DOUT << "PhysReg " << MRI->getName(PhysReg) + << " clobbered, invalidating "; + if (Slot > VirtRegMap::MAX_STACK_SLOT) + DOUT << "RM#" << Slot-VirtRegMap::MAX_STACK_SLOT-1 << "\n"; + else + DOUT << "SS#" << Slot << "\n"; + } +} + +/// ClobberPhysReg - This is called when the specified physreg changes +/// value. We use this to invalidate any info about stuff we thing lives in +/// it and any of its aliases. +void AvailableSpills::ClobberPhysReg(unsigned PhysReg) { + for (const unsigned *AS = MRI->getAliasSet(PhysReg); *AS; ++AS) + ClobberPhysRegOnly(*AS); + ClobberPhysRegOnly(PhysReg); +} + +/// ModifyStackSlot - This method is called when the value in a stack slot +/// changes. This removes information about which register the previous value +/// for this slot lives in (as the previous value is dead now). +void AvailableSpills::ModifyStackSlot(int Slot) { + std::map<int, unsigned>::iterator It = SpillSlotsAvailable.find(Slot); + if (It == SpillSlotsAvailable.end()) return; + unsigned Reg = It->second >> 1; + SpillSlotsAvailable.erase(It); + + // This register may hold the value of multiple stack slots, only remove this + // stack slot from the set of values the register contains. + std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg); + for (; ; ++I) { + assert(I != PhysRegsAvailable.end() && I->first == Reg && + "Map inverse broken!"); + if (I->second == Slot) break; + } + PhysRegsAvailable.erase(I); +} + + + +/// InvalidateKills - MI is going to be deleted. If any of its operands are +/// marked kill, then invalidate the information. +static void InvalidateKills(MachineInstr &MI, BitVector &RegKills, + std::vector<MachineOperand*> &KillOps) { + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || !MO.isUse() || !MO.isKill()) + continue; + unsigned Reg = MO.getReg(); + if (KillOps[Reg] == &MO) { + RegKills.reset(Reg); + KillOps[Reg] = NULL; + } + } +} + +/// UpdateKills - Track and update kill info. 
+/// UpdateKills - Track and update kill info.  If an MI reads a register that
+/// is marked kill, then it must be due to register reuse.  Transfer the kill
+/// info over.
+static void UpdateKills(MachineInstr &MI, BitVector &RegKills,
+                        std::vector<MachineOperand*> &KillOps) {
+  const TargetInstrDescriptor *TID = MI.getInstrDescriptor();
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || !MO.isUse())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0)
+      continue;
+
+    if (RegKills[Reg]) {
+      // That can't be right.  Register is killed but not re-defined and it's
+      // being reused.  Let's fix that.
+      KillOps[Reg]->unsetIsKill();
+      if (i < TID->numOperands &&
+          TID->getOperandConstraint(i, TOI::TIED_TO) == -1)
+        // Unless it's a two-address operand, this is the new kill.
+        MO.setIsKill();
+    }
+
+    if (MO.isKill()) {
+      RegKills.set(Reg);
+      KillOps[Reg] = &MO;
+    }
+  }
+
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+    unsigned Reg = MO.getReg();
+    RegKills.reset(Reg);
+    KillOps[Reg] = NULL;
+  }
+}
+
+
+// ReusedOp - For each reused operand, we keep track of a bit of information,
+// in case we need to roll back upon processing a new operand.  See comments
+// below.
+namespace {
+  struct ReusedOp {
+    // The MachineInstr operand that reused an available value.
+    unsigned Operand;
+
+    // StackSlot - The spill slot of the value being reused.
+    unsigned StackSlot;
+
+    // PhysRegReused - The physical register the value was available in.
+    unsigned PhysRegReused;
+
+    // AssignedPhysReg - The physreg that was assigned for use by the reload.
+    unsigned AssignedPhysReg;
+
+    // VirtReg - The virtual register itself.
+    unsigned VirtReg;
+
+    ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr,
+             unsigned vreg)
+      : Operand(o), StackSlot(ss), PhysRegReused(prr), AssignedPhysReg(apr),
+        VirtReg(vreg) {}
+  };
+
+  /// ReuseInfo - This maintains a collection of ReusedOps for each operand
+  /// that is reused instead of reloaded.
+  class VISIBILITY_HIDDEN ReuseInfo {
+    MachineInstr &MI;
+    std::vector<ReusedOp> Reuses;
+    BitVector PhysRegsClobbered;
+  public:
+    ReuseInfo(MachineInstr &mi, const MRegisterInfo *mri) : MI(mi) {
+      PhysRegsClobbered.resize(mri->getNumRegs());
+    }
+
+    bool hasReuses() const {
+      return !Reuses.empty();
+    }
+
+    /// addReuse - If we choose to reuse a virtual register that is already
+    /// available instead of reloading it, remember that we did so.
+    void addReuse(unsigned OpNo, unsigned StackSlot,
+                  unsigned PhysRegReused, unsigned AssignedPhysReg,
+                  unsigned VirtReg) {
+      // If the reload is to the assigned register anyway, no undo will be
+      // required.
+      if (PhysRegReused == AssignedPhysReg) return;
+
+      // Otherwise, remember this.
+      Reuses.push_back(ReusedOp(OpNo, StackSlot, PhysRegReused,
+                                AssignedPhysReg, VirtReg));
+    }
+
+    void markClobbered(unsigned PhysReg) {
+      PhysRegsClobbered.set(PhysReg);
+    }
+
+    bool isClobbered(unsigned PhysReg) const {
+      return PhysRegsClobbered.test(PhysReg);
+    }
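+    // For example: if operand 1 of an instruction uses a vreg whose spill
+    // slot SS#3 is already live in R0, while the allocator assigned R1 for
+    // the reload, addReuse(1, 3, R0, R1, VReg) records enough state to redo
+    // the reload into R1 later should R0 turn out to be needed.  (The
+    // operand, slot, and register numbers here are purely illustrative.)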
+    /// GetRegForReload - We are about to emit a reload into PhysReg.  If there
+    /// is some other operand that is using the specified register, either pick
+    /// a new register to use, or evict the previous reload and use this reg.
+    unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI,
+                             AvailableSpills &Spills,
+                             std::map<int, MachineInstr*> &MaybeDeadStores,
+                             SmallSet<unsigned, 8> &Rejected,
+                             BitVector &RegKills,
+                             std::vector<MachineOperand*> &KillOps) {
+      if (Reuses.empty()) return PhysReg;  // This is most often empty.
+
+      for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) {
+        ReusedOp &Op = Reuses[ro];
+        // If we find some other reuse that was supposed to use this register
+        // exactly for its reload, we can change this reload to use ITS reload
+        // register.  That is, unless its reload register has already been
+        // considered and subsequently rejected because it has also been reused
+        // by another operand.
+        if (Op.PhysRegReused == PhysReg &&
+            Rejected.count(Op.AssignedPhysReg) == 0) {
+          // Yup, use the reload register that we didn't use before.
+          unsigned NewReg = Op.AssignedPhysReg;
+          Rejected.insert(PhysReg);
+          return GetRegForReload(NewReg, MI, Spills, MaybeDeadStores, Rejected,
+                                 RegKills, KillOps);
+        } else {
+          // Otherwise, we might also have a problem if a previously reused
+          // value aliases the new register.  If so, codegen the previous
+          // reload and use this one.
+          unsigned PRRU = Op.PhysRegReused;
+          const MRegisterInfo *MRI = Spills.getRegInfo();
+          if (MRI->areAliases(PRRU, PhysReg)) {
+            // Okay, we found out that an alias of a reused register
+            // was used.  This isn't good because it means we have
+            // to undo a previous reuse.
+            MachineBasicBlock *MBB = MI->getParent();
+            const TargetRegisterClass *AliasRC =
+              MBB->getParent()->getSSARegMap()->getRegClass(Op.VirtReg);
+
+            // Copy Op out of the vector and remove it; we're going to insert
+            // an explicit load for it.
+            ReusedOp NewOp = Op;
+            Reuses.erase(Reuses.begin()+ro);
+
+            // Okay, we're going to try to reload the assigned physreg into
+            // the slot that we were supposed to in the first place.  However,
+            // that register could hold a reuse.  Check to see if it conflicts
+            // or would prefer us to use a different register.
+            unsigned NewPhysReg = GetRegForReload(NewOp.AssignedPhysReg,
+                                                  MI, Spills, MaybeDeadStores,
+                                                  Rejected, RegKills, KillOps);
+
+            MRI->loadRegFromStackSlot(*MBB, MI, NewPhysReg,
+                                      NewOp.StackSlot, AliasRC);
+            Spills.ClobberPhysReg(NewPhysReg);
+            Spills.ClobberPhysReg(NewOp.PhysRegReused);
+
+            // Any stores to this stack slot are not dead anymore.
+            MaybeDeadStores.erase(NewOp.StackSlot);
+
+            MI->getOperand(NewOp.Operand).setReg(NewPhysReg);
+
+            Spills.addAvailable(NewOp.StackSlot, MI, NewPhysReg);
+            ++NumLoads;
+            MachineBasicBlock::iterator MII = MI;
+            --MII;
+            UpdateKills(*MII, RegKills, KillOps);
+            DOUT << '\t' << *MII;
+
+            DOUT << "Reuse undone!\n";
+            --NumReused;
+
+            // Finally, PhysReg is now available, go ahead and use it.
+            return PhysReg;
+          }
+        }
+      }
+      return PhysReg;
+    }
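+    // A concrete case for the alias check above: on x86, suppose SS#2 was
+    // reused in AX while a later operand must be reloaded into the aliasing
+    // register EAX.  The earlier reuse is then undone: an explicit load of
+    // SS#2 into its assigned register is emitted ("Reuse undone!"), leaving
+    // EAX free for the new reload.  (The register choices are illustrative.)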
+    /// GetRegForReload - Helper for the above GetRegForReload().  Add a
+    /// 'Rejected' set to remember which registers have been considered and
+    /// rejected for the reload.  This avoids infinite looping in cases like
+    /// this:
+    ///   t1 := op t2, t3
+    ///   t2 <- assigned r0 for use by the reload but ended up reusing r1
+    ///   t3 <- assigned r1 for use by the reload but ended up reusing r0
+    ///   t1 <- desires r1
+    ///         sees r1 is taken by t2, tries t2's reload register r0
+    ///         sees r0 is taken by t3, tries t3's reload register r1
+    ///         sees r1 is taken by t2, tries t2's reload register r0 ...
+    unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI,
+                             AvailableSpills &Spills,
+                             std::map<int, MachineInstr*> &MaybeDeadStores,
+                             BitVector &RegKills,
+                             std::vector<MachineOperand*> &KillOps) {
+      SmallSet<unsigned, 8> Rejected;
+      return GetRegForReload(PhysReg, MI, Spills, MaybeDeadStores, Rejected,
+                             RegKills, KillOps);
+    }
+  };
+}
+
+
+/// RewriteMBB - Keep track of which spills are available even after the
+/// register allocator is done with them.  If possible, avoid reloading vregs.
+void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM,
+                              std::vector<MachineInstr*> &ReMatedMIs) {
+  DOUT << MBB.getBasicBlock()->getName() << ":\n";
+
+  // Spills - Keep track of which spilled values are available in physregs so
+  // that we can choose to reuse the physregs instead of emitting reloads.
+  AvailableSpills Spills(MRI, TII);
+
+  // MaybeDeadStores - When we need to write a value back into a stack slot,
+  // keep track of the inserted store.  If the stack slot value is never read
+  // (because the value was used from some available register, for example),
+  // and subsequently stored to, the original store is dead.  This map keeps
+  // track of inserted stores that are not used.  If we see a subsequent store
+  // to the same stack slot, the original store is deleted.
+  std::map<int, MachineInstr*> MaybeDeadStores;
+
+  // Keep track of kill information.
+  BitVector RegKills(MRI->getNumRegs());
+  std::vector<MachineOperand*> KillOps;
+  KillOps.resize(MRI->getNumRegs(), NULL);
+
+  MachineFunction &MF = *MBB.getParent();
+  for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+       MII != E; ) {
+    MachineInstr &MI = *MII;
+    MachineBasicBlock::iterator NextMII = MII; ++NextMII;
+    VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+
+    bool Erased = false;
+    bool BackTracked = false;
+
+    /// ReusedOperands - Keep track of operand reuse in case we need to undo
+    /// reuse.
+    ReuseInfo ReusedOperands(MI, MRI);
+
+    // Loop over all of the implicit defs, clearing them from our available
+    // sets.
+    const TargetInstrDescriptor *TID = MI.getInstrDescriptor();
+
+    // If this instruction is being rematerialized, just remove it!
+    int FrameIdx;
+    if (TII->isTriviallyReMaterializable(&MI) ||
+        TII->isLoadFromStackSlot(&MI, FrameIdx)) {
+      Erased = true;
+      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI.getOperand(i);
+        if (!MO.isRegister() || MO.getReg() == 0)
+          continue;   // Ignore non-register operands.
+        if (MO.isDef() && !VRM.isReMaterialized(MO.getReg())) {
+          Erased = false;
+          break;
+        }
+      }
+      if (Erased) {
+        VRM.RemoveFromFoldedVirtMap(&MI);
+        ReMatedMIs.push_back(MI.removeFromParent());
+        goto ProcessNextInst;
+      }
+    }
+
+    if (TID->ImplicitDefs) {
+      const unsigned *ImpDef = TID->ImplicitDefs;
+      for ( ; *ImpDef; ++ImpDef) {
+        MF.setPhysRegUsed(*ImpDef);
+        ReusedOperands.markClobbered(*ImpDef);
+        Spills.ClobberPhysReg(*ImpDef);
+      }
+    }
+
+    // Process all of the spilled uses and all non-spilled reg references.
+    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI.getOperand(i);
+      if (!MO.isRegister() || MO.getReg() == 0)
+        continue;   // Ignore non-register operands.
+
+      if (MRegisterInfo::isPhysicalRegister(MO.getReg())) {
+        // Ignore physregs for spilling, but remember that the register is
+        // used by this function.
+        MF.setPhysRegUsed(MO.getReg());
+        ReusedOperands.markClobbered(MO.getReg());
+        continue;
+      }
+
+      assert(MRegisterInfo::isVirtualRegister(MO.getReg()) &&
+             "Not a virtual or a physical register?");
+
+      unsigned VirtReg = MO.getReg();
+      if (!VRM.hasStackSlot(VirtReg)) {
+        // This virtual register was assigned a physreg!
+        unsigned Phys = VRM.getPhys(VirtReg);
+        MF.setPhysRegUsed(Phys);
+        if (MO.isDef())
+          ReusedOperands.markClobbered(Phys);
+        MI.getOperand(i).setReg(Phys);
+        continue;
+      }
+
+      // This virtual register is now known to be a spilled value.
+      if (!MO.isUse())
+        continue;  // Handle defs in the loop below (handle use&def here though)
+
+      bool doReMat = VRM.isReMaterialized(VirtReg);
+      int StackSlot = VRM.getStackSlot(VirtReg);
+      unsigned PhysReg;
+
+      // Check to see if this stack slot is available.
+      if ((PhysReg = Spills.getSpillSlotPhysReg(StackSlot))) {
+        // This spilled operand might be part of a two-address operand.  If
+        // this is the case, then changing it will necessarily require changing
+        // the def part of the instruction as well.  However, in some cases, we
+        // aren't allowed to modify the reused register.  If none of these
+        // cases apply, reuse it.
+        bool CanReuse = true;
+        int ti = TID->getOperandConstraint(i, TOI::TIED_TO);
+        if (ti != -1 &&
+            MI.getOperand(ti).isReg() &&
+            MI.getOperand(ti).getReg() == VirtReg) {
+          // Okay, we have a two-address operand.  We can reuse this physreg as
+          // long as we are allowed to clobber the value and there isn't an
+          // earlier def that has already clobbered the physreg.
+          CanReuse = Spills.canClobberPhysReg(StackSlot) &&
+            !ReusedOperands.isClobbered(PhysReg);
+        }
+
+        if (CanReuse) {
+          // If this stack slot value is already available, reuse it!
+          if (StackSlot > VirtRegMap::MAX_STACK_SLOT)
+            DOUT << "Reusing RM#" << StackSlot-VirtRegMap::MAX_STACK_SLOT-1;
+          else
+            DOUT << "Reusing SS#" << StackSlot;
+          DOUT << " from physreg "
+               << MRI->getName(PhysReg) << " for vreg"
+               << VirtReg << " instead of reloading into physreg "
+               << MRI->getName(VRM.getPhys(VirtReg)) << "\n";
+          MI.getOperand(i).setReg(PhysReg);
+
+          // One technical detail remains: we don't know that PhysReg won't be
+          // clobbered by a reloaded stack slot that occurs later in the
+          // instruction.  In particular, consider 'op V1, V2'.  If V1 is
+          // available in physreg R0, we would choose to reuse it here, instead
+          // of reloading it into the register the allocator indicated (say
+          // R1).  However, V2 might have to be reloaded later, and it might
+          // indicate that it needs to live in R0.  When this occurs, we need
+          // to have information available that indicates it is safe to use R1
+          // for the reload instead of R0.
+          //
+          // To further complicate matters, we might conflict with an alias,
+          // or R0 and R1 might not be compatible with each other.  In this
+          // case, we actually insert a reload for V1 in R1, ensuring that
+          // we can get at R0 or its alias.
+          ReusedOperands.addReuse(i, StackSlot, PhysReg,
+                                  VRM.getPhys(VirtReg), VirtReg);
+          if (ti != -1)
+            // Only mark it clobbered if this is a use&def operand.
+            ReusedOperands.markClobbered(PhysReg);
+          ++NumReused;
+          continue;
+        }
+
+        // Otherwise, we have a two-address instruction whose mod/ref operand
+        // needs to be reloaded.  This reload is already available in some
+        // register "PhysReg", but if we used PhysReg as the operand to our
+        // 2-addr instruction, the instruction would modify PhysReg.
+        // This isn't cool if something later uses PhysReg and expects to get
+        // its initial value.
+        //
+        // To avoid this problem, and to avoid doing a load right after a
+        // store, we emit a copy from PhysReg into the designated register for
+        // this operand.
+        unsigned DesignatedReg = VRM.getPhys(VirtReg);
+        assert(DesignatedReg && "Must map virtreg to physreg!");
+
+        // Note that, if we reused a register for a previous operand, the
+        // register we want to reload into might not actually be
+        // available.  If this occurs, use the register indicated by the
+        // reuser.
+        if (ReusedOperands.hasReuses())
+          DesignatedReg = ReusedOperands.GetRegForReload(DesignatedReg, &MI,
+                               Spills, MaybeDeadStores, RegKills, KillOps);
+
+        // If the mapped designated register is actually the physreg we have
+        // incoming, we don't need to insert a dead copy.
+        if (DesignatedReg == PhysReg) {
+          // If this stack slot value is already available, reuse it!
+          if (StackSlot > VirtRegMap::MAX_STACK_SLOT)
+            DOUT << "Reusing RM#" << StackSlot-VirtRegMap::MAX_STACK_SLOT-1;
+          else
+            DOUT << "Reusing SS#" << StackSlot;
+          DOUT << " from physreg " << MRI->getName(PhysReg) << " for vreg"
+               << VirtReg
+               << " instead of reloading into same physreg.\n";
+          MI.getOperand(i).setReg(PhysReg);
+          ReusedOperands.markClobbered(PhysReg);
+          ++NumReused;
+          continue;
+        }
+
+        const TargetRegisterClass* RC = MF.getSSARegMap()->getRegClass(VirtReg);
+        MF.setPhysRegUsed(DesignatedReg);
+        ReusedOperands.markClobbered(DesignatedReg);
+        MRI->copyRegToReg(MBB, &MI, DesignatedReg, PhysReg, RC);
+
+        MachineInstr *CopyMI = prior(MII);
+        UpdateKills(*CopyMI, RegKills, KillOps);
+
+        // This invalidates DesignatedReg.
+        Spills.ClobberPhysReg(DesignatedReg);
+
+        Spills.addAvailable(StackSlot, &MI, DesignatedReg);
+        MI.getOperand(i).setReg(DesignatedReg);
+        DOUT << '\t' << *prior(MII);
+        ++NumReused;
+        continue;
+      }
+
+      // Otherwise, reload it and remember that we have it.
+      PhysReg = VRM.getPhys(VirtReg);
+      assert(PhysReg && "Must map virtreg to physreg!");
+      const TargetRegisterClass* RC = MF.getSSARegMap()->getRegClass(VirtReg);
+
+      // Note that, if we reused a register for a previous operand, the
+      // register we want to reload into might not actually be
+      // available.  If this occurs, use the register indicated by the
+      // reuser.
+      if (ReusedOperands.hasReuses())
+        PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI,
+                             Spills, MaybeDeadStores, RegKills, KillOps);
+
+      MF.setPhysRegUsed(PhysReg);
+      ReusedOperands.markClobbered(PhysReg);
+      if (doReMat) {
+        MRI->reMaterialize(MBB, &MI, PhysReg, VRM.getReMaterializedMI(VirtReg));
+        ++NumReMats;
+      } else {
+        MRI->loadRegFromStackSlot(MBB, &MI, PhysReg, StackSlot, RC);
+        ++NumLoads;
+      }
+      // This invalidates PhysReg.
+      Spills.ClobberPhysReg(PhysReg);
+
+      // Any stores to this stack slot are not dead anymore.
+      if (!doReMat)
+        MaybeDeadStores.erase(StackSlot);
+      Spills.addAvailable(StackSlot, &MI, PhysReg);
+      // Assumes this is the last use.  IsKill will be unset if reg is reused
+      // unless it's a two-address operand.
+      if (TID->getOperandConstraint(i, TOI::TIED_TO) == -1)
+        MI.getOperand(i).setIsKill();
+      MI.getOperand(i).setReg(PhysReg);
+      UpdateKills(*prior(MII), RegKills, KillOps);
+      DOUT << '\t' << *prior(MII);
+    }
+
+    DOUT << '\t' << MI;
+
+    // If we have folded references to memory operands, make sure we clear all
+    // physical registers that may contain the value of the spilled virtual
+    // register.
+    for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
+      DOUT << "Folded vreg: " << I->second.first << " MR: "
+           << I->second.second;
+      unsigned VirtReg = I->second.first;
+      VirtRegMap::ModRef MR = I->second.second;
+      if (!VRM.hasStackSlot(VirtReg)) {
+        DOUT << ": No stack slot!\n";
+        continue;
+      }
+      int SS = VRM.getStackSlot(VirtReg);
+      DOUT << " - StackSlot: " << SS << "\n";
+
+      // If this folded instruction is just a use, check to see if it's a
+      // straight load from the virt reg slot.
+      if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) {
+        int FrameIdx;
+        if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
+          if (FrameIdx == SS) {
+            // If this spill slot is available, turn it into a copy (or
+            // nothing) instead of leaving it as a load!
+            if (unsigned InReg = Spills.getSpillSlotPhysReg(SS)) {
+              DOUT << "Promoted Load To Copy: " << MI;
+              if (DestReg != InReg) {
+                MRI->copyRegToReg(MBB, &MI, DestReg, InReg,
+                                  MF.getSSARegMap()->getRegClass(VirtReg));
+                // Revisit the copy so we make sure to notice the effects of
+                // the operation on the destreg (either needing to RA it if
+                // it's virtual or needing to clobber any values if it's
+                // physical).
+                NextMII = &MI;
+                --NextMII;  // backtrack to the copy.
+                BackTracked = true;
+              } else
+                DOUT << "Removing now-noop copy: " << MI;
+
+              VRM.RemoveFromFoldedVirtMap(&MI);
+              MBB.erase(&MI);
+              Erased = true;
+              goto ProcessNextInst;
+            }
+          }
+        }
+      }
+
+      // If this reference is not a use, any previous store is now dead.
+      // Otherwise, the store to this stack slot is not dead anymore.
+      std::map<int, MachineInstr*>::iterator MDSI = MaybeDeadStores.find(SS);
+      if (MDSI != MaybeDeadStores.end()) {
+        if (MR & VirtRegMap::isRef)   // Previous store is not dead.
+          MaybeDeadStores.erase(MDSI);
+        else {
+          // If we get here, the store is dead, nuke it now.
+          assert((MR & VirtRegMap::isMod) && "Can't be modref!");
+          DOUT << "Removed dead store:\t" << *MDSI->second;
+          InvalidateKills(*MDSI->second, RegKills, KillOps);
+          MBB.erase(MDSI->second);
+          VRM.RemoveFromFoldedVirtMap(MDSI->second);
+          MaybeDeadStores.erase(MDSI);
+          ++NumDSE;
+        }
+      }
+
+      // If the spill slot value is available, and this is a new definition of
+      // the value, the value is not available anymore.
+      if (MR & VirtRegMap::isMod) {
+        // Notice that the value in this stack slot has been modified.
+        Spills.ModifyStackSlot(SS);
+
+        // If this is *just* a mod of the value, check to see if this is just
+        // a store to the spill slot (i.e. the spill got merged into the
+        // copy).  If so, realize that the vreg is available now, and add the
+        // store to the MaybeDeadStores info.
+        int StackSlot;
+        if (!(MR & VirtRegMap::isRef)) {
+          if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) {
+            assert(MRegisterInfo::isPhysicalRegister(SrcReg) &&
+                   "Src hasn't been allocated yet?");
+            // Okay, this is certainly a store of SrcReg to [StackSlot].  Mark
+            // this as a potentially dead store in case there is a subsequent
+            // store into the stack slot without a read from it.
+            MaybeDeadStores[StackSlot] = &MI;
+
+            // If the stack slot value was previously available in some other
+            // register, change it now.  Otherwise, make the register available
+            // in PhysReg.
+            Spills.addAvailable(StackSlot, &MI, SrcReg, false/*don't clobber*/);
+          }
+        }
+      }
+    }
+
+    // Process all of the spilled defs.
+    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI.getOperand(i);
+      if (MO.isRegister() && MO.getReg() && MO.isDef()) {
+        unsigned VirtReg = MO.getReg();
+
+        if (!MRegisterInfo::isVirtualRegister(VirtReg)) {
+          // Check to see if this is a noop copy.  If so, eliminate the
+          // instruction before considering the dest reg to be changed.
+          unsigned Src, Dst;
+          if (TII->isMoveInstr(MI, Src, Dst) && Src == Dst) {
+            ++NumDCE;
+            DOUT << "Removing now-noop copy: " << MI;
+            MBB.erase(&MI);
+            Erased = true;
+            VRM.RemoveFromFoldedVirtMap(&MI);
+            Spills.disallowClobberPhysReg(VirtReg);
+            goto ProcessNextInst;
+          }
+
+          // If it's not a no-op copy, it clobbers the value in the destreg.
+          Spills.ClobberPhysReg(VirtReg);
+          ReusedOperands.markClobbered(VirtReg);
+
+          // Check to see if this instruction is a load from a stack slot into
+          // a register.  If so, this provides the stack slot value in the reg.
+          int FrameIdx;
+          if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
+            assert(DestReg == VirtReg && "Unknown load situation!");
+
+            // Otherwise, if it wasn't available, remember that it is now!
+            Spills.addAvailable(FrameIdx, &MI, DestReg);
+            goto ProcessNextInst;
+          }
+
+          continue;
+        }
+
+        // The only vregs left are stack slot definitions.
+        int StackSlot = VRM.getStackSlot(VirtReg);
+        const TargetRegisterClass *RC = MF.getSSARegMap()->getRegClass(VirtReg);
+
+        // If this def is part of a two-address operand, make sure to execute
+        // the store from the correct physical register.
+        unsigned PhysReg;
+        int TiedOp = MI.getInstrDescriptor()->findTiedToSrcOperand(i);
+        if (TiedOp != -1)
+          PhysReg = MI.getOperand(TiedOp).getReg();
+        else {
+          PhysReg = VRM.getPhys(VirtReg);
+          if (ReusedOperands.isClobbered(PhysReg)) {
+            // Another def has taken the assigned physreg.  It must have been
+            // a use&def which got it due to reuse.  Undo the reuse!
+            PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI,
+                                 Spills, MaybeDeadStores, RegKills, KillOps);
+          }
+        }
+
+        MF.setPhysRegUsed(PhysReg);
+        ReusedOperands.markClobbered(PhysReg);
+        MRI->storeRegToStackSlot(MBB, next(MII), PhysReg, StackSlot, RC);
+        DOUT << "Store:\t" << *next(MII);
+        MI.getOperand(i).setReg(PhysReg);
+
+        // If there is a dead store to this stack slot, nuke it now.
+        MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
+        if (LastStore) {
+          DOUT << "Removed dead store:\t" << *LastStore;
+          ++NumDSE;
+          InvalidateKills(*LastStore, RegKills, KillOps);
+          MBB.erase(LastStore);
+          VRM.RemoveFromFoldedVirtMap(LastStore);
+        }
+        LastStore = next(MII);
+
+        // If the stack slot value was previously available in some other
+        // register, change it now.  Otherwise, make the register available
+        // in PhysReg.
+        Spills.ModifyStackSlot(StackSlot);
+        Spills.ClobberPhysReg(PhysReg);
+        Spills.addAvailable(StackSlot, LastStore, PhysReg);
+        ++NumStores;
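+        // Example of the dead-store elimination enabled by MaybeDeadStores:
+        // in a sequence like
+        //   ST SS#5, R0    <- recorded as a maybe-dead store
+        //   ...            <- no read of SS#5 in between
+        //   ST SS#5, R1    <- second def of the same slot
+        // the first store is provably dead and is erased above (counted by
+        // NumDSE).  Any intervening load of SS#5 removes the map entry and
+        // keeps the first store alive.  (Slot and registers illustrative.)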
+        // Check to see if this is a noop copy.  If so, eliminate the
+        // instruction before considering the dest reg to be changed.
+        {
+          unsigned Src, Dst;
+          if (TII->isMoveInstr(MI, Src, Dst) && Src == Dst) {
+            ++NumDCE;
+            DOUT << "Removing now-noop copy: " << MI;
+            MBB.erase(&MI);
+            Erased = true;
+            VRM.RemoveFromFoldedVirtMap(&MI);
+            UpdateKills(*LastStore, RegKills, KillOps);
+            goto ProcessNextInst;
+          }
+        }
+      }
+    }
+  ProcessNextInst:
+    if (!Erased && !BackTracked)
+      for (MachineBasicBlock::iterator II = MI; II != NextMII; ++II)
+        UpdateKills(*II, RegKills, KillOps);
+    MII = NextMII;
+  }
+}
+
+
+llvm::Spiller* llvm::createSpiller() {
+  switch (SpillerOpt) {
+  default: assert(0 && "Unreachable!");
+  case local:
+    return new LocalSpiller();
+  case simple:
+    return new SimpleSpiller();
+  }
+}
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
new file mode 100644
index 0000000..b7cbe51c
--- /dev/null
+++ b/lib/CodeGen/VirtRegMap.h
@@ -0,0 +1,211 @@
+//===-- llvm/CodeGen/VirtRegMap.h - Virtual Register Map -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a virtual register map.  This maps virtual registers to
+// physical registers and virtual registers to stack slots.  It is created and
+// updated by a register allocator and then used by a machine code rewriter
+// that adds spill code and rewrites virtual into physical register references.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_VIRTREGMAP_H
+#define LLVM_CODEGEN_VIRTREGMAP_H
+
+#include "llvm/Target/MRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/Support/Streams.h"
+#include <map>
+
+namespace llvm {
+  class MachineInstr;
+  class TargetInstrInfo;
+
+  class VirtRegMap {
+  public:
+    enum {
+      NO_PHYS_REG = 0,
+      NO_STACK_SLOT = (1L << 30)-1,
+      MAX_STACK_SLOT = (1L << 18)-1
+    };
+
+    enum ModRef { isRef = 1, isMod = 2, isModRef = 3 };
+    typedef std::multimap<MachineInstr*,
+                          std::pair<unsigned, ModRef> > MI2VirtMapTy;
+
+  private:
+    const TargetInstrInfo &TII;
+
+    MachineFunction &MF;
+    /// Virt2PhysMap - This is a virtual to physical register
+    /// mapping.  Each virtual register is required to have an entry in
+    /// it; even spilled virtual registers (the register mapped to a
+    /// spilled register is the temporary used to load it from the
+    /// stack).
+    IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysMap;
+    /// Virt2StackSlotMap - This is a virtual register to stack slot
+    /// mapping.  Each spilled virtual register has an entry in it
+    /// which corresponds to the stack slot this register is spilled
+    /// at.
+    IndexedMap<int, VirtReg2IndexFunctor> Virt2StackSlotMap;
+    /// MI2VirtMap - This is a MachineInstr to virtual register
+    /// mapping.  In the case of memory spill code being folded into
+    /// instructions, we need to know which virtual register was
+    /// read/written by this instruction.
+    MI2VirtMapTy MI2VirtMap;
+
+    /// ReMatMap - This is a virtual register to re-materialized instruction
+    /// mapping.  Each virtual register whose definition is going to be
+    /// re-materialized has an entry in it.
+    std::map<unsigned, const MachineInstr*> ReMatMap;
+
+    /// ReMatId - Instead of assigning a stack slot to a to-be-rematerialized
+    /// virtual register, a unique id is assigned.  This keeps track of
+    /// the highest id used so far.
+    /// Note, this starts at (1<<18) to avoid conflicts with stack slot
+    /// numbers.
+    int ReMatId;
+
+    VirtRegMap(const VirtRegMap&);     // DO NOT IMPLEMENT
+    void operator=(const VirtRegMap&); // DO NOT IMPLEMENT
+
+  public:
+    VirtRegMap(MachineFunction &mf);
+
+    void grow();
+
+    /// @brief returns true if the specified virtual register is
+    /// mapped to a physical register
+    bool hasPhys(unsigned virtReg) const {
+      return getPhys(virtReg) != NO_PHYS_REG;
+    }
+
+    /// @brief returns the physical register mapped to the specified
+    /// virtual register
+    unsigned getPhys(unsigned virtReg) const {
+      assert(MRegisterInfo::isVirtualRegister(virtReg));
+      return Virt2PhysMap[virtReg];
+    }
+
+    /// @brief creates a mapping for the specified virtual register to
+    /// the specified physical register
+    void assignVirt2Phys(unsigned virtReg, unsigned physReg) {
+      assert(MRegisterInfo::isVirtualRegister(virtReg) &&
+             MRegisterInfo::isPhysicalRegister(physReg));
+      assert(Virt2PhysMap[virtReg] == NO_PHYS_REG &&
+             "attempt to assign physical register to already mapped "
+             "virtual register");
+      Virt2PhysMap[virtReg] = physReg;
+    }
+
+    /// @brief clears the specified virtual register's physical
+    /// register mapping
+    void clearVirt(unsigned virtReg) {
+      assert(MRegisterInfo::isVirtualRegister(virtReg));
+      assert(Virt2PhysMap[virtReg] != NO_PHYS_REG &&
+             "attempt to clear an unassigned virtual register");
+      Virt2PhysMap[virtReg] = NO_PHYS_REG;
+    }
+
+    /// @brief clears all virtual to physical register mappings
+    void clearAllVirt() {
+      Virt2PhysMap.clear();
+      grow();
+    }
+
+    /// @brief returns true if the specified virtual register is
+    /// mapped to a stack slot
+    bool hasStackSlot(unsigned virtReg) const {
+      return getStackSlot(virtReg) != NO_STACK_SLOT;
+    }
+
+    /// @brief returns the stack slot mapped to the specified virtual
+    /// register
+    int getStackSlot(unsigned virtReg) const {
+      assert(MRegisterInfo::isVirtualRegister(virtReg));
+      return Virt2StackSlotMap[virtReg];
+    }
+
+    /// @brief create a mapping for the specified virtual register to
+    /// the next available stack slot
+    int assignVirt2StackSlot(unsigned virtReg);
+    /// @brief create a mapping for the specified virtual register to
+    /// the specified stack slot
+    void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
+
+    /// @brief assign a unique re-materialization id to the specified
+    /// virtual register.
+    int assignVirtReMatId(unsigned virtReg);
+
+    /// @brief returns true if the specified virtual register is being
+    /// re-materialized.
+    bool isReMaterialized(unsigned virtReg) const {
+      return ReMatMap.count(virtReg) != 0;
+    }
+
+    /// @brief returns the original machine instruction being re-issued
+    /// to re-materialize the specified virtual register.
+    const MachineInstr *getReMaterializedMI(unsigned virtReg) {
+      return ReMatMap[virtReg];
+    }
+
+    /// @brief records that the specified virtual register will be
+    /// re-materialized and the original instruction which will be re-issued
+    /// for this purpose.
+    void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) {
+      ReMatMap[virtReg] = def;
+    }
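+    // For example: ReMatId starts at (1<<18), so the first two calls to
+    // assignVirtReMatId are expected to hand out the ids (1<<18) and
+    // (1<<18)+1.  The spiller's debug output prints these as RM#0 and RM#1,
+    // i.e. id - MAX_STACK_SLOT - 1, so re-materialization ids can never
+    // collide with real stack slot numbers.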
+    /// @brief Updates information about the specified virtual register's
+    /// value folded into the newMI machine instruction.  The OpNum argument
+    /// indicates the operand number of OldMI that is folded.
+    void virtFolded(unsigned VirtReg, MachineInstr *OldMI, unsigned OpNum,
+                    MachineInstr *NewMI);
+
+    /// @brief returns the virtual registers (and their ModRef kinds) whose
+    /// values are folded into the memory operands of this instruction
+    std::pair<MI2VirtMapTy::const_iterator, MI2VirtMapTy::const_iterator>
+    getFoldedVirts(MachineInstr* MI) const {
+      return MI2VirtMap.equal_range(MI);
+    }
+
+    /// RemoveFromFoldedVirtMap - If the specified machine instruction is in
+    /// the folded instruction map, remove its entry from the map.
+    void RemoveFromFoldedVirtMap(MachineInstr *MI) {
+      MI2VirtMap.erase(MI);
+    }
+
+    void print(std::ostream &OS) const;
+    void print(std::ostream *OS) const { if (OS) print(*OS); }
+    void dump() const;
+  };
+
+  inline std::ostream *operator<<(std::ostream *OS, const VirtRegMap &VRM) {
+    VRM.print(OS);
+    return OS;
+  }
+  inline std::ostream &operator<<(std::ostream &OS, const VirtRegMap &VRM) {
+    VRM.print(OS);
+    return OS;
+  }
+
+  /// Spiller interface: Implementations of this interface assign spilled
+  /// virtual registers to stack slots, rewriting the code.
+  struct Spiller {
+    virtual ~Spiller();
+    virtual bool runOnMachineFunction(MachineFunction &MF,
+                                      VirtRegMap &VRM) = 0;
+  };
+
+  /// createSpiller - Create and return a spiller object, as specified on the
+  /// command line.
+  Spiller* createSpiller();
+
+} // End llvm namespace
+
+#endif
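+// A minimal sketch of how a register allocator is expected to drive this
+// interface (the vreg/physreg names below are hypothetical):
+//
+//   VirtRegMap VRM(MF);                  // built up during allocation
+//   VRM.assignVirt2Phys(V1, PhysR);      // V1 received a physical register
+//   VRM.assignVirt2StackSlot(V2);        // V2 was spilled
+//   Spiller *S = createSpiller();        // local or simple, per command line
+//   S->runOnMachineFunction(MF, VRM);    // insert spill code, rewrite regs
+//   delete S;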