35 files changed, 2848 insertions, 579 deletions
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 413142e..3d58306 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -136,21 +136,21 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
 
 void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
                                        raw_ostream &O) {
-  char Value = MI->getOperand(OpNo).getImm();
+  int Value = MI->getOperand(OpNo).getImm();
   Value = SignExtend32<5>(Value);
   O << (int)Value;
 }
 
 void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo,
                                        raw_ostream &O) {
-  unsigned char Value = MI->getOperand(OpNo).getImm();
+  unsigned int Value = MI->getOperand(OpNo).getImm();
   assert(Value <= 31 && "Invalid u5imm argument!");
   O << (unsigned int)Value;
 }
 
 void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
                                        raw_ostream &O) {
-  unsigned char Value = MI->getOperand(OpNo).getImm();
+  unsigned int Value = MI->getOperand(OpNo).getImm();
   assert(Value <= 63 && "Invalid u6imm argument!");
   O << (unsigned int)Value;
 }
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 48de583..f24edf6 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -13,8 +13,8 @@
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCFixupKindInfo.h"
 #include "llvm/MC/MCMachObjectWriter.h"
-#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Object/MachOFormat.h"
 #include "llvm/Support/ELF.h"
@@ -29,9 +29,16 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
   case FK_Data_1:
   case FK_Data_2:
   case FK_Data_4:
+  case FK_Data_8:
+  case PPC::fixup_ppc_toc:
+  case PPC::fixup_ppc_tlsreg:
+  case PPC::fixup_ppc_nofixup:
     return Value;
+  case PPC::fixup_ppc_lo14:
+  case PPC::fixup_ppc_toc16_ds:
+    return (Value & 0xffff) << 2;
   case PPC::fixup_ppc_brcond14:
-    return Value & 0x3ffc;
+    return Value & 0xfffc;
   case PPC::fixup_ppc_br24:
     return Value & 0x3fffffc;
 #if 0
@@ -41,6 +48,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
   case PPC::fixup_ppc_ha16:
     return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff;
   case PPC::fixup_ppc_lo16:
+  case PPC::fixup_ppc_toc16:
     return Value & 0xffff;
   }
 }
@@ -55,7 +63,9 @@ public:
   void RecordRelocation(MachObjectWriter *Writer,
                         const MCAssembler &Asm, const MCAsmLayout &Layout,
                         const MCFragment *Fragment, const MCFixup &Fixup,
-                        MCValue Target, uint64_t &FixedValue) {}
+                        MCValue Target, uint64_t &FixedValue) {
+    llvm_unreachable("Relocation emission for MachO/PPC unimplemented!");
+  }
 };
 
 class PPCAsmBackend : public MCAsmBackend {
@@ -72,7 +82,12 @@ public:
       { "fixup_ppc_brcond14",    16,     14,   MCFixupKindInfo::FKF_IsPCRel },
       { "fixup_ppc_lo16",        16,     16,   0 },
       { "fixup_ppc_ha16",        16,     16,   0 },
-      { "fixup_ppc_lo14",        16,     14,   0 }
+      { "fixup_ppc_lo14",        16,     14,   0 },
+      { "fixup_ppc_toc",          0,     64,   0 },
+      { "fixup_ppc_toc16",       16,     16,   0 },
+      { "fixup_ppc_toc16_ds",    16,     14,   0 },
+      { "fixup_ppc_tlsreg",       0,      0,   0 },
+      { "fixup_ppc_nofixup",      0,      0,   0 }
     };
 
     if (Kind < FirstTargetFixupKind)
@@ -83,6 +98,20 @@ public:
     return Infos[Kind - FirstTargetFixupKind];
   }
 
+  void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+                  uint64_t Value) const {
+    Value = adjustFixupValue(Fixup.getKind(), Value);
+    if (!Value) return;           // Doesn't change encoding.
+
+    unsigned Offset = Fixup.getOffset();
+
+    // For each byte of the fragment that the fixup touches, mask in the bits
+    // from the fixup value. The Value has been "split up" into the appropriate
+    // bitfields above.
+    for (unsigned i = 0; i != 4; ++i)
+      Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff);
+  }
+
   bool mayNeedRelaxation(const MCInst &Inst) const {
     // FIXME.
     return false;
@@ -90,7 +119,7 @@ public:
 
   bool fixupNeedsRelaxation(const MCFixup &Fixup,
                             uint64_t Value,
-                            const MCInstFragment *DF,
+                            const MCRelaxableFragment *DF,
                             const MCAsmLayout &Layout) const {
     // FIXME.
     llvm_unreachable("relaxInstruction() unimplemented");
@@ -126,11 +155,6 @@ namespace {
   public:
     DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { }
 
-    void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
-                    uint64_t Value) const {
-      llvm_unreachable("UNIMP");
-    }
-
     MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
       bool is64 = getPointerSize() == 8;
       return createMachObjectWriter(new PPCMachObjectWriter(
@@ -152,19 +176,6 @@ namespace {
     ELFPPCAsmBackend(const Target &T, uint8_t OSABI) :
       PPCAsmBackend(T), OSABI(OSABI) { }
 
-    void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
-                    uint64_t Value) const {
-      Value = adjustFixupValue(Fixup.getKind(), Value);
-      if (!Value) return;           // Doesn't change encoding.
-
-      unsigned Offset = Fixup.getOffset();
-
-      // For each byte of the fragment that the fixup touches, mask in the bits from
-      // the fixup value. The Value has been "split up" into the appropriate
-      // bitfields above.
-      for (unsigned i = 0; i != 4; ++i)
-        Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff);
-    }
 
     MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
       bool is64 = getPointerSize() == 8;
@@ -181,7 +192,7 @@ namespace {
 
 
 
-MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT) {
+MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT, StringRef CPU) {
   if (Triple(TT).isOSDarwin())
     return new DarwinPPCAsmBackend(T);
 
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index a197981..d61e741 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -7,9 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/PPCFixupKinds.h"
 #include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
 #include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
@@ -21,13 +24,35 @@ namespace {
 
     virtual ~PPCELFObjectWriter();
   protected:
+    virtual unsigned getRelocTypeInner(const MCValue &Target,
+                                       const MCFixup &Fixup,
+                                       bool IsPCRel) const;
     virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
                                   bool IsPCRel, bool IsRelocWithSymbol,
                                   int64_t Addend) const;
+    virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target,
+                                                    const MCFixup &Fixup,
+                                                    bool IsPCRel) const;
     virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset);
+
+    virtual void sortRelocs(const MCAssembler &Asm,
+                            std::vector<ELFRelocationEntry> &Relocs);
+  };
+
+  class PPCELFRelocationEntry : public ELFRelocationEntry {
+  public:
+    PPCELFRelocationEntry(const ELFRelocationEntry &RE);
+    bool operator<(const PPCELFRelocationEntry &RE) const {
+      return (RE.r_offset < r_offset ||
+              (RE.r_offset == r_offset && RE.Type > Type));
+    }
   };
 }
 
+PPCELFRelocationEntry::PPCELFRelocationEntry(const ELFRelocationEntry &RE)
+  : ELFRelocationEntry(RE.r_offset, RE.Index, RE.Type, RE.Symbol,
+                       RE.r_addend, *RE.Fixup) {}
+
 PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
   : MCELFObjectTargetWriter(Is64Bit, OSABI,
                             Is64Bit ?  ELF::EM_PPC64 : ELF::EM_PPC,
@@ -36,11 +61,13 @@ PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
 PPCELFObjectWriter::~PPCELFObjectWriter() {
 }
 
-unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
-                                             const MCFixup &Fixup,
-                                             bool IsPCRel,
-                                             bool IsRelocWithSymbol,
-                                             int64_t Addend) const {
+unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
+                                               const MCFixup &Fixup,
+                                               bool IsPCRel) const
+{
+  MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+    MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
   // determine the type of the relocation
   unsigned Type;
   if (IsPCRel) {
@@ -50,9 +77,14 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
     case PPC::fixup_ppc_br24:
       Type = ELF::R_PPC_REL24;
       break;
+    case FK_Data_4:
     case FK_PCRel_4:
       Type = ELF::R_PPC_REL32;
       break;
+    case FK_Data_8:
+    case FK_PCRel_8:
+      Type = ELF::R_PPC64_REL64;
+      break;
     }
   } else {
     switch ((unsigned)Fixup.getKind()) {
@@ -61,17 +93,105 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
       Type = ELF::R_PPC_ADDR24;
       break;
     case PPC::fixup_ppc_brcond14:
-      Type = ELF::R_PPC_ADDR14_BRTAKEN; // XXX: or BRNTAKEN?_
+      Type = ELF::R_PPC_ADDR14; // XXX: or BRNTAKEN?_
       break;
     case PPC::fixup_ppc_ha16:
-      Type = ELF::R_PPC_ADDR16_HA;
+      switch (Modifier) {
+      default: llvm_unreachable("Unsupported Modifier");
+      case MCSymbolRefExpr::VK_PPC_TPREL16_HA:
+        Type = ELF::R_PPC_TPREL16_HA;
+        break;
+      case MCSymbolRefExpr::VK_PPC_DTPREL16_HA:
+        Type = ELF::R_PPC64_DTPREL16_HA;
+        break;
+      case MCSymbolRefExpr::VK_None:
+        Type = ELF::R_PPC_ADDR16_HA;
+	break;
+      case MCSymbolRefExpr::VK_PPC_TOC16_HA:
+        Type = ELF::R_PPC64_TOC16_HA;
+        break;
+      case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA:
+        Type = ELF::R_PPC64_GOT_TPREL16_HA;
+        break;
+      case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA:
+        Type = ELF::R_PPC64_GOT_TLSGD16_HA;
+        break;
+      case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA:
+        Type = ELF::R_PPC64_GOT_TLSLD16_HA;
+        break;
+      }
       break;
     case PPC::fixup_ppc_lo16:
-      Type = ELF::R_PPC_ADDR16_LO;
+      switch (Modifier) {
+      default: llvm_unreachable("Unsupported Modifier");
+      case MCSymbolRefExpr::VK_PPC_TPREL16_LO:
+        Type = ELF::R_PPC_TPREL16_LO;
+        break;
+      case MCSymbolRefExpr::VK_PPC_DTPREL16_LO:
+        Type = ELF::R_PPC64_DTPREL16_LO;
+        break;
+      case MCSymbolRefExpr::VK_None:
+        Type = ELF::R_PPC_ADDR16_LO;
+	break;
+      case MCSymbolRefExpr::VK_PPC_TOC16_LO:
+        Type = ELF::R_PPC64_TOC16_LO;
+        break;
+      case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO:
+        Type = ELF::R_PPC64_GOT_TLSGD16_LO;
+        break;
+      case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO:
+        Type = ELF::R_PPC64_GOT_TLSLD16_LO;
+        break;
+      }
       break;
     case PPC::fixup_ppc_lo14:
       Type = ELF::R_PPC_ADDR14;
       break;
+    case PPC::fixup_ppc_toc:
+      Type = ELF::R_PPC64_TOC;
+      break;
+    case PPC::fixup_ppc_toc16:
+      Type = ELF::R_PPC64_TOC16;
+      break;
+    case PPC::fixup_ppc_toc16_ds:
+      switch (Modifier) {
+      default: llvm_unreachable("Unsupported Modifier");
+      case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
+        Type = ELF::R_PPC64_TOC16_DS;
+	break;
+      case MCSymbolRefExpr::VK_PPC_TOC16_LO:
+        Type = ELF::R_PPC64_TOC16_LO_DS;
+        break;
+      case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO:
+        Type = ELF::R_PPC64_GOT_TPREL16_LO_DS;
+        break;
+      }
+      break;
+    case PPC::fixup_ppc_tlsreg:
+      Type = ELF::R_PPC64_TLS;
+      break;
+    case PPC::fixup_ppc_nofixup:
+      switch (Modifier) {
+      default: llvm_unreachable("Unsupported Modifier");
+      case MCSymbolRefExpr::VK_PPC_TLSGD:
+        Type = ELF::R_PPC64_TLSGD;
+        break;
+      case MCSymbolRefExpr::VK_PPC_TLSLD:
+        Type = ELF::R_PPC64_TLSLD;
+        break;
+      }
+      break;
+    case FK_Data_8:
+      switch (Modifier) {
+      default: llvm_unreachable("Unsupported Modifier");
+      case MCSymbolRefExpr::VK_PPC_TOC:
+        Type = ELF::R_PPC64_TOC;
+        break;
+      case MCSymbolRefExpr::VK_None:
+        Type = ELF::R_PPC64_ADDR64;
+	break;
+      }
+      break;
     case FK_Data_4:
       Type = ELF::R_PPC_ADDR32;
       break;
@@ -83,11 +203,41 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
   return Type;
 }
 
+unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
+                                          const MCFixup &Fixup,
+                                          bool IsPCRel,
+                                          bool IsRelocWithSymbol,
+                                          int64_t Addend) const {
+  return getRelocTypeInner(Target, Fixup, IsPCRel);
+}
+
+const MCSymbol *PPCELFObjectWriter::undefinedExplicitRelSym(const MCValue &Target,
+                                                            const MCFixup &Fixup,
+                                                            bool IsPCRel) const {
+  assert(Target.getSymA() && "SymA cannot be 0");
+  const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol();
+
+  unsigned RelocType = getRelocTypeInner(Target, Fixup, IsPCRel);
+
+  // The .odp creation emits a relocation against the symbol ".TOC." which
+  // create a R_PPC64_TOC relocation. However the relocation symbol name
+  // in final object creation should be NULL, since the symbol does not
+  // really exist, it is just the reference to TOC base for the current
+  // object file.
+  bool EmitThisSym = RelocType != ELF::R_PPC64_TOC;
+
+  if (EmitThisSym && !Symbol.isTemporary())
+    return &Symbol;
+  return NULL;
+}
+
 void PPCELFObjectWriter::
 adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
   switch ((unsigned)Fixup.getKind()) {
     case PPC::fixup_ppc_ha16:
     case PPC::fixup_ppc_lo16:
+    case PPC::fixup_ppc_toc16:
+    case PPC::fixup_ppc_toc16_ds:
       RelocOffset += 2;
       break;
     default:
@@ -95,6 +245,34 @@ adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
   }
 }
 
+// The standard sorter only sorts on the r_offset field, but PowerPC can
+// have multiple relocations at the same offset.  Sort secondarily on the
+// relocation type to avoid nondeterminism.
+void PPCELFObjectWriter::sortRelocs(const MCAssembler &Asm,
+                                    std::vector<ELFRelocationEntry> &Relocs) {
+
+  // Copy to a temporary vector of relocation entries having a different
+  // sort function.
+  std::vector<PPCELFRelocationEntry> TmpRelocs;
+  
+  for (std::vector<ELFRelocationEntry>::iterator R = Relocs.begin();
+       R != Relocs.end(); ++R) {
+    TmpRelocs.push_back(PPCELFRelocationEntry(*R));
+  }
+
+  // Sort in place by ascending r_offset and descending r_type.
+  array_pod_sort(TmpRelocs.begin(), TmpRelocs.end());
+
+  // Copy back to the original vector.
+  unsigned I = 0;
+  for (std::vector<PPCELFRelocationEntry>::iterator R = TmpRelocs.begin();
+       R != TmpRelocs.end(); ++R, ++I) {
+    Relocs[I] = ELFRelocationEntry(R->r_offset, R->Index, R->Type,
+                                   R->Symbol, R->r_addend, *R->Fixup);
+  }
+}
+
+
 MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
                                                bool Is64Bit,
                                                uint8_t OSABI) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index b3c889e..7917f77 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -34,6 +34,23 @@ enum Fixups {
   /// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs
   /// like 'std'.
   fixup_ppc_lo14,
+
+  /// fixup_ppc_toc - Insert value of TOC base (.TOC.).
+  fixup_ppc_toc,
+
+  /// fixup_ppc_toc16 - A 16-bit signed fixup relative to the TOC base.
+  fixup_ppc_toc16,
+
+  /// fixup_ppc_toc16_ds - A 14-bit signed fixup relative to the TOC base with
+  /// implied 2 zero bits
+  fixup_ppc_toc16_ds,
+
+  /// fixup_ppc_tlsreg - Insert thread-pointer register number.
+  fixup_ppc_tlsreg,
+
+  /// fixup_ppc_nofixup - Not a true fixup, but ties a symbol to a call
+  /// to __tls_get_addr for the TLS general and local dynamic models.
+  fixup_ppc_nofixup,
   
   // Marker
   LastTargetFixupKind,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index b9ea8b5..215aa40 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -59,8 +59,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
   HasLEB128 = true;  // Target asm supports leb128 directives (little-endian)
 
   // Exceptions handling
-  if (!is64Bit)
-    ExceptionsType = ExceptionHandling::DwarfCFI;
+  ExceptionsType = ExceptionHandling::DwarfCFI;
     
   ZeroDirective = "\t.space\t";
   Data64bitsDirective = is64Bit ? "\t.quad\t" : 0;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index f652422..d048426 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -12,29 +12,45 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "MCTargetDesc/PPCBaseInfo.h"
 #include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
 
 namespace {
 class PPCMCCodeEmitter : public MCCodeEmitter {
-  PPCMCCodeEmitter(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT
-  void operator=(const PPCMCCodeEmitter &);   // DO NOT IMPLEMENT
-  
+  PPCMCCodeEmitter(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+  void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+
+  const MCSubtargetInfo &STI;
+  Triple TT;
+
 public:
   PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
-                   MCContext &ctx) {
+                   MCContext &ctx)
+    : STI(sti), TT(STI.getTargetTriple()) {
   }
   
   ~PPCMCCodeEmitter() {}
 
+  bool is64BitMode() const {
+    return (STI.getFeatureBits() & PPC::Feature64Bit) != 0;
+  }
+
+  bool isSVR4ABI() const {
+    return TT.isMacOSX() == 0;
+  }
+
   unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
                                SmallVectorImpl<MCFixup> &Fixups) const;
   unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
@@ -47,6 +63,8 @@ public:
                             SmallVectorImpl<MCFixup> &Fixups) const;
   unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
                              SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+                             SmallVectorImpl<MCFixup> &Fixups) const;
   unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
                                SmallVectorImpl<MCFixup> &Fixups) const;
 
@@ -61,11 +79,20 @@ public:
                                  SmallVectorImpl<MCFixup> &Fixups) const;
   void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                          SmallVectorImpl<MCFixup> &Fixups) const {
-    unsigned Bits = getBinaryCodeForInstr(MI, Fixups);
+    uint64_t Bits = getBinaryCodeForInstr(MI, Fixups);
+
+    // BL8_NOP_ELF, BLA8_NOP_ELF, etc., all have a size of 8 because of the
+    // following 'nop'.
+    unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value!
+    unsigned Opcode = MI.getOpcode();
+    if (Opcode == PPC::BL8_NOP_ELF || Opcode == PPC::BLA8_NOP_ELF ||
+        Opcode == PPC::BL8_NOP_ELF_TLSGD || Opcode == PPC::BL8_NOP_ELF_TLSLD)
+      Size = 8;
     
     // Output the constant in big endian byte order.
-    for (unsigned i = 0; i != 4; ++i) {
-      OS << (char)(Bits >> 24);
+    int ShiftValue = (Size * 8) - 8;
+    for (unsigned i = 0; i != Size; ++i) {
+      OS << (char)(Bits >> ShiftValue);
       Bits <<= 8;
     }
     
@@ -92,6 +119,17 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
   // Add a fixup for the branch target.
   Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
                                    (MCFixupKind)PPC::fixup_ppc_br24));
+
+  // For special TLS calls, add another fixup for the symbol.  Apparently
+  // BL8_NOP_ELF, BL8_NOP_ELF_TLSGD, and BL8_NOP_ELF_TLSLD are sufficiently
+  // similar that TblGen will not generate a separate case for the latter
+  // two, so this is the only way to get the extra fixup generated.
+  unsigned Opcode = MI.getOpcode();
+  if (Opcode == PPC::BL8_NOP_ELF_TLSGD || Opcode == PPC::BL8_NOP_ELF_TLSLD) {
+    const MCOperand &MO2 = MI.getOperand(OpNo+1);
+    Fixups.push_back(MCFixup::Create(0, MO2.getExpr(),
+                                     (MCFixupKind)PPC::fixup_ppc_nofixup));
+  }
   return 0;
 }
 
@@ -140,8 +178,12 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
     return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits;
   
   // Add a fixup for the displacement field.
-  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
-                                   (MCFixupKind)PPC::fixup_ppc_lo16));
+  if (isSVR4ABI() && is64BitMode())
+    Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                     (MCFixupKind)PPC::fixup_ppc_toc16));
+  else
+    Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                     (MCFixupKind)PPC::fixup_ppc_lo16));
   return RegBits;
 }
 
@@ -158,17 +200,36 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
     return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits;
   
   // Add a fixup for the branch target.
-  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
-                                   (MCFixupKind)PPC::fixup_ppc_lo14));
+  if (isSVR4ABI() && is64BitMode())
+    Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                     (MCFixupKind)PPC::fixup_ppc_toc16_ds));
+  else
+    Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                     (MCFixupKind)PPC::fixup_ppc_lo14));
   return RegBits;
 }
 
 
+unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+                                       SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups);
+  
+  // Add a fixup for the TLS register, which simply provides a relocation
+  // hint to the linker that this statement is part of a relocation sequence.
+  // Return the thread-pointer register's encoding.
+  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_tlsreg));
+  return getPPCRegisterNumbering(PPC::X13);
+}
+
 unsigned PPCMCCodeEmitter::
 get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
                     SmallVectorImpl<MCFixup> &Fixups) const {
   const MCOperand &MO = MI.getOperand(OpNo);
-  assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+  assert((MI.getOpcode() == PPC::MTCRF || 
+          MI.getOpcode() == PPC::MFOCRF ||
+          MI.getOpcode() == PPC::MTCRF8) &&
          (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
   return 0x80 >> getPPCRegisterNumbering(MO.getReg());
 }
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 6568e82..2209f93 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -12,14 +12,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCMCTargetDesc.h"
-#include "PPCMCAsmInfo.h"
 #include "InstPrinter/PPCInstPrinter.h"
-#include "llvm/MC/MachineLocation.h"
+#include "PPCMCAsmInfo.h"
 #include "llvm/MC/MCCodeGenInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MachineLocation.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
 
@@ -70,7 +70,7 @@ static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) {
 
   // Initial state of the frame pointer is R1.
   MachineLocation Dst(MachineLocation::VirtualFP);
-  MachineLocation Src(PPC::R1, 0);
+  MachineLocation Src(isPPC64? PPC::X1 : PPC::R1, 0);
   MAI->addInitialFrameState(0, Dst, Src);
 
   return MAI;
@@ -88,6 +88,11 @@ static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
     else
       RM = Reloc::Static;
   }
+  if (CM == CodeModel::Default) {
+    Triple T(TT);
+    if (!T.isOSDarwin() && T.getArch() == Triple::ppc64)
+      CM = CodeModel::Medium;
+  }
   X->InitMCCodeGenInfo(RM, CM, OL);
   return X;
 }
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 7162e15..4a42092 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -14,6 +14,9 @@
 #ifndef PPCMCTARGETDESC_H
 #define PPCMCTARGETDESC_H
 
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
 #include "llvm/Support/DataTypes.h"
 
 namespace llvm {
@@ -36,7 +39,7 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
                                       const MCSubtargetInfo &STI,
                                       MCContext &Ctx);
 
-MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT);
+MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT, StringRef CPU);
 
 /// createPPCELFObjectWriter - Construct an PPC ELF object writer.
 MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index f872e86..972e138 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -14,6 +14,9 @@
 #ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
 #define LLVM_TARGET_POWERPC_PPCPREDICATES_H
 
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
 namespace llvm {
 namespace PPC {
   /// Predicate - These are "(BI << 5) | BO"  for various predicates.
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 9103e12..e6d38eb 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -65,13 +65,14 @@ namespace llvm {
     MO_NLP_HIDDEN_FLAG = 16,
 
     /// The next are not flags but distinct values.
-    MO_ACCESS_MASK = 224,
+    MO_ACCESS_MASK = 0xe0,
 
     /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
-    MO_LO16 = 32, MO_HA16 = 64,
+    MO_LO16 = 1 << 5,
+    MO_HA16 = 2 << 5,
 
-    MO_TPREL16_HA = 96,
-    MO_TPREL16_LO = 128
+    MO_TPREL16_HA = 3 << 5,
+    MO_TPREL16_LO = 4 << 5
   };
   } // end namespace PPCII
   
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 6e0e8bb..adb673b 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -18,14 +18,13 @@
 
 #define DEBUG_TYPE "asmprinter"
 #include "PPC.h"
-#include "PPCTargetMachine.h"
-#include "PPCSubtarget.h"
 #include "InstPrinter/PPCInstPrinter.h"
 #include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -33,33 +32,36 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 namespace {
   class PPCAsmPrinter : public AsmPrinter {
   protected:
-    DenseMap<MCSymbol*, MCSymbol*> TOC;
+    MapVector<MCSymbol*, MCSymbol*> TOC;
     const PPCSubtarget &Subtarget;
     uint64_t TOCLabelID;
   public:
@@ -71,6 +73,7 @@ namespace {
       return "PowerPC Assembly Printer";
     }
 
+    MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
 
     virtual void EmitInstruction(const MachineInstr *MI);
 
@@ -284,8 +287,22 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
                                           unsigned AsmVariant,
                                           const char *ExtraCode,
                                           raw_ostream &O) {
-  if (ExtraCode && ExtraCode[0])
-    return true; // Unknown modifier.
+  if (ExtraCode && ExtraCode[0]) {
+    if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+    switch (ExtraCode[0]) {
+    default: return true;  // Unknown modifier.
+    case 'y': // A memory reference for an X-form instruction
+      {
+        const char *RegName = "r0";
+        if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
+        O << RegName << ", ";
+        printOperand(MI, OpNo, O);
+        return false;
+      }
+    }
+  }
+
   assert(MI->getOperand(OpNo).isReg());
   O << "0(";
   printOperand(MI, OpNo, O);
@@ -294,6 +311,25 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
 }
 
 
+/// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry
+/// exists for it.  If not, create one.  Then return a symbol that references
+/// the TOC entry.
+MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
+
+  MCSymbol *&TOCEntry = TOC[Sym];
+
+  // To avoid name clash check if the name already exists.
+  while (TOCEntry == 0) {
+    if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
+                                "C" + Twine(TOCLabelID++)) == 0) {
+      TOCEntry = GetTempSymbol("C", TOCLabelID);
+    }
+  }
+
+  return TOCEntry;
+}
+
+
 /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
 /// the current output stream.
 ///
@@ -334,14 +370,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     MCSymbol *PICBase = MF->getPICBaseSymbol();
     
     // Emit the 'bl'.
-    TmpInst.setOpcode(PPC::BL_Darwin); // Darwin vs SVR4 doesn't matter here.
-    
-    
-    // FIXME: We would like an efficient form for this, so we don't have to do
-    // a lot of extra uniquing.
-    TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::
-                                             Create(PICBase, OutContext)));
-    OutStreamer.EmitInstruction(TmpInst);
+    OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL_Darwin) // Darwin vs SVR4 doesn't matter here.
+      // FIXME: We would like an efficient form for this, so we don't have to do
+      // a lot of extra uniquing.
+      .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
     
     // Emit the label.
     OutStreamer.EmitLabel(PICBase);
@@ -367,10 +399,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       MOSymbol = GetCPISymbol(MO.getIndex());
     else if (MO.isJTI())
       MOSymbol = GetJTISymbol(MO.getIndex());
-    MCSymbol *&TOCEntry = TOC[MOSymbol];
-    if (TOCEntry == 0)
-      TOCEntry = GetTempSymbol("C", TOCLabelID++);
-    
+
+    MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+
     const MCExpr *Exp =
       MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC_ENTRY,
                               OutContext);
@@ -379,15 +410,298 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     return;
   }
       
+  case PPC::ADDIStocHA: {
+    // Transform %Xd = ADDIStocHA %X2, <ga:@sym>
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+    // Change the opcode to ADDIS8.  If the global address is external,
+    // has common linkage, is a function address, or is a jump table
+    // address, then generate a TOC entry and reference that.  Otherwise
+    // reference the symbol directly.
+    TmpInst.setOpcode(PPC::ADDIS8);
+    const MachineOperand &MO = MI->getOperand(2);
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) &&
+           "Invalid operand for ADDIStocHA!");
+    MCSymbol *MOSymbol = 0;
+    bool IsExternal = false;
+    bool IsFunction = false;
+    bool IsCommon = false;
+    bool IsAvailExt = false;
+
+    if (MO.isGlobal()) {
+      const GlobalValue *GValue = MO.getGlobal();
+      const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+      const GlobalValue *RealGValue = GAlias ?
+        GAlias->resolveAliasedGlobal(false) : GValue;
+      MOSymbol = Mang->getSymbol(RealGValue);
+      const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+      IsExternal = GVar && !GVar->hasInitializer();
+      IsCommon = GVar && RealGValue->hasCommonLinkage();
+      IsFunction = !GVar;
+      IsAvailExt = GVar && RealGValue->hasAvailableExternallyLinkage();
+    } else if (MO.isCPI())
+      MOSymbol = GetCPISymbol(MO.getIndex());
+    else if (MO.isJTI())
+      MOSymbol = GetJTISymbol(MO.getIndex());
+
+    if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI())
+      MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+
+    const MCExpr *Exp =
+      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_HA,
+                              OutContext);
+    TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+  case PPC::LDtocL: {
+    // Transform %Xd = LDtocL <ga:@sym>, %Xs
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+    // Change the opcode to LDrs, which is a form of LD with the offset
+    // specified by a SymbolLo.  If the global address is external, has
+    // common linkage, or is a jump table address, then reference the
+    // associated TOC entry.  Otherwise reference the symbol directly.
+    TmpInst.setOpcode(PPC::LDrs);
+    const MachineOperand &MO = MI->getOperand(1);
+    assert((MO.isGlobal() || MO.isJTI()) && "Invalid operand for LDtocL!");
+    MCSymbol *MOSymbol = 0;
+
+    if (MO.isJTI())
+      MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
+    else {
+      const GlobalValue *GValue = MO.getGlobal();
+      const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+      const GlobalValue *RealGValue = GAlias ?
+        GAlias->resolveAliasedGlobal(false) : GValue;
+      MOSymbol = Mang->getSymbol(RealGValue);
+      const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+    
+      if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
+          RealGValue->hasAvailableExternallyLinkage())
+        MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+    }
+
+    const MCExpr *Exp =
+      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO,
+                              OutContext);
+    TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+  case PPC::ADDItocL: {
+    // Transform %Xd = ADDItocL %Xs, <ga:@sym>
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+    // Change the opcode to ADDI8L.  If the global address is external, then
+    // generate a TOC entry and reference that.  Otherwise reference the
+    // symbol directly.
+    TmpInst.setOpcode(PPC::ADDI8L);
+    const MachineOperand &MO = MI->getOperand(2);
+    assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
+    MCSymbol *MOSymbol = 0;
+    bool IsExternal = false;
+    bool IsFunction = false;
+
+    if (MO.isGlobal()) {
+      const GlobalValue *GValue = MO.getGlobal();
+      const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+      const GlobalValue *RealGValue = GAlias ?
+        GAlias->resolveAliasedGlobal(false) : GValue;
+      MOSymbol = Mang->getSymbol(RealGValue);
+      const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+      IsExternal = GVar && !GVar->hasInitializer();
+      IsFunction = !GVar;
+    } else if (MO.isCPI())
+      MOSymbol = GetCPISymbol(MO.getIndex());
+
+    if (IsFunction || IsExternal)
+      MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+
+    const MCExpr *Exp =
+      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO,
+                              OutContext);
+    TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+  case PPC::ADDISgotTprelHA: {
+    // Transform: %Xd = ADDISgotTprelHA %X2, <ga:@sym>
+    // Into:      %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
+    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+    const MachineOperand &MO = MI->getOperand(2);
+    const GlobalValue *GValue = MO.getGlobal();
+    MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+    const MCExpr *SymGotTprel =
+      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA,
+                              OutContext);
+    OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+                                .addReg(MI->getOperand(0).getReg())
+                                .addReg(PPC::X2)
+                                .addExpr(SymGotTprel));
+    return;
+  }
+  case PPC::LDgotTprelL: {
+    // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+    // Change the opcode to LDrs, which is a form of LD with the offset
+    // specified by a SymbolLo.
+    TmpInst.setOpcode(PPC::LDrs);
+    const MachineOperand &MO = MI->getOperand(1);
+    const GlobalValue *GValue = MO.getGlobal();
+    MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+    const MCExpr *Exp =
+      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO,
+                              OutContext);
+    TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+  case PPC::ADDIStlsgdHA: {
+    // Transform: %Xd = ADDIStlsgdHA %X2, <ga:@sym>
+    // Into:      %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
+    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+    const MachineOperand &MO = MI->getOperand(2);
+    const GlobalValue *GValue = MO.getGlobal();
+    MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+    const MCExpr *SymGotTlsGD =
+      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA,
+                              OutContext);
+    OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+                                .addReg(MI->getOperand(0).getReg())
+                                .addReg(PPC::X2)
+                                .addExpr(SymGotTlsGD));
+    return;
+  }
+  case PPC::ADDItlsgdL: {
+    // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym>
+    // Into:      %Xd = ADDI8L %Xs, sym@got@tlsgd@l
+    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+    const MachineOperand &MO = MI->getOperand(2);
+    const GlobalValue *GValue = MO.getGlobal();
+    MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+    const MCExpr *SymGotTlsGD =
+      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO,
+                              OutContext);
+    OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L)
+                                .addReg(MI->getOperand(0).getReg())
+                                .addReg(MI->getOperand(1).getReg())
+                                .addExpr(SymGotTlsGD));
+    return;
+  }
+  case PPC::GETtlsADDR: {
+    // Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
+    // Into:      BL8_NOP_ELF_TLSGD __tls_get_addr(sym@tlsgd)
+    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+
+    StringRef Name = "__tls_get_addr";
+    MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
+    const MCSymbolRefExpr *TlsRef = 
+      MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
+    const MachineOperand &MO = MI->getOperand(2);
+    const GlobalValue *GValue = MO.getGlobal();
+    MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+    const MCExpr *SymVar =
+      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
+                              OutContext);
+    OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_ELF_TLSGD)
+                                .addExpr(TlsRef)
+                                .addExpr(SymVar));
+    return;
+  }
+  case PPC::ADDIStlsldHA: {
+    // Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym>
+    // Into:      %Xd = ADDIS8 %X2, sym@got@tlsld@ha
+    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+    const MachineOperand &MO = MI->getOperand(2);
+    const GlobalValue *GValue = MO.getGlobal();
+    MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+    const MCExpr *SymGotTlsLD =
+      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA,
+                              OutContext);
+    OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+                                .addReg(MI->getOperand(0).getReg())
+                                .addReg(PPC::X2)
+                                .addExpr(SymGotTlsLD));
+    return;
+  }
+  case PPC::ADDItlsldL: {
+    // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym>
+    // Into:      %Xd = ADDI8L %Xs, sym@got@tlsld@l
+    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+    const MachineOperand &MO = MI->getOperand(2);
+    const GlobalValue *GValue = MO.getGlobal();
+    MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+    const MCExpr *SymGotTlsLD =
+      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO,
+                              OutContext);
+    OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L)
+                                .addReg(MI->getOperand(0).getReg())
+                                .addReg(MI->getOperand(1).getReg())
+                                .addExpr(SymGotTlsLD));
+    return;
+  }
+  case PPC::GETtlsldADDR: {
+    // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
+    // Into:      BL8_NOP_ELF_TLSLD __tls_get_addr(sym@tlsld)
+    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+
+    StringRef Name = "__tls_get_addr";
+    MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
+    const MCSymbolRefExpr *TlsRef = 
+      MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
+    const MachineOperand &MO = MI->getOperand(2);
+    const GlobalValue *GValue = MO.getGlobal();
+    MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+    const MCExpr *SymVar =
+      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
+                              OutContext);
+    OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_ELF_TLSLD)
+                                .addExpr(TlsRef)
+                                .addExpr(SymVar));
+    return;
+  }
+  case PPC::ADDISdtprelHA: {
+    // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
+    // Into:      %Xd = ADDIS8 %X3, sym@dtprel@ha
+    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+    const MachineOperand &MO = MI->getOperand(2);
+    const GlobalValue *GValue = MO.getGlobal();
+    MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+    const MCExpr *SymDtprel =
+      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_HA,
+                              OutContext);
+    OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+                                .addReg(MI->getOperand(0).getReg())
+                                .addReg(PPC::X3)
+                                .addExpr(SymDtprel));
+    return;
+  }
+  case PPC::ADDIdtprelL: {
+    // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym>
+    // Into:      %Xd = ADDI8L %Xs, sym@dtprel@l
+    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+    const MachineOperand &MO = MI->getOperand(2);
+    const GlobalValue *GValue = MO.getGlobal();
+    MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+    const MCExpr *SymDtprel =
+      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_LO,
+                              OutContext);
+    OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L)
+                                .addReg(MI->getOperand(0).getReg())
+                                .addReg(MI->getOperand(1).getReg())
+                                .addExpr(SymDtprel));
+    return;
+  }
   case PPC::MFCRpseud:
   case PPC::MFCR8pseud:
     // Transform: %R3 = MFCRpseud %CR7
     // Into:      %R3 = MFCR      ;; cr7
     OutStreamer.AddComment(PPCInstPrinter::
                            getRegisterName(MI->getOperand(1).getReg()));
-    TmpInst.setOpcode(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR);
-    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
-    OutStreamer.EmitInstruction(TmpInst);
+    OutStreamer.EmitInstruction(MCInstBuilder(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR)
+      .addReg(MI->getOperand(0).getReg()));
     return;
   case PPC::SYNC:
     // In Book E sync is called msync, handle this special case here...
@@ -415,11 +729,17 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
   OutStreamer.EmitValueToAlignment(8);
   MCSymbol *Symbol1 = 
     OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName()));
-  MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.@tocbase"));
+  // Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
+  // entry point.
   OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
                         8/*size*/, 0/*addrspace*/);
-  OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, OutContext),
+  MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
+  // Generates a R_PPC64_TOC relocation for TOC base insertion.
+  OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2,
+                        MCSymbolRefExpr::VK_PPC_TOC, OutContext),
                         8/*size*/, 0/*addrspace*/);
+  // Emit a null environment pointer.
+  OutStreamer.EmitIntValue(0, 8 /* size */, 0 /* addrspace */);
   OutStreamer.SwitchSection(Current);
 
   MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
@@ -430,7 +750,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
 
 
 bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
-  const TargetData *TD = TM.getTargetData();
+  const DataLayout *TD = TM.getDataLayout();
 
   bool isPPC64 = TD->getPointerSizeInBits() == 64;
 
@@ -440,12 +760,11 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
         SectionKind::getReadOnly());
     OutStreamer.SwitchSection(Section);
 
-    // FIXME: This is nondeterminstic!
-    for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
+    for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
          E = TOC.end(); I != E; ++I) {
       OutStreamer.EmitLabel(I->second);
-      OutStreamer.EmitRawText("\t.tc " + Twine(I->first->getName()) +
-                              "[TC]," + I->first->getName());
+      MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName());
+      OutStreamer.EmitTCEntry(*S);
     }
   }
 
@@ -524,21 +843,18 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
 
 static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) {
   // Remove $stub suffix, add $lazy_ptr.
-  SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()-5);
-  TmpStr += "$lazy_ptr";
-  return Ctx.GetOrCreateSymbol(TmpStr.str());
+  StringRef NoStub = Sym->getName().substr(0, Sym->getName().size()-5);
+  return Ctx.GetOrCreateSymbol(NoStub + "$lazy_ptr");
 }
 
 static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
   // Add $tmp suffix to $stub, yielding $stub$tmp.
-  SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end());
-  TmpStr += "$tmp";
-  return Ctx.GetOrCreateSymbol(TmpStr.str());
+  return Ctx.GetOrCreateSymbol(Sym->getName() + "$tmp");
 }
 
 void PPCDarwinAsmPrinter::
 EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
-  bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64;
+  bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
   
   const TargetLoweringObjectFileMachO &TLOFMacho = 
     static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
@@ -564,32 +880,50 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
                                            
       OutStreamer.EmitLabel(Stub);
       OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
+      // mflr r0
+      OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
       // FIXME: MCize this.
-      OutStreamer.EmitRawText(StringRef("\tmflr r0"));
-      OutStreamer.EmitRawText("\tbcl 20,31," + Twine(AnonSymbol->getName()));
+      OutStreamer.EmitRawText("\tbcl 20, 31, " + Twine(AnonSymbol->getName()));
       OutStreamer.EmitLabel(AnonSymbol);
-      OutStreamer.EmitRawText(StringRef("\tmflr r11"));
-      OutStreamer.EmitRawText("\taddis r11,r11,ha16("+Twine(LazyPtr->getName())+
-                              "-" + AnonSymbol->getName() + ")");
-      OutStreamer.EmitRawText(StringRef("\tmtlr r0"));
-      
-      if (isPPC64)
-        OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
-                                "-" + AnonSymbol->getName() + ")(r11)");
-      else
-        OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
-                                "-" + AnonSymbol->getName() + ")(r11)");
-      OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
-      OutStreamer.EmitRawText(StringRef("\tbctr"));
-      
+      // mflr r11
+      OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
+      // addis r11, r11, ha16(LazyPtr - AnonSymbol)
+      const MCExpr *Sub =
+        MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(LazyPtr, OutContext),
+                                MCSymbolRefExpr::Create(AnonSymbol, OutContext),
+                                OutContext);
+      OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS)
+        .addReg(PPC::R11)
+        .addReg(PPC::R11)
+        .addExpr(Sub));
+      // mtlr r0
+      OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
+
+      // ldu r12, lo16(LazyPtr - AnonSymbol)(r11)
+      // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11)
+      OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+        .addReg(PPC::R12)
+        .addExpr(Sub).addExpr(Sub)
+        .addReg(PPC::R11));
+      // mtctr r12
+      OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+      // bctr
+      OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
+
       OutStreamer.SwitchSection(LSPSection);
       OutStreamer.EmitLabel(LazyPtr);
       OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
-      
-      if (isPPC64)
-        OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
-      else
-        OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
+
+      MCSymbol *DyldStubBindingHelper =
+        OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+      if (isPPC64) {
+        // .quad dyld_stub_binding_helper
+        OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
+      } else {
+        // .long dyld_stub_binding_helper
+        OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
+      }
     }
     OutStreamer.AddBlankLine();
     return;
@@ -609,23 +943,42 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
     EmitAlignment(4);
     OutStreamer.EmitLabel(Stub);
     OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
-    OutStreamer.EmitRawText("\tlis r11,ha16(" + Twine(LazyPtr->getName()) +")");
-    if (isPPC64)
-      OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
-                              ")(r11)");
-    else
-      OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
-                              ")(r11)");
-    OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
-    OutStreamer.EmitRawText(StringRef("\tbctr"));
+    // lis r11, ha16(LazyPtr)
+    const MCExpr *LazyPtrHa16 =
+      MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_HA16,
+                              OutContext);
+    OutStreamer.EmitInstruction(MCInstBuilder(PPC::LIS)
+      .addReg(PPC::R11)
+      .addExpr(LazyPtrHa16));
+
+    const MCExpr *LazyPtrLo16 =
+      MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_LO16,
+                              OutContext);
+    // ldu r12, lo16(LazyPtr)(r11)
+    // lwzu r12, lo16(LazyPtr)(r11)
+    OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+      .addReg(PPC::R12)
+      .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16)
+      .addReg(PPC::R11));
+
+    // mtctr r12
+    OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+    // bctr
+    OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
+
     OutStreamer.SwitchSection(LSPSection);
     OutStreamer.EmitLabel(LazyPtr);
     OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
-    
-    if (isPPC64)
-      OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
-    else
-      OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
+
+    MCSymbol *DyldStubBindingHelper =
+      OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+    if (isPPC64) {
+      // .quad dyld_stub_binding_helper
+      OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
+    } else {
+      // .long dyld_stub_binding_helper
+      OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
+    }
   }
   
   OutStreamer.AddBlankLine();
@@ -633,7 +986,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
 
 
 bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
-  bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64;
+  bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
 
   // Darwin/PPC always uses mach-o.
   const TargetLoweringObjectFileMachO &TLOFMacho = 
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 21a0fb2..9911575 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -17,13 +17,13 @@
 
 #define DEBUG_TYPE "ppc-branch-select"
 #include "PPC.h"
+#include "MCTargetDesc/PPCPredicates.h"
 #include "PPCInstrBuilder.h"
 #include "PPCInstrInfo.h"
-#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
 STATISTIC(NumExpanded, "Number of branches expanded to long format");
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 2a2abb1..a74932c 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -31,20 +31,20 @@
 
 #define DEBUG_TYPE "ctrloops"
 #include "PPC.h"
-#include "PPCTargetMachine.h"
 #include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/Constants.h"
-#include "llvm/PassSupport.h"
+#include "PPCTargetMachine.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/PassSupport.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index b2b5364..3f87e88 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -12,12 +12,19 @@
 //
 //===----------------------------------------------------------------------===//
 
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+
 //===----------------------------------------------------------------------===//
 // Return Value Calling Convention
 //===----------------------------------------------------------------------===//
 
 // Return-value convention for PowerPC
 def RetCC_PPC : CallingConv<[
+  // On PPC64, integer return values are always promoted to i64
+  CCIfType<[i32], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>,
+
   CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
   CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>,
   
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 252a2d1..d68bfd1 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -12,15 +12,15 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "PPCTargetMachine.h"
-#include "PPCRelocations.h"
 #include "PPC.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
+#include "PPCRelocations.h"
+#include "PPCTargetMachine.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetOptions.h"
@@ -68,6 +68,7 @@ namespace {
     unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const;
     unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
     unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
+    unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const;
 
     const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
 
@@ -243,6 +244,13 @@ unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
 }
 
 
+unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI,
+                                           unsigned OpNo) const {
+  llvm_unreachable("TLS not supported on the old JIT.");
+  return 0;
+}
+
+
 unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
                                            const MachineOperand &MO) const {
 
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index c24afa9..5901f36 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -12,15 +12,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCFrameLowering.h"
+#include "PPCInstrBuilder.h"
 #include "PPCInstrInfo.h"
 #include "PPCMachineFunctionInfo.h"
-#include "llvm/Function.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
 #include "llvm/Target/TargetOptions.h"
 
 using namespace llvm;
@@ -49,6 +50,11 @@ static const uint16_t VRRegNo[] = {
 /// to manipulate the VRSAVE register, even though it uses vector registers.
 /// This can happen when the only registers used are known to be live in or out
 /// of the function.  Remove all of the VRSAVE related code from the function.
+/// FIXME: The removal of the code results in a compile failure at -O0 when the
+/// function contains a function call, as the GPR containing original VRSAVE
+/// contents is spilled and reloaded around the call.  Without the prolog code,
+/// the spill instruction refers to an undefined register.  This code needs
+/// to account for all uses of that GPR.
 static void RemoveVRSaveCode(MachineInstr *MI) {
   MachineBasicBlock *Entry = MI->getParent();
   MachineFunction *MF = Entry->getParent();
@@ -168,6 +174,11 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
   MI->eraseFromParent();
 }
 
+static bool spillsCR(const MachineFunction &MF) {
+  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+  return FuncInfo->isCRSpilled();
+}
+
 /// determineFrameLayout - Determine the size of the frame and maximum call
 /// frame size.
 void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
@@ -184,13 +195,22 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
 
   // If we are a leaf function, and use up to 224 bytes of stack space,
   // don't have a frame pointer, calls, or dynamic alloca then we do not need
-  // to adjust the stack pointer (we fit in the Red Zone).
-  bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone);
-  // FIXME SVR4 The 32-bit SVR4 ABI has no red zone.
+  // to adjust the stack pointer (we fit in the Red Zone).  For 64-bit
+  // SVR4, we also require a stack frame if we need to spill the CR,
+  // since this spill area is addressed relative to the stack pointer.
+  bool DisableRedZone = MF.getFunction()->getAttributes().
+    hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone);
+  // FIXME SVR4 The 32-bit SVR4 ABI has no red zone.  However, it can
+  // still generate stackless code if all local vars are reg-allocated.
+  // Try: (FrameSize <= 224
+  //       || (FrameSize == 0 && Subtarget.isPPC32 && Subtarget.isSVR4ABI()))
   if (!DisableRedZone &&
       FrameSize <= 224 &&                          // Fits in red zone.
       !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
       !MFI->adjustsStack() &&                      // No calls.
+      !(Subtarget.isPPC64() &&                     // No 64-bit SVR4 CRsave.
+	Subtarget.isSVR4ABI()
+	&& spillsCR(MF)) &&
       (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
     // No need for frame
     MFI->setStackSize(0);
@@ -241,7 +261,8 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
 
   // Naked functions have no stack frame pushed, so we don't have a frame
   // pointer.
-  if (MF.getFunction()->hasFnAttr(Attribute::Naked))
+  if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                                     Attribute::Naked))
     return false;
 
   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
@@ -268,12 +289,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
 
   // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
   // process it.
-  for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
-    if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
-      HandleVRSaveUpdate(MBBI, TII);
-      break;
+  if (!Subtarget.isSVR4ABI())
+    for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
+      if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
+        HandleVRSaveUpdate(MBBI, TII);
+        break;
+      }
     }
-  }
 
   // Move MBBI back to the beginning of the function.
   MBBI = MBB.begin();
@@ -488,7 +510,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
     // Add callee saved registers to move list.
     const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
-      int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
       unsigned Reg = CSI[I].getReg();
       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
 
@@ -497,6 +518,25 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
       if (PPC::CRBITRCRegClass.contains(Reg))
         continue;
 
+      // For SVR4, don't emit a move for the CR spill slot if we haven't
+      // spilled CRs.
+      if (Subtarget.isSVR4ABI()
+	  && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
+	  && !spillsCR(MF))
+	continue;
+
+      // For 64-bit SVR4 when we have spilled CRs, the spill location
+      // is SP+8, not a frame-relative slot.
+      if (Subtarget.isSVR4ABI()
+	  && Subtarget.isPPC64()
+	  && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
+	MachineLocation CSDst(PPC::X1, 8);
+	MachineLocation CSSrc(PPC::CR2);
+	Moves.push_back(MachineMove(Label, CSDst, CSSrc));
+	continue;
+      }
+
+      int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
       MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
       MachineLocation CSSrc(Reg);
       Moves.push_back(MachineMove(Label, CSDst, CSSrc));
@@ -714,11 +754,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
   }
 }
 
-static bool spillsCR(const MachineFunction &MF) {
-  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
-  return FuncInfo->isCRSpilled();
-}
-
 /// MustSaveLR - Return true if this function requires that we save the LR
 /// register onto the stack in the prolog and restore it in the epilog of the
 /// function.
@@ -808,7 +843,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
   bool HasGPSaveArea = false;
   bool HasG8SaveArea = false;
   bool HasFPSaveArea = false;
-  bool HasCRSaveArea = false;
   bool HasVRSAVESaveArea = false;
   bool HasVRSaveArea = false;
 
@@ -843,10 +877,9 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
       if (Reg < MinFPR) {
         MinFPR = Reg;
       }
-// FIXME SVR4: Disable CR save area for now.
     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
                PPC::CRRCRegClass.contains(Reg)) {
-//      HasCRSaveArea = true;
+      ; // do nothing, as we already know whether CRs are spilled
     } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
       HasVRSAVESaveArea = true;
     } else if (PPC::VRRCRegClass.contains(Reg)) {
@@ -926,16 +959,21 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
     }
   }
 
-  // The CR save area is below the general register save area.
-  if (HasCRSaveArea) {
-    // FIXME SVR4: Is it actually possible to have multiple elements in CSI
-    //             which have the CR/CRBIT register class?
+  // For 32-bit only, the CR save area is below the general register
+  // save area.  For 64-bit SVR4, the CR save area is addressed relative
+  // to the stack pointer and hence does not need an adjustment here.
+  // Only CR2 (the first nonvolatile spilled) has an associated frame
+  // index so that we have a single uniform save area.
+  if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
     // Adjust the frame index of the CR spill slot.
     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
       unsigned Reg = CSI[i].getReg();
 
-      if (PPC::CRBITRCRegClass.contains(Reg) ||
-          PPC::CRRCRegClass.contains(Reg)) {
+      if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
+	  // Leave Darwin logic as-is.
+	  || (!Subtarget.isSVR4ABI() &&
+	      (PPC::CRBITRCRegClass.contains(Reg) ||
+	       PPC::CRRCRegClass.contains(Reg)))) {
         int FI = CSI[i].getFrameIdx();
 
         FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
@@ -973,3 +1011,184 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
     }
   }
 }
+
+bool 
+PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+				     MachineBasicBlock::iterator MI,
+				     const std::vector<CalleeSavedInfo> &CSI,
+				     const TargetRegisterInfo *TRI) const {
+
+  // Currently, this function only handles SVR4 32- and 64-bit ABIs.
+  // Return false otherwise to maintain pre-existing behavior.
+  if (!Subtarget.isSVR4ABI())
+    return false;
+
+  MachineFunction *MF = MBB.getParent();
+  const PPCInstrInfo &TII =
+    *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+  DebugLoc DL;
+  bool CRSpilled = false;
+  
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    // CR2 through CR4 are the nonvolatile CR fields.
+    bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
+
+    if (CRSpilled && IsCRField)
+      continue;
+
+    // Add the callee-saved register as live-in; it's killed at the spill.
+    MBB.addLiveIn(Reg);
+
+    // Insert the spill to the stack frame.
+    if (IsCRField) {
+      CRSpilled = true;
+      // The first time we see a CR field, store the whole CR into the
+      // save slot via GPR12 (available in the prolog for 32- and 64-bit).
+      if (Subtarget.isPPC64()) {
+	// 64-bit:  SP+8
+	MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::X12));
+	MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW))
+			       .addReg(PPC::X12,
+				       getKillRegState(true))
+			       .addImm(8)
+			       .addReg(PPC::X1));
+      } else {
+	// 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
+	// the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
+	MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12));
+	MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
+					 .addReg(PPC::R12,
+						 getKillRegState(true)),
+					 CSI[i].getFrameIdx()));
+      }
+      
+      // Record that we spill the CR in this function.
+      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
+      FuncInfo->setSpillsCR();
+    } else {
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      TII.storeRegToStackSlot(MBB, MI, Reg, true,
+			      CSI[i].getFrameIdx(), RC, TRI);
+    }
+  }
+  return true;
+}
+
+static void
+restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
+	   MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+	   const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
+
+  MachineFunction *MF = MBB.getParent();
+  const PPCInstrInfo &TII =
+    *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+  DebugLoc DL;
+  unsigned RestoreOp, MoveReg;
+
+  if (isPPC64) {
+    // 64-bit:  SP+8
+    MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ), PPC::X12)
+	       .addImm(8)
+	       .addReg(PPC::X1));
+    RestoreOp = PPC::MTCRF8;
+    MoveReg = PPC::X12;
+  } else {
+    // 32-bit:  FP-relative
+    MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
+					     PPC::R12),
+				     CSI[CSIIndex].getFrameIdx()));
+    RestoreOp = PPC::MTCRF;
+    MoveReg = PPC::R12;
+  }
+  
+  if (CR2Spilled)
+    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
+	       .addReg(MoveReg));
+
+  if (CR3Spilled)
+    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
+	       .addReg(MoveReg));
+
+  if (CR4Spilled)
+    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
+	       .addReg(MoveReg));
+}
+
+bool 
+PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+					MachineBasicBlock::iterator MI,
+				        const std::vector<CalleeSavedInfo> &CSI,
+					const TargetRegisterInfo *TRI) const {
+
+  // Currently, this function only handles SVR4 32- and 64-bit ABIs.
+  // Return false otherwise to maintain pre-existing behavior.
+  if (!Subtarget.isSVR4ABI())
+    return false;
+
+  MachineFunction *MF = MBB.getParent();
+  const PPCInstrInfo &TII =
+    *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+  bool CR2Spilled = false;
+  bool CR3Spilled = false;
+  bool CR4Spilled = false;
+  unsigned CSIIndex = 0;
+
+  // Initialize insertion-point logic; we will be restoring in reverse
+  // order of spill.
+  MachineBasicBlock::iterator I = MI, BeforeI = I;
+  bool AtStart = I == MBB.begin();
+
+  if (!AtStart)
+    --BeforeI;
+
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+
+    if (Reg == PPC::CR2) {
+      CR2Spilled = true;
+      // The spill slot is associated only with CR2, which is the
+      // first nonvolatile spilled.  Save it here.
+      CSIIndex = i;
+      continue;
+    } else if (Reg == PPC::CR3) {
+      CR3Spilled = true;
+      continue;
+    } else if (Reg == PPC::CR4) {
+      CR4Spilled = true;
+      continue;
+    } else {
+      // When we first encounter a non-CR register after seeing at
+      // least one CR register, restore all spilled CRs together.
+      if ((CR2Spilled || CR3Spilled || CR4Spilled)
+	  && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
+	restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled,
+		   MBB, I, CSI, CSIIndex);
+	CR2Spilled = CR3Spilled = CR4Spilled = false;
+      }
+
+      // Default behavior for non-CR saves.
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(),
+			       RC, TRI);
+      assert(I != MBB.begin() &&
+	     "loadRegFromStackSlot didn't insert any code!");
+      }
+
+    // Insert in reverse order.
+    if (AtStart)
+      I = MBB.begin();
+    else {
+      I = BeforeI;
+      ++I;
+    }	    
+  }
+
+  // If we haven't yet spilled the CRs, do so now.
+  if (CR2Spilled || CR3Spilled || CR4Spilled)
+    restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled,
+	       MBB, I, CSI, CSIIndex);
+
+  return true;
+}
+
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index d708541..3517d8c 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -15,9 +15,9 @@
 
 #include "PPC.h"
 #include "PPCSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/STLExtras.h"
 
 namespace llvm {
   class PPCSubtarget;
@@ -45,6 +45,16 @@ public:
                                             RegScavenger *RS = NULL) const;
   void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
 
+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI,
+                                 const TargetRegisterInfo *TRI) const;
+
+  bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   const std::vector<CalleeSavedInfo> &CSI,
+                                   const TargetRegisterInfo *TRI) const;
+
   /// targetHandlesStackFrameRounding - Returns true if the target is
   /// responsible for rounding up the stack frame (probably at emitPrologue
   /// time).
@@ -170,23 +180,11 @@ public:
       {PPC::R15, -68},
       {PPC::R14, -72},
 
-      // CR save area offset.
-      // FIXME SVR4: Disable CR save area for now.
-//      {PPC::CR2, -4},
-//      {PPC::CR3, -4},
-//      {PPC::CR4, -4},
-//      {PPC::CR2LT, -4},
-//      {PPC::CR2GT, -4},
-//      {PPC::CR2EQ, -4},
-//      {PPC::CR2UN, -4},
-//      {PPC::CR3LT, -4},
-//      {PPC::CR3GT, -4},
-//      {PPC::CR3EQ, -4},
-//      {PPC::CR3UN, -4},
-//      {PPC::CR4LT, -4},
-//      {PPC::CR4GT, -4},
-//      {PPC::CR4EQ, -4},
-//      {PPC::CR4UN, -4},
+      // CR save area offset.  We map each of the nonvolatile CR fields
+      // to the slot for CR2, which is the first of the nonvolatile CR
+      // fields to be assigned, so that we only allocate one save slot.
+      // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
+      {PPC::CR2, -4},
 
       // VRSAVE save area offset.
       {PPC::VRSAVE, -4},
@@ -228,27 +226,6 @@ public:
       {PPC::F14, -144},
 
       // General register save area offsets.
-      // FIXME 64-bit SVR4: Are 32-bit registers actually allocated in 64-bit
-      //                    mode?
-      {PPC::R31, -4},
-      {PPC::R30, -12},
-      {PPC::R29, -20},
-      {PPC::R28, -28},
-      {PPC::R27, -36},
-      {PPC::R26, -44},
-      {PPC::R25, -52},
-      {PPC::R24, -60},
-      {PPC::R23, -68},
-      {PPC::R22, -76},
-      {PPC::R21, -84},
-      {PPC::R20, -92},
-      {PPC::R19, -100},
-      {PPC::R18, -108},
-      {PPC::R17, -116},
-      {PPC::R16, -124},
-      {PPC::R15, -132},
-      {PPC::R14, -140},
-
       {PPC::X31, -8},
       {PPC::X30, -16},
       {PPC::X29, -24},
@@ -268,24 +245,6 @@ public:
       {PPC::X15, -136},
       {PPC::X14, -144},
 
-      // CR save area offset.
-      // FIXME SVR4: Disable CR save area for now.
-//      {PPC::CR2, -4},
-//      {PPC::CR3, -4},
-//      {PPC::CR4, -4},
-//      {PPC::CR2LT, -4},
-//      {PPC::CR2GT, -4},
-//      {PPC::CR2EQ, -4},
-//      {PPC::CR2UN, -4},
-//      {PPC::CR3LT, -4},
-//      {PPC::CR3GT, -4},
-//      {PPC::CR3EQ, -4},
-//      {PPC::CR3UN, -4},
-//      {PPC::CR4LT, -4},
-//      {PPC::CR4GT, -4},
-//      {PPC::CR4EQ, -4},
-//      {PPC::CR4UN, -4},
-
       // VRSAVE save area offset.
       {PPC::VRSAVE, -4},
 
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index e8f4d16..762b346 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -14,22 +14,24 @@
 
 #define DEBUG_TYPE "ppc-codegen"
 #include "PPC.h"
-#include "PPCTargetMachine.h"
 #include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "PPCTargetMachine.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
 namespace {
@@ -53,7 +55,9 @@ namespace {
       GlobalBaseReg = 0;
       SelectionDAGISel::runOnMachineFunction(MF);
 
-      InsertVRSaveCode(MF);
+      if (!PPCSubTarget.isSVR4ABI())
+        InsertVRSaveCode(MF);
+
       return true;
     }
 
@@ -621,6 +625,88 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) {
   }
 }
 
+// getVCmpInst: return the vector compare instruction for the specified
+// vector type and condition code. Since this is for altivec specific code,
+// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
+static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
+  switch (CC) {
+    case ISD::SETEQ:
+    case ISD::SETUEQ:
+    case ISD::SETNE:
+    case ISD::SETUNE:
+      if (VecVT == MVT::v16i8)
+        return PPC::VCMPEQUB;
+      else if (VecVT == MVT::v8i16)
+        return PPC::VCMPEQUH;
+      else if (VecVT == MVT::v4i32)
+        return PPC::VCMPEQUW;
+      // v4f32 != v4f32 could be translate to unordered not equal
+      else if (VecVT == MVT::v4f32)
+        return PPC::VCMPEQFP;
+      break;
+    case ISD::SETLT:
+    case ISD::SETGT:
+    case ISD::SETLE:
+    case ISD::SETGE:
+      if (VecVT == MVT::v16i8)
+        return PPC::VCMPGTSB;
+      else if (VecVT == MVT::v8i16)
+        return PPC::VCMPGTSH;
+      else if (VecVT == MVT::v4i32)
+        return PPC::VCMPGTSW;
+      else if (VecVT == MVT::v4f32)
+        return PPC::VCMPGTFP;
+      break;
+    case ISD::SETULT:
+    case ISD::SETUGT:
+    case ISD::SETUGE:
+    case ISD::SETULE:
+      if (VecVT == MVT::v16i8)
+        return PPC::VCMPGTUB;
+      else if (VecVT == MVT::v8i16)
+        return PPC::VCMPGTUH;
+      else if (VecVT == MVT::v4i32)
+        return PPC::VCMPGTUW;
+      break;
+    case ISD::SETOEQ:
+      if (VecVT == MVT::v4f32)
+        return PPC::VCMPEQFP;
+      break;
+    case ISD::SETOLT:
+    case ISD::SETOGT:
+    case ISD::SETOLE:
+      if (VecVT == MVT::v4f32)
+        return PPC::VCMPGTFP;
+      break;
+    case ISD::SETOGE:
+      if (VecVT == MVT::v4f32)
+        return PPC::VCMPGEFP;
+      break;
+    default:
+      break;
+  }
+  llvm_unreachable("Invalid integer vector compare condition");
+}
+
+// getVCmpEQInst: return the equal compare instruction for the specified vector
+// type. Since this is for altivec specific code, only support the altivec
+// types (v16i8, v8i16, v4i32, and v4f32).
+static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) {
+  switch (VecVT) {
+    case MVT::v16i8:
+      return PPC::VCMPEQUB;
+    case MVT::v8i16:
+      return PPC::VCMPEQUH;
+    case MVT::v4i32:
+      return PPC::VCMPEQUW;
+    case MVT::v4f32:
+      return PPC::VCMPEQFP;
+    default:
+      llvm_unreachable("Invalid integer vector compare condition");
+  }
+}
+
+
 SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
   DebugLoc dl = N->getDebugLoc();
   unsigned Imm;
@@ -701,10 +787,67 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
     }
   }
 
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+
+  // Altivec Vector compare instructions do not set any CR register by default and
+  // vector compare operations return the same type as the operands.
+  if (LHS.getValueType().isVector()) {
+    EVT VecVT = LHS.getValueType();
+    MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy;
+    unsigned int VCmpInst = getVCmpInst(VT, CC);
+
+    switch (CC) {
+      case ISD::SETEQ:
+      case ISD::SETOEQ:
+      case ISD::SETUEQ:
+        return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
+      case ISD::SETNE:
+      case ISD::SETONE:
+      case ISD::SETUNE: {
+        SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
+        return CurDAG->SelectNodeTo(N, PPC::VNOR, VecVT, VCmp, VCmp);
+      } 
+      case ISD::SETLT:
+      case ISD::SETOLT:
+      case ISD::SETULT:
+        return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, RHS, LHS);
+      case ISD::SETGT:
+      case ISD::SETOGT:
+      case ISD::SETUGT:
+        return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
+      case ISD::SETGE:
+      case ISD::SETOGE:
+      case ISD::SETUGE: {
+        // Small optimization: Altivec provides a 'Vector Compare Greater Than
+        // or Equal To' instruction (vcmpgefp), so in this case there is no
+        // need for extra logic for the equal compare.
+        if (VecVT.getSimpleVT().isFloatingPoint()) {
+          return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
+        } else {
+          SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
+          unsigned int VCmpEQInst = getVCmpEQInst(VT);
+          SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
+          return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpGT, VCmpEQ);
+        }
+      }
+      case ISD::SETLE:
+      case ISD::SETOLE:
+      case ISD::SETULE: {
+        SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0);
+        unsigned int VCmpEQInst = getVCmpEQInst(VT);
+        SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
+        return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpLE, VCmpEQ);
+      }
+      default:
+        llvm_unreachable("Invalid vector compare type: should be expanded by legalize");
+    }
+  }
+
   bool Inv;
   int OtherCondIdx;
   unsigned Idx = getCRIdxForSetCC(CC, Inv, OtherCondIdx);
-  SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
+  SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
   SDValue IntCR;
 
   // Force the ccreg into CR7.
@@ -717,7 +860,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
   if (PPCSubTarget.hasMFOCRF() && OtherCondIdx == -1)
     IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
                                            CCReg), 0);
- else
+  else
     IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
                                            CR7Reg, CCReg), 0);
 
@@ -1127,6 +1270,52 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
                                            Chain), 0);
     return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
   }
+  case PPCISD::TOC_ENTRY: {
+    assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI");
+
+    // For medium code model, we generate two instructions as described
+    // below.  Otherwise we allow SelectCodeCommon to handle this, selecting
+    // one of LDtoc, LDtocJTI, and LDtocCPT.
+    if (TM.getCodeModel() != CodeModel::Medium)
+      break;
+
+    // The first source operand is a TargetGlobalAddress or a
+    // TargetJumpTable.  If it is an externally defined symbol, a symbol
+    // with common linkage, a function address, or a jump table address,
+    // we generate:
+    //   LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
+    // Otherwise we generate:
+    //   ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
+    SDValue GA = N->getOperand(0);
+    SDValue TOCbase = N->getOperand(1);
+    SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
+                                        TOCbase, GA);
+
+    if (isa<JumpTableSDNode>(GA))
+      return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
+                                    SDValue(Tmp, 0));
+
+    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
+      const GlobalValue *GValue = G->getGlobal();
+      const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+      const GlobalValue *RealGValue = GAlias ?
+        GAlias->resolveAliasedGlobal(false) : GValue;
+      const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+      assert((GVar || isa<Function>(RealGValue)) &&
+             "Unexpected global value subclass!");
+
+      // An external variable is one without an initializer.  For these,
+      // for variables with common linkage, and for Functions, generate
+      // the LDtocL form.
+      if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
+          RealGValue->hasAvailableExternallyLinkage())
+        return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
+                                      SDValue(Tmp, 0));
+    }
+
+    return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
+                                  SDValue(Tmp, 0), GA);
+  }
   }
 
   return SelectCode(N);
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index dbb3b14..9966b2c 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12,15 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCISelLowering.h"
+#include "MCTargetDesc/PPCPredicates.h"
 #include "PPCMachineFunctionInfo.h"
 #include "PPCPerfectShuffle.h"
 #include "PPCTargetMachine.h"
-#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -29,6 +24,11 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
@@ -347,6 +347,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
       setOperationAction(ISD::UREM, VT, Expand);
       setOperationAction(ISD::FDIV, VT, Expand);
       setOperationAction(ISD::FNEG, VT, Expand);
+      setOperationAction(ISD::FSQRT, VT, Expand);
+      setOperationAction(ISD::FLOG, VT, Expand);
+      setOperationAction(ISD::FLOG10, VT, Expand);
+      setOperationAction(ISD::FLOG2, VT, Expand);
+      setOperationAction(ISD::FEXP, VT, Expand);
+      setOperationAction(ISD::FEXP2, VT, Expand);
+      setOperationAction(ISD::FSIN, VT, Expand);
+      setOperationAction(ISD::FCOS, VT, Expand);
+      setOperationAction(ISD::FABS, VT, Expand);
+      setOperationAction(ISD::FPOWI, VT, Expand);
+      setOperationAction(ISD::FFLOOR, VT, Expand);
+      setOperationAction(ISD::FCEIL,  VT, Expand);
+      setOperationAction(ISD::FTRUNC, VT, Expand);
+      setOperationAction(ISD::FRINT,  VT, Expand);
+      setOperationAction(ISD::FNEARBYINT, VT, Expand);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
@@ -361,6 +376,17 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
       setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
       setOperationAction(ISD::CTTZ, VT, Expand);
       setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+      setOperationAction(ISD::VSELECT, VT, Expand);
+      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+
+      for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+           j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
+        MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j;
+        setTruncStoreAction(VT, InnerVT, Expand);
+      }
+      setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
+      setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
+      setLoadExtAction(ISD::EXTLOAD, VT, Expand);
     }
 
     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
@@ -373,6 +399,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
     setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
+    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
 
     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
@@ -392,6 +426,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+
+    // Altivec does not contain unordered floating-point compare instructions
+    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
+    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
+    setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
+    setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
+    setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
+    setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
   }
 
   if (Subtarget->has64BitSupport()) {
@@ -401,6 +443,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
 
   setOperationAction(ISD::ATOMIC_LOAD,  MVT::i32, Expand);
   setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+  setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
+  setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
 
   setBooleanContents(ZeroOrOneBooleanContent);
   setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
@@ -534,11 +578,27 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
+  case PPCISD::ADDIS_TOC_HA:    return "PPCISD::ADDIS_TOC_HA";
+  case PPCISD::LD_TOC_L:        return "PPCISD::LD_TOC_L";
+  case PPCISD::ADDI_TOC_L:      return "PPCISD::ADDI_TOC_L";
+  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
+  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
+  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
+  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
+  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
+  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
+  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
+  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
+  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
+  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
+  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
   }
 }
 
 EVT PPCTargetLowering::getSetCCResultType(EVT VT) const {
-  return MVT::i32;
+  if (!VT.isVector())
+    return MVT::i32;
+  return VT.changeVectorElementTypeToInteger();
 }
 
 //===----------------------------------------------------------------------===//
@@ -1264,8 +1324,8 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
 
   unsigned MOHiFlag, MOLoFlag;
   bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
-  SDValue TgtBAHi = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOHiFlag);
-  SDValue TgtBALo = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOLoFlag);
+  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
+  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
   return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
 }
 
@@ -1278,19 +1338,81 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
   EVT PtrVT = getPointerTy();
   bool is64bit = PPCSubTarget.isPPC64();
 
-  TLSModel::Model model = getTargetMachine().getTLSModel(GV);
+  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
+
+  if (Model == TLSModel::LocalExec) {
+    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+                                               PPCII::MO_TPREL16_HA);
+    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+                                               PPCII::MO_TPREL16_LO);
+    SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
+                                     is64bit ? MVT::i64 : MVT::i32);
+    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
+    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+  }
 
-  SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
-                                             PPCII::MO_TPREL16_HA);
-  SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
-                                             PPCII::MO_TPREL16_LO);
+  if (!is64bit)
+    llvm_unreachable("only local-exec is currently supported for ppc32");
+
+  if (Model == TLSModel::InitialExec) {
+    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+    SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+    SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
+                                     PtrVT, GOTReg, TGA);
+    SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
+                                   PtrVT, TGA, TPOffsetHi);
+    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA);
+  }
+
+  if (Model == TLSModel::GeneralDynamic) {
+    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+    SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+    SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
+                                     GOTReg, TGA);
+    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
+                                   GOTEntryHi, TGA);
+
+    // We need a chain node, and don't have one handy.  The underlying
+    // call has no side effects, so using the function entry node
+    // suffices.
+    SDValue Chain = DAG.getEntryNode();
+    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
+    SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
+    SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
+                                  PtrVT, ParmReg, TGA);
+    // The return value from GET_TLS_ADDR really is in X3 already, but
+    // some hacks are needed here to tie everything together.  The extra
+    // copies dissolve during subsequent transforms.
+    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
+    return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT);
+  }
 
-  if (model != TLSModel::LocalExec)
-    llvm_unreachable("only local-exec TLS mode supported");
-  SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
-                                   is64bit ? MVT::i64 : MVT::i32);
-  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
-  return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+  if (Model == TLSModel::LocalDynamic) {
+    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+    SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+    SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
+                                     GOTReg, TGA);
+    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
+                                   GOTEntryHi, TGA);
+
+    // We need a chain node, and don't have one handy.  The underlying
+    // call has no side effects, so using the function entry node
+    // suffices.
+    SDValue Chain = DAG.getEntryNode();
+    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
+    SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
+    SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
+                                  PtrVT, ParmReg, TGA);
+    // The return value from GET_TLSLD_ADDR really is in X3 already, but
+    // some hacks are needed here to tie everything together.  The extra
+    // copies dissolve during subsequent transforms.
+    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
+    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
+                                      Chain, ParmReg, TGA);
+    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
+  }
+
+  llvm_unreachable("Unknown TLS model!");
 }
 
 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
@@ -1493,7 +1615,7 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   bool isPPC64 = (PtrVT == MVT::i64);
   Type *IntPtrTy =
-    DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType(
+    DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
                                                              *DAG.getContext());
 
   TargetLowering::ArgListTy Args;
@@ -1716,9 +1838,13 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain,
                                         DebugLoc dl, SelectionDAG &DAG,
                                         SmallVectorImpl<SDValue> &InVals)
                                           const {
-  if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) {
-    return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins,
-                                     dl, DAG, InVals);
+  if (PPCSubTarget.isSVR4ABI()) {
+    if (PPCSubTarget.isPPC64())
+      return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
+                                         dl, DAG, InVals);
+    else
+      return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
+                                         dl, DAG, InVals);
   } else {
     return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
                                        dl, DAG, InVals);
@@ -1726,7 +1852,7 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain,
 }
 
 SDValue
-PPCTargetLowering::LowerFormalArguments_SVR4(
+PPCTargetLowering::LowerFormalArguments_32SVR4(
                                       SDValue Chain,
                                       CallingConv::ID CallConv, bool isVarArg,
                                       const SmallVectorImpl<ISD::InputArg>
@@ -1943,6 +2069,334 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
   return Chain;
 }
 
+// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+// value to MVT::i64 and then truncate to the correct register size.
+SDValue
+PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
+                                     SelectionDAG &DAG, SDValue ArgVal,
+                                     DebugLoc dl) const {
+  if (Flags.isSExt())
+    ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
+                         DAG.getValueType(ObjectVT));
+  else if (Flags.isZExt())
+    ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
+                         DAG.getValueType(ObjectVT));
+  
+  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+}
+
+// Set the size that is at least reserved in caller of this function.  Tail
+// call optimized functions' reserved stack space needs to be aligned so that
+// taking the difference between two stack areas will result in an aligned
+// stack.
+void
+PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
+                                      unsigned nAltivecParamsAtEnd,
+                                      unsigned MinReservedArea,
+                                      bool isPPC64) const {
+  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+  // Add the Altivec parameters at the end, if needed.
+  if (nAltivecParamsAtEnd) {
+    MinReservedArea = ((MinReservedArea+15)/16)*16;
+    MinReservedArea += 16*nAltivecParamsAtEnd;
+  }
+  MinReservedArea =
+    std::max(MinReservedArea,
+             PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
+  unsigned TargetAlign
+    = DAG.getMachineFunction().getTarget().getFrameLowering()->
+        getStackAlignment();
+  unsigned AlignMask = TargetAlign-1;
+  MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
+  FI->setMinReservedArea(MinReservedArea);
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments_64SVR4(
+                                      SDValue Chain,
+                                      CallingConv::ID CallConv, bool isVarArg,
+                                      const SmallVectorImpl<ISD::InputArg>
+                                        &Ins,
+                                      DebugLoc dl, SelectionDAG &DAG,
+                                      SmallVectorImpl<SDValue> &InVals) const {
+  // TODO: add description of PPC stack frame format, or at least some docs.
+  //
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  // Potential tail calls could cause overwriting of argument stack slots.
+  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+                       (CallConv == CallingConv::Fast));
+  unsigned PtrByteSize = 8;
+
+  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
+  // Area that is at least reserved in caller of this function.
+  unsigned MinReservedArea = ArgOffset;
+
+  static const uint16_t GPR[] = {
+    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+  };
+
+  static const uint16_t *FPR = GetFPR();
+
+  static const uint16_t VR[] = {
+    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+  };
+
+  const unsigned Num_GPR_Regs = array_lengthof(GPR);
+  const unsigned Num_FPR_Regs = 13;
+  const unsigned Num_VR_Regs  = array_lengthof(VR);
+
+  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+  // Add DAG nodes to load the arguments or copy them out of registers.  On
+  // entry to a function on PPC, the arguments start after the linkage area,
+  // although the first ones are often in registers.
+
+  SmallVector<SDValue, 8> MemOps;
+  unsigned nAltivecParamsAtEnd = 0;
+  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
+  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
+    SDValue ArgVal;
+    bool needsLoad = false;
+    EVT ObjectVT = Ins[ArgNo].VT;
+    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+    unsigned ArgSize = ObjSize;
+    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+
+    unsigned CurArgOffset = ArgOffset;
+
+    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
+    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
+        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+      if (isVarArg) {
+        MinReservedArea = ((MinReservedArea+15)/16)*16;
+        MinReservedArea += CalculateStackSlotSize(ObjectVT,
+                                                  Flags,
+                                                  PtrByteSize);
+      } else
+        nAltivecParamsAtEnd++;
+    } else
+      // Calculate min reserved area.
+      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
+                                                Flags,
+                                                PtrByteSize);
+
+    // FIXME the codegen can be much improved in some cases.
+    // We do not have to keep everything in memory.
+    if (Flags.isByVal()) {
+      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
+      ObjSize = Flags.getByValSize();
+      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+      // Empty aggregate parameters do not take up registers.  Examples:
+      //   struct { } a;
+      //   union  { } b;
+      //   int c[0];
+      // etc.  However, we have to provide a place-holder in InVals, so
+      // pretend we have an 8-byte item at the current address for that
+      // purpose.
+      if (!ObjSize) {
+        int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+        InVals.push_back(FIN);
+        continue;
+      }
+      // All aggregates smaller than 8 bytes must be passed right-justified.
+      if (ObjSize < PtrByteSize)
+        CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
+      // The value of the object is its address.
+      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
+      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+      InVals.push_back(FIN);
+
+      if (ObjSize < 8) {
+        if (GPR_idx != Num_GPR_Regs) {
+          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+          SDValue Store;
+
+          if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
+            EVT ObjType = (ObjSize == 1 ? MVT::i8 :
+                           (ObjSize == 2 ? MVT::i16 : MVT::i32));
+            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+                                      MachinePointerInfo(FuncArg, CurArgOffset),
+                                      ObjType, false, false, 0);
+          } else {
+            // For sizes that don't fit a truncating store (3, 5, 6, 7),
+            // store the whole register as-is to the parameter save area
+            // slot.  The address of the parameter was already calculated
+            // above (InVals.push_back(FIN)) to be the right-justified
+            // offset within the slot.  For this store, we need a new
+            // frame index that points at the beginning of the slot.
+            int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+            SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+            Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+                                 MachinePointerInfo(FuncArg, ArgOffset),
+                                 false, false, 0);
+          }
+
+          MemOps.push_back(Store);
+          ++GPR_idx;
+        }
+        // Whether we copied from a register or not, advance the offset
+        // into the parameter save area by a full doubleword.
+        ArgOffset += PtrByteSize;
+        continue;
+      }
+
+      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
+        // Store whatever pieces of the object are in registers
+        // to memory.  ArgOffset will be the address of the beginning
+        // of the object.
+        if (GPR_idx != Num_GPR_Regs) {
+          unsigned VReg;
+          VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+          int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+                                       MachinePointerInfo(FuncArg, ArgOffset),
+                                       false, false, 0);
+          MemOps.push_back(Store);
+          ++GPR_idx;
+          ArgOffset += PtrByteSize;
+        } else {
+          ArgOffset += ArgSize - j;
+          break;
+        }
+      }
+      continue;
+    }
+
+    switch (ObjectVT.getSimpleVT().SimpleTy) {
+    default: llvm_unreachable("Unhandled argument type!");
+    case MVT::i32:
+    case MVT::i64:
+      if (GPR_idx != Num_GPR_Regs) {
+        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+        if (ObjectVT == MVT::i32)
+          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+          // value to MVT::i64 and then truncate to the correct register size.
+          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
+
+        ++GPR_idx;
+      } else {
+        needsLoad = true;
+        ArgSize = PtrByteSize;
+      }
+      ArgOffset += 8;
+      break;
+
+    case MVT::f32:
+    case MVT::f64:
+      // Every 8 bytes of argument space consumes one of the GPRs available for
+      // argument passing.
+      if (GPR_idx != Num_GPR_Regs) {
+        ++GPR_idx;
+      }
+      if (FPR_idx != Num_FPR_Regs) {
+        unsigned VReg;
+
+        if (ObjectVT == MVT::f32)
+          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
+        else
+          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+
+        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+        ++FPR_idx;
+      } else {
+        needsLoad = true;
+        ArgSize = PtrByteSize;
+      }
+
+      ArgOffset += 8;
+      break;
+    case MVT::v4f32:
+    case MVT::v4i32:
+    case MVT::v8i16:
+    case MVT::v16i8:
+      // Note that vector arguments in registers don't reserve stack space,
+      // except in varargs functions.
+      if (VR_idx != Num_VR_Regs) {
+        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+        if (isVarArg) {
+          while ((ArgOffset % 16) != 0) {
+            ArgOffset += PtrByteSize;
+            if (GPR_idx != Num_GPR_Regs)
+              GPR_idx++;
+          }
+          ArgOffset += 16;
+          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
+        }
+        ++VR_idx;
+      } else {
+        // Vectors are aligned.
+        ArgOffset = ((ArgOffset+15)/16)*16;
+        CurArgOffset = ArgOffset;
+        ArgOffset += 16;
+        needsLoad = true;
+      }
+      break;
+    }
+
+    // We need to load the argument to a virtual register if we determined
+    // above that we ran out of physical registers of the appropriate type.
+    if (needsLoad) {
+      int FI = MFI->CreateFixedObject(ObjSize,
+                                      CurArgOffset + (ArgSize - ObjSize),
+                                      isImmutable);
+      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+                           false, false, false, 0);
+    }
+
+    InVals.push_back(ArgVal);
+  }
+
+  // Set the size that is at least reserved in caller of this function.  Tail
+  // call optimized functions' reserved stack space needs to be aligned so that
+  // taking the difference between two stack areas will result in an aligned
+  // stack.
+  setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true);
+
+  // If the function takes variable number of arguments, make a frame index for
+  // the start of the first vararg value... for expansion of llvm.va_start.
+  if (isVarArg) {
+    int Depth = ArgOffset;
+
+    FuncInfo->setVarArgsFrameIndex(
+      MFI->CreateFixedObject(PtrByteSize, Depth, true));
+    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+    // If this function is vararg, store any remaining integer argument regs
+    // to their spots on the stack so that they may be loaded by deferencing the
+    // result of va_next.
+    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+      unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+                                   MachinePointerInfo(), false, false, 0);
+      MemOps.push_back(Store);
+      // Increment the address by four for the next argument to store
+      SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
+      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+    }
+  }
+
+  if (!MemOps.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl,
+                        MVT::Other, &MemOps[0], MemOps.size());
+
+  return Chain;
+}
+
 SDValue
 PPCTargetLowering::LowerFormalArguments_Darwin(
                                       SDValue Chain,
@@ -2019,10 +2473,12 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
       default: llvm_unreachable("Unhandled argument type!");
       case MVT::i32:
       case MVT::f32:
-        VecArgOffset += isPPC64 ? 8 : 4;
+        VecArgOffset += 4;
         break;
       case MVT::i64:  // PPC64
       case MVT::f64:
+        // FIXME: We are guaranteed to be !isPPC64 at this point.
+        // Does MVT::i64 apply?
         VecArgOffset += 8;
         break;
       case MVT::v4f32:
@@ -2045,7 +2501,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
 
   SmallVector<SDValue, 8> MemOps;
   unsigned nAltivecParamsAtEnd = 0;
-  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
+  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
     SDValue ArgVal;
     bool needsLoad = false;
     EVT ObjectVT = Ins[ArgNo].VT;
@@ -2093,10 +2550,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
           else
             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
           SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
-                                            MachinePointerInfo(),
-                                            ObjSize==1 ? MVT::i8 : MVT::i16,
-                                            false, false, 0);
+                                            MachinePointerInfo(FuncArg,
+                                              CurArgOffset),
+                                            ObjType, false, false, 0);
           MemOps.push_back(Store);
           ++GPR_idx;
         }
@@ -2107,8 +2565,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
       }
       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
         // Store whatever pieces of the object are in registers
-        // to memory.  ArgVal will be address of the beginning of
-        // the object.
+        // to memory.  ArgOffset will be the address of the beginning
+        // of the object.
         if (GPR_idx != Num_GPR_Regs) {
           unsigned VReg;
           if (isPPC64)
@@ -2119,7 +2577,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
           SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                                       MachinePointerInfo(),
+                                       MachinePointerInfo(FuncArg, ArgOffset),
                                        false, false, 0);
           MemOps.push_back(Store);
           ++GPR_idx;
@@ -2154,18 +2612,10 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
 
-        if (ObjectVT == MVT::i32) {
+        if (ObjectVT == MVT::i32)
           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
           // value to MVT::i64 and then truncate to the correct register size.
-          if (Flags.isSExt())
-            ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
-                                 DAG.getValueType(ObjectVT));
-          else if (Flags.isZExt())
-            ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
-                                 DAG.getValueType(ObjectVT));
-
-          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
-        }
+          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
 
         ++GPR_idx;
       } else {
@@ -2252,23 +2702,10 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
   }
 
   // Set the size that is at least reserved in caller of this function.  Tail
-  // call optimized function's reserved stack space needs to be aligned so that
+  // call optimized functions' reserved stack space needs to be aligned so that
   // taking the difference between two stack areas will result in an aligned
   // stack.
-  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
-  // Add the Altivec parameters at the end, if needed.
-  if (nAltivecParamsAtEnd) {
-    MinReservedArea = ((MinReservedArea+15)/16)*16;
-    MinReservedArea += 16*nAltivecParamsAtEnd;
-  }
-  MinReservedArea =
-    std::max(MinReservedArea,
-             PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
-  unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
-    getStackAlignment();
-  unsigned AlignMask = TargetAlign-1;
-  MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
-  FI->setMinReservedArea(MinReservedArea);
+  setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64);
 
   // If the function takes variable number of arguments, make a frame index for
   // the start of the first vararg value... for expansion of llvm.va_start.
@@ -2308,8 +2745,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
   return Chain;
 }
 
-/// CalculateParameterAndLinkageAreaSize - Get the size of the paramter plus
-/// linkage area for the Darwin ABI.
+/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
+/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI.
 static unsigned
 CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
                                      bool isPPC64,
@@ -2718,7 +3155,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
       // Thus for a call through a function pointer, the following actions need
       // to be performed:
       //   1. Save the TOC of the caller in the TOC save area of its stack
-      //      frame (this is done in LowerCall_Darwin()).
+      //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
       //   2. Load the address of the function entry point from the function
       //      descriptor.
       //   3. Load the TOC of the callee from the function descriptor into r2.
@@ -2808,6 +3245,15 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
   return CallOpc;
 }
 
+static
+bool isLocalCall(const SDValue &Callee)
+{
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+    return !G->getGlobal()->isDeclaration() &&
+           !G->getGlobal()->isWeakForLinker();
+  return false;
+}
+
 SDValue
 PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                    CallingConv::ID CallConv, bool isVarArg,
@@ -2823,12 +3269,32 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
   // Copy all of the result registers out of their specified physreg.
   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
     CCValAssign &VA = RVLocs[i];
-    EVT VT = VA.getValVT();
     assert(VA.isRegLoc() && "Can only return in registers!");
-    Chain = DAG.getCopyFromReg(Chain, dl,
-                               VA.getLocReg(), VT, InFlag).getValue(1);
-    InVals.push_back(Chain.getValue(0));
-    InFlag = Chain.getValue(2);
+
+    SDValue Val = DAG.getCopyFromReg(Chain, dl,
+                                     VA.getLocReg(), VA.getLocVT(), InFlag);
+    Chain = Val.getValue(1);
+    InFlag = Val.getValue(2);
+
+    switch (VA.getLocInfo()) {
+    default: llvm_unreachable("Unknown loc info!");
+    case CCValAssign::Full: break;
+    case CCValAssign::AExt:
+      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+      break;
+    case CCValAssign::ZExt:
+      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
+                        DAG.getValueType(VA.getValVT()));
+      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+      break;
+    case CCValAssign::SExt:
+      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
+                        DAG.getValueType(VA.getValVT()));
+      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+      break;
+    }
+
+    InVals.push_back(Val);
   }
 
   return Chain;
@@ -2916,8 +3382,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
       // from allocating it), resulting in an additional register being
       // allocated and an unnecessary move instruction being generated.
       needsTOCRestore = true;
-    } else if (CallOpc == PPCISD::CALL_SVR4) {
-      // Otherwise insert NOP.
+    } else if ((CallOpc == PPCISD::CALL_SVR4) && !isLocalCall(Callee)) {
+      // Otherwise insert NOP for non-local calls.
       CallOpc = PPCISD::CALL_NOP_SVR4;
     }
   }
@@ -2959,10 +3425,16 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                    Ins, DAG);
 
-  if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
-    return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg,
-                          isTailCall, Outs, OutVals, Ins,
-                          dl, DAG, InVals);
+  if (PPCSubTarget.isSVR4ABI()) {
+    if (PPCSubTarget.isPPC64())
+      return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
+                              isTailCall, Outs, OutVals, Ins,
+                              dl, DAG, InVals);
+    else
+      return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
+                              isTailCall, Outs, OutVals, Ins,
+                              dl, DAG, InVals);
+  }
 
   return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
                           isTailCall, Outs, OutVals, Ins,
@@ -2970,15 +3442,15 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 }
 
 SDValue
-PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
-                                  CallingConv::ID CallConv, bool isVarArg,
-                                  bool isTailCall,
-                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                  const SmallVectorImpl<SDValue> &OutVals,
-                                  const SmallVectorImpl<ISD::InputArg> &Ins,
-                                  DebugLoc dl, SelectionDAG &DAG,
-                                  SmallVectorImpl<SDValue> &InVals) const {
-  // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description
+PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
+                                    CallingConv::ID CallConv, bool isVarArg,
+                                    bool isTailCall,
+                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                    const SmallVectorImpl<SDValue> &OutVals,
+                                    const SmallVectorImpl<ISD::InputArg> &Ins,
+                                    DebugLoc dl, SelectionDAG &DAG,
+                                    SmallVectorImpl<SDValue> &InVals) const {
+  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
   // of the 32-bit SVR4 ABI stack frame layout.
 
   assert((CallConv == CallingConv::C ||
@@ -3182,6 +3654,396 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
                     Ins, InVals);
 }
 
+// Copy an argument into memory, being careful to do this outside the
+// call sequence for the call to which the argument belongs.
+SDValue
+PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
+                                              SDValue CallSeqStart,
+                                              ISD::ArgFlagsTy Flags,
+                                              SelectionDAG &DAG,
+                                              DebugLoc dl) const {
+  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
+                        CallSeqStart.getNode()->getOperand(0),
+                        Flags, DAG, dl);
+  // The MEMCPY must go outside the CALLSEQ_START..END.
+  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+                             CallSeqStart.getNode()->getOperand(1));
+  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+                         NewCallSeqStart.getNode());
+  return NewCallSeqStart;
+}
+
+SDValue
+PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
+                                    CallingConv::ID CallConv, bool isVarArg,
+                                    bool isTailCall,
+                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                    const SmallVectorImpl<SDValue> &OutVals,
+                                    const SmallVectorImpl<ISD::InputArg> &Ins,
+                                    DebugLoc dl, SelectionDAG &DAG,
+                                    SmallVectorImpl<SDValue> &InVals) const {
+
+  unsigned NumOps = Outs.size();
+
+  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  unsigned PtrByteSize = 8;
+
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  // Mark this function as potentially containing a function that contains a
+  // tail call. As a consequence the frame pointer will be used for dynamicalloc
+  // and restoring the callers stack pointer in this functions epilog. This is
+  // done because by tail calling the called function might overwrite the value
+  // in this function's (MF) stack pointer stack slot 0(SP).
+  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+      CallConv == CallingConv::Fast)
+    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+  unsigned nAltivecParamsAtEnd = 0;
+
+  // Count how many bytes are to be pushed on the stack, including the linkage
+  // area, and parameter passing area.  We start with at least 48 bytes, which
+  // is reserved space for [SP][CR][LR][3 x unused].
+  // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result
+  // of this call.
+  unsigned NumBytes =
+    CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv,
+                                         Outs, OutVals, nAltivecParamsAtEnd);
+
+  // Calculate by how many bytes the stack has to be adjusted in case of tail
+  // call optimization.
+  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+  // To protect arguments on the stack from being clobbered in a tail call,
+  // force all the loads to happen before doing any other lowering.
+  if (isTailCall)
+    Chain = DAG.getStackArgumentTokenFactor(Chain);
+
+  // Adjust the stack pointer for the new arguments...
+  // These operations are automatically eliminated by the prolog/epilog pass
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+  SDValue CallSeqStart = Chain;
+
+  // Load the return address and frame pointer so it can be move somewhere else
+  // later.
+  SDValue LROp, FPOp;
+  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
+                                       dl);
+
+  // Set up a copy of the stack pointer for use loading and storing any
+  // arguments that may not fit in the registers available for argument
+  // passing.
+  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+
+  // Figure out which arguments are going to go in registers, and which in
+  // memory.  Also, if this is a vararg function, floating point operations
+  // must be stored to our stack, and loaded into integer regs as well, if
+  // any integer regs are available for argument passing.
+  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
+  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+  static const uint16_t GPR[] = {
+    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+  };
+  static const uint16_t *FPR = GetFPR();
+
+  static const uint16_t VR[] = {
+    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+  };
+  const unsigned NumGPRs = array_lengthof(GPR);
+  const unsigned NumFPRs = 13;
+  const unsigned NumVRs  = array_lengthof(VR);
+
+  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+
+  SmallVector<SDValue, 8> MemOpChains;
+  for (unsigned i = 0; i != NumOps; ++i) {
+    SDValue Arg = OutVals[i];
+    ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+    // PtrOff will be used to store the current argument to the stack if a
+    // register cannot be found for it.
+    SDValue PtrOff;
+
+    PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+
+    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+    // Promote integers to 64-bit values.
+    if (Arg.getValueType() == MVT::i32) {
+      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
+      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
+    }
+
+    // FIXME memcpy is used way more than necessary.  Correctness first.
+    // Note: "by value" is code for passing a structure by value, not
+    // basic types.
+    if (Flags.isByVal()) {
+      // Note: Size includes alignment padding, so
+      //   struct x { short a; char b; }
+      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
+      // These are the proper values we need for right-justifying the
+      // aggregate in a parameter register.
+      unsigned Size = Flags.getByValSize();
+
+      // An empty aggregate parameter takes up no storage and no
+      // registers.
+      if (Size == 0)
+        continue;
+
+      // All aggregates smaller than 8 bytes must be passed right-justified.
+      if (Size==1 || Size==2 || Size==4) {
+        EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
+        if (GPR_idx != NumGPRs) {
+          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
+                                        MachinePointerInfo(), VT,
+                                        false, false, 0);
+          MemOpChains.push_back(Load.getValue(1));
+          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+
+          ArgOffset += PtrByteSize;
+          continue;
+        }
+      }
+
+      if (GPR_idx == NumGPRs && Size < 8) {
+        SDValue Const = DAG.getConstant(PtrByteSize - Size,
+                                        PtrOff.getValueType());
+        SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
+                                                          CallSeqStart,
+                                                          Flags, DAG, dl);
+        ArgOffset += PtrByteSize;
+        continue;
+      }
+      // Copy entire object into memory.  There are cases where gcc-generated
+      // code assumes it is there, even if it could be put entirely into
+      // registers.  (This is not what the doc says.)
+
+      // FIXME: The above statement is likely due to a misunderstanding of the
+      // documents.  All arguments must be copied into the parameter area BY
+      // THE CALLEE in the event that the callee takes the address of any
+      // formal argument.  That has not yet been implemented.  However, it is
+      // reasonable to use the stack area as a staging area for the register
+      // load.
+
+      // Skip this for small aggregates, as we will use the same slot for a
+      // right-justified copy, below.
+      if (Size >= 8)
+        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
+                                                          CallSeqStart,
+                                                          Flags, DAG, dl);
+
+      // When a register is available, pass a small aggregate right-justified.
+      if (Size < 8 && GPR_idx != NumGPRs) {
+        // The easiest way to get this right-justified in a register
+        // is to copy the structure into the rightmost portion of a
+        // local variable slot, then load the whole slot into the
+        // register.
+        // FIXME: The memcpy seems to produce pretty awful code for
+        // small aggregates, particularly for packed ones.
+        // FIXME: It would be preferable to use the slot in the 
+        // parameter save area instead of a new local variable.
+        SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
+        SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
+                                                          CallSeqStart,
+                                                          Flags, DAG, dl);
+
+        // Load the slot into the register.
+        SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
+                                   MachinePointerInfo(),
+                                   false, false, false, 0);
+        MemOpChains.push_back(Load.getValue(1));
+        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+
+        // Done with this argument.
+        ArgOffset += PtrByteSize;
+        continue;
+      }
+
+      // For aggregates larger than PtrByteSize, copy the pieces of the
+      // object that fit into registers from the parameter save area.
+      for (unsigned j=0; j<Size; j+=PtrByteSize) {
+        SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
+        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+        if (GPR_idx != NumGPRs) {
+          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+                                     MachinePointerInfo(),
+                                     false, false, false, 0);
+          MemOpChains.push_back(Load.getValue(1));
+          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+          ArgOffset += PtrByteSize;
+        } else {
+          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
+          break;
+        }
+      }
+      continue;
+    }
+
+    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+    default: llvm_unreachable("Unexpected ValueType for argument!");
+    case MVT::i32:
+    case MVT::i64:
+      if (GPR_idx != NumGPRs) {
+        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+      } else {
+        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+                         true, isTailCall, false, MemOpChains,
+                         TailCallArguments, dl);
+      }
+      ArgOffset += PtrByteSize;
+      break;
+    case MVT::f32:
+    case MVT::f64:
+      if (FPR_idx != NumFPRs) {
+        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
+
+        if (isVarArg) {
+          // A single float or an aggregate containing only a single float
+          // must be passed right-justified in the stack doubleword, and
+          // in the GPR, if one is available.
+          SDValue StoreOff;
+          if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) {
+            SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+            StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+          } else
+            StoreOff = PtrOff;
+
+          SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff,
+                                       MachinePointerInfo(), false, false, 0);
+          MemOpChains.push_back(Store);
+
+          // Float varargs are always shadowed in available integer registers
+          if (GPR_idx != NumGPRs) {
+            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+                                       MachinePointerInfo(), false, false,
+                                       false, 0);
+            MemOpChains.push_back(Load.getValue(1));
+            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+          }
+        } else if (GPR_idx != NumGPRs)
+          // If we have any FPRs remaining, we may also have GPRs remaining.
+          ++GPR_idx;
+      } else {
+        // Single-precision floating-point values are mapped to the
+        // second (rightmost) word of the stack doubleword.
+        if (Arg.getValueType() == MVT::f32) {
+          SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+        }
+
+        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+                         true, isTailCall, false, MemOpChains,
+                         TailCallArguments, dl);
+      }
+      ArgOffset += 8;
+      break;
+    case MVT::v4f32:
+    case MVT::v4i32:
+    case MVT::v8i16:
+    case MVT::v16i8:
+      if (isVarArg) {
+        // These go aligned on the stack, or in the corresponding R registers
+        // when within range.  The Darwin PPC ABI doc claims they also go in
+        // V registers; in fact gcc does this only for arguments that are
+        // prototyped, not for those that match the ...  We do it for all
+        // arguments, seems to work.
+        while (ArgOffset % 16 !=0) {
+          ArgOffset += PtrByteSize;
+          if (GPR_idx != NumGPRs)
+            GPR_idx++;
+        }
+        // We could elide this store in the case where the object fits
+        // entirely in R registers.  Maybe later.
+        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+                            DAG.getConstant(ArgOffset, PtrVT));
+        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+                                     MachinePointerInfo(), false, false, 0);
+        MemOpChains.push_back(Store);
+        if (VR_idx != NumVRs) {
+          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
+                                     MachinePointerInfo(),
+                                     false, false, false, 0);
+          MemOpChains.push_back(Load.getValue(1));
+          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
+        }
+        ArgOffset += 16;
+        for (unsigned i=0; i<16; i+=PtrByteSize) {
+          if (GPR_idx == NumGPRs)
+            break;
+          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+                                  DAG.getConstant(i, PtrVT));
+          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
+                                     false, false, false, 0);
+          MemOpChains.push_back(Load.getValue(1));
+          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+        }
+        break;
+      }
+
+      // Non-varargs Altivec params generally go in registers, but have
+      // stack space allocated at the end.
+      if (VR_idx != NumVRs) {
+        // Doesn't have GPR space allocated.
+        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+      } else {
+        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+                         true, isTailCall, true, MemOpChains,
+                         TailCallArguments, dl);
+        ArgOffset += 16;
+      }
+      break;
+    }
+  }
+
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                        &MemOpChains[0], MemOpChains.size());
+
+  // Check if this is an indirect call (MTCTR/BCTRL).
+  // See PrepareCall() for more information about calls through function
+  // pointers in the 64-bit SVR4 ABI.
+  if (!isTailCall &&
+      !dyn_cast<GlobalAddressSDNode>(Callee) &&
+      !dyn_cast<ExternalSymbolSDNode>(Callee) &&
+      !isBLACompatibleAddress(Callee, DAG)) {
+    // Load r2 into a virtual register and store it to the TOC save area.
+    SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
+    // TOC save area offset.
+    SDValue PtrOff = DAG.getIntPtrConstant(40);
+    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+    Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
+                         false, false, 0);
+    // R12 must contain the address of an indirect callee.  This does not
+    // mean the MTCTR instruction must use R12; it's easier to model this
+    // as an extra parameter, so do that.
+    RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
+  }
+
+  // Build a sequence of copy-to-reg nodes chained together with token chain
+  // and flag operands which copy the outgoing args into the appropriate regs.
+  SDValue InFlag;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                             RegsToPass[i].second, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  if (isTailCall)
+    PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
+                    FPOp, true, TailCallArguments);
+
+  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+                    RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
+                    Ins, InVals);
+}
+
 SDValue
 PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
                                     CallingConv::ID CallConv, bool isVarArg,
@@ -3192,7 +4054,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
                                     DebugLoc dl, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &InVals) const {
 
-  unsigned NumOps  = Outs.size();
+  unsigned NumOps = Outs.size();
 
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   bool isPPC64 = PtrVT == MVT::i64;
@@ -3299,11 +4161,13 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
     }
 
     // FIXME memcpy is used way more than necessary.  Correctness first.
+    // Note: "by value" is code for passing a structure by value, not
+    // basic types.
     if (Flags.isByVal()) {
       unsigned Size = Flags.getByValSize();
+      // Very small objects are passed right-justified.  Everything else is
+      // passed left-justified.
       if (Size==1 || Size==2) {
-        // Very small objects are passed right-justified.
-        // Everything else is passed left-justified.
         EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
         if (GPR_idx != NumGPRs) {
           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
@@ -3314,17 +4178,12 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
 
           ArgOffset += PtrByteSize;
         } else {
-          SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
+          SDValue Const = DAG.getConstant(PtrByteSize - Size,
+                                          PtrOff.getValueType());
           SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
-          SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
-                                CallSeqStart.getNode()->getOperand(0),
-                                Flags, DAG, dl);
-          // This must go outside the CALLSEQ_START..END.
-          SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
-                               CallSeqStart.getNode()->getOperand(1));
-          DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
-                                 NewCallSeqStart.getNode());
-          Chain = CallSeqStart = NewCallSeqStart;
+          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
+                                                            CallSeqStart,
+                                                            Flags, DAG, dl);
           ArgOffset += PtrByteSize;
         }
         continue;
@@ -3332,15 +4191,13 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
       // Copy entire object into memory.  There are cases where gcc-generated
       // code assumes it is there, even if it could be put entirely into
       // registers.  (This is not what the doc says.)
-      SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
-                            CallSeqStart.getNode()->getOperand(0),
-                            Flags, DAG, dl);
-      // This must go outside the CALLSEQ_START..END.
-      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
-                           CallSeqStart.getNode()->getOperand(1));
-      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode());
-      Chain = CallSeqStart = NewCallSeqStart;
-      // And copy the pieces of it that fit into registers.
+      Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
+                                                        CallSeqStart,
+                                                        Flags, DAG, dl);
+
+      // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
+      // copy the pieces of the object that fit into registers from the
+      // parameter save area.
       for (unsigned j=0; j<Size; j+=PtrByteSize) {
         SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
@@ -3409,11 +4266,10 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
               !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
             ++GPR_idx;
         }
-      } else {
+      } else
         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                          isPPC64, isTailCall, false, MemOpChains,
                          TailCallArguments, dl);
-      }
       if (isPPC64)
         ArgOffset += 8;
       else
@@ -3508,22 +4364,6 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                         &MemOpChains[0], MemOpChains.size());
 
-  // Check if this is an indirect call (MTCTR/BCTRL).
-  // See PrepareCall() for more information about calls through function
-  // pointers in the 64-bit SVR4 ABI.
-  if (!isTailCall && isPPC64 && PPCSubTarget.isSVR4ABI() &&
-      !dyn_cast<GlobalAddressSDNode>(Callee) &&
-      !dyn_cast<ExternalSymbolSDNode>(Callee) &&
-      !isBLACompatibleAddress(Callee, DAG)) {
-    // Load r2 into a virtual register and store it to the TOC save area.
-    SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
-    // TOC save area offset.
-    SDValue PtrOff = DAG.getIntPtrConstant(40);
-    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
-    Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
-                         false, false, 0);
-  }
-
   // On Darwin, R12 must contain the address of an indirect callee.  This does
   // not mean the MTCTR instruction must use R12; it's easier to model this as
   // an extra parameter, so do that.
@@ -3588,8 +4428,24 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
   for (unsigned i = 0; i != RVLocs.size(); ++i) {
     CCValAssign &VA = RVLocs[i];
     assert(VA.isRegLoc() && "Can only return in registers!");
-    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
-                             OutVals[i], Flag);
+
+    SDValue Arg = OutVals[i];
+
+    switch (VA.getLocInfo()) {
+    default: llvm_unreachable("Unknown loc info!");
+    case CCValAssign::Full: break;
+    case CCValAssign::AExt:
+      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::ZExt:
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::SExt:
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    }
+
+    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
     Flag = Chain.getValue(1);
   }
 
@@ -3821,7 +4677,52 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
     return SDValue();
 
   if (Op.getOperand(0).getValueType() == MVT::i64) {
-    SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
+    SDValue SINT = Op.getOperand(0);
+    // When converting to single-precision, we actually need to convert
+    // to double-precision first and then round to single-precision.
+    // To avoid double-rounding effects during that operation, we have
+    // to prepare the input operand.  Bits that might be truncated when
+    // converting to double-precision are replaced by a bit that won't
+    // be lost at this stage, but is below the single-precision rounding
+    // position.
+    //
+    // However, if -enable-unsafe-fp-math is in effect, accept double
+    // rounding to avoid the extra overhead.
+    if (Op.getValueType() == MVT::f32 &&
+        !DAG.getTarget().Options.UnsafeFPMath) {
+
+      // Twiddle input to make sure the low 11 bits are zero.  (If this
+      // is the case, we are guaranteed the value will fit into the 53 bit
+      // mantissa of an IEEE double-precision value without rounding.)
+      // If any of those low 11 bits were not zero originally, make sure
+      // bit 12 (value 2048) is set instead, so that the final rounding
+      // to single-precision gets the correct result.
+      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
+                                  SINT, DAG.getConstant(2047, MVT::i64));
+      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
+                          Round, DAG.getConstant(2047, MVT::i64));
+      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
+      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
+                          Round, DAG.getConstant(-2048, MVT::i64));
+
+      // However, we cannot use that value unconditionally: if the magnitude
+      // of the input value is small, the bit-twiddling we did above might
+      // end up visibly changing the output.  Fortunately, in that case, we
+      // don't need to twiddle bits since the original input will convert
+      // exactly to double-precision floating-point already.  Therefore,
+      // construct a conditional to use the original value if the top 11
+      // bits are all sign-bit copies, and use the rounded value computed
+      // above otherwise.
+      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
+                                 SINT, DAG.getConstant(53, MVT::i32));
+      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
+                         Cond, DAG.getConstant(1, MVT::i64));
+      Cond = DAG.getSetCC(dl, MVT::i32,
+                          Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT);
+
+      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
+    }
+    SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
     SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
     if (Op.getValueType() == MVT::f32)
       FP = DAG.getNode(ISD::FP_ROUND, dl,
@@ -5670,6 +6571,14 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
     case 'v':
     case 'y':
       return C_RegisterClass;
+    case 'Z':
+      // FIXME: While Z does indicate a memory constraint, it specifically
+      // indicates an r+r address (used in conjunction with the 'y' modifier
+      // in the replacement string). Currently, we're forcing the base
+      // register to be r0 in the asm printer (which is interpreted as zero)
+      // and forming the complete address in the second register. This is
+      // suboptimal.
+      return C_Memory;
     }
   }
   return TargetLowering::getConstraintType(Constraint);
@@ -5712,6 +6621,9 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
   case 'y':
     weight = CW_Register;
     break;
+  case 'Z':
+    weight = CW_Memory;
+    break;
   }
   return weight;
 }
@@ -5728,9 +6640,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
         return std::make_pair(0U, &PPC::G8RCRegClass);
       return std::make_pair(0U, &PPC::GPRCRegClass);
     case 'f':
-      if (VT == MVT::f32)
+      if (VT == MVT::f32 || VT == MVT::i32)
         return std::make_pair(0U, &PPC::F4RCRegClass);
-      if (VT == MVT::f64)
+      if (VT == MVT::f64 || VT == MVT::i64)
         return std::make_pair(0U, &PPC::F8RCRegClass);
       break;
     case 'v':
@@ -5910,7 +6822,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
   bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
                MFI->hasVarSizedObjects()) &&
                   MFI->getStackSize() &&
-                  !MF.getFunction()->hasFnAttr(Attribute::Naked);
+                  !MF.getFunction()->getAttributes().
+                    hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked);
   unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
                                 (is31 ? PPC::R31 : PPC::R1);
   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
@@ -5933,16 +6846,15 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
 /// lowering. If DstAlign is zero that means it's safe to destination
 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
 /// means there isn't a need to check it against alignment requirement,
-/// probably because the source does not need to be loaded. If
-/// 'IsZeroVal' is true, that means it's safe to return a
-/// non-scalar-integer type, e.g. empty string source, constant, or loaded
-/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
-/// constant so it does not need to be loaded.
+/// probably because the source does not need to be loaded. If 'IsMemset' is
+/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+/// source is constant so it does not need to be loaded.
 /// It returns EVT::Other if the type should be determined using generic
 /// target-independent logic.
 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                            unsigned DstAlign, unsigned SrcAlign,
-                                           bool IsZeroVal,
+                                           bool IsMemset, bool ZeroMemset,
                                            bool MemcpyStrSrc,
                                            MachineFunction &MF) const {
   if (this->PPCSubTarget.isPPC64()) {
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 902b188..12b3df7 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -17,8 +17,8 @@
 
 #include "PPC.h"
 #include "PPCSubtarget.h"
-#include "llvm/Target/TargetLowering.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
 
 namespace llvm {
   namespace PPCISD {
@@ -178,6 +178,65 @@ namespace llvm {
       CR6SET,
       CR6UNSET,
 
+      /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
+      /// TLS model, produces an ADDIS8 instruction that adds the GOT
+      /// base to sym@got@tprel@ha.
+      ADDIS_GOT_TPREL_HA,
+
+      /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
+      /// TLS model, produces a LD instruction with base register G8RReg
+      /// and offset sym@got@tprel@l.  This completes the addition that
+      /// finds the offset of "sym" relative to the thread pointer.
+      LD_GOT_TPREL_L,
+
+      /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
+      /// model, produces an ADD instruction that adds the contents of
+      /// G8RReg to the thread pointer.  Symbol contains a relocation
+      /// sym@tls which is to be replaced by the thread pointer and
+      /// identifies to the linker that the instruction is part of a
+      /// TLS sequence.
+      ADD_TLS,
+
+      /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS
+      /// model, produces an ADDIS8 instruction that adds the GOT base
+      /// register to sym@got@tlsgd@ha.
+      ADDIS_TLSGD_HA,
+
+      /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
+      /// model, produces an ADDI8 instruction that adds G8RReg to
+      /// sym@got@tlsgd@l.
+      ADDI_TLSGD_L,
+
+      /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
+      /// model, produces a call to __tls_get_addr(sym@tlsgd).
+      GET_TLS_ADDR,
+
+      /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
+      /// model, produces an ADDIS8 instruction that adds the GOT base
+      /// register to sym@got@tlsld@ha.
+      ADDIS_TLSLD_HA,
+
+      /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
+      /// model, produces an ADDI8 instruction that adds G8RReg to
+      /// sym@got@tlsld@l.
+      ADDI_TLSLD_L,
+
+      /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
+      /// model, produces a call to __tls_get_addr(sym@tlsld).
+      GET_TLSLD_ADDR,
+
+      /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
+      /// local-dynamic TLS model, produces an ADDIS8 instruction
+      /// that adds X3 to sym@dtprel@ha.  The Chain operand is needed 
+      /// to tie this in place following a copy to %X3 from the result
+      /// of a GET_TLSLD_ADDR.
+      ADDIS_DTPREL_HA,
+
+      /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
+      /// model, produces an ADDI8 instruction that adds G8RReg to
+      /// sym@got@dtprel@l.
+      ADDI_DTPREL_L,
+
       /// STD_32 - This is the STD instruction for use with "32-bit" registers.
       STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
 
@@ -191,7 +250,21 @@ namespace llvm {
       /// byte-swapping load instruction.  It loads "Type" bits, byte swaps it,
       /// then puts it in the bottom bits of the GPRC.  TYPE can be either i16
       /// or i32.
-      LBRX
+      LBRX,
+
+      /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium code model, produces
+      /// an ADDIS8 instruction that adds the TOC base register to sym@toc@ha.
+      ADDIS_TOC_HA,
+
+      /// G8RC = LD_TOC_L Symbol, G8RReg - For medium code model, produces a
+      /// LD instruction with base register G8RReg and offset sym@toc@l.
+      /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
+      LD_TOC_L,
+
+      /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces
+      /// an ADDI8 instruction that adds G8RReg to sym@toc@l.
+      /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
+      ADDI_TOC_L
     };
   }
 
@@ -358,16 +431,15 @@ namespace llvm {
     /// lowering. If DstAlign is zero that means it's safe to destination
     /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
     /// means there isn't a need to check it against alignment requirement,
-    /// probably because the source does not need to be loaded. If
-    /// 'IsZeroVal' is true, that means it's safe to return a
-    /// non-scalar-integer type, e.g. empty string source, constant, or loaded
-    /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
-    /// constant so it does not need to be loaded.
+    /// probably because the source does not need to be loaded. If 'IsMemset' is
+    /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+    /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+    /// source is constant so it does not need to be loaded.
     /// It returns EVT::Other if the type should be determined using generic
     /// target-independent logic.
     virtual EVT
-    getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
-                        bool IsZeroVal, bool MemcpyStrSrc,
+    getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, 
+                        bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                         MachineFunction &MF) const;
 
     /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
@@ -467,20 +539,41 @@ namespace llvm {
                   DebugLoc dl, SelectionDAG &DAG) const;
 
     SDValue
+      extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG,
+                        SDValue ArgVal, DebugLoc dl) const;
+
+    void
+      setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
+                         unsigned nAltivecParamsAtEnd,
+                         unsigned MinReservedArea, bool isPPC64) const;
+
+    SDValue
       LowerFormalArguments_Darwin(SDValue Chain,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const;
     SDValue
-      LowerFormalArguments_SVR4(SDValue Chain,
-                                CallingConv::ID CallConv, bool isVarArg,
-                                const SmallVectorImpl<ISD::InputArg> &Ins,
-                                DebugLoc dl, SelectionDAG &DAG,
-                                SmallVectorImpl<SDValue> &InVals) const;
+      LowerFormalArguments_64SVR4(SDValue Chain,
+                                  CallingConv::ID CallConv, bool isVarArg,
+                                  const SmallVectorImpl<ISD::InputArg> &Ins,
+                                  DebugLoc dl, SelectionDAG &DAG,
+                                  SmallVectorImpl<SDValue> &InVals) const;
+    SDValue
+      LowerFormalArguments_32SVR4(SDValue Chain,
+                                  CallingConv::ID CallConv, bool isVarArg,
+                                  const SmallVectorImpl<ISD::InputArg> &Ins,
+                                  DebugLoc dl, SelectionDAG &DAG,
+                                  SmallVectorImpl<SDValue> &InVals) const;
 
     SDValue
-      LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+      createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
+                                 SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
+                                 SelectionDAG &DAG, DebugLoc dl) const;
+
+    SDValue
+      LowerCall_Darwin(SDValue Chain, SDValue Callee,
+                       CallingConv::ID CallConv,
                        bool isVarArg, bool isTailCall,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
@@ -488,13 +581,22 @@ namespace llvm {
                        DebugLoc dl, SelectionDAG &DAG,
                        SmallVectorImpl<SDValue> &InVals) const;
     SDValue
-    LowerCall_SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
-                   bool isVarArg, bool isTailCall,
-                   const SmallVectorImpl<ISD::OutputArg> &Outs,
-                   const SmallVectorImpl<SDValue> &OutVals,
-                   const SmallVectorImpl<ISD::InputArg> &Ins,
-                   DebugLoc dl, SelectionDAG &DAG,
-                   SmallVectorImpl<SDValue> &InVals) const;
+      LowerCall_64SVR4(SDValue Chain, SDValue Callee,
+                       CallingConv::ID CallConv,
+                       bool isVarArg, bool isTailCall,
+                       const SmallVectorImpl<ISD::OutputArg> &Outs,
+                       const SmallVectorImpl<SDValue> &OutVals,
+                       const SmallVectorImpl<ISD::InputArg> &Ins,
+                       DebugLoc dl, SelectionDAG &DAG,
+                       SmallVectorImpl<SDValue> &InVals) const;
+    SDValue
+    LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+                     bool isVarArg, bool isTailCall,
+                     const SmallVectorImpl<ISD::OutputArg> &Outs,
+                     const SmallVectorImpl<SDValue> &OutVals,
+                     const SmallVectorImpl<ISD::InputArg> &Ins,
+                     DebugLoc dl, SelectionDAG &DAG,
+                     SmallVectorImpl<SDValue> &InVals) const;
   };
 }
 
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index cfe71d17..1dd5415 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -32,6 +32,15 @@ def symbolLo64 : Operand<i64> {
 def tocentry : Operand<iPTR> {
   let MIOperandInfo = (ops i32imm:$imm);
 }
+def memrs : Operand<iPTR> {   // memri where the immediate is a symbolLo64
+  let PrintMethod = "printMemRegImm";
+  let EncoderMethod = "getMemRIXEncoding";
+  let MIOperandInfo = (ops symbolLo64:$off, ptr_rc:$reg);
+}
+def tlsreg : Operand<i64> {
+  let EncoderMethod = "getTLSRegEncoding";
+}
+def tlsgd : Operand<i64> {}
 
 //===----------------------------------------------------------------------===//
 // 64-bit transformation functions.
@@ -63,7 +72,7 @@ def HI48_64 : SDNodeXForm<imm, [{
 //
 
 let Defs = [LR8] in
-  def MovePCtoLR8 : Pseudo<(outs), (ins), "", []>,
+  def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>,
                     PPC970_Unit_BRU;
 
 // Darwin ABI Calls.
@@ -99,6 +108,16 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
                              (outs), (ins calltarget:$func),
                              "bl $func\n\tnop", BrB, []>;
 
+    let isCodeGenOnly = 1 in
+    def BL8_NOP_ELF_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24,
+                                  (outs), (ins calltarget:$func, tlsgd:$sym),
+                                  "bl $func($sym)\n\tnop", BrB, []>;
+
+    let isCodeGenOnly = 1 in
+    def BL8_NOP_ELF_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24,
+                                  (outs), (ins calltarget:$func, tlsgd:$sym),
+                                  "bl $func($sym)\n\tnop", BrB, []>;
+
     def BLA8_ELF : IForm<18, 1, 1,
                          (outs), (ins aaddr:$func),
                          "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
@@ -141,31 +160,31 @@ def : Pat<(PPCnop),
 let usesCustomInserter = 1 in {
   let Defs = [CR0] in {
     def ATOMIC_LOAD_ADD_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_ADD_I64",
       [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>;
     def ATOMIC_LOAD_SUB_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_SUB_I64",
       [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>;
     def ATOMIC_LOAD_OR_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_OR_I64",
       [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>;
     def ATOMIC_LOAD_XOR_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_XOR_I64",
       [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>;
     def ATOMIC_LOAD_AND_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_AND_i64",
       [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>;
     def ATOMIC_LOAD_NAND_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_NAND_I64",
       [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>;
 
     def ATOMIC_CMP_SWAP_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "",
+      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "#ATOMIC_CMP_SWAP_I64",
       [(set G8RC:$dst, 
                     (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>;
 
     def ATOMIC_SWAP_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "",
+      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "#ATOMIC_SWAP_I64",
       [(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>;
   }
 }
@@ -234,10 +253,10 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
 
 let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
   let Defs = [CTR8], Uses = [CTR8] in {
-    def BDZ8  : IForm_ext<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
-                         "bdz $dst",  BrB, []>;
-    def BDNZ8 : IForm_ext<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
-                         "bdnz $dst", BrB, []>;
+    def BDZ8  : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+                        "bdz $dst">;
+    def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+                        "bdnz $dst">;
   }
 }
 
@@ -247,7 +266,7 @@ def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS),
             PPC970_MicroCode, PPC970_Unit_CRU;
 
 def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM),
-                       "", SprMFCR>,
+                       "#MFCR8pseud", SprMFCR>,
             PPC970_MicroCode, PPC970_Unit_CRU;
             
 def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins),
@@ -278,7 +297,7 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
 // the POWER3.
 
 let Defs = [X1], Uses = [X1] in
-def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"",
+def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"#DYNALLOC8",
                        [(set G8RC:$result,
                              (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>;
 
@@ -359,6 +378,11 @@ def XORIS8  : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
 def ADD8  : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
                      "add $rT, $rA, $rB", IntSimple,
                      [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>;
+// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
+// initial-exec thread-local storage model.
+def ADD8TLS  : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB),
+                        "add $rT, $rA, $rB@tls", IntSimple,
+                        [(set G8RC:$rT, (add G8RC:$rA, tglobaltlsaddr:$rB))]>;
                      
 let Defs = [CARRY] in {
 def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
@@ -494,16 +518,16 @@ def RLDIMI : MDForm_1<30, 3,
 
 // Rotate instructions.
 def RLDCL  : MDForm_1<30, 0,
-                      (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MB),
-                      "rldcl $rA, $rS, $rB, $MB", IntRotateD,
+                      (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MBE),
+                      "rldcl $rA, $rS, $rB, $MBE", IntRotateD,
                       []>, isPPC64;
 def RLDICL : MDForm_1<30, 0,
-                      (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MB),
-                      "rldicl $rA, $rS, $SH, $MB", IntRotateDI,
+                      (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE),
+                      "rldicl $rA, $rS, $SH, $MBE", IntRotateDI,
                       []>, isPPC64;
 def RLDICR : MDForm_1<30, 1,
-                      (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME),
-                      "rldicr $rA, $rS, $SH, $ME", IntRotateDI,
+                      (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE),
+                      "rldicr $rA, $rS, $SH, $MBE", IntRotateDI,
                       []>, isPPC64;
 
 def RLWINM8 : MForm_2<21,
@@ -511,7 +535,7 @@ def RLWINM8 : MForm_2<21,
                      "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
                      []>;
 
-def ISEL8   : AForm_1<31, 15,
+def ISEL8   : AForm_4<31, 15,
                      (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond),
                      "isel $rT, $rA, $rB, $cond", IntGeneral,
                      []>;
@@ -556,7 +580,7 @@ def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
                     "lhaux $rD, $addr", LdStLHAU,
                     []>, RegConstraint<"$addr.offreg = $ea_result">,
                     NoEncode<"$ea_result">;
-def LWAUX : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result),
                     (ins memrr:$addr),
                     "lwaux $rD, $addr", LdStLHAU,
                     []>, RegConstraint<"$addr.offreg = $ea_result">,
@@ -606,7 +630,7 @@ def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result),
                    "lbzux $rD, $addr", LdStLoadUpd,
                    []>, RegConstraint<"$addr.offreg = $ea_result">,
                    NoEncode<"$ea_result">;
-def LHZUX8 : XForm_1<31, 331, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result),
                    (ins memrr:$addr),
                    "lhzux $rD, $addr", LdStLoadUpd,
                    []>, RegConstraint<"$addr.offreg = $ea_result">,
@@ -625,27 +649,33 @@ let canFoldAsLoad = 1, PPC970_Unit = 2 in {
 def LD   : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
                     "ld $rD, $src", LdStLD,
                     [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
+def LDrs : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrs:$src),
+                    "ld $rD, $src", LdStLD,
+                    []>, isPPC64;
+// The following three definitions are selected for small code model only.
+// Otherwise, we need to create two instructions to form a 32-bit offset,
+// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
 def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
-                  "",
+                  "#LDtoc",
                   [(set G8RC:$rD,
                      (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
 def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
-                  "",
+                  "#LDtocJTI",
                   [(set G8RC:$rD,
                      (PPCtoc_entry tjumptable:$disp, G8RC:$reg))]>, isPPC64;
 def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
-                  "",
+                  "#LDtocCPT",
                   [(set G8RC:$rD,
                      (PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64;
 
 let hasSideEffects = 1 in { 
-let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
-def LDinto_toc: DSForm_1<58, 0, (outs), (ins G8RC:$reg),
+let RST = 2, DS = 2 in
+def LDinto_toc: DSForm_1a<58, 0, (outs), (ins G8RC:$reg),
                     "ld 2, 8($reg)", LdStLD,
                     [(PPCload_toc G8RC:$reg)]>, isPPC64;
                     
-let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
-def LDtoc_restore : DSForm_1<58, 0, (outs), (ins),
+let RST = 2, DS = 10, RA = 1 in
+def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
                     "ld 2, 40(1)", LdStLD,
                     [(PPCtoc_restore)]>, isPPC64;
 }
@@ -671,6 +701,77 @@ def : Pat<(PPCload ixaddr:$src),
 def : Pat<(PPCload xaddr:$src),
           (LDX xaddr:$src)>;
 
+// Support for medium code model.
+def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp),
+                       "#ADDIStocHA",
+                       [(set G8RC:$rD,
+                         (PPCaddisTocHA G8RC:$reg, tglobaladdr:$disp))]>,
+                       isPPC64;
+def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+                   "#LDtocL",
+                   [(set G8RC:$rD,
+                     (PPCldTocL tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
+def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp),
+                     "#ADDItocL",
+                     [(set G8RC:$rD,
+                       (PPCaddiTocL G8RC:$reg, tglobaladdr:$disp))]>, isPPC64;
+
+// Support for thread-local storage.
+def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp),
+                         "#ADDISgotTprelHA",
+                         [(set G8RC:$rD,
+                           (PPCaddisGotTprelHA G8RC:$reg,
+                                               tglobaltlsaddr:$disp))]>,
+                  isPPC64;
+def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC:$reg),
+                        "#LDgotTprelL",
+                        [(set G8RC:$rD,
+                          (PPCldGotTprelL tglobaltlsaddr:$disp, G8RC:$reg))]>,
+                 isPPC64;
+def : Pat<(PPCaddTls G8RC:$in, tglobaltlsaddr:$g),
+          (ADD8TLS G8RC:$in, tglobaltlsaddr:$g)>;
+def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp),
+                         "#ADDIStlsgdHA",
+                         [(set G8RC:$rD,
+                           (PPCaddisTlsgdHA G8RC:$reg, tglobaltlsaddr:$disp))]>,
+                  isPPC64;
+def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp),
+                       "#ADDItlsgdL",
+                       [(set G8RC:$rD,
+                         (PPCaddiTlsgdL G8RC:$reg, tglobaltlsaddr:$disp))]>,
+                 isPPC64;
+def GETtlsADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
+                        "#GETtlsADDR",
+                        [(set G8RC:$rD,
+                          (PPCgetTlsAddr G8RC:$reg, tglobaltlsaddr:$sym))]>,
+                 isPPC64;
+def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp),
+                         "#ADDIStlsldHA",
+                         [(set G8RC:$rD,
+                           (PPCaddisTlsldHA G8RC:$reg, tglobaltlsaddr:$disp))]>,
+                  isPPC64;
+def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp),
+                       "#ADDItlsldL",
+                       [(set G8RC:$rD,
+                         (PPCaddiTlsldL G8RC:$reg, tglobaltlsaddr:$disp))]>,
+                 isPPC64;
+def GETtlsldADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
+                          "#GETtlsldADDR",
+                          [(set G8RC:$rD,
+                            (PPCgetTlsldAddr G8RC:$reg, tglobaltlsaddr:$sym))]>,
+                   isPPC64;
+def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp),
+                          "#ADDISdtprelHA",
+                          [(set G8RC:$rD,
+                            (PPCaddisDtprelHA G8RC:$reg,
+                                              tglobaltlsaddr:$disp))]>,
+                   isPPC64;
+def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp),
+                         "#ADDIdtprelL",
+                         [(set G8RC:$rD,
+                           (PPCaddiDtprelL G8RC:$reg, tglobaltlsaddr:$disp))]>,
+                  isPPC64;
+
 let PPC970_Unit = 2 in {
 // Truncating stores.                       
 def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src),
@@ -706,7 +807,7 @@ def STDX  : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
 
 let PPC970_Unit = 2 in {
 
-def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
                              symbolLo:$ptroff, ptr_rc:$ptrreg),
                     "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
                     [(set ptr_rc:$ea_res,
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index b0b8423..0cf28ae 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -158,10 +158,6 @@ def vecspltisw : PatLeaf<(build_vector), [{
   return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != 0;
 }], VSPLTISW_get_imm>;
 
-def V_immneg0 : PatLeaf<(build_vector), [{
-  return PPC::isAllNegativeZeroVector(N);
-}]>;
-
 //===----------------------------------------------------------------------===//
 // Helpers for defining instructions that directly correspond to intrinsics.
 
@@ -340,6 +336,28 @@ def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
                       "vctuxs $vD, $vB, $UIMM", VecFP,
                       [(set VRRC:$vD,
                              (int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>;
+
+// Defines with the UIM field set to 0 for floating-point
+// to integer (fp_to_sint/fp_to_uint) conversions and integer
+// to floating-point (sint_to_fp/uint_to_fp) conversions.
+let VA = 0 in {
+def VCFSX_0 : VXForm_1<842, (outs VRRC:$vD), (ins VRRC:$vB),
+                       "vcfsx $vD, $vB, 0", VecFP,
+                       [(set VRRC:$vD,
+                             (int_ppc_altivec_vcfsx VRRC:$vB, 0))]>;
+def VCTUXS_0 : VXForm_1<906, (outs VRRC:$vD), (ins VRRC:$vB),
+                        "vctuxs $vD, $vB, 0", VecFP,
+                        [(set VRRC:$vD,
+                               (int_ppc_altivec_vctuxs VRRC:$vB, 0))]>;
+def VCFUX_0 : VXForm_1<778, (outs VRRC:$vD), (ins VRRC:$vB),
+                       "vcfux $vD, $vB, 0", VecFP,
+                       [(set VRRC:$vD,
+                               (int_ppc_altivec_vcfux VRRC:$vB, 0))]>;
+def VCTSXS_0 : VXForm_1<970, (outs VRRC:$vD), (ins VRRC:$vB),
+                      "vctsxs $vD, $vB, 0", VecFP,
+                      [(set VRRC:$vD,
+                             (int_ppc_altivec_vctsxs VRRC:$vB, 0))]>;
+}
 def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>;
 def VLOGEFP  : VX2_Int<458, "vlogefp",  int_ppc_altivec_vlogefp>;
 
@@ -563,7 +581,12 @@ def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
 def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins),
                       "vxor $vD, $vD, $vD", VecFP,
                       [(set VRRC:$vD, (v4i32 immAllZerosV))]>;
+let IMM=-1 in {
+def V_SETALLONES : VXForm_3<908, (outs VRRC:$vD), (ins),
+                      "vspltisw $vD, -1", VecFP,
+                      [(set VRRC:$vD, (v4i32 immAllOnesV))]>;
 }
+} // VALU Operations.
 
 //===----------------------------------------------------------------------===//
 // Additional Altivec Patterns
@@ -650,7 +673,8 @@ def : Pat<(v4i32 (and VRRC:$A, (vnot_ppc VRRC:$B))),
           (VANDC VRRC:$A, VRRC:$B)>;
 
 def : Pat<(fmul VRRC:$vA, VRRC:$vB),
-          (VMADDFP VRRC:$vA, VRRC:$vB, (v4i32 (V_SET0)))>; 
+          (VMADDFP VRRC:$vA, VRRC:$vB,
+             (v4i32 (VSLW (V_SETALLONES), (V_SETALLONES))))>; 
 
 // Fused multiply add and multiply sub for packed float.  These are represented
 // separately from the real instructions above, for operations that must have
@@ -689,3 +713,23 @@ def : Pat<(v8i16 (sra (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
           (v8i16 (VSRAH VRRC:$vA, VRRC:$vB))>;
 def : Pat<(v4i32 (sra (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
           (v4i32 (VSRAW VRRC:$vA, VRRC:$vB))>;
+
+// Float to integer and integer to float conversions
+def : Pat<(v4i32 (fp_to_sint (v4f32 VRRC:$vA))),
+           (VCTSXS_0 VRRC:$vA)>;
+def : Pat<(v4i32 (fp_to_uint (v4f32 VRRC:$vA))),
+           (VCTUXS_0 VRRC:$vA)>;
+def : Pat<(v4f32 (sint_to_fp (v4i32 VRRC:$vA))),
+           (VCFSX_0 VRRC:$vA)>;
+def : Pat<(v4f32 (uint_to_fp (v4i32 VRRC:$vA))),
+           (VCFUX_0 VRRC:$vA)>;
+
+// Floating-point rounding
+def : Pat<(v4f32 (ffloor (v4f32 VRRC:$vA))),
+          (VRFIM VRRC:$vA)>;
+def : Pat<(v4f32 (fceil (v4f32 VRRC:$vA))),
+          (VRFIP VRRC:$vA)>;
+def : Pat<(v4f32 (ftrunc (v4f32 VRRC:$vA))),
+          (VRFIZ VRRC:$vA)>;
+def : Pat<(v4f32 (fnearbyint (v4f32 VRRC:$vA))),
+          (VRFIN VRRC:$vA)>;
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index a41a027..c3c171c 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -94,12 +94,6 @@ class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
   let Inst{31}    = lk;
 }
 
-class IForm_ext<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
-            string asmstr, InstrItinClass itin, list<dag> pattern>
-         : IForm<opcode, aa, lk, OOL, IOL, asmstr, itin, pattern> {
-  let LI{0-4} = bo;
-}
-
 // 1.7.2 B-Form
 class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr>
   : I<opcode, OOL, IOL, asmstr, BrB> {
@@ -118,6 +112,13 @@ class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr>
   let Inst{31}    = lk;
 }
 
+class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
+             string asmstr>
+  : BForm<opcode, aa, lk, OOL, IOL, asmstr> {
+  let BIBO{4-0} = bo;
+  let BIBO{6-5} = 0;
+  let CR = 0;
+}
 
 // 1.7.4 D-Form
 class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
@@ -625,9 +626,9 @@ class XFXForm_5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
                 InstrItinClass itin> 
   : I<opcode, OOL, IOL, asmstr, itin> {
   bits<8>  FXM;
-  bits<5>  ST;
+  bits<5>  rS;
    
-  let Inst{6-10}  = ST;
+  let Inst{6-10}  = rS;
   let Inst{11}    = 0;
   let Inst{12-19} = FXM;
   let Inst{20}    = 0;
@@ -666,7 +667,7 @@ class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
                       string cstr, InstrItinClass itin, list<dag>pattern>
   : I<opcode, OOL, IOL, asmstr, itin> {
   bits<8> FM;
-  bits<5> RT;
+  bits<5> rT;
 
   bit RC = 0;    // set by isDOT
   let Pattern = pattern;
@@ -675,7 +676,7 @@ class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
   let Inst{6} = 0;
   let Inst{7-14}  = FM;
   let Inst{15} = 0;
-  let Inst{16-20} = RT;
+  let Inst{16-20} = rT;
   let Inst{21-30} = xo;
   let Inst{31}    = RC;
 }
@@ -758,6 +759,26 @@ class AForm_3<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
   let FRB = 0;
 }
 
+class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, 
+              InstrItinClass itin, list<dag> pattern>
+         : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<5> RT;
+  bits<5> RA;
+  bits<5> RB;
+  bits<7> BIBO;  // 2 bits of BI and 5 bits of BO (must be 12).
+  bits<3> CR;
+
+  let Pattern = pattern;
+
+  let Inst{6-10}  = RT;
+  let Inst{11-15} = RA;
+  let Inst{16-20} = RB;
+  let Inst{21-23} = CR;
+  let Inst{24-25} = BIBO{6-5};
+  let Inst{26-30} = xo;
+  let Inst{31}    = 0;
+}
+
 // 1.7.13 M-Form
 class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index d2df664..a0517a8 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -12,12 +12,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
 #include "PPC.h"
+#include "PPCHazardRecognizers.h"
 #include "PPCInstrBuilder.h"
 #include "PPCMachineFunctionInfo.h"
 #include "PPCTargetMachine.h"
-#include "PPCHazardRecognizers.h"
-#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
@@ -28,7 +29,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/STLExtras.h"
 
 #define GET_INSTRINFO_CTOR
 #include "PPCGenInstrInfo.inc"
@@ -60,7 +60,7 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
     return new PPCScoreboardHazardRecognizer(II, DAG);
   }
 
-  return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
+  return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
 }
 
 /// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
@@ -141,7 +141,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
 
   // Normal instructions can be commuted the obvious way.
   if (MI->getOpcode() != PPC::RLWIMI)
-    return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+    return TargetInstrInfo::commuteInstruction(MI, NewMI);
 
   // Cannot commute if it has a non-zero rotate count.
   if (MI->getOperand(3).getImm() != 0)
@@ -570,12 +570,15 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
     // STVX VAL, 0, R0
     //
     // FIXME: We use R0 here, because it isn't available for RA.
-    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
+    bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
+    unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
+    unsigned GPR0  = Is64Bit ? PPC::X0    : PPC::R0;
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
                                        FrameIdx, 0, 0));
     NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX))
                      .addReg(SrcReg, getKillRegState(isKill))
-                     .addReg(PPC::R0)
-                     .addReg(PPC::R0));
+                     .addReg(GPR0)
+                     .addReg(GPR0));
   } else {
     llvm_unreachable("Unknown regclass!");
   }
@@ -707,10 +710,13 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
     // Dest = LVX 0, R0
     //
     // FIXME: We use R0 here, because it isn't available for RA.
-    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
+    bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
+    unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
+    unsigned GPR0  = Is64Bit ? PPC::X0    : PPC::R0;
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
                                        FrameIdx, 0, 0));
-    NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(PPC::R0)
-                     .addReg(PPC::R0));
+    NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0)
+                     .addReg(GPR0));
   } else {
     llvm_unreachable("Unknown regclass!");
   }
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index a503908..8c077b7 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -91,6 +91,20 @@ def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
 def PPCvmaddfp  : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
 def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
 
+def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>;
+def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
+                            [SDNPMayLoad]>;
+def PPCaddTls     : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
+def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
+def PPCaddiTlsgdL   : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
+def PPCgetTlsAddr   : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
+def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
+def PPCaddiTlsldL   : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
+def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
+def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
+                              [SDNPHasChain]>;
+def PPCaddiDtprelL   : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
+
 def PPCvperm    : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
 
 // These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
@@ -167,6 +181,12 @@ def PPClarx      : SDNode<"PPCISD::LARX", SDT_PPClarx,
 def PPCstcx      : SDNode<"PPCISD::STCX", SDT_PPCstcx,
                           [SDNPHasChain, SDNPMayStore]>;
 
+// Instructions to support medium code model
+def PPCaddisTocHA : SDNode<"PPCISD::ADDIS_TOC_HA", SDTIntBinOp, []>;
+def PPCldTocL     : SDNode<"PPCISD::LD_TOC_L", SDTIntBinOp, [SDNPMayLoad]>;
+def PPCaddiTocL   : SDNode<"PPCISD::ADDI_TOC_L", SDTIntBinOp, []>;
+
+
 // Instructions to support dynamic alloca.
 def SDTDynOp  : SDTypeProfile<1, 2, []>;
 def PPCdynalloc   : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
@@ -369,9 +389,9 @@ def IsBookE  : Predicate<"PPCSubTarget.isBookE()">;
 
 let hasCtrlDep = 1 in {
 let Defs = [R1], Uses = [R1] in {
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "",
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "#ADJCALLSTACKDOWN $amt",
                               [(callseq_start timm:$amt)]>;
-def ADJCALLSTACKUP   : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "",
+def ADJCALLSTACKUP   : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "#ADJCALLSTACKUP $amt1 $amt2",
                               [(callseq_end timm:$amt1, timm:$amt2)]>;
 }
 
@@ -380,7 +400,7 @@ def UPDATE_VRSAVE    : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS),
 }
 
 let Defs = [R1], Uses = [R1] in
-def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "",
+def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "#DYNALLOC",
                        [(set GPRC:$result,
                              (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>;
                          
@@ -389,19 +409,19 @@ def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "",
 let usesCustomInserter = 1,    // Expanded after instruction selection.
     PPC970_Single = 1 in {
   def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F,
-                              i32imm:$BROPC), "",
+                              i32imm:$BROPC), "#SELECT_CC_I4",
                               []>;
   def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F,
-                              i32imm:$BROPC), "",
+                              i32imm:$BROPC), "#SELECT_CC_I8",
                               []>;
   def SELECT_CC_F4  : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F,
-                              i32imm:$BROPC), "",
+                              i32imm:$BROPC), "#SELECT_CC_F4",
                               []>;
   def SELECT_CC_F8  : Pseudo<(outs F8RC:$dst), (ins CRRC:$cond, F8RC:$T, F8RC:$F,
-                              i32imm:$BROPC), "",
+                              i32imm:$BROPC), "#SELECT_CC_F8",
                               []>;
   def SELECT_CC_VRRC: Pseudo<(outs VRRC:$dst), (ins CRRC:$cond, VRRC:$T, VRRC:$F,
-                              i32imm:$BROPC), "",
+                              i32imm:$BROPC), "#SELECT_CC_VRRC",
                               []>;
 }
 
@@ -409,16 +429,16 @@ let usesCustomInserter = 1,    // Expanded after instruction selection.
 // scavenge a register for it.
 let mayStore = 1 in
 def SPILL_CR : Pseudo<(outs), (ins CRRC:$cond, memri:$F),
-                     "", []>;
+                     "#SPILL_CR", []>;
 
 // RESTORE_CR - Indicate that we're restoring the CR register (previously
 // spilled), so we'll need to scavenge a register for it.
 let mayLoad = 1 in
 def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F),
-                     "", []>;
+                     "#RESTORE_CR", []>;
 
 let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
-  let isReturn = 1, Uses = [LR, RM] in
+  let isCodeGenOnly = 1, isReturn = 1, Uses = [LR, RM] in
     def BLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$p),
                           "b${p:cc}lr ${p:reg}", BrB, 
                           [(retflag)]>;
@@ -427,7 +447,7 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
 }
 
 let Defs = [LR] in
-  def MovePCtoLR : Pseudo<(outs), (ins), "", []>,
+  def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>,
                    PPC970_Unit_BRU;
 
 let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
@@ -439,16 +459,17 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
 
   // BCC represents an arbitrary conditional branch on a predicate.
   // FIXME: should be able to write a pattern for PPCcondbranch, but can't use
-  // a two-value operand where a dag node expects two operands. :( 
-  def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
-                  "b${cond:cc} ${cond:reg}, $dst"
-                  /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
+  // a two-value operand where a dag node expects two operands. :(
+  let isCodeGenOnly = 1 in
+    def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
+                    "b${cond:cc} ${cond:reg}, $dst"
+                    /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
 
   let Defs = [CTR], Uses = [CTR] in {
-    def BDZ  : IForm_ext<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
-                         "bdz $dst",  BrB, []>;
-    def BDNZ : IForm_ext<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
-                         "bdnz $dst", BrB, []>;
+    def BDZ  : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+                       "bdz $dst">;
+    def BDNZ : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+                       "bdnz $dst">;
   }
 }
 
@@ -564,81 +585,81 @@ def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
 let usesCustomInserter = 1 in {
   let Defs = [CR0] in {
     def ATOMIC_LOAD_ADD_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I8",
       [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_SUB_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I8",
       [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_AND_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I8",
       [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_OR_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I8",
       [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_XOR_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "ATOMIC_LOAD_XOR_I8",
       [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_NAND_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I8",
       [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_ADD_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I16",
       [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_SUB_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I16",
       [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_AND_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I16",
       [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_OR_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I16",
       [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_XOR_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I16",
       [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_NAND_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I16",
       [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_ADD_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I32",
       [(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_SUB_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I32",
       [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_AND_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I32",
       [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_OR_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I32",
       [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_XOR_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I32",
       [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_NAND_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I32",
       [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>;
 
     def ATOMIC_CMP_SWAP_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I8",
       [(set GPRC:$dst, 
                     (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
     def ATOMIC_CMP_SWAP_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
       [(set GPRC:$dst, 
                     (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
     def ATOMIC_CMP_SWAP_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
       [(set GPRC:$dst, 
                     (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
 
     def ATOMIC_SWAP_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_i8",
       [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>;
     def ATOMIC_SWAP_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I16",
       [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>;
     def ATOMIC_SWAP_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I32",
       [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>;
   }
 }
@@ -731,7 +752,7 @@ def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result),
                    []>, RegConstraint<"$addr.offreg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LHZUX : XForm_1<31, 331, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc:$ea_result),
                    (ins memrr:$addr),
                    "lhzux $rD, $addr", LdStLoadUpd,
                    []>, RegConstraint<"$addr.offreg = $ea_result">,
@@ -1207,7 +1228,7 @@ def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
 //
 // FIXME: Make this a real Pseudo instruction when the JIT switches to MC.
 def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
-                       "", SprMFCR>,
+                       "#MFCRpseud", SprMFCR>,
             PPC970_MicroCode, PPC970_Unit_CRU;
             
 def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins),
@@ -1394,13 +1415,13 @@ let Uses = [RM] in {
                       "fdivs $FRT, $FRA, $FRB", FPDivS,
                       [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>;
   def FMUL  : AForm_3<63, 25,
-                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
-                      "fmul $FRT, $FRA, $FRB", FPFused,
-                      [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRB))]>;
+                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC),
+                      "fmul $FRT, $FRA, $FRC", FPFused,
+                      [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRC))]>;
   def FMULS : AForm_3<59, 25,
-                      (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
-                      "fmuls $FRT, $FRA, $FRB", FPGeneral,
-                      [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>;
+                      (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC),
+                      "fmuls $FRT, $FRA, $FRC", FPGeneral,
+                      [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRC))]>;
   def FSUB  : AForm_2<63, 20,
                       (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
                       "fsub $FRT, $FRA, $FRB", FPAddSub,
@@ -1413,7 +1434,7 @@ let Uses = [RM] in {
 }
 
 let PPC970_Unit = 1 in {  // FXU Operations.
-  def ISEL  : AForm_1<31, 15,
+  def ISEL  : AForm_4<31, 15,
                      (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond),
                      "isel $rT, $rA, $rB, $cond", IntGeneral,
                      []>;
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index aba2739..851de17 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -15,10 +15,10 @@
 #include "PPCJITInfo.h"
 #include "PPCRelocations.h"
 #include "PPCTargetMachine.h"
-#include "llvm/Function.h"
-#include "llvm/Support/Memory.h"
+#include "llvm/IR/Function.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Memory.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
index 2f8243a..46d4a08 100644
--- a/lib/Target/PowerPC/PPCJITInfo.h
+++ b/lib/Target/PowerPC/PPCJITInfo.h
@@ -14,8 +14,8 @@
 #ifndef POWERPC_JITINFO_H
 #define POWERPC_JITINFO_H
 
-#include "llvm/Target/TargetJITInfo.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Target/TargetJITInfo.h"
 
 namespace llvm {
   class PPCTargetMachine;
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 19ec993..73f7a2c 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPC.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
@@ -20,7 +21,6 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/ADT/SmallString.h"
 using namespace llvm;
 
 static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index ab8bf1f..378c147 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -15,32 +15,32 @@
 #define DEBUG_TYPE "reginfo"
 #include "PPCRegisterInfo.h"
 #include "PPC.h"
+#include "PPCFrameLowering.h"
 #include "PPCInstrBuilder.h"
 #include "PPCMachineFunctionInfo.h"
-#include "PPCFrameLowering.h"
 #include "PPCSubtarget.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
 #include <cstdlib>
 
 #define GET_REGINFO_TARGET_DESC
@@ -71,7 +71,7 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
   : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
                        ST.isPPC64() ? 0 : 1,
                        ST.isPPC64() ? 0 : 1),
-    Subtarget(ST), TII(tii) {
+    Subtarget(ST), TII(tii), CRSpillFrameIdx(0) {
   ImmToIdxMap[PPC::LD]   = PPC::LDX;    ImmToIdxMap[PPC::STD]  = PPC::STDX;
   ImmToIdxMap[PPC::LBZ]  = PPC::LBZX;   ImmToIdxMap[PPC::STB]  = PPC::STBX;
   ImmToIdxMap[PPC::LHZ]  = PPC::LHZX;   ImmToIdxMap[PPC::LHA]  = PPC::LHAX;
@@ -111,10 +111,15 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     return Subtarget.isPPC64() ? CSR_Darwin64_SaveList :
                                  CSR_Darwin32_SaveList;
 
+  // For 32-bit SVR4, also initialize the frame index associated with
+  // the CR spill slot.
+  if (!Subtarget.isPPC64())
+    CRSpillFrameIdx = 0;
+
   return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList;
 }
 
-const unsigned*
+const uint32_t*
 PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
   if (Subtarget.isDarwinABI())
     return Subtarget.isPPC64() ? CSR_Darwin64_RegMask :
@@ -477,6 +482,32 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
   MBB.erase(II);
 }
 
+bool
+PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
+				      unsigned Reg, int &FrameIdx) const {
+
+  // For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
+  // ABI, return true to prevent allocating an additional frame slot.
+  // For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0
+  // is arbitrary and will be subsequently ignored.  For 32-bit, we must
+  // create exactly one stack slot and return its FrameIdx for all
+  // nonvolatiles.
+  if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) {
+    if (Subtarget.isPPC64()) {
+      FrameIdx = 0;
+    } else if (CRSpillFrameIdx) {
+      FrameIdx = CRSpillFrameIdx;
+    } else {
+      MachineFrameInfo *MFI = 
+        (const_cast<MachineFunction &>(MF)).getFrameInfo();
+      FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
+      CRSpillFrameIdx = FrameIdx;
+    }
+    return true;
+  }
+  return false;
+}
+
 void
 PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                      int SPAdj, RegScavenger *RS) const {
@@ -566,7 +597,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   // to Offset to get the correct offset.
   // Naked functions have stack size 0, although getStackSize may not reflect that
   // because we didn't call all the pieces that compute it for naked functions.
-  if (!MF.getFunction()->hasFnAttr(Attribute::Naked))
+  if (!MF.getFunction()->getAttributes().
+        hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked))
     Offset += MFI->getStackSize();
 
   // If we can, encode the offset directly into the instruction.  If this is a
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 152c36d..a8fd796 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -30,6 +30,7 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
   std::map<unsigned, unsigned> ImmToIdxMap;
   const PPCSubtarget &Subtarget;
   const TargetInstrInfo &TII;
+  mutable int CRSpillFrameIdx;
 public:
   PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii);
   
@@ -43,7 +44,7 @@ public:
 
   /// Code Generation virtual methods...
   const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
-  const unsigned *getCallPreservedMask(CallingConv::ID CC) const;
+  const uint32_t *getCallPreservedMask(CallingConv::ID CC) const;
 
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
@@ -65,6 +66,8 @@ public:
                        int SPAdj, RegScavenger *RS) const;
   void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex,
                        int SPAdj, RegScavenger *RS) const;
+  bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
+			    int &FrameIdx) const;
   void eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, RegScavenger *RS = NULL) const;
 
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index bb193ac..d9b4e30 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -12,12 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCSubtarget.h"
-#include "PPCRegisterInfo.h"
 #include "PPC.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Target/TargetMachine.h"
+#include "PPCRegisterInfo.h"
+#include "llvm/IR/GlobalValue.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
 #include <cstdlib>
 
 #define GET_SUBTARGETINFO_TARGET_DESC
@@ -54,19 +54,26 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
     CPUName = sys::getHostCPUName();
 #endif
 
-  // Parse features string.
-  ParseSubtargetFeatures(CPUName, FS);
-
   // Initialize scheduling itinerary for the specified CPU.
   InstrItins = getInstrItineraryForCPU(CPUName);
 
+  // Make sure 64-bit features are available when CPUname is generic
+  std::string FullFS = FS;
+
   // If we are generating code for ppc64, verify that options make sense.
   if (is64Bit) {
     Has64BitSupport = true;
     // Silently force 64-bit register use on ppc64.
     Use64BitRegs = true;
+    if (!FullFS.empty())
+      FullFS = "+64bit," + FullFS;
+    else
+      FullFS = "+64bit";
   }
-  
+
+  // Parse features string.
+  ParseSubtargetFeatures(CPUName, FullFS);
+
   // If the user requested use of 64-bit regs, but the cpu selected doesn't
   // support it, ignore.
   if (use64BitRegs() && !has64BitSupport())
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index b8b1614..416c0f3 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -14,9 +14,9 @@
 #ifndef POWERPCSUBTARGET_H
 #define POWERPCSUBTARGET_H
 
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include <string>
 
 #define GET_SUBTARGETINFO_HEADER
@@ -33,34 +33,34 @@ namespace PPC {
   enum {
     DIR_NONE,
     DIR_32,
-    DIR_440, 
-    DIR_601, 
-    DIR_602, 
-    DIR_603, 
+    DIR_440,
+    DIR_601,
+    DIR_602,
+    DIR_603,
     DIR_7400,
-    DIR_750, 
-    DIR_970, 
+    DIR_750,
+    DIR_970,
     DIR_A2,
     DIR_E500mc,
     DIR_E5500,
     DIR_PWR6,
     DIR_PWR7,
-    DIR_64  
+    DIR_64
   };
 }
 
 class GlobalValue;
 class TargetMachine;
-  
+
 class PPCSubtarget : public PPCGenSubtargetInfo {
 protected:
   /// stackAlignment - The minimum alignment known to hold of the stack frame on
   /// entry to the function and which must be maintained by every function.
   unsigned StackAlignment;
-  
+
   /// Selected instruction itineraries (one entry per itinerary class.)
   InstrItineraryData InstrItins;
-  
+
   /// Which cpu directive was used.
   unsigned DarwinDirective;
 
@@ -76,7 +76,7 @@ protected:
   bool IsBookE;
   bool HasLazyResolverStubs;
   bool IsJITCodeModel;
-  
+
   /// TargetTriple - What processor and OS we're targeting.
   Triple TargetTriple;
 
@@ -86,11 +86,11 @@ public:
   ///
   PPCSubtarget(const std::string &TT, const std::string &CPU,
                const std::string &FS, bool is64Bit);
-  
-  /// ParseSubtargetFeatures - Parses features string setting specified 
+
+  /// ParseSubtargetFeatures - Parses features string setting specified
   /// subtarget options.  Definition of function is auto generated by tblgen.
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
-  
+
   /// SetJITMode - This is called to inform the subtarget info that we are
   /// producing code for the JIT.
   void SetJITMode();
@@ -99,20 +99,27 @@ public:
   /// stack frame on entry to the function and which must be maintained by every
   /// function for this subtarget.
   unsigned getStackAlignment() const { return StackAlignment; }
-  
+
   /// getDarwinDirective - Returns the -m directive specified for the cpu.
   ///
   unsigned getDarwinDirective() const { return DarwinDirective; }
-  
-  /// getInstrItins - Return the instruction itineraies based on subtarget 
+
+  /// getInstrItins - Return the instruction itineraies based on subtarget
   /// selection.
   const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
 
-  /// getTargetDataString - Return the pointer size and type alignment
+  /// getDataLayoutString - Return the pointer size and type alignment
   /// properties of this subtarget.
-  const char *getTargetDataString() const {
+  const char *getDataLayoutString() const {
     // Note, the alignment values for f64 and i64 on ppc64 in Darwin
     // documentation are wrong; these are correct (i.e. "what gcc does").
+    if (isPPC64() && isSVR4ABI()) {
+      if (TargetTriple.getOS() == llvm::Triple::FreeBSD)
+        return "E-p:64:64-f64:64:64-i64:64:64-f128:64:64-v128:128:128-n32:64";
+      else
+        return "E-p:64:64-f64:64:64-i64:64:64-f128:128:128-v128:128:128-n32:64";
+    }
+
     return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64"
                      : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32";
   }
@@ -120,22 +127,22 @@ public:
   /// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
   ///
   bool isPPC64() const { return IsPPC64; }
-  
+
   /// has64BitSupport - Return true if the selected CPU supports 64-bit
   /// instructions, regardless of whether we are in 32-bit or 64-bit mode.
   bool has64BitSupport() const { return Has64BitSupport; }
-  
+
   /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit
   /// registers in 32-bit mode when possible.  This can only true if
   /// has64BitSupport() returns true.
   bool use64BitRegs() const { return Use64BitRegs; }
-  
+
   /// hasLazyResolverStub - Return true if accesses to the specified global have
   /// to go through a dyld lazy resolution stub.  This means that an extra load
   /// is required to get the address of the global.
-  bool hasLazyResolverStub(const GlobalValue *GV, 
+  bool hasLazyResolverStub(const GlobalValue *GV,
                            const TargetMachine &TM) const;
-  
+
   // isJITCodeModel - True if we're generating code for the JIT
   bool isJITCodeModel() const { return IsJITCodeModel; }
 
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 9805112..b8b7882 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -13,13 +13,13 @@
 
 #include "PPCTargetMachine.h"
 #include "PPC.h"
-#include "llvm/PassManager.h"
-#include "llvm/MC/MCStreamer.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
 static cl::
@@ -40,7 +40,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
                                    bool is64Bit)
   : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
     Subtarget(TT, CPU, FS, is64Bit),
-    DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
+    DL(Subtarget.getDataLayoutString()), InstrInfo(*this),
     FrameLowering(Subtarget), JITInfo(*this, is64Bit),
     TLInfo(*this), TSInfo(*this),
     InstrItins(Subtarget.getInstrItineraryData()) {
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 7da2b0c..d917d99 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -15,13 +15,13 @@
 #define PPC_TARGETMACHINE_H
 
 #include "PPCFrameLowering.h"
-#include "PPCSubtarget.h"
-#include "PPCJITInfo.h"
-#include "PPCInstrInfo.h"
 #include "PPCISelLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCJITInfo.h"
 #include "PPCSelectionDAGInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
 
 namespace llvm {
 
@@ -29,7 +29,7 @@ namespace llvm {
 ///
 class PPCTargetMachine : public LLVMTargetMachine {
   PPCSubtarget        Subtarget;
-  const TargetData    DataLayout;       // Calculates type size & alignment
+  const DataLayout    DL;       // Calculates type size & alignment
   PPCInstrInfo        InstrInfo;
   PPCFrameLowering    FrameLowering;
   PPCJITInfo          JITInfo;
@@ -58,7 +58,7 @@ public:
     return &InstrInfo.getRegisterInfo();
   }
 
-  virtual const TargetData    *getTargetData() const    { return &DataLayout; }
+  virtual const DataLayout    *getDataLayout() const    { return &DL; }
   virtual const PPCSubtarget  *getSubtargetImpl() const { return &Subtarget; }
   virtual const InstrItineraryData *getInstrItineraryData() const {
     return &InstrItins;
diff --git a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 5dc8568..fa44331 100644
--- a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPC.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;