60 files changed, 3738 insertions, 2405 deletions
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 2f562ca..06bb968 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrInfo.h"
@@ -213,16 +214,12 @@ struct PPCOperand;
 
 class PPCAsmParser : public MCTargetAsmParser {
   MCSubtargetInfo &STI;
-  MCAsmParser &Parser;
   const MCInstrInfo &MII;
   bool IsPPC64;
   bool IsDarwin;
 
-  MCAsmParser &getParser() const { return Parser; }
-  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
-
-  void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
-  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+  void Warning(SMLoc L, const Twine &Msg) { getParser().Warning(L, Msg); }
+  bool Error(SMLoc L, const Twine &Msg) { return getParser().Error(L, Msg); }
 
   bool isPPC64() const { return IsPPC64; }
   bool isDarwin() const { return IsDarwin; }
@@ -244,10 +241,12 @@ class PPCAsmParser : public MCTargetAsmParser {
   bool ParseDirectiveTC(unsigned Size, SMLoc L);
   bool ParseDirectiveMachine(SMLoc L);
   bool ParseDarwinDirectiveMachine(SMLoc L);
+  bool ParseDirectiveAbiVersion(SMLoc L);
+  bool ParseDirectiveLocalEntry(SMLoc L);
 
   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                OperandVector &Operands, MCStreamer &Out,
-                               unsigned &ErrorInfo,
+                               uint64_t &ErrorInfo,
                                bool MatchingInlineAsm) override;
 
   void ProcessInstruction(MCInst &Inst, const OperandVector &Ops);
@@ -263,9 +262,8 @@ class PPCAsmParser : public MCTargetAsmParser {
 
 public:
   PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
-               const MCInstrInfo &_MII,
-               const MCTargetOptions &Options)
-      : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(_MII) {
+               const MCInstrInfo &_MII, const MCTargetOptions &Options)
+      : MCTargetAsmParser(), STI(_STI), MII(_MII) {
     // Check for 64-bit vs. 32-bit pointer mode.
     Triple TheTriple(STI.getTargetTriple());
     IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
@@ -294,6 +292,7 @@ struct PPCOperand : public MCParsedAsmOperand {
   enum KindTy {
     Token,
     Immediate,
+    ContextImmediate,
     Expression,
     TLSRegister
   } Kind;
@@ -338,6 +337,7 @@ public:
       Tok = o.Tok;
       break;
     case Immediate:
+    case ContextImmediate:
       Imm = o.Imm;
       break;
     case Expression:
@@ -362,6 +362,16 @@ public:
     assert(Kind == Immediate && "Invalid access!");
     return Imm.Val;
   }
+  int64_t getImmS16Context() const {
+    assert((Kind == Immediate || Kind == ContextImmediate) && "Invalid access!");
+    if (Kind == Immediate)
+      return Imm.Val;
+    return static_cast<int16_t>(Imm.Val);
+  }
+  int64_t getImmU16Context() const {
+    assert((Kind == Immediate || Kind == ContextImmediate) && "Invalid access!");
+    return Imm.Val;
+  }
 
   const MCExpr *getExpr() const {
     assert(Kind == Expression && "Invalid access!");
@@ -406,22 +416,73 @@ public:
   bool isToken() const override { return Kind == Token; }
   bool isImm() const override { return Kind == Immediate || Kind == Expression; }
   bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
+  bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); }
   bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
   bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
   bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); }
-  bool isU16Imm() const { return Kind == Expression ||
-                                 (Kind == Immediate && isUInt<16>(getImm())); }
-  bool isS16Imm() const { return Kind == Expression ||
-                                 (Kind == Immediate && isInt<16>(getImm())); }
+  bool isU6ImmX2() const { return Kind == Immediate &&
+                                  isUInt<6>(getImm()) &&
+                                  (getImm() & 1) == 0; }
+  bool isU7ImmX4() const { return Kind == Immediate &&
+                                  isUInt<7>(getImm()) &&
+                                  (getImm() & 3) == 0; }
+  bool isU8ImmX8() const { return Kind == Immediate &&
+                                  isUInt<8>(getImm()) &&
+                                  (getImm() & 7) == 0; }
+  bool isU16Imm() const {
+    switch (Kind) {
+      case Expression:
+        return true;
+      case Immediate:
+      case ContextImmediate:
+        return isUInt<16>(getImmU16Context());
+      default:
+        return false;
+    }
+  }
+  bool isS16Imm() const {
+    switch (Kind) {
+      case Expression:
+        return true;
+      case Immediate:
+      case ContextImmediate:
+        return isInt<16>(getImmS16Context());
+      default:
+        return false;
+    }
+  }
   bool isS16ImmX4() const { return Kind == Expression ||
                                    (Kind == Immediate && isInt<16>(getImm()) &&
                                     (getImm() & 3) == 0); }
-  bool isS17Imm() const { return Kind == Expression ||
-                                 (Kind == Immediate && isInt<17>(getImm())); }
+  bool isS17Imm() const {
+    switch (Kind) {
+      case Expression:
+        return true;
+      case Immediate:
+      case ContextImmediate:
+        return isInt<17>(getImmS16Context());
+      default:
+        return false;
+    }
+  }
   bool isTLSReg() const { return Kind == TLSRegister; }
-  bool isDirectBr() const { return Kind == Expression ||
-                                   (Kind == Immediate && isInt<26>(getImm()) &&
-                                    (getImm() & 3) == 0); }
+  bool isDirectBr() const {
+    if (Kind == Expression)
+      return true;
+    if (Kind != Immediate)
+      return false;
+    // Operand must be 64-bit aligned, signed 27-bit immediate.
+    if ((getImm() & 3) != 0)
+      return false;
+    if (isInt<26>(getImm()))
+      return true;
+    if (!IsPPC64) {
+      // In 32-bit mode, large 32-bit quantities wrap around.
+      if (isUInt<32>(getImm()) && isInt<26>(static_cast<int32_t>(getImm())))
+        return true;
+    }
+    return false;
+  }
   bool isCondBr() const { return Kind == Expression ||
                                  (Kind == Immediate && isInt<16>(getImm()) &&
                                   (getImm() & 3) == 0); }
@@ -526,6 +587,36 @@ public:
       Inst.addOperand(MCOperand::CreateExpr(getExpr()));
   }
 
+  void addS16ImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    switch (Kind) {
+      case Immediate:
+        Inst.addOperand(MCOperand::CreateImm(getImm()));
+        break;
+      case ContextImmediate:
+        Inst.addOperand(MCOperand::CreateImm(getImmS16Context()));
+        break;
+      default:
+        Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+        break;
+    }
+  }
+
+  void addU16ImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    switch (Kind) {
+      case Immediate:
+        Inst.addOperand(MCOperand::CreateImm(getImm()));
+        break;
+      case ContextImmediate:
+        Inst.addOperand(MCOperand::CreateImm(getImmU16Context()));
+        break;
+      default:
+        Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+        break;
+    }
+  }
+
   void addBranchTargetOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     if (Kind == Immediate)
@@ -566,9 +657,9 @@ public:
     // explicitly.
     void *Mem = ::operator new(sizeof(PPCOperand) + Str.size());
     std::unique_ptr<PPCOperand> Op(new (Mem) PPCOperand(Token));
-    Op->Tok.Data = (const char *)(Op.get() + 1);
+    Op->Tok.Data = reinterpret_cast<const char *>(Op.get() + 1);
     Op->Tok.Length = Str.size();
-    std::memcpy((void *)Op->Tok.Data, Str.data(), Str.size());
+    std::memcpy(const_cast<char *>(Op->Tok.Data), Str.data(), Str.size());
     Op->StartLoc = S;
     Op->EndLoc = S;
     Op->IsPPC64 = IsPPC64;
@@ -607,6 +698,16 @@ public:
   }
 
   static std::unique_ptr<PPCOperand>
+  CreateContextImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) {
+    auto Op = make_unique<PPCOperand>(ContextImmediate);
+    Op->Imm.Val = Val;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    Op->IsPPC64 = IsPPC64;
+    return Op;
+  }
+
+  static std::unique_ptr<PPCOperand>
   CreateFromMCExpr(const MCExpr *Val, SMLoc S, SMLoc E, bool IsPPC64) {
     if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Val))
       return CreateImm(CE->getValue(), S, E, IsPPC64);
@@ -615,6 +716,12 @@ public:
       if (SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS)
         return CreateTLSReg(SRE, S, E, IsPPC64);
 
+    if (const PPCMCExpr *TE = dyn_cast<PPCMCExpr>(Val)) {
+      int64_t Res;
+      if (TE->EvaluateAsConstant(Res))
+        return CreateContextImm(Res, S, E, IsPPC64);
+    }
+
     return CreateExpr(Val, S, E, IsPPC64);
   }
 };
@@ -627,6 +734,7 @@ void PPCOperand::print(raw_ostream &OS) const {
     OS << "'" << getToken() << "'";
     break;
   case Immediate:
+  case ContextImmediate:
     OS << getImm();
     break;
   case Expression:
@@ -638,6 +746,29 @@ void PPCOperand::print(raw_ostream &OS) const {
   }
 }
 
+static void
+addNegOperand(MCInst &Inst, MCOperand &Op, MCContext &Ctx) {
+  if (Op.isImm()) {
+    Inst.addOperand(MCOperand::CreateImm(-Op.getImm()));
+    return;
+  }
+  const MCExpr *Expr = Op.getExpr();
+  if (const MCUnaryExpr *UnExpr = dyn_cast<MCUnaryExpr>(Expr)) {
+    if (UnExpr->getOpcode() == MCUnaryExpr::Minus) {
+      Inst.addOperand(MCOperand::CreateExpr(UnExpr->getSubExpr()));
+      return;
+    }
+  } else if (const MCBinaryExpr *BinExpr = dyn_cast<MCBinaryExpr>(Expr)) {
+    if (BinExpr->getOpcode() == MCBinaryExpr::Sub) {
+      const MCExpr *NE = MCBinaryExpr::CreateSub(BinExpr->getRHS(),
+                                                 BinExpr->getLHS(), Ctx);
+      Inst.addOperand(MCOperand::CreateExpr(NE));
+      return;
+    }
+  }
+  Inst.addOperand(MCOperand::CreateExpr(MCUnaryExpr::CreateMinus(Expr, Ctx)));
+}
+
 void PPCAsmParser::ProcessInstruction(MCInst &Inst,
                                       const OperandVector &Operands) {
   int Opcode = Inst.getOpcode();
@@ -653,41 +784,37 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
   }
   case PPC::SUBI: {
     MCInst TmpInst;
-    int64_t N = Inst.getOperand(2).getImm();
     TmpInst.setOpcode(PPC::ADDI);
     TmpInst.addOperand(Inst.getOperand(0));
     TmpInst.addOperand(Inst.getOperand(1));
-    TmpInst.addOperand(MCOperand::CreateImm(-N));
+    addNegOperand(TmpInst, Inst.getOperand(2), getContext());
     Inst = TmpInst;
     break;
   }
   case PPC::SUBIS: {
     MCInst TmpInst;
-    int64_t N = Inst.getOperand(2).getImm();
     TmpInst.setOpcode(PPC::ADDIS);
     TmpInst.addOperand(Inst.getOperand(0));
     TmpInst.addOperand(Inst.getOperand(1));
-    TmpInst.addOperand(MCOperand::CreateImm(-N));
+    addNegOperand(TmpInst, Inst.getOperand(2), getContext());
     Inst = TmpInst;
     break;
   }
   case PPC::SUBIC: {
     MCInst TmpInst;
-    int64_t N = Inst.getOperand(2).getImm();
     TmpInst.setOpcode(PPC::ADDIC);
     TmpInst.addOperand(Inst.getOperand(0));
     TmpInst.addOperand(Inst.getOperand(1));
-    TmpInst.addOperand(MCOperand::CreateImm(-N));
+    addNegOperand(TmpInst, Inst.getOperand(2), getContext());
     Inst = TmpInst;
     break;
   }
   case PPC::SUBICo: {
     MCInst TmpInst;
-    int64_t N = Inst.getOperand(2).getImm();
     TmpInst.setOpcode(PPC::ADDICo);
     TmpInst.addOperand(Inst.getOperand(0));
     TmpInst.addOperand(Inst.getOperand(1));
-    TmpInst.addOperand(MCOperand::CreateImm(-N));
+    addNegOperand(TmpInst, Inst.getOperand(2), getContext());
     Inst = TmpInst;
     break;
   }
@@ -921,7 +1048,7 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
 
 bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                            OperandVector &Operands,
-                                           MCStreamer &Out, unsigned &ErrorInfo,
+                                           MCStreamer &Out, uint64_t &ErrorInfo,
                                            bool MatchingInlineAsm) {
   MCInst Inst;
 
@@ -939,7 +1066,7 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
       return Error(IDLoc, "unrecognized instruction mnemonic");
   case Match_InvalidOperand: {
     SMLoc ErrorLoc = IDLoc;
-    if (ErrorInfo != ~0U) {
+    if (ErrorInfo != ~0ULL) {
       if (ErrorInfo >= Operands.size())
         return Error(IDLoc, "too few operands for instruction");
 
@@ -995,6 +1122,7 @@ MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) {
 
 bool PPCAsmParser::
 ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+  MCAsmParser &Parser = getParser();
   const AsmToken &Tok = Parser.getTok();
   StartLoc = Tok.getLoc();
   EndLoc = Tok.getEndLoc();
@@ -1176,6 +1304,7 @@ ParseExpression(const MCExpr *&EVal) {
 /// for this to be done at a higher level.
 bool PPCAsmParser::
 ParseDarwinExpression(const MCExpr *&EVal) {
+  MCAsmParser &Parser = getParser();
   PPCMCExpr::VariantKind Variant = PPCMCExpr::VK_PPC_None;
   switch (getLexer().getKind()) {
   default:
@@ -1218,6 +1347,7 @@ ParseDarwinExpression(const MCExpr *&EVal) {
 /// This handles registers in the form 'NN', '%rNN' for ELF platforms and
 /// rNN for MachO.
 bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
+  MCAsmParser &Parser = getParser();
   SMLoc S = Parser.getTok().getLoc();
   SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
   const MCExpr *EVal;
@@ -1412,6 +1542,10 @@ bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
       return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc());
     if (IDVal == ".machine")
       return ParseDirectiveMachine(DirectiveID.getLoc());
+    if (IDVal == ".abiversion")
+      return ParseDirectiveAbiVersion(DirectiveID.getLoc());
+    if (IDVal == ".localentry")
+      return ParseDirectiveLocalEntry(DirectiveID.getLoc());
   } else {
     if (IDVal == ".machine")
       return ParseDarwinDirectiveMachine(DirectiveID.getLoc());
@@ -1422,6 +1556,7 @@ bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
 /// ParseDirectiveWord
 ///  ::= .word [ expression (, expression)* ]
 bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+  MCAsmParser &Parser = getParser();
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
     for (;;) {
       const MCExpr *Value;
@@ -1446,6 +1581,7 @@ bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
 /// ParseDirectiveTC
 ///  ::= .tc [ symbol (, expression)* ]
 bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) {
+  MCAsmParser &Parser = getParser();
   // Skip TC symbol, which is only used with XCOFF.
   while (getLexer().isNot(AsmToken::EndOfStatement)
          && getLexer().isNot(AsmToken::Comma))
@@ -1466,6 +1602,7 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) {
 /// ParseDirectiveMachine (ELF platforms)
 ///  ::= .machine [ cpu | "push" | "pop" ]
 bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
+  MCAsmParser &Parser = getParser();
   if (getLexer().isNot(AsmToken::Identifier) &&
       getLexer().isNot(AsmToken::String)) {
     Error(L, "unexpected token in directive");
@@ -1500,6 +1637,7 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
 /// ParseDarwinDirectiveMachine (Mach-o platforms)
 ///  ::= .machine cpu-identifier
 bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) {
+  MCAsmParser &Parser = getParser();
   if (getLexer().isNot(AsmToken::Identifier) &&
       getLexer().isNot(AsmToken::String)) {
     Error(L, "unexpected token in directive");
@@ -1534,6 +1672,64 @@ bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) {
   return false;
 }
 
+/// ParseDirectiveAbiVersion
+///  ::= .abiversion constant-expression
+bool PPCAsmParser::ParseDirectiveAbiVersion(SMLoc L) {
+  int64_t AbiVersion;
+  if (getParser().parseAbsoluteExpression(AbiVersion)){
+    Error(L, "expected constant expression");
+    return false;
+  }
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    Error(L, "unexpected token in directive");
+    return false;
+  }
+
+  PPCTargetStreamer &TStreamer =
+      *static_cast<PPCTargetStreamer *>(
+           getParser().getStreamer().getTargetStreamer());
+  TStreamer.emitAbiVersion(AbiVersion);
+
+  return false;
+}
+
+/// ParseDirectiveLocalEntry
+///  ::= .localentry symbol, expression
+bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
+  StringRef Name;
+  if (getParser().parseIdentifier(Name)) {
+    Error(L, "expected identifier in directive");
+    return false;
+  }
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+  if (getLexer().isNot(AsmToken::Comma)) {
+    Error(L, "unexpected token in directive");
+    return false;
+  }
+  Lex();
+
+  const MCExpr *Expr;
+  if (getParser().parseExpression(Expr)) {
+    Error(L, "expected expression");
+    return false;
+  }
+
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    Error(L, "unexpected token in directive");
+    return false;
+  }
+
+  PPCTargetStreamer &TStreamer =
+      *static_cast<PPCTargetStreamer *>(
+           getParser().getStreamer().getTargetStreamer());
+  TStreamer.emitLocalEntry(Sym, Expr);
+
+  return false;
+}
+
+
+
 /// Force static initialization.
 extern "C" void LLVMInitializePowerPCAsmParser() {
   RegisterMCAsmParser<PPCAsmParser> A(ThePPC32Target);
@@ -1558,6 +1754,10 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
     case MCK_1: ImmVal = 1; break;
     case MCK_2: ImmVal = 2; break;
     case MCK_3: ImmVal = 3; break;
+    case MCK_4: ImmVal = 4; break;
+    case MCK_5: ImmVal = 5; break;
+    case MCK_6: ImmVal = 6; break;
+    case MCK_7: ImmVal = 7; break;
     default: return Match_InvalidOperand;
   }
 
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index ea4de63..47a9474 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -2,9 +2,8 @@ set(LLVM_TARGET_DEFINITIONS PPC.td)
 
 tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer)
 tablegen(LLVM PPCGenAsmMatcher.inc -gen-asm-matcher)
-tablegen(LLVM PPCGenCodeEmitter.inc -gen-emitter)
 tablegen(LLVM PPCGenDisassemblerTables.inc -gen-disassembler)
-tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter)
 tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info)
 tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info)
 tablegen(LLVM PPCGenDAGISel.inc -gen-dag-isel)
@@ -16,7 +15,6 @@ add_public_tablegen_target(PowerPCCommonTableGen)
 add_llvm_target(PowerPCCodeGen
   PPCAsmPrinter.cpp
   PPCBranchSelector.cpp
-  PPCCodeEmitter.cpp
   PPCCTRLoops.cpp
   PPCHazardRecognizers.cpp
   PPCInstrInfo.cpp
@@ -24,7 +22,6 @@ add_llvm_target(PowerPCCodeGen
   PPCISelLowering.cpp
   PPCFastISel.cpp
   PPCFrameLowering.cpp
-  PPCJITInfo.cpp
   PPCMCInstLower.cpp
   PPCMachineFunctionInfo.cpp
   PPCRegisterInfo.cpp
diff --git a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
index c1011ff..ea3e7ea 100644
--- a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
+++ b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
 type = Library
 name = PowerPCDisassembler
 parent = PowerPC
-required_libraries = MC PowerPCDesc PowerPCInfo Support
+required_libraries = MCDisassembler PowerPCInfo Support
 add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index a2305a9..5251b60 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -12,7 +12,6 @@
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/MemoryObject.h"
 #include "llvm/Support/TargetRegistry.h"
 
 using namespace llvm;
@@ -28,13 +27,10 @@ public:
     : MCDisassembler(STI, Ctx) {}
   virtual ~PPCDisassembler() {}
 
-  // Override MCDisassembler.
-  virtual DecodeStatus getInstruction(MCInst &instr,
-                                      uint64_t &size,
-                                      const MemoryObject &region,
-                                      uint64_t address,
-                                      raw_ostream &vStream,
-                                      raw_ostream &cStream) const override;
+  DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+                              ArrayRef<uint8_t> Bytes, uint64_t Address,
+                              raw_ostream &VStream,
+                              raw_ostream &CStream) const override;
 };
 } // end anonymous namespace
 
@@ -325,23 +321,19 @@ static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm,
 #include "PPCGenDisassemblerTables.inc"
 
 DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
-                                                 const MemoryObject &Region,
-                                                 uint64_t Address,
-                                                 raw_ostream &os,
-                                                 raw_ostream &cs) const {
+                                             ArrayRef<uint8_t> Bytes,
+                                             uint64_t Address, raw_ostream &OS,
+                                             raw_ostream &CS) const {
   // Get the four bytes of the instruction.
-  uint8_t Bytes[4];
   Size = 4;
-  if (Region.readBytes(Address, Size, Bytes) == -1) {
+  if (Bytes.size() < 4) {
     Size = 0;
     return MCDisassembler::Fail;
   }
 
   // The instruction is big-endian encoded.
-  uint32_t Inst = (Bytes[0] << 24) |
-                  (Bytes[1] << 16) |
-                  (Bytes[2] <<  8) |
-                  (Bytes[3] <<  0);
+  uint32_t Inst =
+      (Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 8) | (Bytes[3] << 0);
 
   return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI);
 }
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 7279b09..670c40a 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -17,6 +17,7 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetOpcodes.h"
@@ -207,6 +208,13 @@ void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
   O << (unsigned int)Value;
 }
 
+void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  unsigned int Value = MI->getOperand(OpNo).getImm();
+  assert(Value <= 15 && "Invalid u4imm argument!");
+  O << (unsigned int)Value;
+}
+
 void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
                                        raw_ostream &O) {
   int Value = MI->getOperand(OpNo).getImm();
@@ -260,7 +268,7 @@ void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
   if (!MI->getOperand(OpNo).isImm())
     return printOperand(MI, OpNo, O);
 
-  O << (int)MI->getOperand(OpNo).getImm()*4;
+  O << SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
 }
 
 
@@ -308,10 +316,16 @@ void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
 
 void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
                                   raw_ostream &O) {
-  printBranchOperand(MI, OpNo, O);
+  // On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must
+  // come at the _end_ of the expression.
+  const MCOperand &Op = MI->getOperand(OpNo);
+  const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*Op.getExpr());
+  O << refExp.getSymbol().getName();
   O << '(';
   printOperand(MI, OpNo+1, O);
   O << ')';
+  if (refExp.getKind() != MCSymbolRefExpr::VK_None)
+    O << '@' << MCSymbolRefExpr::getVariantKindName(refExp.getKind());
 }
 
 
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 211a628..b21aa22 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef PPCINSTPRINTER_H
-#define PPCINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H
+#define LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H
 
 #include "llvm/MC/MCInstPrinter.h"
 
@@ -44,6 +44,7 @@ public:
                              raw_ostream &O, const char *Modifier = nullptr);
 
   void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 12584be..c54d5e7 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -9,7 +9,9 @@
 
 #include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCELF.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCFixupKindInfo.h"
 #include "llvm/MC/MCMachObjectWriter.h"
@@ -128,6 +130,30 @@ public:
     }
   }
 
+  void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
+                         const MCFixup &Fixup, const MCFragment *DF,
+                         const MCValue &Target, uint64_t &Value,
+                         bool &IsResolved) override {
+    switch ((PPC::Fixups)Fixup.getKind()) {
+    default: break;
+    case PPC::fixup_ppc_br24:
+    case PPC::fixup_ppc_br24abs:
+      // If the target symbol has a local entry point we must not attempt
+      // to resolve the fixup directly.  Emit a relocation and leave
+      // resolution of the final target address to the linker.
+      if (const MCSymbolRefExpr *A = Target.getSymA()) {
+        const MCSymbolData &Data = Asm.getSymbolData(A->getSymbol());
+        // The "other" values are stored in the last 6 bits of the second byte.
+        // The traditional defines for STO values assume the full byte and thus
+        // the shift to pack it.
+        unsigned Other = MCELF::getOther(Data) << 2;
+        if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0)
+          IsResolved = false;
+      }
+      break;
+    }
+  }
+
   bool mayNeedRelaxation(const MCInst &Inst) const override {
     // FIXME.
     return false;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index cd3b4f4..b817394 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -11,6 +11,7 @@
 #include "MCTargetDesc/PPCFixupKinds.h"
 #include "MCTargetDesc/PPCMCExpr.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCELF.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCValue.h"
@@ -23,13 +24,12 @@ namespace {
   public:
     PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI);
 
-    virtual ~PPCELFObjectWriter();
   protected:
-    virtual unsigned getRelocTypeInner(const MCValue &Target,
-                                       const MCFixup &Fixup,
-                                       bool IsPCRel) const;
     unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
                           bool IsPCRel) const override;
+
+    bool needsRelocateWithSymbol(const MCSymbolData &SD,
+                                 unsigned Type) const override;
   };
 }
 
@@ -38,9 +38,6 @@ PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
                             Is64Bit ?  ELF::EM_PPC64 : ELF::EM_PPC,
                             /*HasRelocationAddend*/ true) {}
 
-PPCELFObjectWriter::~PPCELFObjectWriter() {
-}
-
 static MCSymbolRefExpr::VariantKind getAccessVariant(const MCValue &Target,
                                                      const MCFixup &Fixup) {
   const MCExpr *Expr = Fixup.getValue();
@@ -69,10 +66,9 @@ static MCSymbolRefExpr::VariantKind getAccessVariant(const MCValue &Target,
   llvm_unreachable("unknown PPCMCExpr kind");
 }
 
-unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
-                                               const MCFixup &Fixup,
-                                               bool IsPCRel) const
-{
+unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
+                                          const MCFixup &Fixup,
+                                          bool IsPCRel) const {
   MCSymbolRefExpr::VariantKind Modifier = getAccessVariant(Target, Fixup);
 
   // determine the type of the relocation
@@ -83,7 +79,18 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
       llvm_unreachable("Unimplemented");
     case PPC::fixup_ppc_br24:
     case PPC::fixup_ppc_br24abs:
-      Type = ELF::R_PPC_REL24;
+      switch (Modifier) {
+      default: llvm_unreachable("Unsupported Modifier");
+      case MCSymbolRefExpr::VK_None:
+        Type = ELF::R_PPC_REL24;
+        break;
+      case MCSymbolRefExpr::VK_PLT:
+        Type = ELF::R_PPC_PLTREL24;
+        break;
+      case MCSymbolRefExpr::VK_PPC_LOCAL:
+        Type = ELF::R_PPC_LOCAL24PC;
+        break;
+      }
       break;
     case PPC::fixup_ppc_brcond14:
     case PPC::fixup_ppc_brcond14abs:
@@ -224,7 +231,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
         Type = ELF::R_PPC64_DTPREL16_HIGHESTA;
         break;
       case MCSymbolRefExpr::VK_PPC_GOT_TLSGD:
-        Type = ELF::R_PPC64_GOT_TLSGD16;
+        if (is64Bit())
+          Type = ELF::R_PPC64_GOT_TLSGD16;
+        else
+          Type = ELF::R_PPC_GOT_TLSGD16;
         break;
       case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO:
         Type = ELF::R_PPC64_GOT_TLSGD16_LO;
@@ -236,7 +246,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
         Type = ELF::R_PPC64_GOT_TLSGD16_HA;
         break;
       case MCSymbolRefExpr::VK_PPC_GOT_TLSLD:
-        Type = ELF::R_PPC64_GOT_TLSLD16;
+        if (is64Bit())
+          Type = ELF::R_PPC64_GOT_TLSLD16;
+        else
+          Type = ELF::R_PPC_GOT_TLSLD16;
         break;
       case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO:
         Type = ELF::R_PPC64_GOT_TLSLD16_LO;
@@ -332,13 +345,22 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
       switch (Modifier) {
       default: llvm_unreachable("Unsupported Modifier");
       case MCSymbolRefExpr::VK_PPC_TLSGD:
-        Type = ELF::R_PPC64_TLSGD;
+        if (is64Bit())
+          Type = ELF::R_PPC64_TLSGD;
+        else
+          Type = ELF::R_PPC_TLSGD;
         break;
       case MCSymbolRefExpr::VK_PPC_TLSLD:
-        Type = ELF::R_PPC64_TLSLD;
+        if (is64Bit())
+          Type = ELF::R_PPC64_TLSLD;
+        else
+          Type = ELF::R_PPC_TLSLD;
         break;
       case MCSymbolRefExpr::VK_PPC_TLS:
-        Type = ELF::R_PPC64_TLS;
+        if (is64Bit())
+          Type = ELF::R_PPC64_TLS;
+        else
+          Type = ELF::R_PPC_TLS;
         break;
       }
       break;
@@ -373,10 +395,21 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
   return Type;
 }
 
-unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
-                                          const MCFixup &Fixup,
-                                          bool IsPCRel) const {
-  return getRelocTypeInner(Target, Fixup, IsPCRel);
+bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
+                                                 unsigned Type) const {
+  switch (Type) {
+    default:
+      return false;
+
+    case ELF::R_PPC_REL24:
+      // If the target symbol has a local entry point, we must keep the
+      // target symbol to preserve that information for the linker.
+      // The "other" values are stored in the last 6 bits of the second byte.
+      // The traditional defines for STO values assume the full byte and thus
+      // the shift to pack it.
+      unsigned Other = MCELF::getOther(SD) << 2;
+      return (Other & ELF::STO_PPC64_LOCAL_MASK) != 0;
+  }
 }
 
 MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 68de8c1..ae43e59 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_PPC_PPCFIXUPKINDS_H
-#define LLVM_PPC_PPCFIXUPKINDS_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCFIXUPKINDS_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCFIXUPKINDS_H
 
 #include "llvm/MC/MCFixup.h"
 
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index b95a2ac..893aae3 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -42,9 +42,9 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
   UseIntegratedAssembler = true;
 }
 
-void PPCLinuxMCAsmInfo::anchor() { }
+void PPCELFMCAsmInfo::anchor() { }
 
-PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) {
+PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
   if (is64Bit) {
     PointerSize = CalleeSaveStackSlotSize = 8;
   }
@@ -64,7 +64,6 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) {
   DollarIsPC = true;
 
   // Set up DWARF directives
-  HasLEB128 = true;  // Target asm supports leb128 directives (little-endian)
   MinInstAlignment = 4;
 
   // Exceptions handling
@@ -73,6 +72,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) {
   ZeroDirective = "\t.space\t";
   Data64bitsDirective = is64Bit ? "\t.quad\t" : nullptr;
   AssemblerDialect = 1;           // New-Style mnemonics.
+  LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
 
   if (T.getOS() == llvm::Triple::FreeBSD ||
       (T.getOS() == llvm::Triple::NetBSD && !is64Bit) ||
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 754330b..9f0294d 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef PPCTARGETASMINFO_H
-#define PPCTARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCASMINFO_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCASMINFO_H
 
 #include "llvm/MC/MCAsmInfoDarwin.h"
 #include "llvm/MC/MCAsmInfoELF.h"
@@ -26,10 +26,10 @@ class Triple;
     explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&);
   };
 
-  class PPCLinuxMCAsmInfo : public MCAsmInfoELF {
+  class PPCELFMCAsmInfo : public MCAsmInfoELF {
     void anchor() override;
   public:
-    explicit PPCLinuxMCAsmInfo(bool is64Bit, const Triple&);
+    explicit PPCELFMCAsmInfo(bool is64Bit, const Triple&);
   };
 
 } // namespace llvm
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 435a93f..786b7fe 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -66,6 +66,15 @@ public:
   unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
                              SmallVectorImpl<MCFixup> &Fixups,
                              const MCSubtargetInfo &STI) const;
+  unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
+                              SmallVectorImpl<MCFixup> &Fixups,
+                              const MCSubtargetInfo &STI) const;
+  unsigned getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
+                              SmallVectorImpl<MCFixup> &Fixups,
+                              const MCSubtargetInfo &STI) const;
+  unsigned getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
+                              SmallVectorImpl<MCFixup> &Fixups,
+                              const MCSubtargetInfo &STI) const;
   unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
                              SmallVectorImpl<MCFixup> &Fixups,
                              const MCSubtargetInfo &STI) const;
@@ -260,6 +269,54 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
 }
 
 
+unsigned PPCMCCodeEmitter::getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
+                                              SmallVectorImpl<MCFixup> &Fixups,
+                                              const MCSubtargetInfo &STI)
+                                              const {
+  // Encode (imm, reg) as a spe8dis, which has the low 5-bits of (imm / 8)
+  // as the displacement and the next 5 bits as the register #.
+  assert(MI.getOperand(OpNo+1).isReg());
+  uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5;
+
+  const MCOperand &MO = MI.getOperand(OpNo);
+  assert(MO.isImm());
+  uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 3;
+  return reverseBits(Imm | RegBits) >> 22;
+}
+
+
+unsigned PPCMCCodeEmitter::getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
+                                              SmallVectorImpl<MCFixup> &Fixups,
+                                              const MCSubtargetInfo &STI)
+                                              const {
+  // Encode (imm, reg) as a spe4dis, which has the low 5-bits of (imm / 4)
+  // as the displacement and the next 5 bits as the register #.
+  assert(MI.getOperand(OpNo+1).isReg());
+  uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5;
+
+  const MCOperand &MO = MI.getOperand(OpNo);
+  assert(MO.isImm());
+  uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 2;
+  return reverseBits(Imm | RegBits) >> 22;
+}
+
+
+unsigned PPCMCCodeEmitter::getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
+                                              SmallVectorImpl<MCFixup> &Fixups,
+                                              const MCSubtargetInfo &STI)
+                                              const {
+  // Encode (imm, reg) as a spe2dis, which has the low 5-bits of (imm / 2)
+  // as the displacement and the next 5 bits as the register #.
+  assert(MI.getOperand(OpNo+1).isReg());
+  uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5;
+
+  const MCOperand &MO = MI.getOperand(OpNo);
+  assert(MO.isImm());
+  uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 1;
+  return reverseBits(Imm | RegBits) >> 22;
+}
+
+
 unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
                                        SmallVectorImpl<MCFixup> &Fixups,
                                        const MCSubtargetInfo &STI) const {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index 3ac0aca..7204bef 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -7,6 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "PPCFixupKinds.h"
 #include "PPCMCExpr.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAssembler.h"
@@ -52,40 +53,56 @@ void PPCMCExpr::PrintImpl(raw_ostream &OS) const {
 }
 
 bool
+PPCMCExpr::EvaluateAsConstant(int64_t &Res) const {
+  MCValue Value;
+
+  if (!getSubExpr()->EvaluateAsRelocatable(Value, nullptr, nullptr))
+    return false;
+
+  if (!Value.isAbsolute())
+    return false;
+
+  Res = EvaluateAsInt64(Value.getConstant());
+  return true;
+}
+
+int64_t
+PPCMCExpr::EvaluateAsInt64(int64_t Value) const {
+  switch (Kind) {
+    case VK_PPC_LO:
+      return Value & 0xffff;
+    case VK_PPC_HI:
+      return (Value >> 16) & 0xffff;
+    case VK_PPC_HA:
+      return ((Value + 0x8000) >> 16) & 0xffff;
+    case VK_PPC_HIGHER:
+      return (Value >> 32) & 0xffff;
+    case VK_PPC_HIGHERA:
+      return ((Value + 0x8000) >> 32) & 0xffff;
+    case VK_PPC_HIGHEST:
+      return (Value >> 48) & 0xffff;
+    case VK_PPC_HIGHESTA:
+      return ((Value + 0x8000) >> 48) & 0xffff;
+    case VK_PPC_None:
+      break;
+  }
+  llvm_unreachable("Invalid kind!");
+}
+
+bool
 PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
-                                     const MCAsmLayout *Layout) const {
+                                     const MCAsmLayout *Layout,
+                                     const MCFixup *Fixup) const {
   MCValue Value;
 
-  if (!getSubExpr()->EvaluateAsRelocatable(Value, Layout))
+  if (!getSubExpr()->EvaluateAsRelocatable(Value, Layout, Fixup))
     return false;
 
   if (Value.isAbsolute()) {
-    int64_t Result = Value.getConstant();
-    switch (Kind) {
-      default:
-        llvm_unreachable("Invalid kind!");
-      case VK_PPC_LO:
-        Result = Result & 0xffff;
-        break;
-      case VK_PPC_HI:
-        Result = (Result >> 16) & 0xffff;
-        break;
-      case VK_PPC_HA:
-        Result = ((Result + 0x8000) >> 16) & 0xffff;
-        break;
-      case VK_PPC_HIGHER:
-        Result = (Result >> 32) & 0xffff;
-        break;
-      case VK_PPC_HIGHERA:
-        Result = ((Result + 0x8000) >> 32) & 0xffff;
-        break;
-      case VK_PPC_HIGHEST:
-        Result = (Result >> 48) & 0xffff;
-        break;
-      case VK_PPC_HIGHESTA:
-        Result = ((Result + 0x8000) >> 48) & 0xffff;
-        break;
-    }
+    int64_t Result = EvaluateAsInt64(Value.getConstant());
+    if ((Fixup == nullptr || (unsigned)Fixup->getKind() != PPC::fixup_ppc_half16) &&
+        (Result >= 0x8000))
+      return false;
     Res = MCValue::get(Result);
   } else {
     if (!Layout)
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index bca4085..f0a6bb9 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef PPCMCEXPR_H
-#define PPCMCEXPR_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCEXPR_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCEXPR_H
 
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCExpr.h"
@@ -34,6 +34,8 @@ private:
   const MCExpr *Expr;
   bool IsDarwin;
 
+  int64_t EvaluateAsInt64(int64_t Value) const;
+
   explicit PPCMCExpr(VariantKind _Kind, const MCExpr *_Expr,
                      bool _IsDarwin)
     : Kind(_Kind), Expr(_Expr), IsDarwin(_IsDarwin) {}
@@ -78,7 +80,8 @@ public:
 
   void PrintImpl(raw_ostream &OS) const override;
   bool EvaluateAsRelocatableImpl(MCValue &Res,
-                                 const MCAsmLayout *Layout) const override;
+                                 const MCAsmLayout *Layout,
+                                 const MCFixup *Fixup) const override;
   void visitUsedExpr(MCStreamer &Streamer) const override;
   const MCSection *FindAssociatedSection() const override {
     return getSubExpr()->FindAssociatedSection();
@@ -87,6 +90,8 @@ public:
   // There are no TLS PPCMCExprs at the moment.
   void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
 
+  bool EvaluateAsConstant(int64_t &Res) const;
+
   static bool classof(const MCExpr *E) {
     return E->getKind() == MCExpr::Target;
   }
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 7057797..00be8f4 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -16,12 +16,16 @@
 #include "PPCMCAsmInfo.h"
 #include "PPCTargetStreamer.h"
 #include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -75,7 +79,7 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
   if (TheTriple.isOSDarwin())
     MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple);
   else
-    MAI = new PPCLinuxMCAsmInfo(isPPC64, TheTriple);
+    MAI = new PPCELFMCAsmInfo(isPPC64, TheTriple);
 
   // Initial state of the frame pointer is R1.
   unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1;
@@ -125,11 +129,20 @@ public:
   void emitMachine(StringRef CPU) override {
     OS << "\t.machine " << CPU << '\n';
   }
+  void emitAbiVersion(int AbiVersion) override {
+    OS << "\t.abiversion " << AbiVersion << '\n';
+  }
+  void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override {
+    OS << "\t.localentry\t" << *S << ", " << *LocalOffset << '\n';
+  }
 };
 
 class PPCTargetELFStreamer : public PPCTargetStreamer {
 public:
   PPCTargetELFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+  MCELFStreamer &getStreamer() {
+    return static_cast<MCELFStreamer &>(Streamer);
+  }
   void emitTCEntry(const MCSymbol &S) override {
     // Creates a R_PPC64_TOC relocation
     Streamer.EmitSymbolValue(&S, 8);
@@ -138,6 +151,39 @@ public:
     // FIXME: Is there anything to do in here or does this directive only
     // limit the parser?
   }
+  void emitAbiVersion(int AbiVersion) override {
+    MCAssembler &MCA = getStreamer().getAssembler();
+    unsigned Flags = MCA.getELFHeaderEFlags();
+    Flags &= ~ELF::EF_PPC64_ABI;
+    Flags |= (AbiVersion & ELF::EF_PPC64_ABI);
+    MCA.setELFHeaderEFlags(Flags);
+  }
+  void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override {
+    MCAssembler &MCA = getStreamer().getAssembler();
+    MCSymbolData &Data = getStreamer().getOrCreateSymbolData(S);
+
+    int64_t Res;
+    if (!LocalOffset->EvaluateAsAbsolute(Res, MCA))
+      report_fatal_error(".localentry expression must be absolute.");
+
+    unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res);
+    if (Res != ELF::decodePPC64LocalEntryOffset(Encoded))
+      report_fatal_error(".localentry expression cannot be encoded.");
+
+    // The "other" values are stored in the last 6 bits of the second byte.
+    // The traditional defines for STO values assume the full byte and thus
+    // the shift to pack it.
+    unsigned Other = MCELF::getOther(Data) << 2;
+    Other &= ~ELF::STO_PPC64_LOCAL_MASK;
+    Other |= Encoded;
+    MCELF::setOther(Data, Other >> 2);
+
+    // For GAS compatibility, unless we already saw a .abiversion directive,
+    // set e_flags to indicate ELFv2 ABI.
+    unsigned Flags = MCA.getELFHeaderEFlags();
+    if ((Flags & ELF::EF_PPC64_ABI) == 0)
+      MCA.setELFHeaderEFlags(Flags | 2);
+  }
 };
 
 class PPCTargetMachOStreamer : public PPCTargetStreamer {
@@ -150,25 +196,27 @@ public:
     // FIXME: We should update the CPUType, CPUSubType in the Object file if
     // the new values are different from the defaults.
   }
+  void emitAbiVersion(int AbiVersion) override {
+    llvm_unreachable("Unknown pseudo-op: .abiversion");
+  }
+  void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override {
+    llvm_unreachable("Unknown pseudo-op: .localentry");
+  }
 };
 }
 
 // This is duplicated code. Refactor this.
 static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
                                     MCContext &Ctx, MCAsmBackend &MAB,
-                                    raw_ostream &OS,
-                                    MCCodeEmitter *Emitter,
-                                    const MCSubtargetInfo &STI,
-                                    bool RelaxAll,
-                                    bool NoExecStack) {
+                                    raw_ostream &OS, MCCodeEmitter *Emitter,
+                                    const MCSubtargetInfo &STI, bool RelaxAll) {
   if (Triple(TT).isOSDarwin()) {
     MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
     new PPCTargetMachOStreamer(*S);
     return S;
   }
 
-  MCStreamer *S =
-      createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+  MCStreamer *S = createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
   new PPCTargetELFStreamer(*S);
   return S;
 }
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 474395b..68f7f7a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef PPCMCTARGETDESC_H
-#define PPCMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
 
 // GCC #defines PPC on Linux but we use it as our namespace name
 #undef PPC
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index cff27ba..df2f14a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -80,7 +80,7 @@ static unsigned getFixupKindLog2Size(unsigned Kind) {
 }
 
 /// Translates generic PPC fixup kind to Mach-O/PPC relocation type enum.
-/// Outline based on PPCELFObjectWriter::getRelocTypeInner().
+/// Outline based on PPCELFObjectWriter::GetRelocType().
 static unsigned getRelocType(const MCValue &Target,
                              const MCFixupKind FixupKind, // from
                                                           // Fixup.getKind()
@@ -360,7 +360,7 @@ void PPCMachObjectWriter::RecordPPCRelocation(
       // For external relocations, make sure to offset the fixup value to
       // compensate for the addend of the symbol address, if it was
       // undefined. This occurs with weak definitions, for example.
-      if (!SD->Symbol->isUndefined())
+      if (!SD->getSymbol().isUndefined())
         FixedValue -= Layout.getSymbolOffset(SD);
     } else {
       // The index is the section ordinal (1-based).
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index 10e328a..6075631 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
-#define LLVM_TARGET_POWERPC_PPCPREDICATES_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCPREDICATES_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCPREDICATES_H
 
 // GCC #defines PPC on Linux but we use it as our namespace name
 #undef PPC
diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile
index c966748..cf516f4 100644
--- a/lib/Target/PowerPC/Makefile
+++ b/lib/Target/PowerPC/Makefile
@@ -13,7 +13,7 @@ TARGET = PPC
 
 # Make sure that tblgen is run, first thing.
 BUILT_SOURCES = PPCGenRegisterInfo.inc PPCGenAsmMatcher.inc \
-                PPCGenAsmWriter.inc  PPCGenCodeEmitter.inc \
+                PPCGenAsmWriter.inc  \
                 PPCGenInstrInfo.inc PPCGenDAGISel.inc \
                 PPCGenSubtargetInfo.inc PPCGenCallingConv.inc \
                 PPCGenMCCodeEmitter.inc PPCGenFastISel.inc \
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index c42c5be..8fb33df 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_TARGET_POWERPC_H
-#define LLVM_TARGET_POWERPC_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPC_H
+#define LLVM_LIB_TARGET_POWERPC_PPC_H
 
 #include "MCTargetDesc/PPCMCTargetDesc.h"
 #include <string>
@@ -26,7 +26,6 @@ namespace llvm {
   class PassRegistry;
   class FunctionPass;
   class ImmutablePass;
-  class JITCodeEmitter;
   class MachineInstr;
   class AsmPrinter;
   class MCInst;
@@ -41,8 +40,6 @@ namespace llvm {
   FunctionPass *createPPCVSXFMAMutatePass();
   FunctionPass *createPPCBranchSelectionPass();
   FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
-  FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
-                                            JITCodeEmitter &MCE);
   void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                     AsmPrinter &AP, bool isDarwin);
 
@@ -60,10 +57,11 @@ namespace llvm {
     // PPC Specific MachineOperand flags.
     MO_NO_FLAG,
     
-    /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
-    /// reference is actually to the "FOO$stub" symbol.  This is used for calls
-    /// and jumps to external functions on Tiger and earlier.
-    MO_DARWIN_STUB = 1,
+    /// MO_PLT_OR_STUB - On a symbol operand "FOO", this indicates that the
+    /// reference is actually to the "FOO$stub" or "FOO@plt" symbol.  This is
+    /// used for calls and jumps to external functions on Tiger and earlier, and
+    /// for PIC calls on Linux and ELF systems.
+    MO_PLT_OR_STUB = 1,
     
     /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
     /// the function's picbase, e.g. lo16(symbol-picbase).
@@ -95,7 +93,12 @@ namespace llvm {
     MO_TOC_LO    = 7 << 4,
 
     // Symbol for VK_PPC_TLS fixup attached to an ADD instruction
-    MO_TLS       = 8 << 4
+    MO_TLS       = 8 << 4,
+
+    // Symbols for VK_PPC_TLSGD and VK_PPC_TLSLD in __tls_get_addr
+    // call sequences.
+    MO_TLSLD     = 9 << 4,
+    MO_TLSGD     = 10 << 4
   };
   } // end namespace PPCII
   
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index a9842b2..46d56a4 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -56,6 +56,8 @@ def FeatureCRBits    : SubtargetFeature<"crbits", "UseCRBits", "true",
                               "Use condition-register bits individually">;
 def FeatureAltivec   : SubtargetFeature<"altivec","HasAltivec", "true",
                                         "Enable Altivec instructions">;
+def FeatureSPE       : SubtargetFeature<"spe","HasSPE", "true",
+                                        "Enable SPE instructions">;
 def FeatureMFOCRF    : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
                                         "Enable the MFOCRF instruction">;
 def FeatureFSqrt     : SubtargetFeature<"fsqrt","HasFSQRT", "true",
@@ -88,11 +90,23 @@ def FeatureLDBRX     : SubtargetFeature<"ldbrx","HasLDBRX", "true",
                                         "Enable the ldbrx instruction">;
 def FeatureBookE     : SubtargetFeature<"booke", "IsBookE", "true",
                                         "Enable Book E instructions">;
+def FeatureMSYNC     : SubtargetFeature<"msync", "HasOnlyMSYNC", "true",
+                              "Has only the msync instruction instead of sync",
+                              [FeatureBookE]>;
+def FeatureE500      : SubtargetFeature<"e500", "IsE500", "true",
+                                        "Enable E500/E500mc instructions">;
+def FeaturePPC4xx    : SubtargetFeature<"ppc4xx", "IsPPC4xx", "true",
+                                        "Enable PPC 4xx instructions">;
+def FeaturePPC6xx    : SubtargetFeature<"ppc6xx", "IsPPC6xx", "true",
+                                        "Enable PPC 6xx instructions">;
 def FeatureQPX       : SubtargetFeature<"qpx","HasQPX", "true",
                                         "Enable QPX instructions">;
 def FeatureVSX       : SubtargetFeature<"vsx","HasVSX", "true",
                                         "Enable VSX instructions",
                                         [FeatureAltivec]>;
+def FeatureP8Vector  : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
+                                        "Enable POWER8 vector instructions",
+                                        [FeatureVSX, FeatureAltivec]>;
 
 def DeprecatedMFTB   : SubtargetFeature<"", "DeprecatedMFTB", "true",
                                         "Treat mftb as deprecated">;
@@ -105,7 +119,16 @@ def DeprecatedDST    : SubtargetFeature<"", "DeprecatedDST", "true",
 // CMPB         p6, p6x, p7        cmpb
 // DFP          p6, p6x, p7        decimal floating-point instructions
 // POPCNTB      p5 through p7      popcntb and related instructions
-// VSX          p7                 vector-scalar instruction set
+
+//===----------------------------------------------------------------------===//
+// ABI Selection                                                              //
+//===----------------------------------------------------------------------===//
+
+def FeatureELFv1 : SubtargetFeature<"elfv1", "TargetABI", "PPC_ABI_ELFv1",
+                                    "Use the ELFv1 ABI">;
+
+def FeatureELFv2 : SubtargetFeature<"elfv2", "TargetABI", "PPC_ABI_ELFv2",
+                                    "Use the ELFv2 ABI">;
 
 //===----------------------------------------------------------------------===//
 // Classes used for relation maps.
@@ -178,10 +201,12 @@ include "PPCInstrInfo.td"
 def : Processor<"generic", G3Itineraries, [Directive32]>;
 def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL,
                                           FeatureFRES, FeatureFRSQRTE,
-                                          FeatureBookE, DeprecatedMFTB]>;
+                                          FeatureBookE, FeatureMSYNC,
+                                          DeprecatedMFTB]>;
 def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL,
                                           FeatureFRES, FeatureFRSQRTE,
-                                          FeatureBookE, DeprecatedMFTB]>;
+                                          FeatureBookE, FeatureMSYNC,
+                                          DeprecatedMFTB]>;
 def : Processor<"601", G3Itineraries, [Directive601]>;
 def : Processor<"602", G3Itineraries, [Directive602]>;
 def : Processor<"603", G3Itineraries, [Directive603,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index fd044d9..5648873 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -18,6 +18,7 @@
 
 #include "PPC.h"
 #include "InstPrinter/PPCInstPrinter.h"
+#include "PPCMachineFunctionInfo.h"
 #include "MCTargetDesc/PPCMCExpr.h"
 #include "MCTargetDesc/PPCPredicates.h"
 #include "PPCSubtarget.h"
@@ -27,10 +28,12 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfo.h"
@@ -100,9 +103,11 @@ namespace {
     }
 
     bool doFinalization(Module &M) override;
+    void EmitStartOfAsmFile(Module &M) override;
 
     void EmitFunctionEntryLabel() override;
 
+    void EmitFunctionBodyStart() override;
     void EmitFunctionBodyEnd() override;
   };
 
@@ -142,7 +147,7 @@ static const char *stripRegisterPrefix(const char *RegName) {
 
 void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
                                  raw_ostream &O) {
-  const DataLayout *DL = TM.getDataLayout();
+  const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
   const MachineOperand &MO = MI->getOperand(OpNo);
   
   switch (MO.getType()) {
@@ -270,6 +275,18 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
         printOperand(MI, OpNo, O);
         return false;
       }
+    case 'U': // Print 'u' for update form.
+    case 'X': // Print 'x' for indexed form.
+      {
+	// FIXME: Currently for PowerPC memory operands are always loaded
+	// into a register, so we never get an update or indexed form.
+	// This is bad even for offset forms, since even if we know we
+	// have a value in -16(r1), we will generate a load into r<n>
+	// and then load from 0(r<n>).  Until that issue is fixed,
+	// tolerate 'U' and 'X' but don't output anything.
+	assert(MI->getOperand(OpNo).isReg());
+	return false;
+      }
     }
   }
 
@@ -285,7 +302,7 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
 /// exists for it.  If not, create one.  Then return a symbol that references
 /// the TOC entry.
 MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
-  const DataLayout *DL = TM.getDataLayout();
+  const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
   MCSymbol *&TOCEntry = TOC[Sym];
 
   // To avoid name clash check if the name already exists.
@@ -306,12 +323,35 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
 void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   MCInst TmpInst;
   bool isPPC64 = Subtarget.isPPC64();
+  bool isDarwin = Triple(TM.getTargetTriple()).isOSDarwin();
+  const Module *M = MF->getFunction()->getParent();
+  PICLevel::Level PL = M->getPICLevel();
   
   // Lower multi-instruction pseudo operations.
   switch (MI->getOpcode()) {
   default: break;
   case TargetOpcode::DBG_VALUE:
     llvm_unreachable("Should be handled target independently");
+  case PPC::MoveGOTtoLR: {
+    // Transform %LR = MoveGOTtoLR
+    // Into this: bl _GLOBAL_OFFSET_TABLE_@local-4
+    // _GLOBAL_OFFSET_TABLE_@local-4 (instruction preceding
+    // _GLOBAL_OFFSET_TABLE_) has exactly one instruction:
+    //      blrl
+    // This will return the pointer to _GLOBAL_OFFSET_TABLE_@local
+    MCSymbol *GOTSymbol =
+      OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+    const MCExpr *OffsExpr =
+      MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol,
+                                                      MCSymbolRefExpr::VK_PPC_LOCAL,
+                                                      OutContext),
+                              MCConstantExpr::Create(4, OutContext),
+                              OutContext);
+
+    // Emit the 'bl'.
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL).addExpr(OffsExpr));
+    return;
+  }
   case PPC::MovePCtoLR:
   case PPC::MovePCtoLR8: {
     // Transform %LR = MovePCtoLR
@@ -330,11 +370,85 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     OutStreamer.EmitLabel(PICBase);
     return;
   }
+  case PPC::UpdateGBR: {
+    // Transform %Rd = UpdateGBR(%Rt, %Ri)
+    // Into: lwz %Rt, .L0$poff - .L0$pb(%Ri)
+    //       add %Rd, %Rt, %Ri
+    // Get the offset from the GOT Base Register to the GOT
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+    MCSymbol *PICOffset =
+      MF->getInfo<PPCFunctionInfo>()->getPICOffsetSymbol();
+    TmpInst.setOpcode(PPC::LWZ);
+    const MCExpr *Exp =
+      MCSymbolRefExpr::Create(PICOffset, MCSymbolRefExpr::VK_None, OutContext);
+    const MCExpr *PB =
+      MCSymbolRefExpr::Create(MF->getPICBaseSymbol(),
+                              MCSymbolRefExpr::VK_None,
+                              OutContext);
+    const MCOperand TR = TmpInst.getOperand(1);
+    const MCOperand PICR = TmpInst.getOperand(0);
+
+    // Step 1: lwz %Rt, .L$poff - .L$pb(%Ri)
+    TmpInst.getOperand(1) =
+        MCOperand::CreateExpr(MCBinaryExpr::CreateSub(Exp, PB, OutContext));
+    TmpInst.getOperand(0) = TR;
+    TmpInst.getOperand(2) = PICR;
+    EmitToStreamer(OutStreamer, TmpInst);
+
+    TmpInst.setOpcode(PPC::ADD4);
+    TmpInst.getOperand(0) = PICR;
+    TmpInst.getOperand(1) = TR;
+    TmpInst.getOperand(2) = PICR;
+    EmitToStreamer(OutStreamer, TmpInst);
+    return;
+  }
+  case PPC::LWZtoc: {
+    // Transform %R3 = LWZtoc <ga:@min1>, %R2
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+
+    // Change the opcode to LWZ, and the global address operand to be a
+    // reference to the GOT entry we will synthesize later.
+    TmpInst.setOpcode(PPC::LWZ);
+    const MachineOperand &MO = MI->getOperand(1);
+
+    // Map symbol -> label of TOC entry
+    assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress());
+    MCSymbol *MOSymbol = nullptr;
+    if (MO.isGlobal())
+      MOSymbol = getSymbol(MO.getGlobal());
+    else if (MO.isCPI())
+      MOSymbol = GetCPISymbol(MO.getIndex());
+    else if (MO.isJTI())
+      MOSymbol = GetJTISymbol(MO.getIndex());
+    else if (MO.isBlockAddress())
+      MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
+
+    if (PL == PICLevel::Small) {
+      const MCExpr *Exp =
+        MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_GOT,
+                                OutContext);
+      TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+    } else {
+      MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+
+      const MCExpr *Exp =
+        MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_None,
+                                OutContext);
+      const MCExpr *PB =
+        MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".LTOC")),
+                                                             OutContext);
+      Exp = MCBinaryExpr::CreateSub(Exp, PB, OutContext);
+      TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+    }
+    EmitToStreamer(OutStreamer, TmpInst);
+    return;
+  }
   case PPC::LDtocJTI:
   case PPC::LDtocCPT:
+  case PPC::LDtocBA:
   case PPC::LDtoc: {
     // Transform %X3 = LDtoc <ga:@min1>, %X2
-    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
 
     // Change the opcode to LD, and the global address operand to be a
     // reference to the TOC entry we will synthesize later.
@@ -342,7 +456,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     const MachineOperand &MO = MI->getOperand(1);
 
     // Map symbol -> label of TOC entry
-    assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
+    assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress());
     MCSymbol *MOSymbol = nullptr;
     if (MO.isGlobal())
       MOSymbol = getSymbol(MO.getGlobal());
@@ -350,6 +464,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       MOSymbol = GetCPISymbol(MO.getIndex());
     else if (MO.isJTI())
       MOSymbol = GetJTISymbol(MO.getIndex());
+    else if (MO.isBlockAddress())
+      MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
 
     MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
 
@@ -363,7 +479,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       
   case PPC::ADDIStocHA: {
     // Transform %Xd = ADDIStocHA %X2, <ga:@sym>
-    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
 
     // Change the opcode to ADDIS8.  If the global address is external, has
     // common linkage, is a non-local function address, or is a jump table
@@ -371,7 +487,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     // reference the symbol directly.
     TmpInst.setOpcode(PPC::ADDIS8);
     const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
+            MO.isBlockAddress()) &&
            "Invalid operand for ADDIStocHA!");
     MCSymbol *MOSymbol = nullptr;
     bool IsExternal = false;
@@ -391,9 +508,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       MOSymbol = GetCPISymbol(MO.getIndex());
     else if (MO.isJTI())
       MOSymbol = GetJTISymbol(MO.getIndex());
+    else if (MO.isBlockAddress())
+      MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
 
     if (IsExternal || IsNonLocalFunction || IsCommon || IsAvailExt ||
-        MO.isJTI() || TM.getCodeModel() == CodeModel::Large)
+        MO.isJTI() || MO.isBlockAddress() ||
+        TM.getCodeModel() == CodeModel::Large)
       MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
 
     const MCExpr *Exp =
@@ -405,19 +525,24 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   }
   case PPC::LDtocL: {
     // Transform %Xd = LDtocL <ga:@sym>, %Xs
-    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
 
     // Change the opcode to LD.  If the global address is external, has
     // common linkage, or is a jump table address, then reference the
     // associated TOC entry.  Otherwise reference the symbol directly.
     TmpInst.setOpcode(PPC::LD);
     const MachineOperand &MO = MI->getOperand(1);
-    assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) &&
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
+            MO.isBlockAddress()) &&
            "Invalid operand for LDtocL!");
     MCSymbol *MOSymbol = nullptr;
 
     if (MO.isJTI())
       MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
+    else if (MO.isBlockAddress()) {
+      MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
+      MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+    }
     else if (MO.isCPI()) {
       MOSymbol = GetCPISymbol(MO.getIndex());
       if (TM.getCodeModel() == CodeModel::Large)
@@ -442,7 +567,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   }
   case PPC::ADDItocL: {
     // Transform %Xd = ADDItocL %Xs, <ga:@sym>
-    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
 
     // Change the opcode to ADDI8.  If the global address is external, then
     // generate a TOC entry and reference that.  Otherwise reference the
@@ -493,7 +618,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   case PPC::LDgotTprelL:
   case PPC::LDgotTprelL32: {
     // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs
-    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
 
     // Change the opcode to LD.
     TmpInst.setOpcode(isPPC64 ? PPC::LD : PPC::LWZ);
@@ -508,6 +633,34 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     return;
   }
 
+  case PPC::PPC32PICGOT: {
+    MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+    MCSymbol *GOTRef = OutContext.CreateTempSymbol();
+    MCSymbol *NextInstr = OutContext.CreateTempSymbol();
+
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL)
+      // FIXME: We would like an efficient form for this, so we don't have to do
+      // a lot of extra uniquing.
+      .addExpr(MCSymbolRefExpr::Create(NextInstr, OutContext)));
+    const MCExpr *OffsExpr =
+      MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol, OutContext),
+                                MCSymbolRefExpr::Create(GOTRef, OutContext),
+        OutContext);
+    OutStreamer.EmitLabel(GOTRef);
+    OutStreamer.EmitValue(OffsExpr, 4);
+    OutStreamer.EmitLabel(NextInstr);
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR)
+                                .addReg(MI->getOperand(0).getReg()));
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LWZ)
+                                .addReg(MI->getOperand(1).getReg())
+                                .addImm(0)
+                                .addReg(MI->getOperand(0).getReg()));
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADD4)
+                                .addReg(MI->getOperand(0).getReg())
+                                .addReg(MI->getOperand(1).getReg())
+                                .addReg(MI->getOperand(0).getReg()));
+    return;
+  }
   case PPC::PPC32GOT: {
     MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
     const MCExpr *SymGotTlsL =
@@ -541,40 +694,25 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
                                 .addExpr(SymGotTlsGD));
     return;
   }
-  case PPC::ADDItlsgdL: {
+  case PPC::ADDItlsgdL:
     // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym>
     // Into:      %Xd = ADDI8 %Xs, sym@got@tlsgd@l
-    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+  case PPC::ADDItlsgdL32: {
+    // Transform: %Rd = ADDItlsgdL32 %Rs, <ga:@sym>
+    // Into:      %Rd = ADDI %Rs, sym@got@tlsgd
     const MachineOperand &MO = MI->getOperand(2);
     const GlobalValue *GValue = MO.getGlobal();
     MCSymbol *MOSymbol = getSymbol(GValue);
     const MCExpr *SymGotTlsGD =
-      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO,
-                              OutContext);
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
-                                .addReg(MI->getOperand(0).getReg())
-                                .addReg(MI->getOperand(1).getReg())
-                                .addExpr(SymGotTlsGD));
-    return;
-  }
-  case PPC::GETtlsADDR: {
-    // Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
-    // Into:      BL8_NOP_TLS __tls_get_addr(sym@tlsgd)
-    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
-
-    StringRef Name = "__tls_get_addr";
-    MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
-    const MCSymbolRefExpr *TlsRef = 
-      MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
-    const MachineOperand &MO = MI->getOperand(2);
-    const GlobalValue *GValue = MO.getGlobal();
-    MCSymbol *MOSymbol = getSymbol(GValue);
-    const MCExpr *SymVar =
-      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
+      MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ?
+                                         MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO :
+                                         MCSymbolRefExpr::VK_PPC_GOT_TLSGD,
                               OutContext);
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS)
-                                .addExpr(TlsRef)
-                                .addExpr(SymVar));
+    EmitToStreamer(OutStreamer,
+                   MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+                   .addReg(MI->getOperand(0).getReg())
+                   .addReg(MI->getOperand(1).getReg())
+                   .addExpr(SymGotTlsGD));
     return;
   }
   case PPC::ADDIStlsldHA: {
@@ -593,72 +731,63 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
                                 .addExpr(SymGotTlsLD));
     return;
   }
-  case PPC::ADDItlsldL: {
+  case PPC::ADDItlsldL:
     // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym>
     // Into:      %Xd = ADDI8 %Xs, sym@got@tlsld@l
-    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+  case PPC::ADDItlsldL32: {
+    // Transform: %Rd = ADDItlsldL32 %Rs, <ga:@sym>
+    // Into:      %Rd = ADDI %Rs, sym@got@tlsld
     const MachineOperand &MO = MI->getOperand(2);
     const GlobalValue *GValue = MO.getGlobal();
     MCSymbol *MOSymbol = getSymbol(GValue);
     const MCExpr *SymGotTlsLD =
-      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO,
-                              OutContext);
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
-                                .addReg(MI->getOperand(0).getReg())
-                                .addReg(MI->getOperand(1).getReg())
-                                .addExpr(SymGotTlsLD));
-    return;
-  }
-  case PPC::GETtlsldADDR: {
-    // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
-    // Into:      BL8_NOP_TLS __tls_get_addr(sym@tlsld)
-    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
-
-    StringRef Name = "__tls_get_addr";
-    MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
-    const MCSymbolRefExpr *TlsRef = 
-      MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
-    const MachineOperand &MO = MI->getOperand(2);
-    const GlobalValue *GValue = MO.getGlobal();
-    MCSymbol *MOSymbol = getSymbol(GValue);
-    const MCExpr *SymVar =
-      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
+      MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ?
+                                         MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO :
+                                         MCSymbolRefExpr::VK_PPC_GOT_TLSLD,
                               OutContext);
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS)
-                                .addExpr(TlsRef)
-                                .addExpr(SymVar));
+    EmitToStreamer(OutStreamer,
+                   MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+                   .addReg(MI->getOperand(0).getReg())
+                   .addReg(MI->getOperand(1).getReg())
+                   .addExpr(SymGotTlsLD));
     return;
   }
-  case PPC::ADDISdtprelHA: {
+  case PPC::ADDISdtprelHA:
     // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
     // Into:      %Xd = ADDIS8 %X3, sym@dtprel@ha
-    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+  case PPC::ADDISdtprelHA32: {
+    // Transform: %Rd = ADDISdtprelHA32 %R3, <ga:@sym>
+    // Into:      %Rd = ADDIS %R3, sym@dtprel@ha
     const MachineOperand &MO = MI->getOperand(2);
     const GlobalValue *GValue = MO.getGlobal();
     MCSymbol *MOSymbol = getSymbol(GValue);
     const MCExpr *SymDtprel =
       MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA,
                               OutContext);
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
-                                .addReg(MI->getOperand(0).getReg())
-                                .addReg(PPC::X3)
-                                .addExpr(SymDtprel));
+    EmitToStreamer(OutStreamer,
+                   MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDIS8 : PPC::ADDIS)
+                   .addReg(MI->getOperand(0).getReg())
+                   .addReg(Subtarget.isPPC64() ? PPC::X3 : PPC::R3)
+                   .addExpr(SymDtprel));
     return;
   }
-  case PPC::ADDIdtprelL: {
+  case PPC::ADDIdtprelL:
     // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym>
     // Into:      %Xd = ADDI8 %Xs, sym@dtprel@l
-    assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+  case PPC::ADDIdtprelL32: {
+    // Transform: %Rd = ADDIdtprelL32 %Rs, <ga:@sym>
+    // Into:      %Rd = ADDI %Rs, sym@dtprel@l
     const MachineOperand &MO = MI->getOperand(2);
     const GlobalValue *GValue = MO.getGlobal();
     MCSymbol *MOSymbol = getSymbol(GValue);
     const MCExpr *SymDtprel =
       MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
                               OutContext);
-    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
-                                .addReg(MI->getOperand(0).getReg())
-                                .addReg(MI->getOperand(1).getReg())
-                                .addExpr(SymDtprel));
+    EmitToStreamer(OutStreamer,
+                   MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+                   .addReg(MI->getOperand(0).getReg())
+                   .addReg(MI->getOperand(1).getReg())
+                   .addExpr(SymDtprel));
     return;
   }
   case PPC::MFOCRF:
@@ -713,14 +842,77 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   }
   }
 
-  LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+  LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
   EmitToStreamer(OutStreamer, TmpInst);
 }
 
+void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
+  if (Subtarget.isELFv2ABI()) {
+    PPCTargetStreamer *TS =
+      static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+
+    if (TS)
+      TS->emitAbiVersion(2);
+  }
+
+  if (Subtarget.isPPC64() || TM.getRelocationModel() != Reloc::PIC_)
+    return AsmPrinter::EmitStartOfAsmFile(M);
+
+  if (M.getPICLevel() == PICLevel::Small)
+    return AsmPrinter::EmitStartOfAsmFile(M);
+
+  OutStreamer.SwitchSection(OutContext.getELFSection(".got2",
+         ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
+         SectionKind::getReadOnly()));
+
+  MCSymbol *TOCSym = OutContext.GetOrCreateSymbol(Twine(".LTOC"));
+  MCSymbol *CurrentPos = OutContext.CreateTempSymbol();
+
+  OutStreamer.EmitLabel(CurrentPos);
+
+  // The GOT pointer points to the middle of the GOT, in order to reference the
+  // entire 64kB range.  0x8000 is the midpoint.
+  const MCExpr *tocExpr =
+    MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(CurrentPos, OutContext),
+                            MCConstantExpr::Create(0x8000, OutContext),
+                            OutContext);
+
+  OutStreamer.EmitAssignment(TOCSym, tocExpr);
+
+  OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+}
+
 void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
-  if (!Subtarget.isPPC64())  // linux/ppc32 - Normal entry label.
+  // linux/ppc32 - Normal entry label.
+  if (!Subtarget.isPPC64() && 
+      (TM.getRelocationModel() != Reloc::PIC_ || 
+       MF->getFunction()->getParent()->getPICLevel() == PICLevel::Small))
     return AsmPrinter::EmitFunctionEntryLabel();
-    
+
+  if (!Subtarget.isPPC64()) {
+    const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
+  	if (PPCFI->usesPICBase()) {
+      MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol();
+      MCSymbol *PICBase = MF->getPICBaseSymbol();
+      OutStreamer.EmitLabel(RelocSymbol);
+
+      const MCExpr *OffsExpr =
+        MCBinaryExpr::CreateSub(
+          MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".LTOC")),
+                                                               OutContext),
+                                  MCSymbolRefExpr::Create(PICBase, OutContext),
+          OutContext);
+      OutStreamer.EmitValue(OffsExpr, 4);
+      OutStreamer.EmitLabel(CurrentFnSym);
+      return;
+    } else
+      return AsmPrinter::EmitFunctionEntryLabel();
+  }
+
+  // ELFv2 ABI - Normal entry label.
+  if (Subtarget.isELFv2ABI())
+    return AsmPrinter::EmitFunctionEntryLabel();
+
   // Emit an official procedure descriptor.
   MCSectionSubPair Current = OutStreamer.getCurrentSection();
   const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd",
@@ -752,15 +944,22 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
 
 
 bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
-  const DataLayout *TD = TM.getDataLayout();
+  const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
 
   bool isPPC64 = TD->getPointerSizeInBits() == 64;
 
   PPCTargetStreamer &TS =
       static_cast<PPCTargetStreamer &>(*OutStreamer.getTargetStreamer());
 
-  if (isPPC64 && !TOC.empty()) {
-    const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc",
+  if (!TOC.empty()) {
+    const MCSectionELF *Section;
+    
+    if (isPPC64)
+      Section = OutStreamer.getContext().getELFSection(".toc",
+        ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
+        SectionKind::getReadOnly());
+	else
+      Section = OutStreamer.getContext().getELFSection(".got2",
         ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
         SectionKind::getReadOnly());
     OutStreamer.SwitchSection(Section);
@@ -768,8 +967,11 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
     for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
          E = TOC.end(); I != E; ++I) {
       OutStreamer.EmitLabel(I->second);
-      MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName());
-      TS.emitTCEntry(*S);
+      MCSymbol *S = I->first;
+      if (isPPC64)
+        TS.emitTCEntry(*S);
+      else
+        OutStreamer.EmitSymbolValue(S, 4);
     }
   }
 
@@ -795,6 +997,68 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
   return AsmPrinter::doFinalization(M);
 }
 
+/// EmitFunctionBodyStart - Emit a global entry point prefix for ELFv2.
+void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
+  // In the ELFv2 ABI, in functions that use the TOC register, we need to
+  // provide two entry points.  The ABI guarantees that when calling the
+  // local entry point, r2 is set up by the caller to contain the TOC base
+  // for this function, and when calling the global entry point, r12 is set
+  // up by the caller to hold the address of the global entry point.  We
+  // thus emit a prefix sequence along the following lines:
+  //
+  // func:
+  //         # global entry point
+  //         addis r2,r12,(.TOC.-func)@ha
+  //         addi  r2,r2,(.TOC.-func)@l
+  //         .localentry func, .-func
+  //         # local entry point, followed by function body
+  //
+  // This ensures we have r2 set up correctly while executing the function
+  // body, no matter which entry point is called.
+  if (Subtarget.isELFv2ABI()
+      // Only do all that if the function uses r2 in the first place.
+      && !MF->getRegInfo().use_empty(PPC::X2)) {
+
+    MCSymbol *GlobalEntryLabel = OutContext.CreateTempSymbol();
+    OutStreamer.EmitLabel(GlobalEntryLabel);
+    const MCSymbolRefExpr *GlobalEntryLabelExp =
+      MCSymbolRefExpr::Create(GlobalEntryLabel, OutContext);
+
+    MCSymbol *TOCSymbol = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
+    const MCExpr *TOCDeltaExpr =
+      MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(TOCSymbol, OutContext),
+                              GlobalEntryLabelExp, OutContext);
+
+    const MCExpr *TOCDeltaHi =
+      PPCMCExpr::CreateHa(TOCDeltaExpr, false, OutContext);
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
+                                .addReg(PPC::X2)
+                                .addReg(PPC::X12)
+                                .addExpr(TOCDeltaHi));
+
+    const MCExpr *TOCDeltaLo =
+      PPCMCExpr::CreateLo(TOCDeltaExpr, false, OutContext);
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI)
+                                .addReg(PPC::X2)
+                                .addReg(PPC::X2)
+                                .addExpr(TOCDeltaLo));
+
+    MCSymbol *LocalEntryLabel = OutContext.CreateTempSymbol();
+    OutStreamer.EmitLabel(LocalEntryLabel);
+    const MCSymbolRefExpr *LocalEntryLabelExp =
+       MCSymbolRefExpr::Create(LocalEntryLabel, OutContext);
+    const MCExpr *LocalOffsetExp =
+      MCBinaryExpr::CreateSub(LocalEntryLabelExp,
+                              GlobalEntryLabelExp, OutContext);
+
+    PPCTargetStreamer *TS =
+      static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+
+    if (TS)
+      TS->emitLocalEntry(CurrentFnSym, LocalOffsetExp);
+  }
+}
+
 /// EmitFunctionBodyEnd - Print the traceback table before the .size
 /// directive.
 ///
@@ -886,7 +1150,8 @@ static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
 
 void PPCDarwinAsmPrinter::
 EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
-  bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+  bool isPPC64 =
+      TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits() == 64;
   bool isDarwin = Subtarget.isDarwin();
   
   const TargetLoweringObjectFileMachO &TLOFMacho = 
@@ -1022,7 +1287,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
 
 
 bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
-  bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+  bool isPPC64 =
+      TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits() == 64;
 
   // Darwin/PPC always uses mach-o.
   const TargetLoweringObjectFileMachO &TLOFMacho = 
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index ee90671..41594be 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "ppc-branch-select"
@@ -64,7 +65,7 @@ FunctionPass *llvm::createPPCBranchSelectionPass() {
 
 bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
   const PPCInstrInfo *TII =
-                static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo());
+      static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
   // Give the blocks of the function a dense, in-order, numbering.
   Fn.RenumberBlocks();
   BlockSizes.resize(Fn.getNumBlockIDs());
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index ec1e34d..5f3b176 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -214,7 +214,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
 
       if (!TM)
         return true;
-      const TargetLowering *TLI = TM->getTargetLowering();
+      const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
 
       if (Function *F = CI->getCalledFunction()) {
         // Most intrinsics don't become function calls, but some might.
@@ -384,10 +384,9 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
       if (!TM)
         return true;
-      const TargetLowering *TLI = TM->getTargetLowering();
+      const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
 
-      if (TLI->supportJumpTables() &&
-          SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries())
+      if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
         return true;
     }
   }
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index d48164d..cf8fee4 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -14,9 +14,15 @@
 
 /// CCIfSubtarget - Match if the current subtarget has a feature F.
 class CCIfSubtarget<string F, CCAction A>
- : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+    : CCIf<!strconcat("static_cast<const PPCSubtarget&>"
+                       "(State.getMachineFunction().getSubtarget()).",
+                     F),
+          A>;
 class CCIfNotSubtarget<string F, CCAction A>
- : CCIf<!strconcat("!State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+    : CCIf<!strconcat("!static_cast<const PPCSubtarget&>"
+                       "(State.getMachineFunction().getSubtarget()).",
+                     F),
+          A>;
 
 //===----------------------------------------------------------------------===//
 // Return Value Calling Convention
@@ -31,13 +37,18 @@ def RetCC_PPC : CallingConv<[
   CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
   CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>,
   CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
+
+  // Floating point types returned as "direct" go into F1 .. F8; note that
+  // only the ELFv2 ABI fully utilizes all these registers.
+  CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+  CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
   
-  CCIfType<[f32], CCAssignToReg<[F1, F2]>>,
-  CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
-  
-  // Vector types are always returned in V2.
-  CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>,
-  CCIfType<[v2f64, v2i64], CCAssignToReg<[VSH2]>>
+  // Vector types returned as "direct" go into V2 .. V9; note that only the
+  // ELFv2 ABI fully utilizes all these registers.
+  CCIfType<[v16i8, v8i16, v4i32, v4f32],
+           CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
+  CCIfType<[v2f64, v2i64],
+           CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
 ]>;
 
 
@@ -69,10 +80,12 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
   CCIfType<[i32],  CCPromoteToType<i64>>,
   CCIfType<[i64],  CCAssignToReg<[X3, X4]>>,
   CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
-  CCIfType<[f32],  CCAssignToReg<[F1, F2]>>,
-  CCIfType<[f64],  CCAssignToReg<[F1, F2, F3, F4]>>,
-  CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>,
-  CCIfType<[v2f64, v2i64], CCAssignToReg<[VSH2]>>
+  CCIfType<[f32],  CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+  CCIfType<[f64],  CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+  CCIfType<[v16i8, v8i16, v4i32, v4f32],
+           CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
+  CCIfType<[v2f64, v2i64],
+           CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
 ]>;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
deleted file mode 100644
index 0875523..0000000
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ /dev/null
@@ -1,293 +0,0 @@
-//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC -----------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the PowerPC 32-bit CodeEmitter and associated machinery to
-// JIT-compile bitcode to native PowerPC.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PPC.h"
-#include "PPCRelocations.h"
-#include "PPCTargetMachine.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/IR/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOptions.h"
-using namespace llvm;
-
-namespace {
-  class PPCCodeEmitter : public MachineFunctionPass {
-    TargetMachine &TM;
-    JITCodeEmitter &MCE;
-    MachineModuleInfo *MMI;
-    
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequired<MachineModuleInfo>();
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
-    
-    static char ID;
-    
-    /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record
-    /// its address in the function into this pointer.
-    void *MovePCtoLROffset;
-  public:
-    
-    PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
-      : MachineFunctionPass(ID), TM(tm), MCE(mce) {}
-
-    /// getBinaryCodeForInstr - This function, generated by the
-    /// CodeEmitterGenerator using TableGen, produces the binary encoding for
-    /// machine instructions.
-    uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
-
-    
-    MachineRelocation GetRelocation(const MachineOperand &MO,
-                                    unsigned RelocID) const;
-    
-    /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
-    unsigned getMachineOpValue(const MachineInstr &MI,
-                               const MachineOperand &MO) const;
-
-    unsigned get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const;
-    unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
-    unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
-    unsigned getAbsDirectBrEncoding(const MachineInstr &MI,
-                                    unsigned OpNo) const;
-    unsigned getAbsCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
-
-    unsigned getImm16Encoding(const MachineInstr &MI, unsigned OpNo) const;
-    unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
-    unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
-    unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const;
-    unsigned getTLSCallEncoding(const MachineInstr &MI, unsigned OpNo) const;
-
-    const char *getPassName() const override {
-      return "PowerPC Machine Code Emitter";
-    }
-
-    /// runOnMachineFunction - emits the given MachineFunction to memory
-    ///
-    bool runOnMachineFunction(MachineFunction &MF) override;
-
-    /// emitBasicBlock - emits the given MachineBasicBlock to memory
-    ///
-    void emitBasicBlock(MachineBasicBlock &MBB);
-  };
-}
-
-char PPCCodeEmitter::ID = 0;
-
-/// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code
-/// to the specified MCE object.
-FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
-                                                JITCodeEmitter &JCE) {
-  return new PPCCodeEmitter(TM, JCE);
-}
-
-bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
-  assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
-          MF.getTarget().getRelocationModel() != Reloc::Static) &&
-         "JIT relocation model must be set to static or default!");
-
-  MMI = &getAnalysis<MachineModuleInfo>();
-  MCE.setModuleInfo(MMI);
-  do {
-    MovePCtoLROffset = nullptr;
-    MCE.startFunction(MF);
-    for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
-      emitBasicBlock(*BB);
-  } while (MCE.finishFunction(MF));
-
-  return false;
-}
-
-void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
-  MCE.StartMachineBasicBlock(&MBB);
-
-  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){
-    const MachineInstr &MI = *I;
-    MCE.processDebugLoc(MI.getDebugLoc(), true);
-    switch (MI.getOpcode()) {
-    default:
-      MCE.emitWordBE(getBinaryCodeForInstr(MI));
-      break;
-    case TargetOpcode::CFI_INSTRUCTION:
-      break;
-    case TargetOpcode::EH_LABEL:
-      MCE.emitLabel(MI.getOperand(0).getMCSymbol());
-      break;
-    case TargetOpcode::IMPLICIT_DEF:
-    case TargetOpcode::KILL:
-      break; // pseudo opcode, no side effects
-    case PPC::MovePCtoLR:
-    case PPC::MovePCtoLR8:
-      assert(TM.getRelocationModel() == Reloc::PIC_);
-      MovePCtoLROffset = (void*)MCE.getCurrentPCValue();
-      MCE.emitWordBE(0x48000005);   // bl 1
-      break;
-    }
-    MCE.processDebugLoc(MI.getDebugLoc(), false);
-  }
-}
-
-unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
-                                             unsigned OpNo) const {
-  const MachineOperand &MO = MI.getOperand(OpNo);
-  assert((MI.getOpcode() == PPC::MTOCRF || MI.getOpcode() == PPC::MTOCRF8 ||
-          MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) &&
-         (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
-  return 0x80 >> TM.getRegisterInfo()->getEncodingValue(MO.getReg());
-}
-
-MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO, 
-                                                unsigned RelocID) const {
-  // If in PIC mode, we need to encode the negated address of the
-  // 'movepctolr' into the unrelocated field.  After relocation, we'll have
-  // &gv-&movepctolr-4 in the imm field.  Once &movepctolr is added to the imm
-  // field, we get &gv.  This doesn't happen for branch relocations, which are
-  // always implicitly pc relative.
-  intptr_t Cst = 0;
-  if (TM.getRelocationModel() == Reloc::PIC_) {
-    assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
-    Cst = -(intptr_t)MovePCtoLROffset - 4;
-  }
-  
-  if (MO.isGlobal())
-    return MachineRelocation::getGV(MCE.getCurrentPCOffset(), RelocID,
-                                    const_cast<GlobalValue *>(MO.getGlobal()),
-                                    Cst, isa<Function>(MO.getGlobal()));
-  if (MO.isSymbol())
-    return MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
-                                        RelocID, MO.getSymbolName(), Cst);
-  if (MO.isCPI())
-    return MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
-                                           RelocID, MO.getIndex(), Cst);
-
-  if (MO.isMBB())
-    return MachineRelocation::getBB(MCE.getCurrentPCOffset(),
-                                    RelocID, MO.getMBB());
-  
-  assert(MO.isJTI());
-  return MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
-                                         RelocID, MO.getIndex(), Cst);
-}
-
-unsigned PPCCodeEmitter::getDirectBrEncoding(const MachineInstr &MI,
-                                             unsigned OpNo) const {
-  const MachineOperand &MO = MI.getOperand(OpNo);
-  if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
-  
-  MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bx));
-  return 0;
-}
-
-unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI,
-                                           unsigned OpNo) const {
-  const MachineOperand &MO = MI.getOperand(OpNo);
-  MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bcx));
-  return 0;
-}
-
-unsigned PPCCodeEmitter::getAbsDirectBrEncoding(const MachineInstr &MI,
-                                                unsigned OpNo) const {
-  const MachineOperand &MO = MI.getOperand(OpNo);
-  if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
-
-  llvm_unreachable("Absolute branch relocations unsupported on the old JIT.");
-}
-
-unsigned PPCCodeEmitter::getAbsCondBrEncoding(const MachineInstr &MI,
-                                              unsigned OpNo) const {
-  llvm_unreachable("Absolute branch relocations unsupported on the old JIT.");
-}
-
-unsigned PPCCodeEmitter::getImm16Encoding(const MachineInstr &MI,
-                                          unsigned OpNo) const {
-  const MachineOperand &MO = MI.getOperand(OpNo);
-  if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
-
-  unsigned RelocID;
-  switch (MO.getTargetFlags() & PPCII::MO_ACCESS_MASK) {
-    default: llvm_unreachable("Unsupported target operand flags!");
-    case PPCII::MO_LO: RelocID = PPC::reloc_absolute_low; break;
-    case PPCII::MO_HA: RelocID = PPC::reloc_absolute_high; break;
-  }
-
-  MCE.addRelocation(GetRelocation(MO, RelocID));
-  return 0;
-}
-
-unsigned PPCCodeEmitter::getMemRIEncoding(const MachineInstr &MI,
-                                          unsigned OpNo) const {
-  // Encode (imm, reg) as a memri, which has the low 16-bits as the
-  // displacement and the next 5 bits as the register #.
-  assert(MI.getOperand(OpNo+1).isReg());
-  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 16;
-  
-  const MachineOperand &MO = MI.getOperand(OpNo);
-  if (MO.isImm())
-    return (getMachineOpValue(MI, MO) & 0xFFFF) | RegBits;
-  
-  // Add a fixup for the displacement field.
-  MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low));
-  return RegBits;
-}
-
-unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
-                                           unsigned OpNo) const {
-  // Encode (imm, reg) as a memrix, which has the low 14-bits as the
-  // displacement and the next 5 bits as the register #.
-  assert(MI.getOperand(OpNo+1).isReg());
-  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 14;
-  
-  const MachineOperand &MO = MI.getOperand(OpNo);
-  if (MO.isImm())
-    return ((getMachineOpValue(MI, MO) >> 2) & 0x3FFF) | RegBits;
-  
-  MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low_ix));
-  return RegBits;
-}
-
-
-unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI,
-                                           unsigned OpNo) const {
-  llvm_unreachable("TLS not supported on the old JIT.");
-  return 0;
-}
-
-unsigned PPCCodeEmitter::getTLSCallEncoding(const MachineInstr &MI,
-                                            unsigned OpNo) const {
-  llvm_unreachable("TLS not supported on the old JIT.");
-  return 0;
-}
-
-unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
-                                           const MachineOperand &MO) const {
-
-  if (MO.isReg()) {
-    // MTOCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
-    // The GPR operand should come through here though.
-    assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 &&
-            MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) ||
-           MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
-    return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
-  }
-  
-  assert(MO.isImm() &&
-         "Relocation required in an instruction that we cannot encode!");
-  return MO.getImm();
-}
-
-#include "PPCGenCodeEmitter.inc"
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 92a0ec1..1149354 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -39,7 +39,7 @@
 //===----------------------------------------------------------------------===//
 //
 // TBD:
-//   FastLowerArguments: Handle simple cases.
+//   fastLowerArguments: Handle simple cases.
 //   PPCMaterializeGV: Handle TLS.
 //   SelectCall: Handle function pointers.
 //   SelectCall: Handle multi-register return values.
@@ -92,30 +92,29 @@ class PPCFastISel final : public FastISel {
   public:
     explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
                          const TargetLibraryInfo *LibInfo)
-    : FastISel(FuncInfo, LibInfo),
-      TM(FuncInfo.MF->getTarget()),
-      TII(*TM.getInstrInfo()),
-      TLI(*TM.getTargetLowering()),
-      PPCSubTarget(&TM.getSubtarget<PPCSubtarget>()),
-      Context(&FuncInfo.Fn->getContext()) { }
+        : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
+          TII(*TM.getSubtargetImpl()->getInstrInfo()),
+          TLI(*TM.getSubtargetImpl()->getTargetLowering()),
+          PPCSubTarget(&TM.getSubtarget<PPCSubtarget>()),
+          Context(&FuncInfo.Fn->getContext()) {}
 
   // Backend specific FastISel code.
   private:
-    bool TargetSelectInstruction(const Instruction *I) override;
-    unsigned TargetMaterializeConstant(const Constant *C) override;
-    unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
+    bool fastSelectInstruction(const Instruction *I) override;
+    unsigned fastMaterializeConstant(const Constant *C) override;
+    unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
     bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                              const LoadInst *LI) override;
-    bool FastLowerArguments() override;
-    unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
-    unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+    bool fastLowerArguments() override;
+    unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
+    unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
                              const TargetRegisterClass *RC,
                              unsigned Op0, bool Op0IsKill,
                              uint64_t Imm);
-    unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+    unsigned fastEmitInst_r(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill);
-    unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+    unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
                              const TargetRegisterClass *RC,
                              unsigned Op0, bool Op0IsKill,
                              unsigned Op1, bool Op1IsKill);
@@ -153,7 +152,7 @@ class PPCFastISel final : public FastISel {
                            unsigned DestReg, bool IsZExt);
     unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
     unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
-    unsigned PPCMaterializeInt(const Constant *C, MVT VT);
+    unsigned PPCMaterializeInt(const Constant *C, MVT VT, bool UseSExt = true);
     unsigned PPCMaterialize32BitInt(int64_t Imm,
                                     const TargetRegisterClass *RC);
     unsigned PPCMaterialize64BitInt(int64_t Imm,
@@ -560,7 +559,7 @@ bool PPCFastISel::SelectLoad(const Instruction *I) {
   unsigned ResultReg = 0;
   if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
     return false;
-  UpdateValueMap(I, ResultReg);
+  updateValueMap(I, ResultReg);
   return true;
 }
 
@@ -707,7 +706,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
 
     BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
       .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
-    FastEmitBranch(FBB, DbgLoc);
+    fastEmitBranch(FBB, DbgLoc);
     FuncInfo.MBB->addSuccessor(TBB);
     return true;
 
@@ -715,7 +714,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
              dyn_cast<ConstantInt>(BI->getCondition())) {
     uint64_t Imm = CI->getZExtValue();
     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
-    FastEmitBranch(Target, DbgLoc);
+    fastEmitBranch(Target, DbgLoc);
     return true;
   }
 
@@ -838,7 +837,7 @@ bool PPCFastISel::SelectFPExt(const Instruction *I) {
     return false;
 
   // No code is generated for a FP extend.
-  UpdateValueMap(I, SrcReg);
+  updateValueMap(I, SrcReg);
   return true;
 }
 
@@ -860,12 +859,12 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg)
     .addReg(SrcReg);
 
-  UpdateValueMap(I, DestReg);
+  updateValueMap(I, DestReg);
   return true;
 }
 
 // Move an i32 or i64 value in a GPR to an f64 value in an FPR.
-// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// FIXME: When direct register moves are implemented (see PowerISA 2.07),
 // those should be used instead of moving via a stack slot when the
 // subtarget permits.
 // FIXME: The code here is sloppy for the 4-byte case.  Can use a 4-byte
@@ -898,10 +897,10 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
   if (SrcVT == MVT::i32) {
     if (!IsSigned) {
       LoadOpc = PPC::LFIWZX;
-      Addr.Offset = 4;
+      Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
     } else if (PPCSubTarget->hasLFIWAX()) {
       LoadOpc = PPC::LFIWAX;
-      Addr.Offset = 4;
+      Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
     }
   }
 
@@ -979,13 +978,13 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
     .addReg(FPReg);
 
-  UpdateValueMap(I, DestReg);
+  updateValueMap(I, DestReg);
   return true;
 }
 
 // Move the floating-point value in SrcReg into an integer destination
 // register, and return the register (or zero if we can't handle it).
-// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// FIXME: When direct register moves are implemented (see PowerISA 2.07),
 // those should be used instead of moving via a stack slot when the
 // subtarget permits.
 unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
@@ -1080,7 +1079,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
   if (IntReg == 0)
     return false;
 
-  UpdateValueMap(I, IntReg);
+  updateValueMap(I, IntReg);
   return true;
 }
 
@@ -1169,7 +1168,7 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
                 ResultReg)
             .addReg(SrcReg1)
             .addImm(Imm);
-        UpdateValueMap(I, ResultReg);
+        updateValueMap(I, ResultReg);
         return true;
       }
     }
@@ -1185,7 +1184,7 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
 
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
     .addReg(SrcReg1).addReg(SrcReg2);
-  UpdateValueMap(I, ResultReg);
+  updateValueMap(I, ResultReg);
   return true;
 }
 
@@ -1200,10 +1199,12 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
                                   unsigned &NumBytes,
                                   bool IsVarArg) {
   SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
+  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
 
   // Reserve space for the linkage area on the stack.
-  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
+  bool isELFv2ABI = PPCSubTarget->isELFv2ABI();
+  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
+                                                          isELFv2ABI);
   CCInfo.AllocateStack(LinkageSize, 8);
 
   CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
@@ -1232,6 +1233,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
   // Because we cannot tell if this is needed on the caller side, we have to
   // conservatively assume that it is needed.  As such, make sure we have at
   // least enough stack space for the caller to store the 8 GPRs.
+  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
   NumBytes = std::max(NumBytes, LinkageSize + 64);
 
   // Issue CALLSEQ_START.
@@ -1318,7 +1320,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
   // any real difficulties there.
   if (RetVT != MVT::isVoid) {
     SmallVector<CCValAssign, 16> RVLocs;
-    CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+    CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
     CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
     CCValAssign &VA = RVLocs[0];
     assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
@@ -1364,7 +1366,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
 
     assert(ResultReg && "ResultReg unset!");
     UsedRegs.push_back(SourcePhysReg);
-    UpdateValueMap(I, ResultReg);
+    updateValueMap(I, ResultReg);
   }
 }
 
@@ -1408,7 +1410,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
       RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
       RetVT != MVT::f64) {
     SmallVector<CCValAssign, 16> RVLocs;
-    CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+    CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
     CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
     if (RVLocs.size() > 1)
       return false;
@@ -1498,6 +1500,10 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
   for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
     MIB.addReg(RegArgs[II], RegState::Implicit);
 
+  // Direct calls in the ELFv2 ABI need the TOC register live into the call.
+  if (PPCSubTarget->isELFv2ABI())
+    MIB.addReg(PPC::X2, RegState::Implicit);
+
   // Add a register mask with the call-preserved registers.  Proper
   // defs for return values will be added by setPhysRegsDeadExcept().
   MIB.addRegMask(TRI.getCallPreservedMask(CC));
@@ -1531,7 +1537,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
 
     // Analyze operands of the call, assigning locations to each operand.
     SmallVector<CCValAssign, 16> ValLocs;
-    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, *Context);
+    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
     CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
     const Value *RV = Ret->getOperand(0);
     
@@ -1541,13 +1547,23 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
 
     // Special case for returning a constant integer of any size.
     // Materialize the constant as an i64 and copy it to the return
-    // register.  This avoids an unnecessary extend or truncate.
+    // register. We still need to worry about properly extending the sign. E.g:
+    // If the constant has only one bit, it means it is a boolean. Therefore
+    // we can't use PPCMaterializeInt because it extends the sign which will
+    // cause negations of the returned value to be incorrect as they are
+    // implemented as the flip of the least significant bit.
     if (isa<ConstantInt>(*RV)) {
       const Constant *C = cast<Constant>(RV);
-      unsigned SrcReg = PPCMaterializeInt(C, MVT::i64);
-      unsigned RetReg = ValLocs[0].getLocReg();
+
+      CCValAssign &VA = ValLocs[0];
+
+      unsigned RetReg = VA.getLocReg();
+      unsigned SrcReg = PPCMaterializeInt(C, MVT::i64,
+                                          VA.getLocInfo() == CCValAssign::SExt);
+
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-              TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
+            TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
+
       RetRegs.push_back(RetReg);
 
     } else {
@@ -1714,7 +1730,7 @@ bool PPCFastISel::SelectTrunc(const Instruction *I) {
     SrcReg = ResultReg;
   }
 
-  UpdateValueMap(I, SrcReg);
+  updateValueMap(I, SrcReg);
   return true;
 }
 
@@ -1753,13 +1769,13 @@ bool PPCFastISel::SelectIntExt(const Instruction *I) {
   if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
     return false;
 
-  UpdateValueMap(I, ResultReg);
+  updateValueMap(I, ResultReg);
   return true;
 }
 
 // Attempt to fast-select an instruction that wasn't handled by
 // the table-generated machinery.
-bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
+bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
 
   switch (I->getOpcode()) {
     case Instruction::Load:
@@ -2007,7 +2023,8 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
 
 // Materialize an integer constant into a register, and return
 // the register number (or zero if we failed to handle it).
-unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
+unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT,
+                                                           bool UseSExt) {
   // If we're using CR bit registers for i1 values, handle that as a special
   // case first.
   if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
@@ -2031,7 +2048,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
     unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
     unsigned ImmReg = createResultReg(RC);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
-      .addImm(CI->getSExtValue());
+      .addImm( (UseSExt) ? CI->getSExtValue() : CI->getZExtValue() );
     return ImmReg;
   }
 
@@ -2048,7 +2065,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
 
 // Materialize a constant into a register, and return the register
 // number (or zero if we failed to handle it).
-unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) {
+unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
   EVT CEVT = TLI.getValueType(C->getType(), true);
 
   // Only handle simple types.
@@ -2067,7 +2084,7 @@ unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) {
 
 // Materialize the address created by an alloca into a register, and
 // return the register number (or zero if we failed to handle it).
-unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
   // Don't handle dynamic allocas.
   if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
 
@@ -2167,7 +2184,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
 
 // Attempt to lower call arguments in a faster way than done by
 // the selection DAG code.
-bool PPCFastISel::FastLowerArguments() {
+bool PPCFastISel::fastLowerArguments() {
   // Defer to normal argument lowering for now.  It's reasonably
   // efficient.  Consider doing something like ARM to handle the
   // case where all args fit in registers, no varargs, no float
@@ -2177,7 +2194,7 @@ bool PPCFastISel::FastLowerArguments() {
 
 // Handle materializing integer constants into a register.  This is not
 // automatically generated for PowerPC, so must be explicitly created here.
-unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
+unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
   
   if (Opc != ISD::Constant)
     return 0;
@@ -2214,7 +2231,7 @@ unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
 // assigning R0 or X0 to the output register for GPRC and G8RC
 // register classes, as any such result could be used in ADDI, etc.,
 // where those regs have another meaning.
-unsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       uint64_t Imm) {
@@ -2227,27 +2244,27 @@ unsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
     (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
      (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
 
-  return FastISel::FastEmitInst_ri(MachineInstOpcode, UseRC,
+  return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC,
                                    Op0, Op0IsKill, Imm);
 }
 
 // Override for instructions with one register operand to avoid use of
 // R0/X0.  The automatic infrastructure isn't aware of the context so
 // we must be conservative.
-unsigned PPCFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
                                      const TargetRegisterClass* RC,
                                      unsigned Op0, bool Op0IsKill) {
   const TargetRegisterClass *UseRC =
     (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
      (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
 
-  return FastISel::FastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
+  return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
 }
 
 // Override for instructions with two register operands to avoid use
 // of R0/X0.  The automatic infrastructure isn't aware of the context
 // so we must be conservative.
-unsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
                                       const TargetRegisterClass* RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill) {
@@ -2255,7 +2272,7 @@ unsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
     (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
      (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
 
-  return FastISel::FastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
+  return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
                                    Op1, Op1IsKill);
 }
 
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 65e9cf2..dc87a6c 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -254,7 +254,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) {
 // transform this into the appropriate ORI instruction.
 static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
   MachineFunction *MF = MI->getParent()->getParent();
-  const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
   DebugLoc dl = MI->getDebugLoc();
 
   unsigned UsedRegMask = 0;
@@ -372,7 +372,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
   unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
 
   const PPCRegisterInfo *RegInfo =
-    static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+      static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
 
   // If we are a leaf function, and use up to 224 bytes of stack space,
   // don't have a frame pointer, calls, or dynamic alloca then we do not need
@@ -400,7 +400,8 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
 
   // Maximum call frame needs to be at least big enough for linkage area.
   unsigned minCallFrameSize = getLinkageSize(Subtarget.isPPC64(),
-                                             Subtarget.isDarwinABI());
+                                             Subtarget.isDarwinABI(),
+                                             Subtarget.isELFv2ABI());
   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
 
   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
@@ -459,9 +460,9 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
 
   const PPCRegisterInfo *RegInfo =
-    static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+      static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
   bool HasBP = RegInfo->hasBasePointer(MF);
-  unsigned BPReg  = HasBP ? (unsigned) PPC::R30 : FPReg;
+  unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg;
 
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
@@ -497,21 +498,23 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
   MachineBasicBlock::iterator MBBI = MBB.begin();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   const PPCInstrInfo &TII =
-    *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+      *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
   const PPCRegisterInfo *RegInfo =
-    static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+      static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
 
   MachineModuleInfo &MMI = MF.getMMI();
   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
   DebugLoc dl;
   bool needsFrameMoves = MMI.hasDebugInfo() ||
     MF.getFunction()->needsUnwindTableEntry();
+  bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
 
   // Get processor type.
   bool isPPC64 = Subtarget.isPPC64();
   // Get the ABI.
   bool isDarwinABI = Subtarget.isDarwinABI();
   bool isSVR4ABI = Subtarget.isSVR4ABI();
+  bool isELFv2ABI = Subtarget.isELFv2ABI();
   assert((isDarwinABI || isSVR4ABI) &&
          "Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
 
@@ -546,7 +549,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
   bool HasBP = RegInfo->hasBasePointer(MF);
 
   unsigned SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
-  unsigned BPReg       = isPPC64 ? PPC::X30 : PPC::R30;
+  unsigned BPReg       = RegInfo->getBaseRegister(MF);
   unsigned FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
   unsigned LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
   unsigned ScratchReg  = isPPC64 ? PPC::X0  : PPC::R0;
@@ -602,7 +605,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
       BPOffset = FFI->getObjectOffset(BPIndex);
     } else {
       BPOffset =
-        PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI);
+        PPCFrameLowering::getBasePointerSaveOffset(isPPC64,
+                                                   isDarwinABI,
+                                                   isPIC);
     }
   }
 
@@ -623,6 +628,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
          "Prologue CR saving supported only in 64-bit mode");
 
   if (!MustSaveCRs.empty()) { // will only occur for PPC64
+    // FIXME: In the ELFv2 ABI, we are not required to save all CR fields.
+    // If only one or two CR fields are clobbered, it could be more
+    // efficient to use mfocrf to selectively save just those fields.
     MachineInstrBuilder MIB =
       BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg);
     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
@@ -791,8 +799,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
       // For 64-bit SVR4 when we have spilled CRs, the spill location
       // is SP+8, not a frame-relative slot.
       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
+        // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
+        // the whole CR word.  In the ELFv2 ABI, every CR that was
+        // actually saved gets its own CFI record.
+        unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
         unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
-            nullptr, MRI->getDwarfRegNum(PPC::CR2, true), 8));
+            nullptr, MRI->getDwarfRegNum(CRReg, true), 8));
         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
             .addCFIIndex(CFIIndex);
         continue;
@@ -812,9 +824,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
   assert(MBBI != MBB.end() && "Returning block has no terminator");
   const PPCInstrInfo &TII =
-    *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+      *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
   const PPCRegisterInfo *RegInfo =
-    static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+      static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
 
   unsigned RetOpcode = MBBI->getOpcode();
   DebugLoc dl;
@@ -839,6 +851,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
   // Get the ABI.
   bool isDarwinABI = Subtarget.isDarwinABI();
   bool isSVR4ABI = Subtarget.isSVR4ABI();
+  bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
 
   // Check if the link register (LR) has been saved.
   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -849,7 +862,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
   bool HasBP = RegInfo->hasBasePointer(MF);
 
   unsigned SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
-  unsigned BPReg      = isPPC64 ? PPC::X30 : PPC::R30;
+  unsigned BPReg      = RegInfo->getBaseRegister(MF);
   unsigned FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
   unsigned ScratchReg  = isPPC64 ? PPC::X0  : PPC::R0;
   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
@@ -890,7 +903,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
       BPOffset = FFI->getObjectOffset(BPIndex);
     } else {
       BPOffset =
-        PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI);
+        PPCFrameLowering::getBasePointerSaveOffset(isPPC64,
+                                                   isDarwinABI,
+                                                   isPIC);
     }
   }
 
@@ -1054,7 +1069,7 @@ void
 PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                    RegScavenger *) const {
   const PPCRegisterInfo *RegInfo =
-    static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+      static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
 
   //  Save and clear the LR state.
   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -1067,6 +1082,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
   int FPSI = FI->getFramePointerSaveIndex();
   bool isPPC64 = Subtarget.isPPC64();
   bool isDarwinABI  = Subtarget.isDarwinABI();
+  bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
   MachineFrameInfo *MFI = MF.getFrameInfo();
 
   // If the frame pointer save index hasn't been defined yet.
@@ -1081,7 +1097,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
 
   int BPSI = FI->getBasePointerSaveIndex();
   if (!BPSI && RegInfo->hasBasePointer(MF)) {
-    int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI);
+    int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI, isPIC);
     // Allocate the frame index for the base pointer save area.
     BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
     // Save the result.
@@ -1185,7 +1201,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
   }
 
   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
-  const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
 
   int64_t LowerBound = 0;
 
@@ -1220,7 +1236,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
   }
 
   const PPCRegisterInfo *RegInfo =
-    static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+      static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
   if (RegInfo->hasBasePointer(MF)) {
     HasGPSaveArea = true;
 
@@ -1368,7 +1384,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
 
   MachineFunction *MF = MBB.getParent();
   const PPCInstrInfo &TII =
-    *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+      *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
   DebugLoc DL;
   bool CRSpilled = false;
   MachineInstrBuilder CRMIB;
@@ -1430,7 +1446,7 @@ restoreCRs(bool isPPC64, bool is31,
 
   MachineFunction *MF = MBB.getParent();
   const PPCInstrInfo &TII =
-    *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+      *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
   DebugLoc DL;
   unsigned RestoreOp, MoveReg;
 
@@ -1463,7 +1479,7 @@ void PPCFrameLowering::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
   const PPCInstrInfo &TII =
-    *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+      *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
       I->getOpcode() == PPC::ADJCALLSTACKUP) {
     // Add (actually subtract) back the amount the callee popped on return.
@@ -1513,7 +1529,7 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
 
   MachineFunction *MF = MBB.getParent();
   const PPCInstrInfo &TII =
-    *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+      *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
   bool CR2Spilled = false;
   bool CR3Spilled = false;
   bool CR4Spilled = false;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 7a226f7..c482588 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -10,8 +10,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef POWERPC_FRAMEINFO_H
-#define POWERPC_FRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
+#define LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
 
 #include "PPC.h"
 #include "llvm/ADT/STLExtras.h"
@@ -76,8 +76,8 @@ public:
 
   /// getTOCSaveOffset - Return the previous frame offset to save the
   /// TOC register -- 64-bit SVR4 ABI only.
-  static unsigned getTOCSaveOffset(void) {
-    return 40;
+  static unsigned getTOCSaveOffset(bool isELFv2ABI) {
+    return isELFv2ABI ? 24 : 40;
   }
 
   /// getFramePointerSaveOffset - Return the previous frame offset to save the
@@ -97,19 +97,22 @@ public:
 
   /// getBasePointerSaveOffset - Return the previous frame offset to save the
   /// base pointer.
-  static unsigned getBasePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
+  static unsigned getBasePointerSaveOffset(bool isPPC64,
+                                           bool isDarwinABI,
+                                           bool isPIC) {
     if (isDarwinABI)
       return isPPC64 ? -16U : -8U;
 
     // SVR4 ABI: First slot in the general register save area.
-    return isPPC64 ? -16U : -8U;
+    return isPPC64 ? -16U : isPIC ? -12U : -8U;
   }
 
   /// getLinkageSize - Return the size of the PowerPC ABI linkage area.
   ///
-  static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) {
+  static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI,
+                                 bool isELFv2ABI) {
     if (isDarwinABI || isPPC64)
-      return 6 * (isPPC64 ? 8 : 4);
+      return (isELFv2ABI ? 4 : 6) * (isPPC64 ? 8 : 4);
 
     // SVR4 ABI:
     return 8;
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 23f76c1..4b50214 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef PPCHAZRECS_H
-#define PPCHAZRECS_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCHAZARDRECOGNIZERS_H
+#define LLVM_LIB_TARGET_POWERPC_PPCHAZARDRECOGNIZERS_H
 
 #include "PPCInstrInfo.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
@@ -76,10 +76,10 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
 
 public:
   PPCHazardRecognizer970(const ScheduleDAG &DAG);
-  virtual HazardType getHazardType(SUnit *SU, int Stalls) override;
-  virtual void EmitInstruction(SUnit *SU) override;
-  virtual void AdvanceCycle() override;
-  virtual void Reset() override;
+  HazardType getHazardType(SUnit *SU, int Stalls) override;
+  void EmitInstruction(SUnit *SU) override;
+  void AdvanceCycle() override;
+  void Reset() override;
 
 private:
   /// EndDispatchGroup - Called when we are finishing a new dispatch group.
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 4881b3f..49ba58b 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -14,6 +14,7 @@
 
 #include "PPC.h"
 #include "MCTargetDesc/PPCPredicates.h"
+#include "PPCMachineFunctionInfo.h"
 #include "PPCTargetMachine.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -26,6 +27,7 @@
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -56,16 +58,16 @@ namespace {
     unsigned GlobalBaseReg;
   public:
     explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
-      : SelectionDAGISel(tm), TM(tm),
-        PPCLowering(TM.getTargetLowering()),
-        PPCSubTarget(TM.getSubtargetImpl()) {
+        : SelectionDAGISel(tm), TM(tm),
+          PPCLowering(TM.getSubtargetImpl()->getTargetLowering()),
+          PPCSubTarget(TM.getSubtargetImpl()) {
       initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry());
     }
 
     bool runOnMachineFunction(MachineFunction &MF) override {
       // Make sure we re-emit a set of the global base reg if necessary
       GlobalBaseReg = 0;
-      PPCLowering = TM.getTargetLowering();
+      PPCLowering = TM.getSubtargetImpl()->getTargetLowering();
       PPCSubTarget = TM.getSubtargetImpl();
       SelectionDAGISel::runOnMachineFunction(MF);
 
@@ -232,7 +234,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
   unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
   unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
 
-  const TargetInstrInfo &TII = *TM.getInstrInfo();
+  const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
   MachineBasicBlock &EntryBB = *Fn.begin();
   DebugLoc dl;
   // Emit the following code into the entry block:
@@ -268,16 +270,34 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
 ///
 SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
   if (!GlobalBaseReg) {
-    const TargetInstrInfo &TII = *TM.getInstrInfo();
+    const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
     // Insert the set of GlobalBaseReg into the first MBB of the function
     MachineBasicBlock &FirstMBB = MF->front();
     MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+    const Module *M = MF->getFunction()->getParent();
     DebugLoc dl;
 
     if (PPCLowering->getPointerTy() == MVT::i32) {
-      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
-      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
-      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+      if (PPCSubTarget->isTargetELF()) {
+        GlobalBaseReg = PPC::R30;
+        if (M->getPICLevel() == PICLevel::Small) {
+          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
+          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+        } else {
+          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
+          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+          unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+          BuildMI(FirstMBB, MBBI, dl,
+                  TII.get(PPC::UpdateGBR)).addReg(GlobalBaseReg)
+                  .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
+          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
+        }
+      } else {
+        GlobalBaseReg =
+          RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
+        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
+        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+      }
     } else {
       GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass);
       BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
@@ -650,94 +670,105 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
 // getVCmpInst: return the vector compare instruction for the specified
 // vector type and condition code. Since this is for altivec specific code,
 // only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC,
-                                bool HasVSX) {
-  switch (CC) {
-    case ISD::SETEQ:
-    case ISD::SETUEQ:
-    case ISD::SETNE:
-    case ISD::SETUNE:
-      if (VecVT == MVT::v16i8)
-        return PPC::VCMPEQUB;
-      else if (VecVT == MVT::v8i16)
-        return PPC::VCMPEQUH;
-      else if (VecVT == MVT::v4i32)
-        return PPC::VCMPEQUW;
-      // v4f32 != v4f32 could be translate to unordered not equal
-      else if (VecVT == MVT::v4f32)
-        return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
-      else if (VecVT == MVT::v2f64)
-        return PPC::XVCMPEQDP;
-      break;
-    case ISD::SETLT:
-    case ISD::SETGT:
-    case ISD::SETLE:
-    case ISD::SETGE:
-      if (VecVT == MVT::v16i8)
-        return PPC::VCMPGTSB;
-      else if (VecVT == MVT::v8i16)
-        return PPC::VCMPGTSH;
-      else if (VecVT == MVT::v4i32)
-        return PPC::VCMPGTSW;
-      else if (VecVT == MVT::v4f32)
-        return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
-      else if (VecVT == MVT::v2f64)
-        return PPC::XVCMPGTDP;
-      break;
-    case ISD::SETULT:
-    case ISD::SETUGT:
-    case ISD::SETUGE:
-    case ISD::SETULE:
-      if (VecVT == MVT::v16i8)
-        return PPC::VCMPGTUB;
-      else if (VecVT == MVT::v8i16)
-        return PPC::VCMPGTUH;
-      else if (VecVT == MVT::v4i32)
-        return PPC::VCMPGTUW;
-      break;
-    case ISD::SETOEQ:
-      if (VecVT == MVT::v4f32)
-        return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
-      else if (VecVT == MVT::v2f64)
-        return PPC::XVCMPEQDP;
-      break;
-    case ISD::SETOLT:
-    case ISD::SETOGT:
-    case ISD::SETOLE:
-      if (VecVT == MVT::v4f32)
-        return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
-      else if (VecVT == MVT::v2f64)
-        return PPC::XVCMPGTDP;
-      break;
-    case ISD::SETOGE:
-      if (VecVT == MVT::v4f32)
-        return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
-      else if (VecVT == MVT::v2f64)
-        return PPC::XVCMPGEDP;
-      break;
-    default:
-      break;
-  }
-  llvm_unreachable("Invalid integer vector compare condition");
-}
+static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
+                                bool HasVSX, bool &Swap, bool &Negate) {
+  Swap = false;
+  Negate = false;
 
-// getVCmpEQInst: return the equal compare instruction for the specified vector
-// type. Since this is for altivec specific code, only support the altivec
-// types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT, bool HasVSX) {
-  switch (VecVT) {
-    case MVT::v16i8:
-      return PPC::VCMPEQUB;
-    case MVT::v8i16:
-      return PPC::VCMPEQUH;
-    case MVT::v4i32:
-      return PPC::VCMPEQUW;
-    case MVT::v4f32:
-      return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
-    case MVT::v2f64:
-      return PPC::XVCMPEQDP;
-    default:
-      llvm_unreachable("Invalid integer vector compare condition");
+  if (VecVT.isFloatingPoint()) {
+    /* Handle some cases by swapping input operands.  */
+    switch (CC) {
+      case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
+      case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
+      case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
+      case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
+      case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
+      case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
+      default: break;
+    }
+    /* Handle some cases by negating the result.  */
+    switch (CC) {
+      case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
+      case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
+      case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
+      case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
+      default: break;
+    }
+    /* We have instructions implementing the remaining cases.  */
+    switch (CC) {
+      case ISD::SETEQ:
+      case ISD::SETOEQ:
+        if (VecVT == MVT::v4f32)
+          return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
+        else if (VecVT == MVT::v2f64)
+          return PPC::XVCMPEQDP;
+        break;
+      case ISD::SETGT:
+      case ISD::SETOGT:
+        if (VecVT == MVT::v4f32)
+          return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
+        else if (VecVT == MVT::v2f64)
+          return PPC::XVCMPGTDP;
+        break;
+      case ISD::SETGE:
+      case ISD::SETOGE:
+        if (VecVT == MVT::v4f32)
+          return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
+        else if (VecVT == MVT::v2f64)
+          return PPC::XVCMPGEDP;
+        break;
+      default:
+        break;
+    }
+    llvm_unreachable("Invalid floating-point vector compare condition");
+  } else {
+    /* Handle some cases by swapping input operands.  */
+    switch (CC) {
+      case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
+      case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
+      case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
+      case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
+      default: break;
+    }
+    /* Handle some cases by negating the result.  */
+    switch (CC) {
+      case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
+      case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
+      case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
+      case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
+      default: break;
+    }
+    /* We have instructions implementing the remaining cases.  */
+    switch (CC) {
+      case ISD::SETEQ:
+      case ISD::SETUEQ:
+        if (VecVT == MVT::v16i8)
+          return PPC::VCMPEQUB;
+        else if (VecVT == MVT::v8i16)
+          return PPC::VCMPEQUH;
+        else if (VecVT == MVT::v4i32)
+          return PPC::VCMPEQUW;
+        break;
+      case ISD::SETGT:
+        if (VecVT == MVT::v16i8)
+          return PPC::VCMPGTSB;
+        else if (VecVT == MVT::v8i16)
+          return PPC::VCMPGTSH;
+        else if (VecVT == MVT::v4i32)
+          return PPC::VCMPGTSW;
+        break;
+      case ISD::SETUGT:
+        if (VecVT == MVT::v16i8)
+          return PPC::VCMPGTUB;
+        else if (VecVT == MVT::v8i16)
+          return PPC::VCMPGTUH;
+        else if (VecVT == MVT::v4i32)
+          return PPC::VCMPGTUW;
+        break;
+      default:
+        break;
+    }
+    llvm_unreachable("Invalid integer vector compare condition");
   }
 }
 
@@ -829,60 +860,20 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
   // vector compare operations return the same type as the operands.
   if (LHS.getValueType().isVector()) {
     EVT VecVT = LHS.getValueType();
-    MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy;
-    unsigned int VCmpInst = getVCmpInst(VT, CC, PPCSubTarget->hasVSX());
-
-    switch (CC) {
-      case ISD::SETEQ:
-      case ISD::SETOEQ:
-      case ISD::SETUEQ:
-        return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
-      case ISD::SETNE:
-      case ISD::SETONE:
-      case ISD::SETUNE: {
-        SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
-        return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
-                                                               PPC::VNOR,
-                                    VecVT, VCmp, VCmp);
-      } 
-      case ISD::SETLT:
-      case ISD::SETOLT:
-      case ISD::SETULT:
-        return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, RHS, LHS);
-      case ISD::SETGT:
-      case ISD::SETOGT:
-      case ISD::SETUGT:
-        return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
-      case ISD::SETGE:
-      case ISD::SETOGE:
-      case ISD::SETUGE: {
-        // Small optimization: Altivec provides a 'Vector Compare Greater Than
-        // or Equal To' instruction (vcmpgefp), so in this case there is no
-        // need for extra logic for the equal compare.
-        if (VecVT.getSimpleVT().isFloatingPoint()) {
-          return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
-        } else {
-          SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
-          unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX());
-          SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
-          return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLOR :
-                                                                 PPC::VOR,
-                                      VecVT, VCmpGT, VCmpEQ);
-        }
-      }
-      case ISD::SETLE:
-      case ISD::SETOLE:
-      case ISD::SETULE: {
-        SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0);
-        unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX());
-        SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
-        return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLOR :
-                                                               PPC::VOR,
-                                    VecVT, VCmpLE, VCmpEQ);
-      }
-      default:
-        llvm_unreachable("Invalid vector compare type: should be expanded by legalize");
+    bool Swap, Negate;
+    unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
+                                        PPCSubTarget->hasVSX(), Swap, Negate);
+    if (Swap)
+      std::swap(LHS, RHS);
+
+    if (Negate) {
+      SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
+      return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
+                                                              PPC::VNOR,
+                                  VecVT, VCmp, VCmp);
     }
+
+    return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
   }
 
   if (PPCSubTarget->useCRBits())
@@ -924,6 +915,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
     return nullptr;   // Already selected.
   }
 
+  // In case any misguided DAG-level optimizations form an ADD with a
+  // TargetConstant operand, crash here instead of miscompiling (by selecting
+  // an r+r add instead of some kind of r+i add).
+  if (N->getOpcode() == ISD::ADD &&
+      N->getOperand(1).getOpcode() == ISD::TargetConstant)
+    llvm_unreachable("Invalid ADD with TargetConstant operand");
+
   switch (N->getOpcode()) {
   default: break;
 
@@ -1331,7 +1329,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
     else if (N->getValueType(0) == MVT::f32)
       SelectCCOp = PPC::SELECT_CC_F4;
     else if (N->getValueType(0) == MVT::f64)
-      SelectCCOp = PPC::SELECT_CC_F8;
+      if (PPCSubTarget->hasVSX())
+        SelectCCOp = PPC::SELECT_CC_VSFRC;
+      else
+        SelectCCOp = PPC::SELECT_CC_F8;
+    else if (N->getValueType(0) == MVT::v2f64 ||
+             N->getValueType(0) == MVT::v2i64)
+      SelectCCOp = PPC::SELECT_CC_VSRC;
     else
       SelectCCOp = PPC::SELECT_CC_VRRC;
 
@@ -1445,11 +1449,17 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
     return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
   }
   case PPCISD::TOC_ENTRY: {
-    assert (PPCSubTarget->isPPC64() && "Only supported for 64-bit ABI");
+    assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) &&
+            "Only supported for 64-bit ABI and 32-bit SVR4");
+    if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
+      SDValue GA = N->getOperand(0);
+      return CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
+                                    N->getOperand(1));
+    }
 
     // For medium and large code model, we generate two instructions as
     // described below.  Otherwise we allow SelectCodeCommon to handle this,
-    // selecting one of LDtoc, LDtocJTI, and LDtocCPT.
+    // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
     CodeModel::Model CModel = TM.getCodeModel();
     if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
       break;
@@ -1466,7 +1476,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
     SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
                                         TOCbase, GA);
 
-    if (isa<JumpTableSDNode>(GA) || CModel == CodeModel::Large)
+    if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
+        CModel == CodeModel::Large)
       return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
                                     SDValue(Tmp, 0));
 
@@ -1483,6 +1494,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
     return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
                                   SDValue(Tmp, 0), GA);
   }
+  case PPCISD::PPC32_PICGOT: {
+    // Generate a PIC-safe GOT reference.
+    assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
+      "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
+    return CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(),  MVT::i32);
+  }
   case PPCISD::VADD_SPLAT: {
     // This expands into one of three sequences, depending on whether
     // the first operand is odd or even, positive or negative.
@@ -1683,7 +1700,9 @@ void PPCDAGToDAGISel::PeepholeCROps() {
       case PPC::SELECT_I8:
       case PPC::SELECT_F4:
       case PPC::SELECT_F8:
-      case PPC::SELECT_VRRC: {
+      case PPC::SELECT_VRRC:
+      case PPC::SELECT_VSFRC:
+      case PPC::SELECT_VSRC: {
         SDValue Op = MachineNode->getOperand(0);
         if (Op.isMachineOpcode()) {
           if (Op.getMachineOpcode() == PPC::CRSET)
@@ -1989,6 +2008,8 @@ void PPCDAGToDAGISel::PeepholeCROps() {
       case PPC::SELECT_F4:
       case PPC::SELECT_F8:
       case PPC::SELECT_VRRC:
+      case PPC::SELECT_VSFRC:
+      case PPC::SELECT_VSRC:
         if (Op1Set)
           ResNode = MachineNode->getOperand(1).getNode();
         else if (Op1Unset)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index bc057bf..e93bdaf 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -39,6 +39,10 @@
 #include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
+// FIXME: Remove this once soft-float is supported.
+static cl::opt<bool> DisablePPCFloatInVariadic("disable-ppc-float-in-variadic",
+cl::desc("disable saving float registers for va_start on PPC"), cl::Hidden);
+
 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
 
@@ -51,19 +55,10 @@ cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
 // FIXME: Remove this once the bug has been fixed!
 extern cl::opt<bool> ANDIGlueBug;
 
-static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
-  // If it isn't a Mach-O file then it's going to be a linux ELF
-  // object file.
-  if (TT.isOSDarwin())
-    return new TargetLoweringObjectFileMachO();
-
-  return new PPC64LinuxTargetObjectFile();
-}
-
-PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
-    : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))),
+PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
+    : TargetLowering(TM),
       Subtarget(*TM.getSubtargetImpl()) {
-  setPow2DivIsCheap();
+  setPow2SDivIsCheap();
 
   // Use _setjmp/_longjmp instead of setjmp/longjmp.
   setUseUnderscoreSetJmp(true);
@@ -453,6 +448,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
+      setOperationAction(ISD::MULHU, VT, Expand);
+      setOperationAction(ISD::MULHS, VT, Expand);
       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
       setOperationAction(ISD::UDIVREM, VT, Expand);
@@ -526,11 +523,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
     // Altivec does not contain unordered floating-point compare instructions
     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
-    setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
-    setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
-    setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
-    setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
-
     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
 
@@ -561,11 +553,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
       // Share the Altivec comparison restrictions.
       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
-      setCondCodeAction(ISD::SETUGT, MVT::v2f64, Expand);
-      setCondCodeAction(ISD::SETUGE, MVT::v2f64, Expand);
-      setCondCodeAction(ISD::SETULT, MVT::v2f64, Expand);
-      setCondCodeAction(ISD::SETULE, MVT::v2f64, Expand);
-
       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
 
@@ -617,15 +604,22 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
     setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
   }
 
-  setOperationAction(ISD::ATOMIC_LOAD,  MVT::i32, Expand);
-  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
-  setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
+  if (!isPPC64) {
+    setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
+    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
+  }
 
   setBooleanContents(ZeroOrOneBooleanContent);
   // Altivec instructions set fields to all zeros or all ones.
   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 
+  if (!isPPC64) {
+    // These libcalls are not available in 32-bit.
+    setLibcallName(RTLIB::SHL_I128, nullptr);
+    setLibcallName(RTLIB::SRL_I128, nullptr);
+    setLibcallName(RTLIB::SRA_I128, nullptr);
+  }
+
   if (isPPC64) {
     setStackPointerRegisterToSaveRestore(PPC::X1);
     setExceptionPointerRegister(PPC::X3);
@@ -685,11 +679,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   if (Subtarget.isDarwin())
     setPrefFunctionAlignment(4);
 
-  if (isPPC64 && Subtarget.isJITCodeModel())
-    // Temporary workaround for the inability of PPC64 JIT to handle jump
-    // tables.
-    setSupportJumpTables(false);
-
   setInsertFencesForAtomic(true);
 
   if (Subtarget.enableMachineScheduler())
@@ -782,6 +771,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::SHL:             return "PPCISD::SHL";
   case PPCISD::CALL:            return "PPCISD::CALL";
   case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
+  case PPCISD::CALL_TLS:        return "PPCISD::CALL_TLS";
+  case PPCISD::CALL_NOP_TLS:    return "PPCISD::CALL_NOP_TLS";
   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
   case PPCISD::BCTRL:           return "PPCISD::BCTRL";
   case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
@@ -811,10 +802,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
-  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
-  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
   case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
   case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
   case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
@@ -828,6 +817,11 @@ EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
   return VT.changeVectorElementTypeToInteger();
 }
 
+bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
+  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
+  return true;
+}
+
 //===----------------------------------------------------------------------===//
 // Node matching predicates, for use by the tblgen matching code.
 //===----------------------------------------------------------------------===//
@@ -853,14 +847,27 @@ static bool isConstantOrUndef(int Op, int Val) {
 
 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
 /// VPKUHUM instruction.
-bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+/// The ShuffleKind distinguishes between big-endian operations with
+/// two different inputs (0), either-endian operations with two identical
+/// inputs (1), and little-endian operantion with two different inputs (2).
+/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
+bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                                SelectionDAG &DAG) {
-  unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 1;
-  if (!isUnary) {
+  bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian();
+  if (ShuffleKind == 0) {
+    if (IsLE)
+      return false;
     for (unsigned i = 0; i != 16; ++i)
-      if (!isConstantOrUndef(N->getMaskElt(i),  i*2+j))
+      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
         return false;
-  } else {
+  } else if (ShuffleKind == 2) {
+    if (!IsLE)
+      return false;
+    for (unsigned i = 0; i != 16; ++i)
+      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
+        return false;
+  } else if (ShuffleKind == 1) {
+    unsigned j = IsLE ? 0 : 1;
     for (unsigned i = 0; i != 8; ++i)
       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
@@ -871,27 +878,34 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
 
 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
 /// VPKUWUM instruction.
-bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+/// The ShuffleKind distinguishes between big-endian operations with
+/// two different inputs (0), either-endian operations with two identical
+/// inputs (1), and little-endian operantion with two different inputs (2).
+/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                                SelectionDAG &DAG) {
-  unsigned j, k;
-  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
-    j = 0;
-    k = 1;
-  } else {
-    j = 2;
-    k = 3;
-  }
-  if (!isUnary) {
+  bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian();
+  if (ShuffleKind == 0) {
+    if (IsLE)
+      return false;
     for (unsigned i = 0; i != 16; i += 2)
-      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j) ||
-          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+k))
+      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
+          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
         return false;
-  } else {
+  } else if (ShuffleKind == 2) {
+    if (!IsLE)
+      return false;
+    for (unsigned i = 0; i != 16; i += 2)
+      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
+          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
+        return false;
+  } else if (ShuffleKind == 1) {
+    unsigned j = IsLE ? 0 : 2;
     for (unsigned i = 0; i != 8; i += 2)
-      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j) ||
-          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+k) ||
-          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j) ||
-          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+k))
+      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
+          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
+          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
+          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
         return false;
   }
   return true;
@@ -919,38 +933,63 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
 
 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
+/// The ShuffleKind distinguishes between big-endian merges with two 
+/// different inputs (0), either-endian merges with two identical inputs (1),
+/// and little-endian merges with two different inputs (2).  For the latter,
+/// the input operands are swapped (see PPCInstrAltivec.td).
 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
-                             bool isUnary, SelectionDAG &DAG) {
-  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
-    if (!isUnary)
+                             unsigned ShuffleKind, SelectionDAG &DAG) {
+  if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) {
+    if (ShuffleKind == 1) // unary
+      return isVMerge(N, UnitSize, 0, 0);
+    else if (ShuffleKind == 2) // swapped
       return isVMerge(N, UnitSize, 0, 16);
-    return isVMerge(N, UnitSize, 0, 0);
+    else
+      return false;
   } else {
-    if (!isUnary)
+    if (ShuffleKind == 1) // unary
+      return isVMerge(N, UnitSize, 8, 8);
+    else if (ShuffleKind == 0) // normal
       return isVMerge(N, UnitSize, 8, 24);
-    return isVMerge(N, UnitSize, 8, 8);
+    else
+      return false;
   }
 }
 
 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
+/// The ShuffleKind distinguishes between big-endian merges with two 
+/// different inputs (0), either-endian merges with two identical inputs (1),
+/// and little-endian merges with two different inputs (2).  For the latter,
+/// the input operands are swapped (see PPCInstrAltivec.td).
 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
-                             bool isUnary, SelectionDAG &DAG) {
-  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
-    if (!isUnary)
+                             unsigned ShuffleKind, SelectionDAG &DAG) {
+  if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) {
+    if (ShuffleKind == 1) // unary
+      return isVMerge(N, UnitSize, 8, 8);
+    else if (ShuffleKind == 2) // swapped
       return isVMerge(N, UnitSize, 8, 24);
-    return isVMerge(N, UnitSize, 8, 8);
+    else
+      return false;
   } else {
-    if (!isUnary)
+    if (ShuffleKind == 1) // unary
+      return isVMerge(N, UnitSize, 0, 0);
+    else if (ShuffleKind == 0) // normal
       return isVMerge(N, UnitSize, 0, 16);
-    return isVMerge(N, UnitSize, 0, 0);
+    else
+      return false;
   }
 }
 
 
 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
 /// amount, otherwise return -1.
-int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG) {
+/// The ShuffleKind distinguishes between big-endian operations with two 
+/// different inputs (0), either-endian operations with two identical inputs
+/// (1), and little-endian operations with two different inputs (2).  For the
+/// latter, the input operands are swapped (see PPCInstrAltivec.td).
+int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
+                             SelectionDAG &DAG) {
   if (N->getValueType(0) != MVT::v16i8)
     return -1;
 
@@ -968,38 +1007,26 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG) {
   unsigned ShiftAmt = SVOp->getMaskElt(i);
   if (ShiftAmt < i) return -1;
 
-  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
-
-    ShiftAmt += i;
-
-    if (!isUnary) {
-      // Check the rest of the elements to see if they are consecutive.
-      for (++i; i != 16; ++i)
-        if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt - i))
-          return -1;
-    } else {
-      // Check the rest of the elements to see if they are consecutive.
-      for (++i; i != 16; ++i)
-        if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt - i) & 15))
-          return -1;
-    }
-
-  } else {  // Big Endian
+  ShiftAmt -= i;
+  bool isLE = DAG.getTarget().getSubtargetImpl()->getDataLayout()->
+    isLittleEndian();
+
+  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
+    // Check the rest of the elements to see if they are consecutive.
+    for (++i; i != 16; ++i)
+      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
+        return -1;
+  } else if (ShuffleKind == 1) {
+    // Check the rest of the elements to see if they are consecutive.
+    for (++i; i != 16; ++i)
+      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
+        return -1;
+  } else
+    return -1;
 
-    ShiftAmt -= i;
+  if (ShuffleKind == 2 && isLE)
+    ShiftAmt = 16 - ShiftAmt;
 
-    if (!isUnary) {
-      // Check the rest of the elements to see if they are consecutive.
-      for (++i; i != 16; ++i)
-        if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
-          return -1;
-    } else {
-      // Check the rest of the elements to see if they are consecutive.
-      for (++i; i != 16; ++i)
-        if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
-          return -1;
-    }
-  }
   return ShiftAmt;
 }
 
@@ -1055,7 +1082,7 @@ unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
                                 SelectionDAG &DAG) {
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
   assert(isSplatShuffleMask(SVOp, EltSize));
-  if (DAG.getTarget().getDataLayout()->isLittleEndian())
+  if (DAG.getSubtarget().getDataLayout()->isLittleEndian())
     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
   else
     return SVOp->getMaskElt(0) / EltSize;
@@ -1331,7 +1358,13 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
       if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
         // If all of the bits are known zero on the LHS or RHS, the add won't
         // carry.
-        Base = N.getOperand(0);
+        if (FrameIndexSDNode *FI =
+              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+        } else {
+          Base = N.getOperand(0);
+        }
         Disp = DAG.getTargetConstant(imm, N.getValueType());
         return true;
       }
@@ -1491,10 +1524,9 @@ static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
   HiOpFlags = PPCII::MO_HA;
   LoOpFlags = PPCII::MO_LO;
 
-  // Don't use the pic base if not in PIC relocation model.  Or if we are on a
-  // non-darwin platform.  We don't support PIC on other platforms yet.
-  bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
-               TM.getSubtarget<PPCSubtarget>().isDarwin();
+  // Don't use the pic base if not in PIC relocation model.
+  bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
+
   if (isPIC) {
     HiOpFlags |= PPCII::MO_PIC_FLAG;
     LoOpFlags |= PPCII::MO_PIC_FLAG;
@@ -1550,6 +1582,15 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
 
   unsigned MOHiFlag, MOLoFlag;
   bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+
+  if (isPIC && Subtarget.isSVR4ABI()) {
+    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
+                                           PPCII::MO_PIC_FLAG);
+    SDLoc DL(CP);
+    return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
+                       DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
+  }
+
   SDValue CPIHi =
     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
   SDValue CPILo =
@@ -1571,6 +1612,15 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
 
   unsigned MOHiFlag, MOLoFlag;
   bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+
+  if (isPIC && Subtarget.isSVR4ABI()) {
+    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
+                                        PPCII::MO_PIC_FLAG);
+    SDLoc DL(GA);
+    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA,
+                       DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
+  }
+
   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
   return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
@@ -1579,8 +1629,16 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
   EVT PtrVT = Op.getValueType();
+  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
+  const BlockAddress *BA = BASDN->getBlockAddress();
 
-  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+  // 64-bit SVR4 ABI code is always position-independent.
+  // The actual BlockAddress is stored in the TOC.
+  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+    SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
+    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(BASDN), MVT::i64, GA,
+                       DAG.getRegister(PPC::X2, MVT::i64));
+  }
 
   unsigned MOHiFlag, MOLoFlag;
   bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
@@ -1589,6 +1647,27 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
   return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
 }
 
+// Generate a call to __tls_get_addr for the given GOT entry Op.
+std::pair<SDValue,SDValue>
+PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl,
+                                SelectionDAG &DAG) const {
+
+  Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Node = Op;
+  Entry.Ty = IntPtrTy;
+  Args.push_back(Entry);
+
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
+    .setCallee(CallingConv::C, IntPtrTy,
+               DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()),
+               std::move(Args), 0);
+
+  return LowerCallTo(CLI);
+}
+
 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
 
@@ -1601,6 +1680,8 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
   const GlobalValue *GV = GA->getGlobal();
   EVT PtrVT = getPointerTy();
   bool is64bit = Subtarget.isPPC64();
+  const Module *M = DAG.getMachineFunction().getFunction()->getParent();
+  PICLevel::Level picLevel = M->getPICLevel();
 
   TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
 
@@ -1632,50 +1713,46 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
   }
 
   if (Model == TLSModel::GeneralDynamic) {
-    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
-    SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
-    SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
-                                     GOTReg, TGA);
+    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+                                             PPCII::MO_TLSGD);
+    SDValue GOTPtr;
+    if (is64bit) {
+      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
+                                   GOTReg, TGA);
+    } else {
+      if (picLevel == PICLevel::Small)
+        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
+      else
+        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+    }
     SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
-                                   GOTEntryHi, TGA);
-
-    // We need a chain node, and don't have one handy.  The underlying
-    // call has no side effects, so using the function entry node
-    // suffices.
-    SDValue Chain = DAG.getEntryNode();
-    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
-    SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
-    SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
-                                  PtrVT, ParmReg, TGA);
-    // The return value from GET_TLS_ADDR really is in X3 already, but
-    // some hacks are needed here to tie everything together.  The extra
-    // copies dissolve during subsequent transforms.
-    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
-    return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT);
+                                   GOTPtr, TGA);
+    std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
+    return CallResult.first;
   }
 
   if (Model == TLSModel::LocalDynamic) {
-    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
-    SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
-    SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
-                                     GOTReg, TGA);
+    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+                                             PPCII::MO_TLSLD);
+    SDValue GOTPtr;
+    if (is64bit) {
+      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
+                           GOTReg, TGA);
+    } else {
+      if (picLevel == PICLevel::Small)
+        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
+      else
+        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+    }
     SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
-                                   GOTEntryHi, TGA);
-
-    // We need a chain node, and don't have one handy.  The underlying
-    // call has no side effects, so using the function entry node
-    // suffices.
-    SDValue Chain = DAG.getEntryNode();
-    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
-    SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
-    SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
-                                  PtrVT, ParmReg, TGA);
-    // The return value from GET_TLSLD_ADDR really is in X3 already, but
-    // some hacks are needed here to tie everything together.  The extra
-    // copies dissolve during subsequent transforms.
-    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
+                                   GOTPtr, TGA);
+    std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
+    SDValue TLSAddr = CallResult.first;
+    SDValue Chain = CallResult.second;
     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
-                                      Chain, ParmReg, TGA);
+                                      Chain, TLSAddr, TGA);
     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
   }
 
@@ -1700,6 +1777,14 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
   unsigned MOHiFlag, MOLoFlag;
   bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
 
+  if (isPIC && Subtarget.isSVR4ABI()) {
+    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
+                                            GSDN->getOffset(),
+                                            PPCII::MO_PIC_FLAG);
+    return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
+                       DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32));
+  }
+
   SDValue GAHi =
     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
   SDValue GALo =
@@ -1794,7 +1879,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
   // gpr_index
   SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                     VAListPtr, MachinePointerInfo(SV), MVT::i8,
-                                    false, false, 0);
+                                    false, false, false, 0);
   InChain = GprIndex.getValue(1);
 
   if (VT == MVT::i64) {
@@ -1817,7 +1902,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
   // fpr
   SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                     FprPtr, MachinePointerInfo(SV), MVT::i8,
-                                    false, false, 0);
+                                    false, false, false, 0);
   InChain = FprIndex.getValue(1);
 
   SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
@@ -2127,14 +2212,19 @@ static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
   unsigned ArgSize = ArgVT.getStoreSize();
   if (Flags.isByVal())
     ArgSize = Flags.getByValSize();
-  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+
+  // Round up to multiples of the pointer size, except for array members,
+  // which are always packed.
+  if (!Flags.isInConsecutiveRegs())
+    ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
 
   return ArgSize;
 }
 
 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
 /// on the stack.
-static unsigned CalculateStackSlotAlignment(EVT ArgVT, ISD::ArgFlagsTy Flags,
+static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
+                                            ISD::ArgFlagsTy Flags,
                                             unsigned PtrByteSize) {
   unsigned Align = PtrByteSize;
 
@@ -2156,14 +2246,78 @@ static unsigned CalculateStackSlotAlignment(EVT ArgVT, ISD::ArgFlagsTy Flags,
     }
   }
 
+  // Array members are always packed to their original alignment.
+  if (Flags.isInConsecutiveRegs()) {
+    // If the array member was split into multiple registers, the first
+    // needs to be aligned to the size of the full type.  (Except for
+    // ppcf128, which is only aligned as its f64 components.)
+    if (Flags.isSplit() && OrigVT != MVT::ppcf128)
+      Align = OrigVT.getStoreSize();
+    else
+      Align = ArgVT.getStoreSize();
+  }
+
   return Align;
 }
 
+/// CalculateStackSlotUsed - Return whether this argument will use its
+/// stack slot (instead of being passed in registers).  ArgOffset,
+/// AvailableFPRs, and AvailableVRs must hold the current argument
+/// position, and will be updated to account for this argument.
+static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
+                                   ISD::ArgFlagsTy Flags,
+                                   unsigned PtrByteSize,
+                                   unsigned LinkageSize,
+                                   unsigned ParamAreaSize,
+                                   unsigned &ArgOffset,
+                                   unsigned &AvailableFPRs,
+                                   unsigned &AvailableVRs) {
+  bool UseMemory = false;
+
+  // Respect alignment of argument on the stack.
+  unsigned Align =
+    CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+  // If there's no space left in the argument save area, we must
+  // use memory (this check also catches zero-sized arguments).
+  if (ArgOffset >= LinkageSize + ParamAreaSize)
+    UseMemory = true;
+
+  // Allocate argument on the stack.
+  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+  if (Flags.isInConsecutiveRegsLast())
+    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+  // If we overran the argument save area, we must use memory
+  // (this check catches arguments passed partially in memory)
+  if (ArgOffset > LinkageSize + ParamAreaSize)
+    UseMemory = true;
+
+  // However, if the argument is actually passed in an FPR or a VR,
+  // we don't use memory after all.
+  if (!Flags.isByVal()) {
+    if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
+      if (AvailableFPRs > 0) {
+        --AvailableFPRs;
+        return false;
+      }
+    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
+      if (AvailableVRs > 0) {
+        --AvailableVRs;
+        return false;
+      }
+  }
+
+  return UseMemory;
+}
+
 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
 /// ensure minimum alignment required for target.
 static unsigned EnsureStackAlignment(const TargetMachine &Target,
                                      unsigned NumBytes) {
-  unsigned TargetAlign = Target.getFrameLowering()->getStackAlignment();
+  unsigned TargetAlign =
+      Target.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
   unsigned AlignMask = TargetAlign - 1;
   NumBytes = (NumBytes + AlignMask) & ~AlignMask;
   return NumBytes;
@@ -2240,11 +2394,11 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
 
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
-                 getTargetMachine(), ArgLocs, *DAG.getContext());
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+                 *DAG.getContext());
 
   // Reserve space for the linkage area on the stack.
-  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false);
+  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false);
   CCInfo.AllocateStack(LinkageSize, PtrByteSize);
 
   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
@@ -2315,7 +2469,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
   // caller's stack frame, right above the parameter list area.
   SmallVector<CCValAssign, 16> ByValArgLocs;
   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
-                      getTargetMachine(), ByValArgLocs, *DAG.getContext());
+                      ByValArgLocs, *DAG.getContext());
 
   // Reserve stack space for the allocations in CCInfo.
   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
@@ -2348,7 +2502,9 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
       PPC::F8
     };
-    const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
+    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
+    if (DisablePPCFloatInVariadic)
+      NumFPArgRegs = 0;
 
     FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
                                                           NumGPArgRegs));
@@ -2357,7 +2513,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
 
     // Make room for NumGPArgRegs and NumFPArgRegs.
     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
-                NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
+                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
 
     FuncInfo->setVarArgsStackOffset(
       MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
@@ -2399,7 +2555,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
                                    MachinePointerInfo(), false, false, 0);
       MemOps.push_back(Store);
       // Increment the address by eight for the next argument to store
-      SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
+      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
                                          PtrVT);
       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
     }
@@ -2437,6 +2593,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
                                       SmallVectorImpl<SDValue> &InVals) const {
   // TODO: add description of PPC stack frame format, or at least some docs.
   //
+  bool isELFv2ABI = Subtarget.isELFv2ABI();
   bool isLittleEndian = Subtarget.isLittleEndian();
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -2448,8 +2605,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
                        (CallConv == CallingConv::Fast));
   unsigned PtrByteSize = 8;
 
-  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
-  unsigned ArgOffset = LinkageSize;
+  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
+                                                          isELFv2ABI);
 
   static const MCPhysReg GPR[] = {
     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
@@ -2471,12 +2628,29 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
   const unsigned Num_FPR_Regs = 13;
   const unsigned Num_VR_Regs  = array_lengthof(VR);
 
-  unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
+  // Do a first pass over the arguments to determine whether the ABI
+  // guarantees that our caller has allocated the parameter save area
+  // on its stack frame.  In the ELFv1 ABI, this is always the case;
+  // in the ELFv2 ABI, it is true if this is a vararg function or if
+  // any parameter is located in a stack slot.
+
+  bool HasParameterArea = !isELFv2ABI || isVarArg;
+  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
+  unsigned NumBytes = LinkageSize;
+  unsigned AvailableFPRs = Num_FPR_Regs;
+  unsigned AvailableVRs = Num_VR_Regs;
+  for (unsigned i = 0, e = Ins.size(); i != e; ++i)
+    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
+                               PtrByteSize, LinkageSize, ParamAreaSize,
+                               NumBytes, AvailableFPRs, AvailableVRs))
+      HasParameterArea = true;
 
   // Add DAG nodes to load the arguments or copy them out of registers.  On
   // entry to a function on PPC, the arguments start after the linkage area,
   // although the first ones are often in registers.
 
+  unsigned ArgOffset = LinkageSize;
+  unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
   SmallVector<SDValue, 8> MemOps;
   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
   unsigned CurArgIdx = 0;
@@ -2484,6 +2658,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
     SDValue ArgVal;
     bool needsLoad = false;
     EVT ObjectVT = Ins[ArgNo].VT;
+    EVT OrigVT = Ins[ArgNo].ArgVT;
     unsigned ObjSize = ObjectVT.getStoreSize();
     unsigned ArgSize = ObjSize;
     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
@@ -2492,7 +2667,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
 
     /* Respect alignment of argument on the stack.  */
     unsigned Align =
-      CalculateStackSlotAlignment(ObjectVT, Flags, PtrByteSize);
+      CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
     ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
     unsigned CurArgOffset = ArgOffset;
 
@@ -2520,15 +2695,31 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
         continue;
       }
 
-      // All aggregates smaller than 8 bytes must be passed right-justified.
-      if (ObjSize < PtrByteSize && !isLittleEndian)
-        CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
-      // The value of the object is its address.
-      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
+      // Create a stack object covering all stack doublewords occupied
+      // by the argument.  If the argument is (fully or partially) on
+      // the stack, or if the argument is fully in registers but the
+      // caller has allocated the parameter save anyway, we can refer
+      // directly to the caller's stack frame.  Otherwise, create a
+      // local copy in our own frame.
+      int FI;
+      if (HasParameterArea ||
+          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
+        FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true);
+      else
+        FI = MFI->CreateStackObject(ArgSize, Align, false);
       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
-      InVals.push_back(FIN);
 
-      if (ObjSize < 8) {
+      // Handle aggregates smaller than 8 bytes.
+      if (ObjSize < PtrByteSize) {
+        // The value of the object is its address, which differs from the
+        // address of the enclosing doubleword on big-endian systems.
+        SDValue Arg = FIN;
+        if (!isLittleEndian) {
+          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, PtrVT);
+          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
+        }
+        InVals.push_back(Arg);
+
         if (GPR_idx != Num_GPR_Regs) {
           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
@@ -2537,18 +2728,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
           if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
             EVT ObjType = (ObjSize == 1 ? MVT::i8 :
                            (ObjSize == 2 ? MVT::i16 : MVT::i32));
-            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
                                       MachinePointerInfo(FuncArg),
                                       ObjType, false, false, 0);
           } else {
             // For sizes that don't fit a truncating store (3, 5, 6, 7),
             // store the whole register as-is to the parameter save area
-            // slot.  The address of the parameter was already calculated
-            // above (InVals.push_back(FIN)) to be the right-justified
-            // offset within the slot.  For this store, we need a new
-            // frame index that points at the beginning of the slot.
-            int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
-            SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+            // slot.
             Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                  MachinePointerInfo(FuncArg),
                                  false, false, 0);
@@ -2562,27 +2748,29 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
         continue;
       }
 
+      // The value of the object is its address, which is the address of
+      // its first stack doubleword.
+      InVals.push_back(FIN);
+
+      // Store whatever pieces of the object are in registers to memory.
       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
-        // Store whatever pieces of the object are in registers
-        // to memory.  ArgOffset will be the address of the beginning
-        // of the object.
-        if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg;
-          VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
-          int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
-          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
-          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
-          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                                       MachinePointerInfo(FuncArg, j),
-                                       false, false, 0);
-          MemOps.push_back(Store);
-          ++GPR_idx;
-          ArgOffset += PtrByteSize;
-        } else {
-          ArgOffset += ArgSize - j;
+        if (GPR_idx == Num_GPR_Regs)
           break;
+
+        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+        SDValue Addr = FIN;
+        if (j) {
+          SDValue Off = DAG.getConstant(j, PtrVT);
+          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
         }
+        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
+                                     MachinePointerInfo(FuncArg, j),
+                                     false, false, 0);
+        MemOps.push_back(Store);
+        ++GPR_idx;
       }
+      ArgOffset += ArgSize;
       continue;
     }
 
@@ -2591,6 +2779,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
     case MVT::i1:
     case MVT::i32:
     case MVT::i64:
+      // These can be scalar arguments or elements of an integer array type
+      // passed directly.  Clang may use those instead of "byval" aggregate
+      // types to avoid forcing arguments to memory unnecessarily.
       if (GPR_idx != Num_GPR_Regs) {
         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
@@ -2608,6 +2799,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
 
     case MVT::f32:
     case MVT::f64:
+      // These can be scalar arguments or elements of a float array type
+      // passed directly.  The latter are used to implement ELFv2 homogenous
+      // float aggregates.
       if (FPR_idx != Num_FPR_Regs) {
         unsigned VReg;
 
@@ -2620,12 +2814,32 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
 
         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
         ++FPR_idx;
+      } else if (GPR_idx != Num_GPR_Regs) {
+        // This can only ever happen in the presence of f32 array types,
+        // since otherwise we never run out of FPRs before running out
+        // of GPRs.
+        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+        if (ObjectVT == MVT::f32) {
+          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
+            ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
+                                 DAG.getConstant(32, MVT::i32));
+          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+        }
+
+        ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
       } else {
         needsLoad = true;
-        ArgSize = PtrByteSize;
       }
 
-      ArgOffset += 8;
+      // When passing an array of floats, the array occupies consecutive
+      // space in the argument area; only round up to the next doubleword
+      // at the end of the array.  Otherwise, each float takes 8 bytes.
+      ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
+      ArgOffset += ArgSize;
+      if (Flags.isInConsecutiveRegsLast())
+        ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
       break;
     case MVT::v4f32:
     case MVT::v4i32:
@@ -2633,6 +2847,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
     case MVT::v16i8:
     case MVT::v2f64:
     case MVT::v2i64:
+      // These can be scalar arguments or elements of a vector array type
+      // passed directly.  The latter are used to implement ELFv2 homogenous
+      // vector aggregates.
       if (VR_idx != Num_VR_Regs) {
         unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
                         MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
@@ -2662,7 +2879,10 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
 
   // Area that is at least reserved in the caller of this function.
   unsigned MinReservedArea;
-  MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
+  if (HasParameterArea)
+    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
+  else
+    MinReservedArea = LinkageSize;
 
   // Set the size that is at least reserved in caller of this function.  Tail
   // call optimized functions' reserved stack space needs to be aligned so that
@@ -2723,7 +2943,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
                        (CallConv == CallingConv::Fast));
   unsigned PtrByteSize = isPPC64 ? 8 : 4;
 
-  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true);
+  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
+                                                          false);
   unsigned ArgOffset = LinkageSize;
   // Area that is at least reserved in caller of this function.
   unsigned MinReservedArea = ArgOffset;
@@ -2849,7 +3070,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
         CurArgOffset = CurArgOffset + (4 - ObjSize);
       }
       // The value of the object is its address.
-      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
+      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true);
       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
       InVals.push_back(FIN);
       if (ObjSize==1 || ObjSize==2) {
@@ -3336,6 +3557,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
 
   bool isPPC64 = Subtarget.isPPC64();
   bool isSVR4ABI = Subtarget.isSVR4ABI();
+  bool isELFv2ABI = Subtarget.isELFv2ABI();
 
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   NodeTys.push_back(MVT::Other);   // Returns a chain
@@ -3352,42 +3574,41 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
     }
 
   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
-    // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
-    // Use indirect calls for ALL functions calls in JIT mode, since the
-    // far-call stubs may be outside relocation limits for a BL instruction.
-    if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
-      unsigned OpFlags = 0;
-      if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
-          (Subtarget.getTargetTriple().isMacOSX() &&
-           Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
-          (G->getGlobal()->isDeclaration() ||
-           G->getGlobal()->isWeakForLinker())) {
-        // PC-relative references to external symbols should go through $stub,
-        // unless we're building with the leopard linker or later, which
-        // automatically synthesizes these stubs.
-        OpFlags = PPCII::MO_DARWIN_STUB;
-      }
-
-      // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
-      // every direct call is) turn it into a TargetGlobalAddress /
-      // TargetExternalSymbol node so that legalize doesn't hack it.
-      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
-                                          Callee.getValueType(),
-                                          0, OpFlags);
-      needIndirectCall = false;
+    unsigned OpFlags = 0;
+    if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
+         (Subtarget.getTargetTriple().isMacOSX() &&
+          Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
+         (G->getGlobal()->isDeclaration() ||
+          G->getGlobal()->isWeakForLinker())) ||
+        (Subtarget.isTargetELF() && !isPPC64 &&
+         !G->getGlobal()->hasLocalLinkage() &&
+         DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
+      // PC-relative references to external symbols should go through $stub,
+      // unless we're building with the leopard linker or later, which
+      // automatically synthesizes these stubs.
+      OpFlags = PPCII::MO_PLT_OR_STUB;
     }
+
+    // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+    // every direct call is) turn it into a TargetGlobalAddress /
+    // TargetExternalSymbol node so that legalize doesn't hack it.
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+                                        Callee.getValueType(), 0, OpFlags);
+    needIndirectCall = false;
   }
 
   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
     unsigned char OpFlags = 0;
 
-    if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
-        (Subtarget.getTargetTriple().isMacOSX() &&
-         Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
+    if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
+         (Subtarget.getTargetTriple().isMacOSX() &&
+          Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) ||
+        (Subtarget.isTargetELF() && !isPPC64 &&
+         DAG.getTarget().getRelocationModel() == Reloc::PIC_)	) {
       // PC-relative references to external symbols should go through $stub,
       // unless we're building with the leopard linker or later, which
       // automatically synthesizes these stubs.
-      OpFlags = PPCII::MO_DARWIN_STUB;
+      OpFlags = PPCII::MO_PLT_OR_STUB;
     }
 
     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
@@ -3400,7 +3621,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
     // to do the call, we can't use PPCISD::CALL.
     SDValue MTCTROps[] = {Chain, Callee, InFlag};
 
-    if (isSVR4ABI && isPPC64) {
+    if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
       // Function pointers in the 64-bit SVR4 ABI do not point to the function
       // entry point, but to the function descriptor (the function entry point
       // address is part of the function descriptor though).
@@ -3480,7 +3701,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
     CallOpc = PPCISD::BCTRL;
     Callee.setNode(nullptr);
     // Add use of X11 (holding environment pointer)
-    if (isSVR4ABI && isPPC64)
+    if (isSVR4ABI && isPPC64 && !isELFv2ABI)
       Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
     // Add CTR register as callee so a bctr can be emitted later.
     if (isTailCall)
@@ -3491,6 +3712,23 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
   if (Callee.getNode()) {
     Ops.push_back(Chain);
     Ops.push_back(Callee);
+
+    // If this is a call to __tls_get_addr, find the symbol whose address
+    // is to be taken and add it to the list.  This will be used to 
+    // generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld).
+    // We find the symbol by walking the chain to the CopyFromReg, walking
+    // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and
+    // pulling the symbol from that node.
+    if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+      if (!strcmp(S->getSymbol(), "__tls_get_addr")) {
+        assert(!needIndirectCall && "Indirect call to __tls_get_addr???");
+        SDNode *AddI = Chain.getNode()->getOperand(2).getNode();
+        SDValue TGTAddr = AddI->getOperand(1);
+        assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress &&
+               "Didn't find target global TLS address where we expected one");
+        Ops.push_back(TGTAddr);
+        CallOpc = PPCISD::CALL_TLS;
+      }
   }
   // If this is a tail call add stack pointer delta.
   if (isTailCall)
@@ -3502,6 +3740,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                   RegsToPass[i].second.getValueType()));
 
+  // Direct calls in the ELFv2 ABI need the TOC register live into the call.
+  if (Callee.getNode() && isELFv2ABI)
+    Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+
   return CallOpc;
 }
 
@@ -3522,8 +3764,8 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                    SmallVectorImpl<SDValue> &InVals) const {
 
   SmallVector<CCValAssign, 16> RVLocs;
-  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
-                    getTargetMachine(), RVLocs, *DAG.getContext());
+  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+                    *DAG.getContext());
   CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
 
   // Copy all of the result registers out of their specified physreg.
@@ -3571,6 +3813,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
                               int SPDiff, unsigned NumBytes,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               SmallVectorImpl<SDValue> &InVals) const {
+
+  bool isELFv2ABI = Subtarget.isELFv2ABI();
   std::vector<EVT> NodeTys;
   SmallVector<SDValue, 8> Ops;
   unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
@@ -3589,7 +3833,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
      getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
 
   // Add a register mask operand representing the call-preserved registers.
-  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+  const TargetRegisterInfo *TRI =
+      getTargetMachine().getSubtargetImpl()->getRegisterInfo();
   const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
   assert(Mask && "Missing call preserved mask for calling convention");
   Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3636,7 +3881,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
                 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
       // Otherwise insert NOP for non-local calls.
       CallOpc = PPCISD::CALL_NOP;
-    }
+    } else if (CallOpc == PPCISD::CALL_TLS)
+      // For 64-bit SVR4, TLS calls are always non-local.
+      CallOpc = PPCISD::CALL_NOP_TLS;
   }
 
   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
@@ -3646,7 +3893,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
     SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
-    unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset();
+    unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
     SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
     SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
     Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag);
@@ -3735,11 +3982,12 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
 
   // Assign locations to all of the outgoing arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
-                 getTargetMachine(), ArgLocs, *DAG.getContext());
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+                 *DAG.getContext());
 
   // Reserve space for the linkage area on the stack.
-  CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
+  CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false),
+                       PtrByteSize);
 
   if (isVarArg) {
     // Handle fixed and variable vector arguments differently.
@@ -3776,7 +4024,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
   // Assign locations to all of the outgoing aggregate by value arguments.
   SmallVector<CCValAssign, 16> ByValArgLocs;
   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
-                      getTargetMachine(), ByValArgLocs, *DAG.getContext());
+                      ByValArgLocs, *DAG.getContext());
 
   // Reserve stack space for the allocations in CCInfo.
   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
@@ -3948,6 +4196,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                                     SDLoc dl, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &InVals) const {
 
+  bool isELFv2ABI = Subtarget.isELFv2ABI();
   bool isLittleEndian = Subtarget.isLittleEndian();
   unsigned NumOps = Outs.size();
 
@@ -3966,21 +4215,27 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
 
   // Count how many bytes are to be pushed on the stack, including the linkage
-  // area, and parameter passing area.  We start with at least 48 bytes, which
-  // is reserved space for [SP][CR][LR][3 x unused].
-  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
+  // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
+  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
+  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
+  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
+                                                          isELFv2ABI);
   unsigned NumBytes = LinkageSize;
 
   // Add up all the space actually used.
   for (unsigned i = 0; i != NumOps; ++i) {
     ISD::ArgFlagsTy Flags = Outs[i].Flags;
     EVT ArgVT = Outs[i].VT;
+    EVT OrigVT = Outs[i].ArgVT;
 
     /* Respect alignment of argument on the stack.  */
-    unsigned Align = CalculateStackSlotAlignment(ArgVT, Flags, PtrByteSize);
+    unsigned Align =
+      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
     NumBytes = ((NumBytes + Align - 1) / Align) * Align;
 
     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+    if (Flags.isInConsecutiveRegsLast())
+      NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
   }
 
   unsigned NumBytesActuallyUsed = NumBytes;
@@ -3990,6 +4245,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
   // Because we cannot tell if this is needed on the caller side, we have to
   // conservatively assume that it is needed.  As such, make sure we have at
   // least enough stack space for the caller to store the 8 GPRs.
+  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
 
   // Tail call needs the stack to be aligned.
@@ -4056,10 +4312,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
   for (unsigned i = 0; i != NumOps; ++i) {
     SDValue Arg = OutVals[i];
     ISD::ArgFlagsTy Flags = Outs[i].Flags;
+    EVT ArgVT = Outs[i].VT;
+    EVT OrigVT = Outs[i].ArgVT;
 
     /* Respect alignment of argument on the stack.  */
     unsigned Align =
-      CalculateStackSlotAlignment(Outs[i].VT, Flags, PtrByteSize);
+      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
     ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
 
     /* Compute GPR index associated with argument offset.  */
@@ -4103,7 +4361,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
         if (GPR_idx != NumGPRs) {
           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                         MachinePointerInfo(), VT,
-                                        false, false, 0);
+                                        false, false, false, 0);
           MemOpChains.push_back(Load.getValue(1));
           RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
 
@@ -4199,6 +4457,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
     case MVT::i1:
     case MVT::i32:
     case MVT::i64:
+      // These can be scalar arguments or elements of an integer array type
+      // passed directly.  Clang may use those instead of "byval" aggregate
+      // types to avoid forcing arguments to memory unnecessarily.
       if (GPR_idx != NumGPRs) {
         RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
       } else {
@@ -4209,39 +4470,70 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
       ArgOffset += PtrByteSize;
       break;
     case MVT::f32:
-    case MVT::f64:
-      if (FPR_idx != NumFPRs) {
+    case MVT::f64: {
+      // These can be scalar arguments or elements of a float array type
+      // passed directly.  The latter are used to implement ELFv2 homogenous
+      // float aggregates.
+
+      // Named arguments go into FPRs first, and once they overflow, the
+      // remaining arguments go into GPRs and then the parameter save area.
+      // Unnamed arguments for vararg functions always go to GPRs and
+      // then the parameter save area.  For now, put all arguments to vararg
+      // routines always in both locations (FPR *and* GPR or stack slot).
+      bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
+
+      // First load the argument into the next available FPR.
+      if (FPR_idx != NumFPRs)
         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
 
-        if (isVarArg) {
-          // A single float or an aggregate containing only a single float
-          // must be passed right-justified in the stack doubleword, and
-          // in the GPR, if one is available.
-          SDValue StoreOff;
-          if (Arg.getSimpleValueType().SimpleTy == MVT::f32 &&
-              !isLittleEndian) {
-            SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
-            StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
-          } else
-            StoreOff = PtrOff;
-
-          SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff,
-                                       MachinePointerInfo(), false, false, 0);
-          MemOpChains.push_back(Store);
-
-          // Float varargs are always shadowed in available integer registers
-          if (GPR_idx != NumGPRs) {
-            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
-                                       MachinePointerInfo(), false, false,
-                                       false, 0);
-            MemOpChains.push_back(Load.getValue(1));
-            RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
-          }
-        }
+      // Next, load the argument into GPR or stack slot if needed.
+      if (!NeedGPROrStack)
+        ;
+      else if (GPR_idx != NumGPRs) {
+        // In the non-vararg case, this can only ever happen in the
+        // presence of f32 array types, since otherwise we never run
+        // out of FPRs before running out of GPRs.
+        SDValue ArgVal;
+
+        // Double values are always passed in a single GPR.
+        if (Arg.getValueType() != MVT::f32) {
+          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
+
+        // Non-array float values are extended and passed in a GPR.
+        } else if (!Flags.isInConsecutiveRegs()) {
+          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
+
+        // If we have an array of floats, we collect every odd element
+        // together with its predecessor into one GPR.
+        } else if (ArgOffset % PtrByteSize != 0) {
+          SDValue Lo, Hi;
+          Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
+          Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+          if (!isLittleEndian)
+            std::swap(Lo, Hi);
+          ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+
+        // The final element, if even, goes into the first half of a GPR.
+        } else if (Flags.isInConsecutiveRegsLast()) {
+          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
+          if (!isLittleEndian)
+            ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
+                                 DAG.getConstant(32, MVT::i32));
+
+        // Non-final even elements are skipped; they will be handled
+        // together the with subsequent argument on the next go-around.
+        } else
+          ArgVal = SDValue();
+
+        if (ArgVal.getNode())
+          RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal));
       } else {
         // Single-precision floating-point values are mapped to the
         // second (rightmost) word of the stack doubleword.
-        if (Arg.getValueType() == MVT::f32 && !isLittleEndian) {
+        if (Arg.getValueType() == MVT::f32 &&
+            !isLittleEndian && !Flags.isInConsecutiveRegs()) {
           SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
         }
@@ -4250,14 +4542,25 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                          true, isTailCall, false, MemOpChains,
                          TailCallArguments, dl);
       }
-      ArgOffset += 8;
+      // When passing an array of floats, the array occupies consecutive
+      // space in the argument area; only round up to the next doubleword
+      // at the end of the array.  Otherwise, each float takes 8 bytes.
+      ArgOffset += (Arg.getValueType() == MVT::f32 &&
+                    Flags.isInConsecutiveRegs()) ? 4 : 8;
+      if (Flags.isInConsecutiveRegsLast())
+        ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
       break;
+    }
     case MVT::v4f32:
     case MVT::v4i32:
     case MVT::v8i16:
     case MVT::v16i8:
     case MVT::v2f64:
     case MVT::v2i64:
+      // These can be scalar arguments or elements of a vector array type
+      // passed directly.  The latter are used to implement ELFv2 homogenous
+      // vector aggregates.
+
       // For a varargs call, named arguments go into VRs or on the stack as
       // usual; unnamed arguments always go to the stack or the corresponding
       // GPRs when within range.  For now, we always put the value in both
@@ -4328,11 +4631,16 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
     // Load r2 into a virtual register and store it to the TOC save area.
     SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
     // TOC save area offset.
-    unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset();
+    unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
     Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
                          false, false, 0);
+    // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
+    // This does not mean the MTCTR instruction must use R12; it's easier
+    // to model this as an extra parameter, so do that.
+    if (isELFv2ABI)
+      RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
   }
 
   // Build a sequence of copy-to-reg nodes chained together with token chain
@@ -4383,7 +4691,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
   // Count how many bytes are to be pushed on the stack, including the linkage
   // area, and parameter passing area.  We start with 24/48 bytes, which is
   // prereserved space for [SP][CR][LR][3 x unused].
-  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true);
+  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
+                                                          false);
   unsigned NumBytes = LinkageSize;
 
   // Add up all the space actually used.
@@ -4522,7 +4831,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
         if (GPR_idx != NumGPRs) {
           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                         MachinePointerInfo(), VT,
-                                        false, false, 0);
+                                        false, false, false, 0);
           MemOpChains.push_back(Load.getValue(1));
           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
 
@@ -4751,8 +5060,7 @@ PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   LLVMContext &Context) const {
   SmallVector<CCValAssign, 16> RVLocs;
-  CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
-                 RVLocs, Context);
+  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
   return CCInfo.CheckReturn(Outs, RetCC_PPC);
 }
 
@@ -4764,8 +5072,8 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
                                SDLoc dl, SelectionDAG &DAG) const {
 
   SmallVector<CCValAssign, 16> RVLocs;
-  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
-                 getTargetMachine(), RVLocs, *DAG.getContext());
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+                 *DAG.getContext());
   CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
 
   SDValue Flag;
@@ -5773,15 +6081,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
     if (PPC::isSplatShuffleMask(SVOp, 1) ||
         PPC::isSplatShuffleMask(SVOp, 2) ||
         PPC::isSplatShuffleMask(SVOp, 4) ||
-        PPC::isVPKUWUMShuffleMask(SVOp, true, DAG) ||
-        PPC::isVPKUHUMShuffleMask(SVOp, true, DAG) ||
-        PPC::isVSLDOIShuffleMask(SVOp, true, DAG) != -1 ||
-        PPC::isVMRGLShuffleMask(SVOp, 1, true, DAG) ||
-        PPC::isVMRGLShuffleMask(SVOp, 2, true, DAG) ||
-        PPC::isVMRGLShuffleMask(SVOp, 4, true, DAG) ||
-        PPC::isVMRGHShuffleMask(SVOp, 1, true, DAG) ||
-        PPC::isVMRGHShuffleMask(SVOp, 2, true, DAG) ||
-        PPC::isVMRGHShuffleMask(SVOp, 4, true, DAG)) {
+        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
+        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
+        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
+        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
+        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
+        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
+        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
+        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
+        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) {
       return Op;
     }
   }
@@ -5789,15 +6097,16 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
   // Altivec has a variety of "shuffle immediates" that take two vector inputs
   // and produce a fixed permutation.  If any of these match, do not lower to
   // VPERM.
-  if (PPC::isVPKUWUMShuffleMask(SVOp, false, DAG) ||
-      PPC::isVPKUHUMShuffleMask(SVOp, false, DAG) ||
-      PPC::isVSLDOIShuffleMask(SVOp, false, DAG) != -1 ||
-      PPC::isVMRGLShuffleMask(SVOp, 1, false, DAG) ||
-      PPC::isVMRGLShuffleMask(SVOp, 2, false, DAG) ||
-      PPC::isVMRGLShuffleMask(SVOp, 4, false, DAG) ||
-      PPC::isVMRGHShuffleMask(SVOp, 1, false, DAG) ||
-      PPC::isVMRGHShuffleMask(SVOp, 2, false, DAG) ||
-      PPC::isVMRGHShuffleMask(SVOp, 4, false, DAG))
+  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
+  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
+      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
+      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
+      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
+      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
+      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
+      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG))
     return Op;
 
   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
@@ -6252,11 +6561,44 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
 //  Other Lowering Code
 //===----------------------------------------------------------------------===//
 
+static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  Function *Func = Intrinsic::getDeclaration(M, Id);
+  return Builder.CreateCall(Func);
+}
+
+// The mappings for emitLeading/TrailingFence is taken from
+// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+                                         AtomicOrdering Ord, bool IsStore,
+                                         bool IsLoad) const {
+  if (Ord == SequentiallyConsistent)
+    return callIntrinsic(Builder, Intrinsic::ppc_sync);
+  else if (isAtLeastRelease(Ord))
+    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
+  else
+    return nullptr;
+}
+
+Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+                                          AtomicOrdering Ord, bool IsStore,
+                                          bool IsLoad) const {
+  if (IsLoad && isAtLeastAcquire(Ord))
+    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
+  // FIXME: this is too conservative, a dependent branch + isync is enough.
+  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
+  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
+  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
+  else
+    return nullptr;
+}
+
 MachineBasicBlock *
 PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
                                     bool is64bit, unsigned BinOpcode) const {
   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  const TargetInstrInfo *TII =
+      getTargetMachine().getSubtargetImpl()->getInstrInfo();
 
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   MachineFunction *F = BB->getParent();
@@ -6318,7 +6660,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
                                             bool is8bit,    // operation
                                             unsigned BinOpcode) const {
   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  const TargetInstrInfo *TII =
+      getTargetMachine().getSubtargetImpl()->getInstrInfo();
   // In 64 bit mode we have to use 64 bits for addresses, even though the
   // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
   // registers without caring whether they're 32 or 64, but here we're
@@ -6446,7 +6789,8 @@ llvm::MachineBasicBlock*
 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
                                     MachineBasicBlock *MBB) const {
   DebugLoc DL = MI->getDebugLoc();
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  const TargetInstrInfo *TII =
+      getTargetMachine().getSubtargetImpl()->getInstrInfo();
 
   MachineFunction *MF = MBB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -6545,7 +6889,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
   // Setup
   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
   const PPCRegisterInfo *TRI =
-    static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo());
+      getTargetMachine().getSubtarget<PPCSubtarget>().getRegisterInfo();
   MIB.addRegMask(TRI->getNoPreservedMask());
 
   BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
@@ -6594,7 +6938,8 @@ MachineBasicBlock *
 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
                                      MachineBasicBlock *MBB) const {
   DebugLoc DL = MI->getDebugLoc();
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  const TargetInstrInfo *TII =
+      getTargetMachine().getSubtargetImpl()->getInstrInfo();
 
   MachineFunction *MF = MBB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -6613,7 +6958,10 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
   // Since FP is only updated here but NOT referenced, it's treated as GPR.
   unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
   unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
-  unsigned BP  = (PVT == MVT::i64) ? PPC::X30 : PPC::R30;
+  unsigned BP  = (PVT == MVT::i64) ? PPC::X30 :
+                  (Subtarget.isSVR4ABI() &&
+                   MF->getTarget().getRelocationModel() == Reloc::PIC_ ?
+                     PPC::R29 : PPC::R30);
 
   MachineInstrBuilder MIB;
 
@@ -6703,7 +7051,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     return emitEHSjLjLongJmp(MI, BB);
   }
 
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  const TargetInstrInfo *TII =
+      getTargetMachine().getSubtargetImpl()->getInstrInfo();
 
   // To "insert" these instructions we actually have to insert their
   // control-flow patterns.
@@ -6726,7 +7075,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     Cond.push_back(MI->getOperand(1));
 
     DebugLoc dl = MI->getDebugLoc();
-    const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+    const TargetInstrInfo *TII =
+        getTargetMachine().getSubtargetImpl()->getInstrInfo();
     TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
                       Cond, MI->getOperand(2).getReg(),
                       MI->getOperand(3).getReg());
@@ -6735,11 +7085,15 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
              MI->getOpcode() == PPC::SELECT_CC_F4 ||
              MI->getOpcode() == PPC::SELECT_CC_F8 ||
              MI->getOpcode() == PPC::SELECT_CC_VRRC ||
+             MI->getOpcode() == PPC::SELECT_CC_VSFRC ||
+             MI->getOpcode() == PPC::SELECT_CC_VSRC ||
              MI->getOpcode() == PPC::SELECT_I4 ||
              MI->getOpcode() == PPC::SELECT_I8 ||
              MI->getOpcode() == PPC::SELECT_F4 ||
              MI->getOpcode() == PPC::SELECT_F8 ||
-             MI->getOpcode() == PPC::SELECT_VRRC) {
+             MI->getOpcode() == PPC::SELECT_VRRC ||
+             MI->getOpcode() == PPC::SELECT_VSFRC ||
+             MI->getOpcode() == PPC::SELECT_VSRC) {
     // The incoming instruction knows the destination vreg to set, the
     // condition code register to branch on, the true/false values to
     // select between, and a branch opcode to use.
@@ -6770,7 +7124,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
         MI->getOpcode() == PPC::SELECT_I8 ||
         MI->getOpcode() == PPC::SELECT_F4 ||
         MI->getOpcode() == PPC::SELECT_F8 ||
-        MI->getOpcode() == PPC::SELECT_VRRC) {
+        MI->getOpcode() == PPC::SELECT_VRRC ||
+        MI->getOpcode() == PPC::SELECT_VSFRC ||
+        MI->getOpcode() == PPC::SELECT_VSRC) {
       BuildMI(BB, dl, TII->get(PPC::BC))
         .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
     } else {
@@ -7131,151 +7487,54 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
 // Target Optimization Hooks
 //===----------------------------------------------------------------------===//
 
-SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
-                                               DAGCombinerInfo &DCI) const {
-  if (DCI.isAfterLegalizeVectorOps())
-    return SDValue();
-
-  EVT VT = Op.getValueType();
-
-  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
-      (VT == MVT::f64 && Subtarget.hasFRE())  ||
+SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
+                                            DAGCombinerInfo &DCI,
+                                            unsigned &RefinementSteps,
+                                            bool &UseOneConstNR) const {
+  EVT VT = Operand.getValueType();
+  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
+      (VT == MVT::f64 && Subtarget.hasFRSQRTE())  ||
       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
-
-    // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
-    // For the reciprocal, we need to find the zero of the function:
-    //   F(X) = A X - 1 [which has a zero at X = 1/A]
-    //     =>
-    //   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
-    //     does not require additional intermediate precision]
-
     // Convergence is quadratic, so we essentially double the number of digits
-    // correct after every iteration. The minimum architected relative
-    // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
-    // 23 digits and double has 52 digits.
-    int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
+    // correct after every iteration. For both FRE and FRSQRTE, the minimum
+    // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
+    // 2^-14. IEEE float has 23 digits and double has 52 digits.
+    RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
     if (VT.getScalarType() == MVT::f64)
-      ++Iterations;
-
-    SelectionDAG &DAG = DCI.DAG;
-    SDLoc dl(Op);
-
-    SDValue FPOne =
-      DAG.getConstantFP(1.0, VT.getScalarType());
-    if (VT.isVector()) {
-      assert(VT.getVectorNumElements() == 4 &&
-             "Unknown vector type");
-      FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
-                          FPOne, FPOne, FPOne, FPOne);
-    }
-
-    SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op);
-    DCI.AddToWorklist(Est.getNode());
-
-    // Newton iterations: Est = Est + Est (1 - Arg * Est)
-    for (int i = 0; i < Iterations; ++i) {
-      SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est);
-      DCI.AddToWorklist(NewEst.getNode());
-
-      NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst);
-      DCI.AddToWorklist(NewEst.getNode());
-
-      NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
-      DCI.AddToWorklist(NewEst.getNode());
-
-      Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst);
-      DCI.AddToWorklist(Est.getNode());
-    }
-
-    return Est;
+      ++RefinementSteps;
+    UseOneConstNR = true;
+    return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
   }
-
   return SDValue();
 }
 
-SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
-                                             DAGCombinerInfo &DCI) const {
-  if (DCI.isAfterLegalizeVectorOps())
-    return SDValue();
-
-  EVT VT = Op.getValueType();
-
-  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
-      (VT == MVT::f64 && Subtarget.hasFRSQRTE())  ||
+SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
+                                            DAGCombinerInfo &DCI,
+                                            unsigned &RefinementSteps) const {
+  EVT VT = Operand.getValueType();
+  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
+      (VT == MVT::f64 && Subtarget.hasFRE())  ||
       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
-
-    // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
-    // For the reciprocal sqrt, we need to find the zero of the function:
-    //   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
-    //     =>
-    //   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
-    // As a result, we precompute A/2 prior to the iteration loop.
-
     // Convergence is quadratic, so we essentially double the number of digits
-    // correct after every iteration. The minimum architected relative
-    // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
-    // 23 digits and double has 52 digits.
-    int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
+    // correct after every iteration. For both FRE and FRSQRTE, the minimum
+    // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
+    // 2^-14. IEEE float has 23 digits and double has 52 digits.
+    RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
     if (VT.getScalarType() == MVT::f64)
-      ++Iterations;
-
-    SelectionDAG &DAG = DCI.DAG;
-    SDLoc dl(Op);
-
-    SDValue FPThreeHalves =
-      DAG.getConstantFP(1.5, VT.getScalarType());
-    if (VT.isVector()) {
-      assert(VT.getVectorNumElements() == 4 &&
-             "Unknown vector type");
-      FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
-                                  FPThreeHalves, FPThreeHalves,
-                                  FPThreeHalves, FPThreeHalves);
-    }
-
-    SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op);
-    DCI.AddToWorklist(Est.getNode());
-
-    // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that
-    // this entire sequence requires only one FP constant.
-    SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op);
-    DCI.AddToWorklist(HalfArg.getNode());
-
-    HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op);
-    DCI.AddToWorklist(HalfArg.getNode());
-
-    // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
-    for (int i = 0; i < Iterations; ++i) {
-      SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est);
-      DCI.AddToWorklist(NewEst.getNode());
-
-      NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst);
-      DCI.AddToWorklist(NewEst.getNode());
-
-      NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst);
-      DCI.AddToWorklist(NewEst.getNode());
-
-      Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
-      DCI.AddToWorklist(Est.getNode());
-    }
-
-    return Est;
+      ++RefinementSteps;
+    return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
   }
-
   return SDValue();
 }
 
-// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
-// not enforce equality of the chain operands.
-static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
+static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
                             unsigned Bytes, int Dist,
                             SelectionDAG &DAG) {
-  EVT VT = LS->getMemoryVT();
   if (VT.getSizeInBits() / 8 != Bytes)
     return false;
 
-  SDValue Loc = LS->getBasePtr();
   SDValue BaseLoc = Base->getBasePtr();
   if (Loc.getOpcode() == ISD::FrameIndex) {
     if (BaseLoc.getOpcode() != ISD::FrameIndex)
@@ -7306,11 +7565,77 @@ static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
   return false;
 }
 
+// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
+// not enforce equality of the chain operands.
+static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
+                            unsigned Bytes, int Dist,
+                            SelectionDAG &DAG) {
+  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
+    EVT VT = LS->getMemoryVT();
+    SDValue Loc = LS->getBasePtr();
+    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
+  }
+
+  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
+    EVT VT;
+    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+    default: return false;
+    case Intrinsic::ppc_altivec_lvx:
+    case Intrinsic::ppc_altivec_lvxl:
+    case Intrinsic::ppc_vsx_lxvw4x:
+      VT = MVT::v4i32;
+      break;
+    case Intrinsic::ppc_vsx_lxvd2x:
+      VT = MVT::v2f64;
+      break;
+    case Intrinsic::ppc_altivec_lvebx:
+      VT = MVT::i8;
+      break;
+    case Intrinsic::ppc_altivec_lvehx:
+      VT = MVT::i16;
+      break;
+    case Intrinsic::ppc_altivec_lvewx:
+      VT = MVT::i32;
+      break;
+    }
+
+    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
+  }
+
+  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
+    EVT VT;
+    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+    default: return false;
+    case Intrinsic::ppc_altivec_stvx:
+    case Intrinsic::ppc_altivec_stvxl:
+    case Intrinsic::ppc_vsx_stxvw4x:
+      VT = MVT::v4i32;
+      break;
+    case Intrinsic::ppc_vsx_stxvd2x:
+      VT = MVT::v2f64;
+      break;
+    case Intrinsic::ppc_altivec_stvebx:
+      VT = MVT::i8;
+      break;
+    case Intrinsic::ppc_altivec_stvehx:
+      VT = MVT::i16;
+      break;
+    case Intrinsic::ppc_altivec_stvewx:
+      VT = MVT::i32;
+      break;
+    }
+
+    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
+  }
+
+  return false;
+}
+
 // Return true is there is a nearyby consecutive load to the one provided
 // (regardless of alignment). We search up and down the chain, looking though
-// token factors and other loads (but nothing else). As a result, a true
-// results indicates that it is safe to create a new consecutive load adjacent
-// to the load provided.
+// token factors and other loads (but nothing else). As a result, a true result
+// indicates that it is safe to create a new consecutive load adjacent to the
+// load provided.
 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
   SDValue Chain = LD->getChain();
   EVT VT = LD->getMemoryVT();
@@ -7324,10 +7649,10 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
   // nodes just above the top-level loads and token factors.
   while (!Queue.empty()) {
     SDNode *ChainNext = Queue.pop_back_val();
-    if (!Visited.insert(ChainNext))
+    if (!Visited.insert(ChainNext).second)
       continue;
 
-    if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
+    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
         return true;
 
@@ -7355,17 +7680,17 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
        
     while (!Queue.empty()) {
       SDNode *LoadRoot = Queue.pop_back_val();
-      if (!Visited.insert(LoadRoot))
+      if (!Visited.insert(LoadRoot).second)
         continue;
 
-      if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot))
+      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
           return true;
 
       for (SDNode::use_iterator UI = LoadRoot->use_begin(),
            UE = LoadRoot->use_end(); UI != UE; ++UI)
-        if (((isa<LoadSDNode>(*UI) &&
-            cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
+        if (((isa<MemSDNode>(*UI) &&
+            cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
           Queue.push_back(*UI);
     }
@@ -7485,7 +7810,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
     SDValue BinOp = BinOps.back();
     BinOps.pop_back();
 
-    if (!Visited.insert(BinOp.getNode()))
+    if (!Visited.insert(BinOp.getNode()).second)
       continue;
 
     PromOps.push_back(BinOp);
@@ -7699,7 +8024,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
     SDValue BinOp = BinOps.back();
     BinOps.pop_back();
 
-    if (!Visited.insert(BinOp.getNode()))
+    if (!Visited.insert(BinOp.getNode()).second)
       continue;
 
     PromOps.push_back(BinOp);
@@ -7936,92 +8261,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SETCC:
   case ISD::SELECT_CC:
     return DAGCombineTruncBoolExt(N, DCI);
-  case ISD::FDIV: {
-    assert(TM.Options.UnsafeFPMath &&
-           "Reciprocal estimates require UnsafeFPMath");
-
-    if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
-      SDValue RV =
-        DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
-      if (RV.getNode()) {
-        DCI.AddToWorklist(RV.getNode());
-        return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
-                           N->getOperand(0), RV);
-      }
-    } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND &&
-               N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
-      SDValue RV =
-        DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
-                                 DCI);
-      if (RV.getNode()) {
-        DCI.AddToWorklist(RV.getNode());
-        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
-                         N->getValueType(0), RV);
-        DCI.AddToWorklist(RV.getNode());
-        return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
-                           N->getOperand(0), RV);
-      }
-    } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND &&
-               N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
-      SDValue RV =
-        DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
-                                 DCI);
-      if (RV.getNode()) {
-        DCI.AddToWorklist(RV.getNode());
-        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
-                         N->getValueType(0), RV,
-                         N->getOperand(1).getOperand(1));
-        DCI.AddToWorklist(RV.getNode());
-        return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
-                           N->getOperand(0), RV);
-      }
-    }
-
-    SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
-    if (RV.getNode()) {
-      DCI.AddToWorklist(RV.getNode());
-      return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
-                         N->getOperand(0), RV);
-    }
-
-    }
-    break;
-  case ISD::FSQRT: {
-    assert(TM.Options.UnsafeFPMath &&
-           "Reciprocal estimates require UnsafeFPMath");
-
-    // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
-    // reciprocal sqrt.
-    SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
-    if (RV.getNode()) {
-      DCI.AddToWorklist(RV.getNode());
-      RV = DAGCombineFastRecip(RV, DCI);
-      if (RV.getNode()) {
-        // Unfortunately, RV is now NaN if the input was exactly 0. Select out
-        // this case and force the answer to 0.
-
-        EVT VT = RV.getValueType();
-
-        SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
-        if (VT.isVector()) {
-          assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
-          Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
-        }
-
-        SDValue ZeroCmp =
-          DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
-                       N->getOperand(0), Zero, ISD::SETEQ);
-        DCI.AddToWorklist(ZeroCmp.getNode());
-        DCI.AddToWorklist(RV.getNode());
-
-        RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
-                         ZeroCmp, Zero, RV);
-        return RV;
-      }
-    }
-
-    }
-    break;
   case ISD::SINT_TO_FP:
     if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
       if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
@@ -8112,6 +8351,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
     if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
         TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
+        // P8 and later hardware should just use LOAD.
+        !TM.getSubtarget<PPCSubtarget>().hasP8Vector() &&
         (VT == MVT::v16i8 || VT == MVT::v8i16 ||
          VT == MVT::v4i32 || VT == MVT::v4f32) &&
         LD->getAlignment() < ABIAlignment) {
@@ -8149,17 +8390,25 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                             Intrinsic::ppc_altivec_lvsl);
       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);
 
-      // Refine the alignment of the original load (a "new" load created here
-      // which was identical to the first except for the alignment would be
-      // merged with the existing node regardless).
+      // Create the new MMO for the new base load. It is like the original MMO,
+      // but represents an area in memory almost twice the vector size centered
+      // on the original address. If the address is unaligned, we might start
+      // reading up to (sizeof(vector)-1) bytes below the address of the
+      // original unaligned load.
       MachineFunction &MF = DAG.getMachineFunction();
-      MachineMemOperand *MMO =
-        MF.getMachineMemOperand(LD->getPointerInfo(),
-                                LD->getMemOperand()->getFlags(),
-                                LD->getMemoryVT().getStoreSize(),
-                                ABIAlignment);
-      LD->refineAlignment(MMO);
-      SDValue BaseLoad = SDValue(LD, 0);
+      MachineMemOperand *BaseMMO =
+        MF.getMachineMemOperand(LD->getMemOperand(),
+                                -LD->getMemoryVT().getStoreSize()+1,
+                                2*LD->getMemoryVT().getStoreSize()-1);
+
+      // Create the new base load.
+      SDValue LDXIntID = DAG.getTargetConstant(Intrinsic::ppc_altivec_lvx,
+                                               getPointerTy());
+      SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
+      SDValue BaseLoad =
+        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
+                                DAG.getVTList(MVT::v4i32, MVT::Other),
+                                BaseLoadOps, MVT::v4i32, BaseMMO);
 
       // Note that the value of IncOffset (which is provided to the next
       // load's pointer info offset value, and thus used to calculate the
@@ -8181,21 +8430,18 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
       SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
 
+      MachineMemOperand *ExtraMMO =
+        MF.getMachineMemOperand(LD->getMemOperand(),
+                                1, 2*LD->getMemoryVT().getStoreSize()-1);
+      SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
       SDValue ExtraLoad =
-        DAG.getLoad(VT, dl, Chain, Ptr,
-                    LD->getPointerInfo().getWithOffset(IncOffset),
-                    LD->isVolatile(), LD->isNonTemporal(),
-                    LD->isInvariant(), ABIAlignment);
+        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
+                                DAG.getVTList(MVT::v4i32, MVT::Other),
+                                ExtraLoadOps, MVT::v4i32, ExtraMMO);
 
       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
         BaseLoad.getValue(1), ExtraLoad.getValue(1));
 
-      if (BaseLoad.getValueType() != MVT::v4i32)
-        BaseLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, BaseLoad);
-
-      if (ExtraLoad.getValueType() != MVT::v4i32)
-        ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad);
-
       // Because vperm has a big-endian bias, we must reverse the order
       // of the input vectors and complement the permute control vector
       // when generating little endian code.  We have already handled the
@@ -8212,36 +8458,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
       if (VT != MVT::v4i32)
         Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
 
-      // Now we need to be really careful about how we update the users of the
-      // original load. We cannot just call DCI.CombineTo (or
-      // DAG.ReplaceAllUsesWith for that matter), because the load still has
-      // uses created here (the permutation for example) that need to stay.
-      SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
-      while (UI != UE) {
-        SDUse &Use = UI.getUse();
-        SDNode *User = *UI;
-        // Note: BaseLoad is checked here because it might not be N, but a
-        // bitcast of N.
-        if (User == Perm.getNode() || User == BaseLoad.getNode() ||
-            User == TF.getNode() || Use.getResNo() > 1) {
-          ++UI;
-          continue;
-        }
-
-        SDValue To = Use.getResNo() ? TF : Perm;
-        ++UI;
-
-        SmallVector<SDValue, 8> Ops;
-        for (const SDUse &O : User->ops()) {
-          if (O == Use)
-            Ops.push_back(To);
-          else
-            Ops.push_back(O);
-        }
-
-        DAG.UpdateNodeOperands(User, Ops);
-      }
-
+      // The output of the permutation is our loaded result, the TokenFactor is
+      // our new chain.
+      DCI.CombineTo(N, Perm, TF);
       return SDValue(N, 0);
     }
     }
@@ -8659,7 +8878,8 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
   // the AsmName field from *RegisterInfo.td, then this would not be necessary.
   if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
       PPC::GPRCRegClass.contains(R.first)) {
-    const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+    const TargetRegisterInfo *TRI =
+        getTargetMachine().getSubtargetImpl()->getRegisterInfo();
     return std::make_pair(TRI->getMatchingSuperReg(R.first,
                             PPC::sub_32, &PPC::G8RCRegClass),
                           &PPC::G8RCRegClass);
@@ -8872,6 +9092,92 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
   return false;
 }
 
+bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+                                           const CallInst &I,
+                                           unsigned Intrinsic) const {
+
+  switch (Intrinsic) {
+  case Intrinsic::ppc_altivec_lvx:
+  case Intrinsic::ppc_altivec_lvxl:
+  case Intrinsic::ppc_altivec_lvebx:
+  case Intrinsic::ppc_altivec_lvehx:
+  case Intrinsic::ppc_altivec_lvewx:
+  case Intrinsic::ppc_vsx_lxvd2x:
+  case Intrinsic::ppc_vsx_lxvw4x: {
+    EVT VT;
+    switch (Intrinsic) {
+    case Intrinsic::ppc_altivec_lvebx:
+      VT = MVT::i8;
+      break;
+    case Intrinsic::ppc_altivec_lvehx:
+      VT = MVT::i16;
+      break;
+    case Intrinsic::ppc_altivec_lvewx:
+      VT = MVT::i32;
+      break;
+    case Intrinsic::ppc_vsx_lxvd2x:
+      VT = MVT::v2f64;
+      break;
+    default:
+      VT = MVT::v4i32;
+      break;
+    }
+
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = VT;
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = -VT.getStoreSize()+1;
+    Info.size = 2*VT.getStoreSize()-1;
+    Info.align = 1;
+    Info.vol = false;
+    Info.readMem = true;
+    Info.writeMem = false;
+    return true;
+  }
+  case Intrinsic::ppc_altivec_stvx:
+  case Intrinsic::ppc_altivec_stvxl:
+  case Intrinsic::ppc_altivec_stvebx:
+  case Intrinsic::ppc_altivec_stvehx:
+  case Intrinsic::ppc_altivec_stvewx:
+  case Intrinsic::ppc_vsx_stxvd2x:
+  case Intrinsic::ppc_vsx_stxvw4x: {
+    EVT VT;
+    switch (Intrinsic) {
+    case Intrinsic::ppc_altivec_stvebx:
+      VT = MVT::i8;
+      break;
+    case Intrinsic::ppc_altivec_stvehx:
+      VT = MVT::i16;
+      break;
+    case Intrinsic::ppc_altivec_stvewx:
+      VT = MVT::i32;
+      break;
+    case Intrinsic::ppc_vsx_stxvd2x:
+      VT = MVT::v2f64;
+      break;
+    default:
+      VT = MVT::v4i32;
+      break;
+    }
+
+    Info.opc = ISD::INTRINSIC_VOID;
+    Info.memVT = VT;
+    Info.ptrVal = I.getArgOperand(1);
+    Info.offset = -VT.getStoreSize()+1;
+    Info.size = 2*VT.getStoreSize()-1;
+    Info.align = 1;
+    Info.vol = false;
+    Info.readMem = false;
+    Info.writeMem = true;
+    return true;
+  }
+  default:
+    break;
+  }
+
+  return false;
+}
+
 /// getOptimalMemOpType - Returns the target specific optimal type for load
 /// and store operations as a result of memset, memcpy, and memmove
 /// lowering. If DstAlign is zero that means it's safe to destination
@@ -8931,9 +9237,10 @@ bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
   return isInt<16>(Imm) || isUInt<16>(Imm);
 }
 
-bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
-                                                      unsigned,
-                                                      bool *Fast) const {
+bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+                                                       unsigned,
+                                                       unsigned,
+                                                       bool *Fast) const {
   if (DisablePPCUnaligned)
     return false;
 
@@ -8948,7 +9255,8 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
 
   if (VT.getSimpleVT().isVector()) {
     if (Subtarget.hasVSX()) {
-      if (VT != MVT::v2f64 && VT != MVT::v2i64)
+      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
+          VT != MVT::v4f32 && VT != MVT::v4i32)
         return false;
     } else {
       return false;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index df05aa5..bb4d1f1 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
-#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
+#define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
 
 #include "PPC.h"
 #include "PPCInstrInfo.h"
@@ -99,6 +99,10 @@ namespace llvm {
       /// SVR4 calls.
       CALL, CALL_NOP,
 
+      /// CALL_TLS and CALL_NOP_TLS - Versions of CALL and CALL_NOP used
+      /// to access TLS variables.
+      CALL_TLS, CALL_NOP_TLS,
+
       /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
       /// MTCTR instruction.
       MTCTR,
@@ -181,6 +185,10 @@ namespace llvm {
       /// on PPC32.
       PPC32_GOT,
 
+      /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
+      /// local dynamic TLS  on PPC32.
+      PPC32_PICGOT,
+
       /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
       /// TLS model, produces an ADDIS8 instruction that adds the GOT
       /// base to sym\@got\@tprel\@ha.
@@ -210,10 +218,6 @@ namespace llvm {
       /// sym\@got\@tlsgd\@l.
       ADDI_TLSGD_L,
 
-      /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
-      /// model, produces a call to __tls_get_addr(sym\@tlsgd).
-      GET_TLS_ADDR,
-
       /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
       /// model, produces an ADDIS8 instruction that adds the GOT base
       /// register to sym\@got\@tlsld\@ha.
@@ -224,10 +228,6 @@ namespace llvm {
       /// sym\@got\@tlsld\@l.
       ADDI_TLSLD_L,
 
-      /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
-      /// model, produces a call to __tls_get_addr(sym\@tlsld).
-      GET_TLSLD_ADDR,
-
       /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
       /// local-dynamic TLS model, produces an ADDIS8 instruction
       /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed
@@ -297,27 +297,28 @@ namespace llvm {
   namespace PPC {
     /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
     /// VPKUHUM instruction.
-    bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+    bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG);
 
     /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
     /// VPKUWUM instruction.
-    bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+    bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG);
 
     /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
     /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
     bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
-                            bool isUnary, SelectionDAG &DAG);
+                            unsigned ShuffleKind, SelectionDAG &DAG);
 
     /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
     /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
     bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
-                            bool isUnary, SelectionDAG &DAG);
+                            unsigned ShuffleKind, SelectionDAG &DAG);
 
-    /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
-    /// amount, otherwise return -1.
-    int isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG);
+    /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
+    /// shift amount, otherwise return -1.
+    int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
+                            SelectionDAG &DAG);
 
     /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
     /// specifies a splat of a single element that is suitable for input to
@@ -344,7 +345,7 @@ namespace llvm {
     const PPCSubtarget &Subtarget;
 
   public:
-    explicit PPCTargetLowering(PPCTargetMachine &TM);
+    explicit PPCTargetLowering(const PPCTargetMachine &TM);
 
     /// getTargetNodeName() - This method returns the name of a target specific
     /// DAG node.
@@ -355,6 +356,11 @@ namespace llvm {
     /// getSetCCResultType - Return the ISD::SETCC ValueType
     EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
 
+    /// Return true if target always beneficiates from combining into FMA for a
+    /// given value type. This must typically return false on targets where FMA
+    /// takes more cycles to execute than FADD.
+    bool enableAggressiveFMAFusion(EVT VT) const override;
+
     /// getPreIndexedAddressParts - returns true by value, base pointer and
     /// offset pointer and addressing mode by reference if the node's address
     /// can be legally represented as pre-indexed load / store address.
@@ -403,6 +409,11 @@ namespace llvm {
                                        const SelectionDAG &DAG,
                                        unsigned Depth = 0) const override;
 
+    Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+                                  bool IsStore, bool IsLoad) const override;
+    Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+                                   bool IsStore, bool IsLoad) const override;
+
     MachineBasicBlock *
       EmitInstrWithCustomInserter(MachineInstr *MI,
                                   MachineBasicBlock *MBB) const override;
@@ -472,6 +483,10 @@ namespace llvm {
 
     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
 
+    bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+                            const CallInst &I,
+                            unsigned Intrinsic) const override;
+
     /// getOptimalMemOpType - Returns the target specific optimal type for load
     /// and store operations as a result of memset, memcpy, and memmove
     /// lowering. If DstAlign is zero that means it's safe to destination
@@ -490,9 +505,10 @@ namespace llvm {
 
     /// Is unaligned memory access allowed for the given type, and is it fast
     /// relative to software emulation.
-    bool allowsUnalignedMemoryAccesses(EVT VT,
-                                       unsigned AddrSpace,
-                                       bool *Fast = nullptr) const override;
+    bool allowsMisalignedMemoryAccesses(EVT VT,
+                                        unsigned AddrSpace,
+                                        unsigned Align = 1,
+                                        bool *Fast = nullptr) const override;
 
     /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
     /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
@@ -510,6 +526,20 @@ namespace llvm {
     FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
                              const TargetLibraryInfo *LibInfo) const override;
 
+    /// \brief Returns true if an argument of type Ty needs to be passed in a
+    /// contiguous block of registers in calling convention CallConv.
+    bool functionArgumentNeedsConsecutiveRegisters(
+      Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override {
+      // We support any array type as "consecutive" block in the parameter
+      // save area.  The element type defines the alignment requirement and
+      // whether the argument should go in GPRs, FPRs, or VRs if available.
+      //
+      // Note that clang uses this capability both to implement the ELFv2
+      // homogeneous float/vector aggregate ABI, and to avoid having to use
+      // "byval" when passing aggregates that might fully fit in registers.
+      return Ty->isArrayTy();
+    }
+
   private:
     SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
     SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
@@ -533,6 +563,8 @@ namespace llvm {
     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+    std::pair<SDValue,SDValue> lowerTLSCall(SDValue Op, SDLoc dl,
+                                            SelectionDAG &DAG) const;
     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
@@ -666,8 +698,12 @@ namespace llvm {
 
     SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
-    SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const;
-    SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const;
+
+    SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+                             unsigned &RefinementSteps,
+                             bool &UseOneConstNR) const override;
+    SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+                             unsigned &RefinementSteps) const override;
 
     CCAssignFn *useFastISelCCs(unsigned Flag) const;
   };
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 9318f70..9a19abb 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -188,6 +188,9 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)),
 def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
           (BL8_NOP texternalsym:$dst)>;
 
+def : Pat<(PPCcall_nop_tls texternalsym:$func, tglobaltlsaddr:$sym),
+          (BL8_NOP_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
+
 // Atomic operations
 let usesCustomInserter = 1 in {
   let Defs = [CR0] in {
@@ -786,7 +789,7 @@ let canFoldAsLoad = 1, PPC970_Unit = 2 in {
 def LD   : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
                     "ld $rD, $src", IIC_LdStLD,
                     [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
-// The following three definitions are selected for small code model only.
+// The following four definitions are selected for small code model only.
 // Otherwise, we need to create two instructions to form a 32-bit offset,
 // so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
 def LDtoc: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
@@ -801,8 +804,12 @@ def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
                   "#LDtocCPT",
                   [(set i64:$rD,
                      (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
+def LDtocBA: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+                  "#LDtocCPT",
+                  [(set i64:$rD,
+                     (PPCtoc_entry tblockaddress:$disp, i64:$reg))]>, isPPC64;
 
-let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2 in
+let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2, Defs = [X2] in
 def LDinto_toc: DSForm_1<58, 0, (outs), (ins memrix:$src),
                     "ld 2, $src", IIC_LdStLD,
                     [(PPCload_toc ixaddr:$src)]>, isPPC64;
@@ -872,11 +879,6 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
                        [(set i64:$rD,
                          (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
                  isPPC64;
-def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
-                        "#GETtlsADDR",
-                        [(set i64:$rD,
-                          (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
-                 isPPC64;
 def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
                          "#ADDIStlsldHA",
                          [(set i64:$rD,
@@ -887,11 +889,6 @@ def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
                        [(set i64:$rD,
                          (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
                  isPPC64;
-def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
-                          "#GETtlsldADDR",
-                          [(set i64:$rD,
-                            (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
-                   isPPC64;
 def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
                           "#ADDISdtprelHA",
                           [(set i64:$rD,
@@ -1135,3 +1132,9 @@ def : Pat<(i64 (unaligned4load xoaddr:$src)),
 def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
           (STDX $rS, xoaddr:$dst)>;
 
+// 64-bits atomic loads and stores
+def : Pat<(atomic_load_64 ixaddr:$src), (LD  memrix:$src)>;
+def : Pat<(atomic_load_64 xaddr:$src),  (LDX memrr:$src)>;
+
+def : Pat<(atomic_store_64 ixaddr:$ptr, i64:$val), (STD  g8rc:$val, memrix:$ptr)>;
+def : Pat<(atomic_store_64 xaddr:$ptr,  i64:$val), (STDX g8rc:$val, memrr:$ptr)>;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index dce46d8..4ef08eb 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -22,110 +22,143 @@ def vnot_ppc : PatFrag<(ops node:$in),
 
 def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                               (vector_shuffle node:$lhs, node:$rhs), [{
-  return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false,
-                                   *CurDAG);
+  return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
 }]>;
 def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                               (vector_shuffle node:$lhs, node:$rhs), [{
-  return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false,
-                                   *CurDAG);
+  return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
 }]>;
 def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                                     (vector_shuffle node:$lhs, node:$rhs), [{
-  return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true,
-                                   *CurDAG);
+  return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG);
 }]>;
 def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                                     (vector_shuffle node:$lhs, node:$rhs), [{
-  return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true,
-                                   *CurDAG);
+  return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG);
 }]>;
 
+// These fragments are provided for little-endian, where the inputs must be
+// swapped for correct semantics.
+def vpkuhum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                      (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG);
+}]>;
+def vpkuwum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                      (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG);
+}]>;
 
 def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                              (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
-  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false,
-                                 *CurDAG);
+  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 0, *CurDAG);
 }]>;
 def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                              (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
-  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false,
-                                 *CurDAG);
+  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 0, *CurDAG);
 }]>;
 def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                              (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
-  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false,
-                                 *CurDAG);
+  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 0, *CurDAG);
 }]>;
 def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                              (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
-  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false,
-                                 *CurDAG);
+  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 0, *CurDAG);
 }]>;
 def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                              (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
-  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false,
-                                 *CurDAG);
+  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 0, *CurDAG);
 }]>;
 def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                              (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
-  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false,
-                                 *CurDAG);
+  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 0, *CurDAG);
 }]>;
 
 
 def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                                (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
-  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true,
-                                 *CurDAG);
+  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 1, *CurDAG);
 }]>;
 def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                                    (vector_shuffle node:$lhs, node:$rhs), [{
-  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true,
-                                 *CurDAG);
+  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 1, *CurDAG);
 }]>;
 def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                                    (vector_shuffle node:$lhs, node:$rhs), [{
-  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true,
-                                 *CurDAG);
+  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 1, *CurDAG);
 }]>;
 def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                                    (vector_shuffle node:$lhs, node:$rhs), [{
-  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true,
-                                 *CurDAG);
+  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 1, *CurDAG);
 }]>;
 def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                                    (vector_shuffle node:$lhs, node:$rhs), [{
-  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true,
-                                 *CurDAG);
+  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 1, *CurDAG);
 }]>;
 def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                                    (vector_shuffle node:$lhs, node:$rhs), [{
-  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true,
-                                 *CurDAG);
+  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 1, *CurDAG);
+}]>;
+
+
+// These fragments are provided for little-endian, where the inputs must be
+// swapped for correct semantics.
+def vmrglb_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                               (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 2, *CurDAG);
+}]>;
+def vmrglh_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                   (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 2, *CurDAG);
+}]>;
+def vmrglw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                   (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 2, *CurDAG);
+}]>;
+def vmrghb_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                   (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 2, *CurDAG);
+}]>;
+def vmrghh_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                   (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 2, *CurDAG);
+}]>;
+def vmrghw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                   (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 2, *CurDAG);
 }]>;
 
 
 def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
-  return getI32Imm(PPC::isVSLDOIShuffleMask(N, false, *CurDAG));
+  return getI32Imm(PPC::isVSLDOIShuffleMask(N, 0, *CurDAG));
 }]>;
 def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                              (vector_shuffle node:$lhs, node:$rhs), [{
-  return PPC::isVSLDOIShuffleMask(N, false, *CurDAG) != -1;
+  return PPC::isVSLDOIShuffleMask(N, 0, *CurDAG) != -1;
 }], VSLDOI_get_imm>;
 
 
 /// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
 /// vector_shuffle(X,undef,mask) by the dag combiner.
 def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
-  return getI32Imm(PPC::isVSLDOIShuffleMask(N, true, *CurDAG));
+  return getI32Imm(PPC::isVSLDOIShuffleMask(N, 1, *CurDAG));
 }]>;
 def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
                                    (vector_shuffle node:$lhs, node:$rhs), [{
-  return PPC::isVSLDOIShuffleMask(N, true, *CurDAG) != -1;
+  return PPC::isVSLDOIShuffleMask(N, 1, *CurDAG) != -1;
 }], VSLDOI_unary_get_imm>;
 
 
+/// VSLDOI_swapped* - These fragments are provided for little-endian, where
+/// the inputs must be swapped for correct semantics.
+def VSLDOI_swapped_get_imm : SDNodeXForm<vector_shuffle, [{
+  return getI32Imm(PPC::isVSLDOIShuffleMask(N, 2, *CurDAG));
+}]>;
+def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                     (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVSLDOIShuffleMask(N, 2, *CurDAG) != -1;
+}], VSLDOI_get_imm>;
+
+
 // VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
 def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
   return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG));
@@ -242,48 +275,64 @@ class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
 def HasAltivec : Predicate<"PPCSubTarget->hasAltivec()">;
 let Predicates = [HasAltivec] in {
 
-let isCodeGenOnly = 1 in {
-def DSS      : DSS_Form<822, (outs),
-                        (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
-                        "dss $STRM", IIC_LdStLoad /*FIXME*/, []>,
-                        Deprecated<DeprecatedDST>;
-def DSSALL   : DSS_Form<822, (outs),
-                        (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
-                        "dssall", IIC_LdStLoad /*FIXME*/, []>,
-                        Deprecated<DeprecatedDST>;
-def DST      : DSS_Form<342, (outs),
-                        (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
-                        "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
-                        Deprecated<DeprecatedDST>;
-def DSTT     : DSS_Form<342, (outs),
-                        (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
-                        "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
-                        Deprecated<DeprecatedDST>;
-def DSTST    : DSS_Form<374, (outs),
-                        (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
-                        "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
-                        Deprecated<DeprecatedDST>;
-def DSTSTT   : DSS_Form<374, (outs),
-                        (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
-                        "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
-                        Deprecated<DeprecatedDST>;
+def DSS      : DSS_Form<0, 822, (outs), (ins u5imm:$STRM),
+                        "dss $STRM", IIC_LdStLoad /*FIXME*/, [(int_ppc_altivec_dss imm:$STRM)]>,
+                        Deprecated<DeprecatedDST> {
+  let A = 0;
+  let B = 0;
+}
 
-def DST64    : DSS_Form<342, (outs),
-                        (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
-                        "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+def DSSALL   : DSS_Form<1, 822, (outs), (ins),
+                        "dssall", IIC_LdStLoad /*FIXME*/, [(int_ppc_altivec_dssall)]>,
+                        Deprecated<DeprecatedDST> {
+  let STRM = 0;
+  let A = 0;
+  let B = 0;
+}
+
+def DST      : DSS_Form<0, 342, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+                        "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+                        [(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM)]>,
                         Deprecated<DeprecatedDST>;
-def DSTT64   : DSS_Form<342, (outs),
-                        (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
-                        "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+
+def DSTT     : DSS_Form<1, 342, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+                        "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+                        [(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM)]>,
                         Deprecated<DeprecatedDST>;
-def DSTST64  : DSS_Form<374, (outs),
-                        (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
-                        "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+
+def DSTST    : DSS_Form<0, 374, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+                        "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+                        [(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM)]>,
                         Deprecated<DeprecatedDST>;
-def DSTSTT64 : DSS_Form<374, (outs),
-                        (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
-                        "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+
+def DSTSTT   : DSS_Form<1, 374, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+                        "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+                        [(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM)]>,
                         Deprecated<DeprecatedDST>;
+
+let isCodeGenOnly = 1 in {
+  // The very same instructions as above, but formally matching 64bit registers.
+  def DST64    : DSS_Form<0, 342, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+                          "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+                          [(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM)]>,
+                          Deprecated<DeprecatedDST>;
+
+  def DSTT64   : DSS_Form<1, 342, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+                          "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+                          [(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM)]>,
+                          Deprecated<DeprecatedDST>;
+
+  def DSTST64  : DSS_Form<0, 374, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+                          "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+                          [(int_ppc_altivec_dstst i64:$rA, i32:$rB,
+                                                  imm:$STRM)]>,
+                          Deprecated<DeprecatedDST>;
+
+  def DSTSTT64 : DSS_Form<1, 374, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+                          "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+                          [(int_ppc_altivec_dststt i64:$rA, i32:$rB,
+                                                   imm:$STRM)]>,
+                          Deprecated<DeprecatedDST>;
 }
 
 def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
@@ -731,30 +780,6 @@ def V_SETALLONES  : VXForm_3<908, (outs vrrc:$vD), (ins),
 // Additional Altivec Patterns
 //
 
-// DS* intrinsics
-def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>;
-def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>;
-
-//  * 32-bit
-def : Pat<(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM),
-          (DST 0, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM),
-          (DSTT 1, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM),
-          (DSTST 0, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM),
-          (DSTSTT 1, imm:$STRM, $rA, $rB)>;
-
-//  * 64-bit
-def : Pat<(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM),
-          (DST64 0, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM),
-          (DSTT64 1, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dstst i64:$rA, i32:$rB, imm:$STRM),
-          (DSTST64 0, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dststt i64:$rA, i32:$rB, imm:$STRM),
-          (DSTSTT64 1, imm:$STRM, $rA, $rB)>;
-
 // Loads.
 def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
 
@@ -789,6 +814,16 @@ def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef),
 def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef),
         (VPKUHUM $vA, $vA)>;
 
+// Match vsldoi(y,x), vpkuwum(y,x), vpkuhum(y,x), i.e., swapped operands.
+// These fragments are matched for little-endian, where the inputs must
+// be swapped for correct semantics.
+def:Pat<(vsldoi_swapped_shuffle:$in v16i8:$vA, v16i8:$vB),
+        (VSLDOI $vB, $vA, (VSLDOI_swapped_get_imm $in))>;
+def:Pat<(vpkuwum_swapped_shuffle v16i8:$vA, v16i8:$vB),
+        (VPKUWUM $vB, $vA)>;
+def:Pat<(vpkuhum_swapped_shuffle v16i8:$vA, v16i8:$vB),
+        (VPKUHUM $vB, $vA)>;
+
 // Match vmrg*(x,x)
 def:Pat<(vmrglb_unary_shuffle v16i8:$vA, undef),
         (VMRGLB $vA, $vA)>;
@@ -803,6 +838,22 @@ def:Pat<(vmrghh_unary_shuffle v16i8:$vA, undef),
 def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef),
         (VMRGHW $vA, $vA)>;
 
+// Match vmrg*(y,x), i.e., swapped operands.  These fragments
+// are matched for little-endian, where the inputs must be
+// swapped for correct semantics.
+def:Pat<(vmrglb_swapped_shuffle v16i8:$vA, v16i8:$vB),
+        (VMRGLB $vB, $vA)>;
+def:Pat<(vmrglh_swapped_shuffle v16i8:$vA, v16i8:$vB),
+        (VMRGLH $vB, $vA)>;
+def:Pat<(vmrglw_swapped_shuffle v16i8:$vA, v16i8:$vB),
+        (VMRGLW $vB, $vA)>;
+def:Pat<(vmrghb_swapped_shuffle v16i8:$vA, v16i8:$vB),
+        (VMRGHB $vB, $vA)>;
+def:Pat<(vmrghh_swapped_shuffle v16i8:$vA, v16i8:$vB),
+        (VMRGHH $vB, $vA)>;
+def:Pat<(vmrghw_swapped_shuffle v16i8:$vA, v16i8:$vB),
+        (VMRGHW $vB, $vA)>;
+
 // Logical Operations
 def : Pat<(vnot_ppc v4i32:$vA), (VNOR $vA, $vA)>;
 
diff --git a/lib/Target/PowerPC/PPCInstrBuilder.h b/lib/Target/PowerPC/PPCInstrBuilder.h
index b424d11..cf71b1c 100644
--- a/lib/Target/PowerPC/PPCInstrBuilder.h
+++ b/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -17,8 +17,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef POWERPC_INSTRBUILDER_H
-#define POWERPC_INSTRBUILDER_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRBUILDER_H
+#define LLVM_LIB_TARGET_POWERPC_PPCINSTRBUILDER_H
 
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 1e4396c..aa68497 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -380,6 +380,11 @@ class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asms
   let Inst{31}    = RC;
 }
 
+class XForm_tlb<bits<10> xo, dag OOL, dag IOL, string asmstr,
+                InstrItinClass itin> : XForm_base_r3xo<31, xo, OOL, IOL, asmstr, itin, []> {
+  let RST = 0;
+}
+
 // This is the same as XForm_base_r3xo, but the first two operands are swapped
 // when code is emitted.
 class XForm_base_r3xo_swapped
@@ -417,6 +422,22 @@ class XForm_rs<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
   let B = 0;
 }
 
+class XForm_tlbws<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+              InstrItinClass itin, list<dag> pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<5> RST;
+  bits<5> A;
+  bits<1> WS;
+
+  let Pattern = pattern;
+
+  let Inst{6-10}  = RST;
+  let Inst{11-15} = A;
+  let Inst{20}    = WS;
+  let Inst{21-30} = xo;
+  let Inst{31}    = 0;
+}
+
 class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern> 
   : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
@@ -457,6 +478,52 @@ class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
   let Inst{31}    = 0;
 }
 
+class XForm_icbt<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+                 InstrItinClass itin>
+         : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<4> CT;
+  bits<5> RA;
+  bits<5> RB;
+
+  let Inst{6} = 0;
+  let Inst{7-10} = CT;
+  let Inst{11-15} = RA;
+  let Inst{16-20} = RB;
+  let Inst{21-30} = xo;
+  let Inst{31} = 0;
+}
+
+class XForm_sr<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+                InstrItinClass itin>
+         : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<5> RS;
+  bits<4> SR;
+
+  let Inst{6-10} = RS;
+  let Inst{12-15} = SR;
+  let Inst{21-30} = xo;
+}
+
+class XForm_mbar<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+                InstrItinClass itin>
+         : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<5> MO;
+
+  let Inst{6-10} = MO;
+  let Inst{21-30} = xo;
+}
+
+class XForm_srin<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+                InstrItinClass itin>
+         : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<5> RS;
+  bits<5> RB;
+
+  let Inst{6-10} = RS;
+  let Inst{16-20} = RB;
+  let Inst{21-30} = xo;
+}
+
 class XForm_mtmsr<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
                 InstrItinClass itin>
          : I<opcode, OOL, IOL, asmstr, itin> {
@@ -764,10 +831,9 @@ class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
 
 
 // DSS_Form - Form X instruction, used for altivec dss* instructions.
-class DSS_Form<bits<10> xo, dag OOL, dag IOL, string asmstr, 
+class DSS_Form<bits<1> T, bits<10> xo, dag OOL, dag IOL, string asmstr,
                       InstrItinClass itin, list<dag> pattern>
   : I<31, OOL, IOL, asmstr, itin> {
-  bits<1> T;
   bits<2> STRM;
   bits<5> A;
   bits<5> B;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 9bac91d..daf8790 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -75,7 +75,7 @@ PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
   if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
       Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
     const InstrItineraryData *II =
-        &static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
+        static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
     return new ScoreboardHazardRecognizer(II, DAG);
   }
 
@@ -331,6 +331,11 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
   BuildMI(MBB, MI, DL, get(Opcode));
 }
 
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void PPCInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+  NopInst.setOpcode(PPC::NOP);
+}
+
 // Branch analysis.
 // Note: If the condition register is set to CTR or CTR8 then this is a
 // BDNZ (imm == 1) or BDZ (imm == 0) branch.
@@ -1617,6 +1622,7 @@ protected:
       bool Changed = false;
 
       MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+      const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
       for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
            I != IE; ++I) {
         MachineInstr *MI = I;
@@ -1682,16 +1688,26 @@ protected:
         // In theory, there could be other uses of the addend copy before this
         // fma.  We could deal with this, but that would require additional
         // logic below and I suspect it will not occur in any relevant
-        // situations.
-        bool OtherUsers = false;
+        // situations.  Additionally, check whether the copy source is killed
+        // prior to the fma.  In order to replace the addend here with the
+        // source of the copy, it must still be live here.  We can't use
+        // interval testing for a physical register, so as long as we're
+        // walking the MIs we may as well test liveness here.
+        bool OtherUsers = false, KillsAddendSrc = false;
         for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
-             J != JE; --J)
+             J != JE; --J) {
           if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
             OtherUsers = true;
             break;
           }
+          if (J->modifiesRegister(AddendSrcReg, TRI) ||
+              J->killsRegister(AddendSrcReg, TRI)) {
+            KillsAddendSrc = true;
+            break;
+          }
+        }
 
-        if (OtherUsers)
+        if (OtherUsers || KillsAddendSrc)
           continue;
 
         // Find one of the product operands that is killed by this instruction.
@@ -1712,10 +1728,11 @@ protected:
         if (!KilledProdOp)
           continue;
 
-        // In order to replace the addend here with the source of the copy,
-        // it must still be live here.
-        if (!LIS->getInterval(AddendMI->getOperand(1).getReg()).liveAt(FMAIdx))
-          continue;
+        // For virtual registers, verify that the addend source register
+        // is live here (as should have been assured above).
+        assert((!TargetRegisterInfo::isVirtualRegister(AddendSrcReg) ||
+                LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) &&
+               "Addend source register is not live!");
 
         // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
 
@@ -1737,6 +1754,12 @@ protected:
 
         unsigned OldFMAReg = MI->getOperand(0).getReg();
 
+        // The transformation doesn't work well with things like:
+        //    %vreg5 = A-form-op %vreg5, %vreg11, %vreg5;
+        // so leave such things alone.
+        if (OldFMAReg == KilledProdReg)
+          continue;
+
         assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
                "Addend copy not tied to old FMA output!");
 
@@ -1827,7 +1850,7 @@ public:
 
       LIS = &getAnalysis<LiveIntervals>();
 
-      TII = TM->getInstrInfo();
+      TII = TM->getSubtargetImpl()->getInstrInfo();
 
       bool Changed = false;
 
@@ -1980,7 +2003,7 @@ public:
       // If we don't have VSX on the subtarget, don't do anything.
       if (!TM->getSubtargetImpl()->hasVSX())
         return false;
-      TII = TM->getInstrInfo();
+      TII = TM->getSubtargetImpl()->getInstrInfo();
 
       bool Changed = false;
 
@@ -2057,7 +2080,7 @@ public:
       // If we don't have VSX don't bother doing anything here.
       if (!TM->getSubtargetImpl()->hasVSX())
         return false;
-      TII = TM->getInstrInfo();
+      TII = TM->getSubtargetImpl()->getInstrInfo();
 
       bool Changed = false;
 
@@ -2214,7 +2237,7 @@ protected:
 public:
     bool runOnMachineFunction(MachineFunction &MF) override {
       TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
-      TII = TM->getInstrInfo();
+      TII = TM->getSubtargetImpl()->getInstrInfo();
 
       bool Changed = false;
 
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 83f14c6..4d310fe 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef POWERPC_INSTRUCTIONINFO_H
-#define POWERPC_INSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
 
 #include "PPC.h"
 #include "PPCRegisterInfo.h"
@@ -228,6 +228,8 @@ public:
   /// instruction may be.  This returns the maximum number of bytes.
   ///
   unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+
+  void getNoopForMachoTarget(MCInst &NopInst) const override;
 };
 
 }
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index c2e3382..8c76c46 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -57,6 +57,9 @@ def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
   SDTCisPtrTy<0>, SDTCisVT<1, i32>
 ]>;
 
+def tocentry32 : Operand<iPTR> {
+  let MIOperandInfo = (ops i32imm:$imm);
+}
 
 //===----------------------------------------------------------------------===//
 // PowerPC specific DAG Nodes.
@@ -107,10 +110,8 @@ def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
 def PPCaddTls     : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
 def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
 def PPCaddiTlsgdL   : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
-def PPCgetTlsAddr   : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
 def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
 def PPCaddiTlsldL   : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
-def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
 def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
                               [SDNPHasChain]>;
 def PPCaddiDtprelL   : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
@@ -133,9 +134,15 @@ def SDT_PPCCall   : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
 def PPCcall  : SDNode<"PPCISD::CALL", SDT_PPCCall,
                       [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                        SDNPVariadic]>;
+def PPCcall_tls : SDNode<"PPCISD::CALL_TLS", SDT_PPCCall,
+                         [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                          SDNPVariadic]>;
 def PPCcall_nop  : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                            SDNPVariadic]>;
+def PPCcall_nop_tls : SDNode<"PPCISD::CALL_NOP_TLS", SDT_PPCCall,
+                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                              SDNPVariadic]>;
 def PPCload   : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
                        [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
@@ -417,6 +424,15 @@ def u2imm   : Operand<i32> {
   let PrintMethod = "printU2ImmOperand";
   let ParserMatchClass = PPCU2ImmAsmOperand;
 }
+
+def PPCU4ImmAsmOperand : AsmOperandClass {
+  let Name = "U4Imm"; let PredicateMethod = "isU4Imm";
+  let RenderMethod = "addImmOperands";
+}
+def u4imm   : Operand<i32> {
+  let PrintMethod = "printU4ImmOperand";
+  let ParserMatchClass = PPCU4ImmAsmOperand;
+}
 def PPCS5ImmAsmOperand : AsmOperandClass {
   let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
   let RenderMethod = "addImmOperands";
@@ -446,7 +462,7 @@ def u6imm   : Operand<i32> {
 }
 def PPCS16ImmAsmOperand : AsmOperandClass {
   let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
-  let RenderMethod = "addImmOperands";
+  let RenderMethod = "addS16ImmOperands";
 }
 def s16imm  : Operand<i32> {
   let PrintMethod = "printS16ImmOperand";
@@ -456,7 +472,7 @@ def s16imm  : Operand<i32> {
 }
 def PPCU16ImmAsmOperand : AsmOperandClass {
   let Name = "U16Imm"; let PredicateMethod = "isU16Imm";
-  let RenderMethod = "addImmOperands";
+  let RenderMethod = "addU16ImmOperands";
 }
 def u16imm  : Operand<i32> {
   let PrintMethod = "printU16ImmOperand";
@@ -466,7 +482,7 @@ def u16imm  : Operand<i32> {
 }
 def PPCS17ImmAsmOperand : AsmOperandClass {
   let Name = "S17Imm"; let PredicateMethod = "isS17Imm";
-  let RenderMethod = "addImmOperands";
+  let RenderMethod = "addS16ImmOperands";
 }
 def s17imm  : Operand<i32> {
   // This operand type is used for addis/lis to allow the assembler parser
@@ -542,7 +558,7 @@ def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> {
 
 def PPCDispRIOperand : AsmOperandClass {
  let Name = "DispRI"; let PredicateMethod = "isS16Imm";
- let RenderMethod = "addImmOperands";
+ let RenderMethod = "addS16ImmOperands";
 }
 def dispRI : Operand<iPTR> {
   let ParserMatchClass = PPCDispRIOperand;
@@ -554,6 +570,27 @@ def PPCDispRIXOperand : AsmOperandClass {
 def dispRIX : Operand<iPTR> {
   let ParserMatchClass = PPCDispRIXOperand;
 }
+def PPCDispSPE8Operand : AsmOperandClass {
+ let Name = "DispSPE8"; let PredicateMethod = "isU8ImmX8";
+ let RenderMethod = "addImmOperands";
+}
+def dispSPE8 : Operand<iPTR> {
+  let ParserMatchClass = PPCDispSPE8Operand;
+}
+def PPCDispSPE4Operand : AsmOperandClass {
+ let Name = "DispSPE4"; let PredicateMethod = "isU7ImmX4";
+ let RenderMethod = "addImmOperands";
+}
+def dispSPE4 : Operand<iPTR> {
+  let ParserMatchClass = PPCDispSPE4Operand;
+}
+def PPCDispSPE2Operand : AsmOperandClass {
+ let Name = "DispSPE2"; let PredicateMethod = "isU6ImmX2";
+ let RenderMethod = "addImmOperands";
+}
+def dispSPE2 : Operand<iPTR> {
+  let ParserMatchClass = PPCDispSPE2Operand;
+}
 
 def memri : Operand<iPTR> {
   let PrintMethod = "printMemRegImm";
@@ -571,6 +608,21 @@ def memrix : Operand<iPTR> {   // memri where the imm is 4-aligned.
   let EncoderMethod = "getMemRIXEncoding";
   let DecoderMethod = "decodeMemRIXOperands";
 }
+def spe8dis : Operand<iPTR> {   // SPE displacement where the imm is 8-aligned.
+  let PrintMethod = "printMemRegImm";
+  let MIOperandInfo = (ops dispSPE8:$imm, ptr_rc_nor0:$reg);
+  let EncoderMethod = "getSPE8DisEncoding";
+}
+def spe4dis : Operand<iPTR> {   // SPE displacement where the imm is 4-aligned.
+  let PrintMethod = "printMemRegImm";
+  let MIOperandInfo = (ops dispSPE4:$imm, ptr_rc_nor0:$reg);
+  let EncoderMethod = "getSPE4DisEncoding";
+}
+def spe2dis : Operand<iPTR> {   // SPE displacement where the imm is 2-aligned.
+  let PrintMethod = "printMemRegImm";
+  let MIOperandInfo = (ops dispSPE2:$imm, ptr_rc_nor0:$reg);
+  let EncoderMethod = "getSPE2DisEncoding";
+}
 
 // A single-register address. This is used with the SjLj
 // pseudo-instructions.
@@ -585,6 +637,12 @@ def tlsreg32 : Operand<i32> {
   let EncoderMethod = "getTLSRegEncoding";
   let ParserMatchClass = PPCTLSRegOperand;
 }
+def tlsgd32 : Operand<i32> {}
+def tlscall32 : Operand<i32> {
+  let PrintMethod = "printTLSCall";
+  let MIOperandInfo = (ops calltarget:$func, tlsgd32:$sym);
+  let EncoderMethod = "getTLSCallEncoding";
+}
 
 // PowerPC Predicate operand.
 def pred : Operand<OtherVT> {
@@ -611,6 +669,12 @@ def In32BitMode  : Predicate<"!PPCSubTarget->isPPC64()">;
 def In64BitMode  : Predicate<"PPCSubTarget->isPPC64()">;
 def IsBookE  : Predicate<"PPCSubTarget->isBookE()">;
 def IsNotBookE  : Predicate<"!PPCSubTarget->isBookE()">;
+def HasOnlyMSYNC : Predicate<"PPCSubTarget->hasOnlyMSYNC()">;
+def HasSYNC   : Predicate<"!PPCSubTarget->hasOnlyMSYNC()">;
+def IsPPC4xx  : Predicate<"PPCSubTarget->isPPC4xx()">;
+def IsPPC6xx  : Predicate<"PPCSubTarget->isPPC6xx()">;
+def IsE500  : Predicate<"PPCSubTarget->isE500()">;
+def HasSPE  : Predicate<"PPCSubTarget->HasSPE()">;
 
 //===----------------------------------------------------------------------===//
 // PowerPC Multiclass Definitions.
@@ -967,6 +1031,9 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
 let Defs = [LR] in
   def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>,
                    PPC970_Unit_BRU;
+let Defs = [LR] in
+  def MoveGOTtoLR : Pseudo<(outs), (ins), "#MoveGOTtoLR", []>,
+                    PPC970_Unit_BRU;
 
 let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
   let isBarrier = 1 in {
@@ -1068,6 +1135,8 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
                     "bla $func", IIC_BrB, [(PPCcall (i32 imm:$func))]>;
 
     let isCodeGenOnly = 1 in {
+      def BL_TLS  : IForm<18, 0, 1, (outs), (ins tlscall32:$func),
+                          "bl $func", IIC_BrB, []>;
       def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst),
                        "b${cond:cc}l${cond:pm} ${cond:reg}, $dst">;
       def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst),
@@ -1243,8 +1312,15 @@ def DCBZL  : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst",
                       IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
                       PPC970_DGroup_Single;
 
+def ICBT  : XForm_icbt<31, 22, (outs), (ins u4imm:$CT, memrr:$src),
+                       "icbt $CT, $src", IIC_LdStLoad>, Requires<[IsBookE]>;
+
 def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
-          (DCBT xoaddr:$dst)>;
+          (DCBT xoaddr:$dst)>;   // data prefetch for loads
+def : Pat<(prefetch xoaddr:$dst, (i32 1), imm, (i32 1)),
+          (DCBTST xoaddr:$dst)>; // data prefetch for stores
+def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)),
+          (ICBT 0, xoaddr:$dst)>; // inst prefetch (for read)
 
 // Atomic operations
 let usesCustomInserter = 1 in {
@@ -1628,17 +1704,19 @@ def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
                    "stmw $rS, $dst", IIC_LdStLMW, []>;
 
 def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L),
-                        "sync $L", IIC_LdStSync, []>, Requires<[IsNotBookE]>;
+                        "sync $L", IIC_LdStSync, []>;
 
 let isCodeGenOnly = 1 in {
   def MSYNC : XForm_24_sync<31, 598, (outs), (ins),
-                           "msync", IIC_LdStSync, []>, Requires<[IsBookE]> {
+                           "msync", IIC_LdStSync, []> {
     let L = 0;
   }
 }
 
-def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>;
-def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;
+def : Pat<(int_ppc_sync),   (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(int_ppc_lwsync), (SYNC 1)>, Requires<[HasSYNC]>;
+def : Pat<(int_ppc_sync),   (MSYNC)>, Requires<[HasOnlyMSYNC]>;
+def : Pat<(int_ppc_lwsync), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
 
 //===----------------------------------------------------------------------===//
 // PPC32 Arithmetic Instructions.
@@ -2355,6 +2433,8 @@ def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
 def : Pat<(PPCcall (i32 texternalsym:$dst)),
           (BL texternalsym:$dst)>;
 
+def : Pat<(PPCcall_tls texternalsym:$func, tglobaltlsaddr:$sym),
+          (BL_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
 
 def : Pat<(PPCtc_return (i32 tglobaladdr:$dst),  imm:$imm),
           (TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
@@ -2393,13 +2473,47 @@ def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
 def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT", 
                 [(set i32:$rD, (PPCppc32GOT))]>;
 
+// Get the _GLOBAL_OFFSET_TABLE_ in PIC mode.
+// This uses two output registers, the first as the real output, the second as a
+// temporary register, used internally in code generation.
+def PPC32PICGOT: Pseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT", 
+                []>, NoEncode<"$rT">;
+
 def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
-                        "#LDgotTprelL32",
-                        [(set i32:$rD,
-                          (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
+                           "#LDgotTprelL32",
+                           [(set i32:$rD,
+                             (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
 def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g),
           (ADD4TLS $in, tglobaltlsaddr:$g)>;
 
+def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+                         "#ADDItlsgdL32",
+                         [(set i32:$rD,
+                           (PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>;
+def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+                          "#ADDItlsldL32",
+                          [(set i32:$rD,
+                            (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>;
+def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+                           "#ADDIdtprelL32",
+                           [(set i32:$rD,
+                             (PPCaddiDtprelL i32:$reg, tglobaltlsaddr:$disp))]>;
+def ADDISdtprelHA32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+                            "#ADDISdtprelHA32",
+                            [(set i32:$rD,
+                              (PPCaddisDtprelHA i32:$reg,
+                                                tglobaltlsaddr:$disp))]>;
+
+// Support for Position-independent code
+def LWZtoc : Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
+                   "#LWZtoc",
+                   [(set i32:$rD,
+                      (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
+// Get Global (GOT) Base Register offset, from the word immediately preceding
+// the function label.
+def UpdateGBR : Pseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
+
+
 // Standard shifts.  These are represented separately from the real shifts above
 // so that we can distinguish between shifts that allow 5-bit and 6-bit shift
 // amounts.
@@ -2434,8 +2548,15 @@ def : Pat<(f64 (extloadf32 xaddr:$src)),
 def : Pat<(f64 (fextend f32:$src)),
           (COPY_TO_REGCLASS $src, F8RC)>;
 
-def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>, Requires<[IsNotBookE]>;
-def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[IsBookE]>;
+// Only seq_cst fences require the heavyweight sync (SYNC 0).
+// All others can use the lightweight sync (SYNC 1).
+// source: http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+// The rule for seq_cst is duplicated to work with both 64 bits and 32 bits
+// versions of Power.
+def : Pat<(atomic_fence (i64 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (i32 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (imm),   (imm)), (SYNC 1)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
 
 // Additional FNMSUB patterns: -a*c + b == -(a*c - b)
 def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
@@ -2454,6 +2575,7 @@ def : Pat<(fcopysign f32:$frB, f64:$frA),
           (FCPSGNS (COPY_TO_REGCLASS $frA, F4RC), $frB)>;
 
 include "PPCInstrAltivec.td"
+include "PPCInstrSPE.td"
 include "PPCInstr64Bit.td"
 include "PPCInstrVSX.td"
 
@@ -2970,6 +3092,16 @@ def : Pat<(i1 (not (trunc i64:$in))),
 // PowerPC Instructions used for assembler/disassembler only
 //
 
+// FIXME: For B=0 or B > 8, the registers following RT are used.
+// WARNING: Do not add patterns for this instruction without fixing this.
+def LSWI  : XForm_base_r3xo<31, 597, (outs gprc:$RT), (ins gprc:$A, u5imm:$B),
+                            "lswi $RT, $A, $B", IIC_LdStLoad, []>;
+
+// FIXME: For B=0 or B > 8, the registers following RT are used.
+// WARNING: Do not add patterns for this instruction without fixing this.
+def STSWI : XForm_base_r3xo<31, 725, (outs), (ins gprc:$RT, gprc:$A, u5imm:$B),
+                            "stswi $RT, $A, $B", IIC_LdStLoad, []>;
+
 def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
                          "isync", IIC_SprISYNC, []>;
 
@@ -2982,9 +3114,47 @@ def EIEIO : XForm_24_eieio<31, 854, (outs), (ins),
 def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L),
                          "wait $L", IIC_LdStLoad, []>;
 
+def MBAR : XForm_mbar<31, 854, (outs), (ins u5imm:$MO),
+                         "mbar $MO", IIC_LdStLoad>, Requires<[IsBookE]>;
+
+def MTSR: XForm_sr<31, 210, (outs), (ins gprc:$RS, u4imm:$SR),
+            "mtsr $SR, $RS", IIC_SprMTSR>;
+
+def MFSR: XForm_sr<31, 595, (outs gprc:$RS), (ins u4imm:$SR),
+            "mfsr $RS, $SR", IIC_SprMFSR>;
+
+def MTSRIN: XForm_srin<31, 242, (outs), (ins gprc:$RS, gprc:$RB),
+            "mtsrin $RS, $RB", IIC_SprMTSR>;
+
+def MFSRIN: XForm_srin<31, 659, (outs gprc:$RS), (ins gprc:$RB),
+            "mfsrin $RS, $RB", IIC_SprMFSR>;
+
 def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L),
                     "mtmsr $RS, $L", IIC_SprMTMSR>;
 
+def WRTEE: XForm_mtmsr<31, 131, (outs), (ins gprc:$RS),
+                    "wrtee $RS", IIC_SprMTMSR>, Requires<[IsBookE]> {
+  let L = 0;
+}
+
+def WRTEEI: I<31, (outs), (ins i1imm:$E), "wrteei $E", IIC_SprMTMSR>,
+              Requires<[IsBookE]> {
+  bits<1> E;
+
+  let Inst{16} = E;
+  let Inst{21-30} = 163;
+}
+
+def DCCCI : XForm_tlb<454, (outs), (ins gprc:$A, gprc:$B),
+               "dccci $A, $B", IIC_LdStLoad>, Requires<[IsPPC4xx]>;
+def ICCCI : XForm_tlb<966, (outs), (ins gprc:$A, gprc:$B),
+               "iccci $A, $B", IIC_LdStLoad>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"dci 0", (DCCCI R0, R0)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"dccci", (DCCCI R0, R0)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"ici 0", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"iccci", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>;
+
 def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins),
                   "mfmsr $RT", IIC_SprMFMSR, []>;
 
@@ -3002,15 +3172,66 @@ def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB),
 
 def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>;
 
+def TLBIA : XForm_0<31, 370, (outs), (ins),
+                        "tlbia", IIC_SprTLBIA, []>;
+
 def TLBSYNC : XForm_0<31, 566, (outs), (ins),
                         "tlbsync", IIC_SprTLBSYNC, []>;
 
 def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB),
                           "tlbiel $RB", IIC_SprTLBIEL, []>;
 
+def TLBLD : XForm_16b<31, 978, (outs), (ins gprc:$RB),
+                          "tlbld $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>;
+def TLBLI : XForm_16b<31, 1010, (outs), (ins gprc:$RB),
+                          "tlbli $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>;
+
 def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB),
                           "tlbie $RB,$RS", IIC_SprTLBIE, []>;
 
+def TLBSX : XForm_tlb<914, (outs), (ins gprc:$A, gprc:$B), "tlbsx $A, $B",
+                IIC_LdStLoad>, Requires<[IsBookE]>;
+
+def TLBIVAX : XForm_tlb<786, (outs), (ins gprc:$A, gprc:$B), "tlbivax $A, $B",
+                IIC_LdStLoad>, Requires<[IsBookE]>;
+
+def TLBRE : XForm_24_eieio<31, 946, (outs), (ins),
+                           "tlbre", IIC_LdStLoad, []>, Requires<[IsBookE]>;
+
+def TLBWE : XForm_24_eieio<31, 978, (outs), (ins),
+                           "tlbwe", IIC_LdStLoad, []>, Requires<[IsBookE]>;
+
+def TLBRE2 : XForm_tlbws<31, 946, (outs gprc:$RS), (ins gprc:$A, i1imm:$WS),
+               "tlbre $RS, $A, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>;
+
+def TLBWE2 : XForm_tlbws<31, 978, (outs), (ins gprc:$RS, gprc:$A, i1imm:$WS),
+               "tlbwe $RS, $A, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>;
+
+def TLBSX2 : XForm_base_r3xo<31, 914, (outs), (ins gprc:$RST, gprc:$A, gprc:$B),
+                             "tlbsx $RST, $A, $B", IIC_LdStLoad, []>,
+                             Requires<[IsPPC4xx]>;
+def TLBSX2D : XForm_base_r3xo<31, 914, (outs),
+                              (ins gprc:$RST, gprc:$A, gprc:$B),
+                              "tlbsx. $RST, $A, $B", IIC_LdStLoad, []>,
+                              Requires<[IsPPC4xx]>, isDOT;
+
+def RFID : XForm_0<19, 18, (outs), (ins), "rfid", IIC_IntRFID, []>;
+
+def RFI : XForm_0<19, 50, (outs), (ins), "rfi", IIC_SprRFI, []>,
+                  Requires<[IsBookE]>;
+def RFCI : XForm_0<19, 51, (outs), (ins), "rfci", IIC_BrB, []>,
+                   Requires<[IsBookE]>;
+
+def RFDI : XForm_0<19, 39, (outs), (ins), "rfdi", IIC_BrB, []>,
+                   Requires<[IsE500]>;
+def RFMCI : XForm_0<19, 38, (outs), (ins), "rfmci", IIC_BrB, []>,
+                    Requires<[IsE500]>;
+
+def MFDCR : XFXForm_1<31, 323, (outs gprc:$RT), (ins i32imm:$SPR),
+                      "mfdcr $RT, $SPR", IIC_SprMFSPR>, Requires<[IsPPC4xx]>;
+def MTDCR : XFXForm_1<31, 451, (outs), (ins gprc:$RT, i32imm:$SPR),
+                      "mtdcr $SPR, $RT", IIC_SprMTSPR>, Requires<[IsPPC4xx]>;
+
 //===----------------------------------------------------------------------===//
 // PowerPC Assembler Instruction Aliases
 //
@@ -3033,15 +3254,17 @@ class PPCAsmPseudo<string asm, dag iops>
 
 def : InstAlias<"sc", (SC 0)>;
 
-def : InstAlias<"sync", (SYNC 0)>, Requires<[IsNotBookE]>;
-def : InstAlias<"msync", (SYNC 0)>, Requires<[IsNotBookE]>;
-def : InstAlias<"lwsync", (SYNC 1)>, Requires<[IsNotBookE]>;
-def : InstAlias<"ptesync", (SYNC 2)>, Requires<[IsNotBookE]>;
+def : InstAlias<"sync", (SYNC 0)>, Requires<[HasSYNC]>;
+def : InstAlias<"msync", (SYNC 0)>, Requires<[HasSYNC]>;
+def : InstAlias<"lwsync", (SYNC 1)>, Requires<[HasSYNC]>;
+def : InstAlias<"ptesync", (SYNC 2)>, Requires<[HasSYNC]>;
 
 def : InstAlias<"wait", (WAIT 0)>;
 def : InstAlias<"waitrsv", (WAIT 1)>;
 def : InstAlias<"waitimpl", (WAIT 2)>;
 
+def : InstAlias<"mbar", (MBAR 0)>, Requires<[IsBookE]>;
+
 def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
 def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
 def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
@@ -3050,9 +3273,57 @@ def : InstAlias<"crnot $bx, $by", (CRNOR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>
 def : InstAlias<"mtxer $Rx", (MTSPR 1, gprc:$Rx)>;
 def : InstAlias<"mfxer $Rx", (MFSPR gprc:$Rx, 1)>;
 
+def : InstAlias<"mfrtcu $Rx", (MFSPR gprc:$Rx, 4)>;
+def : InstAlias<"mfrtcl $Rx", (MFSPR gprc:$Rx, 5)>;
+
+def : InstAlias<"mtdscr $Rx", (MTSPR 17, gprc:$Rx)>;
+def : InstAlias<"mfdscr $Rx", (MFSPR gprc:$Rx, 17)>;
+
+def : InstAlias<"mtdsisr $Rx", (MTSPR 18, gprc:$Rx)>;
+def : InstAlias<"mfdsisr $Rx", (MFSPR gprc:$Rx, 18)>;
+
+def : InstAlias<"mtdar $Rx", (MTSPR 19, gprc:$Rx)>;
+def : InstAlias<"mfdar $Rx", (MFSPR gprc:$Rx, 19)>;
+
+def : InstAlias<"mtdec $Rx", (MTSPR 22, gprc:$Rx)>;
+def : InstAlias<"mfdec $Rx", (MFSPR gprc:$Rx, 22)>;
+
+def : InstAlias<"mtsdr1 $Rx", (MTSPR 25, gprc:$Rx)>;
+def : InstAlias<"mfsdr1 $Rx", (MFSPR gprc:$Rx, 25)>;
+
+def : InstAlias<"mtsrr0 $Rx", (MTSPR 26, gprc:$Rx)>;
+def : InstAlias<"mfsrr0 $Rx", (MFSPR gprc:$Rx, 26)>;
+
+def : InstAlias<"mtsrr1 $Rx", (MTSPR 27, gprc:$Rx)>;
+def : InstAlias<"mfsrr1 $Rx", (MFSPR gprc:$Rx, 27)>;
+
+def : InstAlias<"mtsrr2 $Rx", (MTSPR 990, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfsrr2 $Rx", (MFSPR gprc:$Rx, 990)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtsrr3 $Rx", (MTSPR 991, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfsrr3 $Rx", (MFSPR gprc:$Rx, 991)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtcfar $Rx", (MTSPR 28, gprc:$Rx)>;
+def : InstAlias<"mfcfar $Rx", (MFSPR gprc:$Rx, 28)>;
+
+def : InstAlias<"mtamr $Rx", (MTSPR 29, gprc:$Rx)>;
+def : InstAlias<"mfamr $Rx", (MFSPR gprc:$Rx, 29)>;
+
+def : InstAlias<"mtpid $Rx", (MTSPR 48, gprc:$Rx)>, Requires<[IsBookE]>;
+def : InstAlias<"mfpid $Rx", (MFSPR gprc:$Rx, 48)>, Requires<[IsBookE]>;
+
 def : InstAlias<"mftb $Rx", (MFTB gprc:$Rx, 268)>;
+def : InstAlias<"mftbl $Rx", (MFTB gprc:$Rx, 268)>;
 def : InstAlias<"mftbu $Rx", (MFTB gprc:$Rx, 269)>;
 
+def : InstAlias<"mttbl $Rx", (MTSPR 284, gprc:$Rx)>;
+def : InstAlias<"mttbu $Rx", (MTSPR 285, gprc:$Rx)>;
+
+def : InstAlias<"mftblo $Rx", (MFSPR gprc:$Rx, 989)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mttblo $Rx", (MTSPR 989, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mftbhi $Rx", (MFSPR gprc:$Rx, 988)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mttbhi $Rx", (MTSPR 988, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+
 def : InstAlias<"xnop", (XORI R0, R0, 0)>;
 
 def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
@@ -3063,6 +3334,60 @@ def : InstAlias<"not. $rA, $rB", (NOR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
 
 def : InstAlias<"mtcr $rA", (MTCRF8 255, g8rc:$rA)>;
 
+foreach BATR = 0-3 in {
+    def : InstAlias<"mtdbatu "#BATR#", $Rx",
+                    (MTSPR !add(BATR, !add(BATR, 536)), gprc:$Rx)>,
+                    Requires<[IsPPC6xx]>;
+    def : InstAlias<"mfdbatu $Rx, "#BATR,
+                    (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 536)))>,
+                    Requires<[IsPPC6xx]>;
+    def : InstAlias<"mtdbatl "#BATR#", $Rx",
+                    (MTSPR !add(BATR, !add(BATR, 537)), gprc:$Rx)>,
+                    Requires<[IsPPC6xx]>;
+    def : InstAlias<"mfdbatl $Rx, "#BATR,
+                    (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 537)))>,
+                    Requires<[IsPPC6xx]>;
+    def : InstAlias<"mtibatu "#BATR#", $Rx",
+                    (MTSPR !add(BATR, !add(BATR, 528)), gprc:$Rx)>,
+                    Requires<[IsPPC6xx]>;
+    def : InstAlias<"mfibatu $Rx, "#BATR,
+                    (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 528)))>,
+                    Requires<[IsPPC6xx]>;
+    def : InstAlias<"mtibatl "#BATR#", $Rx",
+                    (MTSPR !add(BATR, !add(BATR, 529)), gprc:$Rx)>,
+                    Requires<[IsPPC6xx]>;
+    def : InstAlias<"mfibatl $Rx, "#BATR,
+                    (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 529)))>,
+                    Requires<[IsPPC6xx]>;
+}
+
+foreach BR = 0-7 in {
+    def : InstAlias<"mfbr"#BR#" $Rx",
+                    (MFDCR gprc:$Rx, !add(BR, 0x80))>,
+                    Requires<[IsPPC4xx]>;
+    def : InstAlias<"mtbr"#BR#" $Rx",
+                    (MTDCR gprc:$Rx, !add(BR, 0x80))>,
+                    Requires<[IsPPC4xx]>;
+}
+
+def : InstAlias<"mtdccr $Rx", (MTSPR 1018, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfdccr $Rx", (MFSPR gprc:$Rx, 1018)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mticcr $Rx", (MTSPR 1019, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mficcr $Rx", (MFSPR gprc:$Rx, 1019)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtdear $Rx", (MTSPR 981, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfdear $Rx", (MFSPR gprc:$Rx, 981)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtesr $Rx", (MTSPR 980, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfesr $Rx", (MFSPR gprc:$Rx, 980)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mfspefscr $Rx", (MFSPR gprc:$Rx, 512)>;
+def : InstAlias<"mtspefscr $Rx", (MTSPR 512, gprc:$Rx)>;
+
+def : InstAlias<"mttcr $Rx", (MTSPR 986, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mftcr $Rx", (MFSPR gprc:$Rx, 986)>, Requires<[IsPPC4xx]>;
+
 def LAx : PPCAsmPseudo<"la $rA, $addr", (ins gprc:$rA, memri:$addr)>;
 
 def SUBI : PPCAsmPseudo<"subi $rA, $rB, $imm",
@@ -3082,25 +3407,25 @@ def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
 def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>;
 def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>;
 
-def : InstAlias<"mfsprg $RT, 0", (MFSPR gprc:$RT, 272)>;
-def : InstAlias<"mfsprg $RT, 1", (MFSPR gprc:$RT, 273)>;
-def : InstAlias<"mfsprg $RT, 2", (MFSPR gprc:$RT, 274)>;
-def : InstAlias<"mfsprg $RT, 3", (MFSPR gprc:$RT, 275)>;
-
-def : InstAlias<"mfsprg0 $RT", (MFSPR gprc:$RT, 272)>;
-def : InstAlias<"mfsprg1 $RT", (MFSPR gprc:$RT, 273)>;
-def : InstAlias<"mfsprg2 $RT", (MFSPR gprc:$RT, 274)>;
-def : InstAlias<"mfsprg3 $RT", (MFSPR gprc:$RT, 275)>;
+def : InstAlias<"mfasr $RT", (MFSPR gprc:$RT, 280)>;
+def : InstAlias<"mtasr $RT", (MTSPR 280, gprc:$RT)>;
 
-def : InstAlias<"mtsprg 0, $RT", (MTSPR 272, gprc:$RT)>;
-def : InstAlias<"mtsprg 1, $RT", (MTSPR 273, gprc:$RT)>;
-def : InstAlias<"mtsprg 2, $RT", (MTSPR 274, gprc:$RT)>;
-def : InstAlias<"mtsprg 3, $RT", (MTSPR 275, gprc:$RT)>;
-
-def : InstAlias<"mtsprg0 $RT", (MTSPR 272, gprc:$RT)>;
-def : InstAlias<"mtsprg1 $RT", (MTSPR 273, gprc:$RT)>;
-def : InstAlias<"mtsprg2 $RT", (MTSPR 274, gprc:$RT)>;
-def : InstAlias<"mtsprg3 $RT", (MTSPR 275, gprc:$RT)>;
+foreach SPRG = 0-3 in {
+  def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 272))>;
+  def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 272))>;
+  def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>;
+  def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>;
+}
+foreach SPRG = 4-7 in {
+  def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 256))>,
+                  Requires<[IsBookE]>;
+  def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 256))>,
+                  Requires<[IsBookE]>;
+  def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>,
+                  Requires<[IsBookE]>;
+  def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>,
+                  Requires<[IsBookE]>;
+}
 
 def : InstAlias<"mtasr $RS", (MTSPR 280, gprc:$RS)>;
 
@@ -3119,6 +3444,15 @@ def : InstAlias<"mtsrr1 $RT", (MTSPR 27, gprc:$RT)>;
 
 def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>;
 
+def : InstAlias<"tlbrehi $RS, $A", (TLBRE2 gprc:$RS, gprc:$A, 0)>,
+                Requires<[IsPPC4xx]>;
+def : InstAlias<"tlbrelo $RS, $A", (TLBRE2 gprc:$RS, gprc:$A, 1)>,
+                Requires<[IsPPC4xx]>;
+def : InstAlias<"tlbwehi $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 0)>,
+                Requires<[IsPPC4xx]>;
+def : InstAlias<"tlbwelo $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 1)>,
+                Requires<[IsPPC4xx]>;
+
 def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b",
                           (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
 def EXTLWIo : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b",
@@ -3367,3 +3701,18 @@ defm : TrapExtendedMnemonic<"lnl", 5>;
 defm : TrapExtendedMnemonic<"lng", 6>;
 defm : TrapExtendedMnemonic<"u", 31>;
 
+// Atomic loads
+def : Pat<(atomic_load_8  iaddr:$src), (LBZ  memri:$src)>;
+def : Pat<(atomic_load_16 iaddr:$src), (LHZ  memri:$src)>;
+def : Pat<(atomic_load_32 iaddr:$src), (LWZ  memri:$src)>;
+def : Pat<(atomic_load_8  xaddr:$src), (LBZX memrr:$src)>;
+def : Pat<(atomic_load_16 xaddr:$src), (LHZX memrr:$src)>;
+def : Pat<(atomic_load_32 xaddr:$src), (LWZX memrr:$src)>;
+
+// Atomic stores
+def : Pat<(atomic_store_8  iaddr:$ptr, i32:$val), (STB  gprc:$val, memri:$ptr)>;
+def : Pat<(atomic_store_16 iaddr:$ptr, i32:$val), (STH  gprc:$val, memri:$ptr)>;
+def : Pat<(atomic_store_32 iaddr:$ptr, i32:$val), (STW  gprc:$val, memri:$ptr)>;
+def : Pat<(atomic_store_8  xaddr:$ptr, i32:$val), (STBX gprc:$val, memrr:$ptr)>;
+def : Pat<(atomic_store_16 xaddr:$ptr, i32:$val), (STHX gprc:$val, memrr:$ptr)>;
+def : Pat<(atomic_store_32 xaddr:$ptr, i32:$val), (STWX gprc:$val, memrr:$ptr)>;
diff --git a/lib/Target/PowerPC/PPCInstrSPE.td b/lib/Target/PowerPC/PPCInstrSPE.td
new file mode 100644
index 0000000..cc3a4d2
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrSPE.td
@@ -0,0 +1,447 @@
+//=======-- PPCInstrSPE.td - The PowerPC SPE Extension -*- tablegen -*-=======//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Signal Processing Engine extension to
+// the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+class EVXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr,
+               InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> {
+  bits<5> RT;
+  bits<5> RA;
+  bits<5> RB;
+
+  let Pattern = [];
+  
+  let Inst{6-10}  = RT;
+  let Inst{11-15} = RA;
+  let Inst{16-20} = RB;
+  let Inst{21-31} = xo;
+}
+
+class EVXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr,
+               InstrItinClass itin> : EVXForm_1<xo, OOL, IOL, asmstr, itin> {
+  let RB = 0;
+}
+
+class EVXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr,
+              InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> {
+  bits<3> crD;
+  bits<5> RA;
+  bits<5> RB;
+
+  let Pattern = [];
+  
+  let Inst{6-8}  = crD;
+  let Inst{9-10}  = 0;
+  let Inst{11-15} = RA;
+  let Inst{16-20} = RB;
+  let Inst{21-31} = xo;
+}
+
+class EVXForm_D<bits<11> xo, dag OOL, dag IOL, string asmstr,
+               InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> {
+  bits<5> RT;
+  bits<21> D;
+
+  let Pattern = [];
+
+  let Inst{6-10}  = RT;
+  let Inst{20} = D{0};
+  let Inst{19} = D{1};
+  let Inst{18} = D{2};
+  let Inst{17} = D{3};
+  let Inst{16} = D{4};
+  let Inst{15} = D{5};
+  let Inst{14} = D{6};
+  let Inst{13} = D{7};
+  let Inst{12} = D{8};
+  let Inst{11} = D{9};
+  let Inst{11-20} = D{0-9};
+  let Inst{21-31} = xo;
+}
+
+let Predicates = [HasSPE], isAsmParserOnly = 1 in {
+
+def EVLDD          : EVXForm_D<769, (outs gprc:$RT), (ins spe8dis:$dst),
+                               "evldd $RT, $dst", IIC_VecFP>;
+def EVLDW          : EVXForm_D<771, (outs gprc:$RT), (ins spe8dis:$dst),
+                               "evldw $RT, $dst", IIC_VecFP>;
+def EVLDH          : EVXForm_D<773, (outs gprc:$RT), (ins spe8dis:$dst),
+                               "evldh $RT, $dst", IIC_VecFP>;
+def EVLHHESPLAT    : EVXForm_D<777, (outs gprc:$RT), (ins spe2dis:$dst),
+                               "evlhhesplat $RT, $dst", IIC_VecFP>;
+def EVLHHOUSPLAT   : EVXForm_D<781, (outs gprc:$RT), (ins spe2dis:$dst),
+                               "evlhhousplat $RT, $dst", IIC_VecFP>;
+def EVLHHOSSPLAT   : EVXForm_D<783, (outs gprc:$RT), (ins spe2dis:$dst),
+                               "evlhhossplat $RT, $dst", IIC_VecFP>;
+def EVLWHE         : EVXForm_D<785, (outs gprc:$RT), (ins spe4dis:$dst),
+                               "evlwhe $RT, $dst", IIC_VecFP>;
+def EVLWHOU        : EVXForm_D<789, (outs gprc:$RT), (ins spe4dis:$dst),
+                               "evlwhou $RT, $dst", IIC_VecFP>;
+def EVLWHOS        : EVXForm_D<791, (outs gprc:$RT), (ins spe4dis:$dst),
+                               "evlwhos $RT, $dst", IIC_VecFP>;
+def EVLWWSPLAT     : EVXForm_D<793, (outs gprc:$RT), (ins spe4dis:$dst),
+                               "evlwwsplat $RT, $dst", IIC_VecFP>;
+def EVLWHSPLAT     : EVXForm_D<797, (outs gprc:$RT), (ins spe4dis:$dst),
+                               "evlwhsplat $RT, $dst", IIC_VecFP>;
+
+def EVSTDD         : EVXForm_D<801, (outs), (ins gprc:$RT, spe8dis:$dst),
+                               "evstdd $RT, $dst", IIC_VecFP>;
+def EVSTDH         : EVXForm_D<805, (outs), (ins gprc:$RT, spe8dis:$dst),
+                               "evstdh $RT, $dst", IIC_VecFP>;
+def EVSTDW         : EVXForm_D<803, (outs), (ins gprc:$RT, spe8dis:$dst),
+                               "evstdw $RT, $dst", IIC_VecFP>;
+def EVSTWHE        : EVXForm_D<817, (outs), (ins gprc:$RT, spe4dis:$dst),
+                               "evstwhe $RT, $dst", IIC_VecFP>;
+def EVSTWHO        : EVXForm_D<821, (outs), (ins gprc:$RT, spe4dis:$dst),
+                               "evstwho $RT, $dst", IIC_VecFP>;
+def EVSTWWE        : EVXForm_D<825, (outs), (ins gprc:$RT, spe4dis:$dst),
+                               "evstwwe $RT, $dst", IIC_VecFP>;
+def EVSTWWO        : EVXForm_D<829, (outs), (ins gprc:$RT, spe4dis:$dst),
+                               "evstwwo $RT, $dst", IIC_VecFP>;
+
+def EVMRA : EVXForm_1<1220, (outs gprc:$RT), (ins gprc:$RA),
+                      "evmra $RT, $RA", IIC_VecFP> {
+  let RB = 0;
+}
+
+def BRINC          : EVXForm_1<527, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "brinc $RT, $RA, $RB", IIC_VecFP>;
+def EVABS          : EVXForm_2<520, (outs gprc:$RT), (ins gprc:$RA),
+                               "evabs $RT, $RA", IIC_VecFP>;
+
+def EVADDIW        : EVXForm_1<514, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+                               "evaddiw $RT, $RB, $RA", IIC_VecFP>;
+def EVADDSMIAAW    : EVXForm_2<1225, (outs gprc:$RT), (ins gprc:$RA),
+                               "evaddsmiaaw $RT, $RA", IIC_VecFP>;
+def EVADDSSIAAW    : EVXForm_2<1217, (outs gprc:$RT), (ins gprc:$RA),
+                               "evaddssiaaw $RT, $RA", IIC_VecFP>;
+def EVADDUSIAAW    : EVXForm_2<1216, (outs gprc:$RT), (ins gprc:$RA),
+                               "evaddusiaaw $RT, $RA", IIC_VecFP>;
+def EVADDUMIAAW    : EVXForm_2<1224, (outs gprc:$RT), (ins gprc:$RA),
+                               "evaddumiaaw $RT, $RA", IIC_VecFP>;
+def EVADDW         : EVXForm_1<512, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evaddw $RT, $RA, $RB", IIC_VecFP>;
+
+def EVAND          : EVXForm_1<529, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evand $RT, $RA, $RB", IIC_VecFP>;
+def EVANDC         : EVXForm_1<530, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evandc $RT, $RA, $RB", IIC_VecFP>;
+
+def EVCMPEQ        : EVXForm_3<564, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+                               "evcmpeq $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPGTS       : EVXForm_3<561, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+                               "evcmpgts $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPGTU       : EVXForm_3<560, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+                               "evcmpgtu $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPLTS       : EVXForm_3<563, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+                               "evcmplts $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPLTU       : EVXForm_3<562, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+                               "evcmpltu $crD, $RA, $RB", IIC_VecFP>;
+
+def EVCNTLSW       : EVXForm_2<526, (outs gprc:$RT), (ins gprc:$RA),
+                               "evcntlsw $RT, $RA", IIC_VecFP>;
+def EVCNTLZW       : EVXForm_2<525, (outs gprc:$RT), (ins gprc:$RA),
+                               "evcntlzw $RT, $RA", IIC_VecFP>;
+
+def EVDIVWS        : EVXForm_1<1222, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evdivws $RT, $RA, $RB", IIC_VecFP>;
+def EVDIVWU        : EVXForm_1<1223, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evdivwu $RT, $RA, $RB", IIC_VecFP>;
+
+def EVEQV          : EVXForm_1<537, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "eveqv $RT, $RA, $RB", IIC_VecFP>;
+
+def EVEXTSB        : EVXForm_2<522, (outs gprc:$RT), (ins gprc:$RA),
+                               "evextsb $RT, $RA", IIC_VecFP>;
+def EVEXTSH        : EVXForm_2<523, (outs gprc:$RT), (ins gprc:$RA),
+                               "evextsh $RT, $RA", IIC_VecFP>;
+
+def EVLDDX         : EVXForm_1<768, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evlddx $RT, $RA, $RB", IIC_VecFP>;
+def EVLDWX         : EVXForm_1<770, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evldwx $RT, $RA, $RB", IIC_VecFP>;
+def EVLDHX         : EVXForm_1<772, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evldhx $RT, $RA, $RB", IIC_VecFP>;
+def EVLHHESPLATX   : EVXForm_1<776, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evlhhesplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLHHOUSPLATX  : EVXForm_1<780, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evlhhousplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLHHOSSPLATX  : EVXForm_1<782, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evlhhossplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHEX        : EVXForm_1<784, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evlwhex $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHOUX       : EVXForm_1<788, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evlwhoux $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHOSX       : EVXForm_1<790, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evlwhosx $RT, $RA, $RB", IIC_VecFP>;
+def EVLWWSPLATX    : EVXForm_1<792, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evlwwsplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHSPLATX    : EVXForm_1<796, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evlwhsplatx $RT, $RA, $RB", IIC_VecFP>;
+
+def EVMERGEHI      : EVXForm_1<556, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmergehi $RT, $RA, $RB", IIC_VecFP>;
+def EVMERGELO      : EVXForm_1<557, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmergelo $RT, $RA, $RB", IIC_VecFP>;
+def EVMERGEHILO    : EVXForm_1<558, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmergehilo $RT, $RA, $RB", IIC_VecFP>;
+def EVMERGELOHI    : EVXForm_1<559, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmergelohi $RT, $RA, $RB", IIC_VecFP>;
+
+def EVMHEGSMFAA    : EVXForm_1<1323, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhegsmfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGSMFAN    : EVXForm_1<1451, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhegsmfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGSMIAA    : EVXForm_1<1321, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhegsmiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGSMIAN    : EVXForm_1<1449, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhegsmian $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGUMIAA    : EVXForm_1<1320, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhegumiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGUMIAN    : EVXForm_1<1448, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhegumian $RT, $RA, $RB", IIC_VecFP>;
+
+def EVMHESMF       : EVXForm_1<1035, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhesmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMFA      : EVXForm_1<1067, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhesmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMFAAW    : EVXForm_1<1291, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhesmfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMFANW    : EVXForm_1<1419, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhesmfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMI       : EVXForm_1<1033, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhesmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMIA      : EVXForm_1<1065, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhesmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMIAAW    : EVXForm_1<1289, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhesmiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMIANW    : EVXForm_1<1417, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhesmianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSF       : EVXForm_1<1027, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhessf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSFA      : EVXForm_1<1059, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhessfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSFAAW    : EVXForm_1<1283, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhessfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSFANW    : EVXForm_1<1411, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhessfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSIAAW    : EVXForm_1<1281, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhessiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSIANW    : EVXForm_1<1409, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhessianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMI       : EVXForm_1<1032, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmheumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMIA      : EVXForm_1<1064, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmheumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMIAAW    : EVXForm_1<1288, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmheumiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMIANW    : EVXForm_1<1416, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmheumianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUSIAAW    : EVXForm_1<1280, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmheusiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUSIANW    : EVXForm_1<1408, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmheusianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMFAA    : EVXForm_1<1327, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhogsmfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMFAN    : EVXForm_1<1455, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhogsmfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMIAA    : EVXForm_1<1325, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhogsmiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMIAN    : EVXForm_1<1453, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhogsmian $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGUMIAA    : EVXForm_1<1324, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhogumiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGUMIAN    : EVXForm_1<1452, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhogumian $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMF       : EVXForm_1<1039, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhosmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMFA      : EVXForm_1<1071, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhosmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMFAAW    : EVXForm_1<1295, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhosmfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMFANW    : EVXForm_1<1423, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhosmfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMI       : EVXForm_1<1037, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhosmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMIA      : EVXForm_1<1069, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhosmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMIAAW    : EVXForm_1<1293, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhosmiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMIANW    : EVXForm_1<1421, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhosmianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSF       : EVXForm_1<1031, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhossf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSFA      : EVXForm_1<1063, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhossfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSFAAW    : EVXForm_1<1287, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhossfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSFANW    : EVXForm_1<1415, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhossfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSIAAW    : EVXForm_1<1285, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhossiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSIANW    : EVXForm_1<1413, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhossianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMI       : EVXForm_1<1036, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhoumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMIA      : EVXForm_1<1068, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhoumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMIAAW    : EVXForm_1<1292, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhoumiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMIANW    : EVXForm_1<1420, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhoumianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUSIAAW    : EVXForm_1<1284, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhousiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUSIANW    : EVXForm_1<1412, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmhousianw $RT, $RA, $RB", IIC_VecFP>;
+
+
+def EVMWHSMF       : EVXForm_1<1103, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwhsmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSMFA      : EVXForm_1<1135, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwhsmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSMI       : EVXForm_1<1101, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwhsmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSMIA      : EVXForm_1<1133, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwhsmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSSF       : EVXForm_1<1095, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwhssf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSSFA      : EVXForm_1<1127, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwhssfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHUMI       : EVXForm_1<1100, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwhumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHUMIA      : EVXForm_1<1132, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwhumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSMIAAW    : EVXForm_1<1353, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwlsmiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSMIANW    : EVXForm_1<1481, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwlsmianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSSIAAW    : EVXForm_1<1345, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwlssiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSSIANW    : EVXForm_1<1473, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwlssianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMI       : EVXForm_1<1096, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwlumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMIA      : EVXForm_1<1128, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwlumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMIAAW    : EVXForm_1<1352, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwlumiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMIANW    : EVXForm_1<1480, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwlumianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUSIAAW    : EVXForm_1<1344, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwlusiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUSIANW    : EVXForm_1<1472, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwlusianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMF        : EVXForm_1<1115, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwsmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMFA       : EVXForm_1<1147, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwsmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMFAA      : EVXForm_1<1371, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwsmfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMFAN      : EVXForm_1<1499, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwsmfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMI        : EVXForm_1<1113, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwsmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMIA       : EVXForm_1<1145, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwsmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMIAA      : EVXForm_1<1369, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwsmiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMIAN      : EVXForm_1<1497, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwsmian $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSF        : EVXForm_1<1107, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwssf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSFA       : EVXForm_1<1139, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwssfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSFAA      : EVXForm_1<1363, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwssfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSFAN      : EVXForm_1<1491, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwssfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMI        : EVXForm_1<1112, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMIA       : EVXForm_1<1144, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMIAA      : EVXForm_1<1368, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwumiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMIAN      : EVXForm_1<1496, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evmwumian $RT, $RA, $RB", IIC_VecFP>;
+
+
+def EVNAND         : EVXForm_1<542, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evnand $RT, $RA, $RB", IIC_VecFP>;
+
+def EVNEG          : EVXForm_2<521, (outs gprc:$RT), (ins gprc:$RA),
+                               "evneg $RT, $RA", IIC_VecFP>;
+
+def EVNOR          : EVXForm_1<536, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evnor $RT, $RA, $RB", IIC_VecFP>;
+def EVOR           : EVXForm_1<535, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evor $RT, $RA, $RB", IIC_VecFP>;
+def EVORC          : EVXForm_1<539, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evorc $RT, $RA, $RB", IIC_VecFP>;
+
+def EVRLWI         : EVXForm_1<554, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+                               "evrlwi $RT, $RA, $RB", IIC_VecFP>;
+def EVRLW          : EVXForm_1<552, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evrlw $RT, $RA, $RB", IIC_VecFP>;
+
+def EVRNDW         : EVXForm_2<524, (outs gprc:$RT), (ins gprc:$RA),
+                               "evrndw $RT, $RA", IIC_VecFP>;
+
+def EVSLWI         : EVXForm_1<550, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+                               "evslwi $RT, $RA, $RB", IIC_VecFP>;
+def EVSLW          : EVXForm_1<548, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evslw $RT, $RA, $RB", IIC_VecFP>;
+
+def EVSPLATFI      : EVXForm_2<555, (outs gprc:$RT), (ins i32imm:$RA),
+                               "evsplatfi $RT, $RA", IIC_VecFP>;
+def EVSPLATI       : EVXForm_2<553, (outs gprc:$RT), (ins i32imm:$RA),
+                               "evsplati $RT, $RA", IIC_VecFP>;
+
+def EVSRWIS        : EVXForm_1<547, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+                               "evsrwis $RT, $RA, $RB", IIC_VecFP>;
+def EVSRWIU        : EVXForm_1<546, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+                               "evsrwiu $RT, $RA, $RB", IIC_VecFP>;
+def EVSRWS         : EVXForm_1<545, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evsrws $RT, $RA, $RB", IIC_VecFP>;
+def EVSRWU         : EVXForm_1<544, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evsrwu $RT, $RA, $RB", IIC_VecFP>;
+
+def EVSTDDX        : EVXForm_1<800, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+                               "evstddx $RT, $RA, $RB", IIC_VecFP>;
+def EVSTDHX        : EVXForm_1<804, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+                               "evstdhx $RT, $RA, $RB", IIC_VecFP>;
+def EVSTDWX        : EVXForm_1<802, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+                               "evstdwx $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWHEX       : EVXForm_1<816, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+                               "evstwhex $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWHOX       : EVXForm_1<820, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+                               "evstwhox $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWWEX       : EVXForm_1<824, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+                               "evstwwex $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWWOX       : EVXForm_1<828, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+                               "evstwwox $RT, $RA, $RB", IIC_VecFP>;
+
+def EVSUBFSSIAAW   : EVXForm_2<1219, (outs gprc:$RT), (ins gprc:$RA),
+                               "evsubfssiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFSMIAAW   : EVXForm_2<1227, (outs gprc:$RT), (ins gprc:$RA),
+                               "evsubfsmiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFUMIAAW   : EVXForm_2<1226, (outs gprc:$RT), (ins gprc:$RA),
+                               "evsubfumiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFUSIAAW   : EVXForm_2<1218, (outs gprc:$RT), (ins gprc:$RA),
+                               "evsubfusiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFW        : EVXForm_1<516, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evsubfw $RT, $RA, $RB", IIC_VecFP>;
+def EVSUBIFW       : EVXForm_1<518, (outs gprc:$RT), (ins u5imm:$RA, gprc:$RB),
+                               "evsubifw $RT, $RA, $RB", IIC_VecFP>;
+def EVXOR          : EVXForm_1<534, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+                               "evxor $RT, $RA, $RB", IIC_VecFP>;
+
+} // HasSPE
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 49bcc48..2c8f998 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -47,23 +47,24 @@ let Uses = [RM] in {
 
   // Load indexed instructions
   let mayLoad = 1, canFoldAsLoad = 1 in {
-    def LXSDX : XForm_1<31, 588,
+    def LXSDX : XX1Form<31, 588,
                         (outs vsfrc:$XT), (ins memrr:$src),
                         "lxsdx $XT, $src", IIC_LdStLFD,
                         [(set f64:$XT, (load xoaddr:$src))]>;
 
-    def LXVD2X : XForm_1<31, 844,
+    def LXVD2X : XX1Form<31, 844,
                          (outs vsrc:$XT), (ins memrr:$src),
                          "lxvd2x $XT, $src", IIC_LdStLFD,
-                         [(set v2f64:$XT, (load xoaddr:$src))]>;
+                         [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>;
 
-    def LXVDSX : XForm_1<31, 332,
+    def LXVDSX : XX1Form<31, 332,
                          (outs vsrc:$XT), (ins memrr:$src),
                          "lxvdsx $XT, $src", IIC_LdStLFD, []>;
 
-    def LXVW4X : XForm_1<31, 780,
+    def LXVW4X : XX1Form<31, 780,
                          (outs vsrc:$XT), (ins memrr:$src),
-                         "lxvw4x $XT, $src", IIC_LdStLFD, []>;
+                         "lxvw4x $XT, $src", IIC_LdStLFD,
+                         [(set v4i32:$XT, (int_ppc_vsx_lxvw4x xoaddr:$src))]>;
   }
 
   // Store indexed instructions
@@ -76,11 +77,12 @@ let Uses = [RM] in {
     def STXVD2X : XX1Form<31, 972,
                          (outs), (ins vsrc:$XT, memrr:$dst),
                          "stxvd2x $XT, $dst", IIC_LdStSTFD,
-                         [(store v2f64:$XT, xoaddr:$dst)]>;
+                         [(int_ppc_vsx_stxvd2x v2f64:$XT, xoaddr:$dst)]>;
 
     def STXVW4X : XX1Form<31, 908,
                          (outs), (ins vsrc:$XT, memrr:$dst),
-                         "stxvw4x $XT, $dst", IIC_LdStSTFD, []>;
+                         "stxvw4x $XT, $dst", IIC_LdStSTFD,
+                         [(int_ppc_vsx_stxvw4x v4i32:$XT, xoaddr:$dst)]>;
   }
 
   // Add/Mul Instructions
@@ -641,24 +643,36 @@ let Uses = [RM] in {
   let isCommutable = 1 in {
   def XSMAXDP : XX3Form<60, 160,
                         (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
-                        "xsmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+                        "xsmaxdp $XT, $XA, $XB", IIC_VecFP,
+                        [(set vsfrc:$XT,
+                              (int_ppc_vsx_xsmaxdp vsfrc:$XA, vsfrc:$XB))]>;
   def XSMINDP : XX3Form<60, 168,
                         (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
-                        "xsmindp $XT, $XA, $XB", IIC_VecFP, []>;
+                        "xsmindp $XT, $XA, $XB", IIC_VecFP,
+                        [(set vsfrc:$XT,
+                              (int_ppc_vsx_xsmindp vsfrc:$XA, vsfrc:$XB))]>;
 
   def XVMAXDP : XX3Form<60, 224,
                         (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                        "xvmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+                        "xvmaxdp $XT, $XA, $XB", IIC_VecFP,
+                        [(set vsrc:$XT,
+                              (int_ppc_vsx_xvmaxdp vsrc:$XA, vsrc:$XB))]>;
   def XVMINDP : XX3Form<60, 232,
                         (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                        "xvmindp $XT, $XA, $XB", IIC_VecFP, []>;
+                        "xvmindp $XT, $XA, $XB", IIC_VecFP,
+                        [(set vsrc:$XT,
+                              (int_ppc_vsx_xvmindp vsrc:$XA, vsrc:$XB))]>;
 
   def XVMAXSP : XX3Form<60, 192,
                         (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                        "xvmaxsp $XT, $XA, $XB", IIC_VecFP, []>;
+                        "xvmaxsp $XT, $XA, $XB", IIC_VecFP,
+                        [(set vsrc:$XT,
+                              (int_ppc_vsx_xvmaxsp vsrc:$XA, vsrc:$XB))]>;
   def XVMINSP : XX3Form<60, 200,
                         (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                        "xvminsp $XT, $XA, $XB", IIC_VecFP, []>;
+                        "xvminsp $XT, $XA, $XB", IIC_VecFP,
+                        [(set vsrc:$XT,
+                              (int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>;
   } // isCommutable
 } // Uses = [RM]
 
@@ -715,6 +729,31 @@ let Uses = [RM] in {
                        (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
                        "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
 } // neverHasSideEffects
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation.  Expanded after
+// instruction selection into a branch sequence.
+let usesCustomInserter = 1,    // Expanded after instruction selection.
+    PPC970_Single = 1 in {
+
+  def SELECT_CC_VSRC: Pseudo<(outs vsrc:$dst),
+                             (ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC),
+                             "#SELECT_CC_VSRC",
+                             []>;
+  def SELECT_VSRC: Pseudo<(outs vsrc:$dst),
+                          (ins crbitrc:$cond, vsrc:$T, vsrc:$F),
+                          "#SELECT_VSRC",
+                          [(set v2f64:$dst,
+                                (select i1:$cond, v2f64:$T, v2f64:$F))]>;
+  def SELECT_CC_VSFRC: Pseudo<(outs f8rc:$dst),
+                              (ins crrc:$cond, f8rc:$T, f8rc:$F,
+                               i32imm:$BROPC), "#SELECT_CC_VSFRC",
+                              []>;
+  def SELECT_VSFRC: Pseudo<(outs f8rc:$dst),
+                           (ins crbitrc:$cond, f8rc:$T, f8rc:$F),
+                           "#SELECT_VSFRC",
+                           [(set f64:$dst,
+                                 (select i1:$cond, f64:$T, f64:$F))]>;
+} // usesCustomInserter
 } // AddedComplexity
 
 def : InstAlias<"xvmovdp $XT, $XB",
@@ -811,6 +850,49 @@ def : Pat<(sext_inreg v2i64:$C, v2i32),
 def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))),
           (XVCVSXWDP (XXSLDWI $C, $C, 1))>;
 
+// Loads.
+def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+
+// Stores.
+def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+
+// Selects.
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
+          (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)),
+          (SELECT_VSRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)),
+          (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)),
+          (SELECT_VSRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)),
+          (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)),
+          (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
+          (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
+          (SELECT_VSFRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
+          (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
+          (SELECT_VSFRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
+          (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
+          (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+// Divides.
+def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B),
+          (XVDIVSP $A, $B)>;
+def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
+          (XVDIVDP $A, $B)>;
+
 } // AddedComplexity
 } // HasVSX
 
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
deleted file mode 100644
index e5f113a..0000000
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ /dev/null
@@ -1,482 +0,0 @@
-//===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the JIT interfaces for the 32-bit PowerPC target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PPCJITInfo.h"
-#include "PPCRelocations.h"
-#include "PPCSubtarget.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-static TargetJITInfo::JITCompilerFn JITCompilerFunction;
-
-PPCJITInfo::PPCJITInfo(PPCSubtarget &STI)
-    : Subtarget(STI), is64Bit(STI.isPPC64()) {
-  useGOT = 0;
-}
-
-#define BUILD_ADDIS(RD,RS,IMM16) \
-  ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
-#define BUILD_ORI(RD,RS,UIMM16) \
-  ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
-#define BUILD_ORIS(RD,RS,UIMM16) \
-  ((25 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
-#define BUILD_RLDICR(RD,RS,SH,ME) \
-  ((30 << 26) | ((RS) << 21) | ((RD) << 16) | (((SH) & 31) << 11) | \
-   (((ME) & 63) << 6) | (1 << 2) | ((((SH) >> 5) & 1) << 1))
-#define BUILD_MTSPR(RS,SPR)      \
-  ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1))
-#define BUILD_BCCTRx(BO,BI,LINK) \
-  ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1))
-#define BUILD_B(TARGET, LINK) \
-  ((18 << 26) | (((TARGET) & 0x00FFFFFF) << 2) | ((LINK) & 1))
-
-// Pseudo-ops
-#define BUILD_LIS(RD,IMM16)    BUILD_ADDIS(RD,0,IMM16)
-#define BUILD_SLDI(RD,RS,IMM6) BUILD_RLDICR(RD,RS,IMM6,63-IMM6)
-#define BUILD_MTCTR(RS)        BUILD_MTSPR(RS,9)
-#define BUILD_BCTR(LINK)       BUILD_BCCTRx(20,0,LINK)
-
-static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){
-  intptr_t Offset = ((intptr_t)To - (intptr_t)At) >> 2;
-  unsigned *AtI = (unsigned*)(intptr_t)At;
-
-  if (Offset >= -(1 << 23) && Offset < (1 << 23)) {   // In range?
-    AtI[0] = BUILD_B(Offset, isCall);     // b/bl target
-  } else if (!is64Bit) {
-    AtI[0] = BUILD_LIS(12, To >> 16);     // lis r12, hi16(address)
-    AtI[1] = BUILD_ORI(12, 12, To);       // ori r12, r12, lo16(address)
-    AtI[2] = BUILD_MTCTR(12);             // mtctr r12
-    AtI[3] = BUILD_BCTR(isCall);          // bctr/bctrl
-  } else {
-    AtI[0] = BUILD_LIS(12, To >> 48);      // lis r12, hi16(address)
-    AtI[1] = BUILD_ORI(12, 12, To >> 32);  // ori r12, r12, lo16(address)
-    AtI[2] = BUILD_SLDI(12, 12, 32);       // sldi r12, r12, 32
-    AtI[3] = BUILD_ORIS(12, 12, To >> 16); // oris r12, r12, hi16(address)
-    AtI[4] = BUILD_ORI(12, 12, To);        // ori r12, r12, lo16(address)
-    AtI[5] = BUILD_MTCTR(12);              // mtctr r12
-    AtI[6] = BUILD_BCTR(isCall);           // bctr/bctrl
-  }
-}
-
-extern "C" void PPC32CompilationCallback();
-extern "C" void PPC64CompilationCallback();
-
-// The first clause of the preprocessor directive looks wrong, but it is
-// necessary when compiling this code on non-PowerPC hosts.
-#if (!defined(__ppc__) && !defined(__powerpc__)) || defined(__powerpc64__) || defined(__ppc64__)
-void PPC32CompilationCallback() {
-  llvm_unreachable("This is not a 32bit PowerPC, you can't execute this!");
-}
-#elif !defined(__ELF__)
-// CompilationCallback stub - We can't use a C function with inline assembly in
-// it, because we the prolog/epilog inserted by GCC won't work for us.  Instead,
-// write our own wrapper, which does things our way, so we have complete control
-// over register saving and restoring.
-asm(
-    ".text\n"
-    ".align 2\n"
-    ".globl _PPC32CompilationCallback\n"
-"_PPC32CompilationCallback:\n"
-    // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the 
-    // FIXME: need to save v[0-19] for altivec?
-    // FIXME: could shrink frame
-    // Set up a proper stack frame
-    // FIXME Layout
-    //   PowerPC32 ABI linkage    -  24 bytes
-    //                 parameters -  32 bytes
-    //   13 double registers      - 104 bytes
-    //   8 int registers          -  32 bytes
-    "mflr r0\n"
-    "stw r0,  8(r1)\n"
-    "stwu r1, -208(r1)\n"
-    // Save all int arg registers
-    "stw r10, 204(r1)\n"    "stw r9,  200(r1)\n"
-    "stw r8,  196(r1)\n"    "stw r7,  192(r1)\n"
-    "stw r6,  188(r1)\n"    "stw r5,  184(r1)\n"
-    "stw r4,  180(r1)\n"    "stw r3,  176(r1)\n"
-    // Save all call-clobbered FP regs.
-    "stfd f13, 168(r1)\n"   "stfd f12, 160(r1)\n"
-    "stfd f11, 152(r1)\n"   "stfd f10, 144(r1)\n"
-    "stfd f9,  136(r1)\n"   "stfd f8,  128(r1)\n"
-    "stfd f7,  120(r1)\n"   "stfd f6,  112(r1)\n"
-    "stfd f5,  104(r1)\n"   "stfd f4,   96(r1)\n"
-    "stfd f3,   88(r1)\n"   "stfd f2,   80(r1)\n"
-    "stfd f1,   72(r1)\n"
-    // Arguments to Compilation Callback:
-    // r3 - our lr (address of the call instruction in stub plus 4)
-    // r4 - stub's lr (address of instruction that called the stub plus 4)
-    // r5 - is64Bit - always 0.
-    "mr   r3, r0\n"
-    "lwz  r2, 208(r1)\n" // stub's frame
-    "lwz  r4, 8(r2)\n" // stub's lr
-    "li   r5, 0\n"       // 0 == 32 bit
-    "bl _LLVMPPCCompilationCallback\n"
-    "mtctr r3\n"
-    // Restore all int arg registers
-    "lwz r10, 204(r1)\n"    "lwz r9,  200(r1)\n"
-    "lwz r8,  196(r1)\n"    "lwz r7,  192(r1)\n"
-    "lwz r6,  188(r1)\n"    "lwz r5,  184(r1)\n"
-    "lwz r4,  180(r1)\n"    "lwz r3,  176(r1)\n"
-    // Restore all FP arg registers
-    "lfd f13, 168(r1)\n"    "lfd f12, 160(r1)\n"
-    "lfd f11, 152(r1)\n"    "lfd f10, 144(r1)\n"
-    "lfd f9,  136(r1)\n"    "lfd f8,  128(r1)\n"
-    "lfd f7,  120(r1)\n"    "lfd f6,  112(r1)\n"
-    "lfd f5,  104(r1)\n"    "lfd f4,   96(r1)\n"
-    "lfd f3,   88(r1)\n"    "lfd f2,   80(r1)\n"
-    "lfd f1,   72(r1)\n"
-    // Pop 3 frames off the stack and branch to target
-    "lwz  r1, 208(r1)\n"
-    "lwz  r2, 8(r1)\n"
-    "mtlr r2\n"
-    "bctr\n"
-    );
-
-#else
-// ELF PPC 32 support
-
-// CompilationCallback stub - We can't use a C function with inline assembly in
-// it, because we the prolog/epilog inserted by GCC won't work for us.  Instead,
-// write our own wrapper, which does things our way, so we have complete control
-// over register saving and restoring.
-asm(
-    ".text\n"
-    ".align 2\n"
-    ".globl PPC32CompilationCallback\n"
-"PPC32CompilationCallback:\n"
-    // Make space for 8 ints r[3-10] and 8 doubles f[1-8] and the 
-    // FIXME: need to save v[0-19] for altivec?
-    // FIXME: could shrink frame
-    // Set up a proper stack frame
-    // FIXME Layout
-    //   8 double registers       -  64 bytes
-    //   8 int registers          -  32 bytes
-    "mflr 0\n"
-    "stw 0,  4(1)\n"
-    "stwu 1, -104(1)\n"
-    // Save all int arg registers
-    "stw 10, 100(1)\n"   "stw 9,  96(1)\n"
-    "stw 8,  92(1)\n"    "stw 7,  88(1)\n"
-    "stw 6,  84(1)\n"    "stw 5,  80(1)\n"
-    "stw 4,  76(1)\n"    "stw 3,  72(1)\n"
-    // Save all call-clobbered FP regs.
-    "stfd 8,  64(1)\n"
-    "stfd 7,  56(1)\n"   "stfd 6,  48(1)\n"
-    "stfd 5,  40(1)\n"   "stfd 4,  32(1)\n"
-    "stfd 3,  24(1)\n"   "stfd 2,  16(1)\n"
-    "stfd 1,  8(1)\n"
-    // Arguments to Compilation Callback:
-    // r3 - our lr (address of the call instruction in stub plus 4)
-    // r4 - stub's lr (address of instruction that called the stub plus 4)
-    // r5 - is64Bit - always 0.
-    "mr   3, 0\n"
-    "lwz  5, 104(1)\n" // stub's frame
-    "lwz  4, 4(5)\n" // stub's lr
-    "li   5, 0\n"       // 0 == 32 bit
-    "bl LLVMPPCCompilationCallback\n"
-    "mtctr 3\n"
-    // Restore all int arg registers
-    "lwz 10, 100(1)\n"   "lwz 9,  96(1)\n"
-    "lwz 8,  92(1)\n"    "lwz 7,  88(1)\n"
-    "lwz 6,  84(1)\n"    "lwz 5,  80(1)\n"
-    "lwz 4,  76(1)\n"    "lwz 3,  72(1)\n"
-    // Restore all FP arg registers
-    "lfd 8,  64(1)\n"
-    "lfd 7,  56(1)\n"    "lfd 6,  48(1)\n"
-    "lfd 5,  40(1)\n"    "lfd 4,  32(1)\n"
-    "lfd 3,  24(1)\n"    "lfd 2,  16(1)\n"
-    "lfd 1,  8(1)\n"
-    // Pop 3 frames off the stack and branch to target
-    "lwz  1, 104(1)\n"
-    "lwz  0, 4(1)\n"
-    "mtlr 0\n"
-    "bctr\n"
-    );
-#endif
-
-#if !defined(__powerpc64__) && !defined(__ppc64__)
-void PPC64CompilationCallback() {
-  llvm_unreachable("This is not a 64bit PowerPC, you can't execute this!");
-}
-#else
-#  ifdef __ELF__
-asm(
-    ".text\n"
-    ".align 2\n"
-    ".globl PPC64CompilationCallback\n"
-#if _CALL_ELF == 2
-    ".type PPC64CompilationCallback,@function\n"
-"PPC64CompilationCallback:\n"
-#else
-    ".section \".opd\",\"aw\",@progbits\n"
-    ".align 3\n"
-"PPC64CompilationCallback:\n"
-    ".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n"
-    ".size PPC64CompilationCallback,24\n"
-    ".previous\n"
-    ".align 4\n"
-    ".type PPC64CompilationCallback,@function\n"
-".L.PPC64CompilationCallback:\n"
-#endif
-#  else
-asm(
-    ".text\n"
-    ".align 2\n"
-    ".globl _PPC64CompilationCallback\n"
-"_PPC64CompilationCallback:\n"
-#  endif
-    // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the 
-    // FIXME: need to save v[0-19] for altivec?
-    // Set up a proper stack frame
-    // Layout
-    //   PowerPC64 ABI linkage    -  48 bytes
-    //                 parameters -  64 bytes
-    //   13 double registers      - 104 bytes
-    //   8 int registers          -  64 bytes
-    "mflr 0\n"
-    "std  0,  16(1)\n"
-    "stdu 1, -280(1)\n"
-    // Save all int arg registers
-    "std 10, 272(1)\n"    "std 9,  264(1)\n"
-    "std 8,  256(1)\n"    "std 7,  248(1)\n"
-    "std 6,  240(1)\n"    "std 5,  232(1)\n"
-    "std 4,  224(1)\n"    "std 3,  216(1)\n"
-    // Save all call-clobbered FP regs.
-    "stfd 13, 208(1)\n"    "stfd 12, 200(1)\n"
-    "stfd 11, 192(1)\n"    "stfd 10, 184(1)\n"
-    "stfd 9,  176(1)\n"    "stfd 8,  168(1)\n"
-    "stfd 7,  160(1)\n"    "stfd 6,  152(1)\n"
-    "stfd 5,  144(1)\n"    "stfd 4,  136(1)\n"
-    "stfd 3,  128(1)\n"    "stfd 2,  120(1)\n"
-    "stfd 1,  112(1)\n"
-    // Arguments to Compilation Callback:
-    // r3 - our lr (address of the call instruction in stub plus 4)
-    // r4 - stub's lr (address of instruction that called the stub plus 4)
-    // r5 - is64Bit - always 1.
-    "mr   3, 0\n"      // return address (still in r0)
-    "ld   5, 280(1)\n" // stub's frame
-    "ld   4, 16(5)\n"  // stub's lr
-    "li   5, 1\n"      // 1 == 64 bit
-#  ifdef __ELF__
-    "bl LLVMPPCCompilationCallback\n"
-    "nop\n"
-#  else
-    "bl _LLVMPPCCompilationCallback\n"
-#  endif
-    "mtctr 3\n"
-    // Restore all int arg registers
-    "ld 10, 272(1)\n"    "ld 9,  264(1)\n"
-    "ld 8,  256(1)\n"    "ld 7,  248(1)\n"
-    "ld 6,  240(1)\n"    "ld 5,  232(1)\n"
-    "ld 4,  224(1)\n"    "ld 3,  216(1)\n"
-    // Restore all FP arg registers
-    "lfd 13, 208(1)\n"    "lfd 12, 200(1)\n"
-    "lfd 11, 192(1)\n"    "lfd 10, 184(1)\n"
-    "lfd 9,  176(1)\n"    "lfd 8,  168(1)\n"
-    "lfd 7,  160(1)\n"    "lfd 6,  152(1)\n"
-    "lfd 5,  144(1)\n"    "lfd 4,  136(1)\n"
-    "lfd 3,  128(1)\n"    "lfd 2,  120(1)\n"
-    "lfd 1,  112(1)\n"
-    // Pop 3 frames off the stack and branch to target
-    "ld  1, 280(1)\n"
-    "ld  0, 16(1)\n"
-    "mtlr 0\n"
-    // XXX: any special TOC handling in the ELF case for JIT?
-    "bctr\n"
-    );
-#endif
-
-extern "C" {
-LLVM_LIBRARY_VISIBILITY void *
-LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4,
-                           unsigned *OrigCallAddrPlus4,
-                           bool is64Bit) {
-  // Adjust the pointer to the address of the call instruction in the stub
-  // emitted by emitFunctionStub, rather than the instruction after it.
-  unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
-  unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1;
-
-  void *Target = JITCompilerFunction(StubCallAddr);
-
-  // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite
-  // it to branch directly to the destination.  If so, rewrite it so it does not
-  // need to go through the stub anymore.
-  unsigned OrigCallInst = *OrigCallAddr;
-  if ((OrigCallInst >> 26) == 18) {     // Direct call.
-    intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2;
-    
-    if (Offset >= -(1 << 23) && Offset < (1 << 23)) {   // In range?
-      // Clear the original target out.
-      OrigCallInst &= (63 << 26) | 3;
-      // Fill in the new target.
-      OrigCallInst |= (Offset & ((1 << 24)-1)) << 2;
-      // Replace the call.
-      *OrigCallAddr = OrigCallInst;
-    }
-  }
-
-  // Assert that we are coming from a stub that was created with our
-  // emitFunctionStub.
-  if ((*StubCallAddr >> 26) == 18)
-    StubCallAddr -= 3;
-  else {
-  assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!");
-    StubCallAddr -= is64Bit ? 9 : 6;
-  }
-
-  // Rewrite the stub with an unconditional branch to the target, for any users
-  // who took the address of the stub.
-  EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit);
-  sys::Memory::InvalidateInstructionCache(StubCallAddr, 7*4);
-
-  // Put the address of the target function to call and the address to return to
-  // after calling the target function in a place that is easy to get on the
-  // stack after we restore all regs.
-  return Target;
-}
-}
-
-
-
-TargetJITInfo::LazyResolverFn
-PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) {
-  JITCompilerFunction = Fn;
-  return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback;
-}
-
-TargetJITInfo::StubLayout PPCJITInfo::getStubLayout() {
-  // The stub contains up to 10 4-byte instructions, aligned at 4 bytes: 3
-  // instructions to save the caller's address if this is a lazy-compilation
-  // stub, plus a 1-, 4-, or 7-instruction sequence to load an arbitrary address
-  // into a register and jump through it.
-  StubLayout Result = {10*4, 4};
-  return Result;
-}
-
-#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
-defined(__APPLE__)
-extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
-#endif
-
-void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
-                                   JITCodeEmitter &JCE) {
-  // If this is just a call to an external function, emit a branch instead of a
-  // call.  The code is the same except for one bit of the last instruction.
-  if (Fn != (void*)(intptr_t)PPC32CompilationCallback && 
-      Fn != (void*)(intptr_t)PPC64CompilationCallback) {
-    void *Addr = (void*)JCE.getCurrentPCValue();
-    JCE.emitWordBE(0);
-    JCE.emitWordBE(0);
-    JCE.emitWordBE(0);
-    JCE.emitWordBE(0);
-    JCE.emitWordBE(0);
-    JCE.emitWordBE(0);
-    JCE.emitWordBE(0);
-    EmitBranchToAt((intptr_t)Addr, (intptr_t)Fn, false, is64Bit);
-    sys::Memory::InvalidateInstructionCache(Addr, 7*4);
-    return Addr;
-  }
-
-  void *Addr = (void*)JCE.getCurrentPCValue();
-  if (is64Bit) {
-    JCE.emitWordBE(0xf821ffb1);     // stdu r1,-80(r1)
-    JCE.emitWordBE(0x7d6802a6);     // mflr r11
-    JCE.emitWordBE(0xf9610060);     // std r11, 96(r1)
-  } else if (Subtarget.isDarwinABI()){
-    JCE.emitWordBE(0x9421ffe0);     // stwu r1,-32(r1)
-    JCE.emitWordBE(0x7d6802a6);     // mflr r11
-    JCE.emitWordBE(0x91610028);     // stw r11, 40(r1)
-  } else {
-    JCE.emitWordBE(0x9421ffe0);     // stwu r1,-32(r1)
-    JCE.emitWordBE(0x7d6802a6);     // mflr r11
-    JCE.emitWordBE(0x91610024);     // stw r11, 36(r1)
-  }
-  intptr_t BranchAddr = (intptr_t)JCE.getCurrentPCValue();
-  JCE.emitWordBE(0);
-  JCE.emitWordBE(0);
-  JCE.emitWordBE(0);
-  JCE.emitWordBE(0);
-  JCE.emitWordBE(0);
-  JCE.emitWordBE(0);
-  JCE.emitWordBE(0);
-  EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit);
-  sys::Memory::InvalidateInstructionCache(Addr, 10*4);
-  return Addr;
-}
-
-
-void PPCJITInfo::relocate(void *Function, MachineRelocation *MR,
-                          unsigned NumRelocs, unsigned char* GOTBase) {
-  for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
-    unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
-    intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
-    switch ((PPC::RelocationType)MR->getRelocationType()) {
-    default: llvm_unreachable("Unknown relocation type!");
-    case PPC::reloc_pcrel_bx:
-      // PC-relative relocation for b and bl instructions.
-      ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
-      assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) &&
-             "Relocation out of range!");
-      *RelocPos |= (ResultPtr & ((1 << 24)-1))  << 2;
-      break;
-    case PPC::reloc_pcrel_bcx:
-      // PC-relative relocation for BLT,BLE,BEQ,BGE,BGT,BNE, or other
-      // bcx instructions.
-      ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
-      assert(ResultPtr >= -(1 << 13) && ResultPtr < (1 << 13) &&
-             "Relocation out of range!");
-      *RelocPos |= (ResultPtr & ((1 << 14)-1))  << 2;
-      break;
-    case PPC::reloc_absolute_high:     // high bits of ref -> low 16 of instr
-    case PPC::reloc_absolute_low: {    // low bits of ref  -> low 16 of instr
-      ResultPtr += MR->getConstantVal();
-
-      // If this is a high-part access, get the high-part.
-      if (MR->getRelocationType() == PPC::reloc_absolute_high) {
-        // If the low part will have a carry (really a borrow) from the low
-        // 16-bits into the high 16, add a bit to borrow from.
-        if (((int)ResultPtr << 16) < 0)
-          ResultPtr += 1 << 16;
-        ResultPtr >>= 16;
-      }
-
-      // Do the addition then mask, so the addition does not overflow the 16-bit
-      // immediate section of the instruction.
-      unsigned LowBits  = (*RelocPos + ResultPtr) & 65535;
-      unsigned HighBits = *RelocPos & ~65535;
-      *RelocPos = LowBits | HighBits;  // Slam into low 16-bits
-      break;
-    }
-    case PPC::reloc_absolute_low_ix: {  // low bits of ref  -> low 14 of instr
-      ResultPtr += MR->getConstantVal();
-      // Do the addition then mask, so the addition does not overflow the 16-bit
-      // immediate section of the instruction.
-      unsigned LowBits  = (*RelocPos + ResultPtr) & 0xFFFC;
-      unsigned HighBits = *RelocPos & 0xFFFF0003;
-      *RelocPos = LowBits | HighBits;  // Slam into low 14-bits.
-      break;
-    }
-    }
-  }
-}
-
-void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
-  EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit);
-  sys::Memory::InvalidateInstructionCache(Old, 7*4);
-}
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
deleted file mode 100644
index b6b37ff..0000000
--- a/lib/Target/PowerPC/PPCJITInfo.h
+++ /dev/null
@@ -1,46 +0,0 @@
-//===-- PPCJITInfo.h - PowerPC impl. of the JIT interface -------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PowerPC implementation of the TargetJITInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef POWERPC_JITINFO_H
-#define POWERPC_JITINFO_H
-
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/Target/TargetJITInfo.h"
-
-namespace llvm {
-class PPCSubtarget;
-class PPCJITInfo : public TargetJITInfo {
-protected:
-  PPCSubtarget &Subtarget;
-  bool is64Bit;
-
-public:
-  PPCJITInfo(PPCSubtarget &STI);
-
-  StubLayout getStubLayout() override;
-  void *emitFunctionStub(const Function *F, void *Fn,
-                         JITCodeEmitter &JCE) override;
-  LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
-  void relocate(void *Function, MachineRelocation *MR, unsigned NumRelocs,
-                unsigned char *GOTBase) override;
-
-  /// replaceMachineCodeForFunction - Make it so that calling the function
-  /// whose machine code is at OLD turns into a call to NEW, perhaps by
-  /// overwriting OLD with a branch to NEW.  This is used for self-modifying
-  /// code.
-  ///
-  void replaceMachineCodeForFunction(void *Old, void *New) override;
-};
-}
-
-#endif
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index f8e84a5..880b520 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPC.h"
+#include "PPCSubtarget.h"
 #include "MCTargetDesc/PPCMCExpr.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Twine.h"
@@ -37,14 +38,16 @@ static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
 static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
   const TargetMachine &TM = AP.TM;
   Mangler *Mang = AP.Mang;
-  const DataLayout *DL = TM.getDataLayout();
+  const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
   MCContext &Ctx = AP.OutContext;
+  bool isDarwin = Triple(TM.getTargetTriple()).isOSDarwin();
 
   SmallString<128> Name;
   StringRef Suffix;
-  if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB)
-    Suffix = "$stub";
-  else if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG)
+  if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB) {
+    if (isDarwin)
+      Suffix = "$stub";
+  } else if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG)
     Suffix = "$non_lazy_ptr";
 
   if (!Suffix.empty())
@@ -68,7 +71,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
 
   // If the target flags on the operand changes the name of the symbol, do that
   // before we return the symbol.
-  if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) {
+  if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && isDarwin) {
     MachineModuleInfoImpl::StubValueTy &StubSym =
       getMachOMMI(AP).getFnStubEntry(Sym);
     if (StubSym.getPointer())
@@ -134,8 +137,17 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
     case PPCII::MO_TLS:
       RefKind = MCSymbolRefExpr::VK_PPC_TLS;
       break;
+    case PPCII::MO_TLSGD:
+      RefKind = MCSymbolRefExpr::VK_PPC_TLSGD;
+      break;
+    case PPCII::MO_TLSLD:
+      RefKind = MCSymbolRefExpr::VK_PPC_TLSLD;
+      break;
   }
 
+  if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin)
+    RefKind = MCSymbolRefExpr::VK_PLT;
+
   const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx);
 
   if (!MO.isJTI() && MO.getOffset())
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 6a0aec8..4aff95a 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -8,8 +8,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCMachineFunctionInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
 void PPCFunctionInfo::anchor() { }
 
+MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const {
+  const DataLayout *DL = MF.getSubtarget().getDataLayout();
+  return MF.getContext().GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
+    Twine(MF.getFunctionNumber())+"$poff");
+}
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 33f843d..83de799 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef PPC_MACHINE_FUNCTION_INFO_H
-#define PPC_MACHINE_FUNCTION_INFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H
 
 #include "llvm/CodeGen/MachineFunction.h"
 
@@ -92,6 +92,12 @@ class PPCFunctionInfo : public MachineFunctionInfo {
   /// 64-bit SVR4 ABI.
   SmallVector<unsigned, 3> MustSaveCRs;
 
+  /// Hold onto our MachineFunction context.
+  MachineFunction &MF;
+
+  /// Whether this uses the PIC Base register or not.
+  bool UsesPICBase;
+
 public:
   explicit PPCFunctionInfo(MachineFunction &MF) 
     : FramePointerSaveIndex(0),
@@ -109,7 +115,9 @@ public:
       VarArgsStackOffset(0),
       VarArgsNumGPR(0),
       VarArgsNumFPR(0),
-      CRSpillFrameIndex(0) {}
+      CRSpillFrameIndex(0),
+      MF(MF),
+      UsesPICBase(0) {}
 
   int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
   void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
@@ -170,6 +178,11 @@ public:
   const SmallVectorImpl<unsigned> &
     getMustSaveCRs() const { return MustSaveCRs; }
   void addMustSaveCR(unsigned Reg) { MustSaveCRs.push_back(Reg); }
+
+  void setUsesPICBase(bool uses) { UsesPICBase = uses; }
+  bool usesPICBase() const { return UsesPICBase; }
+
+  MCSymbol *getPICOffsetSymbol() const;
 };
 
 } // end of namespace llvm
diff --git a/lib/Target/PowerPC/PPCPerfectShuffle.h b/lib/Target/PowerPC/PPCPerfectShuffle.h
index 17b836d..8a1d680 100644
--- a/lib/Target/PowerPC/PPCPerfectShuffle.h
+++ b/lib/Target/PowerPC/PPCPerfectShuffle.h
@@ -12,6 +12,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCPERFECTSHUFFLE_H
+#define LLVM_LIB_TARGET_POWERPC_PPCPERFECTSHUFFLE_H
+
 // 31 entries have cost 0
 // 292 entries have cost 1
 // 1384 entries have cost 2
@@ -6584,3 +6587,5 @@ static const unsigned PerfectShuffleTable[6561+1] = {
   835584U,	// <u,u,u,u>: Cost 0 copy LHS
   0
 };
+
+#endif
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index eca774e..9b9966f 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -140,8 +140,8 @@ PPCRegisterInfo::getNoPreservedMask() const {
 
 BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
-  const PPCFrameLowering *PPCFI =
-    static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+  const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>(
+      MF.getSubtarget().getFrameLowering());
 
   // The ZERO register is not really a register, but the representation of r0
   // when used in instructions that treat r0 as the constant 0.
@@ -199,7 +199,16 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   if (PPCFI->needsFP(MF))
     Reserved.set(PPC::R31);
 
-  if (hasBasePointer(MF))
+  if (hasBasePointer(MF)) {
+  	if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
+        MF.getTarget().getRelocationModel() == Reloc::PIC_)
+      Reserved.set(PPC::R29);
+    else
+      Reserved.set(PPC::R30);
+  }
+
+  if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
+      MF.getTarget().getRelocationModel() == Reloc::PIC_)
     Reserved.set(PPC::R30);
 
   // Reserve Altivec registers when Altivec is unavailable.
@@ -214,7 +223,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 unsigned
 PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                          MachineFunction &MF) const {
-  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
   const unsigned DefaultSafety = 1;
 
   switch (RC->getID()) {
@@ -278,7 +287,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
   // Get the frame info.
   MachineFrameInfo *MFI = MF.getFrameInfo();
   // Get the instruction info.
-  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   // Determine whether 64-bit pointers are used.
   bool LP64 = Subtarget.isPPC64();
   DebugLoc dl = MI.getDebugLoc();
@@ -289,7 +298,10 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
   unsigned FrameSize = MFI->getStackSize();
   
   // Get stack alignments.
-  unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+  unsigned TargetAlign = MF.getTarget()
+                             .getSubtargetImpl()
+                             ->getFrameLowering()
+                             ->getStackAlignment();
   unsigned MaxAlign = MFI->getMaxAlignment();
   assert((maxCallFrameSize & (MaxAlign-1)) == 0 &&
          "Maximum call-frame size not sufficiently aligned");
@@ -394,7 +406,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
   // Get the instruction's basic block.
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
-  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   DebugLoc dl = MI.getDebugLoc();
 
   bool LP64 = Subtarget.isPPC64();
@@ -438,7 +450,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
   // Get the instruction's basic block.
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
-  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   DebugLoc dl = MI.getDebugLoc();
 
   bool LP64 = Subtarget.isPPC64();
@@ -511,7 +523,7 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
   // Get the instruction's basic block.
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
-  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   DebugLoc dl = MI.getDebugLoc();
 
   bool LP64 = Subtarget.isPPC64();
@@ -554,7 +566,7 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
   // Get the instruction's basic block.
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
-  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   DebugLoc dl = MI.getDebugLoc();
 
   bool LP64 = Subtarget.isPPC64();
@@ -601,7 +613,7 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
   // Get the instruction's basic block.
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
-  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   DebugLoc dl = MI.getDebugLoc();
 
   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -626,7 +638,7 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
   // Get the instruction's basic block.
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
-  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   DebugLoc dl = MI.getDebugLoc();
 
   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -706,7 +718,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   // Get the basic block's function.
   MachineFunction &MF = *MBB.getParent();
   // Get the instruction info.
-  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   // Get the frame info.
   MachineFrameInfo *MFI = MF.getFrameInfo();
   DebugLoc dl = MI.getDebugLoc();
@@ -831,7 +843,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 }
 
 unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
-  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
 
   if (!Subtarget.isPPC64())
     return TFI->hasFP(MF) ? PPC::R31 : PPC::R1;
@@ -843,7 +855,14 @@ unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
   if (!hasBasePointer(MF))
     return getFrameRegister(MF);
 
-  return Subtarget.isPPC64() ? PPC::X30 : PPC::R30;
+  if (Subtarget.isPPC64())
+    return PPC::X30;
+
+  if (Subtarget.isSVR4ABI() &&
+      MF.getTarget().getRelocationModel() == Reloc::PIC_)
+    return PPC::R29;
+
+  return PPC::R30;
 }
 
 bool PPCRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
@@ -868,7 +887,10 @@ bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const {
 bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   const Function *F = MF.getFunction();
-  unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+  unsigned StackAlign = MF.getTarget()
+                            .getSubtargetImpl()
+                            ->getFrameLowering()
+                            ->getStackAlignment();
   bool requiresRealignment =
     ((MFI->getMaxAlignment() > StackAlign) ||
      F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
@@ -885,16 +907,6 @@ bool PPCRegisterInfo::
 needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
   assert(Offset < 0 && "Local offset must be negative");
 
-  unsigned FIOperandNum = 0;
-  while (!MI->getOperand(FIOperandNum).isFI()) {
-    ++FIOperandNum;
-    assert(FIOperandNum < MI->getNumOperands() &&
-           "Instr doesn't have FrameIndex operand!");
-  }
-
-  unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum);
-  Offset += MI->getOperand(OffsetOperandNo).getImm();
-
   // It's the load/store FI references that cause issues, as it can be difficult
   // to materialize the offset if it won't fit in the literal field. Estimate
   // based on the size of the local frame and some conservative assumptions
@@ -916,8 +928,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
   MachineBasicBlock &MBB = *MI->getParent();
   MachineFunction &MF = *MBB.getParent();
 
-  const PPCFrameLowering *PPCFI =
-    static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+  const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>(
+      MF.getSubtarget().getFrameLowering());
   unsigned StackEst =
     PPCFI->determineFrameLayout(MF, false, true);
 
@@ -951,7 +963,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
     DL = Ins->getDebugLoc();
 
   const MachineFunction &MF = *MBB->getParent();
-  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const MCInstrDesc &MCID = TII.get(ADDriOpc);
   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
   MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
@@ -976,7 +988,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
 
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
-  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const MCInstrDesc &MCID = MI.getDesc();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   MRI.constrainRegClass(BaseReg,
@@ -985,6 +997,16 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
 
 bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
                                          int64_t Offset) const {
+  unsigned FIOperandNum = 0;
+  while (!MI->getOperand(FIOperandNum).isFI()) {
+    ++FIOperandNum;
+    assert(FIOperandNum < MI->getNumOperands() &&
+           "Instr doesn't have FrameIndex operand!");
+  }
+
+  unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum);
+  Offset += MI->getOperand(OffsetOperandNo).getImm();
+
   return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
          (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0));
 }
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 13a35f6..c182f95 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef POWERPC32_REGISTERINFO_H
-#define POWERPC32_REGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
 
 #include "PPC.h"
 #include "llvm/ADT/DenseMap.h"
diff --git a/lib/Target/PowerPC/PPCRelocations.h b/lib/Target/PowerPC/PPCRelocations.h
deleted file mode 100644
index 0b392f9..0000000
--- a/lib/Target/PowerPC/PPCRelocations.h
+++ /dev/null
@@ -1,56 +0,0 @@
-//===-- PPCRelocations.h - PPC Code Relocations -----------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the PowerPC 32-bit target-specific relocation types.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PPCRELOCATIONS_H
-#define PPCRELOCATIONS_H
-
-#include "llvm/CodeGen/MachineRelocation.h"
-
-// Hack to rid us of a PPC pre-processor symbol which is erroneously
-// defined in a PowerPC header file (bug in Linux/PPC)
-#ifdef PPC
-#undef PPC
-#endif
-
-namespace llvm {
-  namespace PPC {
-    enum RelocationType {
-      // reloc_vanilla - A standard relocation, where the address of the
-      // relocated object completely overwrites the address of the relocation.
-      reloc_vanilla,
-    
-      // reloc_pcrel_bx - PC relative relocation, for the b or bl instructions.
-      reloc_pcrel_bx,
-
-      // reloc_pcrel_bcx - PC relative relocation, for BLT,BLE,BEQ,BGE,BGT,BNE,
-      // and other bcx instructions.
-      reloc_pcrel_bcx,
-
-      // reloc_absolute_high - Absolute relocation, for the loadhi instruction
-      // (which is really addis).  Add the high 16-bits of the specified global
-      // address into the low 16-bits of the instruction.
-      reloc_absolute_high,
-
-      // reloc_absolute_low - Absolute relocation, for the la instruction (which
-      // is really an addi).  Add the low 16-bits of the specified global
-      // address into the low 16-bits of the instruction.
-      reloc_absolute_low,
-      
-      // reloc_absolute_low_ix - Absolute relocation for the 64-bit load/store
-      // instruction which have two implicit zero bits.
-      reloc_absolute_low_ix
-    };
-  }
-}
-
-#endif
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index 1221d41..7f80121 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -106,6 +106,7 @@ def IIC_SprSLBIE     : InstrItinClass;
 def IIC_SprSLBMTE    : InstrItinClass;
 def IIC_SprSLBMFEE   : InstrItinClass;
 def IIC_SprSLBIA     : InstrItinClass;
+def IIC_SprTLBIA     : InstrItinClass;
 def IIC_SprTLBIEL    : InstrItinClass;
 def IIC_SprTLBIE     : InstrItinClass;
 
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
index b2e7f3b..2c1378d 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef POWERPCCSELECTIONDAGINFO_H
-#define POWERPCCSELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H
 
 #include "llvm/Target/TargetSelectionDAGInfo.h"
 
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 2e1b74a..04e7ec6 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -33,13 +33,12 @@ using namespace llvm;
 #include "PPCGenSubtargetInfo.inc"
 
 /// Return the datalayout string of a subtarget.
-static std::string getDataLayoutString(const PPCSubtarget &ST) {
-  const Triple &T = ST.getTargetTriple();
-
+static std::string getDataLayoutString(const Triple &T) {
+  bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
   std::string Ret;
 
   // Most PPC* platforms are big endian, PPC64LE is little endian.
-  if (ST.isLittleEndian())
+  if (T.getArch() == Triple::ppc64le)
     Ret = "e";
   else
     Ret = "E";
@@ -48,18 +47,18 @@ static std::string getDataLayoutString(const PPCSubtarget &ST) {
 
   // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
   // pointers.
-  if (!ST.isPPC64() || T.getOS() == Triple::Lv2)
+  if (!is64Bit || T.getOS() == Triple::Lv2)
     Ret += "-p:32:32";
 
   // Note, the alignment values for f64 and i64 on ppc64 in Darwin
   // documentation are wrong; these are correct (i.e. "what gcc does").
-  if (ST.isPPC64() || ST.isSVR4ABI())
+  if (is64Bit || !T.isOSDarwin())
     Ret += "-i64:64";
   else
     Ret += "-f64:32:64";
 
   // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
-  if (ST.isPPC64())
+  if (is64Bit)
     Ret += "-n32:64";
   else
     Ret += "-n32";
@@ -70,47 +69,20 @@ static std::string getDataLayoutString(const PPCSubtarget &ST) {
 PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
                                                             StringRef FS) {
   initializeEnvironment();
-  resetSubtargetFeatures(CPU, FS);
+  initSubtargetFeatures(CPU, FS);
   return *this;
 }
 
 PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
-                           const std::string &FS, PPCTargetMachine &TM,
-                           bool is64Bit, CodeGenOpt::Level OptLevel)
-    : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT),
-      OptLevel(OptLevel),
-      FrameLowering(initializeSubtargetDependencies(CPU, FS)),
-      DL(getDataLayoutString(*this)), InstrInfo(*this), JITInfo(*this),
+                           const std::string &FS, const PPCTargetMachine &TM)
+    : PPCGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT),
+      DL(getDataLayoutString(TargetTriple)),
+      IsPPC64(TargetTriple.getArch() == Triple::ppc64 ||
+              TargetTriple.getArch() == Triple::ppc64le),
+      TargetABI(PPC_ABI_UNKNOWN),
+      FrameLowering(initializeSubtargetDependencies(CPU, FS)), InstrInfo(*this),
       TLInfo(TM), TSInfo(&DL) {}
 
-/// SetJITMode - This is called to inform the subtarget info that we are
-/// producing code for the JIT.
-void PPCSubtarget::SetJITMode() {
-  // JIT mode doesn't want lazy resolver stubs, it knows exactly where
-  // everything is.  This matters for PPC64, which codegens in PIC mode without
-  // stubs.
-  HasLazyResolverStubs = false;
-
-  // Calls to external functions need to use indirect calls
-  IsJITCodeModel = true;
-}
-
-void PPCSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
-  AttributeSet FnAttrs = MF->getFunction()->getAttributes();
-  Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
-                                           "target-cpu");
-  Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
-                                          "target-features");
-  std::string CPU =
-    !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : "";
-  std::string FS =
-    !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
-  if (!FS.empty()) {
-    initializeEnvironment();
-    resetSubtargetFeatures(CPU, FS);
-  }
-}
-
 void PPCSubtarget::initializeEnvironment() {
   StackAlignment = 16;
   DarwinDirective = PPC::DIR_NONE;
@@ -119,8 +91,10 @@ void PPCSubtarget::initializeEnvironment() {
   Use64BitRegs = false;
   UseCRBits = false;
   HasAltivec = false;
+  HasSPE = false;
   HasQPX = false;
   HasVSX = false;
+  HasP8Vector = false;
   HasFCPSGN = false;
   HasFSQRT = false;
   HasFRE = false;
@@ -136,13 +110,16 @@ void PPCSubtarget::initializeEnvironment() {
   HasPOPCNTD = false;
   HasLDBRX = false;
   IsBookE = false;
+  HasOnlyMSYNC = false;
+  IsPPC4xx = false;
+  IsPPC6xx = false;
+  IsE500 = false;
   DeprecatedMFTB = false;
   DeprecatedDST = false;
   HasLazyResolverStubs = false;
-  IsJITCodeModel = false;
 }
 
-void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
+void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
   // Determine default and user specified characteristics
   std::string CPUName = CPU;
   if (CPUName.empty())
@@ -156,35 +133,13 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
   // Initialize scheduling itinerary for the specified CPU.
   InstrItins = getInstrItineraryForCPU(CPUName);
 
-  // Make sure 64-bit features are available when CPUname is generic
-  std::string FullFS = FS;
-
-  // If we are generating code for ppc64, verify that options make sense.
-  if (IsPPC64) {
-    Has64BitSupport = true;
-    // Silently force 64-bit register use on ppc64.
-    Use64BitRegs = true;
-    if (!FullFS.empty())
-      FullFS = "+64bit," + FullFS;
-    else
-      FullFS = "+64bit";
-  }
-
-  // At -O2 and above, track CR bits as individual registers.
-  if (OptLevel >= CodeGenOpt::Default) {
-    if (!FullFS.empty())
-      FullFS = "+crbits," + FullFS;
-    else
-      FullFS = "+crbits";
-  }
-
   // Parse features string.
-  ParseSubtargetFeatures(CPUName, FullFS);
+  ParseSubtargetFeatures(CPUName, FS);
 
   // If the user requested use of 64-bit regs, but the cpu selected doesn't
   // support it, ignore.
-  if (use64BitRegs() && !has64BitSupport())
-    Use64BitRegs = false;
+  if (IsPPC64 && has64BitSupport())
+    Use64BitRegs = true;
 
   // Set up darwin-specific properties.
   if (isDarwin())
@@ -201,8 +156,20 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
 
   // FIXME: For now, we disable VSX in little-endian mode until endian
   // issues in those instructions can be addressed.
-  if (IsLittleEndian)
+  if (IsLittleEndian) {
     HasVSX = false;
+    HasP8Vector = false;
+  }
+
+  // Determine default ABI.
+  if (TargetABI == PPC_ABI_UNKNOWN) {
+    if (!isDarwin() && IsPPC64) {
+      if (IsLittleEndian)
+        TargetABI = PPC_ABI_ELFv2;
+      else
+        TargetABI = PPC_ABI_ELFv1;
+    }
+  }
 }
 
 /// hasLazyResolverStub - Return true if accesses to the specified global have
@@ -213,31 +180,13 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
   // We never have stubs if HasLazyResolverStubs=false or if in static mode.
   if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
     return false;
-  // If symbol visibility is hidden, the extra load is not needed if
-  // the symbol is definitely defined in the current translation unit.
-  bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+  bool isDecl = GV->isDeclaration();
   if (GV->hasHiddenVisibility() && !isDecl && !GV->hasCommonLinkage())
     return false;
   return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
          GV->hasCommonLinkage() || isDecl;
 }
 
-bool PPCSubtarget::enablePostRAScheduler(
-           CodeGenOpt::Level OptLevel,
-           TargetSubtargetInfo::AntiDepBreakMode& Mode,
-           RegClassVector& CriticalPathRCs) const {
-  Mode = TargetSubtargetInfo::ANTIDEP_ALL;
-
-  CriticalPathRCs.clear();
-
-  if (isPPC64())
-    CriticalPathRCs.push_back(&PPC::G8RCRegClass);
-  else
-    CriticalPathRCs.push_back(&PPC::GPRCRegClass);
-    
-  return OptLevel >= CodeGenOpt::Default;
-}
-
 // Embedded cores need aggressive scheduling (and some others also benefit).
 static bool needsAggressiveScheduling(unsigned Directive) {
   switch (Directive) {
@@ -259,6 +208,19 @@ bool PPCSubtarget::enableMachineScheduler() const {
   return needsAggressiveScheduling(DarwinDirective);
 }
 
+// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+bool PPCSubtarget::enablePostMachineScheduler() const { return true; }
+
+PPCGenSubtargetInfo::AntiDepBreakMode PPCSubtarget::getAntiDepBreakMode() const {
+  return TargetSubtargetInfo::ANTIDEP_ALL;
+}
+
+void PPCSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
+  CriticalPathRCs.clear();
+  CriticalPathRCs.push_back(isPPC64() ?
+                            &PPC::G8RCRegClass : &PPC::GPRCRegClass);
+}
+
 void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                        MachineInstr *begin,
                                        MachineInstr *end,
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 2a16699..1df19c3 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -11,13 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef POWERPCSUBTARGET_H
-#define POWERPCSUBTARGET_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCSUBTARGET_H
+#define LLVM_LIB_TARGET_POWERPC_PPCSUBTARGET_H
 
 #include "PPCFrameLowering.h"
 #include "PPCInstrInfo.h"
 #include "PPCISelLowering.h"
-#include "PPCJITInfo.h"
 #include "PPCSelectionDAGInfo.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/IR/DataLayout.h"
@@ -66,6 +65,12 @@ class TargetMachine;
 
 class PPCSubtarget : public PPCGenSubtargetInfo {
 protected:
+  /// TargetTriple - What processor and OS we're targeting.
+  Triple TargetTriple;
+
+  // Calculates type size & alignment
+  const DataLayout DL;
+
   /// stackAlignment - The minimum alignment known to hold of the stack frame on
   /// entry to the function and which must be maintained by every function.
   unsigned StackAlignment;
@@ -83,8 +88,10 @@ protected:
   bool UseCRBits;
   bool IsPPC64;
   bool HasAltivec;
+  bool HasSPE;
   bool HasQPX;
   bool HasVSX;
+  bool HasP8Vector;
   bool HasFCPSGN;
   bool HasFSQRT;
   bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@@ -97,22 +104,23 @@ protected:
   bool HasPOPCNTD;
   bool HasLDBRX;
   bool IsBookE;
+  bool HasOnlyMSYNC;
+  bool IsE500;
+  bool IsPPC4xx;
+  bool IsPPC6xx;
   bool DeprecatedMFTB;
   bool DeprecatedDST;
   bool HasLazyResolverStubs;
-  bool IsJITCodeModel;
   bool IsLittleEndian;
 
-  /// TargetTriple - What processor and OS we're targeting.
-  Triple TargetTriple;
-
-  /// OptLevel - What default optimization level we're emitting code for.
-  CodeGenOpt::Level OptLevel;
+  enum {
+    PPC_ABI_UNKNOWN,
+    PPC_ABI_ELFv1,
+    PPC_ABI_ELFv2
+  } TargetABI;
 
   PPCFrameLowering FrameLowering;
-  const DataLayout DL;
   PPCInstrInfo InstrInfo;
-  PPCJITInfo JITInfo;
   PPCTargetLowering TLInfo;
   PPCSelectionDAGInfo TSInfo;
 
@@ -121,17 +129,12 @@ public:
   /// of the specified triple.
   ///
   PPCSubtarget(const std::string &TT, const std::string &CPU,
-               const std::string &FS, PPCTargetMachine &TM, bool is64Bit,
-               CodeGenOpt::Level OptLevel);
+               const std::string &FS, const PPCTargetMachine &TM);
 
   /// ParseSubtargetFeatures - Parses features string setting specified
   /// subtarget options.  Definition of function is auto generated by tblgen.
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
-  /// SetJITMode - This is called to inform the subtarget info that we are
-  /// producing code for the JIT.
-  void SetJITMode();
-
   /// getStackAlignment - Returns the minimum alignment known to hold of the
   /// stack frame on entry to the function and which must be maintained by every
   /// function for this subtarget.
@@ -143,24 +146,32 @@ public:
 
   /// getInstrItins - Return the instruction itineraries based on subtarget
   /// selection.
-  const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
-
-  const PPCFrameLowering *getFrameLowering() const { return &FrameLowering; }
-  const DataLayout *getDataLayout() const { return &DL; }
-  const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
-  PPCJITInfo *getJITInfo() { return &JITInfo; }
-  const PPCTargetLowering *getTargetLowering() const { return &TLInfo; }
-  const PPCSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+  const InstrItineraryData *getInstrItineraryData() const override {
+    return &InstrItins;
+  }
+
+  const PPCFrameLowering *getFrameLowering() const override {
+    return &FrameLowering;
+  }
+  const DataLayout *getDataLayout() const override { return &DL; }
+  const PPCInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+  const PPCTargetLowering *getTargetLowering() const override {
+    return &TLInfo;
+  }
+  const PPCSelectionDAGInfo *getSelectionDAGInfo() const override {
+    return &TSInfo;
+  }
+  const PPCRegisterInfo *getRegisterInfo() const override {
+    return &getInstrInfo()->getRegisterInfo();
+  }
 
   /// initializeSubtargetDependencies - Initializes using a CPU and feature string
   /// so that we can use initializer lists for subtarget initialization.
   PPCSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
 
-  /// \brief Reset the features for the PowerPC target.
-  void resetSubtargetFeatures(const MachineFunction *MF) override;
 private:
   void initializeEnvironment();
-  void resetSubtargetFeatures(StringRef CPU, StringRef FS);
+  void initSubtargetFeatures(StringRef CPU, StringRef FS);
 
 public:
   /// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
@@ -186,9 +197,6 @@ public:
   bool hasLazyResolverStub(const GlobalValue *GV,
                            const TargetMachine &TM) const;
 
-  // isJITCodeModel - True if we're generating code for the JIT
-  bool isJITCodeModel() const { return IsJITCodeModel; }
-
   // isLittleEndian - True if generating little-endian code
   bool isLittleEndian() const { return IsLittleEndian; }
 
@@ -205,13 +213,19 @@ public:
   bool hasFPRND() const { return HasFPRND; }
   bool hasFPCVT() const { return HasFPCVT; }
   bool hasAltivec() const { return HasAltivec; }
+  bool hasSPE() const { return HasSPE; }
   bool hasQPX() const { return HasQPX; }
   bool hasVSX() const { return HasVSX; }
+  bool hasP8Vector() const { return HasP8Vector; }
   bool hasMFOCRF() const { return HasMFOCRF; }
   bool hasISEL() const { return HasISEL; }
   bool hasPOPCNTD() const { return HasPOPCNTD; }
   bool hasLDBRX() const { return HasLDBRX; }
   bool isBookE() const { return IsBookE; }
+  bool hasOnlyMSYNC() const { return HasOnlyMSYNC; }
+  bool isPPC4xx() const { return IsPPC4xx; }
+  bool isPPC6xx() const { return IsPPC6xx; }
+  bool isE500() const { return IsE500; }
   bool isDeprecatedMFTB() const { return DeprecatedMFTB; }
   bool isDeprecatedDST() const { return DeprecatedDST; }
 
@@ -222,18 +236,22 @@ public:
   /// isBGQ - True if this is a BG/Q platform.
   bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
 
+  bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+  bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
+
   bool isDarwinABI() const { return isDarwin(); }
   bool isSVR4ABI() const { return !isDarwin(); }
-
-  /// enablePostRAScheduler - True at 'More' optimization.
-  bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                             TargetSubtargetInfo::AntiDepBreakMode& Mode,
-                             RegClassVector& CriticalPathRCs) const override;
+  bool isELFv2ABI() const { return TargetABI == PPC_ABI_ELFv2; }
 
   bool enableEarlyIfConversion() const override { return hasISEL(); }
 
   // Scheduling customization.
   bool enableMachineScheduler() const override;
+  // This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+  bool enablePostMachineScheduler() const override;
+  AntiDepBreakMode getAntiDepBreakMode() const override;
+  void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
+
   void overrideSchedPolicy(MachineSchedPolicy &Policy,
                            MachineInstr *begin,
                            MachineInstr *end,
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 9563b90..f15189c 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -12,8 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCTargetMachine.h"
+#include "PPCTargetObjectFile.h"
 #include "PPC.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
@@ -37,15 +39,54 @@ extern "C" void LLVMInitializePowerPCTarget() {
   RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
 }
 
+static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, StringRef TT) {
+  std::string FullFS = FS;
+  Triple TargetTriple(TT);
+
+  // Make sure 64-bit features are available when CPUname is generic
+  if (TargetTriple.getArch() == Triple::ppc64 ||
+      TargetTriple.getArch() == Triple::ppc64le) {
+    if (!FullFS.empty())
+      FullFS = "+64bit," + FullFS;
+    else
+      FullFS = "+64bit";
+  }
+
+  if (OL >= CodeGenOpt::Default) {
+    if (!FullFS.empty())
+      FullFS = "+crbits," + FullFS;
+    else
+      FullFS = "+crbits";
+  }
+  return FullFS;
+}
+
+static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
+  // If it isn't a Mach-O file then it's going to be a linux ELF
+  // object file.
+  if (TT.isOSDarwin())
+    return make_unique<TargetLoweringObjectFileMachO>();
+
+  return make_unique<PPC64LinuxTargetObjectFile>();
+}
+
+// The FeatureString here is a little subtle. We are modifying the feature string
+// with what are (currently) non-function specific overrides as it goes into the
+// LLVMTargetMachine constructor and then using the stored value in the
+// Subtarget constructor below it.
 PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU,
                                    StringRef FS, const TargetOptions &Options,
                                    Reloc::Model RM, CodeModel::Model CM,
-                                   CodeGenOpt::Level OL, bool is64Bit)
-    : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
-      Subtarget(TT, CPU, FS, *this, is64Bit, OL) {
+                                   CodeGenOpt::Level OL)
+    : LLVMTargetMachine(T, TT, CPU, computeFSAdditions(FS, OL, TT), Options, RM,
+                        CM, OL),
+      TLOF(createTLOF(Triple(getTargetTriple()))),
+      Subtarget(TT, CPU, TargetFS, *this) {
   initAsmInfo();
 }
 
+PPCTargetMachine::~PPCTargetMachine() {}
+
 void PPC32TargetMachine::anchor() { }
 
 PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
@@ -53,7 +94,7 @@ PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
                                        const TargetOptions &Options,
                                        Reloc::Model RM, CodeModel::Model CM,
                                        CodeGenOpt::Level OL)
-  : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
+  : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
 }
 
 void PPC64TargetMachine::anchor() { }
@@ -63,9 +104,34 @@ PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
                                        const TargetOptions &Options,
                                        Reloc::Model RM, CodeModel::Model CM,
                                        CodeGenOpt::Level OL)
-  : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
+  : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
 }
 
+const PPCSubtarget *
+PPCTargetMachine::getSubtargetImpl(const Function &F) const {
+  AttributeSet FnAttrs = F.getAttributes();
+  Attribute CPUAttr =
+      FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
+  Attribute FSAttr =
+      FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+
+  std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+                        ? CPUAttr.getValueAsString().str()
+                        : TargetCPU;
+  std::string FS = !FSAttr.hasAttribute(Attribute::None)
+                       ? FSAttr.getValueAsString().str()
+                       : TargetFS;
+
+  auto &I = SubtargetMap[CPU + FS];
+  if (!I) {
+    // This needs to be done before we create a new subtarget since any
+    // creation will depend on the TM and the code generation flags on the
+    // function that reside in TargetOptions.
+    resetTargetOptions(F);
+    I = llvm::make_unique<PPCSubtarget>(TargetTriple, CPU, FS, *this);
+  }
+  return I.get();
+}
 
 //===----------------------------------------------------------------------===//
 // Pass Pipeline Configuration
@@ -86,6 +152,7 @@ public:
     return *getPPCTargetMachine().getSubtargetImpl();
   }
 
+  void addIRPasses() override;
   bool addPreISel() override;
   bool addILPOpts() override;
   bool addInstSelector() override;
@@ -99,6 +166,11 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
   return new PPCPassConfig(this, PM);
 }
 
+void PPCPassConfig::addIRPasses() {
+  addPass(createAtomicExpandPass(&getPPCTargetMachine()));
+  TargetPassConfig::addIRPasses();
+}
+
 bool PPCPassConfig::addPreISel() {
   if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
     addPass(createPPCCTRLoops(getPPCTargetMachine()));
@@ -148,18 +220,6 @@ bool PPCPassConfig::addPreEmitPass() {
   return false;
 }
 
-bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
-                                      JITCodeEmitter &JCE) {
-  // Inform the subtarget that we are in JIT mode.  FIXME: does this break macho
-  // writing?
-  Subtarget.SetJITMode();
-
-  // Machine code emitter pass for PowerPC.
-  PM.add(createPPCJITCodeEmitterPass(*this, JCE));
-
-  return false;
-}
-
 void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
   // Add first the target-independent BasicTTI pass, then our PPC pass. This
   // allows the PPC pass to delegate to the target independent layer when
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 4c7029c..5095d73 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef PPC_TARGETMACHINE_H
-#define PPC_TARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETMACHINE_H
+#define LLVM_LIB_TARGET_POWERPC_PPCTARGETMACHINE_H
 
 #include "PPCInstrInfo.h"
 #include "PPCSubtarget.h"
@@ -24,46 +24,30 @@ namespace llvm {
 /// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
 ///
 class PPCTargetMachine : public LLVMTargetMachine {
-  PPCSubtarget        Subtarget;
+  std::unique_ptr<TargetLoweringObjectFile> TLOF;
+  PPCSubtarget Subtarget;
+
+  mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
 
 public:
   PPCTargetMachine(const Target &T, StringRef TT,
                    StringRef CPU, StringRef FS, const TargetOptions &Options,
                    Reloc::Model RM, CodeModel::Model CM,
-                   CodeGenOpt::Level OL, bool is64Bit);
+                   CodeGenOpt::Level OL);
 
-  const PPCInstrInfo *getInstrInfo() const override {
-    return getSubtargetImpl()->getInstrInfo();
-  }
-  const PPCFrameLowering *getFrameLowering() const override {
-    return getSubtargetImpl()->getFrameLowering();
-  }
-  PPCJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
-  const PPCTargetLowering *getTargetLowering() const override {
-    return getSubtargetImpl()->getTargetLowering();
-  }
-  const PPCSelectionDAGInfo* getSelectionDAGInfo() const override {
-    return getSubtargetImpl()->getSelectionDAGInfo();
-  }
-  const PPCRegisterInfo *getRegisterInfo() const override {
-    return &getInstrInfo()->getRegisterInfo();
-  }
+  ~PPCTargetMachine() override;
 
-  const DataLayout *getDataLayout() const override {
-    return getSubtargetImpl()->getDataLayout();
-  }
-  const PPCSubtarget  *getSubtargetImpl() const override { return &Subtarget; }
-  const InstrItineraryData *getInstrItineraryData() const override {
-    return &getSubtargetImpl()->getInstrItineraryData();
-  }
+  const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+  const PPCSubtarget *getSubtargetImpl(const Function &F) const override;
 
   // Pass Pipeline Configuration
   TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
-  bool addCodeEmitter(PassManagerBase &PM,
-                      JITCodeEmitter &JCE) override;
 
   /// \brief Register PPC analysis passes with a pass manager.
   void addAnalysisPasses(PassManagerBase &PM) override;
+  TargetLoweringObjectFile *getObjFileLowering() const override {
+    return TLOF.get();
+  }
 };
 
 /// PPC32TargetMachine - PowerPC 32-bit target machine.
diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.h b/lib/Target/PowerPC/PPCTargetObjectFile.h
index 3e71bbc..cd84da2 100644
--- a/lib/Target/PowerPC/PPCTargetObjectFile.h
+++ b/lib/Target/PowerPC/PPCTargetObjectFile.h
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_TARGET_PPC_TARGETOBJECTFILE_H
-#define LLVM_TARGET_PPC_TARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_POWERPC_PPCTARGETOBJECTFILE_H
 
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h
index 74b5f45..6493713 100644
--- a/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef PPCTARGETSTREAMER_H
-#define PPCTARGETSTREAMER_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
 
 #include "llvm/MC/MCStreamer.h"
 
@@ -19,6 +19,8 @@ public:
   virtual ~PPCTargetStreamer();
   virtual void emitTCEntry(const MCSymbol &S) = 0;
   virtual void emitMachine(StringRef CPU) = 0;
+  virtual void emitAbiVersion(int AbiVersion) = 0;
+  virtual void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) = 0;
 };
 }
 
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 007901b..37624ed 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -38,6 +38,7 @@ void initializePPCTTIPass(PassRegistry &);
 namespace {
 
 class PPCTTI final : public ImmutablePass, public TargetTransformInfo {
+  const TargetMachine *TM;
   const PPCSubtarget *ST;
   const PPCTargetLowering *TLI;
 
@@ -47,16 +48,16 @@ public:
   }
 
   PPCTTI(const PPCTargetMachine *TM)
-      : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
-        TLI(TM->getTargetLowering()) {
+      : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+        TLI(TM->getSubtargetImpl()->getTargetLowering()) {
     initializePPCTTIPass(*PassRegistry::getPassRegistry());
   }
 
-  virtual void initializePass() override {
+  void initializePass() override {
     pushTTIStack(this);
   }
 
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
     TargetTransformInfo::getAnalysisUsage(AU);
   }
 
@@ -64,7 +65,7 @@ public:
   static char ID;
 
   /// Provide necessary pointer adjustments for the two base classes.
-  virtual void *getAdjustedAnalysisPointer(const void *ID) override {
+  void *getAdjustedAnalysisPointer(const void *ID) override {
     if (ID == &TargetTransformInfo::ID)
       return (TargetTransformInfo*)this;
     return this;
@@ -79,33 +80,31 @@ public:
   unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                          Type *Ty) const override;
 
-  virtual PopcntSupportKind
-  getPopcntSupport(unsigned TyWidth) const override;
-  virtual void getUnrollingPreferences(
-    Loop *L, UnrollingPreferences &UP) const override;
+  PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
+  void getUnrollingPreferences(const Function *F, Loop *L,
+                               UnrollingPreferences &UP) const override;
 
   /// @}
 
   /// \name Vector TTI Implementations
   /// @{
 
-  virtual unsigned getNumberOfRegisters(bool Vector) const override;
-  virtual unsigned getRegisterBitWidth(bool Vector) const override;
-  virtual unsigned getMaximumUnrollFactor() const override;
-  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
-                                          OperandValueKind,
-                                          OperandValueKind) const override;
-  virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
-                                  int Index, Type *SubTp) const override;
-  virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
-                                    Type *Src) const override;
-  virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                                      Type *CondTy) const override;
-  virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
-                                      unsigned Index) const override;
-  virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
-                                   unsigned Alignment,
-                                   unsigned AddressSpace) const override;
+  unsigned getNumberOfRegisters(bool Vector) const override;
+  unsigned getRegisterBitWidth(bool Vector) const override;
+  unsigned getMaxInterleaveFactor() const override;
+  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
+                                  OperandValueKind, OperandValueProperties,
+                                  OperandValueProperties) const override;
+  unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+                          int Index, Type *SubTp) const override;
+  unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+                            Type *Src) const override;
+  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+                              Type *CondTy) const override;
+  unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+                              unsigned Index) const override;
+  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+                           unsigned AddressSpace) const override;
 
   /// @}
 };
@@ -271,8 +270,9 @@ unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
   return PPCTTI::getIntImmCost(Imm, Ty);
 }
 
-void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
-  if (ST->getDarwinDirective() == PPC::DIR_A2) {
+void PPCTTI::getUnrollingPreferences(const Function *F, Loop *L,
+                                     UnrollingPreferences &UP) const {
+  if (TM->getSubtarget<PPCSubtarget>(F).getDarwinDirective() == PPC::DIR_A2) {
     // The A2 is in-order with a deep pipeline, and concatenation unrolling
     // helps expose latency-hiding opportunities to the instruction scheduler.
     UP.Partial = UP.Runtime = true;
@@ -297,7 +297,7 @@ unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
 
 }
 
-unsigned PPCTTI::getMaximumUnrollFactor() const {
+unsigned PPCTTI::getMaxInterleaveFactor() const {
   unsigned Directive = ST->getDarwinDirective();
   // The 440 has no SIMD support, but floating-point instructions
   // have a 5-cycle latency, so unroll by 5x for latency hiding.
@@ -318,14 +318,15 @@ unsigned PPCTTI::getMaximumUnrollFactor() const {
   return 2;
 }
 
-unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
-                                        OperandValueKind Op1Info,
-                                        OperandValueKind Op2Info) const {
+unsigned PPCTTI::getArithmeticInstrCost(
+    unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
+    OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
+    OperandValueProperties Opd2PropInfo) const {
   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
 
   // Fallback to the default implementation.
-  return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
-                                                     Op2Info);
+  return TargetTransformInfo::getArithmeticInstrCost(
+      Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
 }
 
 unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,