42 files changed, 2607 insertions, 1360 deletions
diff --git a/lib/Target/PowerPC/AsmParser/CMakeLists.txt b/lib/Target/PowerPC/AsmParser/CMakeLists.txt
new file mode 100644
index 0000000..3aa59c0
--- /dev/null
+++ b/lib/Target/PowerPC/AsmParser/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+                     ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMPowerPCAsmParser
+  PPCAsmParser.cpp
+  )
+
+add_dependencies(LLVMPowerPCAsmParser PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/AsmParser/LLVMBuild.txt b/lib/Target/PowerPC/AsmParser/LLVMBuild.txt
new file mode 100644
index 0000000..bd08c13
--- /dev/null
+++ b/lib/Target/PowerPC/AsmParser/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/PowerPC/AsmParser/LLVMBuild.txt --------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = PowerPCAsmParser
+parent = PowerPC
+required_libraries = PowerPCInfo MC MCParser Support
+add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/AsmParser/Makefile b/lib/Target/PowerPC/AsmParser/Makefile
new file mode 100644
index 0000000..c8a8915
--- /dev/null
+++ b/lib/Target/PowerPC/AsmParser/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/PowerPC/AsmParser/Makefile ----------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPowerPCAsmParser
+
+# Hack: we need to include 'main' PowerPC target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
new file mode 100644
index 0000000..9cf16f0
--- /dev/null
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -0,0 +1,723 @@
+//===-- PPCAsmParser.cpp - Parse PowerPC asm to MCInst instructions ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+static unsigned RRegs[32] = {
+  PPC::R0,  PPC::R1,  PPC::R2,  PPC::R3,
+  PPC::R4,  PPC::R5,  PPC::R6,  PPC::R7,
+  PPC::R8,  PPC::R9,  PPC::R10, PPC::R11,
+  PPC::R12, PPC::R13, PPC::R14, PPC::R15,
+  PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+  PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+  PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+  PPC::R28, PPC::R29, PPC::R30, PPC::R31
+};
+static unsigned RRegsNoR0[32] = {
+  PPC::ZERO,
+            PPC::R1,  PPC::R2,  PPC::R3,
+  PPC::R4,  PPC::R5,  PPC::R6,  PPC::R7,
+  PPC::R8,  PPC::R9,  PPC::R10, PPC::R11,
+  PPC::R12, PPC::R13, PPC::R14, PPC::R15,
+  PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+  PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+  PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+  PPC::R28, PPC::R29, PPC::R30, PPC::R31
+};
+static unsigned XRegs[32] = {
+  PPC::X0,  PPC::X1,  PPC::X2,  PPC::X3,
+  PPC::X4,  PPC::X5,  PPC::X6,  PPC::X7,
+  PPC::X8,  PPC::X9,  PPC::X10, PPC::X11,
+  PPC::X12, PPC::X13, PPC::X14, PPC::X15,
+  PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+  PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+  PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+  PPC::X28, PPC::X29, PPC::X30, PPC::X31
+};
+static unsigned XRegsNoX0[32] = {
+  PPC::ZERO8,
+            PPC::X1,  PPC::X2,  PPC::X3,
+  PPC::X4,  PPC::X5,  PPC::X6,  PPC::X7,
+  PPC::X8,  PPC::X9,  PPC::X10, PPC::X11,
+  PPC::X12, PPC::X13, PPC::X14, PPC::X15,
+  PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+  PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+  PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+  PPC::X28, PPC::X29, PPC::X30, PPC::X31
+};
+static unsigned FRegs[32] = {
+  PPC::F0,  PPC::F1,  PPC::F2,  PPC::F3,
+  PPC::F4,  PPC::F5,  PPC::F6,  PPC::F7,
+  PPC::F8,  PPC::F9,  PPC::F10, PPC::F11,
+  PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+  PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+  PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+  PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+  PPC::F28, PPC::F29, PPC::F30, PPC::F31
+};
+static unsigned VRegs[32] = {
+  PPC::V0,  PPC::V1,  PPC::V2,  PPC::V3,
+  PPC::V4,  PPC::V5,  PPC::V6,  PPC::V7,
+  PPC::V8,  PPC::V9,  PPC::V10, PPC::V11,
+  PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+  PPC::V16, PPC::V17, PPC::V18, PPC::V19,
+  PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+  PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+  PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
+static unsigned CRBITRegs[32] = {
+  PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
+  PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
+  PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+  PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+  PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+  PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN,
+  PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN,
+  PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN
+};
+static unsigned CRRegs[8] = {
+  PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
+  PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7
+};
+
+struct PPCOperand;
+
+class PPCAsmParser : public MCTargetAsmParser {
+  MCSubtargetInfo &STI;
+  MCAsmParser &Parser;
+  bool IsPPC64;
+
+  MCAsmParser &getParser() const { return Parser; }
+  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+  void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+  bool isPPC64() const { return IsPPC64; }
+
+  bool MatchRegisterName(const AsmToken &Tok,
+                         unsigned &RegNo, int64_t &IntVal);
+
+  virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+
+  bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+  bool ParseDirectiveWord(unsigned Size, SMLoc L);
+  bool ParseDirectiveTC(unsigned Size, SMLoc L);
+
+  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                               MCStreamer &Out, unsigned &ErrorInfo,
+                               bool MatchingInlineAsm);
+
+  void ProcessInstruction(MCInst &Inst,
+                          const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+
+  /// @name Auto-generated Match Functions
+  /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "PPCGenAsmMatcher.inc"
+
+  /// }
+
+
+public:
+  PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+    : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+    // Check for 64-bit vs. 32-bit pointer mode.
+    Triple TheTriple(STI.getTargetTriple());
+    IsPPC64 = TheTriple.getArch() == Triple::ppc64;
+    // Initialize the set of available features.
+    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+  }
+
+  virtual bool ParseInstruction(ParseInstructionInfo &Info,
+                                StringRef Name, SMLoc NameLoc,
+                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+  virtual bool ParseDirective(AsmToken DirectiveID);
+};
+
+/// PPCOperand - Instances of this class represent a parsed PowerPC machine
+/// instruction.
+struct PPCOperand : public MCParsedAsmOperand {
+  enum KindTy {
+    Token,
+    Immediate,
+    Expression
+  } Kind;
+
+  SMLoc StartLoc, EndLoc;
+  bool IsPPC64;
+
+  struct TokOp {
+    const char *Data;
+    unsigned Length;
+  };
+
+  struct ImmOp {
+    int64_t Val;
+  };
+
+  struct ExprOp {
+    const MCExpr *Val;
+  };
+
+  union {
+    struct TokOp Tok;
+    struct ImmOp Imm;
+    struct ExprOp Expr;
+  };
+
+  PPCOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
+  PPCOperand(const PPCOperand &o) : MCParsedAsmOperand() {
+    Kind = o.Kind;
+    StartLoc = o.StartLoc;
+    EndLoc = o.EndLoc;
+    IsPPC64 = o.IsPPC64;
+    switch (Kind) {
+    case Token:
+      Tok = o.Tok;
+      break;
+    case Immediate:
+      Imm = o.Imm;
+      break;
+    case Expression:
+      Expr = o.Expr;
+      break;
+    }
+  }
+
+  /// getStartLoc - Get the location of the first token of this operand.
+  SMLoc getStartLoc() const { return StartLoc; }
+
+  /// getEndLoc - Get the location of the last token of this operand.
+  SMLoc getEndLoc() const { return EndLoc; }
+
+  /// isPPC64 - True if this operand is for an instruction in 64-bit mode.
+  bool isPPC64() const { return IsPPC64; }
+
+  int64_t getImm() const {
+    assert(Kind == Immediate && "Invalid access!");
+    return Imm.Val;
+  }
+
+  const MCExpr *getExpr() const {
+    assert(Kind == Expression && "Invalid access!");
+    return Expr.Val;
+  }
+
+  unsigned getReg() const {
+    assert(isRegNumber() && "Invalid access!");
+    return (unsigned) Imm.Val;
+  }
+
+  unsigned getCCReg() const {
+    assert(isCCRegNumber() && "Invalid access!");
+    return (unsigned) Imm.Val;
+  }
+
+  unsigned getCRBitMask() const {
+    assert(isCRBitMask() && "Invalid access!");
+    return 7 - countTrailingZeros<uint64_t>(Imm.Val);
+  }
+
+  bool isToken() const { return Kind == Token; }
+  bool isImm() const { return Kind == Immediate || Kind == Expression; }
+  bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
+  bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
+  bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); }
+  bool isU16Imm() const { return Kind == Expression ||
+                                 (Kind == Immediate && isUInt<16>(getImm())); }
+  bool isS16Imm() const { return Kind == Expression ||
+                                 (Kind == Immediate && isInt<16>(getImm())); }
+  bool isS16ImmX4() const { return Kind == Expression ||
+                                   (Kind == Immediate && isInt<16>(getImm()) &&
+                                    (getImm() & 3) == 0); }
+  bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); }
+  bool isCCRegNumber() const { return Kind == Immediate &&
+                                      isUInt<3>(getImm()); }
+  bool isCRBitMask() const { return Kind == Immediate && isUInt<8>(getImm()) &&
+                                    isPowerOf2_32(getImm()); }
+  bool isMem() const { return false; }
+  bool isReg() const { return false; }
+
+  void addRegOperands(MCInst &Inst, unsigned N) const {
+    llvm_unreachable("addRegOperands");
+  }
+
+  void addRegGPRCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(RRegs[getReg()]));
+  }
+
+  void addRegGPRCNoR0Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(RRegsNoR0[getReg()]));
+  }
+
+  void addRegG8RCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(XRegs[getReg()]));
+  }
+
+  void addRegG8RCNoX0Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(XRegsNoX0[getReg()]));
+  }
+
+  void addRegGxRCOperands(MCInst &Inst, unsigned N) const {
+    if (isPPC64())
+      addRegG8RCOperands(Inst, N);
+    else
+      addRegGPRCOperands(Inst, N);
+  }
+
+  void addRegGxRCNoR0Operands(MCInst &Inst, unsigned N) const {
+    if (isPPC64())
+      addRegG8RCNoX0Operands(Inst, N);
+    else
+      addRegGPRCNoR0Operands(Inst, N);
+  }
+
+  void addRegF4RCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(FRegs[getReg()]));
+  }
+
+  void addRegF8RCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(FRegs[getReg()]));
+  }
+
+  void addRegVRRCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(VRegs[getReg()]));
+  }
+
+  void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getReg()]));
+  }
+
+  void addRegCRRCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(CRRegs[getCCReg()]));
+  }
+
+  void addCRBitMaskOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(CRRegs[getCRBitMask()]));
+  }
+
+  void addImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    if (Kind == Immediate)
+      Inst.addOperand(MCOperand::CreateImm(getImm()));
+    else
+      Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+  }
+
+  StringRef getToken() const {
+    assert(Kind == Token && "Invalid access!");
+    return StringRef(Tok.Data, Tok.Length);
+  }
+
+  virtual void print(raw_ostream &OS) const;
+
+  static PPCOperand *CreateToken(StringRef Str, SMLoc S, bool IsPPC64) {
+    PPCOperand *Op = new PPCOperand(Token);
+    Op->Tok.Data = Str.data();
+    Op->Tok.Length = Str.size();
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    Op->IsPPC64 = IsPPC64;
+    return Op;
+  }
+
+  static PPCOperand *CreateImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) {
+    PPCOperand *Op = new PPCOperand(Immediate);
+    Op->Imm.Val = Val;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    Op->IsPPC64 = IsPPC64;
+    return Op;
+  }
+
+  static PPCOperand *CreateExpr(const MCExpr *Val,
+                                SMLoc S, SMLoc E, bool IsPPC64) {
+    PPCOperand *Op = new PPCOperand(Expression);
+    Op->Expr.Val = Val;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    Op->IsPPC64 = IsPPC64;
+    return Op;
+  }
+};
+
+} // end anonymous namespace.
+
+void PPCOperand::print(raw_ostream &OS) const {
+  switch (Kind) {
+  case Token:
+    OS << "'" << getToken() << "'";
+    break;
+  case Immediate:
+    OS << getImm();
+    break;
+  case Expression:
+    getExpr()->print(OS);
+    break;
+  }
+}
+
+
+void PPCAsmParser::
+ProcessInstruction(MCInst &Inst,
+                   const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  switch (Inst.getOpcode()) {
+  case PPC::SLWI: {
+    MCInst TmpInst;
+    int64_t N = Inst.getOperand(2).getImm();
+    TmpInst.setOpcode(PPC::RLWINM);
+    TmpInst.addOperand(Inst.getOperand(0));
+    TmpInst.addOperand(Inst.getOperand(1));
+    TmpInst.addOperand(MCOperand::CreateImm(N));
+    TmpInst.addOperand(MCOperand::CreateImm(0));
+    TmpInst.addOperand(MCOperand::CreateImm(31 - N));
+    Inst = TmpInst;
+    break;
+  }
+  case PPC::SRWI: {
+    MCInst TmpInst;
+    int64_t N = Inst.getOperand(2).getImm();
+    TmpInst.setOpcode(PPC::RLWINM);
+    TmpInst.addOperand(Inst.getOperand(0));
+    TmpInst.addOperand(Inst.getOperand(1));
+    TmpInst.addOperand(MCOperand::CreateImm(32 - N));
+    TmpInst.addOperand(MCOperand::CreateImm(N));
+    TmpInst.addOperand(MCOperand::CreateImm(31));
+    Inst = TmpInst;
+    break;
+  }
+  case PPC::SLDI: {
+    MCInst TmpInst;
+    int64_t N = Inst.getOperand(2).getImm();
+    TmpInst.setOpcode(PPC::RLDICR);
+    TmpInst.addOperand(Inst.getOperand(0));
+    TmpInst.addOperand(Inst.getOperand(1));
+    TmpInst.addOperand(MCOperand::CreateImm(N));
+    TmpInst.addOperand(MCOperand::CreateImm(63 - N));
+    Inst = TmpInst;
+    break;
+  }
+  case PPC::SRDI: {
+    MCInst TmpInst;
+    int64_t N = Inst.getOperand(2).getImm();
+    TmpInst.setOpcode(PPC::RLDICL);
+    TmpInst.addOperand(Inst.getOperand(0));
+    TmpInst.addOperand(Inst.getOperand(1));
+    TmpInst.addOperand(MCOperand::CreateImm(64 - N));
+    TmpInst.addOperand(MCOperand::CreateImm(N));
+    Inst = TmpInst;
+    break;
+  }
+  }
+}
+
+bool PPCAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                        MCStreamer &Out, unsigned &ErrorInfo,
+                        bool MatchingInlineAsm) {
+  MCInst Inst;
+
+  switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
+  default: break;
+  case Match_Success:
+    // Post-process instructions (typically extended mnemonics)
+    ProcessInstruction(Inst, Operands);
+    Inst.setLoc(IDLoc);
+    Out.EmitInstruction(Inst);
+    return false;
+  case Match_MissingFeature:
+    return Error(IDLoc, "instruction use requires an option to be enabled");
+  case Match_MnemonicFail:
+      return Error(IDLoc, "unrecognized instruction mnemonic");
+  case Match_InvalidOperand: {
+    SMLoc ErrorLoc = IDLoc;
+    if (ErrorInfo != ~0U) {
+      if (ErrorInfo >= Operands.size())
+        return Error(IDLoc, "too few operands for instruction");
+
+      ErrorLoc = ((PPCOperand*)Operands[ErrorInfo])->getStartLoc();
+      if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+    }
+
+    return Error(ErrorLoc, "invalid operand for instruction");
+  }
+  }
+
+  llvm_unreachable("Implement any new match types added!");
+}
+
+bool PPCAsmParser::
+MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) {
+  if (Tok.is(AsmToken::Identifier)) {
+    StringRef Name = Tok.getString();
+
+    if (Name.equals_lower("lr")) {
+      RegNo = isPPC64()? PPC::LR8 : PPC::LR;
+      IntVal = 8;
+      return false;
+    } else if (Name.equals_lower("ctr")) {
+      RegNo = isPPC64()? PPC::CTR8 : PPC::CTR;
+      IntVal = 9;
+      return false;
+    } else if (Name.substr(0, 1).equals_lower("r") &&
+               !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+      RegNo = isPPC64()? XRegs[IntVal] : RRegs[IntVal];
+      return false;
+    } else if (Name.substr(0, 1).equals_lower("f") &&
+               !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+      RegNo = FRegs[IntVal];
+      return false;
+    } else if (Name.substr(0, 1).equals_lower("v") &&
+               !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+      RegNo = VRegs[IntVal];
+      return false;
+    } else if (Name.substr(0, 2).equals_lower("cr") &&
+               !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
+      RegNo = CRRegs[IntVal];
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool PPCAsmParser::
+ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+  const AsmToken &Tok = Parser.getTok();
+  StartLoc = Tok.getLoc();
+  EndLoc = Tok.getEndLoc();
+  RegNo = 0;
+  int64_t IntVal;
+
+  if (!MatchRegisterName(Tok, RegNo, IntVal)) {
+    Parser.Lex(); // Eat identifier token.
+    return false;
+  }
+
+  return Error(StartLoc, "invalid register name");
+}
+
+bool PPCAsmParser::
+ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  SMLoc S = Parser.getTok().getLoc();
+  SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+  const MCExpr *EVal;
+  PPCOperand *Op;
+
+  // Attempt to parse the next token as an immediate
+  switch (getLexer().getKind()) {
+  // Special handling for register names.  These are interpreted
+  // as immediates corresponding to the register number.
+  case AsmToken::Percent:
+    Parser.Lex(); // Eat the '%'.
+    unsigned RegNo;
+    int64_t IntVal;
+    if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
+      Parser.Lex(); // Eat the identifier token.
+      Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64());
+      Operands.push_back(Op);
+      return false;
+    }
+    return Error(S, "invalid register name");
+
+  // All other expressions
+  case AsmToken::LParen:
+  case AsmToken::Plus:
+  case AsmToken::Minus:
+  case AsmToken::Integer:
+  case AsmToken::Identifier:
+  case AsmToken::Dot:
+  case AsmToken::Dollar:
+    if (!getParser().parseExpression(EVal))
+      break;
+    /* fall through */
+  default:
+    return Error(S, "unknown operand");
+  }
+
+  if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(EVal))
+    Op = PPCOperand::CreateImm(CE->getValue(), S, E, isPPC64());
+  else
+    Op = PPCOperand::CreateExpr(EVal, S, E, isPPC64());
+
+  // Push the parsed operand into the list of operands
+  Operands.push_back(Op);
+
+  // Check for D-form memory operands
+  if (getLexer().is(AsmToken::LParen)) {
+    Parser.Lex(); // Eat the '('.
+    S = Parser.getTok().getLoc();
+
+    int64_t IntVal;
+    switch (getLexer().getKind()) {
+    case AsmToken::Percent:
+      Parser.Lex(); // Eat the '%'.
+      unsigned RegNo;
+      if (MatchRegisterName(Parser.getTok(), RegNo, IntVal))
+        return Error(S, "invalid register name");
+      Parser.Lex(); // Eat the identifier token.
+      break;
+
+    case AsmToken::Integer:
+      if (getParser().parseAbsoluteExpression(IntVal) ||
+          IntVal < 0 || IntVal > 31)
+        return Error(S, "invalid register number");
+      break;
+
+    default:
+      return Error(S, "invalid memory operand");
+    }
+
+    if (getLexer().isNot(AsmToken::RParen))
+      return Error(Parser.getTok().getLoc(), "missing ')'");
+    E = Parser.getTok().getLoc();
+    Parser.Lex(); // Eat the ')'.
+
+    Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64());
+    Operands.push_back(Op);
+  }
+
+  return false;
+}
+
+/// Parse an instruction mnemonic followed by its operands.
+bool PPCAsmParser::
+ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
+                 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // The first operand is the token for the instruction name.
+  // If the instruction ends in a '.', we need to create a separate
+  // token for it, to match what TableGen is doing.
+  size_t Dot = Name.find('.');
+  StringRef Mnemonic = Name.slice(0, Dot);
+  Operands.push_back(PPCOperand::CreateToken(Mnemonic, NameLoc, isPPC64()));
+  if (Dot != StringRef::npos) {
+    SMLoc DotLoc = SMLoc::getFromPointer(NameLoc.getPointer() + Dot);
+    StringRef DotStr = Name.slice(Dot, StringRef::npos);
+    Operands.push_back(PPCOperand::CreateToken(DotStr, DotLoc, isPPC64()));
+  }
+
+  // If there are no more operands then finish
+  if (getLexer().is(AsmToken::EndOfStatement))
+    return false;
+
+  // Parse the first operand
+  if (ParseOperand(Operands))
+    return true;
+
+  while (getLexer().isNot(AsmToken::EndOfStatement) &&
+         getLexer().is(AsmToken::Comma)) {
+    // Consume the comma token
+    getLexer().Lex();
+
+    // Parse the next operand
+    if (ParseOperand(Operands))
+      return true;
+  }
+
+  return false;
+}
+
+/// ParseDirective parses the PPC specific directives
+bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
+  StringRef IDVal = DirectiveID.getIdentifier();
+  if (IDVal == ".word")
+    return ParseDirectiveWord(4, DirectiveID.getLoc());
+  if (IDVal == ".tc")
+    return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc());
+  return true;
+}
+
+/// ParseDirectiveWord
+///  ::= .word [ expression (, expression)* ]
+bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    for (;;) {
+      const MCExpr *Value;
+      if (getParser().parseExpression(Value))
+        return true;
+
+      getParser().getStreamer().EmitValue(Value, Size);
+
+      if (getLexer().is(AsmToken::EndOfStatement))
+        break;
+
+      if (getLexer().isNot(AsmToken::Comma))
+        return Error(L, "unexpected token in directive");
+      Parser.Lex();
+    }
+  }
+
+  Parser.Lex();
+  return false;
+}
+
+/// ParseDirectiveTC
+///  ::= .tc [ symbol (, expression)* ]
+bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) {
+  // Skip TC symbol, which is only used with XCOFF.
+  while (getLexer().isNot(AsmToken::EndOfStatement)
+         && getLexer().isNot(AsmToken::Comma))
+    Parser.Lex();
+  if (getLexer().isNot(AsmToken::Comma))
+    return Error(L, "unexpected token in directive");
+  Parser.Lex();
+
+  // Align to word size.
+  getParser().getStreamer().EmitValueToAlignment(Size);
+
+  // Emit expressions.
+  return ParseDirectiveWord(Size, L);
+}
+
+/// Force static initialization.
+extern "C" void LLVMInitializePowerPCAsmParser() {
+  RegisterMCAsmParser<PPCAsmParser> A(ThePPC32Target);
+  RegisterMCAsmParser<PPCAsmParser> B(ThePPC64Target);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "PPCGenAsmMatcher.inc"
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 6036428..e5c5204 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(LLVM_TARGET_DEFINITIONS PPC.td)
 
 tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM PPCGenAsmMatcher.inc -gen-asm-matcher)
 tablegen(LLVM PPCGenCodeEmitter.inc -gen-emitter)
 tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
 tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info)
@@ -26,12 +27,14 @@ add_llvm_target(PowerPCCodeGen
   PPCRegisterInfo.cpp
   PPCSubtarget.cpp
   PPCTargetMachine.cpp
+  PPCTargetObjectFile.cpp
   PPCTargetTransformInfo.cpp
   PPCSelectionDAGInfo.cpp
   )
 
 add_dependencies(LLVMPowerPCCodeGen intrinsics_gen)
 
+add_subdirectory(AsmParser)
 add_subdirectory(InstPrinter)
 add_subdirectory(TargetInfo)
 add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index bacc108..432167e 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -129,7 +129,10 @@ void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
 
 void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
                                         raw_ostream &O) {
-  O << (short)MI->getOperand(OpNo).getImm();
+  if (MI->getOperand(OpNo).isImm())
+    O << (short)MI->getOperand(OpNo).getImm();
+  else
+    printOperand(MI, OpNo, O);
 }
 
 void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
@@ -137,22 +140,14 @@ void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
   O << (unsigned short)MI->getOperand(OpNo).getImm();
 }
 
-void PPCInstPrinter::printS16X4ImmOperand(const MCInst *MI, unsigned OpNo,
-                                          raw_ostream &O) {
-  if (MI->getOperand(OpNo).isImm())
-    O << (short)(MI->getOperand(OpNo).getImm()*4);
-  else
-    printOperand(MI, OpNo, O);
-}
-
 void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
                                         raw_ostream &O) {
   if (!MI->getOperand(OpNo).isImm())
     return printOperand(MI, OpNo, O);
 
   // Branches can take an immediate operand.  This is used by the branch
-  // selection pass to print $+8, an eight byte displacement from the PC.
-  O << "$+";
+  // selection pass to print .+8, an eight byte displacement from the PC.
+  O << ".+";
   printAbsAddrOperand(MI, OpNo, O);
 }
 
@@ -182,7 +177,7 @@ void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
 
 void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo,
                                     raw_ostream &O) {
-  printSymbolLo(MI, OpNo, O);
+  printS16ImmOperand(MI, OpNo, O);
   O << '(';
   if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
     O << "0";
@@ -191,22 +186,6 @@ void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo,
   O << ')';
 }
 
-void PPCInstPrinter::printMemRegImmShifted(const MCInst *MI, unsigned OpNo,
-                                           raw_ostream &O) {
-  if (MI->getOperand(OpNo).isImm())
-    printS16X4ImmOperand(MI, OpNo, O);
-  else
-    printSymbolLo(MI, OpNo, O);
-  O << '(';
-  
-  if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
-    O << "0";
-  else
-    printOperand(MI, OpNo+1, O);
-  O << ')';
-}
-
-
 void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
                                     raw_ostream &O) {
   // When used as the base register, r0 reads constant zero rather than
@@ -256,39 +235,4 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
   assert(Op.isExpr() && "unknown operand kind in printOperand");
   O << *Op.getExpr();
 }
-  
-void PPCInstPrinter::printSymbolLo(const MCInst *MI, unsigned OpNo,
-                                   raw_ostream &O) {
-  if (MI->getOperand(OpNo).isImm())
-    return printS16ImmOperand(MI, OpNo, O);
-  
-  // FIXME: This is a terrible hack because we can't encode lo16() as an operand
-  // flag of a subtraction.  See the FIXME in GetSymbolRef in PPCMCInstLower.
-  if (MI->getOperand(OpNo).isExpr() &&
-      isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) {
-    O << "lo16(";
-    printOperand(MI, OpNo, O);
-    O << ')';
-  } else {
-    printOperand(MI, OpNo, O);
-  }
-}
-
-void PPCInstPrinter::printSymbolHi(const MCInst *MI, unsigned OpNo,
-                                   raw_ostream &O) {
-  if (MI->getOperand(OpNo).isImm())
-    return printS16ImmOperand(MI, OpNo, O);
-
-  // FIXME: This is a terrible hack because we can't encode lo16() as an operand
-  // flag of a subtraction.  See the FIXME in GetSymbolRef in PPCMCInstLower.
-  if (MI->getOperand(OpNo).isExpr() &&
-      isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) {
-    O << "ha16(";
-    printOperand(MI, OpNo, O);
-    O << ')';
-  } else {
-    printOperand(MI, OpNo, O);
-  }
-}
-
 
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 8f1e211..f64a329 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -50,19 +50,13 @@ public:
   void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printS16X4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
 
   void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
 
   void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printMemRegImmShifted(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  
-  // FIXME: Remove
-  void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O);
 };
 } // end namespace llvm
 
diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt
index 95fac54..7b3e843 100644
--- a/lib/Target/PowerPC/LLVMBuild.txt
+++ b/lib/Target/PowerPC/LLVMBuild.txt
@@ -16,7 +16,7 @@
 ;===------------------------------------------------------------------------===;
 
 [common]
-subdirectories = InstPrinter MCTargetDesc TargetInfo
+subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo
 
 [component_0]
 type = TargetGroup
diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
index b674883..45be471 100644
--- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
@@ -3,6 +3,7 @@ add_llvm_library(LLVMPowerPCDesc
   PPCMCTargetDesc.cpp
   PPCMCAsmInfo.cpp
   PPCMCCodeEmitter.cpp
+  PPCMCExpr.cpp
   PPCPredicates.cpp
   PPCELFObjectWriter.cpp
   )
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index ec26574..3fa2e09 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -22,7 +22,7 @@
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
-static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
+static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
   switch (Kind) {
   default:
     llvm_unreachable("Unknown fixup kind!");
@@ -37,19 +37,35 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
     return Value & 0xfffc;
   case PPC::fixup_ppc_br24:
     return Value & 0x3fffffc;
-#if 0
-  case PPC::fixup_ppc_hi16:
-    return (Value >> 16) & 0xffff;
-#endif
-  case PPC::fixup_ppc_ha16:
-    return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff;
-  case PPC::fixup_ppc_lo16:
+  case PPC::fixup_ppc_half16:
     return Value & 0xffff;
-  case PPC::fixup_ppc_lo16_ds:
+  case PPC::fixup_ppc_half16ds:
     return Value & 0xfffc;
   }
 }
 
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+  switch (Kind) {
+  default:
+    llvm_unreachable("Unknown fixup kind!");
+  case FK_Data_1:
+    return 1;
+  case FK_Data_2:
+  case PPC::fixup_ppc_half16:
+  case PPC::fixup_ppc_half16ds:
+    return 2;
+  case FK_Data_4:
+  case PPC::fixup_ppc_brcond14:
+  case PPC::fixup_ppc_br24:
+    return 4;
+  case FK_Data_8:
+    return 8;
+  case PPC::fixup_ppc_tlsreg:
+  case PPC::fixup_ppc_nofixup:
+    return 0;
+  }
+}
+
 namespace {
 class PPCMachObjectWriter : public MCMachObjectTargetWriter {
 public:
@@ -77,9 +93,8 @@ public:
       // name                    offset  bits  flags
       { "fixup_ppc_br24",        6,      24,   MCFixupKindInfo::FKF_IsPCRel },
       { "fixup_ppc_brcond14",    16,     14,   MCFixupKindInfo::FKF_IsPCRel },
-      { "fixup_ppc_lo16",        16,     16,   0 },
-      { "fixup_ppc_ha16",        16,     16,   0 },
-      { "fixup_ppc_lo16_ds",     16,     14,   0 },
+      { "fixup_ppc_half16",       0,     16,   0 },
+      { "fixup_ppc_half16ds",     0,     14,   0 },
       { "fixup_ppc_tlsreg",       0,      0,   0 },
       { "fixup_ppc_nofixup",      0,      0,   0 }
     };
@@ -98,12 +113,13 @@ public:
     if (!Value) return;           // Doesn't change encoding.
 
     unsigned Offset = Fixup.getOffset();
+    unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
 
     // For each byte of the fragment that the fixup touches, mask in the bits
     // from the fixup value. The Value has been "split up" into the appropriate
     // bitfields above.
-    for (unsigned i = 0; i != 4; ++i)
-      Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff);
+    for (unsigned i = 0; i != NumBytes; ++i)
+      Data[Offset + i] |= uint8_t((Value >> ((NumBytes - i - 1)*8)) & 0xff);
   }
 
   bool mayNeedRelaxation(const MCInst &Inst) const {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 81a86dc..7188f93 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -33,26 +33,9 @@ namespace {
     virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target,
                                                     const MCFixup &Fixup,
                                                     bool IsPCRel) const;
-    virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset);
-
-    virtual void sortRelocs(const MCAssembler &Asm,
-                            std::vector<ELFRelocationEntry> &Relocs);
-  };
-
-  class PPCELFRelocationEntry : public ELFRelocationEntry {
-  public:
-    PPCELFRelocationEntry(const ELFRelocationEntry &RE);
-    bool operator<(const PPCELFRelocationEntry &RE) const {
-      return (RE.r_offset < r_offset ||
-              (RE.r_offset == r_offset && RE.Type > Type));
-    }
   };
 }
 
-PPCELFRelocationEntry::PPCELFRelocationEntry(const ELFRelocationEntry &RE)
-  : ELFRelocationEntry(RE.r_offset, RE.Index, RE.Type, RE.Symbol,
-                       RE.r_addend, *RE.Fixup) {}
-
 PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
   : MCELFObjectTargetWriter(Is64Bit, OSABI,
                             Is64Bit ?  ELF::EM_PPC64 : ELF::EM_PPC,
@@ -98,7 +81,7 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
     case PPC::fixup_ppc_brcond14:
       Type = ELF::R_PPC_ADDR14; // XXX: or BRNTAKEN?_
       break;
-    case PPC::fixup_ppc_ha16:
+    case PPC::fixup_ppc_half16:
       switch (Modifier) {
       default: llvm_unreachable("Unsupported Modifier");
       case MCSymbolRefExpr::VK_PPC_TPREL16_HA:
@@ -107,7 +90,7 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
       case MCSymbolRefExpr::VK_PPC_DTPREL16_HA:
         Type = ELF::R_PPC64_DTPREL16_HA;
         break;
-      case MCSymbolRefExpr::VK_None:
+      case MCSymbolRefExpr::VK_PPC_ADDR16_HA:
         Type = ELF::R_PPC_ADDR16_HA;
 	break;
       case MCSymbolRefExpr::VK_PPC_TOC16_HA:
@@ -122,11 +105,6 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
       case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA:
         Type = ELF::R_PPC64_GOT_TLSLD16_HA;
         break;
-      }
-      break;
-    case PPC::fixup_ppc_lo16:
-      switch (Modifier) {
-      default: llvm_unreachable("Unsupported Modifier");
       case MCSymbolRefExpr::VK_PPC_TPREL16_LO:
         Type = ELF::R_PPC_TPREL16_LO;
         break;
@@ -134,6 +112,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
         Type = ELF::R_PPC64_DTPREL16_LO;
         break;
       case MCSymbolRefExpr::VK_None:
+        Type = ELF::R_PPC_ADDR16;
+        break;
+      case MCSymbolRefExpr::VK_PPC_ADDR16_LO:
         Type = ELF::R_PPC_ADDR16_LO;
 	break;
       case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
@@ -150,12 +131,15 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
         break;
       }
       break;
-    case PPC::fixup_ppc_lo16_ds:
+    case PPC::fixup_ppc_half16ds:
       switch (Modifier) {
       default: llvm_unreachable("Unsupported Modifier");
       case MCSymbolRefExpr::VK_None:
         Type = ELF::R_PPC64_ADDR16_DS;
         break;
+      case MCSymbolRefExpr::VK_PPC_ADDR16_LO:
+        Type = ELF::R_PPC64_ADDR16_LO_DS;
+        break;
       case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
         Type = ELF::R_PPC64_TOC16_DS;
 	break;
@@ -231,47 +215,6 @@ const MCSymbol *PPCELFObjectWriter::undefinedExplicitRelSym(const MCValue &Targe
   return NULL;
 }
 
-void PPCELFObjectWriter::
-adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
-  switch ((unsigned)Fixup.getKind()) {
-    case PPC::fixup_ppc_ha16:
-    case PPC::fixup_ppc_lo16:
-    case PPC::fixup_ppc_lo16_ds:
-      RelocOffset += 2;
-      break;
-    default:
-      break;
-  }
-}
-
-// The standard sorter only sorts on the r_offset field, but PowerPC can
-// have multiple relocations at the same offset.  Sort secondarily on the
-// relocation type to avoid nondeterminism.
-void PPCELFObjectWriter::sortRelocs(const MCAssembler &Asm,
-                                    std::vector<ELFRelocationEntry> &Relocs) {
-
-  // Copy to a temporary vector of relocation entries having a different
-  // sort function.
-  std::vector<PPCELFRelocationEntry> TmpRelocs;
-  
-  for (std::vector<ELFRelocationEntry>::iterator R = Relocs.begin();
-       R != Relocs.end(); ++R) {
-    TmpRelocs.push_back(PPCELFRelocationEntry(*R));
-  }
-
-  // Sort in place by ascending r_offset and descending r_type.
-  array_pod_sort(TmpRelocs.begin(), TmpRelocs.end());
-
-  // Copy back to the original vector.
-  unsigned I = 0;
-  for (std::vector<PPCELFRelocationEntry>::iterator R = TmpRelocs.begin();
-       R != TmpRelocs.end(); ++R, ++I) {
-    Relocs[I] = ELFRelocationEntry(R->r_offset, R->Index, R->Type,
-                                   R->Symbol, R->r_addend, *R->Fixup);
-  }
-}
-
-
 MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
                                                bool Is64Bit,
                                                uint8_t OSABI) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 86c44f5..3ea59f0 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -25,17 +25,13 @@ enum Fixups {
   /// branches.
   fixup_ppc_brcond14,
   
-  /// fixup_ppc_lo16 - A 16-bit fixup corresponding to lo16(_foo) for instrs
-  /// like 'li'.
-  fixup_ppc_lo16,
+  /// fixup_ppc_half16 - A 16-bit fixup corresponding to lo16(_foo)
+  /// or ha16(_foo) for instrs like 'li' or 'addis'.
+  fixup_ppc_half16,
   
-  /// fixup_ppc_ha16 - A 16-bit fixup corresponding to ha16(_foo) for instrs
-  /// like 'lis'.
-  fixup_ppc_ha16,
-  
-  /// fixup_ppc_lo16_ds - A 14-bit fixup corresponding to lo16(_foo) with
+  /// fixup_ppc_half16ds - A 14-bit fixup corresponding to lo16(_foo) with
   /// implied 2 zero bits for instrs like 'std'.
-  fixup_ppc_lo16_ds,
+  fixup_ppc_half16ds,
 
   /// fixup_ppc_tlsreg - Insert thread-pointer register number.
   fixup_ppc_tlsreg,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index a25d7fe..bb7ce6f 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -59,6 +59,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
 
   // Set up DWARF directives
   HasLEB128 = true;  // Target asm supports leb128 directives (little-endian)
+  MinInstAlignment = 4;
 
   // Exceptions handling
   ExceptionsType = ExceptionHandling::DwarfCFI;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 2223cd6..31c73ae 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -48,10 +48,8 @@ public:
                                SmallVectorImpl<MCFixup> &Fixups) const;
   unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
                              SmallVectorImpl<MCFixup> &Fixups) const;
-  unsigned getHA16Encoding(const MCInst &MI, unsigned OpNo,
-                           SmallVectorImpl<MCFixup> &Fixups) const;
-  unsigned getLO16Encoding(const MCInst &MI, unsigned OpNo,
-                           SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getS16ImmEncoding(const MCInst &MI, unsigned OpNo,
+                             SmallVectorImpl<MCFixup> &Fixups) const;
   unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
                             SmallVectorImpl<MCFixup> &Fixups) const;
   unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
@@ -136,25 +134,14 @@ unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo,
   return 0;
 }
 
-unsigned PPCMCCodeEmitter::getHA16Encoding(const MCInst &MI, unsigned OpNo,
+unsigned PPCMCCodeEmitter::getS16ImmEncoding(const MCInst &MI, unsigned OpNo,
                                        SmallVectorImpl<MCFixup> &Fixups) const {
   const MCOperand &MO = MI.getOperand(OpNo);
   if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
   
   // Add a fixup for the branch target.
-  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
-                                   (MCFixupKind)PPC::fixup_ppc_ha16));
-  return 0;
-}
-
-unsigned PPCMCCodeEmitter::getLO16Encoding(const MCInst &MI, unsigned OpNo,
-                                       SmallVectorImpl<MCFixup> &Fixups) const {
-  const MCOperand &MO = MI.getOperand(OpNo);
-  if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
-  
-  // Add a fixup for the branch target.
-  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
-                                   (MCFixupKind)PPC::fixup_ppc_lo16));
+  Fixups.push_back(MCFixup::Create(2, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_half16));
   return 0;
 }
 
@@ -170,8 +157,8 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
     return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits;
   
   // Add a fixup for the displacement field.
-  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
-                                   (MCFixupKind)PPC::fixup_ppc_lo16));
+  Fixups.push_back(MCFixup::Create(2, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_half16));
   return RegBits;
 }
 
@@ -185,11 +172,11 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
   
   const MCOperand &MO = MI.getOperand(OpNo);
   if (MO.isImm())
-    return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits;
+    return ((getMachineOpValue(MI, MO, Fixups) >> 2) & 0x3FFF) | RegBits;
   
   // Add a fixup for the displacement field.
-  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
-                                   (MCFixupKind)PPC::fixup_ppc_lo16_ds));
+  Fixups.push_back(MCFixup::Create(2, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_half16ds));
   return RegBits;
 }
 
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
new file mode 100644
index 0000000..f0613ff
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -0,0 +1,108 @@
+//===-- PPCMCExpr.cpp - PPC specific MC expression classes ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppcmcexpr"
+#include "PPCMCExpr.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+
+using namespace llvm;
+
+const PPCMCExpr*
+PPCMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
+                       MCContext &Ctx) {
+  return new (Ctx) PPCMCExpr(Kind, Expr);
+}
+
+void PPCMCExpr::PrintImpl(raw_ostream &OS) const {
+  switch (Kind) {
+  default: llvm_unreachable("Invalid kind!");
+  case VK_PPC_HA16: OS << "ha16"; break;
+  case VK_PPC_LO16: OS << "lo16"; break;
+  }
+
+  OS << '(';
+  getSubExpr()->print(OS);
+  OS << ')';
+}
+
+bool
+PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+                                     const MCAsmLayout *Layout) const {
+  MCValue Value;
+
+  if (!getSubExpr()->EvaluateAsRelocatable(Value, *Layout))
+    return false;
+
+  if (Value.isAbsolute()) {
+    int64_t Result = Value.getConstant();
+    switch (Kind) {
+      default:
+        llvm_unreachable("Invalid kind!");
+      case VK_PPC_HA16:
+        Result = ((Result >> 16) + ((Result & 0x8000) ? 1 : 0)) & 0xffff;
+        break;
+      case VK_PPC_LO16:
+        Result = Result & 0xffff;
+        break;
+    }
+    Res = MCValue::get(Result);
+  } else {
+    MCContext &Context = Layout->getAssembler().getContext();
+    const MCSymbolRefExpr *Sym = Value.getSymA();
+    MCSymbolRefExpr::VariantKind Modifier = Sym->getKind();
+    if (Modifier != MCSymbolRefExpr::VK_None)
+      return false;
+    switch (Kind) {
+      default:
+        llvm_unreachable("Invalid kind!");
+      case VK_PPC_HA16:
+        Modifier = MCSymbolRefExpr::VK_PPC_ADDR16_HA;
+        break;
+      case VK_PPC_LO16:
+        Modifier = MCSymbolRefExpr::VK_PPC_ADDR16_LO;
+        break;
+    }
+    Sym = MCSymbolRefExpr::Create(&Sym->getSymbol(), Modifier, Context);
+    Res = MCValue::get(Sym, Value.getSymB(), Value.getConstant());
+  }
+
+  return true;
+}
+
+// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
+// that method should be made public?
+static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) {
+  switch (Value->getKind()) {
+  case MCExpr::Target:
+    llvm_unreachable("Can't handle nested target expr!");
+
+  case MCExpr::Constant:
+    break;
+
+  case MCExpr::Binary: {
+    const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+    AddValueSymbols_(BE->getLHS(), Asm);
+    AddValueSymbols_(BE->getRHS(), Asm);
+    break;
+  }
+
+  case MCExpr::SymbolRef:
+    Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+    break;
+
+  case MCExpr::Unary:
+    AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
+    break;
+  }
+}
+
+void PPCMCExpr::AddValueSymbols(MCAssembler *Asm) const {
+  AddValueSymbols_(getSubExpr(), Asm);
+}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
new file mode 100644
index 0000000..a080537
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -0,0 +1,78 @@
+//===-- PPCMCExpr.h - PPC specific MC expression classes --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCMCEXPR_H
+#define PPCMCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCAsmLayout.h"
+
+namespace llvm {
+
+class PPCMCExpr : public MCTargetExpr {
+public:
+  enum VariantKind {
+    VK_PPC_None,
+    VK_PPC_HA16,
+    VK_PPC_LO16
+  };
+
+private:
+  const VariantKind Kind;
+  const MCExpr *Expr;
+
+  explicit PPCMCExpr(VariantKind _Kind, const MCExpr *_Expr)
+    : Kind(_Kind), Expr(_Expr) {}
+
+public:
+  /// @name Construction
+  /// @{
+
+  static const PPCMCExpr *Create(VariantKind Kind, const MCExpr *Expr,
+                                      MCContext &Ctx);
+
+  static const PPCMCExpr *CreateHa16(const MCExpr *Expr, MCContext &Ctx) {
+    return Create(VK_PPC_HA16, Expr, Ctx);
+  }
+
+  static const PPCMCExpr *CreateLo16(const MCExpr *Expr, MCContext &Ctx) {
+    return Create(VK_PPC_LO16, Expr, Ctx);
+  }
+
+  /// @}
+  /// @name Accessors
+  /// @{
+
+  /// getOpcode - Get the kind of this expression.
+  VariantKind getKind() const { return Kind; }
+
+  /// getSubExpr - Get the child of this expression.
+  const MCExpr *getSubExpr() const { return Expr; }
+
+  /// @}
+
+  void PrintImpl(raw_ostream &OS) const;
+  bool EvaluateAsRelocatableImpl(MCValue &Res,
+                                 const MCAsmLayout *Layout) const;
+  void AddValueSymbols(MCAssembler *) const;
+  const MCSection *FindAssociatedSection() const {
+    return getSubExpr()->FindAssociatedSection();
+  }
+
+  // There are no TLS PPCMCExprs at the moment.
+  void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {}
+
+  static bool classof(const MCExpr *E) {
+    return E->getKind() == MCExpr::Target;
+  }
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 2209f93..2da30f9 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -58,7 +58,7 @@ static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU,
   return X;
 }
 
-static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) {
+static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
   Triple TheTriple(TT);
   bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
 
@@ -69,9 +69,10 @@ static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) {
     MAI = new PPCLinuxMCAsmInfo(isPPC64);
 
   // Initial state of the frame pointer is R1.
-  MachineLocation Dst(MachineLocation::VirtualFP);
-  MachineLocation Src(isPPC64? PPC::X1 : PPC::R1, 0);
-  MAI->addInitialFrameState(0, Dst, Src);
+  unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1;
+  MCCFIInstruction Inst =
+      MCCFIInstruction::createDefCfa(0, MRI.getDwarfRegNum(Reg, true), 0);
+  MAI->addInitialFrameState(Inst);
 
   return MAI;
 }
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index 444758c..3ab9005 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -32,7 +32,8 @@ namespace PPC {
     PRED_GT     = (1 << 5) | 12,
     PRED_NE     = (2 << 5) |  4,
     PRED_UN     = (3 << 5) | 12,
-    PRED_NU     = (3 << 5) |  4
+    PRED_NU     = (3 << 5) |  4,
+    PRED_BAD    = 0
   };
   
   /// Invert the specified predicate.  != -> ==, < -> >=.
diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile
index 1617b26..6666694 100644
--- a/lib/Target/PowerPC/Makefile
+++ b/lib/Target/PowerPC/Makefile
@@ -12,12 +12,12 @@ LIBRARYNAME = LLVMPowerPCCodeGen
 TARGET = PPC
 
 # Make sure that tblgen is run, first thing.
-BUILT_SOURCES = PPCGenRegisterInfo.inc \
+BUILT_SOURCES = PPCGenRegisterInfo.inc PPCGenAsmMatcher.inc \
                 PPCGenAsmWriter.inc  PPCGenCodeEmitter.inc \
                 PPCGenInstrInfo.inc PPCGenDAGISel.inc \
                 PPCGenSubtargetInfo.inc PPCGenCallingConv.inc \
                 PPCGenMCCodeEmitter.inc
 
-DIRS = InstPrinter TargetInfo MCTargetDesc
+DIRS = AsmParser InstPrinter TargetInfo MCTargetDesc
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index b4be51a..2e79610 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -30,7 +30,10 @@ namespace llvm {
   class AsmPrinter;
   class MCInst;
 
-  FunctionPass *createPPCCTRLoops();
+  FunctionPass *createPPCCTRLoops(PPCTargetMachine &TM);
+#ifndef NDEBUG
+  FunctionPass *createPPCCTRLoopsVerify();
+#endif
   FunctionPass *createPPCEarlyReturnPass();
   FunctionPass *createPPCBranchSelectionPass();
   FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index 649ffc1..eb73c67 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -268,9 +268,14 @@ def PPCAsmWriter : AsmWriter {
   bit isMCAsmWriter = 1;
 }
 
+def PPCAsmParser : AsmParser {
+  let ShouldEmitMatchRegisterName = 0;
+}
+
 def PPC : Target {
   // Information about the instructions.
   let InstructionSet = PPCInstrInfo;
   
   let AssemblyWriters = [PPCAsmWriter];
+  let AssemblyParsers = [PPCAsmParser];
 }
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 3c7cc4e..c43b5c9 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -20,6 +20,7 @@
 #include "PPC.h"
 #include "InstPrinter/PPCInstPrinter.h"
 #include "MCTargetDesc/PPCPredicates.h"
+#include "MCTargetDesc/PPCMCExpr.h"
 #include "PPCSubtarget.h"
 #include "PPCTargetMachine.h"
 #include "llvm/ADT/MapVector.h"
@@ -910,6 +911,9 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
       OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
 
       const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext);
+      const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext);
+      const MCExpr *Sub =
+        MCBinaryExpr::CreateSub(LazyPtrExpr, Anon, OutContext);
 
       // mflr r0
       OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
@@ -919,21 +923,20 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
       // mflr r11
       OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
       // addis r11, r11, ha16(LazyPtr - AnonSymbol)
-      const MCExpr *Sub =
-        MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(LazyPtr, OutContext),
-                                Anon, OutContext);
+      const MCExpr *SubHa16 = PPCMCExpr::CreateHa16(Sub, OutContext);
       OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS)
         .addReg(PPC::R11)
         .addReg(PPC::R11)
-        .addExpr(Sub));
+        .addExpr(SubHa16));
       // mtlr r0
       OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
 
       // ldu r12, lo16(LazyPtr - AnonSymbol)(r11)
       // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11)
+      const MCExpr *SubLo16 = PPCMCExpr::CreateLo16(Sub, OutContext);
       OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
         .addReg(PPC::R12)
-        .addExpr(Sub).addExpr(Sub)
+        .addExpr(SubLo16).addExpr(SubLo16)
         .addReg(PPC::R11));
       // mtctr r12
       OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
@@ -967,24 +970,22 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
     MCSymbol *Stub = Stubs[i].first;
     MCSymbol *RawSym = Stubs[i].second.getPointer();
     MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
+    const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext);
 
     OutStreamer.SwitchSection(StubSection);
     EmitAlignment(4);
     OutStreamer.EmitLabel(Stub);
     OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
     // lis r11, ha16(LazyPtr)
-    const MCExpr *LazyPtrHa16 =
-      MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_HA16,
-                              OutContext);
+    const MCExpr *LazyPtrHa16 = PPCMCExpr::CreateHa16(LazyPtrExpr, OutContext);
     OutStreamer.EmitInstruction(MCInstBuilder(PPC::LIS)
       .addReg(PPC::R11)
       .addExpr(LazyPtrHa16));
 
-    const MCExpr *LazyPtrLo16 =
-      MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_LO16,
-                              OutContext);
     // ldu r12, lo16(LazyPtr)(r11)
     // lwzu r12, lo16(LazyPtr)(r11)
+    const MCExpr *LazyPtrLo16 = PPCMCExpr::CreateLo16(LazyPtrExpr, OutContext);
     OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
       .addReg(PPC::R12)
       .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16)
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index bd1c378..3e608ca 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -112,15 +112,21 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
       unsigned MBBStartOffset = 0;
       for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
            I != E; ++I) {
-        if (I->getOpcode() != PPC::BCC || I->getOperand(2).isImm()) {
+        MachineBasicBlock *Dest = 0;
+        if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm())
+          Dest = I->getOperand(2).getMBB();
+        else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ ||
+                  I->getOpcode() == PPC::BDZ8  || I->getOpcode() == PPC::BDZ) &&
+                 !I->getOperand(0).isImm())
+          Dest = I->getOperand(0).getMBB();
+
+        if (!Dest) {
           MBBStartOffset += TII->GetInstSizeInBytes(I);
           continue;
         }
         
         // Determine the offset from the current branch to the destination
         // block.
-        MachineBasicBlock *Dest = I->getOperand(2).getMBB();
-        
         int BranchSize;
         if (Dest->getNumber() <= MBB.getNumber()) {
           // If this is a backwards branch, the delta is the offset from the
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 81a54d7..08247c2 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -9,767 +9,619 @@
 //
 // This pass identifies loops where we can generate the PPC branch instructions
 // that decrement and test the count register (CTR) (bdnz and friends).
-// This pass is based on the HexagonHardwareLoops pass.
 //
 // The pattern that defines the induction variable can changed depending on
 // prior optimizations.  For example, the IndVarSimplify phase run by 'opt'
 // normalizes induction variables, and the Loop Strength Reduction pass
 // run by 'llc' may also make changes to the induction variable.
-// The pattern detected by this phase is due to running Strength Reduction.
 //
 // Criteria for CTR loops:
 //  - Countable loops (w/ ind. var for a trip count)
-//  - Assumes loops are normalized by IndVarSimplify
 //  - Try inner-most loops first
 //  - No nested CTR loops.
 //  - No function calls in loops.
 //
-//  Note: As with unconverted loops, PPCBranchSelector must be run after this
-//  pass in order to convert long-displacement jumps into jump pairs.
-//
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "ctrloops"
-#include "PPC.h"
-#include "MCTargetDesc/PPCPredicates.h"
-#include "PPCTargetMachine.h"
-#include "llvm/ADT/DenseMap.h"
+
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
 #include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "PPCTargetMachine.h"
+#include "PPC.h"
+
+#ifndef NDEBUG
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#endif
+
 #include <algorithm>
+#include <vector>
 
 using namespace llvm;
 
+#ifndef NDEBUG
+static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
+#endif
+
 STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
 
 namespace llvm {
   void initializePPCCTRLoopsPass(PassRegistry&);
+#ifndef NDEBUG
+  void initializePPCCTRLoopsVerifyPass(PassRegistry&);
+#endif
 }
 
 namespace {
-  class CountValue;
-  struct PPCCTRLoops : public MachineFunctionPass {
-    MachineLoopInfo       *MLI;
-    MachineRegisterInfo   *MRI;
-    const TargetInstrInfo *TII;
+  struct PPCCTRLoops : public FunctionPass {
+
+#ifndef NDEBUG
+    static int Counter;
+#endif
 
   public:
-    static char ID;   // Pass identification, replacement for typeid
+    static char ID;
 
-    PPCCTRLoops() : MachineFunctionPass(ID) {
+    PPCCTRLoops() : FunctionPass(ID), TM(0) {
+      initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
+    }
+    PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
       initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual bool runOnMachineFunction(MachineFunction &MF);
-
-    const char *getPassName() const { return "PPC CTR Loops"; }
+    virtual bool runOnFunction(Function &F);
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.setPreservesCFG();
-      AU.addRequired<MachineDominatorTree>();
-      AU.addPreserved<MachineDominatorTree>();
-      AU.addRequired<MachineLoopInfo>();
-      AU.addPreserved<MachineLoopInfo>();
-      MachineFunctionPass::getAnalysisUsage(AU);
+      AU.addRequired<LoopInfo>();
+      AU.addPreserved<LoopInfo>();
+      AU.addRequired<DominatorTree>();
+      AU.addPreserved<DominatorTree>();
+      AU.addRequired<ScalarEvolution>();
     }
 
   private:
-    /// getCanonicalInductionVariable - Check to see if the loop has a canonical
-    /// induction variable.
-    /// Should be defined in MachineLoop. Based upon version in class Loop.
-    void getCanonicalInductionVariable(MachineLoop *L,
-                              SmallVector<MachineInstr *, 4> &IVars,
-                              SmallVector<MachineInstr *, 4> &IOps) const;
-
-    /// getTripCount - Return a loop-invariant LLVM register indicating the
-    /// number of times the loop will be executed.  If the trip-count cannot
-    /// be determined, this return null.
-    CountValue *getTripCount(MachineLoop *L,
-                             SmallVector<MachineInstr *, 2> &OldInsts) const;
-
-    /// isInductionOperation - Return true if the instruction matches the
-    /// pattern for an opertion that defines an induction variable.
-    bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const;
-
-    /// isInvalidOperation - Return true if the instruction is not valid within
-    /// a CTR loop.
-    bool isInvalidLoopOperation(const MachineInstr *MI) const;
-
-    /// containsInavlidInstruction - Return true if the loop contains an
-    /// instruction that inhibits using the CTR loop.
-    bool containsInvalidInstruction(MachineLoop *L) const;
-
-    /// converToCTRLoop - Given a loop, check if we can convert it to a
-    /// CTR loop.  If so, then perform the conversion and return true.
-    bool convertToCTRLoop(MachineLoop *L);
-
-    /// isDead - Return true if the instruction is now dead.
-    bool isDead(const MachineInstr *MI,
-                SmallVector<MachineInstr *, 1> &DeadPhis) const;
-
-    /// removeIfDead - Remove the instruction if it is now dead.
-    void removeIfDead(MachineInstr *MI);
+    bool mightUseCTR(const Triple &TT, BasicBlock *BB);
+    bool convertToCTRLoop(Loop *L);
+
+  private:
+    PPCTargetMachine *TM;
+    LoopInfo *LI;
+    ScalarEvolution *SE;
+    DataLayout *TD;
+    DominatorTree *DT;
+    const TargetLibraryInfo *LibInfo;
   };
 
   char PPCCTRLoops::ID = 0;
+#ifndef NDEBUG
+  int PPCCTRLoops::Counter = 0;
+#endif
 
-
-  // CountValue class - Abstraction for a trip count of a loop. A
-  // smaller vesrsion of the MachineOperand class without the concerns
-  // of changing the operand representation.
-  class CountValue {
+#ifndef NDEBUG
+  struct PPCCTRLoopsVerify : public MachineFunctionPass {
   public:
-    enum CountValueType {
-      CV_Register,
-      CV_Immediate
-    };
-  private:
-    CountValueType Kind;
-    union Values {
-      unsigned RegNum;
-      int64_t ImmVal;
-      Values(unsigned r) : RegNum(r) {}
-      Values(int64_t i) : ImmVal(i) {}
-    } Contents;
-    bool isNegative;
+    static char ID;
 
-  public:
-    CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r),
-                                       isNegative(neg) {}
-    explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i),
-                                     isNegative(i < 0) {}
-    CountValueType getType() const { return Kind; }
-    bool isReg() const { return Kind == CV_Register; }
-    bool isImm() const { return Kind == CV_Immediate; }
-    bool isNeg() const { return isNegative; }
-
-    unsigned getReg() const {
-      assert(isReg() && "Wrong CountValue accessor");
-      return Contents.RegNum;
-    }
-    void setReg(unsigned Val) {
-      Contents.RegNum = Val;
-    }
-    int64_t getImm() const {
-      assert(isImm() && "Wrong CountValue accessor");
-      if (isNegative) {
-        return -Contents.ImmVal;
-      }
-      return Contents.ImmVal;
-    }
-    void setImm(int64_t Val) {
-      Contents.ImmVal = Val;
+    PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
+      initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
     }
 
-    void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
-      if (isReg()) { OS << PrintReg(getReg()); }
-      if (isImm()) { OS << getImm(); }
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineDominatorTree>();
+      MachineFunctionPass::getAnalysisUsage(AU);
     }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  private:
+    MachineDominatorTree *MDT;
   };
+
+  char PPCCTRLoopsVerify::ID = 0;
+#endif // NDEBUG
 } // end anonymous namespace
 
 INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
                       false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
 INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
                     false, false)
 
-/// isCompareEquals - Returns true if the instruction is a compare equals
-/// instruction with an immediate operand.
-static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp,
-                               bool &Int64Cmp) {
-  if (MI->getOpcode() == PPC::CMPWI) {
-    SignedCmp = true;
-    Int64Cmp = false;
-    return true;
-  } else if (MI->getOpcode() == PPC::CMPDI) {
-    SignedCmp = true;
-    Int64Cmp = true;
-    return true;
-  } else if (MI->getOpcode() == PPC::CMPLWI) {
-    SignedCmp = false;
-    Int64Cmp = false;
-    return true;
-  } else if (MI->getOpcode() == PPC::CMPLDI) {
-    SignedCmp = false;
-    Int64Cmp = true;
-    return true;
-  }
-
-  return false;
+FunctionPass *llvm::createPPCCTRLoops(PPCTargetMachine &TM) {
+  return new PPCCTRLoops(TM);
 }
 
+#ifndef NDEBUG
+INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
+                      "PowerPC CTR Loops Verify", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
+                    "PowerPC CTR Loops Verify", false, false)
 
-/// createPPCCTRLoops - Factory for creating
-/// the CTR loop phase.
-FunctionPass *llvm::createPPCCTRLoops() {
-  return new PPCCTRLoops();
+FunctionPass *llvm::createPPCCTRLoopsVerify() {
+  return new PPCCTRLoopsVerify();
 }
+#endif // NDEBUG
 
+bool PPCCTRLoops::runOnFunction(Function &F) {
+  LI = &getAnalysis<LoopInfo>();
+  SE = &getAnalysis<ScalarEvolution>();
+  DT = &getAnalysis<DominatorTree>();
+  TD = getAnalysisIfAvailable<DataLayout>();
+  LibInfo = getAnalysisIfAvailable<TargetLibraryInfo>();
 
-bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
-  DEBUG(dbgs() << "********* PPC CTR Loops *********\n");
-
-  bool Changed = false;
+  bool MadeChange = false;
 
-  // get the loop information
-  MLI = &getAnalysis<MachineLoopInfo>();
-  // get the register information
-  MRI = &MF.getRegInfo();
-  // the target specific instructio info.
-  TII = MF.getTarget().getInstrInfo();
-
-  for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+  for (LoopInfo::iterator I = LI->begin(), E = LI->end();
        I != E; ++I) {
-    MachineLoop *L = *I;
-    if (!L->getParentLoop()) {
-      Changed |= convertToCTRLoop(L);
-    }
+    Loop *L = *I;
+    if (!L->getParentLoop())
+      MadeChange |= convertToCTRLoop(L);
   }
 
-  return Changed;
+  return MadeChange;
 }
 
-/// getCanonicalInductionVariable - Check to see if the loop has a canonical
-/// induction variable. We check for a simple recurrence pattern - an
-/// integer recurrence that decrements by one each time through the loop and
-/// ends at zero.  If so, return the phi node that corresponds to it.
-///
-/// Based upon the similar code in LoopInfo except this code is specific to
-/// the machine.
-/// This method assumes that the IndVarSimplify pass has been run by 'opt'.
-///
-void
-PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L,
-                                  SmallVector<MachineInstr *, 4> &IVars,
-                                  SmallVector<MachineInstr *, 4> &IOps) const {
-  MachineBasicBlock *TopMBB = L->getTopBlock();
-  MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
-  assert(PI != TopMBB->pred_end() &&
-         "Loop must have more than one incoming edge!");
-  MachineBasicBlock *Backedge = *PI++;
-  if (PI == TopMBB->pred_end()) return;  // dead loop
-  MachineBasicBlock *Incoming = *PI++;
-  if (PI != TopMBB->pred_end()) return;  // multiple backedges?
-
-  // make sure there is one incoming and one backedge and determine which
-  // is which.
-  if (L->contains(Incoming)) {
-    if (L->contains(Backedge))
-      return;
-    std::swap(Incoming, Backedge);
-  } else if (!L->contains(Backedge))
-    return;
-
-  // Loop over all of the PHI nodes, looking for a canonical induction variable:
-  //   - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
-  //   - The recurrence comes from the backedge.
-  //   - the definition is an induction operatio.n
-  for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end();
-       I != E && I->isPHI(); ++I) {
-    MachineInstr *MPhi = &*I;
-    unsigned DefReg = MPhi->getOperand(0).getReg();
-    for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
-      // Check each operand for the value from the backedge.
-      MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB();
-      if (L->contains(MBB)) { // operands comes from the backedge
-        // Check if the definition is an induction operation.
-        MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
-        if (isInductionOperation(DI, DefReg)) {
-          IOps.push_back(DI);
-          IVars.push_back(MPhi);
+bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
+  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
+       J != JE; ++J) {
+    if (CallInst *CI = dyn_cast<CallInst>(J)) {
+      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
+        // Inline ASM is okay, unless it clobbers the ctr register.
+        InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
+        for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
+          InlineAsm::ConstraintInfo &C = CIV[i];
+          if (C.Type != InlineAsm::isInput)
+            for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
+              if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
+                return true;
         }
-      }
-    }
-  }
-  return;
-}
 
-/// getTripCount - Return a loop-invariant LLVM value indicating the
-/// number of times the loop will be executed.  The trip count can
-/// be either a register or a constant value.  If the trip-count
-/// cannot be determined, this returns null.
-///
-/// We find the trip count from the phi instruction that defines the
-/// induction variable.  We follow the links to the CMP instruction
-/// to get the trip count.
-///
-/// Based upon getTripCount in LoopInfo.
-///
-CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
-                           SmallVector<MachineInstr *, 2> &OldInsts) const {
-  MachineBasicBlock *LastMBB = L->getExitingBlock();
-  // Don't generate a CTR loop if the loop has more than one exit.
-  if (LastMBB == 0)
-    return 0;
-
-  MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
-  if (LastI->getOpcode() != PPC::BCC)
-    return 0;
-
-  // We need to make sure that this compare is defining the condition
-  // register actually used by the terminating branch.
-
-  unsigned PredReg = LastI->getOperand(1).getReg();
-  DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI);
-
-  unsigned PredCond = LastI->getOperand(0).getImm();
-  if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
-    return 0;
-
-  // Check that the loop has a induction variable.
-  SmallVector<MachineInstr *, 4> IVars, IOps;
-  getCanonicalInductionVariable(L, IVars, IOps);
-  for (unsigned i = 0; i < IVars.size(); ++i) {
-    MachineInstr *IOp = IOps[i];
-    MachineInstr *IV_Inst = IVars[i];
-
-    // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm',
-    //  if Imm is 0, get the count from the PHI opnd
-    //  if Imm is -M, than M is the count
-    //  Otherwise, Imm is the count
-    MachineOperand *IV_Opnd;
-    const MachineOperand *InitialValue;
-    if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
-      InitialValue = &IV_Inst->getOperand(1);
-      IV_Opnd = &IV_Inst->getOperand(3);
-    } else {
-      InitialValue = &IV_Inst->getOperand(3);
-      IV_Opnd = &IV_Inst->getOperand(1);
-    }
+        continue;
+      }
 
-    DEBUG(dbgs() << "Considering:\n");
-    DEBUG(dbgs() << "  induction operation: " << *IOp);
-    DEBUG(dbgs() << "  induction variable: " << *IV_Inst);
-    DEBUG(dbgs() << "  initial value: " << *InitialValue << "\n");
-  
-    // Look for the cmp instruction to determine if we
-    // can get a useful trip count.  The trip count can
-    // be either a register or an immediate.  The location
-    // of the value depends upon the type (reg or imm).
-    for (MachineRegisterInfo::reg_iterator
-         RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
-         RI != RE; ++RI) {
-      IV_Opnd = &RI.getOperand();
-      bool SignedCmp, Int64Cmp;
-      MachineInstr *MI = IV_Opnd->getParent();
-      if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) &&
-          MI->getOperand(0).getReg() == PredReg) {
-
-        OldInsts.push_back(MI);
-        OldInsts.push_back(IOp);
- 
-        DEBUG(dbgs() << "  compare: " << *MI);
- 
-        const MachineOperand &MO = MI->getOperand(2);
-        assert(MO.isImm() && "IV Cmp Operand should be an immediate");
-
-        int64_t ImmVal;
-        if (SignedCmp)
-          ImmVal = (short) MO.getImm();
-        else
-          ImmVal = MO.getImm();
-  
-        const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
-        assert(L->contains(IV_DefInstr->getParent()) &&
-               "IV definition should occurs in loop");
-        int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm();
-  
-        assert(InitialValue->isReg() && "Expecting register for init value");
-        unsigned InitialValueReg = InitialValue->getReg();
-  
-        MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
-  
-        // Here we need to look for an immediate load (an li or lis/ori pair).
-        if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
-                         DefInstr->getOpcode() == PPC::ORI)) {
-          int64_t start = DefInstr->getOperand(2).getImm();
-          MachineInstr *DefInstr2 =
-            MRI->getVRegDef(DefInstr->getOperand(1).getReg());
-          if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
-                            DefInstr2->getOpcode() == PPC::LIS)) {
-            DEBUG(dbgs() << "  initial constant: " << *DefInstr);
-            DEBUG(dbgs() << "  initial constant: " << *DefInstr2);
-
-            start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16;
-  
-            int64_t count = ImmVal - start;
-            if ((count % iv_value) != 0) {
-              return 0;
-            }
-
-            OldInsts.push_back(DefInstr);
-            OldInsts.push_back(DefInstr2);
-
-            // count/iv_value, the trip count, should be positive here. If it
-            // is negative, that indicates that the counter will wrap.
-            if (Int64Cmp)
-              return new CountValue(count/iv_value);
-            else
-              return new CountValue(uint32_t(count/iv_value));
-          }
-        } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
-                                DefInstr->getOpcode() == PPC::LI)) {
-          DEBUG(dbgs() << "  initial constant: " << *DefInstr);
-
-          int64_t count = ImmVal -
-            int64_t(short(DefInstr->getOperand(1).getImm()));
-          if ((count % iv_value) != 0) {
-            return 0;
+      if (!TM)
+        return true;
+      const TargetLowering *TLI = TM->getTargetLowering();
+
+      if (Function *F = CI->getCalledFunction()) {
+        // Most intrinsics don't become function calls, but some might.
+        // sin, cos, exp and log are always calls.
+        unsigned Opcode;
+        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
+          switch (F->getIntrinsicID()) {
+          default: continue;
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+                       !defined(setjmp_undefined_for_msvc)
+#  pragma push_macro("setjmp")
+#  undef setjmp
+#  define setjmp_undefined_for_msvc
+#endif
+
+          case Intrinsic::setjmp:
+
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+ // let's return it to _setjmp state
+#  pragma pop_macro("setjmp")
+#  undef setjmp_undefined_for_msvc
+#endif
+
+          case Intrinsic::longjmp:
+          case Intrinsic::memcpy:
+          case Intrinsic::memmove:
+          case Intrinsic::memset:
+          case Intrinsic::powi:
+          case Intrinsic::log:
+          case Intrinsic::log2:
+          case Intrinsic::log10:
+          case Intrinsic::exp:
+          case Intrinsic::exp2:
+          case Intrinsic::pow:
+          case Intrinsic::sin:
+          case Intrinsic::cos:
+            return true;
+          case Intrinsic::sqrt:      Opcode = ISD::FSQRT;      break;
+          case Intrinsic::floor:     Opcode = ISD::FFLOOR;     break;
+          case Intrinsic::ceil:      Opcode = ISD::FCEIL;      break;
+          case Intrinsic::trunc:     Opcode = ISD::FTRUNC;     break;
+          case Intrinsic::rint:      Opcode = ISD::FRINT;      break;
+          case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
           }
+        }
 
-          OldInsts.push_back(DefInstr);
-
-          if (Int64Cmp)
-            return new CountValue(count/iv_value);
-          else
-            return new CountValue(uint32_t(count/iv_value));
-        } else if (iv_value == 1 || iv_value == -1) {
-          // We can't determine a constant starting value.
-          if (ImmVal == 0) {
-            return new CountValue(InitialValueReg, iv_value > 0);
+        // PowerPC does not use [US]DIVREM or other library calls for
+        // operations on regular types which are not otherwise library calls
+        // (i.e. soft float or atomics). If adapting for targets that do,
+        // additional care is required here.
+
+        LibFunc::Func Func;
+        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
+            LibInfo->getLibFunc(F->getName(), Func) &&
+            LibInfo->hasOptimizedCodeGen(Func)) {
+          // Non-read-only functions are never treated as intrinsics.
+          if (!CI->onlyReadsMemory())
+            return true;
+
+          // Conversion happens only for FP calls.
+          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
+            return true;
+
+          switch (Func) {
+          default: return true;
+          case LibFunc::copysign:
+          case LibFunc::copysignf:
+          case LibFunc::copysignl:
+            continue; // ISD::FCOPYSIGN is never a library call.
+          case LibFunc::fabs:
+          case LibFunc::fabsf:
+          case LibFunc::fabsl:
+            continue; // ISD::FABS is never a library call.
+          case LibFunc::sqrt:
+          case LibFunc::sqrtf:
+          case LibFunc::sqrtl:
+            Opcode = ISD::FSQRT; break;
+          case LibFunc::floor:
+          case LibFunc::floorf:
+          case LibFunc::floorl:
+            Opcode = ISD::FFLOOR; break;
+          case LibFunc::nearbyint:
+          case LibFunc::nearbyintf:
+          case LibFunc::nearbyintl:
+            Opcode = ISD::FNEARBYINT; break;
+          case LibFunc::ceil:
+          case LibFunc::ceilf:
+          case LibFunc::ceill:
+            Opcode = ISD::FCEIL; break;
+          case LibFunc::rint:
+          case LibFunc::rintf:
+          case LibFunc::rintl:
+            Opcode = ISD::FRINT; break;
+          case LibFunc::trunc:
+          case LibFunc::truncf:
+          case LibFunc::truncl:
+            Opcode = ISD::FTRUNC; break;
           }
-          // FIXME: handle non-zero end value.
+
+          MVT VTy =
+            TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true);
+          if (VTy == MVT::Other)
+            return true;
+          
+          if (TLI->isOperationLegalOrCustom(Opcode, VTy))
+            continue;
+          else if (VTy.isVector() &&
+                   TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType()))
+            continue;
+
+          return true;
         }
-        // FIXME: handle non-unit increments (we might not want to introduce
-        // division but we can handle some 2^n cases with shifts).
-  
       }
-    }
-  }
-  return 0;
-}
-
-/// isInductionOperation - return true if the operation is matches the
-/// pattern that defines an induction variable:
-///    addi iv, c
-///
-bool
-PPCCTRLoops::isInductionOperation(const MachineInstr *MI,
-                                           unsigned IVReg) const {
-  return ((MI->getOpcode() == PPC::ADDI || MI->getOpcode() == PPC::ADDI8) &&
-          MI->getOperand(1).isReg() && // could be a frame index instead
-          MI->getOperand(1).getReg() == IVReg);
-}
 
-/// isInvalidOperation - Return true if the operation is invalid within
-/// CTR loop.
-bool
-PPCCTRLoops::isInvalidLoopOperation(const MachineInstr *MI) const {
-
-  // call is not allowed because the callee may use a CTR loop
-  if (MI->getDesc().isCall()) {
-    return true;
-  }
-  // check if the instruction defines a CTR loop register
-  // (this will also catch nested CTR loops)
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
-    if (MO.isReg() && MO.isDef() &&
-        (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) {
       return true;
-    }
-  }
-  return false;
-}
+    } else if (isa<BinaryOperator>(J) &&
+               J->getType()->getScalarType()->isPPC_FP128Ty()) {
+      // Most operations on ppc_f128 values become calls.
+      return true;
+    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
+               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
+      CastInst *CI = cast<CastInst>(J);
+      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
+          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
+          (TT.isArch32Bit() &&
+           (CI->getSrcTy()->getScalarType()->isIntegerTy(64) ||
+            CI->getDestTy()->getScalarType()->isIntegerTy(64))
+          ))
+        return true;
+    } else if (TT.isArch32Bit() &&
+               J->getType()->getScalarType()->isIntegerTy(64) &&
+               (J->getOpcode() == Instruction::UDiv ||
+                J->getOpcode() == Instruction::SDiv ||
+                J->getOpcode() == Instruction::URem ||
+                J->getOpcode() == Instruction::SRem)) {
+      return true;
+    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
+      // On PowerPC, indirect jumps use the counter register.
+      return true;
+    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
+      if (!TM)
+        return true;
+      const TargetLowering *TLI = TM->getTargetLowering();
 
-/// containsInvalidInstruction - Return true if the loop contains
-/// an instruction that inhibits the use of the CTR loop function.
-///
-bool PPCCTRLoops::containsInvalidInstruction(MachineLoop *L) const {
-  const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
-  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
-    MachineBasicBlock *MBB = Blocks[i];
-    for (MachineBasicBlock::iterator
-           MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
-      const MachineInstr *MI = &*MII;
-      if (isInvalidLoopOperation(MI)) {
+      if (TLI->supportJumpTables() &&
+          SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries())
         return true;
-      }
     }
   }
+
   return false;
 }
 
-/// isDead returns true if the instruction is dead
-/// (this was essentially copied from DeadMachineInstructionElim::isDead, but
-/// with special cases for inline asm, physical registers and instructions with
-/// side effects removed)
-bool PPCCTRLoops::isDead(const MachineInstr *MI,
-                         SmallVector<MachineInstr *, 1> &DeadPhis) const {
-  // Examine each operand.
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
-    if (MO.isReg() && MO.isDef()) {
-      unsigned Reg = MO.getReg();
-      if (!MRI->use_nodbg_empty(Reg)) {
-        // This instruction has users, but if the only user is the phi node for
-        // the parent block, and the only use of that phi node is this
-        // instruction, then this instruction is dead: both it (and the phi
-        // node) can be removed.
-        MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
-        if (llvm::next(I) == MRI->use_end() &&
-            I.getOperand().getParent()->isPHI()) {
-          MachineInstr *OnePhi = I.getOperand().getParent();
-
-          for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
-            const MachineOperand &OPO = OnePhi->getOperand(j);
-            if (OPO.isReg() && OPO.isDef()) {
-              unsigned OPReg = OPO.getReg();
-
-              MachineRegisterInfo::use_iterator nextJ;
-              for (MachineRegisterInfo::use_iterator J = MRI->use_begin(OPReg),
-                   E = MRI->use_end(); J!=E; J=nextJ) {
-                nextJ = llvm::next(J);
-                MachineOperand& Use = J.getOperand();
-                MachineInstr *UseMI = Use.getParent();
-
-                if (MI != UseMI) {
-                  // The phi node has a user that is not MI, bail...
-                  return false;
-                }
-              }
-            }
-          }
+bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
+  bool MadeChange = false;
 
-          DeadPhis.push_back(OnePhi);
-        } else {
-          // This def has a non-debug use. Don't delete the instruction!
-          return false;
-        }
-      }
-    }
+  Triple TT = Triple(L->getHeader()->getParent()->getParent()->
+                     getTargetTriple());
+  if (!TT.isArch32Bit() && !TT.isArch64Bit())
+    return MadeChange; // Unknown arch. type.
+
+  // Process nested loops first.
+  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+    MadeChange |= convertToCTRLoop(*I);
   }
 
-  // If there are no defs with uses, the instruction is dead.
-  return true;
-}
+  // If a nested loop has been converted, then we can't convert this loop.
+  if (MadeChange)
+    return MadeChange;
+
+#ifndef NDEBUG
+  // Stop trying after reaching the limit (if any).
+  int Limit = CTRLoopLimit;
+  if (Limit >= 0) {
+    if (Counter >= CTRLoopLimit)
+      return false;
+    Counter++;
+  }
+#endif
+
+  // We don't want to spill/restore the counter register, and so we don't
+  // want to use the counter register if the loop contains calls.
+  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+       I != IE; ++I)
+    if (mightUseCTR(TT, *I))
+      return MadeChange;
+
+  SmallVector<BasicBlock*, 4> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+
+  BasicBlock *CountedExitBlock = 0;
+  const SCEV *ExitCount = 0;
+  BranchInst *CountedExitBranch = 0;
+  for (SmallVector<BasicBlock*, 4>::iterator I = ExitingBlocks.begin(),
+       IE = ExitingBlocks.end(); I != IE; ++I) {
+    const SCEV *EC = SE->getExitCount(L, *I);
+    DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
+                    (*I)->getName() << ": " << *EC << "\n");
+    if (isa<SCEVCouldNotCompute>(EC))
+      continue;
+    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
+      if (ConstEC->getValue()->isZero())
+        continue;
+    } else if (!SE->isLoopInvariant(EC, L))
+      continue;
+
+    // We now have a loop-invariant count of loop iterations (which is not the
+    // constant zero) for which we know that this loop will not exit via this
+    // exisiting block.
+
+    // We need to make sure that this block will run on every loop iteration.
+    // For this to be true, we must dominate all blocks with backedges. Such
+    // blocks are in-loop predecessors to the header block.
+    bool NotAlways = false;
+    for (pred_iterator PI = pred_begin(L->getHeader()),
+         PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
+      if (!L->contains(*PI))
+        continue;
 
-void PPCCTRLoops::removeIfDead(MachineInstr *MI) {
-  // This procedure was essentially copied from DeadMachineInstructionElim
+      if (!DT->dominates(*I, *PI)) {
+        NotAlways = true;
+        break;
+      }
+    }
 
-  SmallVector<MachineInstr *, 1> DeadPhis;
-  if (isDead(MI, DeadPhis)) {
-    DEBUG(dbgs() << "CTR looping will remove: " << *MI);
+    if (NotAlways)
+      continue;
 
-    // It is possible that some DBG_VALUE instructions refer to this
-    // instruction.  Examine each def operand for such references;
-    // if found, mark the DBG_VALUE as undef (but don't delete it).
-    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-      const MachineOperand &MO = MI->getOperand(i);
-      if (!MO.isReg() || !MO.isDef())
+    // Make sure this blocks ends with a conditional branch.
+    Instruction *TI = (*I)->getTerminator();
+    if (!TI)
+      continue;
+
+    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+      if (!BI->isConditional())
         continue;
-      unsigned Reg = MO.getReg();
-      MachineRegisterInfo::use_iterator nextI;
-      for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
-           E = MRI->use_end(); I!=E; I=nextI) {
-        nextI = llvm::next(I);  // I is invalidated by the setReg
-        MachineOperand& Use = I.getOperand();
-        MachineInstr *UseMI = Use.getParent();
-        if (UseMI==MI)
-          continue;
-        if (Use.isDebug()) // this might also be a instr -> phi -> instr case
-                           // which can also be removed.
-          UseMI->getOperand(0).setReg(0U);
-      }
-    }
 
-    MI->eraseFromParent();
-    for (unsigned i = 0; i < DeadPhis.size(); ++i) {
-      DeadPhis[i]->eraseFromParent();
-    }
+      CountedExitBranch = BI;
+    } else
+      continue;
+
+    // Note that this block may not be the loop latch block, even if the loop
+    // has a latch block.
+    CountedExitBlock = *I;
+    ExitCount = EC;
+    break;
   }
+
+  if (!CountedExitBlock)
+    return MadeChange;
+
+  BasicBlock *Preheader = L->getLoopPreheader();
+
+  // If we don't have a preheader, then insert one. If we already have a
+  // preheader, then we can use it (except if the preheader contains a use of
+  // the CTR register because some such uses might be reordered by the
+  // selection DAG after the mtctr instruction).
+  if (!Preheader || mightUseCTR(TT, Preheader))
+    Preheader = InsertPreheaderForLoop(L, this);
+  if (!Preheader)
+    return MadeChange;
+
+  DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n");
+
+  // Insert the count into the preheader and replace the condition used by the
+  // selected branch.
+  MadeChange = true;
+
+  SCEVExpander SCEVE(*SE, "loopcnt");
+  LLVMContext &C = SE->getContext();
+  Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
+                                       Type::getInt32Ty(C);
+  if (!ExitCount->getType()->isPointerTy() &&
+      ExitCount->getType() != CountType)
+    ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
+  ExitCount = SE->getAddExpr(ExitCount,
+                             SE->getConstant(CountType, 1)); 
+  Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType,
+                                       Preheader->getTerminator());
+
+  IRBuilder<> CountBuilder(Preheader->getTerminator());
+  Module *M = Preheader->getParent()->getParent();
+  Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
+                                               CountType);
+  CountBuilder.CreateCall(MTCTRFunc, ECValue);
+
+  IRBuilder<> CondBuilder(CountedExitBranch);
+  Value *DecFunc =
+    Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
+  Value *NewCond = CondBuilder.CreateCall(DecFunc);
+  Value *OldCond = CountedExitBranch->getCondition();
+  CountedExitBranch->setCondition(NewCond);
+
+  // The false branch must exit the loop.
+  if (!L->contains(CountedExitBranch->getSuccessor(0)))
+    CountedExitBranch->swapSuccessors();
+
+  // The old condition may be dead now, and may have even created a dead PHI
+  // (the original induction variable).
+  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+  DeleteDeadPHIs(CountedExitBlock);
+
+  ++NumCTRLoops;
+  return MadeChange;
 }
 
-/// converToCTRLoop - check if the loop is a candidate for
-/// converting to a CTR loop.  If so, then perform the
-/// transformation.
-///
-/// This function works on innermost loops first.  A loop can
-/// be converted if it is a counting loop; either a register
-/// value or an immediate.
-///
-/// The code makes several assumptions about the representation
-/// of the loop in llvm.
-bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
-  bool Changed = false;
-  // Process nested loops first.
-  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
-    Changed |= convertToCTRLoop(*I);
-  }
-  // If a nested loop has been converted, then we can't convert this loop.
-  if (Changed) {
-    return Changed;
+#ifndef NDEBUG
+static bool clobbersCTR(const MachineInstr *MI) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg()) {
+      if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
+        return true;
+    } else if (MO.isRegMask()) {
+      if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
+        return true;
+    }
   }
 
-  SmallVector<MachineInstr *, 2> OldInsts;
-  // Are we able to determine the trip count for the loop?
-  CountValue *TripCount = getTripCount(L, OldInsts);
-  if (TripCount == 0) {
-    DEBUG(dbgs() << "failed to get trip count!\n");
-    return false;
-  }
+  return false;
+}
 
-  if (TripCount->isImm()) {
-    DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n");
+static bool verifyCTRBranch(MachineBasicBlock *MBB,
+                            MachineBasicBlock::iterator I) {
+  MachineBasicBlock::iterator BI = I;
+  SmallSet<MachineBasicBlock *, 16>   Visited;
+  SmallVector<MachineBasicBlock *, 8> Preds;
+  bool CheckPreds;
+
+  if (I == MBB->begin()) {
+    Visited.insert(MBB);
+    goto queue_preds;
+  } else
+    --I;
+
+check_block:
+  Visited.insert(MBB);
+  if (I == MBB->end())
+    goto queue_preds;
+
+  CheckPreds = true;
+  for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
+    unsigned Opc = I->getOpcode();
+    if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
+      CheckPreds = false;
+      break;
+    }
 
-    // FIXME: We currently can't form 64-bit constants
-    // (including 32-bit unsigned constants)
-    if (!isInt<32>(TripCount->getImm()))
+    if (I != BI && clobbersCTR(I)) {
+      DEBUG(dbgs() << "BB#" << MBB->getNumber() << " (" <<
+                      MBB->getFullName() << ") instruction " << *I <<
+                      " clobbers CTR, invalidating " << "BB#" <<
+                      BI->getParent()->getNumber() << " (" <<
+                      BI->getParent()->getFullName() << ") instruction " <<
+                      *BI << "\n");
       return false;
-  }
+    }
 
-  // Does the loop contain any invalid instructions?
-  if (containsInvalidInstruction(L)) {
-    return false;
+    if (I == IE)
+      break;
   }
-  MachineBasicBlock *Preheader = L->getLoopPreheader();
-  // No preheader means there's not place for the loop instr.
-  if (Preheader == 0) {
-    return false;
-  }
-  MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
 
-  DebugLoc dl;
-  if (InsertPos != Preheader->end())
-    dl = InsertPos->getDebugLoc();
+  if (!CheckPreds && Preds.empty())
+    return true;
 
-  MachineBasicBlock *LastMBB = L->getExitingBlock();
-  // Don't generate CTR loop if the loop has more than one exit.
-  if (LastMBB == 0) {
-    return false;
-  }
-  MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
-
-  // Determine the loop start.
-  MachineBasicBlock *LoopStart = L->getTopBlock();
-  if (L->getLoopLatch() != LastMBB) {
-    // When the exit and latch are not the same, use the latch block as the
-    // start.
-    // The loop start address is used only after the 1st iteration, and the loop
-    // latch may contains instrs. that need to be executed after the 1st iter.
-    LoopStart = L->getLoopLatch();
-    // Make sure the latch is a successor of the exit, otherwise it won't work.
-    if (!LastMBB->isSuccessor(LoopStart)) {
+  if (CheckPreds) {
+queue_preds:
+    if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
+      DEBUG(dbgs() << "Unable to find a MTCTR instruction for BB#" <<
+                      BI->getParent()->getNumber() << " (" <<
+                      BI->getParent()->getFullName() << ") instruction " <<
+                      *BI << "\n");
       return false;
     }
+
+    for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+         PIE = MBB->pred_end(); PI != PIE; ++PI)
+      Preds.push_back(*PI);
   }
 
-  // Convert the loop to a CTR loop
-  DEBUG(dbgs() << "Change to CTR loop at "; L->dump());
-
-  MachineFunction *MF = LastMBB->getParent();
-  const PPCSubtarget &Subtarget = MF->getTarget().getSubtarget<PPCSubtarget>();
-  bool isPPC64 = Subtarget.isPPC64();
-
-  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
-  const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
-  const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
-
-  unsigned CountReg;
-  if (TripCount->isReg()) {
-    // Create a copy of the loop count register.
-    const TargetRegisterClass *SrcRC =
-      MF->getRegInfo().getRegClass(TripCount->getReg());
-    CountReg = MF->getRegInfo().createVirtualRegister(RC);
-    unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ?
-                        (unsigned) PPC::EXTSW_32_64 :
-                        (unsigned) TargetOpcode::COPY;
-    BuildMI(*Preheader, InsertPos, dl,
-            TII->get(CopyOp), CountReg).addReg(TripCount->getReg());
-    if (TripCount->isNeg()) {
-      unsigned CountReg1 = CountReg;
-      CountReg = MF->getRegInfo().createVirtualRegister(RC);
-      BuildMI(*Preheader, InsertPos, dl,
-              TII->get(isPPC64 ? PPC::NEG8 : PPC::NEG),
-                       CountReg).addReg(CountReg1);
+  do {
+    MBB = Preds.pop_back_val();
+    if (!Visited.count(MBB)) {
+      I = MBB->getLastNonDebugInstr();
+      goto check_block;
     }
-  } else {
-    assert(TripCount->isImm() && "Expecting immedate vaule for trip count");
-    // Put the trip count in a register for transfer into the count register.
-
-    int64_t CountImm = TripCount->getImm();
-    if (TripCount->isNeg())
-      CountImm = -CountImm;
-
-    CountReg = MF->getRegInfo().createVirtualRegister(RC);
-    if (abs64(CountImm) > 0x7FFF) {
-      BuildMI(*Preheader, InsertPos, dl,
-              TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS),
-              CountReg).addImm((CountImm >> 16) & 0xFFFF);
-      unsigned CountReg1 = CountReg;
-      CountReg = MF->getRegInfo().createVirtualRegister(RC);
-      BuildMI(*Preheader, InsertPos, dl,
-              TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
-              CountReg).addReg(CountReg1).addImm(CountImm & 0xFFFF);
-    } else {
-      BuildMI(*Preheader, InsertPos, dl,
-              TII->get(isPPC64 ? PPC::LI8 : PPC::LI),
-              CountReg).addImm(CountImm);
-    }
-  }
+  } while (!Preds.empty());
 
-  // Add the mtctr instruction to the beginning of the loop.
-  BuildMI(*Preheader, InsertPos, dl,
-          TII->get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(CountReg,
-            TripCount->isImm() ? RegState::Kill : 0);
-
-  // Make sure the loop start always has a reference in the CFG.  We need to
-  // create a BlockAddress operand to get this mechanism to work both the
-  // MachineBasicBlock and BasicBlock objects need the flag set.
-  LoopStart->setHasAddressTaken();
-  // This line is needed to set the hasAddressTaken flag on the BasicBlock
-  // object
-  BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
-
-  // Replace the loop branch with a bdnz instruction.
-  dl = LastI->getDebugLoc();
-  const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
-  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
-    MachineBasicBlock *MBB = Blocks[i];
-    if (MBB != Preheader)
-      MBB->addLiveIn(isPPC64 ? PPC::CTR8 : PPC::CTR);
-  }
+  return true;
+}
 
-  // The loop ends with either:
-  //  - a conditional branch followed by an unconditional branch, or
-  //  - a conditional branch to the loop start.
-  assert(LastI->getOpcode() == PPC::BCC &&
-         "loop end must start with a BCC instruction");
-  // Either the BCC branches to the beginning of the loop, or it
-  // branches out of the loop and there is an unconditional branch
-  // to the start of the loop.
-  MachineBasicBlock *BranchTarget = LastI->getOperand(2).getMBB();
-  BuildMI(*LastMBB, LastI, dl,
-        TII->get((BranchTarget == LoopStart) ?
-                 (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
-                 (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(BranchTarget);
-
-  // Conditional branch; just delete it.
-  DEBUG(dbgs() << "Removing old branch: " << *LastI);
-  LastMBB->erase(LastI);
-
-  delete TripCount;
-
-  // The induction operation (add) and the comparison (cmpwi) may now be
-  // unneeded. If these are unneeded, then remove them.
-  for (unsigned i = 0; i < OldInsts.size(); ++i)
-    removeIfDead(OldInsts[i]);
+bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
+  MDT = &getAnalysis<MachineDominatorTree>();
+
+  // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
+  // any other instructions that might clobber the ctr register.
+  for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
+       I != IE; ++I) {
+    MachineBasicBlock *MBB = I;
+    if (!MDT->isReachableFromEntry(MBB))
+      continue;
+
+    for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(),
+      MIIE = MBB->end(); MII != MIIE; ++MII) {
+      unsigned Opc = MII->getOpcode();
+      if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
+          Opc == PPC::BDZ8  || Opc == PPC::BDZ)
+        if (!verifyCTRBranch(MBB, MII))
+          llvm_unreachable("Invalid PPC CTR loop!");
+    }
+  }
 
-  ++NumCTRLoops;
-  return true;
+  return false;
 }
+#endif // NDEBUG
 
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 6478718..40e4968 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -64,8 +64,7 @@ namespace {
     unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
     unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
 
-    unsigned getHA16Encoding(const MachineInstr &MI, unsigned OpNo) const;
-    unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const;
+    unsigned getS16ImmEncoding(const MachineInstr &MI, unsigned OpNo) const;
     unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
     unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
     unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const;
@@ -194,21 +193,19 @@ unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI,
   return 0;
 }
 
-unsigned PPCCodeEmitter::getHA16Encoding(const MachineInstr &MI,
-                                         unsigned OpNo) const {
+unsigned PPCCodeEmitter::getS16ImmEncoding(const MachineInstr &MI,
+                                           unsigned OpNo) const {
   const MachineOperand &MO = MI.getOperand(OpNo);
   if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
 
-  MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_high));
-  return 0;
-}
+  unsigned RelocID;
+  switch (MO.getTargetFlags() & PPCII::MO_ACCESS_MASK) {
+    default: llvm_unreachable("Unsupported target operand flags!");
+    case PPCII::MO_HA16: RelocID = PPC::reloc_absolute_high; break;
+    case PPCII::MO_LO16: RelocID = PPC::reloc_absolute_low; break;
+  }
 
-unsigned PPCCodeEmitter::getLO16Encoding(const MachineInstr &MI,
-                                         unsigned OpNo) const {
-  const MachineOperand &MO = MI.getOperand(OpNo);
-  if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
-  
-  MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low));
+  MCE.addRelocation(GetRelocation(MO, RelocID));
   return 0;
 }
 
@@ -237,7 +234,7 @@ unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
   
   const MachineOperand &MO = MI.getOperand(OpNo);
   if (MO.isImm())
-    return (getMachineOpValue(MI, MO) & 0x3FFF) | RegBits;
+    return ((getMachineOpValue(MI, MO) >> 2) & 0x3FFF) | RegBits;
   
   MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low_ix));
   return RegBits;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 9ec10f6..dabe613 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -334,6 +334,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
     *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
 
   MachineModuleInfo &MMI = MF.getMMI();
+  const MCRegisterInfo &MRI = MMI.getContext().getRegisterInfo();
   DebugLoc dl;
   bool needsFrameMoves = MMI.hasDebugInfo() ||
     MF.getFunction()->needsUnwindTableEntry();
@@ -400,13 +401,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
     if (HasFP)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
         .addReg(PPC::X31)
-        .addImm(FPOffset/4)
+        .addImm(FPOffset)
         .addReg(PPC::X1);
 
     if (MustSaveLR)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
         .addReg(PPC::X0)
-        .addImm(LROffset / 4)
+        .addImm(LROffset)
         .addReg(PPC::X1);
 
     if (!MustSaveCRs.empty())
@@ -500,7 +501,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
     } else if (isInt<16>(NegFrameSize)) {
       BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
         .addReg(PPC::X1)
-        .addImm(NegFrameSize / 4)
+        .addImm(NegFrameSize)
         .addReg(PPC::X1);
     } else {
       BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
@@ -515,8 +516,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
     }
   }
 
-  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
-
   // Add the "machine moves" for the instructions we generated above, but in
   // reverse order.
   if (needsFrameMoves) {
@@ -525,25 +524,22 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
     BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel);
 
     // Show update of SP.
-    if (NegFrameSize) {
-      MachineLocation SPDst(MachineLocation::VirtualFP);
-      MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize);
-      Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
-    } else {
-      MachineLocation SP(isPPC64 ? PPC::X31 : PPC::R31);
-      Moves.push_back(MachineMove(FrameLabel, SP, SP));
-    }
+    assert(NegFrameSize);
+    MMI.addFrameInst(
+        MCCFIInstruction::createDefCfaOffset(FrameLabel, NegFrameSize));
 
     if (HasFP) {
-      MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset);
-      MachineLocation FPSrc(isPPC64 ? PPC::X31 : PPC::R31);
-      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+      unsigned Reg = isPPC64 ? PPC::X31 : PPC::R31;
+      Reg = MRI.getDwarfRegNum(Reg, true);
+      MMI.addFrameInst(
+          MCCFIInstruction::createOffset(FrameLabel, Reg, FPOffset));
     }
 
     if (MustSaveLR) {
-      MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
-      MachineLocation LRSrc(isPPC64 ? PPC::LR8 : PPC::LR);
-      Moves.push_back(MachineMove(FrameLabel, LRDst, LRSrc));
+      unsigned Reg = isPPC64 ? PPC::LR8 : PPC::LR;
+      Reg = MRI.getDwarfRegNum(Reg, true);
+      MMI.addFrameInst(
+          MCCFIInstruction::createOffset(FrameLabel, Reg, LROffset));
     }
   }
 
@@ -567,10 +563,10 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
       // Mark effective beginning of when frame pointer is ready.
       BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel);
 
-      MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) :
-                                    (isPPC64 ? PPC::X1 : PPC::R1));
-      MachineLocation FPSrc(MachineLocation::VirtualFP);
-      Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
+      unsigned Reg = HasFP ? (isPPC64 ? PPC::X31 : PPC::R31)
+                           : (isPPC64 ? PPC::X1 : PPC::R1);
+      Reg = MRI.getDwarfRegNum(Reg, true);
+      MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(ReadyLabel, Reg));
     }
   }
 
@@ -600,16 +596,14 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
       if (Subtarget.isSVR4ABI()
 	  && Subtarget.isPPC64()
 	  && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
-	MachineLocation CSDst(PPC::X1, 8);
-	MachineLocation CSSrc(PPC::CR2);
-	Moves.push_back(MachineMove(Label, CSDst, CSSrc));
+        MMI.addFrameInst(MCCFIInstruction::createOffset(
+            Label, MRI.getDwarfRegNum(PPC::CR2, true), 8));
 	continue;
       }
 
       int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
-      MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
-      MachineLocation CSSrc(Reg);
-      Moves.push_back(MachineMove(Label, CSDst, CSSrc));
+      MMI.addFrameInst(MCCFIInstruction::createOffset(
+          Label, MRI.getDwarfRegNum(Reg, true), Offset));
     }
   }
 }
@@ -747,7 +741,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
   if (isPPC64) {
     if (MustSaveLR)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
-        .addImm(LROffset/4).addReg(PPC::X1);
+        .addImm(LROffset).addReg(PPC::X1);
 
     if (!MustSaveCRs.empty())
       BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), PPC::X12)
@@ -755,7 +749,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
 
     if (HasFP)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
-        .addImm(FPOffset/4).addReg(PPC::X1);
+        .addImm(FPOffset).addReg(PPC::X1);
 
     if (!MustSaveCRs.empty())
       for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
@@ -1170,6 +1164,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
         FuncInfo->addMustSaveCR(Reg);
       } else {
         CRSpilled = true;
+        FuncInfo->setSpillsCR();
 
 	// 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
 	// the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 4bf1e33..0df50e1 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -71,8 +71,8 @@ void PPCScoreboardHazardRecognizer::Reset() {
 //   3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
 //
 
-PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
-  : TII(tii) {
+PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetMachine &TM)
+  : TM(TM) {
   EndDispatchGroup();
 }
 
@@ -91,7 +91,7 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
                                      bool &isFirst, bool &isSingle,
                                      bool &isCracked,
                                      bool &isLoad, bool &isStore) {
-  const MCInstrDesc &MCID = TII.get(Opcode);
+  const MCInstrDesc &MCID = TM.getInstrInfo()->get(Opcode);
 
   isLoad  = MCID.mayLoad();
   isStore = MCID.mayStore();
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 55b45d0..84b8e6d 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -43,7 +43,7 @@ public:
 /// setting the CTR register then branching through it within a dispatch group),
 /// or storing then loading from the same address within a dispatch group.
 class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
-  const TargetInstrInfo &TII;
+  const TargetMachine &TM;
 
   unsigned NumIssued;  // Number of insts issued, including advanced cycles.
 
@@ -64,7 +64,7 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
   unsigned NumStores;
 
 public:
-  PPCHazardRecognizer970(const TargetInstrInfo &TII);
+  PPCHazardRecognizer970(const TargetMachine &TM);
   virtual HazardType getHazardType(SUnit *SU, int Stalls);
   virtual void EmitInstruction(SUnit *SU);
   virtual void AdvanceCycle();
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index aed0fbb..e006945 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -110,13 +110,13 @@ namespace {
 
     /// SelectCC - Select a comparison of the specified values with the
     /// specified condition code, returning the CR# of the expression.
-    SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, DebugLoc dl);
+    SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDLoc dl);
 
     /// SelectAddrImm - Returns true if the address N can be represented by
     /// a base register plus a signed 16-bit displacement [r+imm].
     bool SelectAddrImm(SDValue N, SDValue &Disp,
                        SDValue &Base) {
-      return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG);
+      return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG, false);
     }
 
     /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
@@ -145,11 +145,11 @@ namespace {
       return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
     }
 
-    /// SelectAddrImmShift - Returns true if the address N can be represented by
-    /// a base register plus a signed 14-bit displacement [r+imm*4].  Suitable
-    /// for use by STD and friends.
-    bool SelectAddrImmShift(SDValue N, SDValue &Disp, SDValue &Base) {
-      return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
+    /// SelectAddrImmX4 - Returns true if the address N can be represented by
+    /// a base register plus a signed 16-bit displacement that is a multiple of 4.
+    /// Suitable for use by STD and friends.
+    bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
+      return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG, true);
     }
 
     // Select an address into a single register.
@@ -332,17 +332,17 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
   if (isShiftedMask_32(Val)) {
     // look for the first non-zero bit
-    MB = CountLeadingZeros_32(Val);
+    MB = countLeadingZeros(Val);
     // look for the first zero bit after the run of ones
-    ME = CountLeadingZeros_32((Val - 1) ^ Val);
+    ME = countLeadingZeros((Val - 1) ^ Val);
     return true;
   } else {
     Val = ~Val; // invert mask
     if (isShiftedMask_32(Val)) {
       // effectively look for the first zero bit
-      ME = CountLeadingZeros_32(Val) - 1;
+      ME = countLeadingZeros(Val) - 1;
       // effectively look for the first one bit after the run of zeros
-      MB = CountLeadingZeros_32((Val - 1) ^ Val) + 1;
+      MB = countLeadingZeros((Val - 1) ^ Val) + 1;
       return true;
     }
   }
@@ -397,7 +397,7 @@ bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
 SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
   SDValue Op0 = N->getOperand(0);
   SDValue Op1 = N->getOperand(1);
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
 
   APInt LKZ, LKO, RKZ, RKO;
   CurDAG->ComputeMaskedBits(Op0, LKZ, LKO);
@@ -466,7 +466,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
 /// SelectCC - Select a comparison of the specified values with the specified
 /// condition code, returning the CR# of the expression.
 SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
-                                    ISD::CondCode CC, DebugLoc dl) {
+                                    ISD::CondCode CC, SDLoc dl) {
   // Always select the LHS.
   unsigned Opc;
 
@@ -710,7 +710,7 @@ static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) {
 
 
 SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   unsigned Imm;
   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
   EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
@@ -894,7 +894,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
 // Select - Convert the specified operand from a target-independent to a
 // target-specific node if it hasn't already been changed.
 SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   if (N->isMachineOpcode())
     return NULL;   // Already selected.
 
@@ -912,7 +912,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
 
       // If it can't be represented as a 32 bit value.
       if (!isInt<32>(Imm)) {
-        Shift = CountTrailingZeros_64(Imm);
+        Shift = countTrailingZeros<uint64_t>(Imm);
         int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
 
         // If the shifted value fits 32 bits.
@@ -1242,6 +1242,15 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
                         getI32Imm(BROpc) };
     return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4);
   }
+  case PPCISD::BDNZ:
+  case PPCISD::BDZ: {
+    bool IsPPC64 = PPCSubTarget.isPPC64();
+    SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
+    return CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ?
+                                   (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+                                   (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
+                                MVT::Other, Ops, 2);
+  }
   case PPCISD::COND_BRANCH: {
     // Op #0 is the Chain.
     // Op #1 is the PPC::PRED_* number.
@@ -1519,7 +1528,7 @@ void PPCDAGToDAGISel::PostprocessISelDAG() {
     // immediate operand, add it now.
     if (ReplaceFlags) {
       if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
-        DebugLoc dl = GA->getDebugLoc();
+        SDLoc dl(GA);
         const GlobalValue *GV = GA->getGlobal();
         ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags);
       } else if (ConstantPoolSDNode *CP =
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 3fcafdc..e2433e7 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -16,6 +16,7 @@
 #include "PPCMachineFunctionInfo.h"
 #include "PPCPerfectShuffle.h"
 #include "PPCTargetMachine.h"
+#include "PPCTargetObjectFile.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -36,21 +37,6 @@
 #include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
-static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                       CCValAssign::LocInfo &LocInfo,
-                                       ISD::ArgFlagsTy &ArgFlags,
-                                       CCState &State);
-static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
-                                              MVT &LocVT,
-                                              CCValAssign::LocInfo &LocInfo,
-                                              ISD::ArgFlagsTy &ArgFlags,
-                                              CCState &State);
-static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
-                                                MVT &LocVT,
-                                                CCValAssign::LocInfo &LocInfo,
-                                                ISD::ArgFlagsTy &ArgFlags,
-                                                CCState &State);
-
 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
 
@@ -64,14 +50,15 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
   if (TM.getSubtargetImpl()->isDarwin())
     return new TargetLoweringObjectFileMachO();
 
+  if (TM.getSubtargetImpl()->isSVR4ABI())
+    return new PPC64LinuxTargetObjectFile();
+
   return new TargetLoweringObjectFileELF();
 }
 
 PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
   const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
-  PPCRegInfo = TM.getRegisterInfo();
-  PPCII = TM.getInstrInfo();
 
   setPow2DivIsCheap();
 
@@ -309,6 +296,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 
+  // To handle counter-based loop conditions.
+  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
+
   // Comparisons that require checking two conditions.
   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
@@ -529,9 +519,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
 
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine(ISD::SINT_TO_FP);
+  setTargetDAGCombine(ISD::LOAD);
   setTargetDAGCombine(ISD::STORE);
   setTargetDAGCombine(ISD::BR_CC);
   setTargetDAGCombine(ISD::BSWAP);
+  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
 
   // Use reciprocal estimates.
   if (TM.Options.UnsafeFPMath) {
@@ -642,6 +634,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::LARX:            return "PPCISD::LARX";
   case PPCISD::STCX:            return "PPCISD::STCX";
   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
+  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
+  case PPCISD::BDZ:             return "PPCISD::BDZ";
   case PPCISD::MFFS:            return "PPCISD::MFFS";
   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
@@ -662,10 +656,11 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
   case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
   case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
+  case PPCISD::SC:              return "PPCISD::SC";
   }
 }
 
-EVT PPCTargetLowering::getSetCCResultType(EVT VT) const {
+EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
   if (!VT.isVector())
     return MVT::i32;
   return VT.changeVectorElementTypeToInteger();
@@ -1038,20 +1033,23 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
 
 /// Returns true if the address N can be represented by a base register plus
 /// a signed 16-bit displacement [r+imm], and if it is not better
-/// represented as reg+reg.
+/// represented as reg+reg.  If Aligned is true, only accept displacements
+/// suitable for STD and friends, i.e. multiples of 4.
 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                             SDValue &Base,
-                                            SelectionDAG &DAG) const {
+                                            SelectionDAG &DAG,
+                                            bool Aligned) const {
   // FIXME dl should come from parent load or store, not from address
-  DebugLoc dl = N.getDebugLoc();
+  SDLoc dl(N);
   // If this can be more profitably realized as r+r, fail.
   if (SelectAddressRegReg(N, Disp, Base, DAG))
     return false;
 
   if (N.getOpcode() == ISD::ADD) {
     short imm = 0;
-    if (isIntS16Immediate(N.getOperand(1), imm)) {
-      Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+    if (isIntS16Immediate(N.getOperand(1), imm) &&
+        (!Aligned || (imm & 3) == 0)) {
+      Disp = DAG.getTargetConstant(imm, N.getValueType());
       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
       } else {
@@ -1072,7 +1070,8 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
     }
   } else if (N.getOpcode() == ISD::OR) {
     short imm = 0;
-    if (isIntS16Immediate(N.getOperand(1), imm)) {
+    if (isIntS16Immediate(N.getOperand(1), imm) &&
+        (!Aligned || (imm & 3) == 0)) {
       // If this is an or of disjoint bitfields, we can codegen this as an add
       // (for better address arithmetic) if the LHS and RHS of the OR are
       // provably disjoint.
@@ -1083,7 +1082,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
         // If all of the bits are known zero on the LHS or RHS, the add won't
         // carry.
         Base = N.getOperand(0);
-        Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+        Disp = DAG.getTargetConstant(imm, N.getValueType());
         return true;
       }
     }
@@ -1093,7 +1092,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
     // If this address fits entirely in a 16-bit sext immediate field, codegen
     // this as "d, 0"
     short Imm;
-    if (isIntS16Immediate(CN, Imm)) {
+    if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
       Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
       Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                              CN->getValueType(0));
@@ -1101,8 +1100,9 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
     }
 
     // Handle 32-bit sext immediates with LIS + addr mode.
-    if (CN->getValueType(0) == MVT::i32 ||
-        (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
+    if ((CN->getValueType(0) == MVT::i32 ||
+         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
+        (!Aligned || (CN->getZExtValue() & 3) == 0)) {
       int Addr = (int)CN->getZExtValue();
 
       // Otherwise, break this down into an LIS + disp.
@@ -1150,92 +1150,6 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
   return true;
 }
 
-/// SelectAddressRegImmShift - Returns true if the address N can be
-/// represented by a base register plus a signed 14-bit displacement
-/// [r+imm*4].  Suitable for use by STD and friends.
-bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
-                                                 SDValue &Base,
-                                                 SelectionDAG &DAG) const {
-  // FIXME dl should come from the parent load or store, not the address
-  DebugLoc dl = N.getDebugLoc();
-  // If this can be more profitably realized as r+r, fail.
-  if (SelectAddressRegReg(N, Disp, Base, DAG))
-    return false;
-
-  if (N.getOpcode() == ISD::ADD) {
-    short imm = 0;
-    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
-      Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
-      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
-        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
-      } else {
-        Base = N.getOperand(0);
-      }
-      return true; // [r+i]
-    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
-      // Match LOAD (ADD (X, Lo(G))).
-      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
-             && "Cannot handle constant offsets yet!");
-      Disp = N.getOperand(1).getOperand(0);  // The global address.
-      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
-             Disp.getOpcode() == ISD::TargetConstantPool ||
-             Disp.getOpcode() == ISD::TargetJumpTable);
-      Base = N.getOperand(0);
-      return true;  // [&g+r]
-    }
-  } else if (N.getOpcode() == ISD::OR) {
-    short imm = 0;
-    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
-      // If this is an or of disjoint bitfields, we can codegen this as an add
-      // (for better address arithmetic) if the LHS and RHS of the OR are
-      // provably disjoint.
-      APInt LHSKnownZero, LHSKnownOne;
-      DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
-      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
-        // If all of the bits are known zero on the LHS or RHS, the add won't
-        // carry.
-        Base = N.getOperand(0);
-        Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
-        return true;
-      }
-    }
-  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
-    // Loading from a constant address.  Verify low two bits are clear.
-    if ((CN->getZExtValue() & 3) == 0) {
-      // If this address fits entirely in a 14-bit sext immediate field, codegen
-      // this as "d, 0"
-      short Imm;
-      if (isIntS16Immediate(CN, Imm)) {
-        Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
-        Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
-                               CN->getValueType(0));
-        return true;
-      }
-
-      // Fold the low-part of 32-bit absolute addresses into addr mode.
-      if (CN->getValueType(0) == MVT::i32 ||
-          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
-        int Addr = (int)CN->getZExtValue();
-
-        // Otherwise, break this down into an LIS + disp.
-        Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
-        Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
-        unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
-        Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0);
-        return true;
-      }
-    }
-  }
-
-  Disp = DAG.getTargetConstant(0, getPointerTy());
-  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
-    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
-  else
-    Base = N;
-  return true;      // [r+0]
-}
-
-
 /// getPreIndexedAddressParts - returns true by value, base pointer and
 /// offset pointer and addressing mode by reference if the node's address
 /// can be legally represented as pre-indexed load / store address.
@@ -1288,18 +1202,16 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
     return true;
   }
 
-  // LDU/STU use reg+imm*4, others use reg+imm.
+  // LDU/STU can only handle immediates that are a multiple of 4.
   if (VT != MVT::i64) {
-    // reg + imm
-    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
+    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
       return false;
   } else {
     // LDU/STU need an address with at least 4-byte alignment.
     if (Alignment < 4)
       return false;
 
-    // reg + imm * 4.
-    if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
+    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
       return false;
   }
 
@@ -1355,7 +1267,7 @@ static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
                              SelectionDAG &DAG) {
   EVT PtrVT = HiPart.getValueType();
   SDValue Zero = DAG.getConstant(0, PtrVT);
-  DebugLoc DL = HiPart.getDebugLoc();
+  SDLoc DL(HiPart);
 
   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
@@ -1380,7 +1292,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
   // The actual address of the GlobalValue is stored in the TOC.
   if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
-    return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA,
+    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
                        DAG.getRegister(PPC::X2, MVT::i64));
   }
 
@@ -1401,7 +1313,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
   // The actual address of the GlobalValue is stored in the TOC.
   if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
-    return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA,
+    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
                        DAG.getRegister(PPC::X2, MVT::i64));
   }
 
@@ -1429,7 +1341,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
 
   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
-  DebugLoc dl = GA->getDebugLoc();
+  SDLoc dl(GA);
   const GlobalValue *GV = GA->getGlobal();
   EVT PtrVT = getPointerTy();
   bool is64bit = PPCSubTarget.isPPC64();
@@ -1515,7 +1427,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
   EVT PtrVT = Op.getValueType();
   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
-  DebugLoc DL = GSDN->getDebugLoc();
+  SDLoc DL(GSDN);
   const GlobalValue *GV = GSDN->getGlobal();
 
   // 64-bit SVR4 ABI code is always position-independent.
@@ -1546,7 +1458,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
 
 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
 
   // If we're comparing for equality to zero, expose the fact that this is
   // implented as a ctlz/srl pair on ppc, so that the dag combiner can
@@ -1595,7 +1507,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
   SDValue InChain = Node->getOperand(0);
   SDValue VAListPtr = Node->getOperand(1);
   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
-  DebugLoc dl = Node->getDebugLoc();
+  SDLoc dl(Node);
 
   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
 
@@ -1706,7 +1618,7 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
   SDValue Trmp = Op.getOperand(1); // trampoline
   SDValue FPtr = Op.getOperand(2); // nested function
   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
 
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   bool isPPC64 = (PtrVT == MVT::i64);
@@ -1748,7 +1660,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
   MachineFunction &MF = DAG.getMachineFunction();
   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
 
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
 
   if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
     // vastart just stores the address of the VarArgsFrameIndex slot into the
@@ -1842,18 +1754,18 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
 
 #include "PPCGenCallingConv.inc"
 
-static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                       CCValAssign::LocInfo &LocInfo,
-                                       ISD::ArgFlagsTy &ArgFlags,
-                                       CCState &State) {
+bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                      CCValAssign::LocInfo &LocInfo,
+                                      ISD::ArgFlagsTy &ArgFlags,
+                                      CCState &State) {
   return true;
 }
 
-static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
-                                              MVT &LocVT,
-                                              CCValAssign::LocInfo &LocInfo,
-                                              ISD::ArgFlagsTy &ArgFlags,
-                                              CCState &State) {
+bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+                                             MVT &LocVT,
+                                             CCValAssign::LocInfo &LocInfo,
+                                             ISD::ArgFlagsTy &ArgFlags,
+                                             CCState &State) {
   static const uint16_t ArgRegs[] = {
     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
@@ -1876,11 +1788,11 @@ static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
   return false;
 }
 
-static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
-                                                MVT &LocVT,
-                                                CCValAssign::LocInfo &LocInfo,
-                                                ISD::ArgFlagsTy &ArgFlags,
-                                                CCState &State) {
+bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+                                               MVT &LocVT,
+                                               CCValAssign::LocInfo &LocInfo,
+                                               ISD::ArgFlagsTy &ArgFlags,
+                                               CCState &State) {
   static const uint16_t ArgRegs[] = {
     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
     PPC::F8
@@ -1931,7 +1843,7 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain,
                                         CallingConv::ID CallConv, bool isVarArg,
                                         const SmallVectorImpl<ISD::InputArg>
                                           &Ins,
-                                        DebugLoc dl, SelectionDAG &DAG,
+                                        SDLoc dl, SelectionDAG &DAG,
                                         SmallVectorImpl<SDValue> &InVals)
                                           const {
   if (PPCSubTarget.isSVR4ABI()) {
@@ -1953,7 +1865,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
                                       CallingConv::ID CallConv, bool isVarArg,
                                       const SmallVectorImpl<ISD::InputArg>
                                         &Ins,
-                                      DebugLoc dl, SelectionDAG &DAG,
+                                      SDLoc dl, SelectionDAG &DAG,
                                       SmallVectorImpl<SDValue> &InVals) const {
 
   // 32-bit SVR4 ABI Stack Frame Layout:
@@ -2170,14 +2082,14 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
 SDValue
 PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
                                      SelectionDAG &DAG, SDValue ArgVal,
-                                     DebugLoc dl) const {
+                                     SDLoc dl) const {
   if (Flags.isSExt())
     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
                          DAG.getValueType(ObjectVT));
   else if (Flags.isZExt())
     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
                          DAG.getValueType(ObjectVT));
-  
+
   return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
 }
 
@@ -2213,7 +2125,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
                                       CallingConv::ID CallConv, bool isVarArg,
                                       const SmallVectorImpl<ISD::InputArg>
                                         &Ins,
-                                      DebugLoc dl, SelectionDAG &DAG,
+                                      SDLoc dl, SelectionDAG &DAG,
                                       SmallVectorImpl<SDValue> &InVals) const {
   // TODO: add description of PPC stack frame format, or at least some docs.
   //
@@ -2502,7 +2414,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
                                       CallingConv::ID CallConv, bool isVarArg,
                                       const SmallVectorImpl<ISD::InputArg>
                                         &Ins,
-                                      DebugLoc dl, SelectionDAG &DAG,
+                                      SDLoc dl, SelectionDAG &DAG,
                                       SmallVectorImpl<SDValue> &InVals) const {
   // TODO: add description of PPC stack frame format, or at least some docs.
   //
@@ -2600,17 +2512,17 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
 
   SmallVector<SDValue, 8> MemOps;
   unsigned nAltivecParamsAtEnd = 0;
-  // FIXME: FuncArg and Ins[ArgNo] must reference the same argument.
-  // When passing anonymous aggregates, this is currently not true.
-  // See LowerFormalArguments_64SVR4 for a fix.
   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
-  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
+  unsigned CurArgIdx = 0;
+  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
     SDValue ArgVal;
     bool needsLoad = false;
     EVT ObjectVT = Ins[ArgNo].VT;
     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
     unsigned ArgSize = ObjSize;
     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+    std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
+    CurArgIdx = Ins[ArgNo].OrigArgIndex;
 
     unsigned CurArgOffset = ArgOffset;
 
@@ -3004,7 +2916,7 @@ StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
                                            SDValue Chain,
                    const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
                    SmallVector<SDValue, 8> &MemOpChains,
-                   DebugLoc dl) {
+                   SDLoc dl) {
   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
     SDValue Arg = TailCallArgs[i].Arg;
     SDValue FIN = TailCallArgs[i].FrameIdxOp;
@@ -3026,7 +2938,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
                                                int SPDiff,
                                                bool isPPC64,
                                                bool isDarwinABI,
-                                               DebugLoc dl) {
+                                               SDLoc dl) {
   if (SPDiff) {
     // Calculate the new stack slot for the return address.
     int SlotSize = isPPC64 ? 8 : 4;
@@ -3083,7 +2995,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
                                                         SDValue &LROpOut,
                                                         SDValue &FPOpOut,
                                                         bool isDarwinABI,
-                                                        DebugLoc dl) const {
+                                                        SDLoc dl) const {
   if (SPDiff) {
     // Load the LR and FP stack slot for later adjusting.
     EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
@@ -3113,7 +3025,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
 static SDValue
 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
-                          DebugLoc dl) {
+                          SDLoc dl) {
   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                        false, false, MachinePointerInfo(0),
@@ -3128,7 +3040,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
                  unsigned ArgOffset, bool isPPC64, bool isTailCall,
                  bool isVector, SmallVector<SDValue, 8> &MemOpChains,
                  SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
-                 DebugLoc dl) {
+                 SDLoc dl) {
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   if (!isTailCall) {
     if (isVector) {
@@ -3149,7 +3061,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
 
 static
 void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
-                     DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
+                     SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
                      SDValue LROp, SDValue FPOp, bool isDarwinABI,
                      SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
   MachineFunction &MF = DAG.getMachineFunction();
@@ -3171,13 +3083,13 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
 
   // Emit callseq_end just before tailcall node.
   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
-                             DAG.getIntPtrConstant(0, true), InFlag);
+                             DAG.getIntPtrConstant(0, true), InFlag, dl);
   InFlag = Chain.getValue(1);
 }
 
 static
 unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
-                     SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall,
+                     SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
                      SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
                      SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
                      const PPCSubtarget &PPCSubTarget) {
@@ -3363,7 +3275,7 @@ SDValue
 PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                    CallingConv::ID CallConv, bool isVarArg,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
-                                   DebugLoc dl, SelectionDAG &DAG,
+                                   SDLoc dl, SelectionDAG &DAG,
                                    SmallVectorImpl<SDValue> &InVals) const {
 
   SmallVector<CCValAssign, 16> RVLocs;
@@ -3406,7 +3318,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
 }
 
 SDValue
-PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
+PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
                               bool isTailCall, bool isVarArg,
                               SelectionDAG &DAG,
                               SmallVector<std::pair<unsigned, SDValue>, 8>
@@ -3493,7 +3405,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
 
   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                              DAG.getIntPtrConstant(BytesCalleePops, true),
-                             InFlag);
+                             InFlag, dl);
   if (!Ins.empty())
     InFlag = Chain.getValue(1);
 
@@ -3505,7 +3417,7 @@ SDValue
 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                              SmallVectorImpl<SDValue> &InVals) const {
   SelectionDAG &DAG                     = CLI.DAG;
-  DebugLoc &dl                          = CLI.DL;
+  SDLoc &dl                          = CLI.DL;
   SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
   SmallVector<SDValue, 32> &OutVals     = CLI.OutVals;
   SmallVector<ISD::InputArg, 32> &Ins   = CLI.Ins;
@@ -3542,7 +3454,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
                                     const SmallVectorImpl<SDValue> &OutVals,
                                     const SmallVectorImpl<ISD::InputArg> &Ins,
-                                    DebugLoc dl, SelectionDAG &DAG,
+                                    SDLoc dl, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &InVals) const {
   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
   // of the 32-bit SVR4 ABI stack frame layout.
@@ -3628,7 +3540,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
 
   // Adjust the stack pointer for the new arguments...
   // These operations are automatically eliminated by the prolog/epilog pass
-  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                               dl);
   SDValue CallSeqStart = Chain;
 
   // Load the return address and frame pointer so it can be moved somewhere else
@@ -3679,7 +3592,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
 
       // This must go outside the CALLSEQ_START..END.
       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
-                           CallSeqStart.getNode()->getOperand(1));
+                           CallSeqStart.getNode()->getOperand(1),
+                           SDLoc(MemcpyCall));
       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                              NewCallSeqStart.getNode());
       Chain = CallSeqStart = NewCallSeqStart;
@@ -3755,13 +3669,14 @@ PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
                                               SDValue CallSeqStart,
                                               ISD::ArgFlagsTy Flags,
                                               SelectionDAG &DAG,
-                                              DebugLoc dl) const {
+                                              SDLoc dl) const {
   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
                         CallSeqStart.getNode()->getOperand(0),
                         Flags, DAG, dl);
   // The MEMCPY must go outside the CALLSEQ_START..END.
   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
-                             CallSeqStart.getNode()->getOperand(1));
+                             CallSeqStart.getNode()->getOperand(1),
+                             SDLoc(MemcpyCall));
   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                          NewCallSeqStart.getNode());
   return NewCallSeqStart;
@@ -3774,7 +3689,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
                                     const SmallVectorImpl<SDValue> &OutVals,
                                     const SmallVectorImpl<ISD::InputArg> &Ins,
-                                    DebugLoc dl, SelectionDAG &DAG,
+                                    SDLoc dl, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &InVals) const {
 
   unsigned NumOps = Outs.size();
@@ -3815,7 +3730,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
 
   // Adjust the stack pointer for the new arguments...
   // These operations are automatically eliminated by the prolog/epilog pass
-  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                               dl);
   SDValue CallSeqStart = Chain;
 
   // Load the return address and frame pointer so it can be move somewhere else
@@ -3940,7 +3856,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
         // register.
         // FIXME: The memcpy seems to produce pretty awful code for
         // small aggregates, particularly for packed ones.
-        // FIXME: It would be preferable to use the slot in the 
+        // FIXME: It would be preferable to use the slot in the
         // parameter save area instead of a new local variable.
         SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
         SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
@@ -4145,7 +4061,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
                                     const SmallVectorImpl<SDValue> &OutVals,
                                     const SmallVectorImpl<ISD::InputArg> &Ins,
-                                    DebugLoc dl, SelectionDAG &DAG,
+                                    SDLoc dl, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &InVals) const {
 
   unsigned NumOps = Outs.size();
@@ -4186,7 +4102,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
 
   // Adjust the stack pointer for the new arguments...
   // These operations are automatically eliminated by the prolog/epilog pass
-  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                               dl);
   SDValue CallSeqStart = Chain;
 
   // Load the return address and frame pointer so it can be move somewhere else
@@ -4502,7 +4419,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
                                CallingConv::ID CallConv, bool isVarArg,
                                const SmallVectorImpl<ISD::OutputArg> &Outs,
                                const SmallVectorImpl<SDValue> &OutVals,
-                               DebugLoc dl, SelectionDAG &DAG) const {
+                               SDLoc dl, SelectionDAG &DAG) const {
 
   SmallVector<CCValAssign, 16> RVLocs;
   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
@@ -4551,7 +4468,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
                                    const PPCSubtarget &Subtarget) const {
   // When we pop the dynamic allocation we need to restore the SP link.
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
 
   // Get the corect type for pointers.
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -4636,7 +4553,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
   // Get the inputs.
   SDValue Chain = Op.getOperand(0);
   SDValue Size  = Op.getOperand(1);
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
 
   // Get the corect type for pointers.
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -4653,7 +4570,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
 
 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
                      DAG.getVTList(MVT::i32, MVT::Other),
                      Op.getOperand(0), Op.getOperand(1));
@@ -4661,7 +4578,7 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
 
 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                 SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
                      Op.getOperand(0), Op.getOperand(1));
 }
@@ -4687,7 +4604,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   EVT CmpVT = Op.getOperand(0).getValueType();
   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
 
   // If the RHS of the comparison is a 0.0, we don't need to do the
   // subtraction at all.
@@ -4768,7 +4685,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
 
 // FIXME: Split this code up when LegalizeDAGTypes lands.
 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
-                                           DebugLoc dl) const {
+                                           SDLoc dl) const {
   assert(Op.getOperand(0).getValueType().isFloatingPoint());
   SDValue Src = Op.getOperand(0);
   if (Src.getValueType() == MVT::f32)
@@ -4827,7 +4744,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
 
 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                            SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
     return SDValue();
@@ -4961,7 +4878,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
 
 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                             SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   /*
    The rounding mode is in bits 30:31 of FPSR, and has the following
    settings:
@@ -5027,7 +4944,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   unsigned BitWidth = VT.getSizeInBits();
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   assert(Op.getNumOperands() == 3 &&
          VT == Op.getOperand(1).getValueType() &&
          "Unexpected SHL!");
@@ -5055,7 +4972,7 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
 
 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   unsigned BitWidth = VT.getSizeInBits();
   assert(Op.getNumOperands() == 3 &&
          VT == Op.getOperand(1).getValueType() &&
@@ -5083,7 +5000,7 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
 }
 
 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   EVT VT = Op.getValueType();
   unsigned BitWidth = VT.getSizeInBits();
   assert(Op.getNumOperands() == 3 &&
@@ -5118,7 +5035,7 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
 /// BuildSplatI - Build a canonical splati of Val with an element size of
 /// SplatSize.  Cast the result to VT.
 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
-                             SelectionDAG &DAG, DebugLoc dl) {
+                             SelectionDAG &DAG, SDLoc dl) {
   assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
 
   static const EVT VTys[] = { // canonical VT to use for each size.
@@ -5142,10 +5059,20 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
   return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
 }
 
+/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
+                                SelectionDAG &DAG, SDLoc dl,
+                                EVT DestVT = MVT::Other) {
+  if (DestVT == MVT::Other) DestVT = Op.getValueType();
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+                     DAG.getConstant(IID, MVT::i32), Op);
+}
+
 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
 /// specified intrinsic ID.
 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
-                                SelectionDAG &DAG, DebugLoc dl,
+                                SelectionDAG &DAG, SDLoc dl,
                                 EVT DestVT = MVT::Other) {
   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
@@ -5156,7 +5083,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
 /// specified intrinsic ID.
 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
                                 SDValue Op2, SelectionDAG &DAG,
-                                DebugLoc dl, EVT DestVT = MVT::Other) {
+                                SDLoc dl, EVT DestVT = MVT::Other) {
   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                      DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
@@ -5166,7 +5093,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
 /// amount.  The result has the specified value type.
 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
-                             EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+                             EVT VT, SelectionDAG &DAG, SDLoc dl) {
   // Force LHS/RHS to be the right type.
   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
@@ -5185,7 +5112,7 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
 // sequence of ops that should be used.
 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                              SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
   assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
 
@@ -5341,7 +5268,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
 /// the specified operations to build the shuffle.
 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                       SDValue RHS, SelectionDAG &DAG,
-                                      DebugLoc dl) {
+                                      SDLoc dl) {
   unsigned OpNum = (PFEntry >> 26) & 0x0F;
   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
@@ -5420,7 +5347,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
 /// lowered into a vperm.
 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                                SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   SDValue V1 = Op.getOperand(0);
   SDValue V2 = Op.getOperand(1);
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
@@ -5587,7 +5514,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
   // If this is a lowered altivec predicate compare, CompareOpc is set to the
   // opcode number of the comparison.
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   int CompareOpc;
   bool isDot;
   if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
@@ -5651,7 +5578,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
 
 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                    SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   // Create a stack slot that is 16-byte aligned.
   MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
   int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
@@ -5668,7 +5595,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
 }
 
 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   if (Op.getValueType() == MVT::v4i32) {
     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
 
@@ -5755,7 +5682,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
   case ISD::FP_TO_UINT:
   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
-                                                       Op.getDebugLoc());
+                                                       SDLoc(Op));
   case ISD::UINT_TO_FP:
   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
@@ -5772,6 +5699,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
   case ISD::MUL:                return LowerMUL(Op, DAG);
 
+  // For counter-based loop handling.
+  case ISD::INTRINSIC_W_CHAIN:  return SDValue();
+
   // Frame & Return address.
   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
@@ -5782,10 +5712,26 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue>&Results,
                                            SelectionDAG &DAG) const {
   const TargetMachine &TM = getTargetMachine();
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   switch (N->getOpcode()) {
   default:
     llvm_unreachable("Do not know how to custom type legalize this operation!");
+  case ISD::INTRINSIC_W_CHAIN: {
+    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
+        Intrinsic::ppc_is_decremented_ctr_nonzero)
+      break;
+
+    assert(N->getValueType(0) == MVT::i1 &&
+           "Unexpected result type for CTR decrement intrinsic");
+    EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0));
+    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
+    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
+                                 N->getOperand(1)); 
+
+    Results.push_back(NewInt);
+    Results.push_back(NewInt.getValue(1));
+    break;
+  }
   case ISD::VAARG: {
     if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
         || TM.getSubtarget<PPCSubtarget>().isPPC64())
@@ -6101,7 +6047,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
   if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
     MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
             .addReg(PPC::X2)
-            .addImm(TOCOffset / 4)
+            .addImm(TOCOffset)
             .addReg(BufReg);
 
     MIB.setMemRefs(MMOBegin, MMOEnd);
@@ -6109,7 +6055,9 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
 
   // Setup
   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
-  MIB.addRegMask(PPCRegInfo->getNoPreservedMask());
+  const PPCRegisterInfo *TRI =
+    static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo());
+  MIB.addRegMask(TRI->getNoPreservedMask());
 
   BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
 
@@ -6129,7 +6077,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
   if (PPCSubTarget.isPPC64()) {
     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
             .addReg(LabelReg)
-            .addImm(LabelOffset / 4)
+            .addImm(LabelOffset)
             .addReg(BufReg);
   } else {
     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
@@ -6202,7 +6150,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
   // Reload IP
   if (PVT == MVT::i64) {
     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
-            .addImm(LabelOffset / 4)
+            .addImm(LabelOffset)
             .addReg(BufReg);
   } else {
     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
@@ -6214,7 +6162,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
   // Reload SP
   if (PVT == MVT::i64) {
     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
-            .addImm(SPOffset / 4)
+            .addImm(SPOffset)
             .addReg(BufReg);
   } else {
     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
@@ -6229,7 +6177,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
   // Reload TOC
   if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
-            .addImm(TOCOffset / 4)
+            .addImm(TOCOffset)
             .addReg(BufReg);
 
     MIB.setMemRefs(MMOBegin, MMOEnd);
@@ -6272,8 +6220,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     Cond.push_back(MI->getOperand(1));
 
     DebugLoc dl = MI->getDebugLoc();
-    PPCII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), Cond,
-                        MI->getOperand(2).getReg(), MI->getOperand(3).getReg());
+    const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+    TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
+                      Cond, MI->getOperand(2).getReg(),
+                      MI->getOperand(3).getReg());
   } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
              MI->getOpcode() == PPC::SELECT_CC_I8 ||
              MI->getOpcode() == PPC::SELECT_CC_F4 ||
@@ -6717,7 +6667,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
       ++Iterations;
 
     SelectionDAG &DAG = DCI.DAG;
-    DebugLoc dl = Op.getDebugLoc();
+    SDLoc dl(Op);
 
     SDValue FPOne =
       DAG.getConstantFP(1.0, VT.getScalarType());
@@ -6779,7 +6729,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
       ++Iterations;
 
     SelectionDAG &DAG = DCI.DAG;
-    DebugLoc dl = Op.getDebugLoc();
+    SDLoc dl(Op);
 
     SDValue FPThreeHalves =
       DAG.getConstantFP(1.5, VT.getScalarType());
@@ -6823,11 +6773,120 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
   return SDValue();
 }
 
+// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
+// not enforce equality of the chain operands.
+static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
+                            unsigned Bytes, int Dist,
+                            SelectionDAG &DAG) {
+  EVT VT = LS->getMemoryVT();
+  if (VT.getSizeInBits() / 8 != Bytes)
+    return false;
+
+  SDValue Loc = LS->getBasePtr();
+  SDValue BaseLoc = Base->getBasePtr();
+  if (Loc.getOpcode() == ISD::FrameIndex) {
+    if (BaseLoc.getOpcode() != ISD::FrameIndex)
+      return false;
+    const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
+    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+    int FS  = MFI->getObjectSize(FI);
+    int BFS = MFI->getObjectSize(BFI);
+    if (FS != BFS || FS != (int)Bytes) return false;
+    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+  }
+
+  // Handle X+C
+  if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
+      cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
+    return true;
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  const GlobalValue *GV1 = NULL;
+  const GlobalValue *GV2 = NULL;
+  int64_t Offset1 = 0;
+  int64_t Offset2 = 0;
+  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+  if (isGA1 && isGA2 && GV1 == GV2)
+    return Offset1 == (Offset2 + Dist*Bytes);
+  return false;
+}
+
+// Return true is there is a nearyby consecutive load to the one provided
+// (regardless of alignment). We search up and down the chain, looking though
+// token factors and other loads (but nothing else). As a result, a true
+// results indicates that it is safe to create a new consecutive load adjacent
+// to the load provided.
+static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
+  SDValue Chain = LD->getChain();
+  EVT VT = LD->getMemoryVT();
+
+  SmallSet<SDNode *, 16> LoadRoots;
+  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
+  SmallSet<SDNode *, 16> Visited;
+
+  // First, search up the chain, branching to follow all token-factor operands.
+  // If we find a consecutive load, then we're done, otherwise, record all
+  // nodes just above the top-level loads and token factors.
+  while (!Queue.empty()) {
+    SDNode *ChainNext = Queue.pop_back_val();
+    if (!Visited.insert(ChainNext))
+      continue;
+
+    if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
+      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
+        return true;
+
+      if (!Visited.count(ChainLD->getChain().getNode()))
+        Queue.push_back(ChainLD->getChain().getNode());
+    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
+      for (SDNode::op_iterator O = ChainNext->op_begin(),
+           OE = ChainNext->op_end(); O != OE; ++O)
+        if (!Visited.count(O->getNode()))
+          Queue.push_back(O->getNode());
+    } else
+      LoadRoots.insert(ChainNext);
+  }
+
+  // Second, search down the chain, starting from the top-level nodes recorded
+  // in the first phase. These top-level nodes are the nodes just above all
+  // loads and token factors. Starting with their uses, recursively look though
+  // all loads (just the chain uses) and token factors to find a consecutive
+  // load.
+  Visited.clear();
+  Queue.clear();
+
+  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
+       IE = LoadRoots.end(); I != IE; ++I) {
+    Queue.push_back(*I);
+       
+    while (!Queue.empty()) {
+      SDNode *LoadRoot = Queue.pop_back_val();
+      if (!Visited.insert(LoadRoot))
+        continue;
+
+      if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot))
+        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
+          return true;
+
+      for (SDNode::use_iterator UI = LoadRoot->use_begin(),
+           UE = LoadRoot->use_end(); UI != UE; ++UI)
+        if (((isa<LoadSDNode>(*UI) &&
+            cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
+            UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
+          Queue.push_back(*UI);
+    }
+  }
+
+  return false;
+}
+
 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   const TargetMachine &TM = getTargetMachine();
   SelectionDAG &DAG = DCI.DAG;
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   switch (N->getOpcode()) {
   default: break;
   case PPCISD::SHL:
@@ -6868,7 +6927,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                  DCI);
       if (RV.getNode() != 0) {
         DCI.AddToWorklist(RV.getNode());
-        RV = DAG.getNode(ISD::FP_EXTEND, N->getOperand(1).getDebugLoc(),
+        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
                          N->getValueType(0), RV);
         DCI.AddToWorklist(RV.getNode());
         return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
@@ -6881,7 +6940,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                  DCI);
       if (RV.getNode() != 0) {
         DCI.AddToWorklist(RV.getNode());
-        RV = DAG.getNode(ISD::FP_ROUND, N->getOperand(1).getDebugLoc(),
+        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
                          N->getValueType(0), RV,
                          N->getOperand(1).getOperand(1));
         DCI.AddToWorklist(RV.getNode());
@@ -6999,6 +7058,157 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                 cast<StoreSDNode>(N)->getMemOperand());
     }
     break;
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(N);
+    EVT VT = LD->getValueType(0);
+    Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+    unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
+    if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
+        TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
+        DCI.getDAGCombineLevel() == AfterLegalizeTypes &&
+        LD->getAlignment() < ABIAlignment) {
+      // This is a type-legal unaligned Altivec load.
+      SDValue Chain = LD->getChain();
+      SDValue Ptr = LD->getBasePtr();
+
+      // This implements the loading of unaligned vectors as described in
+      // the venerable Apple Velocity Engine overview. Specifically:
+      // https://developer.apple.com/hardwaredrivers/ve/alignment.html
+      // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
+      //
+      // The general idea is to expand a sequence of one or more unaligned
+      // loads into a alignment-based permutation-control instruction (lvsl),
+      // a series of regular vector loads (which always truncate their
+      // input address to an aligned address), and a series of permutations.
+      // The results of these permutations are the requested loaded values.
+      // The trick is that the last "extra" load is not taken from the address
+      // you might suspect (sizeof(vector) bytes after the last requested
+      // load), but rather sizeof(vector) - 1 bytes after the last
+      // requested vector. The point of this is to avoid a page fault if the
+      // base address happend to be aligned. This works because if the base
+      // address is aligned, then adding less than a full vector length will
+      // cause the last vector in the sequence to be (re)loaded. Otherwise,
+      // the next vector will be fetched as you might suspect was necessary.
+
+      // We might be able to reuse the permutation generation from
+      // a different base address offset from this one by an aligned amount.
+      // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
+      // optimization later.
+      SDValue PermCntl = BuildIntrinsicOp(Intrinsic::ppc_altivec_lvsl, Ptr,
+                                          DAG, dl, MVT::v16i8);
+
+      // Refine the alignment of the original load (a "new" load created here
+      // which was identical to the first except for the alignment would be
+      // merged with the existing node regardless).
+      MachineFunction &MF = DAG.getMachineFunction();
+      MachineMemOperand *MMO =
+        MF.getMachineMemOperand(LD->getPointerInfo(),
+                                LD->getMemOperand()->getFlags(),
+                                LD->getMemoryVT().getStoreSize(),
+                                ABIAlignment);
+      LD->refineAlignment(MMO);
+      SDValue BaseLoad = SDValue(LD, 0);
+
+      // Note that the value of IncOffset (which is provided to the next
+      // load's pointer info offset value, and thus used to calculate the
+      // alignment), and the value of IncValue (which is actually used to
+      // increment the pointer value) are different! This is because we
+      // require the next load to appear to be aligned, even though it
+      // is actually offset from the base pointer by a lesser amount.
+      int IncOffset = VT.getSizeInBits() / 8;
+      int IncValue = IncOffset;
+
+      // Walk (both up and down) the chain looking for another load at the real
+      // (aligned) offset (the alignment of the other load does not matter in
+      // this case). If found, then do not use the offset reduction trick, as
+      // that will prevent the loads from being later combined (as they would
+      // otherwise be duplicates).
+      if (!findConsecutiveLoad(LD, DAG))
+        --IncValue;
+
+      SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
+      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+
+      SDValue ExtraLoad =
+        DAG.getLoad(VT, dl, Chain, Ptr,
+                    LD->getPointerInfo().getWithOffset(IncOffset),
+                    LD->isVolatile(), LD->isNonTemporal(),
+                    LD->isInvariant(), ABIAlignment);
+
+      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+        BaseLoad.getValue(1), ExtraLoad.getValue(1));
+
+      if (BaseLoad.getValueType() != MVT::v4i32)
+        BaseLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, BaseLoad);
+
+      if (ExtraLoad.getValueType() != MVT::v4i32)
+        ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad);
+
+      SDValue Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+                                      BaseLoad, ExtraLoad, PermCntl, DAG, dl);
+
+      if (VT != MVT::v4i32)
+        Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
+
+      // Now we need to be really careful about how we update the users of the
+      // original load. We cannot just call DCI.CombineTo (or
+      // DAG.ReplaceAllUsesWith for that matter), because the load still has
+      // uses created here (the permutation for example) that need to stay.
+      SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+      while (UI != UE) {
+        SDUse &Use = UI.getUse();
+        SDNode *User = *UI;
+        // Note: BaseLoad is checked here because it might not be N, but a
+        // bitcast of N.
+        if (User == Perm.getNode() || User == BaseLoad.getNode() ||
+            User == TF.getNode() || Use.getResNo() > 1) {
+          ++UI;
+          continue;
+        }
+
+        SDValue To = Use.getResNo() ? TF : Perm;
+        ++UI;
+
+        SmallVector<SDValue, 8> Ops;
+        for (SDNode::op_iterator O = User->op_begin(),
+             OE = User->op_end(); O != OE; ++O) {
+          if (*O == Use)
+            Ops.push_back(To);
+          else
+            Ops.push_back(*O);
+        }
+
+        DAG.UpdateNodeOperands(User, Ops.data(), Ops.size());
+      }
+
+      return SDValue(N, 0);
+    }
+    }
+    break;
+  case ISD::INTRINSIC_WO_CHAIN:
+    if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() ==
+          Intrinsic::ppc_altivec_lvsl &&
+        N->getOperand(1)->getOpcode() == ISD::ADD) {
+      SDValue Add = N->getOperand(1);
+
+      if (DAG.MaskedValueIsZero(Add->getOperand(1),
+            APInt::getAllOnesValue(4 /* 16 byte alignment */).zext(
+              Add.getValueType().getScalarType().getSizeInBits()))) {
+        SDNode *BasePtr = Add->getOperand(0).getNode();
+        for (SDNode::use_iterator UI = BasePtr->use_begin(),
+             UE = BasePtr->use_end(); UI != UE; ++UI) {
+          if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+              cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
+                Intrinsic::ppc_altivec_lvsl) {
+            // We've found another LVSL, and this address if an aligned
+            // multiple of that one. The results will be the same, so use the
+            // one we've just found instead.
+
+            return SDValue(*UI, 0);
+          }
+        }
+      }
+    }
   case ISD::BSWAP:
     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
@@ -7097,6 +7307,39 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     // compare down to code that is difficult to reassemble.
     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
+
+    // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
+    // value. If so, pass-through the AND to get to the intrinsic.
+    if (LHS.getOpcode() == ISD::AND &&
+        LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+        cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
+          Intrinsic::ppc_is_decremented_ctr_nonzero &&
+        isa<ConstantSDNode>(LHS.getOperand(1)) &&
+        !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()->
+          isZero())
+      LHS = LHS.getOperand(0);
+
+    if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+        cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
+          Intrinsic::ppc_is_decremented_ctr_nonzero &&
+        isa<ConstantSDNode>(RHS)) {
+      assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
+             "Counter decrement comparison is not EQ or NE");
+
+      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
+      bool isBDNZ = (CC == ISD::SETEQ && Val) ||
+                    (CC == ISD::SETNE && !Val);
+
+      // We now need to make the intrinsic dead (it cannot be instruction
+      // selected).
+      DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
+      assert(LHS.getNode()->hasOneUse() &&
+             "Counter decrement has more than one use");
+
+      return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
+                         N->getOperand(0), N->getOperand(4));
+    }
+
     int CompareOpc;
     bool isDot;
 
@@ -7406,25 +7649,13 @@ bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
   return true;
 }
 
-/// isLegalAddressImmediate - Return true if the integer value can be used
-/// as the offset of the target addressing mode for load / store of the
-/// given type.
-bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{
-  // PPC allows a sign-extended 16-bit immediate field.
-  return (V > -(1 << 16) && V < (1 << 16)-1);
-}
-
-bool PPCTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
-  return false;
-}
-
 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   MFI->setReturnAddressIsTaken(true);
 
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
 
   // Make sure the function does not optimize away the store of the RA to
@@ -7454,7 +7685,7 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
 
 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
 
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 423e983..e85f96c 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -20,6 +20,7 @@
 #include "PPCRegisterInfo.h"
 #include "PPCSubtarget.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/Target/TargetLowering.h"
 
 namespace llvm {
@@ -146,6 +147,10 @@ namespace llvm {
       /// an optional input flag argument.
       COND_BRANCH,
 
+      /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
+      /// loops.
+      BDNZ, BDZ,
+
       /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
       /// towards zero.  Used only as part of the long double-to-int
       /// conversion sequence.
@@ -175,61 +180,61 @@ namespace llvm {
 
       /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
       /// TLS model, produces an ADDIS8 instruction that adds the GOT
-      /// base to sym@got@tprel@ha.
+      /// base to sym\@got\@tprel\@ha.
       ADDIS_GOT_TPREL_HA,
 
       /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
       /// TLS model, produces a LD instruction with base register G8RReg
-      /// and offset sym@got@tprel@l.  This completes the addition that
+      /// and offset sym\@got\@tprel\@l.  This completes the addition that
       /// finds the offset of "sym" relative to the thread pointer.
       LD_GOT_TPREL_L,
 
       /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
       /// model, produces an ADD instruction that adds the contents of
       /// G8RReg to the thread pointer.  Symbol contains a relocation
-      /// sym@tls which is to be replaced by the thread pointer and
+      /// sym\@tls which is to be replaced by the thread pointer and
       /// identifies to the linker that the instruction is part of a
       /// TLS sequence.
       ADD_TLS,
 
       /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS
       /// model, produces an ADDIS8 instruction that adds the GOT base
-      /// register to sym@got@tlsgd@ha.
+      /// register to sym\@got\@tlsgd\@ha.
       ADDIS_TLSGD_HA,
 
       /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
       /// model, produces an ADDI8 instruction that adds G8RReg to
-      /// sym@got@tlsgd@l.
+      /// sym\@got\@tlsgd\@l.
       ADDI_TLSGD_L,
 
       /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
-      /// model, produces a call to __tls_get_addr(sym@tlsgd).
+      /// model, produces a call to __tls_get_addr(sym\@tlsgd).
       GET_TLS_ADDR,
 
       /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
       /// model, produces an ADDIS8 instruction that adds the GOT base
-      /// register to sym@got@tlsld@ha.
+      /// register to sym\@got\@tlsld\@ha.
       ADDIS_TLSLD_HA,
 
       /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
       /// model, produces an ADDI8 instruction that adds G8RReg to
-      /// sym@got@tlsld@l.
+      /// sym\@got\@tlsld\@l.
       ADDI_TLSLD_L,
 
       /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
-      /// model, produces a call to __tls_get_addr(sym@tlsld).
+      /// model, produces a call to __tls_get_addr(sym\@tlsld).
       GET_TLSLD_ADDR,
 
       /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
       /// local-dynamic TLS model, produces an ADDIS8 instruction
-      /// that adds X3 to sym@dtprel@ha.  The Chain operand is needed 
+      /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed
       /// to tie this in place following a copy to %X3 from the result
       /// of a GET_TLSLD_ADDR.
       ADDIS_DTPREL_HA,
 
       /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
       /// model, produces an ADDI8 instruction that adds G8RReg to
-      /// sym@got@dtprel@l.
+      /// sym\@got\@dtprel\@l.
       ADDI_DTPREL_L,
 
       /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
@@ -238,6 +243,10 @@ namespace llvm {
       /// optimizations due to constant folding.
       VADD_SPLAT,
 
+      /// CHAIN = SC CHAIN, Imm128 - System call.  The 7-bit unsigned
+      /// operand identifies the operating system entry point.
+      SC,
+
       /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
       /// byte-swapping store instruction.  It byte-swaps the low "Type" bits of
       /// the GPRC input, then stores it through Ptr.  Type can be either i16 or
@@ -266,16 +275,16 @@ namespace llvm {
 
       /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model,
       /// produces an ADDIS8 instruction that adds the TOC base register to
-      /// sym@toc@ha.
+      /// sym\@toc\@ha.
       ADDIS_TOC_HA,
 
       /// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model,
       /// produces a LD instruction with base register G8RReg and offset
-      /// sym@toc@l.  Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
+      /// sym\@toc\@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
       LD_TOC_L,
 
       /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces
-      /// an ADDI8 instruction that adds G8RReg to sym@toc@l.
+      /// an ADDI8 instruction that adds G8RReg to sym\@toc\@l.
       /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
       ADDI_TOC_L
     };
@@ -327,8 +336,6 @@ namespace llvm {
 
   class PPCTargetLowering : public TargetLowering {
     const PPCSubtarget &PPCSubTarget;
-    const PPCRegisterInfo *PPCRegInfo;
-    const PPCInstrInfo *PPCII;
 
   public:
     explicit PPCTargetLowering(PPCTargetMachine &TM);
@@ -340,7 +347,7 @@ namespace llvm {
     virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
 
     /// getSetCCResultType - Return the ISD::SETCC ValueType
-    virtual EVT getSetCCResultType(EVT VT) const;
+    virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
 
     /// getPreIndexedAddressParts - returns true by value, base pointer and
     /// offset pointer and addressing mode by reference if the node's address
@@ -358,21 +365,16 @@ namespace llvm {
 
     /// SelectAddressRegImm - Returns true if the address N can be represented
     /// by a base register plus a signed 16-bit displacement [r+imm], and if it
-    /// is not better represented as reg+reg.
+    /// is not better represented as reg+reg.  If Aligned is true, only accept
+    /// displacements suitable for STD and friends, i.e. multiples of 4.
     bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
-                             SelectionDAG &DAG) const;
+                             SelectionDAG &DAG, bool Aligned) const;
 
     /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
     /// represented as an indexed [r+r] operation.
     bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
                                  SelectionDAG &DAG) const;
 
-    /// SelectAddressRegImmShift - Returns true if the address N can be
-    /// represented by a base register plus a signed 14-bit displacement
-    /// [r+imm*4].  Suitable for use by STD and friends.
-    bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base,
-                                  SelectionDAG &DAG) const;
-
     Sched::Preference getSchedulingPreference(SDNode *N) const;
 
     /// LowerOperation - Provide custom lowering hooks for some operations.
@@ -436,15 +438,6 @@ namespace llvm {
     /// by AM is legal for this target, for a load/store of the specified type.
     virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
 
-    /// isLegalAddressImmediate - Return true if the integer value can be used
-    /// as the offset of the target addressing mode for load / store of the
-    /// given type.
-    virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const;
-
-    /// isLegalAddressImmediate - Return true if the GlobalValue can be used as
-    /// the offset of the target addressing mode.
-    virtual bool isLegalAddressImmediate(GlobalValue *GV) const;
-
     virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
 
     /// getOptimalMemOpType - Returns the target specific optimal type for load
@@ -459,7 +452,7 @@ namespace llvm {
     /// It returns EVT::Other if the type should be determined using generic
     /// target-independent logic.
     virtual EVT
-    getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, 
+    getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                         bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                         MachineFunction &MF) const;
 
@@ -490,7 +483,7 @@ namespace llvm {
                                          SDValue &LROpOut,
                                          SDValue &FPOpOut,
                                          bool isDarwinABI,
-                                         DebugLoc dl) const;
+                                         SDLoc dl) const;
 
     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
@@ -511,7 +504,7 @@ namespace llvm {
     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
                                       const PPCSubtarget &Subtarget) const;
     SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const;
+    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const;
     SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
@@ -526,9 +519,9 @@ namespace llvm {
     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                             CallingConv::ID CallConv, bool isVarArg,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
-                            DebugLoc dl, SelectionDAG &DAG,
+                            SDLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const;
-    SDValue FinishCall(CallingConv::ID CallConv, DebugLoc dl, bool isTailCall,
+    SDValue FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall,
                        bool isVarArg,
                        SelectionDAG &DAG,
                        SmallVector<std::pair<unsigned, SDValue>, 8>
@@ -543,7 +536,7 @@ namespace llvm {
       LowerFormalArguments(SDValue Chain,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
-                           DebugLoc dl, SelectionDAG &DAG,
+                           SDLoc dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals) const;
 
     virtual SDValue
@@ -561,11 +554,11 @@ namespace llvm {
                   CallingConv::ID CallConv, bool isVarArg,
                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                   const SmallVectorImpl<SDValue> &OutVals,
-                  DebugLoc dl, SelectionDAG &DAG) const;
+                  SDLoc dl, SelectionDAG &DAG) const;
 
     SDValue
       extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG,
-                        SDValue ArgVal, DebugLoc dl) const;
+                        SDValue ArgVal, SDLoc dl) const;
 
     void
       setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
@@ -576,25 +569,25 @@ namespace llvm {
       LowerFormalArguments_Darwin(SDValue Chain,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
-                                  DebugLoc dl, SelectionDAG &DAG,
+                                  SDLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const;
     SDValue
       LowerFormalArguments_64SVR4(SDValue Chain,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
-                                  DebugLoc dl, SelectionDAG &DAG,
+                                  SDLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const;
     SDValue
       LowerFormalArguments_32SVR4(SDValue Chain,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
-                                  DebugLoc dl, SelectionDAG &DAG,
+                                  SDLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const;
 
     SDValue
       createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
                                  SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
-                                 SelectionDAG &DAG, DebugLoc dl) const;
+                                 SelectionDAG &DAG, SDLoc dl) const;
 
     SDValue
       LowerCall_Darwin(SDValue Chain, SDValue Callee,
@@ -603,7 +596,7 @@ namespace llvm {
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SmallVectorImpl<ISD::InputArg> &Ins,
-                       DebugLoc dl, SelectionDAG &DAG,
+                       SDLoc dl, SelectionDAG &DAG,
                        SmallVectorImpl<SDValue> &InVals) const;
     SDValue
       LowerCall_64SVR4(SDValue Chain, SDValue Callee,
@@ -612,7 +605,7 @@ namespace llvm {
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SmallVectorImpl<ISD::InputArg> &Ins,
-                       DebugLoc dl, SelectionDAG &DAG,
+                       SDLoc dl, SelectionDAG &DAG,
                        SmallVectorImpl<SDValue> &InVals) const;
     SDValue
     LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
@@ -620,7 +613,7 @@ namespace llvm {
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals,
                      const SmallVectorImpl<ISD::InputArg> &Ins,
-                     DebugLoc dl, SelectionDAG &DAG,
+                     SDLoc dl, SelectionDAG &DAG,
                      SmallVectorImpl<SDValue> &InVals) const;
 
     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
@@ -629,6 +622,23 @@ namespace llvm {
     SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const;
     SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const;
   };
+
+  bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                  CCValAssign::LocInfo &LocInfo,
+                                  ISD::ArgFlagsTy &ArgFlags,
+                                  CCState &State);
+
+  bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+                                         MVT &LocVT,
+                                         CCValAssign::LocInfo &LocInfo,
+                                         ISD::ArgFlagsTy &ArgFlags,
+                                         CCState &State);
+
+  bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+                                           MVT &LocVT,
+                                           CCValAssign::LocInfo &LocInfo,
+                                           ISD::ArgFlagsTy &ArgFlags,
+                                           CCState &State);
 }
 
 #endif   // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index e5d0b91..0245ba7 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -17,17 +17,12 @@
 //
 def s16imm64 : Operand<i64> {
   let PrintMethod = "printS16ImmOperand";
+  let EncoderMethod = "getS16ImmEncoding";
+  let ParserMatchClass = PPCS16ImmAsmOperand;
 }
 def u16imm64 : Operand<i64> {
   let PrintMethod = "printU16ImmOperand";
-}
-def symbolHi64 : Operand<i64> {
-  let PrintMethod = "printSymbolHi";
-  let EncoderMethod = "getHA16Encoding";
-}
-def symbolLo64 : Operand<i64> {
-  let PrintMethod = "printSymbolLo";
-  let EncoderMethod = "getLO16Encoding";
+  let ParserMatchClass = PPCU16ImmAsmOperand;
 }
 def tocentry : Operand<iPTR> {
   let MIOperandInfo = (ops i64imm:$imm);
@@ -289,6 +284,12 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
                            "mtctr $rS", SprMTSPR>,
              PPC970_DGroup_First, PPC970_Unit_FXU;
 }
+let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR8] in {
+let Pattern = [(int_ppc_mtctr i64:$rS)] in
+def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
+                               "mtctr $rS", SprMTSPR>,
+                 PPC970_DGroup_First, PPC970_Unit_FXU;
+}
 
 let Pattern = [(set i64:$rT, readcyclecounter)] in
 def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins),
@@ -325,10 +326,10 @@ let Interpretation64Bit = 1 in {
 let neverHasSideEffects = 1 in {
 
 let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
-def LI8  : DForm_2_r0<14, (outs g8rc:$rD), (ins symbolLo64:$imm),
+def LI8  : DForm_2_r0<14, (outs g8rc:$rD), (ins s16imm64:$imm),
                       "li $rD, $imm", IntSimple,
-                      [(set i64:$rD, immSExt16:$imm)]>;
-def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins symbolHi64:$imm),
+                      [(set i64:$rD, imm64SExt16:$imm)]>;
+def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins s16imm64:$imm),
                       "lis $rD, $imm", IntSimple,
                       [(set i64:$rD, imm16ShiftedSExt:$imm)]>;
 }
@@ -400,18 +401,18 @@ defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
 let Defs = [CARRY] in
 def ADDIC8 : DForm_2<12, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
                      "addic $rD, $rA, $imm", IntGeneral,
-                     [(set i64:$rD, (addc i64:$rA, immSExt16:$imm))]>;
-def ADDI8  : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, symbolLo64:$imm),
+                     [(set i64:$rD, (addc i64:$rA, imm64SExt16:$imm))]>;
+def ADDI8  : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$imm),
                      "addi $rD, $rA, $imm", IntSimple,
-                     [(set i64:$rD, (add i64:$rA, immSExt16:$imm))]>;
-def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, symbolHi64:$imm),
+                     [(set i64:$rD, (add i64:$rA, imm64SExt16:$imm))]>;
+def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$imm),
                      "addis $rD, $rA, $imm", IntSimple,
                      [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>;
 
 let Defs = [CARRY] in {
 def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
                      "subfic $rD, $rA, $imm", IntGeneral,
-                     [(set i64:$rD, (subc immSExt16:$imm, i64:$rA))]>;
+                     [(set i64:$rD, (subc imm64SExt16:$imm, i64:$rA))]>;
 defm SUBFC8 : XOForm_1r<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
                         "subfc", "$rT, $rA, $rB", IntGeneral,
                         [(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
@@ -746,25 +747,25 @@ def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
                        (PPCaddiTocL i64:$reg, tglobaladdr:$disp))]>, isPPC64;
 
 // Support for thread-local storage.
-def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp),
+def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
                          "#ADDISgotTprelHA",
                          [(set i64:$rD,
                            (PPCaddisGotTprelHA i64:$reg,
                                                tglobaltlsaddr:$disp))]>,
                   isPPC64;
-def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins symbolLo64:$disp, g8rc_nox0:$reg),
+def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg),
                         "#LDgotTprelL",
                         [(set i64:$rD,
                           (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
                  isPPC64;
 def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
           (ADD8TLS $in, tglobaltlsaddr:$g)>;
-def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp),
+def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
                          "#ADDIStlsgdHA",
                          [(set i64:$rD,
                            (PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>,
                   isPPC64;
-def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp),
+def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
                        "#ADDItlsgdL",
                        [(set i64:$rD,
                          (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
@@ -774,12 +775,12 @@ def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
                         [(set i64:$rD,
                           (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
                  isPPC64;
-def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp),
+def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
                          "#ADDIStlsldHA",
                          [(set i64:$rD,
                            (PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>,
                   isPPC64;
-def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp),
+def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
                        "#ADDItlsldL",
                        [(set i64:$rD,
                          (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
@@ -789,13 +790,13 @@ def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
                           [(set i64:$rD,
                             (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
                    isPPC64;
-def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp),
+def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
                           "#ADDISdtprelHA",
                           [(set i64:$rD,
                             (PPCaddisDtprelHA i64:$reg,
                                               tglobaltlsaddr:$disp))]>,
                    isPPC64;
-def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp),
+def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
                          "#ADDIdtprelL",
                          [(set i64:$rD,
                            (PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>,
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 41b4e01..a244058 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -145,6 +145,19 @@ class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk,
   let Inst{31}    = lk;
 }
 
+// 1.7.3 SC-Form
+class SCForm<bits<6> opcode, bits<1> xo,
+                     dag OOL, dag IOL, string asmstr, InstrItinClass itin,
+                     list<dag> pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<7>  LEV;
+
+  let Pattern = pattern;
+
+  let Inst{20-26} = LEV;
+  let Inst{30}    = xo;
+}
+
 // 1.7.4 D-Form
 class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
                  InstrItinClass itin, list<dag> pattern> 
@@ -365,6 +378,12 @@ class XForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern> 
   : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>;
 
+class XForm_1a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+              InstrItinClass itin, list<dag> pattern>
+  : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+  let RST = 0;
+}
+
 class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern> 
   : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 847bd22..a3eeb20 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -47,7 +47,7 @@ cl::desc("Disable compare instruction optimization"), cl::Hidden);
 
 PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
   : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
-    TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
+    TM(tm), RI(*TM.getSubtargetImpl()) {}
 
 /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
 /// this target when scheduling the DAG.
@@ -74,10 +74,9 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
   // Most subtargets use a PPC970 recognizer.
   if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
       Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
-    const TargetInstrInfo *TII = TM.getInstrInfo();
-    assert(TII && "No InstrInfo?");
+    assert(TM.getInstrInfo() && "No InstrInfo?");
 
-    return new PPCHazardRecognizer970(*TII);
+    return new PPCHazardRecognizer970(TM);
   }
 
   return new PPCScoreboardHazardRecognizer(II, DAG);
@@ -1096,8 +1095,11 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
 
   int OpC = CmpInstr->getOpcode();
   unsigned CRReg = CmpInstr->getOperand(0).getReg();
-  bool isFP = OpC == PPC::FCMPUS || OpC == PPC::FCMPUD;
-  unsigned CRRecReg = isFP ? PPC::CR1 : PPC::CR0;
+
+  // FP record forms set CR1 based on the execption status bits, not a
+  // comparison with zero.
+  if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
+    return false;
 
   // The record forms set the condition register based on a signed comparison
   // with zero (so says the ISA manual). This is not as straightforward as it
@@ -1140,9 +1142,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
         equalityOnly = true;
       } else
         return false;
-    } else if (!isFP)
+    } else
       equalityOnly = is64BitUnsignedCompare;
-  } else if (!isFP)
+  } else
     equalityOnly = is32BitUnsignedCompare;
 
   if (equalityOnly) {
@@ -1153,25 +1155,19 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
       MachineInstr *UseMI = &*I;
       if (UseMI->getOpcode() == PPC::BCC) {
         unsigned Pred = UseMI->getOperand(0).getImm();
-        if (Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE)
-          continue;
-
-        return false;
+        if (Pred != PPC::PRED_EQ && Pred != PPC::PRED_NE)
+          return false;
       } else if (UseMI->getOpcode() == PPC::ISEL ||
                  UseMI->getOpcode() == PPC::ISEL8) {
         unsigned SubIdx = UseMI->getOperand(3).getSubReg();
-        if (SubIdx == PPC::sub_eq)
-          continue;
-
-        return false;
+        if (SubIdx != PPC::sub_eq)
+          return false;
       } else
         return false;
     }
   }
 
-  // Get ready to iterate backward from CmpInstr.
-  MachineBasicBlock::iterator I = CmpInstr, E = MI,
-                              B = CmpInstr->getParent()->begin();
+  MachineBasicBlock::iterator I = CmpInstr;
 
   // Scan forward to find the first use of the compare.
   for (MachineBasicBlock::iterator EL = CmpInstr->getParent()->end();
@@ -1188,9 +1184,6 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
       break;
   }
 
-  // Early exit if we're at the beginning of the BB.
-  if (I == B) return false;
-
   // There are two possible candidates which can be changed to set CR[01].
   // One is MI, the other is a SUB instruction.
   // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
@@ -1210,13 +1203,18 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
   // Search for Sub.
   const TargetRegisterInfo *TRI = &getRegisterInfo();
   --I;
+
+  // Get ready to iterate backward from CmpInstr.
+  MachineBasicBlock::iterator E = MI,
+                              B = CmpInstr->getParent()->begin();
+
   for (; I != E && !noSub; --I) {
     const MachineInstr &Instr = *I;
     unsigned IOpC = Instr.getOpcode();
 
     if (&*I != CmpInstr && (
-        Instr.modifiesRegister(CRRecReg, TRI) ||
-        Instr.readsRegister(CRRecReg, TRI)))
+        Instr.modifiesRegister(PPC::CR0, TRI) ||
+        Instr.readsRegister(PPC::CR0, TRI)))
       // This instruction modifies or uses the record condition register after
       // the one we want to change. While we could do this transformation, it
       // would likely not be profitable. This transformation removes one
@@ -1236,15 +1234,6 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
       break;
     }
 
-    if (isFP && (IOpC == PPC::FSUB || IOpC == PPC::FSUBS) &&
-        ((Instr.getOperand(1).getReg() == SrcReg &&
-          Instr.getOperand(2).getReg() == SrcReg2) ||
-        (Instr.getOperand(1).getReg() == SrcReg2 &&
-         Instr.getOperand(2).getReg() == SrcReg))) {
-      Sub = &*I;
-      break;
-    }
-
     if (I == B)
       // The 'and' is below the comparison instruction.
       return false;
@@ -1290,8 +1279,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
 
     // The operands to subf are the opposite of sub, so only in the fixed-point
     // case, invert the order.
-    if (!isFP)
-      ShouldSwap = !ShouldSwap;
+    ShouldSwap = !ShouldSwap;
   }
 
   if (ShouldSwap)
@@ -1330,7 +1318,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
   MachineBasicBlock::iterator MII = MI;
   BuildMI(*MI->getParent(), llvm::next(MII), MI->getDebugLoc(),
           get(TargetOpcode::COPY), CRReg)
-    .addReg(CRRecReg, MIOpC != NewOpC ? RegState::Kill : 0);
+    .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);
 
   if (MIOpC != NewOpC) {
     // We need to be careful here: we're replacing one instruction with
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 7d3540e..1b7ea93 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -162,6 +162,10 @@ def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP",
                                 SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
                                 [SDNPHasChain, SDNPSideEffect]>;
 
+def SDT_PPCsc     : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def PPCsc         : SDNode<"PPCISD::SC", SDT_PPCsc,
+                           [SDNPHasChain, SDNPSideEffect]>;
+
 def PPCvcmp       : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
 def PPCvcmp_o     : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
 
@@ -246,13 +250,15 @@ def maskimm32 : PatLeaf<(imm), [{
     return false;
 }]>;
 
-def immSExt16  : PatLeaf<(imm), [{
-  // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
-  // field.  Used by instructions like 'addi'.
-  if (N->getValueType(0) == MVT::i32)
-    return (int32_t)N->getZExtValue() == (short)N->getZExtValue();
-  else
-    return (int64_t)N->getZExtValue() == (short)N->getZExtValue();
+def imm32SExt16  : Operand<i32>, ImmLeaf<i32, [{
+  // imm32SExt16 predicate - True if the i32 immediate fits in a 16-bit
+  // sign extended field.  Used by instructions like 'addi'.
+  return (int32_t)Imm == (short)Imm;
+}]>;
+def imm64SExt16  : Operand<i64>, ImmLeaf<i64, [{
+  // imm64SExt16 predicate - True if the i64 immediate fits in a 16-bit
+  // sign extended field.  Used by instructions like 'addi'.
+  return (int64_t)Imm == (short)Imm;
 }]>;
 def immZExt16  : PatLeaf<(imm), [{
   // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
@@ -283,7 +289,7 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{
 }], HI16>;
 
 // Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
-// restricted memrix (offset/4) constants are alignment sensitive. If these
+// restricted memrix (4-aligned) constants are alignment sensitive. If these
 // offsets are hidden behind TOC entries than the values of the lower-order
 // bits cannot be checked directly. As a result, we need to also incorporate
 // an alignment check into the relevant patterns.
@@ -342,30 +348,102 @@ class NoEncode<string E> {
 // all their register operands.
 // For this purpose, we define one RegisterOperand for each RegisterClass,
 // using the same name as the class, just in lower case.
-def gprc : RegisterOperand<GPRC>;
-def g8rc : RegisterOperand<G8RC>;
-def gprc_nor0 : RegisterOperand<GPRC_NOR0>;
-def g8rc_nox0 : RegisterOperand<G8RC_NOX0>;
-def f8rc : RegisterOperand<F8RC>;
-def f4rc : RegisterOperand<F4RC>;
-def vrrc : RegisterOperand<VRRC>;
-def crbitrc : RegisterOperand<CRBITRC>;
-def crrc : RegisterOperand<CRRC>;
 
+def PPCRegGPRCAsmOperand : AsmOperandClass {
+  let Name = "RegGPRC"; let PredicateMethod = "isRegNumber";
+}
+def gprc : RegisterOperand<GPRC> {
+  let ParserMatchClass = PPCRegGPRCAsmOperand;
+}
+def PPCRegG8RCAsmOperand : AsmOperandClass {
+  let Name = "RegG8RC"; let PredicateMethod = "isRegNumber";
+}
+def g8rc : RegisterOperand<G8RC> {
+  let ParserMatchClass = PPCRegG8RCAsmOperand;
+}
+def PPCRegGPRCNoR0AsmOperand : AsmOperandClass {
+  let Name = "RegGPRCNoR0"; let PredicateMethod = "isRegNumber";
+}
+def gprc_nor0 : RegisterOperand<GPRC_NOR0> {
+  let ParserMatchClass = PPCRegGPRCNoR0AsmOperand;
+}
+def PPCRegG8RCNoX0AsmOperand : AsmOperandClass {
+  let Name = "RegG8RCNoX0"; let PredicateMethod = "isRegNumber";
+}
+def g8rc_nox0 : RegisterOperand<G8RC_NOX0> {
+  let ParserMatchClass = PPCRegG8RCNoX0AsmOperand;
+}
+def PPCRegF8RCAsmOperand : AsmOperandClass {
+  let Name = "RegF8RC"; let PredicateMethod = "isRegNumber";
+}
+def f8rc : RegisterOperand<F8RC> {
+  let ParserMatchClass = PPCRegF8RCAsmOperand;
+}
+def PPCRegF4RCAsmOperand : AsmOperandClass {
+  let Name = "RegF4RC"; let PredicateMethod = "isRegNumber";
+}
+def f4rc : RegisterOperand<F4RC> {
+  let ParserMatchClass = PPCRegF4RCAsmOperand;
+}
+def PPCRegVRRCAsmOperand : AsmOperandClass {
+  let Name = "RegVRRC"; let PredicateMethod = "isRegNumber";
+}
+def vrrc : RegisterOperand<VRRC> {
+  let ParserMatchClass = PPCRegVRRCAsmOperand;
+}
+def PPCRegCRBITRCAsmOperand : AsmOperandClass {
+  let Name = "RegCRBITRC"; let PredicateMethod = "isRegNumber";
+}
+def crbitrc : RegisterOperand<CRBITRC> {
+  let ParserMatchClass = PPCRegCRBITRCAsmOperand;
+}
+def PPCRegCRRCAsmOperand : AsmOperandClass {
+  let Name = "RegCRRC"; let PredicateMethod = "isCCRegNumber";
+}
+def crrc : RegisterOperand<CRRC> {
+  let ParserMatchClass = PPCRegCRRCAsmOperand;
+}
+
+def PPCS5ImmAsmOperand : AsmOperandClass {
+  let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
+  let RenderMethod = "addImmOperands";
+}
 def s5imm   : Operand<i32> {
   let PrintMethod = "printS5ImmOperand";
+  let ParserMatchClass = PPCS5ImmAsmOperand;
+}
+def PPCU5ImmAsmOperand : AsmOperandClass {
+  let Name = "U5Imm"; let PredicateMethod = "isU5Imm";
+  let RenderMethod = "addImmOperands";
 }
 def u5imm   : Operand<i32> {
   let PrintMethod = "printU5ImmOperand";
+  let ParserMatchClass = PPCU5ImmAsmOperand;
+}
+def PPCU6ImmAsmOperand : AsmOperandClass {
+  let Name = "U6Imm"; let PredicateMethod = "isU6Imm";
+  let RenderMethod = "addImmOperands";
 }
 def u6imm   : Operand<i32> {
   let PrintMethod = "printU6ImmOperand";
+  let ParserMatchClass = PPCU6ImmAsmOperand;
+}
+def PPCS16ImmAsmOperand : AsmOperandClass {
+  let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
+  let RenderMethod = "addImmOperands";
 }
 def s16imm  : Operand<i32> {
   let PrintMethod = "printS16ImmOperand";
+  let EncoderMethod = "getS16ImmEncoding";
+  let ParserMatchClass = PPCS16ImmAsmOperand;
+}
+def PPCU16ImmAsmOperand : AsmOperandClass {
+  let Name = "U16Imm"; let PredicateMethod = "isU16Imm";
+  let RenderMethod = "addImmOperands";
 }
 def u16imm  : Operand<i32> {
   let PrintMethod = "printU16ImmOperand";
+  let ParserMatchClass = PPCU16ImmAsmOperand;
 }
 def directbrtarget : Operand<OtherVT> {
   let PrintMethod = "printBranchOperand";
@@ -381,24 +459,44 @@ def calltarget : Operand<iPTR> {
 def aaddr : Operand<iPTR> {
   let PrintMethod = "printAbsAddrOperand";
 }
-def symbolHi: Operand<i32> {
-  let PrintMethod = "printSymbolHi";
-  let EncoderMethod = "getHA16Encoding";
-}
-def symbolLo: Operand<i32> {
-  let PrintMethod = "printSymbolLo";
-  let EncoderMethod = "getLO16Encoding";
+def PPCCRBitMaskOperand : AsmOperandClass {
+ let Name = "CRBitMask"; let PredicateMethod = "isCRBitMask";
 }
 def crbitm: Operand<i8> {
   let PrintMethod = "printcrbitm";
   let EncoderMethod = "get_crbitm_encoding";
+  let ParserMatchClass = PPCCRBitMaskOperand;
 }
 // Address operands
 // A version of ptr_rc which excludes R0 (or X0 in 64-bit mode).
-def ptr_rc_nor0 : PointerLikeRegClass<1>;
+def PPCRegGxRCNoR0Operand : AsmOperandClass {
+  let Name = "RegGxRCNoR0"; let PredicateMethod = "isRegNumber";
+}
+def ptr_rc_nor0 : Operand<iPTR>, PointerLikeRegClass<1> {
+  let ParserMatchClass = PPCRegGxRCNoR0Operand;
+}
+// A version of ptr_rc usable with the asm parser.
+def PPCRegGxRCOperand : AsmOperandClass {
+  let Name = "RegGxRC"; let PredicateMethod = "isRegNumber";
+}
+def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> {
+  let ParserMatchClass = PPCRegGxRCOperand;
+}
 
-def dispRI : Operand<iPTR>;
-def dispRIX : Operand<iPTR>;
+def PPCDispRIOperand : AsmOperandClass {
+ let Name = "DispRI"; let PredicateMethod = "isS16Imm";
+ let RenderMethod = "addImmOperands";
+}
+def dispRI : Operand<iPTR> {
+  let ParserMatchClass = PPCDispRIOperand;
+}
+def PPCDispRIXOperand : AsmOperandClass {
+ let Name = "DispRIX"; let PredicateMethod = "isS16ImmX4";
+ let RenderMethod = "addImmOperands";
+}
+def dispRIX : Operand<iPTR> {
+  let ParserMatchClass = PPCDispRIXOperand;
+}
 
 def memri : Operand<iPTR> {
   let PrintMethod = "printMemRegImm";
@@ -407,10 +505,10 @@ def memri : Operand<iPTR> {
 }
 def memrr : Operand<iPTR> {
   let PrintMethod = "printMemRegReg";
-  let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc:$offreg);
+  let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc_idx:$offreg);
 }
-def memrix : Operand<iPTR> {   // memri where the imm is shifted 2 bits.
-  let PrintMethod = "printMemRegImmShifted";
+def memrix : Operand<iPTR> {   // memri where the imm is 4-aligned.
+  let PrintMethod = "printMemRegImm";
   let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
   let EncoderMethod = "getMemRIXEncoding";
 }
@@ -431,7 +529,7 @@ def pred : Operand<OtherVT> {
 def iaddr  : ComplexPattern<iPTR, 2, "SelectAddrImm",    [], []>;
 def xaddr  : ComplexPattern<iPTR, 2, "SelectAddrIdx",    [], []>;
 def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
-def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
+def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4",  [], []>; // "std"
 
 // The address in a single register. This is used with the SjLj
 // pseudo-instructions.
@@ -888,6 +986,12 @@ let isBranch = 1, isTerminator = 1 in {
                         "#EH_SjLj_Setup\t$dst", []>;
 }
 
+// System call.
+let PPC970_Unit = 7 in {
+  def SC     : SCForm<17, 1, (outs), (ins i32imm:$lev),
+                      "sc $lev", BrB, [(PPCsc (i32 imm:$lev))]>;
+}
+
 // DCB* instructions.
 def DCBA   : DCB_Form<758, 0, (outs), (ins memrr:$dst),
                       "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
@@ -1290,41 +1394,41 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins),
 //
 
 let PPC970_Unit = 1 in {  // FXU Operations.
-def ADDI   : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolLo:$imm),
+def ADDI   : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$imm),
                      "addi $rD, $rA, $imm", IntSimple,
-                     [(set i32:$rD, (add i32:$rA, immSExt16:$imm))]>;
+                     [(set i32:$rD, (add i32:$rA, imm32SExt16:$imm))]>;
 let BaseName = "addic" in {
 let Defs = [CARRY] in
 def ADDIC  : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
                      "addic $rD, $rA, $imm", IntGeneral,
-                     [(set i32:$rD, (addc i32:$rA, immSExt16:$imm))]>,
+                     [(set i32:$rD, (addc i32:$rA, imm32SExt16:$imm))]>,
                      RecFormRel, PPC970_DGroup_Cracked;
 let Defs = [CARRY, CR0] in
 def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
                      "addic. $rD, $rA, $imm", IntGeneral,
                      []>, isDOT, RecFormRel;
 }
-def ADDIS  : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolHi:$imm),
+def ADDIS  : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$imm),
                      "addis $rD, $rA, $imm", IntSimple,
                      [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>;
 let isCodeGenOnly = 1 in
-def LA     : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolLo:$sym),
+def LA     : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$sym),
                      "la $rD, $sym($rA)", IntGeneral,
                      [(set i32:$rD, (add i32:$rA,
                                           (PPClo tglobaladdr:$sym, 0)))]>;
 def MULLI  : DForm_2< 7, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
                      "mulli $rD, $rA, $imm", IntMulLI,
-                     [(set i32:$rD, (mul i32:$rA, immSExt16:$imm))]>;
+                     [(set i32:$rD, (mul i32:$rA, imm32SExt16:$imm))]>;
 let Defs = [CARRY] in
 def SUBFIC : DForm_2< 8, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
                      "subfic $rD, $rA, $imm", IntGeneral,
-                     [(set i32:$rD, (subc immSExt16:$imm, i32:$rA))]>;
+                     [(set i32:$rD, (subc imm32SExt16:$imm, i32:$rA))]>;
 
 let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
-  def LI  : DForm_2_r0<14, (outs gprc:$rD), (ins symbolLo:$imm),
+  def LI  : DForm_2_r0<14, (outs gprc:$rD), (ins s16imm:$imm),
                        "li $rD, $imm", IntSimple,
-                       [(set i32:$rD, immSExt16:$imm)]>;
-  def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins symbolHi:$imm),
+                       [(set i32:$rD, imm32SExt16:$imm)]>;
+  def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins s16imm:$imm),
                        "lis $rD, $imm", IntSimple,
                        [(set i32:$rD, imm16ShiftedSExt:$imm)]>;
 }
@@ -1591,6 +1695,12 @@ def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
                           "mtctr $rS", SprMTSPR>,
             PPC970_DGroup_First, PPC970_Unit_FXU;
 }
+let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in {
+let Pattern = [(int_ppc_mtctr i32:$rS)] in
+def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
+                              "mtctr $rS", SprMTSPR>,
+                PPC970_DGroup_First, PPC970_Unit_FXU;
+}
 
 let Defs = [LR] in {
 def MTLR  : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS),
@@ -1905,7 +2015,7 @@ def : Pat<(or i32:$in, imm:$imm),
 def : Pat<(xor i32:$in, imm:$imm),
           (XORIS (XORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
 // SUBFIC
-def : Pat<(sub immSExt16:$imm, i32:$in),
+def : Pat<(sub imm32SExt16:$imm, i32:$in),
           (SUBFIC $in, imm:$imm)>;
 
 // SHL/SRL
@@ -2012,3 +2122,82 @@ def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
 
 include "PPCInstrAltivec.td"
 include "PPCInstr64Bit.td"
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instructions used for assembler/disassembler only
+//
+
+def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
+                         "isync", SprISYNC, []>;
+
+def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src),
+                    "icbi $src", LdStICBI, []>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Assembler Instruction Aliases
+//
+
+// Pseudo-instructions for alternate assembly syntax (never used by codegen).
+// These are aliases that require C++ handling to convert to the target
+// instruction, while InstAliases can be handled directly by tblgen.
+class PPCAsmPseudo<string asm, dag iops>
+  : Instruction {
+  let Namespace = "PPC";
+  bit PPC64 = 0;  // Default value, override with isPPC64
+
+  let OutOperandList = (outs);
+  let InOperandList = iops;
+  let Pattern = [];
+  let AsmString = asm;
+  let isAsmParserOnly = 1;
+  let isPseudo = 1;
+}
+
+def : InstAlias<"sc", (SC 0)>;
+
+def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+
+def SLWI : PPCAsmPseudo<"slwi $rA, $rS, $n",
+                        (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def SRWI : PPCAsmPseudo<"srwi $rA, $rS, $n",
+                        (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def SLDI : PPCAsmPseudo<"sldi $rA, $rS, $n",
+                        (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def SRDI : PPCAsmPseudo<"srdi $rA, $rS, $n",
+                        (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+
+multiclass BranchExtendedMnemonic<string name, int bibo> {
+  def : InstAlias<"b"#name#" $cc, $dst",
+                  (BCC bibo, crrc:$cc, condbrtarget:$dst)>;
+  def : InstAlias<"b"#name#" $dst",
+                  (BCC bibo, CR0, condbrtarget:$dst)>;
+
+  def : InstAlias<"b"#name#"lr $cc",
+                  (BCLR bibo, crrc:$cc)>;
+  def : InstAlias<"b"#name#"lr",
+                  (BCLR bibo, CR0)>;
+
+  def : InstAlias<"b"#name#"ctr $cc",
+                  (BCCTR bibo, crrc:$cc)>;
+  def : InstAlias<"b"#name#"ctr",
+                  (BCCTR bibo, CR0)>;
+
+  def : InstAlias<"b"#name#"ctrl $cc",
+                  (BCCTRL bibo, crrc:$cc)>;
+  def : InstAlias<"b"#name#"ctrl",
+                  (BCCTRL bibo, CR0)>;
+}
+defm : BranchExtendedMnemonic<"lt", 12>;
+defm : BranchExtendedMnemonic<"gt", 44>;
+defm : BranchExtendedMnemonic<"eq", 76>;
+defm : BranchExtendedMnemonic<"un", 108>;
+defm : BranchExtendedMnemonic<"so", 108>;
+defm : BranchExtendedMnemonic<"ge", 4>;
+defm : BranchExtendedMnemonic<"nl", 4>;
+defm : BranchExtendedMnemonic<"le", 36>;
+defm : BranchExtendedMnemonic<"ng", 36>;
+defm : BranchExtendedMnemonic<"ne", 68>;
+defm : BranchExtendedMnemonic<"nu", 100>;
+defm : BranchExtendedMnemonic<"ns", 100>;
+
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index f8cf3a5..ba7efc1 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPC.h"
+#include "MCTargetDesc/PPCMCExpr.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
@@ -110,32 +111,32 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
 
   unsigned access = MO.getTargetFlags() & PPCII::MO_ACCESS_MASK;
 
-  switch (access) {
-    case PPCII::MO_HA16: RefKind = isDarwin ? 
-                           MCSymbolRefExpr::VK_PPC_DARWIN_HA16 : 
-                           MCSymbolRefExpr::VK_PPC_GAS_HA16; 
-                         break;
-    case PPCII::MO_LO16: RefKind = isDarwin ? 
-                           MCSymbolRefExpr::VK_PPC_DARWIN_LO16 : 
-                           MCSymbolRefExpr::VK_PPC_GAS_LO16; 
-                         break;
-    case PPCII::MO_TPREL16_HA: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_HA;
-                               break;
-    case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO;
-                               break;
-    case PPCII::MO_DTPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_DTPREL16_LO;
-                                break;
-    case PPCII::MO_TLSLD16_LO: RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO;
-                               break;
-    case PPCII::MO_TOC16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TOC16_LO;
-                             break;
-   }
+  if (!isDarwin) {
+    switch (access) {
+      case PPCII::MO_HA16:
+        RefKind = MCSymbolRefExpr::VK_PPC_ADDR16_HA;
+        break;
+      case PPCII::MO_LO16:
+        RefKind = MCSymbolRefExpr::VK_PPC_ADDR16_LO;
+        break;
+      case PPCII::MO_TPREL16_HA:
+        RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_HA;
+        break;
+      case PPCII::MO_TPREL16_LO:
+        RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO;
+        break;
+      case PPCII::MO_DTPREL16_LO:
+        RefKind = MCSymbolRefExpr::VK_PPC_DTPREL16_LO;
+        break;
+      case PPCII::MO_TLSLD16_LO:
+        RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO;
+        break;
+      case PPCII::MO_TOC16_LO:
+        RefKind = MCSymbolRefExpr::VK_PPC_TOC16_LO;
+        break;
+    }
+  }
 
-  // FIXME: This isn't right, but we don't have a good way to express this in
-  // the MC Level, see below.
-  if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG)
-    RefKind = MCSymbolRefExpr::VK_None;
-  
   const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx);
 
   if (!MO.isJTI() && MO.getOffset())
@@ -149,10 +150,20 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
     
     const MCExpr *PB = MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
     Expr = MCBinaryExpr::CreateSub(Expr, PB, Ctx);
-    // FIXME: We have no way to make the result be VK_PPC_LO16/VK_PPC_HA16,
-    // since it is not a symbol!
   }
-  
+
+  // Add Darwin ha16() / lo16() markers if required.
+  if (isDarwin) {
+    switch (access) {
+      case PPCII::MO_HA16:
+        Expr = PPCMCExpr::CreateHa16(Expr, Ctx);
+        break;
+      case PPCII::MO_LO16:
+        Expr = PPCMCExpr::CreateLo16(Expr, Ctx);
+        break;
+    }
+  }
+
   return MCOperand::CreateExpr(Expr);
 }
 
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 2be6324..a4e328e 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -48,12 +48,11 @@
 
 using namespace llvm;
 
-PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
-                                 const TargetInstrInfo &tii)
+PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST)
   : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
                        ST.isPPC64() ? 0 : 1,
                        ST.isPPC64() ? 0 : 1),
-    Subtarget(ST), TII(tii) {
+    Subtarget(ST) {
   ImmToIdxMap[PPC::LD]   = PPC::LDX;    ImmToIdxMap[PPC::STD]  = PPC::STDX;
   ImmToIdxMap[PPC::LBZ]  = PPC::LBZX;   ImmToIdxMap[PPC::STB]  = PPC::STBX;
   ImmToIdxMap[PPC::LHZ]  = PPC::LHZX;   ImmToIdxMap[PPC::LHA]  = PPC::LHAX;
@@ -136,6 +135,11 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   Reserved.set(PPC::FP);
   Reserved.set(PPC::FP8);
 
+  // The counter registers must be reserved so that counter-based loops can
+  // be correctly formed (and the mtctr instructions are not DCE'd).
+  Reserved.set(PPC::CTR);
+  Reserved.set(PPC::CTR8);
+
   Reserved.set(PPC::R1);
   Reserved.set(PPC::LR);
   Reserved.set(PPC::LR8);
@@ -214,6 +218,8 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
   MachineFunction &MF = *MBB.getParent();
   // Get the frame info.
   MachineFrameInfo *MFI = MF.getFrameInfo();
+  // Get the instruction info.
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   // Determine whether 64-bit pointers are used.
   bool LP64 = Subtarget.isPPC64();
   DebugLoc dl = MI.getDebugLoc();
@@ -307,6 +313,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
   // Get the instruction's basic block.
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   DebugLoc dl = MI.getDebugLoc();
 
   bool LP64 = Subtarget.isPPC64();
@@ -350,6 +357,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
   // Get the instruction's basic block.
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   DebugLoc dl = MI.getDebugLoc();
 
   bool LP64 = Subtarget.isPPC64();
@@ -391,6 +399,7 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
   // Get the instruction's basic block.
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   DebugLoc dl = MI.getDebugLoc();
 
   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -415,6 +424,7 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
   // Get the instruction's basic block.
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   DebugLoc dl = MI.getDebugLoc();
 
   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -454,9 +464,8 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
   return false;
 }
 
-// Figure out if the offset in the instruction is shifted right two bits. This
-// is true for instructions like "STD", which the machine implicitly adds two
-// low zeros to.
+// Figure out if the offset in the instruction must be a multiple of 4.
+// This is true for instructions like "STD".
 static bool usesIXAddr(const MachineInstr &MI) {
   unsigned OpC = MI.getOpcode();
 
@@ -493,6 +502,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MachineBasicBlock &MBB = *MI.getParent();
   // Get the basic block's function.
   MachineFunction &MF = *MBB.getParent();
+  // Get the instruction info.
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   // Get the frame info.
   MachineFrameInfo *MFI = MF.getFrameInfo();
   const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
@@ -549,10 +560,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   // Now add the frame object offset to the offset from r1.
   int Offset = MFI->getObjectOffset(FrameIndex);
-  if (!isIXAddr)
-    Offset += MI.getOperand(OffsetOperandNo).getImm();
-  else
-    Offset += MI.getOperand(OffsetOperandNo).getImm() << 2;
+  Offset += MI.getOperand(OffsetOperandNo).getImm();
 
   // If we're not using a Frame Pointer that has been set to the value of the
   // SP before having the stack size subtracted from it, then add the stack size
@@ -572,8 +580,6 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
       (!noImmForm &&
        isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) {
-    if (isIXAddr)
-      Offset >>= 2;    // The actual encoded value has the low two bits zero.
     MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
     return;
   }
@@ -650,11 +656,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
   }
 
   unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum);
-
-  if (!usesIXAddr(*MI))
-    Offset += MI->getOperand(OffsetOperandNo).getImm();
-  else
-    Offset += MI->getOperand(OffsetOperandNo).getImm() << 2;
+  Offset += MI->getOperand(OffsetOperandNo).getImm();
 
   // It's the load/store FI references that cause issues, as it can be difficult
   // to materialize the offset if it won't fit in the literal field. Estimate
@@ -711,9 +713,10 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
   if (Ins != MBB->end())
     DL = Ins->getDebugLoc();
 
+  const MachineFunction &MF = *MBB->getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   const MCInstrDesc &MCID = TII.get(ADDriOpc);
   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
-  const MachineFunction &MF = *MBB->getParent();
   MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
 
   BuildMI(*MBB, Ins, DL, MCID, BaseReg)
@@ -734,17 +737,7 @@ PPCRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
 
   MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);
   unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
-
-  bool isIXAddr = usesIXAddr(MI);
-  if (!isIXAddr)
-    Offset += MI.getOperand(OffsetOperandNo).getImm();
-  else
-    Offset += MI.getOperand(OffsetOperandNo).getImm() << 2;
-
-  // Figure out if the offset in the instruction is shifted right two bits.
-  if (isIXAddr)
-    Offset >>= 2;    // The actual encoded value has the low two bits zero.
-
+  Offset += MI.getOperand(OffsetOperandNo).getImm();
   MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
 }
 
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 7a48b4b..93626a9 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -29,9 +29,8 @@ class Type;
 class PPCRegisterInfo : public PPCGenRegisterInfo {
   DenseMap<unsigned, unsigned> ImmToIdxMap;
   const PPCSubtarget &Subtarget;
-  const TargetInstrInfo &TII;
 public:
-  PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii);
+  PPCRegisterInfo(const PPCSubtarget &SubTarget);
   
   /// getPointerRegClass - Return the register class to use to hold pointers.
   /// This is used for addressing modes.
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 57a25f5..b1b4f06 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -11,11 +11,11 @@
 //===----------------------------------------------------------------------===//
 
 let Namespace = "PPC" in {
-def sub_lt : SubRegIndex;
-def sub_gt : SubRegIndex;
-def sub_eq : SubRegIndex;
-def sub_un : SubRegIndex;
-def sub_32 : SubRegIndex;
+def sub_lt : SubRegIndex<1>;
+def sub_gt : SubRegIndex<1, 1>;
+def sub_eq : SubRegIndex<1, 2>;
+def sub_un : SubRegIndex<1, 3>;
+def sub_32 : SubRegIndex<32>;
 }
 
 
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 14dc794..da03b4c 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -48,6 +48,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
   // The binutils for the BG/P are too old for CFI.
   if (Subtarget.isBGP())
     setMCUseCFI(false);
+  initAsmInfo();
 }
 
 void PPC32TargetMachine::anchor() { }
@@ -90,7 +91,7 @@ public:
     return *getPPCTargetMachine().getSubtargetImpl();
   }
 
-  virtual bool addPreRegAlloc();
+  virtual bool addPreISel();
   virtual bool addILPOpts();
   virtual bool addInstSelector();
   virtual bool addPreSched2();
@@ -102,9 +103,9 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
   return new PPCPassConfig(this, PM);
 }
 
-bool PPCPassConfig::addPreRegAlloc() {
+bool PPCPassConfig::addPreISel() {
   if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
-    addPass(createPPCCTRLoops());
+    addPass(createPPCCTRLoops(getPPCTargetMachine()));
 
   return false;
 }
@@ -121,6 +122,12 @@ bool PPCPassConfig::addILPOpts() {
 bool PPCPassConfig::addInstSelector() {
   // Install an instruction selector.
   addPass(createPPCISelDag(getPPCTargetMachine()));
+
+#ifndef NDEBUG
+  if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
+    addPass(createPPCCTRLoopsVerify());
+#endif
+
   return false;
 }
 
diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/lib/Target/PowerPC/PPCTargetObjectFile.cpp
new file mode 100644
index 0000000..90e4f15
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -0,0 +1,57 @@
+//===-- PPCTargetObjectFile.cpp - PPC Object Info -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCTargetObjectFile.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+void
+PPC64LinuxTargetObjectFile::
+Initialize(MCContext &Ctx, const TargetMachine &TM) {
+  TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+  InitializeELF(TM.Options.UseInitArray);
+}
+
+const MCSection * PPC64LinuxTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+                       Mangler *Mang, const TargetMachine &TM) const {
+
+  const MCSection *DefaultSection = 
+    TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM);
+
+  if (DefaultSection != ReadOnlySection)
+    return DefaultSection;
+
+  // Here override ReadOnlySection to DataRelROSection for PPC64 SVR4 ABI
+  // when we have a constant that contains global relocations.  This is
+  // necessary because of this ABI's handling of pointers to functions in
+  // a shared library.  The address of a function is actually the address
+  // of a function descriptor, which resides in the .opd section.  Generated
+  // code uses the descriptor directly rather than going via the GOT as some
+  // other ABIs do, which means that initialized function pointers must
+  // reference the descriptor.  The linker must convert copy relocs of
+  // pointers to functions in shared libraries into dynamic relocations,
+  // because of an ordering problem with initialization of copy relocs and
+  // PLT entries.  The dynamic relocation will be initialized by the dynamic
+  // linker, so we must use DataRelROSection instead of ReadOnlySection.
+  // For more information, see the description of ELIMINATE_COPY_RELOCS in
+  // GNU ld.
+  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+
+  if (GVar && GVar->isConstant() &&
+      (GVar->getInitializer()->getRelocationInfo() ==
+       Constant::GlobalRelocations))
+    return DataRelROSection;
+
+  return DefaultSection;
+}
diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.h b/lib/Target/PowerPC/PPCTargetObjectFile.h
new file mode 100644
index 0000000..9203e23
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetObjectFile.h
@@ -0,0 +1,32 @@
+//===-- PPCTargetObjectFile.h - PPC Object Info -----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_PPC_TARGETOBJECTFILE_H
+#define LLVM_TARGET_PPC_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+  /// PPC64LinuxTargetObjectFile - This implementation is used for
+  /// 64-bit PowerPC Linux.
+  class PPC64LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
+
+    virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+    virtual const MCSection *
+    SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+                           Mangler *Mang, const TargetMachine &TM) const;
+  };
+
+}  // end namespace llvm
+
+#endif