30 files changed, 2261 insertions, 706 deletions
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index efb10db..acf7496 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -10,7 +10,6 @@ add_public_tablegen_target(SparcCommonTableGen)
 
 add_llvm_target(SparcCodeGen
   DelaySlotFiller.cpp
-  FPMover.cpp
   SparcAsmPrinter.cpp
   SparcInstrInfo.cpp
   SparcISelDAGToDAG.cpp
@@ -23,7 +22,7 @@ add_llvm_target(SparcCodeGen
   SparcSelectionDAGInfo.cpp
   )
 
-add_dependencies(LLVMSparcCodeGen intrinsics_gen)
+add_dependencies(LLVMSparcCodeGen SparcCommonTableGen intrinsics_gen)
 
 add_subdirectory(TargetInfo)
 add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index 6123773..b101751 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -39,11 +39,10 @@ namespace {
     /// layout, etc.
     ///
     TargetMachine &TM;
-    const TargetInstrInfo *TII;
 
     static char ID;
-    Filler(TargetMachine &tm) 
-      : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+    Filler(TargetMachine &tm)
+      : MachineFunctionPass(ID), TM(tm) { }
 
     virtual const char *getPassName() const {
       return "SPARC Delay Slot Filler";
@@ -61,8 +60,9 @@ namespace {
     bool isDelayFiller(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator candidate);
 
-    void insertCallUses(MachineBasicBlock::iterator MI,
-                        SmallSet<unsigned, 32>& RegUses);
+    void insertCallDefsUses(MachineBasicBlock::iterator MI,
+                            SmallSet<unsigned, 32>& RegDefs,
+                            SmallSet<unsigned, 32>& RegUses);
 
     void insertDefsUses(MachineBasicBlock::iterator MI,
                         SmallSet<unsigned, 32>& RegDefs,
@@ -81,6 +81,9 @@ namespace {
 
     bool needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize);
 
+    bool tryCombineRestoreWithPrevInst(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MBBI);
+
   };
   char Filler::ID = 0;
 } // end of anonymous namespace
@@ -99,29 +102,45 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
 bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
   bool Changed = false;
 
-  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
-    if (I->hasDelaySlot()) {
-      MachineBasicBlock::iterator D = MBB.end();
-      MachineBasicBlock::iterator J = I;
-
-      if (!DisableDelaySlotFiller)
-        D = findDelayInstr(MBB, I);
-
-      ++FilledSlots;
-      Changed = true;
-
-      if (D == MBB.end())
-        BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(SP::NOP));
-      else
-        MBB.splice(++J, &MBB, D);
-      unsigned structSize = 0;
-      if (needsUnimp(I, structSize)) {
-        MachineBasicBlock::iterator J = I;
-        ++J; //skip the delay filler.
-        BuildMI(MBB, ++J, I->getDebugLoc(),
-                TII->get(SP::UNIMP)).addImm(structSize);
-      }
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+    MachineBasicBlock::iterator MI = I;
+    ++I;
+
+    // If MI is restore, try combining it with previous inst.
+    if (!DisableDelaySlotFiller &&
+        (MI->getOpcode() == SP::RESTORErr
+         || MI->getOpcode() == SP::RESTOREri)) {
+      Changed |= tryCombineRestoreWithPrevInst(MBB, MI);
+      continue;
+    }
+
+    // If MI has no delay slot, skip.
+    if (!MI->hasDelaySlot())
+      continue;
+
+    MachineBasicBlock::iterator D = MBB.end();
+
+    if (!DisableDelaySlotFiller)
+      D = findDelayInstr(MBB, MI);
+
+    ++FilledSlots;
+    Changed = true;
+
+    const TargetInstrInfo *TII = TM.getInstrInfo();
+    if (D == MBB.end())
+      BuildMI(MBB, I, MI->getDebugLoc(), TII->get(SP::NOP));
+    else
+      MBB.splice(I, &MBB, D);
+
+    unsigned structSize = 0;
+    if (needsUnimp(MI, structSize)) {
+      MachineBasicBlock::iterator J = MI;
+      ++J; // skip the delay filler.
+      assert (J != MBB.end() && "MI needs a delay instruction.");
+      BuildMI(MBB, ++J, MI->getDebugLoc(),
+              TII->get(SP::UNIMP)).addImm(structSize);
     }
+  }
   return Changed;
 }
 
@@ -134,28 +153,34 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
   bool sawLoad = false;
   bool sawStore = false;
 
-  MachineBasicBlock::iterator I = slot;
+  if (slot == MBB.begin())
+    return MBB.end();
 
   if (slot->getOpcode() == SP::RET)
     return MBB.end();
 
   if (slot->getOpcode() == SP::RETL) {
-    --I;
-    if (I->getOpcode() != SP::RESTORErr)
-      return MBB.end();
-    //change retl to ret
-    slot->setDesc(TII->get(SP::RET));
-    return I;
+    MachineBasicBlock::iterator J = slot;
+    --J;
+
+    if (J->getOpcode() == SP::RESTORErr
+        || J->getOpcode() == SP::RESTOREri) {
+      // change retl to ret.
+      slot->setDesc(TM.getInstrInfo()->get(SP::RET));
+      return J;
+    }
   }
 
-  //Call's delay filler can def some of call's uses.
+  // Call's delay filler can def some of call's uses.
   if (slot->isCall())
-    insertCallUses(slot, RegUses);
+    insertCallDefsUses(slot, RegDefs, RegUses);
   else
     insertDefsUses(slot, RegDefs, RegUses);
 
   bool done = false;
 
+  MachineBasicBlock::iterator I = slot;
+
   while (!done) {
     done = (I == MBB.begin());
 
@@ -216,12 +241,12 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
     unsigned Reg = MO.getReg();
 
     if (MO.isDef()) {
-      //check whether Reg is defined or used before delay slot.
+      // check whether Reg is defined or used before delay slot.
       if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg))
         return true;
     }
     if (MO.isUse()) {
-      //check whether Reg is defined before delay slot.
+      // check whether Reg is defined before delay slot.
       if (IsRegInSet(RegDefs, Reg))
         return true;
     }
@@ -230,9 +255,12 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
 }
 
 
-void Filler::insertCallUses(MachineBasicBlock::iterator MI,
-                            SmallSet<unsigned, 32>& RegUses)
+void Filler::insertCallDefsUses(MachineBasicBlock::iterator MI,
+                                SmallSet<unsigned, 32>& RegDefs,
+                                SmallSet<unsigned, 32>& RegUses)
 {
+  // Call defines o7, which is visible to the instruction in delay slot.
+  RegDefs.insert(SP::O7);
 
   switch(MI->getOpcode()) {
   default: llvm_unreachable("Unknown opcode.");
@@ -255,7 +283,7 @@ void Filler::insertCallUses(MachineBasicBlock::iterator MI,
   }
 }
 
-//Insert Defs and Uses of MI into the sets RegDefs and RegUses.
+// Insert Defs and Uses of MI into the sets RegDefs and RegUses.
 void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
                             SmallSet<unsigned, 32>& RegDefs,
                             SmallSet<unsigned, 32>& RegUses)
@@ -270,13 +298,17 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
       continue;
     if (MO.isDef())
       RegDefs.insert(Reg);
-    if (MO.isUse())
+    if (MO.isUse()) {
+      // Implicit register uses of retl are return values and
+      // retl does not use them.
+      if (MO.isImplicit() && MI->getOpcode() == SP::RETL)
+        continue;
       RegUses.insert(Reg);
-
+    }
   }
 }
 
-//returns true if the Reg or its alias is in the RegSet.
+// returns true if the Reg or its alias is in the RegSet.
 bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg)
 {
   // Check Reg and all aliased Registers.
@@ -318,3 +350,142 @@ bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
   StructSize = MO.getImm();
   return true;
 }
+
+static bool combineRestoreADD(MachineBasicBlock::iterator RestoreMI,
+                              MachineBasicBlock::iterator AddMI,
+                              const TargetInstrInfo *TII)
+{
+  // Before:  add  <op0>, <op1>, %i[0-7]
+  //          restore %g0, %g0, %i[0-7]
+  //
+  // After :  restore <op0>, <op1>, %o[0-7]
+
+  unsigned reg = AddMI->getOperand(0).getReg();
+  if (reg < SP::I0 || reg > SP::I7)
+    return false;
+
+  // Erase RESTORE.
+  RestoreMI->eraseFromParent();
+
+  // Change ADD to RESTORE.
+  AddMI->setDesc(TII->get((AddMI->getOpcode() == SP::ADDrr)
+                          ? SP::RESTORErr
+                          : SP::RESTOREri));
+
+  // Map the destination register.
+  AddMI->getOperand(0).setReg(reg - SP::I0 + SP::O0);
+
+  return true;
+}
+
+static bool combineRestoreOR(MachineBasicBlock::iterator RestoreMI,
+                             MachineBasicBlock::iterator OrMI,
+                             const TargetInstrInfo *TII)
+{
+  // Before:  or  <op0>, <op1>, %i[0-7]
+  //          restore %g0, %g0, %i[0-7]
+  //    and <op0> or <op1> is zero,
+  //
+  // After :  restore <op0>, <op1>, %o[0-7]
+
+  unsigned reg = OrMI->getOperand(0).getReg();
+  if (reg < SP::I0 || reg > SP::I7)
+    return false;
+
+  // check whether it is a copy.
+  if (OrMI->getOpcode() == SP::ORrr
+      && OrMI->getOperand(1).getReg() != SP::G0
+      && OrMI->getOperand(2).getReg() != SP::G0)
+    return false;
+
+  if (OrMI->getOpcode() == SP::ORri
+      && OrMI->getOperand(1).getReg() != SP::G0
+      && (!OrMI->getOperand(2).isImm() || OrMI->getOperand(2).getImm() != 0))
+    return false;
+
+  // Erase RESTORE.
+  RestoreMI->eraseFromParent();
+
+  // Change OR to RESTORE.
+  OrMI->setDesc(TII->get((OrMI->getOpcode() == SP::ORrr)
+                         ? SP::RESTORErr
+                         : SP::RESTOREri));
+
+  // Map the destination register.
+  OrMI->getOperand(0).setReg(reg - SP::I0 + SP::O0);
+
+  return true;
+}
+
+static bool combineRestoreSETHIi(MachineBasicBlock::iterator RestoreMI,
+                                 MachineBasicBlock::iterator SetHiMI,
+                                 const TargetInstrInfo *TII)
+{
+  // Before:  sethi imm3, %i[0-7]
+  //          restore %g0, %g0, %g0
+  //
+  // After :  restore %g0, (imm3<<10), %o[0-7]
+
+  unsigned reg = SetHiMI->getOperand(0).getReg();
+  if (reg < SP::I0 || reg > SP::I7)
+    return false;
+
+  if (!SetHiMI->getOperand(1).isImm())
+    return false;
+
+  int64_t imm = SetHiMI->getOperand(1).getImm();
+
+  // Is it a 3 bit immediate?
+  if (!isInt<3>(imm))
+    return false;
+
+  // Make it a 13 bit immediate.
+  imm = (imm << 10) & 0x1FFF;
+
+  assert(RestoreMI->getOpcode() == SP::RESTORErr);
+
+  RestoreMI->setDesc(TII->get(SP::RESTOREri));
+
+  RestoreMI->getOperand(0).setReg(reg - SP::I0 + SP::O0);
+  RestoreMI->getOperand(1).setReg(SP::G0);
+  RestoreMI->getOperand(2).ChangeToImmediate(imm);
+
+
+  // Erase the original SETHI.
+  SetHiMI->eraseFromParent();
+
+  return true;
+}
+
+bool Filler::tryCombineRestoreWithPrevInst(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MBBI)
+{
+  // No previous instruction.
+  if (MBBI == MBB.begin())
+    return false;
+
+  // assert that MBBI is a "restore %g0, %g0, %g0".
+  assert(MBBI->getOpcode() == SP::RESTORErr
+         && MBBI->getOperand(0).getReg() == SP::G0
+         && MBBI->getOperand(1).getReg() == SP::G0
+         && MBBI->getOperand(2).getReg() == SP::G0);
+
+  MachineBasicBlock::iterator PrevInst = MBBI; --PrevInst;
+
+  // It cannot combine with a delay filler.
+  if (isDelayFiller(MBB, PrevInst))
+    return false;
+
+  const TargetInstrInfo *TII = TM.getInstrInfo();
+
+  switch (PrevInst->getOpcode()) {
+  default: break;
+  case SP::ADDrr:
+  case SP::ADDri: return combineRestoreADD(MBBI, PrevInst, TII); break;
+  case SP::ORrr:
+  case SP::ORri:  return combineRestoreOR(MBBI, PrevInst, TII); break;
+  case SP::SETHIi: return combineRestoreSETHIi(MBBI, PrevInst, TII); break;
+  }
+  // It cannot combine with the previous instruction.
+  return false;
+}
diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp
deleted file mode 100644
index 1325b98..0000000
--- a/lib/Target/Sparc/FPMover.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
-//===-- FPMover.cpp - Sparc double-precision floating point move fixer ----===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Expand FpMOVD/FpABSD/FpNEGD instructions into their single-precision pieces.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "fpmover"
-#include "Sparc.h"
-#include "SparcSubtarget.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-using namespace llvm;
-
-STATISTIC(NumFpDs , "Number of instructions translated");
-STATISTIC(NoopFpDs, "Number of noop instructions removed");
-
-namespace {
-  struct FPMover : public MachineFunctionPass {
-    /// Target machine description which we query for reg. names, data
-    /// layout, etc.
-    ///
-    TargetMachine &TM;
-    
-    static char ID;
-    explicit FPMover(TargetMachine &tm) 
-      : MachineFunctionPass(ID), TM(tm) { }
-
-    virtual const char *getPassName() const {
-      return "Sparc Double-FP Move Fixer";
-    }
-
-    bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
-    bool runOnMachineFunction(MachineFunction &F);
-  };
-  char FPMover::ID = 0;
-} // end of anonymous namespace
-
-/// createSparcFPMoverPass - Returns a pass that turns FpMOVD
-/// instructions into FMOVS instructions
-///
-FunctionPass *llvm::createSparcFPMoverPass(TargetMachine &tm) {
-  return new FPMover(tm);
-}
-
-/// getDoubleRegPair - Given a DFP register, return the even and odd FP
-/// registers that correspond to it.
-static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg,
-                             unsigned &OddReg) {
-  static const uint16_t EvenHalvesOfPairs[] = {
-    SP::F0, SP::F2, SP::F4, SP::F6, SP::F8, SP::F10, SP::F12, SP::F14,
-    SP::F16, SP::F18, SP::F20, SP::F22, SP::F24, SP::F26, SP::F28, SP::F30
-  };
-  static const uint16_t OddHalvesOfPairs[] = {
-    SP::F1, SP::F3, SP::F5, SP::F7, SP::F9, SP::F11, SP::F13, SP::F15,
-    SP::F17, SP::F19, SP::F21, SP::F23, SP::F25, SP::F27, SP::F29, SP::F31
-  };
-  static const uint16_t DoubleRegsInOrder[] = {
-    SP::D0, SP::D1, SP::D2, SP::D3, SP::D4, SP::D5, SP::D6, SP::D7, SP::D8,
-    SP::D9, SP::D10, SP::D11, SP::D12, SP::D13, SP::D14, SP::D15
-  };
-  for (unsigned i = 0; i < array_lengthof(DoubleRegsInOrder); ++i)
-    if (DoubleRegsInOrder[i] == DoubleReg) {
-      EvenReg = EvenHalvesOfPairs[i];
-      OddReg = OddHalvesOfPairs[i];
-      return;
-    }
-  llvm_unreachable("Can't find reg");
-}
-
-/// runOnMachineBasicBlock - Fixup FpMOVD instructions in this MBB.
-///
-bool FPMover::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
-  bool Changed = false;
-  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
-    MachineInstr *MI = I++;
-    DebugLoc dl = MI->getDebugLoc();
-    if (MI->getOpcode() == SP::FpMOVD || MI->getOpcode() == SP::FpABSD ||
-        MI->getOpcode() == SP::FpNEGD) {
-      Changed = true;
-      unsigned DestDReg = MI->getOperand(0).getReg();
-      unsigned SrcDReg  = MI->getOperand(1).getReg();
-      if (DestDReg == SrcDReg && MI->getOpcode() == SP::FpMOVD) {
-        MBB.erase(MI);   // Eliminate the noop copy.
-        ++NoopFpDs;
-        continue;
-      }
-      
-      unsigned EvenSrcReg = 0, OddSrcReg = 0, EvenDestReg = 0, OddDestReg = 0;
-      getDoubleRegPair(DestDReg, EvenDestReg, OddDestReg);
-      getDoubleRegPair(SrcDReg, EvenSrcReg, OddSrcReg);
-
-      const TargetInstrInfo *TII = TM.getInstrInfo();
-      if (MI->getOpcode() == SP::FpMOVD)
-        MI->setDesc(TII->get(SP::FMOVS));
-      else if (MI->getOpcode() == SP::FpNEGD)
-        MI->setDesc(TII->get(SP::FNEGS));
-      else if (MI->getOpcode() == SP::FpABSD)
-        MI->setDesc(TII->get(SP::FABSS));
-      else
-        llvm_unreachable("Unknown opcode!");
-        
-      MI->getOperand(0).setReg(EvenDestReg);
-      MI->getOperand(1).setReg(EvenSrcReg);
-      DEBUG(errs() << "FPMover: the modified instr is: " << *MI);
-      // Insert copy for the other half of the double.
-      if (DestDReg != SrcDReg) {
-        MI = BuildMI(MBB, I, dl, TM.getInstrInfo()->get(SP::FMOVS), OddDestReg)
-          .addReg(OddSrcReg);
-        DEBUG(errs() << "FPMover: the inserted instr is: " << *MI);
-      }
-      ++NumFpDs;
-    }
-  }
-  return Changed;
-}
-
-bool FPMover::runOnMachineFunction(MachineFunction &F) {
-  // If the target has V9 instructions, the fp-mover pseudos will never be
-  // emitted.  Avoid a scan of the instructions to improve compile time.
-  if (TM.getSubtarget<SparcSubtarget>().isV9())
-    return false;
-  
-  bool Changed = false;
-  for (MachineFunction::iterator FI = F.begin(), FE = F.end();
-       FI != FE; ++FI)
-    Changed |= runOnMachineBasicBlock(*FI);
-  return Changed;
-}
diff --git a/lib/Target/Sparc/LLVMBuild.txt b/lib/Target/Sparc/LLVMBuild.txt
index fe20d2f..7d54d32 100644
--- a/lib/Target/Sparc/LLVMBuild.txt
+++ b/lib/Target/Sparc/LLVMBuild.txt
@@ -28,5 +28,6 @@ has_asmprinter = 1
 type = Library
 name = SparcCodeGen
 parent = Sparc
-required_libraries = AsmPrinter CodeGen Core MC SelectionDAG SparcDesc SparcInfo Support Target
+required_libraries = AsmPrinter CodeGen Core MC SelectionDAG SparcDesc
+                     SparcInfo Support Target
 add_to_library_groups = Sparc
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h
new file mode 100644
index 0000000..aac0e8d
--- /dev/null
+++ b/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h
@@ -0,0 +1,62 @@
+//===-- SparcBaseInfo.h - Top level definitions for Sparc ---- --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions
+// for the Sparc target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core code gen
+// types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCBASEINFO_H
+#define SPARCBASEINFO_H
+
+namespace llvm {
+
+/// SPII - This namespace holds target specific flags for instruction info.
+namespace SPII {
+
+/// Target Operand Flags. Sparc specific TargetFlags for MachineOperands and
+/// SDNodes.
+enum TOF {
+  MO_NO_FLAG,
+
+  // Extract the low 10 bits of an address.
+  // Assembler: %lo(addr)
+  MO_LO,
+
+  // Extract bits 31-10 of an address. Only for sethi.
+  // Assembler: %hi(addr) or %lm(addr)
+  MO_HI,
+
+  // Extract bits 43-22 of an adress. Only for sethi.
+  // Assembler: %h44(addr)
+  MO_H44,
+
+  // Extract bits 21-12 of an address.
+  // Assembler: %m44(addr)
+  MO_M44,
+
+  // Extract bits 11-0 of an address.
+  // Assembler: %l44(addr)
+  MO_L44,
+
+  // Extract bits 63-42 of an address. Only for sethi.
+  // Assembler: %hh(addr)
+  MO_HH,
+
+  // Extract bits 41-32 of an address.
+  // Assembler: %hm(addr)
+  MO_HM
+};
+
+} // end namespace SPII
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index 3d4bfdc..5a52abe 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -18,7 +18,7 @@ using namespace llvm;
 
 void SparcELFMCAsmInfo::anchor() { }
 
-SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) {
+SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) {
   IsLittleEndian = false;
   Triple TheTriple(TT);
   if (TheTriple.getArch() == Triple::sparcv9) {
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
index f0e1354..621e8ff 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -18,12 +18,11 @@
 
 namespace llvm {
   class StringRef;
-  class Target;
 
   class SparcELFMCAsmInfo : public MCAsmInfo {
     virtual void anchor();
   public:
-    explicit SparcELFMCAsmInfo(const Target &T, StringRef TT);
+    explicit SparcELFMCAsmInfo(StringRef TT);
   };
 
 } // namespace llvm
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index 7fdb0c3..1c64e1b 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -50,14 +50,42 @@ static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU,
   return X;
 }
 
+// Code models. Some only make sense for 64-bit code.
+//
+// SunCC  Reloc   CodeModel  Constraints
+// abs32  Static  Small      text+data+bss linked below 2^32 bytes
+// abs44  Static  Medium     text+data+bss linked below 2^44 bytes
+// abs64  Static  Large      text smaller than 2^31 bytes
+// pic13  PIC_    Small      GOT < 2^13 bytes
+// pic32  PIC_    Medium     GOT < 2^32 bytes
+//
+// All code models require that the text segment is smaller than 2GB.
+
 static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM,
                                                CodeModel::Model CM,
                                                CodeGenOpt::Level OL) {
   MCCodeGenInfo *X = new MCCodeGenInfo();
+
+  // The default 32-bit code model is abs32/pic32.
+  if (CM == CodeModel::Default)
+    CM = RM == Reloc::PIC_ ? CodeModel::Medium : CodeModel::Small;
+
   X->InitMCCodeGenInfo(RM, CM, OL);
   return X;
 }
 
+static MCCodeGenInfo *createSparcV9MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+                                                 CodeModel::Model CM,
+                                                 CodeGenOpt::Level OL) {
+  MCCodeGenInfo *X = new MCCodeGenInfo();
+
+  // The default 64-bit code model is abs44/pic32.
+  if (CM == CodeModel::Default)
+    CM = CodeModel::Medium;
+
+  X->InitMCCodeGenInfo(RM, CM, OL);
+  return X;
+}
 extern "C" void LLVMInitializeSparcTargetMC() {
   // Register the MC asm info.
   RegisterMCAsmInfo<SparcELFMCAsmInfo> X(TheSparcTarget);
@@ -67,7 +95,7 @@ extern "C" void LLVMInitializeSparcTargetMC() {
   TargetRegistry::RegisterMCCodeGenInfo(TheSparcTarget,
                                        createSparcMCCodeGenInfo);
   TargetRegistry::RegisterMCCodeGenInfo(TheSparcV9Target,
-                                       createSparcMCCodeGenInfo);
+                                       createSparcV9MCCodeGenInfo);
 
   // Register the MC instruction info.
   TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo);
diff --git a/lib/Target/Sparc/README.txt b/lib/Target/Sparc/README.txt
index b4991fe..34e68cf 100644
--- a/lib/Target/Sparc/README.txt
+++ b/lib/Target/Sparc/README.txt
@@ -38,7 +38,7 @@ t1:
 
 1) should be replaced with a brz in V9 mode.
 
-* Same as above, but emit conditional move on register zero (p192) in V9 
+* Same as above, but emit conditional move on register zero (p192) in V9
   mode.  Testcase:
 
 int %t1(int %a, int %b) {
@@ -47,13 +47,15 @@ int %t1(int %a, int %b) {
         ret int %D
 }
 
-* Emit MULX/[SU]DIVX instructions in V9 mode instead of fiddling 
+* Emit MULX/[SU]DIVX instructions in V9 mode instead of fiddling
   with the Y register, if they are faster.
 
 * Codegen bswap(load)/store(bswap) -> load/store ASI
 
-* Implement frame pointer elimination, e.g. eliminate save/restore for 
+* Implement frame pointer elimination, e.g. eliminate save/restore for
   leaf fns.
 * Fill delay slots
 
 * Implement JIT support
+
+* Use %g0 directly to materialize 0. No instruction is required.
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
index ce6ae17..98563db 100644
--- a/lib/Target/Sparc/Sparc.h
+++ b/lib/Target/Sparc/Sparc.h
@@ -26,7 +26,6 @@ namespace llvm {
 
   FunctionPass *createSparcISelDag(SparcTargetMachine &TM);
   FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM);
-  FunctionPass *createSparcFPMoverPass(TargetMachine &TM);
 
 } // end namespace llvm;
 
@@ -51,7 +50,7 @@ namespace llvm {
       ICC_NEG =  6   ,  // Negative
       ICC_VC  = 15   ,  // Overflow Clear
       ICC_VS  =  7   ,  // Overflow Set
-      
+
       //FCC_A   =  8+16,  // Always
       //FCC_N   =  0+16,  // Never
       FCC_U   =  7+16,  // Unordered
@@ -70,7 +69,7 @@ namespace llvm {
       FCC_O   = 15+16   // Ordered
     };
   }
-  
+
   inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) {
     switch (CC) {
     case SPCC::ICC_NE:  return "ne";
diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td
index 611f8e8..d42c40f 100644
--- a/lib/Target/Sparc/Sparc.td
+++ b/lib/Target/Sparc/Sparc.td
@@ -19,7 +19,7 @@ include "llvm/Target/Target.td"
 //===----------------------------------------------------------------------===//
 // SPARC Subtarget features.
 //
- 
+
 def FeatureV9
   : SubtargetFeature<"v9", "IsV9", "true",
                      "Enable SPARC-V9 instructions">;
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index e14b3cb..3fe2b44 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -16,6 +16,7 @@
 #include "Sparc.h"
 #include "SparcInstrInfo.h"
 #include "SparcTargetMachine.h"
+#include "MCTargetDesc/SparcBaseInfo.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -59,11 +60,9 @@ namespace {
                                raw_ostream &O);
 
     bool printGetPCX(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS);
-    
+
     virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
                        const;
-
-    virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
   };
 } // end of anonymous namespace
 
@@ -72,15 +71,39 @@ namespace {
 void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
                                    raw_ostream &O) {
   const MachineOperand &MO = MI->getOperand (opNum);
-  bool CloseParen = false;
-  if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) {
-    O << "%hi(";
-    CloseParen = true;
-  } else if ((MI->getOpcode() == SP::ORri || MI->getOpcode() == SP::ADDri) &&
-             !MO.isReg() && !MO.isImm()) {
-    O << "%lo(";
-    CloseParen = true;
+  unsigned TF = MO.getTargetFlags();
+#ifndef NDEBUG
+  // Verify the target flags.
+  if (MO.isGlobal() || MO.isSymbol() || MO.isCPI()) {
+    if (MI->getOpcode() == SP::CALL)
+      assert(TF == SPII::MO_NO_FLAG &&
+             "Cannot handle target flags on call address");
+    else if (MI->getOpcode() == SP::SETHIi)
+      assert((TF == SPII::MO_HI || TF == SPII::MO_H44 || TF == SPII::MO_HH) &&
+             "Invalid target flags for address operand on sethi");
+    else
+      assert((TF == SPII::MO_LO || TF == SPII::MO_M44 || TF == SPII::MO_L44 ||
+              TF == SPII::MO_HM) &&
+             "Invalid target flags for small address operand");
   }
+#endif
+
+  bool CloseParen = true;
+  switch (TF) {
+  default:
+      llvm_unreachable("Unknown target flags on operand");
+  case SPII::MO_NO_FLAG:
+    CloseParen = false;
+    break;
+  case SPII::MO_LO:  O << "%lo(";  break;
+  case SPII::MO_HI:  O << "%hi(";  break;
+  case SPII::MO_H44: O << "%h44("; break;
+  case SPII::MO_M44: O << "%m44("; break;
+  case SPII::MO_L44: O << "%l44("; break;
+  case SPII::MO_HH:  O << "%hh(";  break;
+  case SPII::MO_HM:  O << "%hm(";  break;
+  }
+
   switch (MO.getType()) {
   case MachineOperand::MO_Register:
     O << "%" << StringRef(getRegisterName(MO.getReg())).lower();
@@ -95,6 +118,9 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
   case MachineOperand::MO_GlobalAddress:
     O << *Mang->getSymbol(MO.getGlobal());
     break;
+  case MachineOperand::MO_BlockAddress:
+    O <<  GetBlockAddressSymbol(MO.getBlockAddress())->getName();
+    break;
   case MachineOperand::MO_ExternalSymbol:
     O << MO.getSymbolName();
     break;
@@ -127,14 +153,7 @@ void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
     return;   // don't print "+0"
 
   O << "+";
-  if (MI->getOperand(opNum+1).isGlobal() ||
-      MI->getOperand(opNum+1).isCPI()) {
-    O << "%lo(";
-    printOperand(MI, opNum+1, O);
-    O << ")";
-  } else {
-    printOperand(MI, opNum+1, O);
-  }
+  printOperand(MI, opNum+1, O);
 }
 
 bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum,
@@ -146,7 +165,7 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum,
   case MachineOperand::MO_Register:
     assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
            "Operand is not a physical register ");
-    assert(MO.getReg() != SP::O7 && 
+    assert(MO.getReg() != SP::O7 &&
            "%o7 is assigned as destination for getpcx!");
     operand = "%" + StringRef(getRegisterName(MO.getReg())).lower();
     break;
@@ -159,15 +178,15 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum,
   O << "\tcall\t.LLGETPC" << mfNum << '_' << bbNum << '\n' ;
 
   O << "\t  sethi\t"
-    << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum 
+    << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum
     << ")), "  << operand << '\n' ;
 
   O << ".LLGETPC" << mfNum << '_' << bbNum << ":\n" ;
-  O << "\tor\t" << operand  
+  O << "\tor\t" << operand
     << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum
     << ")), " << operand << '\n';
-  O << "\tadd\t" << operand << ", %o7, " << operand << '\n'; 
-  
+  O << "\tadd\t" << operand << ", %o7, " << operand << '\n';
+
   return true;
 }
 
@@ -225,19 +244,19 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
   // then nothing falls through to it.
   if (MBB->isLandingPad() || MBB->pred_empty())
     return false;
-  
+
   // If there isn't exactly one predecessor, it can't be a fall through.
   MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
   ++PI2;
   if (PI2 != MBB->pred_end())
     return false;
-  
+
   // The predecessor has to be immediately before this block.
   const MachineBasicBlock *Pred = *PI;
-  
+
   if (!Pred->isLayoutSuccessor(MBB))
     return false;
-  
+
   // Check if the last terminator is an unconditional branch.
   MachineBasicBlock::const_iterator I = Pred->end();
   while (I != Pred->begin() && !(--I)->isTerminator())
@@ -245,17 +264,8 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
   return I == Pred->end() || !I->isBarrier();
 }
 
-MachineLocation SparcAsmPrinter::
-getDebugValueLocation(const MachineInstr *MI) const {
-  assert(MI->getNumOperands() == 4 && "Invalid number of operands!");
-  assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() &&
-         "Unexpected MachineOperand types");
-  return MachineLocation(MI->getOperand(0).getReg(),
-                         MI->getOperand(1).getImm());
-}
-
 // Force static initialization.
-extern "C" void LLVMInitializeSparcAsmPrinter() { 
+extern "C" void LLVMInitializeSparcAsmPrinter() {
   RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget);
   RegisterAsmPrinter<SparcAsmPrinter> Y(TheSparcV9Target);
 }
diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td
index d471220..a181bcf 100644
--- a/lib/Target/Sparc/SparcCallingConv.td
+++ b/lib/Target/Sparc/SparcCallingConv.td
@@ -12,19 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
-// Return Value Calling Conventions
+// SPARC v8 32-bit.
 //===----------------------------------------------------------------------===//
 
-// Sparc 32-bit C return-value convention.
-def RetCC_Sparc32 : CallingConv<[
-  CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
-  CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
-  CCIfType<[f64], CCAssignToReg<[D0, D1]>>
-]>;
-
-// Sparc 32-bit C Calling convention.
 def CC_Sparc32 : CallingConv<[
-  //Custom assign SRet to [sp+64].
+  // Custom assign SRet to [sp+64].
   CCIfSRet<CCCustom<"CC_Sparc_Assign_SRet">>,
   // i32 f32 arguments get passed in integer registers if there is space.
   CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
@@ -34,3 +26,94 @@ def CC_Sparc32 : CallingConv<[
   // Alternatively, they are assigned to the stack in 4-byte aligned units.
   CCAssignToStack<4, 4>
 ]>;
+
+def RetCC_Sparc32 : CallingConv<[
+  CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+  CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
+  CCIfType<[f64], CCAssignToReg<[D0, D1]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// SPARC v9 64-bit.
+//===----------------------------------------------------------------------===//
+//
+// The 64-bit ABI conceptually assigns all function arguments to a parameter
+// array starting at [%fp+BIAS+128] in the callee's stack frame. All arguments
+// occupy a multiple of 8 bytes in the array. Integer arguments are extended to
+// 64 bits by the caller. Floats are right-aligned in their 8-byte slot, the
+// first 4 bytes in the slot are undefined.
+//
+// The integer registers %i0 to %i5 shadow the first 48 bytes of the parameter
+// array at fixed offsets. Integer arguments are promoted to registers when
+// possible.
+//
+// The floating point registers %f0 to %f31 shadow the first 128 bytes of the
+// parameter array at fixed offsets. Float and double parameters are promoted
+// to these registers when possible.
+//
+// Structs up to 16 bytes in size are passed by value. They are right-aligned
+// in one or two 8-byte slots in the parameter array. Struct members are
+// promoted to both floating point and integer registers when possible. A
+// struct containing two floats would thus be passed in %f0 and %f1, while two
+// float function arguments would occupy 8 bytes each, and be passed in %f1 and
+// %f3.
+//
+// When a struct { int, float } is passed by value, the int goes in the high
+// bits of an integer register while the float goes in a floating point
+// register.
+//
+// The difference is encoded in LLVM IR using the inreg atttribute on function
+// arguments:
+//
+//   C:   void f(float, float);
+//   IR:  declare void f(float %f1, float %f3)
+//
+//   C:   void f(struct { float f0, f1; });
+//   IR:  declare void f(float inreg %f0, float inreg %f1)
+//
+//   C:   void f(int, float);
+//   IR:  declare void f(int signext %i0, float %f3)
+//
+//   C:   void f(struct { int i0high; float f1; });
+//   IR:  declare void f(i32 inreg %i0high, float inreg %f1)
+//
+// Two ints in a struct are simply coerced to i64:
+//
+//   C:   void f(struct { int i0high, i0low; });
+//   IR:  declare void f(i64 %i0.coerced)
+//
+// The frontend and backend divide the task of producing ABI compliant code for
+// C functions. The C frontend will:
+//
+//  - Annotate integer arguments with zeroext or signext attributes.
+//
+//  - Split structs into one or two 64-bit sized chunks, or 32-bit chunks with
+//    inreg attributes.
+//
+//  - Pass structs larger than 16 bytes indirectly with an explicit pointer
+//    argument. The byval attribute is not used.
+//
+// The backend will:
+//
+//  - Assign all arguments to 64-bit aligned stack slots, 32-bits for inreg.
+//
+//  - Promote to integer or floating point registers depending on type.
+//
+// Function return values are passed exactly like function arguments, except a
+// struct up to 32 bytes in size can be returned in registers.
+
+// Function arguments AND return values.
+def CC_Sparc64 : CallingConv<[
+  // The frontend uses the inreg flag to indicate i32 and float arguments from
+  // structs. These arguments are not promoted to 64 bits, but they can still
+  // be assigned to integer and float registers.
+  CCIfInReg<CCIfType<[i32, f32], CCCustom<"CC_Sparc64_Half">>>,
+
+  // All integers are promoted to i64 by the caller.
+  CCIfType<[i32], CCPromoteToType<i64>>,
+
+  // Custom assignment is required because stack space is reserved for all
+  // arguments whether they are passed in registers or not.
+  CCCustom<"CC_Sparc64_Full">
+]>;
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index a0dae6e..536e466 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -26,7 +26,16 @@
 
 using namespace llvm;
 
+static cl::opt<bool>
+DisableLeafProc("disable-sparc-leaf-proc",
+                cl::init(false),
+                cl::desc("Disable Sparc leaf procedure optimization."),
+                cl::Hidden);
+
+
 void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
+  SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+
   MachineBasicBlock &MBB = MF.front();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   const SparcInstrInfo &TII =
@@ -37,22 +46,18 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
   // Get the number of bytes to allocate from the FrameInfo
   int NumBytes = (int) MFI->getStackSize();
 
-  // Emit the correct save instruction based on the number of bytes in
-  // the frame. Minimum stack frame size according to V8 ABI is:
-  //   16 words for register window spill
-  //    1 word for address of returned aggregate-value
-  // +  6 words for passing parameters on the stack
-  // ----------
-  //   23 words * 4 bytes per word = 92 bytes
-  NumBytes += 92;
-
-  // Round up to next doubleword boundary -- a double-word boundary
-  // is required by the ABI.
-  NumBytes = (NumBytes + 7) & ~7;
-  NumBytes = -NumBytes;
+  unsigned SAVEri = SP::SAVEri;
+  unsigned SAVErr = SP::SAVErr;
+  if (FuncInfo->isLeafProc()) {
+    if (NumBytes == 0)
+      return;
+    SAVEri = SP::ADDri;
+    SAVErr = SP::ADDrr;
+  }
+  NumBytes = - SubTarget.getAdjustedFrameSize(NumBytes);
 
   if (NumBytes >= -4096) {
-    BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6)
+    BuildMI(MBB, MBBI, dl, TII.get(SAVEri), SP::O6)
       .addReg(SP::O6).addImm(NumBytes);
   } else {
     // Emit this the hard way.  This clobbers G1 which we always know is
@@ -62,7 +67,7 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
     // Emit G1 = G1 + I6
     BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
       .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
-    BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6)
+    BuildMI(MBB, MBBI, dl, TII.get(SAVErr), SP::O6)
       .addReg(SP::O6).addReg(SP::G1);
   }
 }
@@ -70,27 +75,144 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
 void SparcFrameLowering::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
-  MachineInstr &MI = *I;
-  DebugLoc dl = MI.getDebugLoc();
-  int Size = MI.getOperand(0).getImm();
-  if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
-    Size = -Size;
-  const SparcInstrInfo &TII =
-    *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
-  if (Size)
-    BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size);
+  if (!hasReservedCallFrame(MF)) {
+    MachineInstr &MI = *I;
+    DebugLoc DL = MI.getDebugLoc();
+    int Size = MI.getOperand(0).getImm();
+    if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
+      Size = -Size;
+    const SparcInstrInfo &TII =
+      *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+    if (Size)
+      BuildMI(MBB, I, DL, TII.get(SP::ADDri), SP::O6).addReg(SP::O6)
+        .addImm(Size);
+  }
   MBB.erase(I);
 }
 
 
 void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
+  SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
   const SparcInstrInfo &TII =
     *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
   DebugLoc dl = MBBI->getDebugLoc();
   assert(MBBI->getOpcode() == SP::RETL &&
          "Can only put epilog before 'retl' instruction!");
-  BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0)
-    .addReg(SP::G0);
+  if (!FuncInfo->isLeafProc()) {
+    BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0)
+      .addReg(SP::G0);
+    return;
+  }
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  int NumBytes = (int) MFI->getStackSize();
+  if (NumBytes == 0)
+    return;
+
+  NumBytes = SubTarget.getAdjustedFrameSize(NumBytes);
+
+  if (NumBytes < 4096) {
+    BuildMI(MBB, MBBI, dl, TII.get(SP::ADDri), SP::O6)
+      .addReg(SP::O6).addImm(NumBytes);
+  } else {
+    // Emit this the hard way.  This clobbers G1 which we always know is
+    // available here.
+    unsigned OffHi = (unsigned)NumBytes >> 10U;
+    BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+    // Emit G1 = G1 + I6
+    BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
+      .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
+    BuildMI(MBB, MBBI, dl, TII.get(SP::ADDrr), SP::O6)
+      .addReg(SP::O6).addReg(SP::G1);
+  }
+}
+
+bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+  // Reserve call frame if there are no variable sized objects on the stack.
+  return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register.  This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool SparcFrameLowering::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+    MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
+}
+
+
+static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI)
+{
+
+  for (unsigned reg = SP::I0; reg <= SP::I7; ++reg)
+    if (MRI->isPhysRegUsed(reg))
+      return false;
+
+  for (unsigned reg = SP::L0; reg <= SP::L7; ++reg)
+    if (MRI->isPhysRegUsed(reg))
+      return false;
+
+  return true;
+}
+
+bool SparcFrameLowering::isLeafProc(MachineFunction &MF) const
+{
+
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  MachineFrameInfo    *MFI = MF.getFrameInfo();
+
+  return !(MFI->hasCalls()              // has calls
+           || MRI.isPhysRegUsed(SP::L0) // Too many registers needed
+           || MRI.isPhysRegUsed(SP::O6) // %SP is used
+           || hasFP(MF));               // need %FP
+}
+
+void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const {
+
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  // Remap %i[0-7] to %o[0-7].
+  for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) {
+    if (!MRI.isPhysRegUsed(reg))
+      continue;
+    unsigned mapped_reg = (reg - SP::I0 + SP::O0);
+    assert(!MRI.isPhysRegUsed(mapped_reg));
+
+    // Replace I register with O register.
+    MRI.replaceRegWith(reg, mapped_reg);
+
+    // Mark the reg unused.
+    MRI.setPhysRegUnused(reg);
+  }
+
+  // Rewrite MBB's Live-ins.
+  for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+       MBB != E; ++MBB) {
+    for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) {
+      if (!MBB->isLiveIn(reg))
+        continue;
+      MBB->removeLiveIn(reg);
+      MBB->addLiveIn(reg - SP::I0 + SP::O0);
+    }
+  }
+
+  assert(verifyLeafProcRegUse(&MRI));
+#ifdef XDEBUG
+  MF.verify(0, "After LeafProc Remapping");
+#endif
+}
+
+void SparcFrameLowering::processFunctionBeforeCalleeSavedScan
+                  (MachineFunction &MF, RegScavenger *RS) const {
+
+  if (!DisableLeafProc && isLeafProc(MF)) {
+    SparcMachineFunctionInfo *MFI = MF.getInfo<SparcMachineFunctionInfo>();
+    MFI->setLeafProc(true);
+
+    remapRegsForLeafProc(MF);
+  }
+
 }
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index 464233e..8eaef59 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -22,10 +22,12 @@ namespace llvm {
   class SparcSubtarget;
 
 class SparcFrameLowering : public TargetFrameLowering {
+  const SparcSubtarget &SubTarget;
 public:
-  explicit SparcFrameLowering(const SparcSubtarget &/*sti*/)
-    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0) {
-  }
+  explicit SparcFrameLowering(const SparcSubtarget &ST)
+    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
+                          ST.is64Bit() ? 16 : 8, 0, ST.is64Bit() ? 16 : 8),
+      SubTarget(ST) {}
 
   /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
   /// the function.
@@ -36,7 +38,17 @@ public:
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
 
-  bool hasFP(const MachineFunction &MF) const { return false; }
+  bool hasReservedCallFrame(const MachineFunction &MF) const;
+  bool hasFP(const MachineFunction &MF) const;
+  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                            RegScavenger *RS = NULL) const;
+
+private:
+  // Remap input registers to output registers for leaf procedure.
+  void remapRegsForLeafProc(MachineFunction &MF) const;
+
+  // Returns true if MF is a leaf procedure.
+  bool isLeafProc(MachineFunction &MF) const;
 };
 
 } // End llvm namespace
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 5fa545d..db62151 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -33,7 +33,7 @@ class SparcDAGToDAGISel : public SelectionDAGISel {
   /// Subtarget - Keep a pointer to the Sparc Subtarget around so that we can
   /// make the right decision when generating code for different targets.
   const SparcSubtarget &Subtarget;
-  SparcTargetMachine& TM;
+  SparcTargetMachine &TM;
 public:
   explicit SparcDAGToDAGISel(SparcTargetMachine &tm)
     : SelectionDAGISel(tm),
@@ -67,13 +67,15 @@ private:
 
 SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
   unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF);
-  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+  return CurDAG->getRegister(GlobalBaseReg,
+                             getTargetLowering()->getPointerTy()).getNode();
 }
 
 bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
                                      SDValue &Base, SDValue &Offset) {
   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
-    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
+                                       getTargetLowering()->getPointerTy());
     Offset = CurDAG->getTargetConstant(0, MVT::i32);
     return true;
   }
@@ -87,7 +89,8 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
         if (FrameIndexSDNode *FIN =
                 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
           // Constant offset from frame ref.
-          Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+          Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
+                                           getTargetLowering()->getPointerTy());
         } else {
           Base = Addr.getOperand(0);
         }
@@ -130,12 +133,12 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
   }
 
   R1 = Addr;
-  R2 = CurDAG->getRegister(SP::G0, MVT::i32);
+  R2 = CurDAG->getRegister(SP::G0, getTargetLowering()->getPointerTy());
   return true;
 }
 
 SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   if (N->isMachineOpcode())
     return NULL;   // Already selected.
 
@@ -146,6 +149,9 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
 
   case ISD::SDIV:
   case ISD::UDIV: {
+    // sdivx / udivx handle 64-bit divides.
+    if (N->getValueType(0) == MVT::i64)
+      break;
     // FIXME: should use a custom expander to expose the SRA to the dag.
     SDValue DivLHS = N->getOperand(0);
     SDValue DivRHS = N->getOperand(1);
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 28ac02a..4b0fa67 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -15,6 +15,7 @@
 #include "SparcISelLowering.h"
 #include "SparcMachineFunctionInfo.h"
 #include "SparcTargetMachine.h"
+#include "MCTargetDesc/SparcBaseInfo.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -39,7 +40,7 @@ static bool CC_Sparc_Assign_SRet(unsigned &ValNo, MVT &ValVT,
 {
   assert (ArgFlags.isSRet());
 
-  //Assign SRet argument
+  // Assign SRet argument.
   State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                          0,
                                          LocVT, LocInfo));
@@ -53,18 +54,18 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
   static const uint16_t RegList[] = {
     SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
   };
-  //Try to get first reg
+  // Try to get first reg.
   if (unsigned Reg = State.AllocateReg(RegList, 6)) {
     State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
   } else {
-    //Assign whole thing in stack
+    // Assign whole thing in stack.
     State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                            State.AllocateStack(8,4),
                                            LocVT, LocInfo));
     return true;
   }
 
-  //Try to get second reg
+  // Try to get second reg.
   if (unsigned Reg = State.AllocateReg(RegList, 6))
     State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
   else
@@ -74,25 +75,117 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
   return true;
 }
 
+// Allocate a full-sized argument for the 64-bit ABI.
+static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT,
+                            MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+                            ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  assert((LocVT == MVT::f32 || LocVT.getSizeInBits() == 64) &&
+         "Can't handle non-64 bits locations");
+
+  // Stack space is allocated for all arguments starting from [%fp+BIAS+128].
+  unsigned Offset = State.AllocateStack(8, 8);
+  unsigned Reg = 0;
+
+  if (LocVT == MVT::i64 && Offset < 6*8)
+    // Promote integers to %i0-%i5.
+    Reg = SP::I0 + Offset/8;
+  else if (LocVT == MVT::f64 && Offset < 16*8)
+    // Promote doubles to %d0-%d30. (Which LLVM calls D0-D15).
+    Reg = SP::D0 + Offset/8;
+  else if (LocVT == MVT::f32 && Offset < 16*8)
+    // Promote floats to %f1, %f3, ...
+    Reg = SP::F1 + Offset/4;
+
+  // Promote to register when possible, otherwise use the stack slot.
+  if (Reg) {
+    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+    return true;
+  }
+
+  // This argument goes on the stack in an 8-byte slot.
+  // When passing floats, LocVT is smaller than 8 bytes. Adjust the offset to
+  // the right-aligned float. The first 4 bytes of the stack slot are undefined.
+  if (LocVT == MVT::f32)
+    Offset += 4;
+
+  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+  return true;
+}
+
+// Allocate a half-sized argument for the 64-bit ABI.
+//
+// This is used when passing { float, int } structs by value in registers.
+static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT,
+                            MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+                            ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  assert(LocVT.getSizeInBits() == 32 && "Can't handle non-32 bits locations");
+  unsigned Offset = State.AllocateStack(4, 4);
+
+  if (LocVT == MVT::f32 && Offset < 16*8) {
+    // Promote floats to %f0-%f31.
+    State.addLoc(CCValAssign::getReg(ValNo, ValVT, SP::F0 + Offset/4,
+                                     LocVT, LocInfo));
+    return true;
+  }
+
+  if (LocVT == MVT::i32 && Offset < 6*8) {
+    // Promote integers to %i0-%i5, using half the register.
+    unsigned Reg = SP::I0 + Offset/8;
+    LocVT = MVT::i64;
+    LocInfo = CCValAssign::AExt;
+
+    // Set the Custom bit if this i32 goes in the high bits of a register.
+    if (Offset % 8 == 0)
+      State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg,
+                                             LocVT, LocInfo));
+    else
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+    return true;
+  }
+
+  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+  return true;
+}
+
 #include "SparcGenCallingConv.inc"
 
+// The calling conventions in SparcCallingConv.td are described in terms of the
+// callee's register window. This function translates registers to the
+// corresponding caller window %o register.
+static unsigned toCallerWindow(unsigned Reg) {
+  assert(SP::I0 + 7 == SP::I7 && SP::O0 + 7 == SP::O7 && "Unexpected enum");
+  if (Reg >= SP::I0 && Reg <= SP::I7)
+    return Reg - SP::I0 + SP::O0;
+  return Reg;
+}
+
 SDValue
 SparcTargetLowering::LowerReturn(SDValue Chain,
-                                 CallingConv::ID CallConv, bool isVarArg,
+                                 CallingConv::ID CallConv, bool IsVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  const SmallVectorImpl<SDValue> &OutVals,
-                                 DebugLoc dl, SelectionDAG &DAG) const {
+                                 SDLoc DL, SelectionDAG &DAG) const {
+  if (Subtarget->is64Bit())
+    return LowerReturn_64(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG);
+  return LowerReturn_32(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG);
+}
 
+SDValue
+SparcTargetLowering::LowerReturn_32(SDValue Chain,
+                                    CallingConv::ID CallConv, bool IsVarArg,
+                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                    const SmallVectorImpl<SDValue> &OutVals,
+                                    SDLoc DL, SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
 
   // CCValAssign - represent the assignment of the return value to locations.
   SmallVector<CCValAssign, 16> RVLocs;
 
   // CCState - Info about the registers and stack slot.
-  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
                  DAG.getTarget(), RVLocs, *DAG.getContext());
 
-  // Analize return values.
+  // Analyze return values.
   CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32);
 
   SDValue Flag;
@@ -105,7 +198,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
     CCValAssign &VA = RVLocs[i];
     assert(VA.isRegLoc() && "Can only return in registers!");
 
-    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(),
                              OutVals[i], Flag);
 
     // Guarantee that all emitted copies are stuck together with flags.
@@ -113,15 +206,15 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   }
 
-  unsigned RetAddrOffset = 8; //Call Inst + Delay Slot
+  unsigned RetAddrOffset = 8; // Call Inst + Delay Slot
   // If the function returns a struct, copy the SRetReturnReg to I0
   if (MF.getFunction()->hasStructRetAttr()) {
     SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
     unsigned Reg = SFI->getSRetReturnReg();
     if (!Reg)
       llvm_unreachable("sret virtual register not created in the entry block");
-    SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
-    Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag);
+    SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
+    Chain = DAG.getCopyToReg(Chain, DL, SP::I0, Val, Flag);
     Flag = Chain.getValue(1);
     RetOps.push_back(DAG.getRegister(SP::I0, getPointerTy()));
     RetAddrOffset = 12; // CallInst + Delay Slot + Unimp
@@ -134,22 +227,114 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
   if (Flag.getNode())
     RetOps.push_back(Flag);
 
-  return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other,
+  return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other,
                      &RetOps[0], RetOps.size());
 }
 
-/// LowerFormalArguments - V8 uses a very simple ABI, where all values are
-/// passed in either one or two GPRs, including FP values.  TODO: we should
-/// pass FP values in FP registers for fastcc functions.
+// Lower return values for the 64-bit ABI.
+// Return values are passed the exactly the same way as function arguments.
 SDValue
-SparcTargetLowering::LowerFormalArguments(SDValue Chain,
-                                          CallingConv::ID CallConv, bool isVarArg,
-                                          const SmallVectorImpl<ISD::InputArg>
-                                            &Ins,
-                                          DebugLoc dl, SelectionDAG &DAG,
-                                          SmallVectorImpl<SDValue> &InVals)
-                                            const {
+SparcTargetLowering::LowerReturn_64(SDValue Chain,
+                                    CallingConv::ID CallConv, bool IsVarArg,
+                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                    const SmallVectorImpl<SDValue> &OutVals,
+                                    SDLoc DL, SelectionDAG &DAG) const {
+  // CCValAssign - represent the assignment of the return value to locations.
+  SmallVector<CCValAssign, 16> RVLocs;
 
+  // CCState - Info about the registers and stack slot.
+  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+                 DAG.getTarget(), RVLocs, *DAG.getContext());
+
+  // Analyze return values.
+  CCInfo.AnalyzeReturn(Outs, CC_Sparc64);
+
+  SDValue Flag;
+  SmallVector<SDValue, 4> RetOps(1, Chain);
+
+  // The second operand on the return instruction is the return address offset.
+  // The return address is always %i7+8 with the 64-bit ABI.
+  RetOps.push_back(DAG.getConstant(8, MVT::i32));
+
+  // Copy the result values into the output registers.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+    SDValue OutVal = OutVals[i];
+
+    // Integer return values must be sign or zero extended by the callee.
+    switch (VA.getLocInfo()) {
+    case CCValAssign::SExt:
+      OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
+      break;
+    case CCValAssign::ZExt:
+      OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
+      break;
+    case CCValAssign::AExt:
+      OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
+    default:
+      break;
+    }
+
+    // The custom bit on an i32 return value indicates that it should be passed
+    // in the high bits of the register.
+    if (VA.getValVT() == MVT::i32 && VA.needsCustom()) {
+      OutVal = DAG.getNode(ISD::SHL, DL, MVT::i64, OutVal,
+                           DAG.getConstant(32, MVT::i32));
+
+      // The next value may go in the low bits of the same register.
+      // Handle both at once.
+      if (i+1 < RVLocs.size() && RVLocs[i+1].getLocReg() == VA.getLocReg()) {
+        SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, OutVals[i+1]);
+        OutVal = DAG.getNode(ISD::OR, DL, MVT::i64, OutVal, NV);
+        // Skip the next value, it's already done.
+        ++i;
+      }
+    }
+
+    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
+
+    // Guarantee that all emitted copies are stuck together with flags.
+    Flag = Chain.getValue(1);
+    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+  }
+
+  RetOps[0] = Chain;  // Update chain.
+
+  // Add the flag if we have it.
+  if (Flag.getNode())
+    RetOps.push_back(Flag);
+
+  return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other,
+                     &RetOps[0], RetOps.size());
+}
+
+SDValue SparcTargetLowering::
+LowerFormalArguments(SDValue Chain,
+                     CallingConv::ID CallConv,
+                     bool IsVarArg,
+                     const SmallVectorImpl<ISD::InputArg> &Ins,
+                     SDLoc DL,
+                     SelectionDAG &DAG,
+                     SmallVectorImpl<SDValue> &InVals) const {
+  if (Subtarget->is64Bit())
+    return LowerFormalArguments_64(Chain, CallConv, IsVarArg, Ins,
+                                   DL, DAG, InVals);
+  return LowerFormalArguments_32(Chain, CallConv, IsVarArg, Ins,
+                                 DL, DAG, InVals);
+}
+
+/// LowerFormalArguments32 - V8 uses a very simple ABI, where all values are
+/// passed in either one or two GPRs, including FP values.  TODO: we should
+/// pass FP values in FP registers for fastcc functions.
+SDValue SparcTargetLowering::
+LowerFormalArguments_32(SDValue Chain,
+                        CallingConv::ID CallConv,
+                        bool isVarArg,
+                        const SmallVectorImpl<ISD::InputArg> &Ins,
+                        SDLoc dl,
+                        SelectionDAG &DAG,
+                        SmallVectorImpl<SDValue> &InVals) const {
   MachineFunction &MF = DAG.getMachineFunction();
   MachineRegisterInfo &RegInfo = MF.getRegInfo();
   SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
@@ -166,7 +351,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
     CCValAssign &VA = ArgLocs[i];
 
     if (i == 0  && Ins[i].Flags.isSRet()) {
-      //Get SRet from [%fp+64]
+      // Get SRet from [%fp+64].
       int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, 64, true);
       SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
       SDValue Arg = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
@@ -225,7 +410,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
 
     if (VA.needsCustom()) {
       assert(VA.getValVT() == MVT::f64);
-      //If it is double-word aligned, just load.
+      // If it is double-word aligned, just load.
       if (Offset % 8 == 0) {
         int FI = MF.getFrameInfo()->CreateFixedObject(8,
                                                       Offset,
@@ -285,7 +470,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
   }
 
   if (MF.getFunction()->hasStructRetAttr()) {
-    //Copy the SRet Argument to SRetReturnReg
+    // Copy the SRet Argument to SRetReturnReg.
     SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
     unsigned Reg = SFI->getSRetReturnReg();
     if (!Reg) {
@@ -341,14 +526,137 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
   return Chain;
 }
 
+// Lower formal arguments for the 64 bit ABI.
+SDValue SparcTargetLowering::
+LowerFormalArguments_64(SDValue Chain,
+                        CallingConv::ID CallConv,
+                        bool IsVarArg,
+                        const SmallVectorImpl<ISD::InputArg> &Ins,
+                        SDLoc DL,
+                        SelectionDAG &DAG,
+                        SmallVectorImpl<SDValue> &InVals) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  // Analyze arguments according to CC_Sparc64.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+                 getTargetMachine(), ArgLocs, *DAG.getContext());
+  CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc64);
+
+  // The argument array begins at %fp+BIAS+128, after the register save area.
+  const unsigned ArgArea = 128;
+
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    if (VA.isRegLoc()) {
+      // This argument is passed in a register.
+      // All integer register arguments are promoted by the caller to i64.
+
+      // Create a virtual register for the promoted live-in value.
+      unsigned VReg = MF.addLiveIn(VA.getLocReg(),
+                                   getRegClassFor(VA.getLocVT()));
+      SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
+
+      // Get the high bits for i32 struct elements.
+      if (VA.getValVT() == MVT::i32 && VA.needsCustom())
+        Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg,
+                          DAG.getConstant(32, MVT::i32));
+
+      // The caller promoted the argument, so insert an Assert?ext SDNode so we
+      // won't promote the value again in this function.
+      switch (VA.getLocInfo()) {
+      case CCValAssign::SExt:
+        Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
+                          DAG.getValueType(VA.getValVT()));
+        break;
+      case CCValAssign::ZExt:
+        Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
+                          DAG.getValueType(VA.getValVT()));
+        break;
+      default:
+        break;
+      }
+
+      // Truncate the register down to the argument type.
+      if (VA.isExtInLoc())
+        Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
+
+      InVals.push_back(Arg);
+      continue;
+    }
+
+    // The registers are exhausted. This argument was passed on the stack.
+    assert(VA.isMemLoc());
+    // The CC_Sparc64_Full/Half functions compute stack offsets relative to the
+    // beginning of the arguments area at %fp+BIAS+128.
+    unsigned Offset = VA.getLocMemOffset() + ArgArea;
+    unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
+    // Adjust offset for extended arguments, SPARC is big-endian.
+    // The caller will have written the full slot with extended bytes, but we
+    // prefer our own extending loads.
+    if (VA.isExtInLoc())
+      Offset += 8 - ValSize;
+    int FI = MF.getFrameInfo()->CreateFixedObject(ValSize, Offset, true);
+    InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain,
+                                 DAG.getFrameIndex(FI, getPointerTy()),
+                                 MachinePointerInfo::getFixedStack(FI),
+                                 false, false, false, 0));
+  }
+
+  if (!IsVarArg)
+    return Chain;
+
+  // This function takes variable arguments, some of which may have been passed
+  // in registers %i0-%i5. Variable floating point arguments are never passed
+  // in floating point registers. They go on %i0-%i5 or on the stack like
+  // integer arguments.
+  //
+  // The va_start intrinsic needs to know the offset to the first variable
+  // argument.
+  unsigned ArgOffset = CCInfo.getNextStackOffset();
+  SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+  // Skip the 128 bytes of register save area.
+  FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgArea +
+                                  Subtarget->getStackPointerBias());
+
+  // Save the variable arguments that were passed in registers.
+  // The caller is required to reserve stack space for 6 arguments regardless
+  // of how many arguments were actually passed.
+  SmallVector<SDValue, 8> OutChains;
+  for (; ArgOffset < 6*8; ArgOffset += 8) {
+    unsigned VReg = MF.addLiveIn(SP::I0 + ArgOffset/8, &SP::I64RegsRegClass);
+    SDValue VArg = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
+    int FI = MF.getFrameInfo()->CreateFixedObject(8, ArgOffset + ArgArea, true);
+    OutChains.push_back(DAG.getStore(Chain, DL, VArg,
+                                     DAG.getFrameIndex(FI, getPointerTy()),
+                                     MachinePointerInfo::getFixedStack(FI),
+                                     false, false, 0));
+  }
+
+  if (!OutChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+                        &OutChains[0], OutChains.size());
+
+  return Chain;
+}
+
 SDValue
 SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                SmallVectorImpl<SDValue> &InVals) const {
+  if (Subtarget->is64Bit())
+    return LowerCall_64(CLI, InVals);
+  return LowerCall_32(CLI, InVals);
+}
+
+// Lower a call for the 32-bit ABI.
+SDValue
+SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
+                                  SmallVectorImpl<SDValue> &InVals) const {
   SelectionDAG &DAG                     = CLI.DAG;
-  DebugLoc &dl                          = CLI.DL;
-  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
-  SmallVector<SDValue, 32> &OutVals     = CLI.OutVals;
-  SmallVector<ISD::InputArg, 32> &Ins   = CLI.Ins;
+  SDLoc &dl                             = CLI.DL;
+  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
+  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
   SDValue Chain                         = CLI.Chain;
   SDValue Callee                        = CLI.Callee;
   bool &isTailCall                      = CLI.IsTailCall;
@@ -372,7 +680,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
   MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
 
-  //Create local copies for byval args.
+  // Create local copies for byval args.
   SmallVector<SDValue, 8> ByValArgs;
   for (unsigned i = 0,  e = Outs.size(); i != e; ++i) {
     ISD::ArgFlagsTy Flags = Outs[i].Flags;
@@ -388,13 +696,14 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     SDValue SizeNode = DAG.getConstant(Size, MVT::i32);
 
     Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align,
-                          false,        //isVolatile,
-                          (Size <= 32), //AlwaysInline if size <= 32
+                          false,        // isVolatile,
+                          (Size <= 32), // AlwaysInline if size <= 32
                           MachinePointerInfo(), MachinePointerInfo());
     ByValArgs.push_back(FIPtr);
   }
 
-  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+                               dl);
 
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
   SmallVector<SDValue, 8> MemOpChains;
@@ -410,7 +719,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
     ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
 
-    //Use local copy if it is a byval arg.
+    // Use local copy if it is a byval arg.
     if (Flags.isByVal())
       Arg = ByValArgs[byvalArgIdx++];
 
@@ -450,7 +759,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
       if (VA.isMemLoc()) {
         unsigned Offset = VA.getLocMemOffset() + StackOffset;
-        //if it is double-word aligned, just store.
+        // if it is double-word aligned, just store.
         if (Offset % 8 == 0) {
           SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
           SDValue PtrOff = DAG.getIntPtrConstant(Offset);
@@ -483,7 +792,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
         if (NextVA.isRegLoc()) {
           RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo));
         } else {
-          //Store the low part in stack.
+          // Store the low part in stack.
           unsigned Offset = NextVA.getLocMemOffset() + StackOffset;
           SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
           SDValue PtrOff = DAG.getIntPtrConstant(Offset);
@@ -546,11 +855,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   // stuck together.
   SDValue InFlag;
   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
-    unsigned Reg = RegsToPass[i].first;
-    // Remap I0->I7 -> O0->O7.
-    if (Reg >= SP::I0 && Reg <= SP::I7)
-      Reg = Reg-SP::I0+SP::O0;
-
+    unsigned Reg = toCallerWindow(RegsToPass[i].first);
     Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
     InFlag = Chain.getValue(1);
   }
@@ -572,13 +877,9 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   Ops.push_back(Callee);
   if (hasStructRetAttr)
     Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32));
-  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
-    unsigned Reg = RegsToPass[i].first;
-    if (Reg >= SP::I0 && Reg <= SP::I7)
-      Reg = Reg-SP::I0+SP::O0;
-
-    Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType()));
-  }
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(toCallerWindow(RegsToPass[i].first),
+                                  RegsToPass[i].second.getValueType()));
   if (InFlag.getNode())
     Ops.push_back(InFlag);
 
@@ -586,7 +887,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   InFlag = Chain.getValue(1);
 
   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
-                             DAG.getIntPtrConstant(0, true), InFlag);
+                             DAG.getIntPtrConstant(0, true), InFlag, dl);
   InFlag = Chain.getValue(1);
 
   // Assign locations to each value returned by this call.
@@ -598,13 +899,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
   // Copy all of the result registers out of their specified physreg.
   for (unsigned i = 0; i != RVLocs.size(); ++i) {
-    unsigned Reg = RVLocs[i].getLocReg();
-
-    // Remap I0->I7 -> O0->O7.
-    if (Reg >= SP::I0 && Reg <= SP::I7)
-      Reg = Reg-SP::I0+SP::O0;
-
-    Chain = DAG.getCopyFromReg(Chain, dl, Reg,
+    Chain = DAG.getCopyFromReg(Chain, dl, toCallerWindow(RVLocs[i].getLocReg()),
                                RVLocs[i].getValVT(), InFlag).getValue(1);
     InFlag = Chain.getValue(2);
     InVals.push_back(Chain.getValue(0));
@@ -637,6 +932,260 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
   return getDataLayout()->getTypeAllocSize(ElementTy);
 }
 
+
+// Fixup floating point arguments in the ... part of a varargs call.
+//
+// The SPARC v9 ABI requires that floating point arguments are treated the same
+// as integers when calling a varargs function. This does not apply to the
+// fixed arguments that are part of the function's prototype.
+//
+// This function post-processes a CCValAssign array created by
+// AnalyzeCallOperands().
+static void fixupVariableFloatArgs(SmallVectorImpl<CCValAssign> &ArgLocs,
+                                   ArrayRef<ISD::OutputArg> Outs) {
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    const CCValAssign &VA = ArgLocs[i];
+    // FIXME: What about f32 arguments? C promotes them to f64 when calling
+    // varargs functions.
+    if (!VA.isRegLoc() || VA.getLocVT() != MVT::f64)
+      continue;
+    // The fixed arguments to a varargs function still go in FP registers.
+    if (Outs[VA.getValNo()].IsFixed)
+      continue;
+
+    // This floating point argument should be reassigned.
+    CCValAssign NewVA;
+
+    // Determine the offset into the argument array.
+    unsigned Offset = 8 * (VA.getLocReg() - SP::D0);
+    assert(Offset < 16*8 && "Offset out of range, bad register enum?");
+
+    if (Offset < 6*8) {
+      // This argument should go in %i0-%i5.
+      unsigned IReg = SP::I0 + Offset/8;
+      // Full register, just bitconvert into i64.
+      NewVA = CCValAssign::getReg(VA.getValNo(), VA.getValVT(),
+                                  IReg, MVT::i64, CCValAssign::BCvt);
+    } else {
+      // This needs to go to memory, we're out of integer registers.
+      NewVA = CCValAssign::getMem(VA.getValNo(), VA.getValVT(),
+                                  Offset, VA.getLocVT(), VA.getLocInfo());
+    }
+    ArgLocs[i] = NewVA;
+  }
+}
+
+// Lower a call for the 64-bit ABI.
+SDValue
+SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
+                                  SmallVectorImpl<SDValue> &InVals) const {
+  SelectionDAG &DAG = CLI.DAG;
+  SDLoc DL = CLI.DL;
+  SDValue Chain = CLI.Chain;
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
+                 DAG.getTarget(), ArgLocs, *DAG.getContext());
+  CCInfo.AnalyzeCallOperands(CLI.Outs, CC_Sparc64);
+
+  // Get the size of the outgoing arguments stack space requirement.
+  // The stack offset computed by CC_Sparc64 includes all arguments.
+  // Called functions expect 6 argument words to exist in the stack frame, used
+  // or not.
+  unsigned ArgsSize = std::max(6*8u, CCInfo.getNextStackOffset());
+
+  // Keep stack frames 16-byte aligned.
+  ArgsSize = RoundUpToAlignment(ArgsSize, 16);
+
+  // Varargs calls require special treatment.
+  if (CLI.IsVarArg)
+    fixupVariableFloatArgs(ArgLocs, CLI.Outs);
+
+  // Adjust the stack pointer to make room for the arguments.
+  // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
+  // with more than 6 arguments.
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+                               DL);
+
+  // Collect the set of registers to pass to the function and their values.
+  // This will be emitted as a sequence of CopyToReg nodes glued to the call
+  // instruction.
+  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+  // Collect chains from all the memory opeations that copy arguments to the
+  // stack. They must follow the stack pointer adjustment above and precede the
+  // call instruction itself.
+  SmallVector<SDValue, 8> MemOpChains;
+
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    const CCValAssign &VA = ArgLocs[i];
+    SDValue Arg = CLI.OutVals[i];
+
+    // Promote the value if needed.
+    switch (VA.getLocInfo()) {
+    default:
+      llvm_unreachable("Unknown location info!");
+    case CCValAssign::Full:
+      break;
+    case CCValAssign::SExt:
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::ZExt:
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::AExt:
+      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::BCvt:
+      Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
+      break;
+    }
+
+    if (VA.isRegLoc()) {
+      // The custom bit on an i32 return value indicates that it should be
+      // passed in the high bits of the register.
+      if (VA.getValVT() == MVT::i32 && VA.needsCustom()) {
+        Arg = DAG.getNode(ISD::SHL, DL, MVT::i64, Arg,
+                          DAG.getConstant(32, MVT::i32));
+
+        // The next value may go in the low bits of the same register.
+        // Handle both at once.
+        if (i+1 < ArgLocs.size() && ArgLocs[i+1].isRegLoc() &&
+            ArgLocs[i+1].getLocReg() == VA.getLocReg()) {
+          SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64,
+                                   CLI.OutVals[i+1]);
+          Arg = DAG.getNode(ISD::OR, DL, MVT::i64, Arg, NV);
+          // Skip the next value, it's already done.
+          ++i;
+        }
+      }
+      RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()), Arg));
+      continue;
+    }
+
+    assert(VA.isMemLoc());
+
+    // Create a store off the stack pointer for this argument.
+    SDValue StackPtr = DAG.getRegister(SP::O6, getPointerTy());
+    // The argument area starts at %fp+BIAS+128 in the callee frame,
+    // %sp+BIAS+128 in ours.
+    SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() +
+                                           Subtarget->getStackPointerBias() +
+                                           128);
+    PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
+    MemOpChains.push_back(DAG.getStore(Chain, DL, Arg, PtrOff,
+                                       MachinePointerInfo(),
+                                       false, false, 0));
+  }
+
+  // Emit all stores, make sure they occur before the call.
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+                        &MemOpChains[0], MemOpChains.size());
+
+  // Build a sequence of CopyToReg nodes glued together with token chain and
+  // glue operands which copy the outgoing args into registers. The InGlue is
+  // necessary since all emitted instructions must be stuck together in order
+  // to pass the live physical registers.
+  SDValue InGlue;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, DL,
+                             RegsToPass[i].first, RegsToPass[i].second, InGlue);
+    InGlue = Chain.getValue(1);
+  }
+
+  // If the callee is a GlobalAddress node (quite common, every direct call is)
+  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+  // Likewise ExternalSymbol -> TargetExternalSymbol.
+  SDValue Callee = CLI.Callee;
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy());
+  else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy());
+
+  // Build the operands for the call instruction itself.
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+
+  // Make sure the CopyToReg nodes are glued to the call instruction which
+  // consumes the registers.
+  if (InGlue.getNode())
+    Ops.push_back(InGlue);
+
+  // Now the call itself.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+  Chain = DAG.getNode(SPISD::CALL, DL, NodeTys, &Ops[0], Ops.size());
+  InGlue = Chain.getValue(1);
+
+  // Revert the stack pointer immediately after the call.
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+                             DAG.getIntPtrConstant(0, true), InGlue, DL);
+  InGlue = Chain.getValue(1);
+
+  // Now extract the return values. This is more or less the same as
+  // LowerFormalArguments_64.
+
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
+                 DAG.getTarget(), RVLocs, *DAG.getContext());
+  RVInfo.AnalyzeCallResult(CLI.Ins, CC_Sparc64);
+
+  // Copy all of the result registers out of their specified physreg.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    unsigned Reg = toCallerWindow(VA.getLocReg());
+
+    // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
+    // reside in the same register in the high and low bits. Reuse the
+    // CopyFromReg previous node to avoid duplicate copies.
+    SDValue RV;
+    if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
+      if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
+        RV = Chain.getValue(0);
+
+    // But usually we'll create a new CopyFromReg for a different register.
+    if (!RV.getNode()) {
+      RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
+      Chain = RV.getValue(1);
+      InGlue = Chain.getValue(2);
+    }
+
+    // Get the high bits for i32 struct elements.
+    if (VA.getValVT() == MVT::i32 && VA.needsCustom())
+      RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
+                       DAG.getConstant(32, MVT::i32));
+
+    // The callee promoted the return value, so insert an Assert?ext SDNode so
+    // we won't promote the value again in this function.
+    switch (VA.getLocInfo()) {
+    case CCValAssign::SExt:
+      RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
+                       DAG.getValueType(VA.getValVT()));
+      break;
+    case CCValAssign::ZExt:
+      RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
+                       DAG.getValueType(VA.getValVT()));
+      break;
+    default:
+      break;
+    }
+
+    // Truncate the register down to the return value type.
+    if (VA.isExtInLoc())
+      RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
+
+    InVals.push_back(RV);
+  }
+
+  return Chain;
+}
+
 //===----------------------------------------------------------------------===//
 // TargetLowering Implementation
 //===----------------------------------------------------------------------===//
@@ -689,11 +1238,14 @@ static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
 
 SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
   : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+  Subtarget = &TM.getSubtarget<SparcSubtarget>();
 
   // Set up the register classes.
   addRegisterClass(MVT::i32, &SP::IntRegsRegClass);
   addRegisterClass(MVT::f32, &SP::FPRegsRegClass);
   addRegisterClass(MVT::f64, &SP::DFPRegsRegClass);
+  if (Subtarget->is64Bit())
+    addRegisterClass(MVT::i64, &SP::I64RegsRegClass);
 
   // Turn FP extload into load/fextend
   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
@@ -703,9 +1255,10 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 
   // Custom legalize GlobalAddress nodes into LO/HI parts.
-  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
-  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
-  setOperationAction(ISD::ConstantPool , MVT::i32, Custom);
+  setOperationAction(ISD::GlobalAddress, getPointerTy(), Custom);
+  setOperationAction(ISD::GlobalTLSAddress, getPointerTy(), Custom);
+  setOperationAction(ISD::ConstantPool, getPointerTy(), Custom);
+  setOperationAction(ISD::BlockAddress, getPointerTy(), Custom);
 
   // Sparc doesn't have sext_inreg, replace them with shl/sra
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
@@ -749,11 +1302,25 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
 
+  if (Subtarget->is64Bit()) {
+    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
+    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
+    setOperationAction(ISD::SELECT, MVT::i64, Expand);
+    setOperationAction(ISD::SETCC, MVT::i64, Expand);
+    setOperationAction(ISD::BR_CC, MVT::i64, Custom);
+    setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+  }
+
   // FIXME: There are instructions available for ATOMIC_FENCE
   // on SparcV8 and later.
-  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
 
+  if (!Subtarget->isV9()) {
+    // SparcV8 does not have FNEGD and FABSD.
+    setOperationAction(ISD::FNEG, MVT::f64, Custom);
+    setOperationAction(ISD::FABS, MVT::f64, Custom);
+  }
+
   setOperationAction(ISD::FSIN , MVT::f64, Expand);
   setOperationAction(ISD::FCOS , MVT::f64, Expand);
   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
@@ -804,7 +1371,7 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
 
   setStackPointerRegisterToSaveRestore(SP::O6);
 
-  if (TM.getSubtarget<SparcSubtarget>().isV9())
+  if (Subtarget->isV9())
     setOperationAction(ISD::CTPOP, MVT::i32, Legal);
 
   setMinFunctionAlignment(2);
@@ -818,8 +1385,10 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case SPISD::CMPICC:     return "SPISD::CMPICC";
   case SPISD::CMPFCC:     return "SPISD::CMPFCC";
   case SPISD::BRICC:      return "SPISD::BRICC";
+  case SPISD::BRXCC:      return "SPISD::BRXCC";
   case SPISD::BRFCC:      return "SPISD::BRFCC";
   case SPISD::SELECT_ICC: return "SPISD::SELECT_ICC";
+  case SPISD::SELECT_XCC: return "SPISD::SELECT_XCC";
   case SPISD::SELECT_FCC: return "SPISD::SELECT_FCC";
   case SPISD::Hi:         return "SPISD::Hi";
   case SPISD::Lo:         return "SPISD::Lo";
@@ -835,17 +1404,19 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
 /// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
 /// be zero. Op is expected to be a target specific node. Used by DAG
 /// combiner.
-void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
-                                                         APInt &KnownZero,
-                                                         APInt &KnownOne,
-                                                         const SelectionDAG &DAG,
-                                                         unsigned Depth) const {
+void SparcTargetLowering::computeMaskedBitsForTargetNode
+                                (const SDValue Op,
+                                 APInt &KnownZero,
+                                 APInt &KnownOne,
+                                 const SelectionDAG &DAG,
+                                 unsigned Depth) const {
   APInt KnownZero2, KnownOne2;
   KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
 
   switch (Op.getOpcode()) {
   default: break;
   case SPISD::SELECT_ICC:
+  case SPISD::SELECT_XCC:
   case SPISD::SELECT_FCC:
     DAG.ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
     DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
@@ -866,7 +1437,8 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
   if (isa<ConstantSDNode>(RHS) &&
       cast<ConstantSDNode>(RHS)->isNullValue() &&
       CC == ISD::SETNE &&
-      ((LHS.getOpcode() == SPISD::SELECT_ICC &&
+      (((LHS.getOpcode() == SPISD::SELECT_ICC ||
+         LHS.getOpcode() == SPISD::SELECT_XCC) &&
         LHS.getOperand(3).getOpcode() == SPISD::CMPICC) ||
        (LHS.getOpcode() == SPISD::SELECT_FCC &&
         LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) &&
@@ -881,50 +1453,104 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
   }
 }
 
+// Convert to a target node and set target flags.
+SDValue SparcTargetLowering::withTargetFlags(SDValue Op, unsigned TF,
+                                             SelectionDAG &DAG) const {
+  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
+    return DAG.getTargetGlobalAddress(GA->getGlobal(),
+                                      SDLoc(GA),
+                                      GA->getValueType(0),
+                                      GA->getOffset(), TF);
+
+  if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
+    return DAG.getTargetConstantPool(CP->getConstVal(),
+                                     CP->getValueType(0),
+                                     CP->getAlignment(),
+                                     CP->getOffset(), TF);
+
+  if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
+    return DAG.getTargetBlockAddress(BA->getBlockAddress(),
+                                     Op.getValueType(),
+                                     0,
+                                     TF);
+
+  if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
+    return DAG.getTargetExternalSymbol(ES->getSymbol(),
+                                       ES->getValueType(0), TF);
+
+  llvm_unreachable("Unhandled address SDNode");
+}
+
+// Split Op into high and low parts according to HiTF and LoTF.
+// Return an ADD node combining the parts.
+SDValue SparcTargetLowering::makeHiLoPair(SDValue Op,
+                                          unsigned HiTF, unsigned LoTF,
+                                          SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+  SDValue Hi = DAG.getNode(SPISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
+  SDValue Lo = DAG.getNode(SPISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
+  return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
+}
+
+// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
+// or ExternalSymbol SDNode.
+SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT VT = getPointerTy();
+
+  // Handle PIC mode first.
+  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+    // This is the pic32 code model, the GOT is known to be smaller than 4GB.
+    SDValue HiLo = makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG);
+    SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, VT);
+    SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, VT, GlobalBase, HiLo);
+    return DAG.getLoad(VT, DL, DAG.getEntryNode(), AbsAddr,
+                       MachinePointerInfo::getGOT(), false, false, false, 0);
+  }
+
+  // This is one of the absolute code models.
+  switch(getTargetMachine().getCodeModel()) {
+  default:
+    llvm_unreachable("Unsupported absolute code model");
+  case CodeModel::Small:
+    // abs32.
+    return makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG);
+  case CodeModel::Medium: {
+    // abs44.
+    SDValue H44 = makeHiLoPair(Op, SPII::MO_H44, SPII::MO_M44, DAG);
+    H44 = DAG.getNode(ISD::SHL, DL, VT, H44, DAG.getConstant(12, MVT::i32));
+    SDValue L44 = withTargetFlags(Op, SPII::MO_L44, DAG);
+    L44 = DAG.getNode(SPISD::Lo, DL, VT, L44);
+    return DAG.getNode(ISD::ADD, DL, VT, H44, L44);
+  }
+  case CodeModel::Large: {
+    // abs64.
+    SDValue Hi = makeHiLoPair(Op, SPII::MO_HH, SPII::MO_HM, DAG);
+    Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, DAG.getConstant(32, MVT::i32));
+    SDValue Lo = makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG);
+    return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
+  }
+  }
+}
+
 SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {
-  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-  // FIXME there isn't really any debug info here
-  DebugLoc dl = Op.getDebugLoc();
-  SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
-  SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA);
-  SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA);
-
-  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
-    return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
-
-  SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
-                                   getPointerTy());
-  SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
-  SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
-                                GlobalBase, RelAddr);
-  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
-                     AbsAddr, MachinePointerInfo(), false, false, false, 0);
+  return makeAddress(Op, DAG);
 }
 
 SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
                                                SelectionDAG &DAG) const {
-  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
-  // FIXME there isn't really any debug info here
-  DebugLoc dl = Op.getDebugLoc();
-  const Constant *C = N->getConstVal();
-  SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
-  SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP);
-  SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP);
-  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
-    return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
-
-  SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
-                                   getPointerTy());
-  SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
-  SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
-                                GlobalBase, RelAddr);
-  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
-                     AbsAddr, MachinePointerInfo(), false, false, false, 0);
+  return makeAddress(Op, DAG);
+}
+
+SDValue SparcTargetLowering::LowerBlockAddress(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  return makeAddress(Op, DAG);
 }
 
 static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   // Convert the fp value to integer in an FP register.
   assert(Op.getValueType() == MVT::i32);
   Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0));
@@ -932,7 +1558,7 @@ static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
 }
 
 static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   assert(Op.getOperand(0).getValueType() == MVT::i32);
   SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
   // Convert the int value to FP in an FP register.
@@ -945,7 +1571,7 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
   SDValue LHS = Op.getOperand(2);
   SDValue RHS = Op.getOperand(3);
   SDValue Dest = Op.getOperand(4);
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   unsigned Opc, SPCC = ~0U;
 
   // If this is a br_cc of a "setcc", and if the setcc got lowered into
@@ -954,12 +1580,11 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
 
   // Get the condition flag.
   SDValue CompareFlag;
-  if (LHS.getValueType() == MVT::i32) {
-    EVT VTs[] = { MVT::i32, MVT::Glue };
-    SDValue Ops[2] = { LHS, RHS };
-    CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
+  if (LHS.getValueType().isInteger()) {
+    CompareFlag = DAG.getNode(SPISD::CMPICC, dl, MVT::Glue, LHS, RHS);
     if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
-    Opc = SPISD::BRICC;
+    // 32-bit compares use the icc flags, 64-bit uses the xcc flags.
+    Opc = LHS.getValueType() == MVT::i32 ? SPISD::BRICC : SPISD::BRXCC;
   } else {
     CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
     if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
@@ -975,7 +1600,7 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
   SDValue TrueVal = Op.getOperand(2);
   SDValue FalseVal = Op.getOperand(3);
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   unsigned Opc, SPCC = ~0U;
 
   // If this is a select_cc of a "setcc", and if the setcc got lowered into
@@ -983,12 +1608,10 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
   LookThroughSetCC(LHS, RHS, CC, SPCC);
 
   SDValue CompareFlag;
-  if (LHS.getValueType() == MVT::i32) {
-    // subcc returns a value
-    EVT VTs[] = { LHS.getValueType(), MVT::Glue };
-    SDValue Ops[2] = { LHS, RHS };
-    CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
-    Opc = SPISD::SELECT_ICC;
+  if (LHS.getValueType().isInteger()) {
+    CompareFlag = DAG.getNode(SPISD::CMPICC, dl, MVT::Glue, LHS, RHS);
+    Opc = LHS.getValueType() == MVT::i32 ?
+          SPISD::SELECT_ICC : SPISD::SELECT_XCC;
     if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
   } else {
     CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
@@ -1004,16 +1627,18 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
   MachineFunction &MF = DAG.getMachineFunction();
   SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
 
+  // Need frame address to find the address of VarArgsFrameIndex.
+  MF.getFrameInfo()->setFrameAddressIsTaken(true);
+
   // vastart just stores the address of the VarArgsFrameIndex slot into the
   // memory location argument.
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc DL(Op);
   SDValue Offset =
-    DAG.getNode(ISD::ADD, dl, MVT::i32,
-                DAG.getRegister(SP::I6, MVT::i32),
-                DAG.getConstant(FuncInfo->getVarArgsFrameOffset(),
-                                MVT::i32));
+    DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(),
+                DAG.getRegister(SP::I6, TLI.getPointerTy()),
+                DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset()));
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-  return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1),
+  return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
                       MachinePointerInfo(SV), false, false, 0);
 }
 
@@ -1022,39 +1647,28 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
   EVT VT = Node->getValueType(0);
   SDValue InChain = Node->getOperand(0);
   SDValue VAListPtr = Node->getOperand(1);
+  EVT PtrVT = VAListPtr.getValueType();
   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
-  DebugLoc dl = Node->getDebugLoc();
-  SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr,
+  SDLoc DL(Node);
+  SDValue VAList = DAG.getLoad(PtrVT, DL, InChain, VAListPtr,
                                MachinePointerInfo(SV), false, false, false, 0);
-  // Increment the pointer, VAList, to the next vaarg
-  SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList,
-                                  DAG.getConstant(VT.getSizeInBits()/8,
-                                                  MVT::i32));
-  // Store the incremented VAList to the legalized pointer
-  InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr,
+  // Increment the pointer, VAList, to the next vaarg.
+  SDValue NextPtr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+                                DAG.getIntPtrConstant(VT.getSizeInBits()/8));
+  // Store the incremented VAList to the legalized pointer.
+  InChain = DAG.getStore(VAList.getValue(1), DL, NextPtr,
                          VAListPtr, MachinePointerInfo(SV), false, false, 0);
-  // Load the actual argument out of the pointer VAList, unless this is an
-  // f64 load.
-  if (VT != MVT::f64)
-    return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(),
-                       false, false, false, 0);
-
-  // Otherwise, load it as i64, then do a bitconvert.
-  SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, MachinePointerInfo(),
-                          false, false, false, 0);
-
-  // Bit-Convert the value to f64.
-  SDValue Ops[2] = {
-    DAG.getNode(ISD::BITCAST, dl, MVT::f64, V),
-    V.getValue(1)
-  };
-  return DAG.getMergeValues(Ops, 2, dl);
+  // Load the actual argument out of the pointer VAList.
+  // We can't count on greater alignment than the word size.
+  return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(),
+                     false, false, false,
+                     std::min(PtrVT.getSizeInBits(), VT.getSizeInBits())/8);
 }
 
 static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
   SDValue Chain = Op.getOperand(0);  // Legalize the chain.
   SDValue Size  = Op.getOperand(1);  // Legalize the size.
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
 
   unsigned SPReg = SP::O6;
   SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
@@ -1071,7 +1685,7 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
 
 
 static SDValue getFLUSHW(SDValue Op, SelectionDAG &DAG) {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   SDValue Chain = DAG.getNode(SPISD::FLUSHW,
                               dl, MVT::Other, DAG.getEntryNode());
   return Chain;
@@ -1082,7 +1696,7 @@ static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
   MFI->setFrameAddressIsTaken(true);
 
   EVT VT = Op.getValueType();
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   unsigned FrameReg = SP::I6;
 
   uint64_t depth = Op.getConstantOperandVal(0);
@@ -1108,20 +1722,25 @@ static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
   return FrameAddr;
 }
 
-static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
-  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
+                               const SparcTargetLowering &TLI) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
   MFI->setReturnAddressIsTaken(true);
 
   EVT VT = Op.getValueType();
-  DebugLoc dl = Op.getDebugLoc();
-  unsigned RetReg = SP::I7;
-
+  SDLoc dl(Op);
   uint64_t depth = Op.getConstantOperandVal(0);
 
   SDValue RetAddr;
-  if (depth == 0)
+  if (depth == 0) {
+    unsigned RetReg = MF.addLiveIn(SP::I7,
+                                   TLI.getRegClassFor(TLI.getPointerTy()));
     RetAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, RetReg, VT);
-  else {
+  } else {
+    // Need frame address to find return address of the caller.
+    MFI->setFrameAddressIsTaken(true);
+
     // flush first to make sure the windowed registers' values are in stack
     SDValue Chain = getFLUSHW(Op, DAG);
     RetAddr = DAG.getCopyFromReg(Chain, dl, SP::I6, VT);
@@ -1140,15 +1759,48 @@ static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
   return RetAddr;
 }
 
+static SDValue LowerF64Op(SDValue Op, SelectionDAG &DAG)
+{
+  SDLoc dl(Op);
+
+  assert(Op.getValueType() == MVT::f64 && "LowerF64Op called on non-double!");
+  assert(Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::FABS);
+
+  // Lower fneg/fabs on f64 to fneg/fabs on f32.
+  // fneg f64 => fneg f32:sub_even, fmov f32:sub_odd.
+  // fabs f64 => fabs f32:sub_even, fmov f32:sub_odd.
+
+  SDValue SrcReg64 = Op.getOperand(0);
+  SDValue Hi32 = DAG.getTargetExtractSubreg(SP::sub_even, dl, MVT::f32,
+                                            SrcReg64);
+  SDValue Lo32 = DAG.getTargetExtractSubreg(SP::sub_odd, dl, MVT::f32,
+                                            SrcReg64);
+
+  Hi32 = DAG.getNode(Op.getOpcode(), dl, MVT::f32, Hi32);
+
+  SDValue DstReg64 = SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF,
+                                                dl, MVT::f64), 0);
+  DstReg64 = DAG.getTargetInsertSubreg(SP::sub_even, dl, MVT::f64,
+                                       DstReg64, Hi32);
+  DstReg64 = DAG.getTargetInsertSubreg(SP::sub_odd, dl, MVT::f64,
+                                       DstReg64, Lo32);
+  return DstReg64;
+}
+
 SDValue SparcTargetLowering::
 LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Should not custom lower this!");
-  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
+
+  case ISD::FNEG:
+  case ISD::FABS:               return LowerF64Op(Op, DAG);
+
+  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG, *this);
   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
   case ISD::GlobalTLSAddress:
     llvm_unreachable("TLS not implemented for Sparc.");
   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
+  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
   case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
   case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
@@ -1256,7 +1908,7 @@ SparcTargetLowering::getConstraintType(const std::string &Constraint) const {
 
 std::pair<unsigned, const TargetRegisterClass*>
 SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
-                                                  EVT VT) const {
+                                                  MVT VT) const {
   if (Constraint.size() == 1) {
     switch (Constraint[0]) {
     case 'r':
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 09148ea..261c25a 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -19,14 +19,18 @@
 #include "llvm/Target/TargetLowering.h"
 
 namespace llvm {
+  class SparcSubtarget;
+
   namespace SPISD {
     enum {
       FIRST_NUMBER = ISD::BUILTIN_OP_END,
-      CMPICC,      // Compare two GPR operands, set icc.
+      CMPICC,      // Compare two GPR operands, set icc+xcc.
       CMPFCC,      // Compare two FP operands, set fcc.
       BRICC,       // Branch to dest on icc condition
+      BRXCC,       // Branch to dest on xcc condition (64-bit only).
       BRFCC,       // Branch to dest on fcc condition
       SELECT_ICC,  // Select between two values using the current ICC flags.
+      SELECT_XCC,  // Select between two values using the current XCC flags.
       SELECT_FCC,  // Select between two values using the current FCC flags.
 
       Hi, Lo,      // Hi/Lo operations, typically on a global address.
@@ -42,6 +46,7 @@ namespace llvm {
   }
 
   class SparcTargetLowering : public TargetLowering {
+    const SparcSubtarget *Subtarget;
   public:
     SparcTargetLowering(TargetMachine &TM);
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -63,33 +68,65 @@ namespace llvm {
 
     ConstraintType getConstraintType(const std::string &Constraint) const;
     std::pair<unsigned, const TargetRegisterClass*>
-    getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+    getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const;
 
     virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+    virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
 
     virtual SDValue
       LowerFormalArguments(SDValue Chain,
                            CallingConv::ID CallConv,
                            bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
-                           DebugLoc dl, SelectionDAG &DAG,
+                           SDLoc dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals) const;
+    SDValue LowerFormalArguments_32(SDValue Chain,
+                                    CallingConv::ID CallConv,
+                                    bool isVarArg,
+                                    const SmallVectorImpl<ISD::InputArg> &Ins,
+                                    SDLoc dl, SelectionDAG &DAG,
+                                    SmallVectorImpl<SDValue> &InVals) const;
+    SDValue LowerFormalArguments_64(SDValue Chain,
+                                    CallingConv::ID CallConv,
+                                    bool isVarArg,
+                                    const SmallVectorImpl<ISD::InputArg> &Ins,
+                                    SDLoc dl, SelectionDAG &DAG,
+                                    SmallVectorImpl<SDValue> &InVals) const;
 
     virtual SDValue
       LowerCall(TargetLowering::CallLoweringInfo &CLI,
                 SmallVectorImpl<SDValue> &InVals) const;
+    SDValue LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
+                         SmallVectorImpl<SDValue> &InVals) const;
+    SDValue LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
+                         SmallVectorImpl<SDValue> &InVals) const;
 
     virtual SDValue
       LowerReturn(SDValue Chain,
                   CallingConv::ID CallConv, bool isVarArg,
                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                   const SmallVectorImpl<SDValue> &OutVals,
-                  DebugLoc dl, SelectionDAG &DAG) const;
+                  SDLoc dl, SelectionDAG &DAG) const;
+    SDValue LowerReturn_32(SDValue Chain,
+                           CallingConv::ID CallConv, bool IsVarArg,
+                           const SmallVectorImpl<ISD::OutputArg> &Outs,
+                           const SmallVectorImpl<SDValue> &OutVals,
+                           SDLoc DL, SelectionDAG &DAG) const;
+    SDValue LowerReturn_64(SDValue Chain,
+                           CallingConv::ID CallConv, bool IsVarArg,
+                           const SmallVectorImpl<ISD::OutputArg> &Outs,
+                           const SmallVectorImpl<SDValue> &OutVals,
+                           SDLoc DL, SelectionDAG &DAG) const;
 
     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
 
     unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const;
+    SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;
+    SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
+                         SelectionDAG &DAG) const;
+    SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const;
   };
 } // end namespace llvm
 
diff --git a/lib/Target/Sparc/SparcInstr64Bit.td b/lib/Target/Sparc/SparcInstr64Bit.td
new file mode 100644
index 0000000..47658ee
--- /dev/null
+++ b/lib/Target/Sparc/SparcInstr64Bit.td
@@ -0,0 +1,358 @@
+//===-- SparcInstr64Bit.td - 64-bit instructions for Sparc Target ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains instruction definitions and patterns needed for 64-bit
+// code generation on SPARC v9.
+//
+// Some SPARC v9 instructions are defined in SparcInstrInfo.td because they can
+// also be used in 32-bit code running on a SPARC v9 CPU.
+//
+//===----------------------------------------------------------------------===//
+
+let Predicates = [Is64Bit] in {
+// The same integer registers are used for i32 and i64 values.
+// When registers hold i32 values, the high bits are don't care.
+// This give us free trunc and anyext.
+def : Pat<(i64 (anyext i32:$val)), (COPY_TO_REGCLASS $val, I64Regs)>;
+def : Pat<(i32 (trunc i64:$val)), (COPY_TO_REGCLASS $val, IntRegs)>;
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Shift Instructions.
+//===----------------------------------------------------------------------===//
+//
+// The 32-bit shift instructions are still available. The left shift srl
+// instructions shift all 64 bits, but it only accepts a 5-bit shift amount.
+//
+// The srl instructions only shift the low 32 bits and clear the high 32 bits.
+// Finally, sra shifts the low 32 bits and sign-extends to 64 bits.
+
+let Predicates = [Is64Bit] in {
+
+def : Pat<(i64 (zext i32:$val)), (SRLri $val, 0)>;
+def : Pat<(i64 (sext i32:$val)), (SRAri $val, 0)>;
+
+def : Pat<(i64 (and i64:$val, 0xffffffff)), (SRLri $val, 0)>;
+def : Pat<(i64 (sext_inreg i64:$val, i32)), (SRAri $val, 0)>;
+
+defm SLLX : F3_S<"sllx", 0b100101, 1, shl, i64, I64Regs>;
+defm SRLX : F3_S<"srlx", 0b100110, 1, srl, i64, I64Regs>;
+defm SRAX : F3_S<"srax", 0b100111, 1, sra, i64, I64Regs>;
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Immediates.
+//===----------------------------------------------------------------------===//
+//
+// All 32-bit immediates can be materialized with sethi+or, but 64-bit
+// immediates may require more code. There may be a point where it is
+// preferable to use a constant pool load instead, depending on the
+// microarchitecture.
+
+// Single-instruction patterns.
+
+// The ALU instructions want their simm13 operands as i32 immediates.
+def as_i32imm : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i32);
+}]>;
+def : Pat<(i64 simm13:$val), (ORri (i64 G0), (as_i32imm $val))>;
+def : Pat<(i64 SETHIimm:$val), (SETHIi (HI22 $val))>;
+
+// Double-instruction patterns.
+
+// All unsigned i32 immediates can be handled by sethi+or.
+def uimm32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>;
+def : Pat<(i64 uimm32:$val), (ORri (SETHIi (HI22 $val)), (LO10 $val))>,
+      Requires<[Is64Bit]>;
+
+// All negative i33 immediates can be handled by sethi+xor.
+def nimm33 : PatLeaf<(imm), [{
+  int64_t Imm = N->getSExtValue();
+  return Imm < 0 && isInt<33>(Imm);
+}]>;
+// Bits 10-31 inverted. Same as assembler's %hix.
+def HIX22 : SDNodeXForm<imm, [{
+  uint64_t Val = (~N->getZExtValue() >> 10) & ((1u << 22) - 1);
+  return CurDAG->getTargetConstant(Val, MVT::i32);
+}]>;
+// Bits 0-9 with ones in bits 10-31. Same as assembler's %lox.
+def LOX10 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(~(~N->getZExtValue() & 0x3ff), MVT::i32);
+}]>;
+def : Pat<(i64 nimm33:$val), (XORri (SETHIi (HIX22 $val)), (LOX10 $val))>,
+      Requires<[Is64Bit]>;
+
+// More possible patterns:
+//
+//   (sllx sethi, n)
+//   (sllx simm13, n)
+//
+// 3 instrs:
+//
+//   (xor (sllx sethi), simm13)
+//   (sllx (xor sethi, simm13))
+//
+// 4 instrs:
+//
+//   (or sethi, (sllx sethi))
+//   (xnor sethi, (sllx sethi))
+//
+// 5 instrs:
+//
+//   (or (sllx sethi), (or sethi, simm13))
+//   (xnor (sllx sethi), (or sethi, simm13))
+//   (or (sllx sethi), (sllx sethi))
+//   (xnor (sllx sethi), (sllx sethi))
+//
+// Worst case is 6 instrs:
+//
+//   (or (sllx (or sethi, simmm13)), (or sethi, simm13))
+
+// Bits 42-63, same as assembler's %hh.
+def HH22 : SDNodeXForm<imm, [{
+  uint64_t Val = (N->getZExtValue() >> 42) & ((1u << 22) - 1);
+  return CurDAG->getTargetConstant(Val, MVT::i32);
+}]>;
+// Bits 32-41, same as assembler's %hm.
+def HM10 : SDNodeXForm<imm, [{
+  uint64_t Val = (N->getZExtValue() >> 32) & ((1u << 10) - 1);
+  return CurDAG->getTargetConstant(Val, MVT::i32);
+}]>;
+def : Pat<(i64 imm:$val),
+          (ORrr (SLLXri (ORri (SETHIi (HH22 $val)), (HM10 $val)), (i32 32)),
+                (ORri (SETHIi (HI22 $val)), (LO10 $val)))>,
+      Requires<[Is64Bit]>;
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Integer Arithmetic and Logic.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [Is64Bit] in {
+
+// Register-register instructions.
+
+def : Pat<(and i64:$a, i64:$b), (ANDrr $a, $b)>;
+def : Pat<(or  i64:$a, i64:$b), (ORrr  $a, $b)>;
+def : Pat<(xor i64:$a, i64:$b), (XORrr $a, $b)>;
+
+def : Pat<(and i64:$a, (not i64:$b)), (ANDNrr $a, $b)>;
+def : Pat<(or  i64:$a, (not i64:$b)), (ORNrr  $a, $b)>;
+def : Pat<(xor i64:$a, (not i64:$b)), (XNORrr $a, $b)>;
+
+def : Pat<(add i64:$a, i64:$b), (ADDrr $a, $b)>;
+def : Pat<(sub i64:$a, i64:$b), (SUBrr $a, $b)>;
+
+// Add/sub with carry were renamed to addc/subc in SPARC v9.
+def : Pat<(adde i64:$a, i64:$b), (ADDXrr $a, $b)>;
+def : Pat<(sube i64:$a, i64:$b), (SUBXrr $a, $b)>;
+
+def : Pat<(addc i64:$a, i64:$b), (ADDCCrr $a, $b)>;
+def : Pat<(subc i64:$a, i64:$b), (SUBCCrr $a, $b)>;
+
+def : Pat<(SPcmpicc i64:$a, i64:$b), (CMPrr $a, $b)>;
+
+// Register-immediate instructions.
+
+def : Pat<(and i64:$a, (i64 simm13:$b)), (ANDri $a, (as_i32imm $b))>;
+def : Pat<(or  i64:$a, (i64 simm13:$b)), (ORri  $a, (as_i32imm $b))>;
+def : Pat<(xor i64:$a, (i64 simm13:$b)), (XORri $a, (as_i32imm $b))>;
+
+def : Pat<(add i64:$a, (i64 simm13:$b)), (ADDri $a, (as_i32imm $b))>;
+def : Pat<(sub i64:$a, (i64 simm13:$b)), (SUBri $a, (as_i32imm $b))>;
+
+def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (CMPri $a, (as_i32imm $b))>;
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Integer Multiply and Divide.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [Is64Bit] in {
+
+def MULXrr : F3_1<2, 0b001001,
+                  (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2),
+                  "mulx $rs1, $rs2, $rd",
+                  [(set i64:$rd, (mul i64:$rs1, i64:$rs2))]>;
+def MULXri : F3_2<2, 0b001001,
+                  (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i),
+                  "mulx $rs1, $i, $rd",
+                  [(set i64:$rd, (mul i64:$rs1, (i64 simm13:$i)))]>;
+
+// Division can trap.
+let hasSideEffects = 1 in {
+def SDIVXrr : F3_1<2, 0b101101,
+                   (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2),
+                   "sdivx $rs1, $rs2, $rd",
+                   [(set i64:$rd, (sdiv i64:$rs1, i64:$rs2))]>;
+def SDIVXri : F3_2<2, 0b101101,
+                   (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i),
+                   "sdivx $rs1, $i, $rd",
+                   [(set i64:$rd, (sdiv i64:$rs1, (i64 simm13:$i)))]>;
+
+def UDIVXrr : F3_1<2, 0b001101,
+                   (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2),
+                   "udivx $rs1, $rs2, $rd",
+                   [(set i64:$rd, (udiv i64:$rs1, i64:$rs2))]>;
+def UDIVXri : F3_2<2, 0b001101,
+                   (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i),
+                   "udivx $rs1, $i, $rd",
+                   [(set i64:$rd, (udiv i64:$rs1, (i64 simm13:$i)))]>;
+} // hasSideEffects = 1
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Loads and Stores.
+//===----------------------------------------------------------------------===//
+//
+// All the 32-bit loads and stores are available. The extending loads are sign
+// or zero-extending to 64 bits. The LDrr and LDri instructions load 32 bits
+// zero-extended to i64. Their mnemonic is lduw in SPARC v9 (Load Unsigned
+// Word).
+//
+// SPARC v9 adds 64-bit loads as well as a sign-extending ldsw i32 loads.
+
+let Predicates = [Is64Bit] in {
+
+// 64-bit loads.
+def LDXrr  : F3_1<3, 0b001011,
+                  (outs I64Regs:$dst), (ins MEMrr:$addr),
+                  "ldx [$addr], $dst",
+                  [(set i64:$dst, (load ADDRrr:$addr))]>;
+def LDXri  : F3_2<3, 0b001011,
+                  (outs I64Regs:$dst), (ins MEMri:$addr),
+                  "ldx [$addr], $dst",
+                  [(set i64:$dst, (load ADDRri:$addr))]>;
+
+// Extending loads to i64.
+def : Pat<(i64 (zextloadi1 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>;
+def : Pat<(i64 (zextloadi1 ADDRri:$addr)), (LDUBri ADDRri:$addr)>;
+def : Pat<(i64 (extloadi1 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>;
+def : Pat<(i64 (extloadi1 ADDRri:$addr)), (LDUBri ADDRri:$addr)>;
+
+def : Pat<(i64 (zextloadi8 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>;
+def : Pat<(i64 (zextloadi8 ADDRri:$addr)), (LDUBri ADDRri:$addr)>;
+def : Pat<(i64 (extloadi8 ADDRrr:$addr)),  (LDUBrr ADDRrr:$addr)>;
+def : Pat<(i64 (extloadi8 ADDRri:$addr)),  (LDUBri ADDRri:$addr)>;
+def : Pat<(i64 (sextloadi8 ADDRrr:$addr)), (LDSBrr ADDRrr:$addr)>;
+def : Pat<(i64 (sextloadi8 ADDRri:$addr)), (LDSBri ADDRri:$addr)>;
+
+def : Pat<(i64 (zextloadi16 ADDRrr:$addr)), (LDUHrr ADDRrr:$addr)>;
+def : Pat<(i64 (zextloadi16 ADDRri:$addr)), (LDUHri ADDRri:$addr)>;
+def : Pat<(i64 (extloadi16 ADDRrr:$addr)),  (LDUHrr ADDRrr:$addr)>;
+def : Pat<(i64 (extloadi16 ADDRri:$addr)),  (LDUHri ADDRri:$addr)>;
+def : Pat<(i64 (sextloadi16 ADDRrr:$addr)), (LDSHrr ADDRrr:$addr)>;
+def : Pat<(i64 (sextloadi16 ADDRri:$addr)), (LDSHri ADDRri:$addr)>;
+
+def : Pat<(i64 (zextloadi32 ADDRrr:$addr)), (LDrr ADDRrr:$addr)>;
+def : Pat<(i64 (zextloadi32 ADDRri:$addr)), (LDri ADDRri:$addr)>;
+def : Pat<(i64 (extloadi32 ADDRrr:$addr)),  (LDrr ADDRrr:$addr)>;
+def : Pat<(i64 (extloadi32 ADDRri:$addr)),  (LDri ADDRri:$addr)>;
+
+// Sign-extending load of i32 into i64 is a new SPARC v9 instruction.
+def LDSWrr : F3_1<3, 0b001011,
+                 (outs I64Regs:$dst), (ins MEMrr:$addr),
+                 "ldsw [$addr], $dst",
+                 [(set i64:$dst, (sextloadi32 ADDRrr:$addr))]>;
+def LDSWri : F3_2<3, 0b001011,
+                 (outs I64Regs:$dst), (ins MEMri:$addr),
+                 "ldsw [$addr], $dst",
+                 [(set i64:$dst, (sextloadi32 ADDRri:$addr))]>;
+
+// 64-bit stores.
+def STXrr  : F3_1<3, 0b001110,
+                 (outs), (ins MEMrr:$addr, I64Regs:$src),
+                 "stx $src, [$addr]",
+                 [(store i64:$src, ADDRrr:$addr)]>;
+def STXri  : F3_2<3, 0b001110,
+                 (outs), (ins MEMri:$addr, I64Regs:$src),
+                 "stx $src, [$addr]",
+                 [(store i64:$src, ADDRri:$addr)]>;
+
+// Truncating stores from i64 are identical to the i32 stores.
+def : Pat<(truncstorei8  i64:$src, ADDRrr:$addr), (STBrr ADDRrr:$addr, $src)>;
+def : Pat<(truncstorei8  i64:$src, ADDRri:$addr), (STBri ADDRri:$addr, $src)>;
+def : Pat<(truncstorei16 i64:$src, ADDRrr:$addr), (STHrr ADDRrr:$addr, $src)>;
+def : Pat<(truncstorei16 i64:$src, ADDRri:$addr), (STHri ADDRri:$addr, $src)>;
+def : Pat<(truncstorei32 i64:$src, ADDRrr:$addr), (STrr  ADDRrr:$addr, $src)>;
+def : Pat<(truncstorei32 i64:$src, ADDRri:$addr), (STri  ADDRri:$addr, $src)>;
+
+// store 0, addr -> store %g0, addr
+def : Pat<(store (i64 0), ADDRrr:$dst), (STXrr ADDRrr:$dst, (i64 G0))>;
+def : Pat<(store (i64 0), ADDRri:$dst), (STXri ADDRri:$dst, (i64 G0))>;
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Conditionals.
+//===----------------------------------------------------------------------===//
+//
+// Flag-setting instructions like subcc and addcc set both icc and xcc flags.
+// The icc flags correspond to the 32-bit result, and the xcc are for the
+// full 64-bit result.
+//
+// We reuse CMPICC SDNodes for compares, but use new BRXCC branch nodes for
+// 64-bit compares. See LowerBR_CC.
+
+let Predicates = [Is64Bit] in {
+
+let Uses = [ICC] in
+def BPXCC : BranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+                     "b$cc %xcc, $dst",
+                     [(SPbrxcc bb:$dst, imm:$cc)]>;
+
+// Conditional moves on %xcc.
+let Uses = [ICC], Constraints = "$f = $rd" in {
+def MOVXCCrr : Pseudo<(outs IntRegs:$rd),
+                      (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cond),
+                      "mov$cond %xcc, $rs2, $rd",
+                      [(set i32:$rd,
+                       (SPselectxcc i32:$rs2, i32:$f, imm:$cond))]>;
+def MOVXCCri : Pseudo<(outs IntRegs:$rd),
+                      (ins i32imm:$i, IntRegs:$f, CCOp:$cond),
+                      "mov$cond %xcc, $i, $rd",
+                      [(set i32:$rd,
+                       (SPselectxcc simm11:$i, i32:$f, imm:$cond))]>;
+def FMOVS_XCC : Pseudo<(outs FPRegs:$rd),
+                      (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cond),
+                      "fmovs$cond %xcc, $rs2, $rd",
+                      [(set f32:$rd,
+                       (SPselectxcc f32:$rs2, f32:$f, imm:$cond))]>;
+def FMOVD_XCC : Pseudo<(outs DFPRegs:$rd),
+                      (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cond),
+                      "fmovd$cond %xcc, $rs2, $rd",
+                      [(set f64:$rd,
+                       (SPselectxcc f64:$rs2, f64:$f, imm:$cond))]>;
+} // Uses, Constraints
+
+def : Pat<(SPselectxcc i64:$t, i64:$f, imm:$cond),
+          (MOVXCCrr $t, $f, imm:$cond)>;
+def : Pat<(SPselectxcc (i64 simm11:$t), i64:$f, imm:$cond),
+          (MOVXCCri (as_i32imm $t), $f, imm:$cond)>;
+
+def : Pat<(SPselecticc i64:$t, i64:$f, imm:$cond),
+          (MOVICCrr $t, $f, imm:$cond)>;
+def : Pat<(SPselecticc (i64 simm11:$t), i64:$f, imm:$cond),
+          (MOVICCri (as_i32imm $t), $f, imm:$cond)>;
+
+def : Pat<(SPselectfcc i64:$t, i64:$f, imm:$cond),
+          (MOVFCCrr $t, $f, imm:$cond)>;
+def : Pat<(SPselectfcc (i64 simm11:$t), i64:$f, imm:$cond),
+          (MOVFCCri (as_i32imm $t), $f, imm:$cond)>;
+
+} // Predicates = [Is64Bit]
diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td
index dce3312..6cdf6bc 100644
--- a/lib/Target/Sparc/SparcInstrFormats.td
+++ b/lib/Target/Sparc/SparcInstrFormats.td
@@ -7,14 +7,15 @@
 //
 //===----------------------------------------------------------------------===//
 
-class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction {
+class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern>
+          : Instruction {
   field bits<32> Inst;
 
   let Namespace = "SP";
 
   bits<2> op;
   let Inst{31-30} = op;               // Top two bits are the 'op' field
-  
+
   dag OutOperandList = outs;
   dag InOperandList = ins;
   let AsmString   = asmstr;
@@ -46,7 +47,7 @@ class F2_1<bits<3> op2Val, dag outs, dag ins, string asmstr, list<dag> pattern>
   let Inst{29-25} = rd;
 }
 
-class F2_2<bits<4> condVal, bits<3> op2Val, dag outs, dag ins, string asmstr, 
+class F2_2<bits<4> condVal, bits<3> op2Val, dag outs, dag ins, string asmstr,
            list<dag> pattern> : F2<outs, ins, asmstr, pattern> {
   bits<4>   cond;
   bit       annul = 0;     // currently unused
@@ -88,7 +89,7 @@ class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
   let Inst{4-0}  = rs2;
 }
 
-class F3_2<bits<2> opVal, bits<6> op3val, dag outs, dag ins, 
+class F3_2<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
            string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
   bits<13> simm13;
 
@@ -111,4 +112,41 @@ class F3_3<bits<2> opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins,
   let Inst{4-0}  = rs2;
 }
 
+// Shift by register rs2.
+class F3_Sr<bits<2> opVal, bits<6> op3val, bit xVal, dag outs, dag ins,
+            string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+  bit x = xVal;           // 1 for 64-bit shifts.
+  bits<5> rs2;
+
+  let op         = opVal;
+  let op3        = op3val;
+
+  let Inst{13}   = 0;     // i field = 0
+  let Inst{12}   = x;     // extended registers.
+  let Inst{4-0}  = rs2;
+}
 
+// Shift by immediate.
+class F3_Si<bits<2> opVal, bits<6> op3val, bit xVal, dag outs, dag ins,
+            string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+  bit x = xVal;           // 1 for 64-bit shifts.
+  bits<6> shcnt;          // shcnt32 / shcnt64.
+
+  let op         = opVal;
+  let op3        = op3val;
+
+  let Inst{13}   = 1;     // i field = 1
+  let Inst{12}   = x;     // extended registers.
+  let Inst{5-0}  = shcnt;
+}
+
+// Define rr and ri shift instructions with patterns.
+multiclass F3_S<string OpcStr, bits<6> Op3Val, bit XVal, SDNode OpNode,
+                ValueType VT, RegisterClass RC> {
+  def rr : F3_Sr<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, IntRegs:$rs2),
+                 !strconcat(OpcStr, " $rs, $rs2, $rd"),
+                 [(set VT:$rd, (OpNode VT:$rs, i32:$rs2))]>;
+  def ri : F3_Si<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, i32imm:$shcnt),
+                 !strconcat(OpcStr, " $rs, $shcnt, $rd"),
+                 [(set VT:$rd, (OpNode VT:$rs, (i32 imm:$shcnt)))]>;
+}
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 39d7329..6c14bc9 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -17,7 +17,9 @@
 #include "SparcSubtarget.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -29,7 +31,7 @@ using namespace llvm;
 
 SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
   : SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP),
-    RI(ST, *this), Subtarget(ST) {
+    RI(ST), Subtarget(ST) {
 }
 
 /// isLoadFromStackSlot - If the specified machine instruction is a direct
@@ -40,6 +42,7 @@ SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
 unsigned SparcInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                              int &FrameIndex) const {
   if (MI->getOpcode() == SP::LDri ||
+      MI->getOpcode() == SP::LDXri ||
       MI->getOpcode() == SP::LDFri ||
       MI->getOpcode() == SP::LDDFri) {
     if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() &&
@@ -59,6 +62,7 @@ unsigned SparcInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
 unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                             int &FrameIndex) const {
   if (MI->getOpcode() == SP::STri ||
+      MI->getOpcode() == SP::STXri ||
       MI->getOpcode() == SP::STFri ||
       MI->getOpcode() == SP::STDFri) {
     if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() &&
@@ -112,18 +116,6 @@ static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
   llvm_unreachable("Invalid cond code");
 }
 
-MachineInstr *
-SparcInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
-                                         int FrameIx,
-                                         uint64_t Offset,
-                                         const MDNode *MDPtr,
-                                         DebugLoc dl) const {
-  MachineInstrBuilder MIB = BuildMI(MF, dl, get(SP::DBG_VALUE))
-    .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
-  return &*MIB;
-}
-
-
 bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                    MachineBasicBlock *&TBB,
                                    MachineBasicBlock *&FBB,
@@ -139,15 +131,15 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
     if (I->isDebugValue())
       continue;
 
-    //When we see a non-terminator, we are done
+    // When we see a non-terminator, we are done.
     if (!isUnpredicatedTerminator(I))
       break;
 
-    //Terminator is not a branch
+    // Terminator is not a branch.
     if (!I->isBranch())
       return true;
 
-    //Handle Unconditional branches
+    // Handle Unconditional branches.
     if (I->getOpcode() == SP::BA) {
       UnCondBrIter = I;
 
@@ -176,7 +168,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
 
     unsigned Opcode = I->getOpcode();
     if (Opcode != SP::BCOND && Opcode != SP::FBCOND)
-      return true; //Unknown Opcode
+      return true; // Unknown Opcode.
 
     SPCC::CondCodes BranchCode = (SPCC::CondCodes)I->getOperand(1).getImm();
 
@@ -185,7 +177,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
       if (AllowModify && UnCondBrIter != MBB.end() &&
           MBB.isLayoutSuccessor(TargetBB)) {
 
-        //Transform the code
+        // Transform the code
         //
         //    brCC L1
         //    ba L2
@@ -219,8 +211,8 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
       Cond.push_back(MachineOperand::CreateImm(BranchCode));
       continue;
     }
-    //FIXME: Handle subsequent conditional branches
-    //For now, we can't handle multiple conditional branches
+    // FIXME: Handle subsequent conditional branches.
+    // For now, we can't handle multiple conditional branches.
     return true;
   }
   return false;
@@ -241,7 +233,7 @@ SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
     return 1;
   }
 
-  //Conditional branch
+  // Conditional branch
   unsigned CC = Cond[0].getImm();
 
   if (IsIntegerCC(CC))
@@ -287,10 +279,28 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   else if (SP::FPRegsRegClass.contains(DestReg, SrcReg))
     BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg)
       .addReg(SrcReg, getKillRegState(KillSrc));
-  else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg))
-    BuildMI(MBB, I, DL, get(Subtarget.isV9() ? SP::FMOVD : SP::FpMOVD), DestReg)
-      .addReg(SrcReg, getKillRegState(KillSrc));
-  else
+  else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg)) {
+    if (Subtarget.isV9()) {
+      BuildMI(MBB, I, DL, get(SP::FMOVD), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+    } else {
+      // Use two FMOVS instructions.
+      const TargetRegisterInfo *TRI = &getRegisterInfo();
+      MachineInstr *MovMI = 0;
+      unsigned subRegIdx[] = {SP::sub_even, SP::sub_odd};
+      for (unsigned i = 0; i != 2; ++i) {
+        unsigned Dst = TRI->getSubReg(DestReg, subRegIdx[i]);
+        unsigned Src = TRI->getSubReg(SrcReg,  subRegIdx[i]);
+        assert(Dst && Src && "Bad sub-register");
+
+        MovMI = BuildMI(MBB, I, DL, get(SP::FMOVS), Dst).addReg(Src);
+      }
+      // Add implicit super-register defs and kills to the last MovMI.
+      MovMI->addRegisterDefined(DestReg, TRI);
+      if (KillSrc)
+        MovMI->addRegisterKilled(SrcReg, TRI);
+    }
+  } else
     llvm_unreachable("Impossible reg-to-reg copy");
 }
 
@@ -302,16 +312,27 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
 
+  MachineFunction *MF = MBB.getParent();
+  const MachineFrameInfo &MFI = *MF->getFrameInfo();
+  MachineMemOperand *MMO =
+    MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+                             MachineMemOperand::MOStore,
+                             MFI.getObjectSize(FI),
+                             MFI.getObjectAlignment(FI));
+
   // On the order of operands here: think "[FrameIdx + 0] = SrcReg".
-  if (RC == &SP::IntRegsRegClass)
+  if (RC == &SP::I64RegsRegClass)
+    BuildMI(MBB, I, DL, get(SP::STXri)).addFrameIndex(FI).addImm(0)
+      .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+  else if (RC == &SP::IntRegsRegClass)
     BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0)
-      .addReg(SrcReg, getKillRegState(isKill));
+      .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
   else if (RC == &SP::FPRegsRegClass)
     BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0)
-      .addReg(SrcReg,  getKillRegState(isKill));
+      .addReg(SrcReg,  getKillRegState(isKill)).addMemOperand(MMO);
   else if (RC == &SP::DFPRegsRegClass)
     BuildMI(MBB, I, DL, get(SP::STDFri)).addFrameIndex(FI).addImm(0)
-      .addReg(SrcReg,  getKillRegState(isKill));
+      .addReg(SrcReg,  getKillRegState(isKill)).addMemOperand(MMO);
   else
     llvm_unreachable("Can't store this register to stack slot");
 }
@@ -324,12 +345,26 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
 
-  if (RC == &SP::IntRegsRegClass)
-    BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0);
+  MachineFunction *MF = MBB.getParent();
+  const MachineFrameInfo &MFI = *MF->getFrameInfo();
+  MachineMemOperand *MMO =
+    MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+                             MachineMemOperand::MOLoad,
+                             MFI.getObjectSize(FI),
+                             MFI.getObjectAlignment(FI));
+
+  if (RC == &SP::I64RegsRegClass)
+    BuildMI(MBB, I, DL, get(SP::LDXri), DestReg).addFrameIndex(FI).addImm(0)
+      .addMemOperand(MMO);
+  else if (RC == &SP::IntRegsRegClass)
+    BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0)
+      .addMemOperand(MMO);
   else if (RC == &SP::FPRegsRegClass)
-    BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0);
+    BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0)
+      .addMemOperand(MMO);
   else if (RC == &SP::DFPRegsRegClass)
-    BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0);
+    BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0)
+      .addMemOperand(MMO);
   else
     llvm_unreachable("Can't load this register from stack slot");
 }
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index 204f698..d0b220b 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -53,7 +53,7 @@ public:
   /// any side effects other than loading from the stack slot.
   virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
                                        int &FrameIndex) const;
-  
+
   /// isStoreToStackSlot - If the specified machine instruction is a direct
   /// store to a stack slot, return the virtual or physical register number of
   /// the source reg along with the FrameIndex of the loaded stack slot.  If
@@ -62,14 +62,6 @@ public:
   virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
                                       int &FrameIndex) const;
 
-  /// emitFrameIndexDebugValue - Emit a target-dependent form of
-  /// DBG_VALUE encoding the address of a frame index.
-  virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
-                                                 int FrameIx,
-                                                 uint64_t Offset,
-                                                 const MDNode *MDPtr,
-                                                 DebugLoc dl) const;
-
   virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                              MachineBasicBlock *&FBB,
                              SmallVectorImpl<MachineOperand> &Cond,
@@ -86,7 +78,7 @@ public:
                            MachineBasicBlock::iterator I, DebugLoc DL,
                            unsigned DestReg, unsigned SrcReg,
                            bool KillSrc) const;
-  
+
   virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    unsigned SrcReg, bool isKill, int FrameIndex,
@@ -98,7 +90,7 @@ public:
                                     unsigned DestReg, int FrameIndex,
                                     const TargetRegisterClass *RC,
                                     const TargetRegisterInfo *TRI) const;
-  
+
   unsigned getGlobalBaseReg(MachineFunction *MF) const;
 };
 
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index 90b698d..d4cac4d 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -21,6 +21,12 @@ include "SparcInstrFormats.td"
 // Feature predicates.
 //===----------------------------------------------------------------------===//
 
+// True when generating 32-bit code.
+def Is32Bit : Predicate<"!Subtarget.is64Bit()">;
+
+// True when generating 64-bit code. This also implies HasV9.
+def Is64Bit : Predicate<"Subtarget.is64Bit()">;
+
 // HasV9 - This predicate is true when the target processor supports V9
 // instructions.  Note that the machine may be running in 32-bit mode.
 def HasV9   : Predicate<"Subtarget.isV9()">;
@@ -58,22 +64,21 @@ def HI22 : SDNodeXForm<imm, [{
 }]>;
 
 def SETHIimm : PatLeaf<(imm), [{
-  return (((unsigned)N->getZExtValue() >> 10) << 10) ==
-         (unsigned)N->getZExtValue();
+  return isShiftedUInt<22, 10>(N->getZExtValue());
 }], HI22>;
 
 // Addressing modes.
-def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
-def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+def ADDRrr : ComplexPattern<iPTR, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<iPTR, 2, "SelectADDRri", [frameindex], []>;
 
 // Address operands
-def MEMrr : Operand<i32> {
+def MEMrr : Operand<iPTR> {
   let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops IntRegs, IntRegs);
+  let MIOperandInfo = (ops ptr_rc, ptr_rc);
 }
-def MEMri : Operand<i32> {
+def MEMri : Operand<iPTR> {
   let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops IntRegs, i32imm);
+  let MIOperandInfo = (ops ptr_rc, i32imm);
 }
 
 // Branch targets have OtherVT type.
@@ -84,9 +89,11 @@ def calltarget : Operand<i32>;
 let PrintMethod = "printCCOperand" in
   def CCOp : Operand<i32>;
 
-def SDTSPcmpfcc : 
+def SDTSPcmpicc :
+SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>;
+def SDTSPcmpfcc :
 SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
-def SDTSPbrcc : 
+def SDTSPbrcc :
 SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
 def SDTSPselectcc :
 SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>]>;
@@ -95,9 +102,10 @@ SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
 def SDTSPITOF :
 SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
 
-def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutGlue]>;
+def SPcmpicc : SDNode<"SPISD::CMPICC", SDTSPcmpicc, [SDNPOutGlue]>;
 def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>;
 def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
+def SPbrxcc : SDNode<"SPISD::BRXCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
 def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
 
 def SPhi    : SDNode<"SPISD::Hi", SDTIntUnaryOp>;
@@ -107,6 +115,7 @@ def SPftoi  : SDNode<"SPISD::FTOI", SDTSPFTOI>;
 def SPitof  : SDNode<"SPISD::ITOF", SDTSPITOF>;
 
 def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInGlue]>;
+def SPselectxcc : SDNode<"SPISD::SELECT_XCC", SDTSPselectcc, [SDNPInGlue]>;
 def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInGlue]>;
 
 //  These are target-independent nodes, but have target-specific formats.
@@ -179,20 +188,20 @@ def FCC_O   : FCC_VAL<29>;  // Ordered
 
 /// F3_12 multiclass - Define a normal F3_1/F3_2 pattern in one shot.
 multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> {
-  def rr  : F3_1<2, Op3Val, 
+  def rr  : F3_1<2, Op3Val,
                  (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
                  !strconcat(OpcStr, " $b, $c, $dst"),
-                 [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+                 [(set i32:$dst, (OpNode i32:$b, i32:$c))]>;
   def ri  : F3_2<2, Op3Val,
                  (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
                  !strconcat(OpcStr, " $b, $c, $dst"),
-                 [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>;
+                 [(set i32:$dst, (OpNode i32:$b, (i32 simm13:$c)))]>;
 }
 
 /// F3_12np multiclass - Define a normal F3_1/F3_2 pattern in one shot, with no
 /// pattern.
 multiclass F3_12np<string OpcStr, bits<6> Op3Val> {
-  def rr  : F3_1<2, Op3Val, 
+  def rr  : F3_1<2, Op3Val,
                  (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
                  !strconcat(OpcStr, " $b, $c, $dst"), []>;
   def ri  : F3_2<2, Op3Val,
@@ -236,40 +245,24 @@ let hasSideEffects = 1, mayStore = 1 in {
 def UNIMP : F2_1<0b000, (outs), (ins i32imm:$val),
                 "unimp $val", []>;
 
-// FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the 
-// fpmover pass.
-let Predicates = [HasNoV9] in {  // Only emit these in V8 mode.
-  def FpMOVD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
-                      "!FpMOVD $src, $dst", []>;
-  def FpNEGD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
-                      "!FpNEGD $src, $dst",
-                      [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
-  def FpABSD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
-                      "!FpABSD $src, $dst",
-                      [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
-}
-
 // SELECT_CC_* - Used to implement the SELECT_CC DAG operation.  Expanded after
 // instruction selection into a branch sequence.  This has to handle all
 // permutations of selection between i32/f32/f64 on ICC and FCC.
-  // Expanded after instruction selection.
-let Uses = [ICC], usesCustomInserter = 1 in { 
+// Expanded after instruction selection.
+let Uses = [ICC], usesCustomInserter = 1 in {
   def SELECT_CC_Int_ICC
    : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
             "; SELECT_CC_Int_ICC PSEUDO!",
-            [(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F,
-                                             imm:$Cond))]>;
+            [(set i32:$dst, (SPselecticc i32:$T, i32:$F, imm:$Cond))]>;
   def SELECT_CC_FP_ICC
    : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
             "; SELECT_CC_FP_ICC PSEUDO!",
-            [(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F,
-                                            imm:$Cond))]>;
+            [(set f32:$dst, (SPselecticc f32:$T, f32:$F, imm:$Cond))]>;
 
   def SELECT_CC_DFP_ICC
    : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
             "; SELECT_CC_DFP_ICC PSEUDO!",
-            [(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F,
-                                             imm:$Cond))]>;
+            [(set f64:$dst, (SPselecticc f64:$T, f64:$F, imm:$Cond))]>;
 }
 
 let usesCustomInserter = 1, Uses = [FCC] in {
@@ -277,19 +270,16 @@ let usesCustomInserter = 1, Uses = [FCC] in {
   def SELECT_CC_Int_FCC
    : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
             "; SELECT_CC_Int_FCC PSEUDO!",
-            [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F,
-                                             imm:$Cond))]>;
+            [(set i32:$dst, (SPselectfcc i32:$T, i32:$F, imm:$Cond))]>;
 
   def SELECT_CC_FP_FCC
    : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
             "; SELECT_CC_FP_FCC PSEUDO!",
-            [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F,
-                                            imm:$Cond))]>;
+            [(set f32:$dst, (SPselectfcc f32:$T, f32:$F, imm:$Cond))]>;
   def SELECT_CC_DFP_FCC
    : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
             "; SELECT_CC_DFP_FCC PSEUDO!",
-            [(set DFPRegs:$dst, (SPselectfcc DFPRegs:$T, DFPRegs:$F,
-                                             imm:$Cond))]>;
+            [(set f64:$dst, (SPselectfcc f64:$T, f64:$F, imm:$Cond))]>;
 }
 
 
@@ -309,111 +299,111 @@ let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
 def LDSBrr : F3_1<3, 0b001001,
                   (outs IntRegs:$dst), (ins MEMrr:$addr),
                   "ldsb [$addr], $dst",
-                  [(set IntRegs:$dst, (sextloadi8 ADDRrr:$addr))]>;
+                  [(set i32:$dst, (sextloadi8 ADDRrr:$addr))]>;
 def LDSBri : F3_2<3, 0b001001,
                   (outs IntRegs:$dst), (ins MEMri:$addr),
                   "ldsb [$addr], $dst",
-                  [(set IntRegs:$dst, (sextloadi8 ADDRri:$addr))]>;
+                  [(set i32:$dst, (sextloadi8 ADDRri:$addr))]>;
 def LDSHrr : F3_1<3, 0b001010,
                   (outs IntRegs:$dst), (ins MEMrr:$addr),
                   "ldsh [$addr], $dst",
-                  [(set IntRegs:$dst, (sextloadi16 ADDRrr:$addr))]>;
+                  [(set i32:$dst, (sextloadi16 ADDRrr:$addr))]>;
 def LDSHri : F3_2<3, 0b001010,
                   (outs IntRegs:$dst), (ins MEMri:$addr),
                   "ldsh [$addr], $dst",
-                  [(set IntRegs:$dst, (sextloadi16 ADDRri:$addr))]>;
+                  [(set i32:$dst, (sextloadi16 ADDRri:$addr))]>;
 def LDUBrr : F3_1<3, 0b000001,
                   (outs IntRegs:$dst), (ins MEMrr:$addr),
                   "ldub [$addr], $dst",
-                  [(set IntRegs:$dst, (zextloadi8 ADDRrr:$addr))]>;
+                  [(set i32:$dst, (zextloadi8 ADDRrr:$addr))]>;
 def LDUBri : F3_2<3, 0b000001,
                   (outs IntRegs:$dst), (ins MEMri:$addr),
                   "ldub [$addr], $dst",
-                  [(set IntRegs:$dst, (zextloadi8 ADDRri:$addr))]>;
+                  [(set i32:$dst, (zextloadi8 ADDRri:$addr))]>;
 def LDUHrr : F3_1<3, 0b000010,
                   (outs IntRegs:$dst), (ins MEMrr:$addr),
                   "lduh [$addr], $dst",
-                  [(set IntRegs:$dst, (zextloadi16 ADDRrr:$addr))]>;
+                  [(set i32:$dst, (zextloadi16 ADDRrr:$addr))]>;
 def LDUHri : F3_2<3, 0b000010,
                   (outs IntRegs:$dst), (ins MEMri:$addr),
                   "lduh [$addr], $dst",
-                  [(set IntRegs:$dst, (zextloadi16 ADDRri:$addr))]>;
+                  [(set i32:$dst, (zextloadi16 ADDRri:$addr))]>;
 def LDrr   : F3_1<3, 0b000000,
                   (outs IntRegs:$dst), (ins MEMrr:$addr),
                   "ld [$addr], $dst",
-                  [(set IntRegs:$dst, (load ADDRrr:$addr))]>;
+                  [(set i32:$dst, (load ADDRrr:$addr))]>;
 def LDri   : F3_2<3, 0b000000,
                   (outs IntRegs:$dst), (ins MEMri:$addr),
                   "ld [$addr], $dst",
-                  [(set IntRegs:$dst, (load ADDRri:$addr))]>;
+                  [(set i32:$dst, (load ADDRri:$addr))]>;
 
 // Section B.2 - Load Floating-point Instructions, p. 92
 def LDFrr  : F3_1<3, 0b100000,
                   (outs FPRegs:$dst), (ins MEMrr:$addr),
                   "ld [$addr], $dst",
-                  [(set FPRegs:$dst, (load ADDRrr:$addr))]>;
+                  [(set f32:$dst, (load ADDRrr:$addr))]>;
 def LDFri  : F3_2<3, 0b100000,
                   (outs FPRegs:$dst), (ins MEMri:$addr),
                   "ld [$addr], $dst",
-                  [(set FPRegs:$dst, (load ADDRri:$addr))]>;
+                  [(set f32:$dst, (load ADDRri:$addr))]>;
 def LDDFrr : F3_1<3, 0b100011,
                   (outs DFPRegs:$dst), (ins MEMrr:$addr),
                   "ldd [$addr], $dst",
-                  [(set DFPRegs:$dst, (load ADDRrr:$addr))]>;
+                  [(set f64:$dst, (load ADDRrr:$addr))]>;
 def LDDFri : F3_2<3, 0b100011,
                   (outs DFPRegs:$dst), (ins MEMri:$addr),
                   "ldd [$addr], $dst",
-                  [(set DFPRegs:$dst, (load ADDRri:$addr))]>;
+                  [(set f64:$dst, (load ADDRri:$addr))]>;
 
 // Section B.4 - Store Integer Instructions, p. 95
 def STBrr : F3_1<3, 0b000101,
                  (outs), (ins MEMrr:$addr, IntRegs:$src),
                  "stb $src, [$addr]",
-                 [(truncstorei8 IntRegs:$src, ADDRrr:$addr)]>;
+                 [(truncstorei8 i32:$src, ADDRrr:$addr)]>;
 def STBri : F3_2<3, 0b000101,
                  (outs), (ins MEMri:$addr, IntRegs:$src),
                  "stb $src, [$addr]",
-                 [(truncstorei8 IntRegs:$src, ADDRri:$addr)]>;
+                 [(truncstorei8 i32:$src, ADDRri:$addr)]>;
 def STHrr : F3_1<3, 0b000110,
                  (outs), (ins MEMrr:$addr, IntRegs:$src),
                  "sth $src, [$addr]",
-                 [(truncstorei16 IntRegs:$src, ADDRrr:$addr)]>;
+                 [(truncstorei16 i32:$src, ADDRrr:$addr)]>;
 def STHri : F3_2<3, 0b000110,
                  (outs), (ins MEMri:$addr, IntRegs:$src),
                  "sth $src, [$addr]",
-                 [(truncstorei16 IntRegs:$src, ADDRri:$addr)]>;
+                 [(truncstorei16 i32:$src, ADDRri:$addr)]>;
 def STrr  : F3_1<3, 0b000100,
                  (outs), (ins MEMrr:$addr, IntRegs:$src),
                  "st $src, [$addr]",
-                 [(store IntRegs:$src, ADDRrr:$addr)]>;
+                 [(store i32:$src, ADDRrr:$addr)]>;
 def STri  : F3_2<3, 0b000100,
                  (outs), (ins MEMri:$addr, IntRegs:$src),
                  "st $src, [$addr]",
-                 [(store IntRegs:$src, ADDRri:$addr)]>;
+                 [(store i32:$src, ADDRri:$addr)]>;
 
 // Section B.5 - Store Floating-point Instructions, p. 97
 def STFrr   : F3_1<3, 0b100100,
                    (outs), (ins MEMrr:$addr, FPRegs:$src),
                    "st $src, [$addr]",
-                   [(store FPRegs:$src, ADDRrr:$addr)]>;
+                   [(store f32:$src, ADDRrr:$addr)]>;
 def STFri   : F3_2<3, 0b100100,
                    (outs), (ins MEMri:$addr, FPRegs:$src),
                    "st $src, [$addr]",
-                   [(store FPRegs:$src, ADDRri:$addr)]>;
+                   [(store f32:$src, ADDRri:$addr)]>;
 def STDFrr  : F3_1<3, 0b100111,
                    (outs), (ins MEMrr:$addr, DFPRegs:$src),
                    "std  $src, [$addr]",
-                   [(store DFPRegs:$src, ADDRrr:$addr)]>;
+                   [(store f64:$src, ADDRrr:$addr)]>;
 def STDFri  : F3_2<3, 0b100111,
                    (outs), (ins MEMri:$addr, DFPRegs:$src),
                    "std $src, [$addr]",
-                   [(store DFPRegs:$src, ADDRri:$addr)]>;
+                   [(store f64:$src, ADDRri:$addr)]>;
 
 // Section B.9 - SETHI Instruction, p. 104
 def SETHIi: F2_1<0b100,
                  (outs IntRegs:$dst), (ins i32imm:$src),
                  "sethi $src, $dst",
-                 [(set IntRegs:$dst, SETHIimm:$src)]>;
+                 [(set i32:$dst, SETHIimm:$src)]>;
 
 // Section B.10 - NOP Instruction, p. 105
 // (It's a special case of SETHI)
@@ -426,7 +416,7 @@ defm AND    : F3_12<"and", 0b000001, and>;
 def ANDNrr  : F3_1<2, 0b000101,
                    (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
                    "andn $b, $c, $dst",
-                   [(set IntRegs:$dst, (and IntRegs:$b, (not IntRegs:$c)))]>;
+                   [(set i32:$dst, (and i32:$b, (not i32:$c)))]>;
 def ANDNri  : F3_2<2, 0b000101,
                    (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
                    "andn $b, $c, $dst", []>;
@@ -436,7 +426,7 @@ defm OR     : F3_12<"or", 0b000010, or>;
 def ORNrr   : F3_1<2, 0b000110,
                    (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
                    "orn $b, $c, $dst",
-                   [(set IntRegs:$dst, (or IntRegs:$b, (not IntRegs:$c)))]>;
+                   [(set i32:$dst, (or i32:$b, (not i32:$c)))]>;
 def ORNri   : F3_2<2, 0b000110,
                    (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
                    "orn $b, $c, $dst", []>;
@@ -445,7 +435,7 @@ defm XOR    : F3_12<"xor", 0b000011, xor>;
 def XNORrr  : F3_1<2, 0b000111,
                    (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
                    "xnor $b, $c, $dst",
-                   [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>;
+                   [(set i32:$dst, (not (xor i32:$b, i32:$c)))]>;
 def XNORri  : F3_2<2, 0b000111,
                    (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
                    "xnor $b, $c, $dst", []>;
@@ -462,9 +452,9 @@ defm ADD   : F3_12<"add", 0b000000, add>;
 def LEA_ADDri   : F3_2<2, 0b000000,
                    (outs IntRegs:$dst), (ins MEMri:$addr),
                    "add ${addr:arith}, $dst",
-                   [(set IntRegs:$dst, ADDRri:$addr)]>;
+                   [(set iPTR:$dst, ADDRri:$addr)]>;
 
-let Defs = [ICC] in                   
+let Defs = [ICC] in
   defm ADDCC  : F3_12<"addcc", 0b010000, addc>;
 
 let Uses = [ICC] in
@@ -472,14 +462,24 @@ let Uses = [ICC] in
 
 // Section B.15 - Subtract Instructions, p. 110
 defm SUB    : F3_12  <"sub"  , 0b000100, sub>;
-let Uses = [ICC] in 
+let Uses = [ICC] in
   defm SUBX   : F3_12  <"subx" , 0b001100, sube>;
 
-let Defs = [ICC] in 
-  defm SUBCC  : F3_12  <"subcc", 0b010100, SPcmpicc>;
+let Defs = [ICC] in {
+  defm SUBCC  : F3_12  <"subcc", 0b010100, subc>;
+
+  def CMPrr   : F3_1<2, 0b010100,
+                     (outs), (ins IntRegs:$b, IntRegs:$c),
+                     "cmp $b, $c",
+                     [(SPcmpicc i32:$b, i32:$c)]>;
+  def CMPri   : F3_1<2, 0b010100,
+                     (outs), (ins IntRegs:$b, i32imm:$c),
+                     "cmp $b, $c",
+                     [(SPcmpicc i32:$b, (i32 simm13:$c))]>;
+}
 
 let Uses = [ICC], Defs = [ICC] in
-  def SUBXCCrr: F3_1<2, 0b011100, 
+  def SUBXCCrr: F3_1<2, 0b011100,
                 (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
                 "subxcc $b, $c, $dst", []>;
 
@@ -515,6 +515,20 @@ let isBarrier = 1 in
                       "ba $dst",
                       [(br bb:$dst)]>;
 
+// Indirect branch instructions.
+let isTerminator = 1, isBarrier = 1,
+     hasDelaySlot = 1, isBranch =1,
+     isIndirectBranch = 1 in {
+  def BINDrr  : F3_1<2, 0b111000,
+                   (outs), (ins MEMrr:$ptr),
+                   "jmp $ptr",
+                   [(brind ADDRrr:$ptr)]>;
+  def BINDri  : F3_2<2, 0b111000,
+                   (outs), (ins MEMri:$ptr),
+                   "jmp $ptr",
+                   [(brind ADDRri:$ptr)]>;
+}
+
 // FIXME: the encoding for the JIT should look at the condition field.
 let Uses = [ICC] in
   def BCOND : BranchSP<0, (ins brtarget:$dst, CCOp:$cc),
@@ -552,7 +566,7 @@ let Uses = [O6],
     let op = 1;
     let Inst{29-0} = disp;
   }
-  
+
   // indirect calls
   def JMPLrr : F3_1<2, 0b111000,
                     (outs), (ins MEMrr:$ptr, variable_ops),
@@ -565,7 +579,7 @@ let Uses = [O6],
 }
 
 // Section B.28 - Read State Register Instructions
-let Uses = [Y] in 
+let Uses = [Y] in
   def RDY : F3_1<2, 0b101000,
                  (outs IntRegs:$dst), (ins),
                  "rd %y, $dst", []>;
@@ -584,7 +598,7 @@ def FITOS : F3_3<2, 0b110100, 0b011000100,
                  (outs FPRegs:$dst), (ins FPRegs:$src),
                  "fitos $src, $dst",
                  [(set FPRegs:$dst, (SPitof FPRegs:$src))]>;
-def FITOD : F3_3<2, 0b110100, 0b011001000, 
+def FITOD : F3_3<2, 0b110100, 0b011001000,
                  (outs DFPRegs:$dst), (ins FPRegs:$src),
                  "fitod $src, $dst",
                  [(set DFPRegs:$dst, (SPitof FPRegs:$src))]>;
@@ -600,38 +614,38 @@ def FDTOI : F3_3<2, 0b110100, 0b011010010,
                  [(set FPRegs:$dst, (SPftoi DFPRegs:$src))]>;
 
 // Convert between Floating-point Formats Instructions, p. 143
-def FSTOD : F3_3<2, 0b110100, 0b011001001, 
+def FSTOD : F3_3<2, 0b110100, 0b011001001,
                  (outs DFPRegs:$dst), (ins FPRegs:$src),
                  "fstod $src, $dst",
-                 [(set DFPRegs:$dst, (fextend FPRegs:$src))]>;
+                 [(set f64:$dst, (fextend f32:$src))]>;
 def FDTOS : F3_3<2, 0b110100, 0b011000110,
                  (outs FPRegs:$dst), (ins DFPRegs:$src),
                  "fdtos $src, $dst",
-                 [(set FPRegs:$dst, (fround DFPRegs:$src))]>;
+                 [(set f32:$dst, (fround f64:$src))]>;
 
 // Floating-point Move Instructions, p. 144
 def FMOVS : F3_3<2, 0b110100, 0b000000001,
                  (outs FPRegs:$dst), (ins FPRegs:$src),
                  "fmovs $src, $dst", []>;
-def FNEGS : F3_3<2, 0b110100, 0b000000101, 
+def FNEGS : F3_3<2, 0b110100, 0b000000101,
                  (outs FPRegs:$dst), (ins FPRegs:$src),
                  "fnegs $src, $dst",
-                 [(set FPRegs:$dst, (fneg FPRegs:$src))]>;
-def FABSS : F3_3<2, 0b110100, 0b000001001, 
+                 [(set f32:$dst, (fneg f32:$src))]>;
+def FABSS : F3_3<2, 0b110100, 0b000001001,
                  (outs FPRegs:$dst), (ins FPRegs:$src),
                  "fabss $src, $dst",
-                 [(set FPRegs:$dst, (fabs FPRegs:$src))]>;
+                 [(set f32:$dst, (fabs f32:$src))]>;
 
 
 // Floating-point Square Root Instructions, p.145
-def FSQRTS : F3_3<2, 0b110100, 0b000101001, 
+def FSQRTS : F3_3<2, 0b110100, 0b000101001,
                   (outs FPRegs:$dst), (ins FPRegs:$src),
                   "fsqrts $src, $dst",
-                  [(set FPRegs:$dst, (fsqrt FPRegs:$src))]>;
-def FSQRTD : F3_3<2, 0b110100, 0b000101010, 
+                  [(set f32:$dst, (fsqrt f32:$src))]>;
+def FSQRTD : F3_3<2, 0b110100, 0b000101010,
                   (outs DFPRegs:$dst), (ins DFPRegs:$src),
                   "fsqrtd $src, $dst",
-                  [(set DFPRegs:$dst, (fsqrt DFPRegs:$src))]>;
+                  [(set f64:$dst, (fsqrt f64:$src))]>;
 
 
 
@@ -639,42 +653,42 @@ def FSQRTD : F3_3<2, 0b110100, 0b000101010,
 def FADDS  : F3_3<2, 0b110100, 0b001000001,
                   (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
                   "fadds $src1, $src2, $dst",
-                  [(set FPRegs:$dst, (fadd FPRegs:$src1, FPRegs:$src2))]>;
+                  [(set f32:$dst, (fadd f32:$src1, f32:$src2))]>;
 def FADDD  : F3_3<2, 0b110100, 0b001000010,
                   (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
                   "faddd $src1, $src2, $dst",
-                  [(set DFPRegs:$dst, (fadd DFPRegs:$src1, DFPRegs:$src2))]>;
+                  [(set f64:$dst, (fadd f64:$src1, f64:$src2))]>;
 def FSUBS  : F3_3<2, 0b110100, 0b001000101,
                   (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
                   "fsubs $src1, $src2, $dst",
-                  [(set FPRegs:$dst, (fsub FPRegs:$src1, FPRegs:$src2))]>;
+                  [(set f32:$dst, (fsub f32:$src1, f32:$src2))]>;
 def FSUBD  : F3_3<2, 0b110100, 0b001000110,
                   (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
                   "fsubd $src1, $src2, $dst",
-                  [(set DFPRegs:$dst, (fsub DFPRegs:$src1, DFPRegs:$src2))]>;
+                  [(set f64:$dst, (fsub f64:$src1, f64:$src2))]>;
 
 // Floating-point Multiply and Divide Instructions, p. 147
 def FMULS  : F3_3<2, 0b110100, 0b001001001,
                   (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
                   "fmuls $src1, $src2, $dst",
-                  [(set FPRegs:$dst, (fmul FPRegs:$src1, FPRegs:$src2))]>;
+                  [(set f32:$dst, (fmul f32:$src1, f32:$src2))]>;
 def FMULD  : F3_3<2, 0b110100, 0b001001010,
                   (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
                   "fmuld $src1, $src2, $dst",
-                  [(set DFPRegs:$dst, (fmul DFPRegs:$src1, DFPRegs:$src2))]>;
+                  [(set f64:$dst, (fmul f64:$src1, f64:$src2))]>;
 def FSMULD : F3_3<2, 0b110100, 0b001101001,
                   (outs DFPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
                   "fsmuld $src1, $src2, $dst",
-                  [(set DFPRegs:$dst, (fmul (fextend FPRegs:$src1),
-                                            (fextend FPRegs:$src2)))]>;
+                  [(set f64:$dst, (fmul (fextend f32:$src1),
+                                        (fextend f32:$src2)))]>;
 def FDIVS  : F3_3<2, 0b110100, 0b001001101,
                  (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
                  "fdivs $src1, $src2, $dst",
-                 [(set FPRegs:$dst, (fdiv FPRegs:$src1, FPRegs:$src2))]>;
+                 [(set f32:$dst, (fdiv f32:$src1, f32:$src2))]>;
 def FDIVD  : F3_3<2, 0b110100, 0b001001110,
                  (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
                  "fdivd $src1, $src2, $dst",
-                 [(set DFPRegs:$dst, (fdiv DFPRegs:$src1, DFPRegs:$src2))]>;
+                 [(set f64:$dst, (fdiv f64:$src1, f64:$src2))]>;
 
 // Floating-point Compare Instructions, p. 148
 // Note: the 2nd template arg is different for these guys.
@@ -685,11 +699,11 @@ let Defs = [FCC] in {
   def FCMPS  : F3_3<2, 0b110101, 0b001010001,
                    (outs), (ins FPRegs:$src1, FPRegs:$src2),
                    "fcmps $src1, $src2\n\tnop",
-                   [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>;
+                   [(SPcmpfcc f32:$src1, f32:$src2)]>;
   def FCMPD  : F3_3<2, 0b110101, 0b001010010,
                    (outs), (ins DFPRegs:$src1, DFPRegs:$src2),
                    "fcmpd $src1, $src2\n\tnop",
-                   [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>;
+                   [(SPcmpfcc f64:$src1, f64:$src2)]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -697,59 +711,51 @@ let Defs = [FCC] in {
 //===----------------------------------------------------------------------===//
 
 // V9 Conditional Moves.
-let Predicates = [HasV9], Constraints = "$T = $dst" in {
+let Predicates = [HasV9], Constraints = "$f = $rd" in {
   // Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual.
   // FIXME: Add instruction encodings for the JIT some day.
   let Uses = [ICC] in {
     def MOVICCrr
-      : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
-               "mov$cc %icc, $F, $dst",
-               [(set IntRegs:$dst,
-                           (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+      : Pseudo<(outs IntRegs:$rd), (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cc),
+               "mov$cc %icc, $rs2, $rd",
+               [(set i32:$rd, (SPselecticc i32:$rs2, i32:$f, imm:$cc))]>;
     def MOVICCri
-      : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
-               "mov$cc %icc, $F, $dst",
-               [(set IntRegs:$dst,
-                            (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>;
+      : Pseudo<(outs IntRegs:$rd), (ins i32imm:$i, IntRegs:$f, CCOp:$cc),
+               "mov$cc %icc, $i, $rd",
+               [(set i32:$rd, (SPselecticc simm11:$i, i32:$f, imm:$cc))]>;
   }
 
   let Uses = [FCC] in {
     def MOVFCCrr
-      : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
-               "mov$cc %fcc0, $F, $dst",
-               [(set IntRegs:$dst,
-                           (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+      : Pseudo<(outs IntRegs:$rd), (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cc),
+               "mov$cc %fcc0, $rs2, $rd",
+               [(set i32:$rd, (SPselectfcc i32:$rs2, i32:$f, imm:$cc))]>;
     def MOVFCCri
-      : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
-               "mov$cc %fcc0, $F, $dst",
-               [(set IntRegs:$dst,
-                            (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>;
+      : Pseudo<(outs IntRegs:$rd), (ins i32imm:$i, IntRegs:$f, CCOp:$cc),
+               "mov$cc %fcc0, $i, $rd",
+               [(set i32:$rd, (SPselectfcc simm11:$i, i32:$f, imm:$cc))]>;
   }
 
   let Uses = [ICC] in {
     def FMOVS_ICC
-      : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
-               "fmovs$cc %icc, $F, $dst",
-               [(set FPRegs:$dst,
-                           (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+      : Pseudo<(outs FPRegs:$rd), (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cc),
+               "fmovs$cc %icc, $rs2, $rd",
+               [(set f32:$rd, (SPselecticc f32:$rs2, f32:$f, imm:$cc))]>;
     def FMOVD_ICC
-      : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
-               "fmovd$cc %icc, $F, $dst",
-               [(set DFPRegs:$dst,
-                           (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+      : Pseudo<(outs DFPRegs:$rd), (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cc),
+               "fmovd$cc %icc, $rs2, $rd",
+               [(set f64:$rd, (SPselecticc f64:$rs2, f64:$f, imm:$cc))]>;
   }
 
   let Uses = [FCC] in {
     def FMOVS_FCC
-      : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
-               "fmovs$cc %fcc0, $F, $dst",
-               [(set FPRegs:$dst,
-                           (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+      : Pseudo<(outs FPRegs:$rd), (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cc),
+               "fmovs$cc %fcc0, $rs2, $rd",
+               [(set f32:$rd, (SPselectfcc f32:$rs2, f32:$f, imm:$cc))]>;
     def FMOVD_FCC
-      : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
-               "fmovd$cc %fcc0, $F, $dst",
-               [(set DFPRegs:$dst,
-                           (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+      : Pseudo<(outs DFPRegs:$rd), (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cc),
+               "fmovd$cc %fcc0, $rs2, $rd",
+               [(set f64:$rd, (SPselectfcc f64:$rs2, f64:$f, imm:$cc))]>;
   }
 
 }
@@ -759,23 +765,23 @@ let Predicates = [HasV9] in {
   def FMOVD : F3_3<2, 0b110100, 0b000000010,
                    (outs DFPRegs:$dst), (ins DFPRegs:$src),
                    "fmovd $src, $dst", []>;
-  def FNEGD : F3_3<2, 0b110100, 0b000000110, 
+  def FNEGD : F3_3<2, 0b110100, 0b000000110,
                    (outs DFPRegs:$dst), (ins DFPRegs:$src),
                    "fnegd $src, $dst",
-                   [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
-  def FABSD : F3_3<2, 0b110100, 0b000001010, 
+                   [(set f64:$dst, (fneg f64:$src))]>;
+  def FABSD : F3_3<2, 0b110100, 0b000001010,
                    (outs DFPRegs:$dst), (ins DFPRegs:$src),
                    "fabsd $src, $dst",
-                   [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+                   [(set f64:$dst, (fabs f64:$src))]>;
 }
 
 // POPCrr - This does a ctpop of a 64-bit register.  As such, we have to clear
 // the top 32-bits before using it.  To do this clearing, we use a SLLri X,0.
-def POPCrr : F3_1<2, 0b101110, 
+def POPCrr : F3_1<2, 0b101110,
                   (outs IntRegs:$dst), (ins IntRegs:$src),
                   "popc $src, $dst", []>, Requires<[HasV9]>;
-def : Pat<(ctpop IntRegs:$src),
-          (POPCrr (SLLri IntRegs:$src, 0))>;
+def : Pat<(ctpop i32:$src),
+          (POPCrr (SLLri $src, 0))>;
 
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
@@ -783,30 +789,29 @@ def : Pat<(ctpop IntRegs:$src),
 
 // Small immediates.
 def : Pat<(i32 simm13:$val),
-          (ORri G0, imm:$val)>;
+          (ORri (i32 G0), imm:$val)>;
 // Arbitrary immediates.
 def : Pat<(i32 imm:$val),
           (ORri (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>;
 
-// subc
-def : Pat<(subc IntRegs:$b, IntRegs:$c),
-          (SUBCCrr IntRegs:$b, IntRegs:$c)>;
-def : Pat<(subc IntRegs:$b, simm13:$val),
-          (SUBCCri IntRegs:$b, imm:$val)>;
 
 // Global addresses, constant pool entries
 def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>;
-def : Pat<(SPlo tglobaladdr:$in), (ORri G0, tglobaladdr:$in)>;
+def : Pat<(SPlo tglobaladdr:$in), (ORri (i32 G0), tglobaladdr:$in)>;
 def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
-def : Pat<(SPlo tconstpool:$in), (ORri G0, tconstpool:$in)>;
+def : Pat<(SPlo tconstpool:$in), (ORri (i32 G0), tconstpool:$in)>;
+
+// Blockaddress
+def : Pat<(SPhi tblockaddress:$in), (SETHIi tblockaddress:$in)>;
+def : Pat<(SPlo tblockaddress:$in), (ORri (i32 G0), tblockaddress:$in)>;
 
 // Add reg, lo.  This is used when taking the addr of a global/constpool entry.
-def : Pat<(add IntRegs:$r, (SPlo tglobaladdr:$in)),
-          (ADDri IntRegs:$r, tglobaladdr:$in)>;
-def : Pat<(add IntRegs:$r, (SPlo tconstpool:$in)),
-          (ADDri IntRegs:$r, tconstpool:$in)>;
+def : Pat<(add iPTR:$r, (SPlo tglobaladdr:$in)), (ADDri $r, tglobaladdr:$in)>;
+def : Pat<(add iPTR:$r, (SPlo tconstpool:$in)),  (ADDri $r, tconstpool:$in)>;
+def : Pat<(add iPTR:$r, (SPlo tblockaddress:$in)),
+                        (ADDri $r, tblockaddress:$in)>;
 
-// Calls: 
+// Calls:
 def : Pat<(call tglobaladdr:$dst),
           (CALL tglobaladdr:$dst)>;
 def : Pat<(call texternalsym:$dst),
@@ -823,3 +828,9 @@ def : Pat<(i32 (extloadi16 ADDRri:$src)), (LDUHri ADDRri:$src)>;
 // zextload bool -> zextload byte
 def : Pat<(i32 (zextloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
 def : Pat<(i32 (zextloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+
+// store 0, addr -> store %g0, addr
+def : Pat<(store (i32 0), ADDRrr:$dst), (STrr ADDRrr:$dst, (i32 G0))>;
+def : Pat<(store (i32 0), ADDRri:$dst), (STri ADDRri:$dst, (i32 G0))>;
+
+include "SparcInstr64Bit.td"
diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h
index 90c27a4..3783c16 100644
--- a/lib/Target/Sparc/SparcMachineFunctionInfo.h
+++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -28,11 +28,16 @@ namespace llvm {
     /// SRetReturnReg - Holds the virtual register into which the sret
     /// argument is passed.
     unsigned SRetReturnReg;
+
+    /// IsLeafProc - True if the function is a leaf procedure.
+    bool IsLeafProc;
   public:
     SparcMachineFunctionInfo()
-      : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {}
+      : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0),
+        IsLeafProc(false) {}
     explicit SparcMachineFunctionInfo(MachineFunction &MF)
-      : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {}
+      : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0),
+        IsLeafProc(false) {}
 
     unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
     void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
@@ -42,6 +47,9 @@ namespace llvm {
 
     unsigned getSRetReturnReg() const { return SRetReturnReg; }
     void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+    void setLeafProc(bool rhs) { IsLeafProc = rhs; }
+    bool isLeafProc() const { return IsLeafProc; }
   };
 }
 
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 25e90b7..dc97f06 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -13,6 +13,7 @@
 
 #include "SparcRegisterInfo.h"
 #include "Sparc.h"
+#include "SparcMachineFunctionInfo.h"
 #include "SparcSubtarget.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
@@ -20,6 +21,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetInstrInfo.h"
 
@@ -28,9 +30,12 @@
 
 using namespace llvm;
 
-SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st,
-                                     const TargetInstrInfo &tii)
-  : SparcGenRegisterInfo(SP::I7), Subtarget(st), TII(tii) {
+static cl::opt<bool>
+ReserveAppRegisters("sparc-reserve-app-registers", cl::Hidden, cl::init(false),
+                    cl::desc("Reserve application registers (%g2-%g4)"));
+
+SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st)
+  : SparcGenRegisterInfo(SP::I7), Subtarget(st) {
 }
 
 const uint16_t* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
@@ -43,19 +48,32 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
   // FIXME: G1 reserved for now for large imm generation by frame code.
   Reserved.set(SP::G1);
-  Reserved.set(SP::G2);
-  Reserved.set(SP::G3);
-  Reserved.set(SP::G4);
+
+  // G1-G4 can be used in applications.
+  if (ReserveAppRegisters) {
+    Reserved.set(SP::G2);
+    Reserved.set(SP::G3);
+    Reserved.set(SP::G4);
+  }
+  // G5 is not reserved in 64 bit mode.
+  if (!Subtarget.is64Bit())
+    Reserved.set(SP::G5);
+
   Reserved.set(SP::O6);
   Reserved.set(SP::I6);
   Reserved.set(SP::I7);
   Reserved.set(SP::G0);
-  Reserved.set(SP::G5);
   Reserved.set(SP::G6);
   Reserved.set(SP::G7);
   return Reserved;
 }
 
+const TargetRegisterClass*
+SparcRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+                                      unsigned Kind) const {
+  return Subtarget.is64Bit() ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
+}
+
 void
 SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                        int SPAdj, unsigned FIOperandNum,
@@ -68,23 +86,33 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   // Addressable stack objects are accessed using neg. offsets from %fp
   MachineFunction &MF = *MI.getParent()->getParent();
-  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
-               MI.getOperand(FIOperandNum + 1).getImm();
+  int64_t Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+                   MI.getOperand(FIOperandNum + 1).getImm() +
+                   Subtarget.getStackPointerBias();
+  SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+  unsigned FramePtr = SP::I6;
+  if (FuncInfo->isLeafProc()) {
+    // Use %sp and adjust offset if needed.
+    FramePtr = SP::O6;
+    int stackSize = MF.getFrameInfo()->getStackSize();
+    Offset += (stackSize) ? Subtarget.getAdjustedFrameSize(stackSize) : 0 ;
+  }
 
   // Replace frame index with a frame pointer reference.
   if (Offset >= -4096 && Offset <= 4095) {
     // If the offset is small enough to fit in the immediate field, directly
     // encode it.
-    MI.getOperand(FIOperandNum).ChangeToRegister(SP::I6, false);
+    MI.getOperand(FIOperandNum).ChangeToRegister(FramePtr, false);
     MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
   } else {
-    // Otherwise, emit a G1 = SETHI %hi(offset).  FIXME: it would be better to 
+    // Otherwise, emit a G1 = SETHI %hi(offset).  FIXME: it would be better to
     // scavenge a register here instead of reserving G1 all of the time.
+    const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
     unsigned OffHi = (unsigned)Offset >> 10U;
     BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
     // Emit G1 = G1 + I6
     BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
-      .addReg(SP::I6);
+      .addReg(FramePtr);
     // Insert: G1+%lo(offset) into the user.
     MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false);
     MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset & ((1 << 10)-1));
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index b53a1ed..6b77d4e 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -27,15 +27,17 @@ class Type;
 
 struct SparcRegisterInfo : public SparcGenRegisterInfo {
   SparcSubtarget &Subtarget;
-  const TargetInstrInfo &TII;
 
-  SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii);
+  SparcRegisterInfo(SparcSubtarget &st);
 
   /// Code Generation virtual methods...
   const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
 
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
+  const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
+                                                unsigned Kind) const;
+
   void eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, unsigned FIOperandNum,
                            RegScavenger *RS = NULL) const;
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
index 81bff6c..a59c442 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
-//  Declarations that describe the Sparc register file 
+//  Declarations that describe the Sparc register file
 //===----------------------------------------------------------------------===//
 
 class SparcReg<string n> : Register<n> {
@@ -21,8 +21,8 @@ class SparcCtrlReg<string n>: Register<n> {
 }
 
 let Namespace = "SP" in {
-def sub_even : SubRegIndex;
-def sub_odd  : SubRegIndex;
+def sub_even : SubRegIndex<32>;
+def sub_odd  : SubRegIndex<32, 32>;
 }
 
 // Registers are identified with 5-bit ID numbers.
@@ -43,7 +43,7 @@ class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
 }
 
 // Control Registers
-def ICC : SparcCtrlReg<"ICC">;
+def ICC : SparcCtrlReg<"ICC">; // This represents icc and xcc in 64-bit code.
 def FCC : SparcCtrlReg<"FCC">;
 
 // Y register
@@ -52,68 +52,68 @@ def Y : SparcCtrlReg<"Y">;
 // Integer registers
 def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
 def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
-def G2 : Ri< 2, "G2">, DwarfRegNum<[2]>; 
+def G2 : Ri< 2, "G2">, DwarfRegNum<[2]>;
 def G3 : Ri< 3, "G3">, DwarfRegNum<[3]>;
 def G4 : Ri< 4, "G4">, DwarfRegNum<[4]>;
-def G5 : Ri< 5, "G5">, DwarfRegNum<[5]>; 
+def G5 : Ri< 5, "G5">, DwarfRegNum<[5]>;
 def G6 : Ri< 6, "G6">, DwarfRegNum<[6]>;
 def G7 : Ri< 7, "G7">, DwarfRegNum<[7]>;
 def O0 : Ri< 8, "O0">, DwarfRegNum<[8]>;
 def O1 : Ri< 9, "O1">, DwarfRegNum<[9]>;
-def O2 : Ri<10, "O2">, DwarfRegNum<[10]>; 
+def O2 : Ri<10, "O2">, DwarfRegNum<[10]>;
 def O3 : Ri<11, "O3">, DwarfRegNum<[11]>;
 def O4 : Ri<12, "O4">, DwarfRegNum<[12]>;
-def O5 : Ri<13, "O5">, DwarfRegNum<[13]>; 
+def O5 : Ri<13, "O5">, DwarfRegNum<[13]>;
 def O6 : Ri<14, "SP">, DwarfRegNum<[14]>;
 def O7 : Ri<15, "O7">, DwarfRegNum<[15]>;
 def L0 : Ri<16, "L0">, DwarfRegNum<[16]>;
 def L1 : Ri<17, "L1">, DwarfRegNum<[17]>;
-def L2 : Ri<18, "L2">, DwarfRegNum<[18]>; 
+def L2 : Ri<18, "L2">, DwarfRegNum<[18]>;
 def L3 : Ri<19, "L3">, DwarfRegNum<[19]>;
 def L4 : Ri<20, "L4">, DwarfRegNum<[20]>;
-def L5 : Ri<21, "L5">, DwarfRegNum<[21]>; 
+def L5 : Ri<21, "L5">, DwarfRegNum<[21]>;
 def L6 : Ri<22, "L6">, DwarfRegNum<[22]>;
 def L7 : Ri<23, "L7">, DwarfRegNum<[23]>;
 def I0 : Ri<24, "I0">, DwarfRegNum<[24]>;
 def I1 : Ri<25, "I1">, DwarfRegNum<[25]>;
-def I2 : Ri<26, "I2">, DwarfRegNum<[26]>; 
+def I2 : Ri<26, "I2">, DwarfRegNum<[26]>;
 def I3 : Ri<27, "I3">, DwarfRegNum<[27]>;
 def I4 : Ri<28, "I4">, DwarfRegNum<[28]>;
-def I5 : Ri<29, "I5">, DwarfRegNum<[29]>; 
+def I5 : Ri<29, "I5">, DwarfRegNum<[29]>;
 def I6 : Ri<30, "FP">, DwarfRegNum<[30]>;
 def I7 : Ri<31, "I7">, DwarfRegNum<[31]>;
 
 // Floating-point registers
 def F0  : Rf< 0,  "F0">, DwarfRegNum<[32]>;
 def F1  : Rf< 1,  "F1">, DwarfRegNum<[33]>;
-def F2  : Rf< 2,  "F2">, DwarfRegNum<[34]>; 
+def F2  : Rf< 2,  "F2">, DwarfRegNum<[34]>;
 def F3  : Rf< 3,  "F3">, DwarfRegNum<[35]>;
 def F4  : Rf< 4,  "F4">, DwarfRegNum<[36]>;
-def F5  : Rf< 5,  "F5">, DwarfRegNum<[37]>; 
+def F5  : Rf< 5,  "F5">, DwarfRegNum<[37]>;
 def F6  : Rf< 6,  "F6">, DwarfRegNum<[38]>;
 def F7  : Rf< 7,  "F7">, DwarfRegNum<[39]>;
-def F8  : Rf< 8,  "F8">, DwarfRegNum<[40]>; 
+def F8  : Rf< 8,  "F8">, DwarfRegNum<[40]>;
 def F9  : Rf< 9,  "F9">, DwarfRegNum<[41]>;
 def F10 : Rf<10, "F10">, DwarfRegNum<[42]>;
-def F11 : Rf<11, "F11">, DwarfRegNum<[43]>; 
+def F11 : Rf<11, "F11">, DwarfRegNum<[43]>;
 def F12 : Rf<12, "F12">, DwarfRegNum<[44]>;
 def F13 : Rf<13, "F13">, DwarfRegNum<[45]>;
-def F14 : Rf<14, "F14">, DwarfRegNum<[46]>; 
+def F14 : Rf<14, "F14">, DwarfRegNum<[46]>;
 def F15 : Rf<15, "F15">, DwarfRegNum<[47]>;
 def F16 : Rf<16, "F16">, DwarfRegNum<[48]>;
-def F17 : Rf<17, "F17">, DwarfRegNum<[49]>; 
+def F17 : Rf<17, "F17">, DwarfRegNum<[49]>;
 def F18 : Rf<18, "F18">, DwarfRegNum<[50]>;
 def F19 : Rf<19, "F19">, DwarfRegNum<[51]>;
-def F20 : Rf<20, "F20">, DwarfRegNum<[52]>; 
+def F20 : Rf<20, "F20">, DwarfRegNum<[52]>;
 def F21 : Rf<21, "F21">, DwarfRegNum<[53]>;
 def F22 : Rf<22, "F22">, DwarfRegNum<[54]>;
 def F23 : Rf<23, "F23">, DwarfRegNum<[55]>;
 def F24 : Rf<24, "F24">, DwarfRegNum<[56]>;
 def F25 : Rf<25, "F25">, DwarfRegNum<[57]>;
-def F26 : Rf<26, "F26">, DwarfRegNum<[58]>; 
+def F26 : Rf<26, "F26">, DwarfRegNum<[58]>;
 def F27 : Rf<27, "F27">, DwarfRegNum<[59]>;
 def F28 : Rf<28, "F28">, DwarfRegNum<[60]>;
-def F29 : Rf<29, "F29">, DwarfRegNum<[61]>; 
+def F29 : Rf<29, "F29">, DwarfRegNum<[61]>;
 def F30 : Rf<30, "F30">, DwarfRegNum<[62]>;
 def F31 : Rf<31, "F31">, DwarfRegNum<[63]>;
 
@@ -140,21 +140,22 @@ def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[87]>;
 // FIXME: the register order should be defined in terms of the preferred
 // allocation order...
 //
-def IntRegs : RegisterClass<"SP", [i32], 32,
-                            (add L0, L1, L2, L3, L4, L5, L6,
-                                 L7, I0, I1, I2, I3, I4, I5,
-                                 O0, O1, O2, O3, O4, O5, O7,
-                                 G1,
-                                 // Non-allocatable regs:
-                                 G2, G3, G4, // FIXME: OK for use only in
-                                             // applications, not libraries.
-                                 O6, // stack ptr
-                                 I6, // frame ptr
-                                 I7, // return address
-                                 G0, // constant zero
-                                 G5, G6, G7 // reserved for kernel
-                                 )>;
+// This register class should not be used to hold i64 values, use the I64Regs
+// register class for that. The i64 type is included here to allow i64 patterns
+// using the integer instructions.
+def IntRegs : RegisterClass<"SP", [i32, i64], 32,
+                            (add (sequence "I%u", 0, 7),
+                                 (sequence "G%u", 0, 7),
+                                 (sequence "L%u", 0, 7),
+                                 (sequence "O%u", 0, 7))>;
 
+// Register class for 64-bit mode, with a 64-bit spill slot size.
+// These are the same as the 32-bit registers, so TableGen will consider this
+// to be a sub-class of IntRegs. That works out because requiring a 64-bit
+// spill slot is a stricter constraint than only requiring a 32-bit spill slot.
+def I64Regs : RegisterClass<"SP", [i64], 64, (add IntRegs)>;
+
+// Floating point register classes.
 def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>;
 
 def DFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 15)>;
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp
index e5b2aeb..f9ce098 100644
--- a/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -13,6 +13,7 @@
 
 #include "SparcSubtarget.h"
 #include "Sparc.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/TargetRegistry.h"
 
 #define GET_SUBTARGETINFO_TARGET_DESC
@@ -30,7 +31,7 @@ SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
   V8DeprecatedInsts(false),
   IsVIS(false),
   Is64Bit(is64Bit) {
-  
+
   // Determine default and user specified characteristics
   std::string CPUName = CPU;
   if (CPUName.empty()) {
@@ -44,3 +45,30 @@ SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
   // Parse features string.
   ParseSubtargetFeatures(CPUName, FS);
 }
+
+
+int SparcSubtarget::getAdjustedFrameSize(int frameSize) const {
+
+  if (is64Bit()) {
+    // All 64-bit stack frames must be 16-byte aligned, and must reserve space
+    // for spilling the 16 window registers at %sp+BIAS..%sp+BIAS+128.
+    frameSize += 128;
+    // Frames with calls must also reserve space for 6 outgoing arguments
+    // whether they are used or not. LowerCall_64 takes care of that.
+    assert(frameSize % 16 == 0 && "Stack size not 16-byte aligned");
+  } else {
+    // Emit the correct save instruction based on the number of bytes in
+    // the frame. Minimum stack frame size according to V8 ABI is:
+    //   16 words for register window spill
+    //    1 word for address of returned aggregate-value
+    // +  6 words for passing parameters on the stack
+    // ----------
+    //   23 words * 4 bytes per word = 92 bytes
+    frameSize += 92;
+
+    // Round up to next doubleword boundary -- a double-word boundary
+    // is required by the ABI.
+    frameSize = RoundUpToAlignment(frameSize, 8);
+  }
+  return frameSize;
+}
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
index a81931b..2bf599d 100644
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -29,7 +29,7 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
   bool V8DeprecatedInsts;
   bool IsVIS;
   bool Is64Bit;
-  
+
 public:
   SparcSubtarget(const std::string &TT, const std::string &CPU,
                  const std::string &FS, bool is64bit);
@@ -37,11 +37,11 @@ public:
   bool isV9() const { return IsV9; }
   bool isVIS() const { return IsVIS; }
   bool useDeprecatedV8Instructions() const { return V8DeprecatedInsts; }
-  
-  /// ParseSubtargetFeatures - Parses features string setting specified 
+
+  /// ParseSubtargetFeatures - Parses features string setting specified
   /// subtarget options.  Definition of function is auto generated by tblgen.
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
-  
+
   bool is64Bit() const { return Is64Bit; }
   std::string getDataLayout() const {
     const char *p;
@@ -52,6 +52,18 @@ public:
     }
     return std::string(p);
   }
+
+  /// The 64-bit ABI uses biased stack and frame pointers, so the stack frame
+  /// of the current function is the area from [%sp+BIAS] to [%fp+BIAS].
+  int64_t getStackPointerBias() const {
+    return is64Bit() ? 2047 : 0;
+  }
+
+  /// Given a actual stack size as determined by FrameInfo, this function
+  /// returns adjusted framesize which includes space for register window
+  /// spills and arguments.
+  int getAdjustedFrameSize(int stackSize) const;
+
 };
 
 } // end namespace llvm
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 60bceb7..a7355f4 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -37,6 +37,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
     InstrInfo(Subtarget),
     TLInfo(*this), TSInfo(*this),
     FrameLowering(Subtarget) {
+  initAsmInfo();
 }
 
 namespace {
@@ -68,7 +69,6 @@ bool SparcPassConfig::addInstSelector() {
 /// passes immediately before machine code is emitted.  This should return
 /// true if -print-machineinstrs should print out the code after the passes.
 bool SparcPassConfig::addPreEmitPass(){
-  addPass(createSparcFPMoverPass(getSparcTargetMachine()));
   addPass(createSparcDelaySlotFillerPass(getSparcTargetMachine()));
   return true;
 }