5 files changed, 472 insertions, 3 deletions
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index a584188..9c937ed 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -37,6 +37,26 @@ def RetCC_PPC : CallingConv<[
 ]>;
 
 
+// Note that we don't currently have calling conventions for 64-bit
+// PowerPC, but handle all the complexities of the ABI in the lowering
+// logic.  FIXME: See if the logic can be simplified with use of CCs.
+// This may require some extensions to current table generation.
+
+// Simple return-value convention for 64-bit ELF PowerPC fast isel.
+// All small ints (i8, i16, i32) are promoted to i64.  Vector types,
+// quadword ints, and multiple register returns are "supported" to avoid
+// compile errors, but none are handled by the fast selector.
+def RetCC_PPC64_ELF_FIS : CallingConv<[
+  CCIfType<[i8],   CCPromoteToType<i64>>,
+  CCIfType<[i16],  CCPromoteToType<i64>>,
+  CCIfType<[i32],  CCPromoteToType<i64>>,
+  CCIfType<[i64],  CCAssignToReg<[X3, X4]>>,
+  CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
+  CCIfType<[f32],  CCAssignToReg<[F1, F2]>>,
+  CCIfType<[f64],  CCAssignToReg<[F1, F2, F3, F4]>>,
+  CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
+]>;
+
 //===----------------------------------------------------------------------===//
 // PowerPC System V Release 4 32-bit ABI
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index ebc7057..8db4432 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -95,6 +95,8 @@ class PPCFastISel : public FastISel {
   private:
     bool SelectBranch(const Instruction *I);
     bool SelectIndirectBr(const Instruction *I);
+    bool SelectRet(const Instruction *I);
+    bool SelectIntExt(const Instruction *I);
 
   // Utility routines.
   private:
@@ -109,6 +111,10 @@ class PPCFastISel : public FastISel {
     unsigned PPCMaterialize64BitInt(int64_t Imm,
                                     const TargetRegisterClass *RC);
 
+  // Call handling routines.
+  private:
+    CCAssignFn *usePPC32CCs(unsigned Flag);
+
   private:
   #include "PPCGenFastISel.inc"
 
@@ -116,6 +122,21 @@ class PPCFastISel : public FastISel {
 
 } // end anonymous namespace
 
+#include "PPCGenCallingConv.inc"
+
+// Function whose sole purpose is to kill compiler warnings stemming from
+// otherwise-unused functions included from PPCGenCallingConv.inc.
+CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
+  if (Flag == 1)
+    return CC_PPC32_SVR4;
+  else if (Flag == 2)
+    return CC_PPC32_SVR4_ByVal;
+  else if (Flag == 3)
+    return CC_PPC32_SVR4_VarArg;
+  else
+    return RetCC_PPC;
+}
+
 static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
   switch (Pred) {
     // These are not representable with any single compare.
@@ -309,13 +330,164 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
   return true;
 }
 
+// Attempt to fast-select a return instruction.  Return false if we can't.
+bool PPCFastISel::SelectRet(const Instruction *I) {
+
+  if (!FuncInfo.CanLowerReturn)
+    return false;
+
+  const ReturnInst *Ret = cast<ReturnInst>(I);
+  const Function &F = *I->getParent()->getParent();
+
+  // Return-value registers; attached to BLR below as implicit operands.
+  SmallVector<unsigned, 4> RetRegs;
+  CallingConv::ID CC = F.getCallingConv();
+
+  if (Ret->getNumOperands() > 0) {
+    SmallVector<ISD::OutputArg, 4> Outs;
+    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
+
+    // Analyze the return values, assigning a location to each of them.
+    SmallVector<CCValAssign, 16> ValLocs;
+    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, *Context);
+    CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
+    const Value *RV = Ret->getOperand(0);
+    
+    // FIXME: Only one output register for now.
+    if (ValLocs.size() > 1)
+      return false;
+
+    // Special case: a constant integer of any size is materialized as an
+    // i64 and copied to the return register, avoiding an extend/truncate.
+    // NOTE(review): the SExt/ZExt LocInfo is not consulted here - confirm.
+    if (isa<ConstantInt>(*RV)) {
+      const Constant *C = cast<Constant>(RV);
+      unsigned SrcReg = PPCMaterializeInt(C, MVT::i64);
+      unsigned RetReg = ValLocs[0].getLocReg();
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+              RetReg).addReg(SrcReg);
+      RetRegs.push_back(RetReg);
+
+    } else {
+      unsigned Reg = getRegForValue(RV);
+
+      if (Reg == 0)
+        return false;
+
+      // Copy each value into its output register, extending small ints first.
+      for (unsigned i = 0; i < ValLocs.size(); ++i) {
+
+        CCValAssign &VA = ValLocs[i];
+        assert(VA.isRegLoc() && "Can only return in registers!");
+        RetRegs.push_back(VA.getLocReg());
+        unsigned SrcReg = Reg + VA.getValNo();
+
+        EVT RVEVT = TLI.getValueType(RV->getType());
+        if (!RVEVT.isSimple())
+          return false;
+        MVT RVVT = RVEVT.getSimpleVT();
+        MVT DestVT = VA.getLocVT();
+
+        if (RVVT != DestVT && RVVT != MVT::i8 &&
+            RVVT != MVT::i16 && RVVT != MVT::i32)
+          return false;
+      
+        if (RVVT != DestVT) {
+          switch (VA.getLocInfo()) {
+            default:
+              llvm_unreachable("Unknown loc info!");
+            case CCValAssign::Full:
+              llvm_unreachable("Full value assign but types don't match?");
+            case CCValAssign::AExt:
+            case CCValAssign::ZExt: {
+              const TargetRegisterClass *RC =
+                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+              unsigned TmpReg = createResultReg(RC);
+              if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
+                return false;
+              SrcReg = TmpReg;
+              break;
+            }
+            case CCValAssign::SExt: {
+              const TargetRegisterClass *RC =
+                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+              unsigned TmpReg = createResultReg(RC);
+              if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
+                return false;
+              SrcReg = TmpReg;
+              break;
+            }
+          }
+        }
+
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                TII.get(TargetOpcode::COPY), RetRegs[i])
+          .addReg(SrcReg);
+      }
+    }
+  }
+
+  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                                    TII.get(PPC::BLR));
+
+  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
+    MIB.addReg(RetRegs[i], RegState::Implicit);
+
+  return true;
+}
+
 // Attempt to emit an integer extend of SrcReg into DestReg.  Both
 // signed and zero extensions are supported.  Return false if we
-// can't handle it.  Not yet implemented.
+// can't handle it (only i8/i16/i32 sources, i32/i64 destinations).
 bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                 unsigned DestReg, bool IsZExt) {
-  return (SrcVT == MVT::i8 && SrcReg && DestVT == MVT::i8 && DestReg
-          && IsZExt && false);
+  if (DestVT != MVT::i32 && DestVT != MVT::i64)
+    return false;
+  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
+    return false;
+
+  // Signed extensions use EXTSB/EXTSH (i32) or the *_32_64 forms (i64).
+  if (!IsZExt) {
+    unsigned Opc;
+    if (SrcVT == MVT::i8)
+      Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
+    else if (SrcVT == MVT::i16)
+      Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
+    else {
+      assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
+      Opc = PPC::EXTSW_32_64;
+    }
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+      .addReg(SrcReg);
+
+  // Unsigned 32-bit extensions use RLWINM with SH=0, ME=31, MB per SrcVT.
+  } else if (DestVT == MVT::i32) {
+    unsigned MB;
+    if (SrcVT == MVT::i8)
+      MB = 24;
+    else {
+      assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
+      MB = 16;
+    }
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLWINM),
+            DestReg)
+      .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
+
+  // Unsigned 64-bit extensions use RLDICL (32-bit source), SH=0, MB per SrcVT.
+  } else {
+    unsigned MB;
+    if (SrcVT == MVT::i8)
+      MB = 56;
+    else if (SrcVT == MVT::i16)
+      MB = 48;
+    else
+      MB = 32;
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+            TII.get(PPC::RLDICL_32_64), DestReg)
+      .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
+  }
+
+  return true;
 }
 
 // Attempt to fast-select an indirect branch instruction.
@@ -335,6 +507,45 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
   return true;
 }
 
+// Attempt to fast-select an integer extend instruction (zext or sext).
+bool PPCFastISel::SelectIntExt(const Instruction *I) {
+  Type *DestTy = I->getType();
+  Value *Src = I->getOperand(0);
+  Type *SrcTy = Src->getType();
+
+  bool IsZExt = isa<ZExtInst>(I);
+  unsigned SrcReg = getRegForValue(Src);
+  if (!SrcReg) return false;
+
+  EVT SrcEVT, DestEVT;
+  SrcEVT = TLI.getValueType(SrcTy, true);
+  DestEVT = TLI.getValueType(DestTy, true);
+  if (!SrcEVT.isSimple())
+    return false;
+  if (!DestEVT.isSimple())
+    return false;
+
+  MVT SrcVT = SrcEVT.getSimpleVT();
+  MVT DestVT = DestEVT.getSimpleVT();
+
+  // If a register was already assigned to this instruction's result,
+  // reuse its register class.  Otherwise pick the register class of the
+  // correct size that does not contain X0/R0, since we don't know
+  // whether downstream uses permit that assignment.
+  unsigned AssignedReg = FuncInfo.ValueMap[I];
+  const TargetRegisterClass *RC =
+    (AssignedReg ? MRI.getRegClass(AssignedReg) :
+     (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
+      &PPC::GPRC_and_GPRC_NOR0RegClass));
+  unsigned ResultReg = createResultReg(RC);
+
+  if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
+    return false;
+
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
 // Attempt to fast-select an instruction that wasn't handled by
 // the table-generated machinery.
 bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
@@ -344,6 +555,11 @@ bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
       return SelectBranch(I);
     case Instruction::IndirectBr:
       return SelectIndirectBr(I);
+    case Instruction::Ret:
+      return SelectRet(I);
+    case Instruction::ZExt:
+    case Instruction::SExt:
+      return SelectIntExt(I);
     // Here add other flavors of Instruction::XXX that automated
     // cases don't catch.  For example, switches are terminators
     // that aren't yet handled.
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index f78bb38..9257904 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -506,6 +506,14 @@ defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS),
                         [(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
 } // Interpretation64Bit
 
+// For fast-isel: extsb/extsh taking a gprc source and defining a g8rc result.
+let isCodeGenOnly = 1 in {
+def EXTSB8_32_64 : XForm_11<31, 954, (outs g8rc:$rA), (ins gprc:$rS),
+                           "extsb $rA, $rS", IntSimple, []>, isPPC64;
+def EXTSH8_32_64 : XForm_11<31, 922, (outs g8rc:$rA), (ins gprc:$rS),
+                           "extsh $rA, $rS", IntSimple, []>, isPPC64;
+} // isCodeGenOnly for fast-isel
+
 defm EXTSW  : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS),
                         "extsw", "$rA, $rS", IntSimple,
                         [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
@@ -569,6 +577,14 @@ defm RLDICL : MDForm_1r<30, 0,
                         (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
                         "rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI,
                         []>, isPPC64;
+// For fast-isel: rldicl taking a gprc source and defining a g8rc result.
+let isCodeGenOnly = 1 in
+def RLDICL_32_64 : MDForm_1<30, 0,
+                           (outs g8rc:$rA),
+                           (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
+                           "rldicl $rA, $rS, $SH, $MBE", IntRotateDI,
+                           []>, isPPC64;
+// End fast-isel.
 defm RLDICR : MDForm_1r<30, 1,
                         (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
                         "rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI,
diff --git a/test/CodeGen/PowerPC/fast-isel-ext.ll b/test/CodeGen/PowerPC/fast-isel-ext.ll
new file mode 100644
index 0000000..753305a
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-ext.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+
+; zext
+
+define i32 @zext_8_32(i8 %a) nounwind ssp {
+; ELF64: zext_8_32
+  %r = zext i8 %a to i32
+; ELF64: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
+  ret i32 %r
+}
+
+define i32 @zext_16_32(i16 %a) nounwind ssp {
+; ELF64: zext_16_32
+  %r = zext i16 %a to i32
+; ELF64: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
+  ret i32 %r
+}
+
+define i64 @zext_8_64(i8 %a) nounwind ssp {
+; ELF64: zext_8_64
+  %r = zext i8 %a to i64
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
+  ret i64 %r
+}
+
+define i64 @zext_16_64(i16 %a) nounwind ssp {
+; ELF64: zext_16_64
+  %r = zext i16 %a to i64
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
+  ret i64 %r
+}
+
+define i64 @zext_32_64(i32 %a) nounwind ssp {
+; ELF64: zext_32_64
+  %r = zext i32 %a to i64
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
+  ret i64 %r
+}
+
+; sext
+
+define i32 @sext_8_32(i8 %a) nounwind ssp {
+; ELF64: sext_8_32
+  %r = sext i8 %a to i32
+; ELF64: extsb
+  ret i32 %r
+}
+
+define i32 @sext_16_32(i16 %a) nounwind ssp {
+; ELF64: sext_16_32
+  %r = sext i16 %a to i32
+; ELF64: extsh
+  ret i32 %r
+}
+
+define i64 @sext_8_64(i8 %a) nounwind ssp {
+; ELF64: sext_8_64
+  %r = sext i8 %a to i64
+; ELF64: extsb
+  ret i64 %r
+}
+
+define i64 @sext_16_64(i16 %a) nounwind ssp {
+; ELF64: sext_16_64
+  %r = sext i16 %a to i64
+; ELF64: extsh
+  ret i64 %r
+}
+
+define i64 @sext_32_64(i32 %a) nounwind ssp {
+; ELF64: sext_32_64
+  %r = sext i32 %a to i64
+; ELF64: extsw
+  ret i64 %r
+}
diff --git a/test/CodeGen/PowerPC/fast-isel-ret.ll b/test/CodeGen/PowerPC/fast-isel-ret.ll
new file mode 100644
index 0000000..fa19f8b
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-ret.ll
@@ -0,0 +1,142 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+
+define signext i8 @ret2(i8 signext %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret2
+; ELF64: extsb
+; ELF64: blr
+  ret i8 %a
+}
+
+define zeroext i8 @ret3(i8 signext %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret3
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
+; ELF64: blr
+  ret i8 %a
+}
+
+define signext i16 @ret4(i16 signext %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret4
+; ELF64: extsh
+; ELF64: blr
+  ret i16 %a
+}
+
+define zeroext i16 @ret5(i16 signext %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret5
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
+; ELF64: blr
+  ret i16 %a
+}
+
+define i16 @ret6(i16 %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret6
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
+; ELF64: blr
+  ret i16 %a
+}
+
+define signext i32 @ret7(i32 signext %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret7
+; ELF64: extsw
+; ELF64: blr
+  ret i32 %a
+}
+
+define zeroext i32 @ret8(i32 signext %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret8
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
+; ELF64: blr
+  ret i32 %a
+}
+
+define i32 @ret9(i32 %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret9
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
+; ELF64: blr
+  ret i32 %a
+}
+
+define i64 @ret10(i64 %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret10
+; ELF64-NOT: exts
+; ELF64-NOT: rldicl
+; ELF64: blr
+  ret i64 %a
+}
+
+define float @ret11(float %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret11
+; ELF64: blr
+  ret float %a
+}
+
+define double @ret12(double %a) nounwind uwtable ssp {
+entry:
+; ELF64: ret12
+; ELF64: blr
+  ret double %a
+}
+
+define i8 @ret13() nounwind uwtable ssp {
+entry:
+; ELF64: ret13
+; ELF64: li
+; ELF64: blr
+  ret i8 15;
+}
+
+define i16 @ret14() nounwind uwtable ssp {
+entry:
+; ELF64: ret14
+; ELF64: li
+; ELF64: blr
+  ret i16 -225;
+}
+
+define i32 @ret15() nounwind uwtable ssp {
+entry:
+; ELF64: ret15
+; ELF64: lis
+; ELF64: ori
+; ELF64: blr
+  ret i32 278135;
+}
+
+define i64 @ret16() nounwind uwtable ssp {
+entry:
+; ELF64: ret16
+; ELF64: li
+; ELF64: sldi
+; ELF64: oris
+; ELF64: ori
+; ELF64: blr
+  ret i64 27813515225;
+}
+
+define float @ret17() nounwind uwtable ssp {
+entry:
+; ELF64: ret17
+; ELF64: addis
+; ELF64: lfs
+; ELF64: blr
+  ret float 2.5;
+}
+
+define double @ret18() nounwind uwtable ssp {
+entry:
+; ELF64: ret18
+; ELF64: addis
+; ELF64: lfd
+; ELF64: blr
+  ret double 2.5e-33;
+}