1 files changed, 160 insertions, 51 deletions
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index bb4d1f1..04afe88 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -61,6 +61,9 @@ namespace llvm {
       ///
       VPERM,
 
+      /// The CMPB instruction (takes two operands of i32 or i64).
+      CMPB,
+
       /// Hi/Lo - These represent the high and low 16-bit parts of a global
       /// address respectively.  These nodes have two operands, the first of
       /// which must be a TargetGlobalAddress, and the second of which must be a
@@ -68,18 +71,9 @@ namespace llvm {
       /// though these are usually folded into other nodes.
       Hi, Lo,
 
-      TOC_ENTRY,
-
       /// The following two target-specific nodes are used for calls through
       /// function pointers in the 64-bit SVR4 ABI.
 
-      /// Like a regular LOAD but additionally taking/producing a flag.
-      LOAD,
-
-      /// Like LOAD (taking/producing a flag), but using r2 as hard-coded
-      /// destination.
-      LOAD_TOC,
-
       /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
       /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
       /// compute an allocation on the stack.
@@ -94,15 +88,17 @@ namespace llvm {
       /// code.
       SRL, SRA, SHL,
 
+      /// The combination of sra[wd]i and addze used to implemented signed
+      /// integer division by a power of 2. The first operand is the dividend,
+      /// and the second is the constant shift amount (representing the
+      /// divisor).
+      SRA_ADDZE,
+
       /// CALL - A direct function call.
       /// CALL_NOP is a call with the special NOP which follows 64-bit
       /// SVR4 calls.
       CALL, CALL_NOP,
 
-      /// CALL_TLS and CALL_NOP_TLS - Versions of CALL and CALL_NOP used
-      /// to access TLS variables.
-      CALL_TLS, CALL_NOP_TLS,
-
       /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
       /// MTCTR instruction.
       MTCTR,
@@ -111,6 +107,10 @@ namespace llvm {
       /// BCTRL instruction.
       BCTRL,
 
+      /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl
+      /// instruction and the TOC reload required on SVR4 PPC64.
+      BCTRL_LOAD_TOC,
+
       /// Return with a flag operand, matched by 'blr'
       RET_FLAG,
 
@@ -125,6 +125,10 @@ namespace llvm {
       /// implement truncation of i32 or i64 to i1.
       ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT,
 
+      // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit
+      // target (returns (Lo, Hi)). It takes a chain operand.
+      READ_TIME_BASE,
+
       // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
       EH_SJLJ_SETJMP,
 
@@ -186,7 +190,7 @@ namespace llvm {
       PPC32_GOT,
 
       /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
-      /// local dynamic TLS  on PPC32.
+      /// local dynamic TLS on PPC32.
       PPC32_PICGOT,
 
       /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
@@ -213,26 +217,46 @@ namespace llvm {
       /// register to sym\@got\@tlsgd\@ha.
       ADDIS_TLSGD_HA,
 
-      /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
+      /// %X3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
       /// model, produces an ADDI8 instruction that adds G8RReg to
-      /// sym\@got\@tlsgd\@l.
+      /// sym\@got\@tlsgd\@l and stores the result in X3.  Hidden by
+      /// ADDIS_TLSGD_L_ADDR until after register assignment.
       ADDI_TLSGD_L,
 
+      /// %X3 = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
+      /// model, produces a call to __tls_get_addr(sym\@tlsgd).  Hidden by
+      /// ADDIS_TLSGD_L_ADDR until after register assignment.
+      GET_TLS_ADDR,
+
+      /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
+      /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
+      /// register assignment.
+      ADDI_TLSGD_L_ADDR,
+
       /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
       /// model, produces an ADDIS8 instruction that adds the GOT base
       /// register to sym\@got\@tlsld\@ha.
       ADDIS_TLSLD_HA,
 
-      /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
+      /// %X3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
       /// model, produces an ADDI8 instruction that adds G8RReg to
-      /// sym\@got\@tlsld\@l.
+      /// sym\@got\@tlsld\@l and stores the result in X3.  Hidden by
+      /// ADDIS_TLSLD_L_ADDR until after register assignment.
       ADDI_TLSLD_L,
 
-      /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
-      /// local-dynamic TLS model, produces an ADDIS8 instruction
-      /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed
-      /// to tie this in place following a copy to %X3 from the result
-      /// of a GET_TLSLD_ADDR.
+      /// %X3 = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
+      /// model, produces a call to __tls_get_addr(sym\@tlsld).  Hidden by
+      /// ADDIS_TLSLD_L_ADDR until after register assignment.
+      GET_TLSLD_ADDR,
+
+      /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
+      /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
+      /// following register assignment.
+      ADDI_TLSLD_L_ADDR,
+
+      /// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS
+      /// model, produces an ADDIS8 instruction that adds X3 to
+      /// sym\@dtprel\@ha.
       ADDIS_DTPREL_HA,
 
       /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
@@ -250,6 +274,29 @@ namespace llvm {
       /// operand identifies the operating system entry point.
       SC,
 
+      /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
+      /// endian.  Maps to an xxswapd instruction that corrects an lxvd2x
+      /// or stxvd2x instruction.  The chain is necessary because the
+      /// sequence replaces a load and needs to provide the same number
+      /// of outputs.
+      XXSWAPD,
+
+      /// QVFPERM = This corresponds to the QPX qvfperm instruction.
+      QVFPERM,
+
+      /// QVGPCI = This corresponds to the QPX qvgpci instruction.
+      QVGPCI,
+
+      /// QVALIGNI = This corresponds to the QPX qvaligni instruction.
+      QVALIGNI,
+
+      /// QVESPLATI = This corresponds to the QPX qvesplati instruction.
+      QVESPLATI,
+
+      /// QBFLT = Access the underlying QPX floating-point boolean
+      /// representation.
+      QBFLT,
+
       /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
       /// byte-swapping store instruction.  It byte-swaps the low "Type" bits of
       /// the GPRC input, then stores it through Ptr.  Type can be either i16 or
@@ -276,20 +323,24 @@ namespace llvm {
       /// destination 64-bit register.
       LFIWZX,
 
-      /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model,
-      /// produces an ADDIS8 instruction that adds the TOC base register to
-      /// sym\@toc\@ha.
-      ADDIS_TOC_HA,
+      /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
+      /// Maps directly to an lxvd2x instruction that will be followed by
+      /// an xxswapd.
+      LXVD2X,
 
-      /// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model,
-      /// produces a LD instruction with base register G8RReg and offset
-      /// sym\@toc\@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
-      LD_TOC_L,
+      /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
+      /// Maps directly to an stxvd2x instruction that will be preceded by
+      /// an xxswapd.
+      STXVD2X,
 
-      /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces
-      /// an ADDI8 instruction that adds G8RReg to sym\@toc\@l.
-      /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
-      ADDI_TOC_L
+      /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
+      /// The 4xf32 load used for v4i1 constants.
+      QVLFSb,
+
+      /// GPRC = TOC_ENTRY GA, TOC
+      /// Loads the entry for GA from the TOC, where the TOC base is given by
+      /// the last operand.
+      TOC_ENTRY
     };
   }
 
@@ -338,14 +389,18 @@ namespace llvm {
     /// size, return the constant being splatted.  The ByteSize field indicates
     /// the number of bytes of each element [124] -> [bhw].
     SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
+
+    /// If this is a qvaligni shuffle mask, return the shift
+    /// amount, otherwise return -1.
+    int isQVALIGNIShuffleMask(SDNode *N);
   }
 
-  class PPCSubtarget;
   class PPCTargetLowering : public TargetLowering {
     const PPCSubtarget &Subtarget;
 
   public:
-    explicit PPCTargetLowering(const PPCTargetMachine &TM);
+    explicit PPCTargetLowering(const PPCTargetMachine &TM,
+                               const PPCSubtarget &STI);
 
     /// getTargetNodeName() - This method returns the name of a target specific
     /// DAG node.
@@ -353,6 +408,14 @@ namespace llvm {
 
     MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
 
+    bool isCheapToSpeculateCttz() const override {
+      return true;
+    }
+
+    bool isCheapToSpeculateCtlz() const override {
+      return true;
+    }
+
     /// getSetCCResultType - Return the ISD::SETCC ValueType
     EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
 
@@ -399,8 +462,14 @@ namespace llvm {
     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
                             SelectionDAG &DAG) const override;
 
+    SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const;
+
     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
+    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+                          std::vector<SDNode *> *Created) const override;
+
     unsigned getRegisterByName(const char* RegName, EVT VT) const override;
 
     void computeKnownBitsForTargetNode(const SDValue Op,
@@ -409,6 +478,8 @@ namespace llvm {
                                        const SelectionDAG &DAG,
                                        unsigned Depth = 0) const override;
 
+    unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
+
     Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                                   bool IsStore, bool IsLoad) const override;
     Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
@@ -438,9 +509,10 @@ namespace llvm {
     ConstraintWeight getSingleConstraintMatchWeight(
       AsmOperandInfo &info, const char *constraint) const override;
 
-    std::pair<unsigned, const TargetRegisterClass*>
-      getRegForInlineAsmConstraint(const std::string &Constraint,
-                                   MVT VT) const override;
+    std::pair<unsigned, const TargetRegisterClass *>
+    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+                                 const std::string &Constraint,
+                                 MVT VT) const override;
 
     /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
     /// function arguments in the caller parameter area.  This is the actual
@@ -476,6 +548,10 @@ namespace llvm {
     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
     bool isTruncateFree(EVT VT1, EVT VT2) const override;
 
+    bool isZExtFree(SDValue Val, EVT VT2) const override;
+
+    bool isFPExtFree(EVT VT) const override;
+
     /// \brief Returns true if it is beneficial to convert a load of a constant
     /// to just the constant itself.
     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
@@ -516,6 +592,8 @@ namespace llvm {
     /// expanded to fmul + fadd.
     bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
 
+    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
+
     // Should we expand the build vector with shuffles?
     bool
     shouldExpandBuildVectorWithShuffles(EVT VT,
@@ -541,6 +619,29 @@ namespace llvm {
     }
 
   private:
+
+    struct ReuseLoadInfo {
+      SDValue Ptr;
+      SDValue Chain;
+      SDValue ResChain;
+      MachinePointerInfo MPI;
+      bool IsInvariant;
+      unsigned Alignment;
+      AAMDNodes AAInfo;
+      const MDNode *Ranges;
+
+      ReuseLoadInfo() : IsInvariant(false), Alignment(0), Ranges(nullptr) {}
+    };
+
+    bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
+                             SelectionDAG &DAG,
+                             ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
+    void spliceIntoChain(SDValue ResChain, SDValue NewResChain,
+                         SelectionDAG &DAG) const;
+
+    void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
+                                SelectionDAG &DAG, SDLoc dl) const;
+
     SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
     SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
 
@@ -563,8 +664,6 @@ namespace llvm {
     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
-    std::pair<SDValue,SDValue> lowerTLSCall(SDValue Op, SDLoc dl,
-                                            SelectionDAG &DAG) const;
     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
@@ -593,26 +692,31 @@ namespace llvm {
     SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
 
+    SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
+
     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                             CallingConv::ID CallConv, bool isVarArg,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             SDLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const;
     SDValue FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall,
-                       bool isVarArg,
+                       bool isVarArg, bool IsPatchPoint,
                        SelectionDAG &DAG,
                        SmallVector<std::pair<unsigned, SDValue>, 8>
                          &RegsToPass,
-                       SDValue InFlag, SDValue Chain,
+                       SDValue InFlag, SDValue Chain, SDValue CallSeqStart,
                        SDValue &Callee,
                        int SPDiff, unsigned NumBytes,
                        const SmallVectorImpl<ISD::InputArg> &Ins,
-                       SmallVectorImpl<SDValue> &InVals) const;
+                       SmallVectorImpl<SDValue> &InVals,
+                       ImmutableCallSite *CS) const;
 
     SDValue
       LowerFormalArguments(SDValue Chain,
@@ -669,41 +773,46 @@ namespace llvm {
     SDValue
       LowerCall_Darwin(SDValue Chain, SDValue Callee,
                        CallingConv::ID CallConv,
-                       bool isVarArg, bool isTailCall,
+                       bool isVarArg, bool isTailCall, bool IsPatchPoint,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SmallVectorImpl<ISD::InputArg> &Ins,
                        SDLoc dl, SelectionDAG &DAG,
-                       SmallVectorImpl<SDValue> &InVals) const;
+                       SmallVectorImpl<SDValue> &InVals,
+                       ImmutableCallSite *CS) const;
     SDValue
       LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                        CallingConv::ID CallConv,
-                       bool isVarArg, bool isTailCall,
+                       bool isVarArg, bool isTailCall, bool IsPatchPoint,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SmallVectorImpl<ISD::InputArg> &Ins,
                        SDLoc dl, SelectionDAG &DAG,
-                       SmallVectorImpl<SDValue> &InVals) const;
+                       SmallVectorImpl<SDValue> &InVals,
+                       ImmutableCallSite *CS) const;
     SDValue
     LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
-                     bool isVarArg, bool isTailCall,
+                     bool isVarArg, bool isTailCall, bool IsPatchPoint,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals,
                      const SmallVectorImpl<ISD::InputArg> &Ins,
                      SDLoc dl, SelectionDAG &DAG,
-                     SmallVectorImpl<SDValue> &InVals) const;
+                     SmallVectorImpl<SDValue> &InVals,
+                     ImmutableCallSite *CS) const;
 
     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
 
     SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
 
     SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
                              unsigned &RefinementSteps,
                              bool &UseOneConstNR) const override;
     SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
                              unsigned &RefinementSteps) const override;
+    bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
 
     CCAssignFn *useFastISelCCs(unsigned Flag) const;
   };