From 2c3e0051c31c3f5b2328b447eadf1cf9c4427442 Mon Sep 17 00:00:00 2001
From: Pirama Arumuga Nainar
Date: Wed, 6 May 2015 11:46:36 -0700
Subject: Update aosp/master LLVM for rebase to r235153

Change-Id: I9bf53792f9fc30570e81a8d80d296c681d005ea7
(cherry picked from commit 0c7f116bb6950ef819323d855415b2f2b0aad987)
---
 lib/Target/AArch64/AArch64ISelLowering.cpp | 234 ++++++++++++++++++++++-------
 1 file changed, 177 insertions(+), 57 deletions(-)

(limited to 'lib/Target/AArch64/AArch64ISelLowering.cpp')

diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0c0e856..90a5e5e 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -281,14 +281,39 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
 
-  // f16 is storage-only, so we promote operations to f32 if we know this is
-  // valid, and ignore them otherwise. The operations not mentioned here will
-  // fail to select, but this is not a major problem as no source language
-  // should be emitting native f16 operations yet.
-  setOperationAction(ISD::FADD, MVT::f16, Promote);
-  setOperationAction(ISD::FDIV, MVT::f16, Promote);
-  setOperationAction(ISD::FMUL, MVT::f16, Promote);
-  setOperationAction(ISD::FSUB, MVT::f16, Promote);
+  // f16 is a storage-only type, always promote it to f32.
+  setOperationAction(ISD::SETCC, MVT::f16, Promote);
+  setOperationAction(ISD::BR_CC, MVT::f16, Promote);
+  setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
+  setOperationAction(ISD::SELECT, MVT::f16, Promote);
+  setOperationAction(ISD::FADD, MVT::f16, Promote);
+  setOperationAction(ISD::FSUB, MVT::f16, Promote);
+  setOperationAction(ISD::FMUL, MVT::f16, Promote);
+  setOperationAction(ISD::FDIV, MVT::f16, Promote);
+  setOperationAction(ISD::FREM, MVT::f16, Promote);
+  setOperationAction(ISD::FMA, MVT::f16, Promote);
+  setOperationAction(ISD::FNEG, MVT::f16, Promote);
+  setOperationAction(ISD::FABS, MVT::f16, Promote);
+  setOperationAction(ISD::FCEIL, MVT::f16, Promote);
+  setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
+  setOperationAction(ISD::FCOS, MVT::f16, Promote);
+  setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
+  setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
+  setOperationAction(ISD::FPOW, MVT::f16, Promote);
+  setOperationAction(ISD::FPOWI, MVT::f16, Promote);
+  setOperationAction(ISD::FRINT, MVT::f16, Promote);
+  setOperationAction(ISD::FSIN, MVT::f16, Promote);
+  setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
+  setOperationAction(ISD::FSQRT, MVT::f16, Promote);
+  setOperationAction(ISD::FEXP, MVT::f16, Promote);
+  setOperationAction(ISD::FEXP2, MVT::f16, Promote);
+  setOperationAction(ISD::FLOG, MVT::f16, Promote);
+  setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+  setOperationAction(ISD::FLOG10, MVT::f16, Promote);
+  setOperationAction(ISD::FROUND, MVT::f16, Promote);
+  setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
+  setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
+  setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
 
   // v4f16 is also a storage-only type, so promote it to v4f32 when that is
   // known to be safe.
@@ -481,6 +506,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
   // Enable TBZ/TBNZ
   MaskAndBranchFoldingIsLegal = true;
+  EnableExtLdPromotion = true;
 
   setMinFunctionAlignment(2);
 
@@ -1557,6 +1583,14 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
   if (Op.getOperand(0).getValueType().isVector())
     return LowerVectorFP_TO_INT(Op, DAG);
 
+  // f16 conversions are promoted to f32.
+  if (Op.getOperand(0).getValueType() == MVT::f16) {
+    SDLoc dl(Op);
+    return DAG.getNode(
+        Op.getOpcode(), dl, Op.getValueType(),
+        DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
+  }
+
   if (Op.getOperand(0).getValueType() != MVT::f128) {
     // It's legal except when f128 is involved
     return Op;
@@ -1606,6 +1640,15 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
   if (Op.getValueType().isVector())
     return LowerVectorINT_TO_FP(Op, DAG);
 
+  // f16 conversions are promoted to f32.
+  if (Op.getValueType() == MVT::f16) {
+    SDLoc dl(Op);
+    return DAG.getNode(
+        ISD::FP_ROUND, dl, MVT::f16,
+        DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
+        DAG.getIntPtrConstant(0));
+  }
+
   // i128 conversions are libcalls.
   if (Op.getOperand(0).getValueType() == MVT::i128)
     return SDValue();
@@ -2701,8 +2744,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
             DAG.getConstant(Outs[i].Flags.getByValSize(), MVT::i64);
         SDValue Cpy = DAG.getMemcpy(
             Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
-            /*isVol = */ false,
-            /*AlwaysInline = */ false, DstInfo, MachinePointerInfo());
+            /*isVol = */ false, /*AlwaysInline = */ false,
+            /*isTailCall = */ false,
+            DstInfo, MachinePointerInfo());
 
         MemOpChains.push_back(Cpy);
       } else {
@@ -3514,49 +3558,10 @@ static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
   return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp;
 }
 
-SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
-                                           SelectionDAG &DAG) const {
-  SDValue CC = Op->getOperand(0);
-  SDValue TVal = Op->getOperand(1);
-  SDValue FVal = Op->getOperand(2);
-  SDLoc DL(Op);
-
-  unsigned Opc = CC.getOpcode();
-  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
-  // instruction.
-  if (CC.getResNo() == 1 &&
-      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
-       Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
-    // Only lower legal XALUO ops.
-    if (!DAG.getTargetLoweringInfo().isTypeLegal(CC->getValueType(0)))
-      return SDValue();
-
-    AArch64CC::CondCode OFCC;
-    SDValue Value, Overflow;
-    std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CC.getValue(0), DAG);
-    SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
-
-    return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
-                       CCVal, Overflow);
-  }
-
-  if (CC.getOpcode() == ISD::SETCC)
-    return DAG.getSelectCC(DL, CC.getOperand(0), CC.getOperand(1), TVal, FVal,
-                           cast<CondCodeSDNode>(CC.getOperand(2))->get());
-  else
-    return DAG.getSelectCC(DL, CC, DAG.getConstant(0, CC.getValueType()), TVal,
-                           FVal, ISD::SETNE);
-}
-
-SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
+SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
+                                              SDValue RHS, SDValue TVal,
+                                              SDValue FVal, SDLoc dl,
                                               SelectionDAG &DAG) const {
-  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
-  SDValue LHS = Op.getOperand(0);
-  SDValue RHS = Op.getOperand(1);
-  SDValue TVal = Op.getOperand(2);
-  SDValue FVal = Op.getOperand(3);
-  SDLoc dl(Op);
-
   // Handle f128 first, because it will result in a comparison of some RTLIB
   // call result against zero.
   if (LHS.getValueType() == MVT::f128) {
@@ -3664,14 +3669,14 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
     SDValue CCVal;
     SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
 
-    EVT VT = Op.getValueType();
+    EVT VT = TVal.getValueType();
     return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
   }
 
   // Now we know we're dealing with FP values.
   assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
   assert(LHS.getValueType() == RHS.getValueType());
-  EVT VT = Op.getValueType();
+  EVT VT = TVal.getValueType();
 
   // Try to match this select into a max/min operation, which have dedicated
   // opcode in the instruction set.
@@ -3732,6 +3737,58 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
   return CS1;
 }
 
+SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  SDValue TVal = Op.getOperand(2);
+  SDValue FVal = Op.getOperand(3);
+  SDLoc DL(Op);
+  return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
+}
+
+SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
+                                           SelectionDAG &DAG) const {
+  SDValue CCVal = Op->getOperand(0);
+  SDValue TVal = Op->getOperand(1);
+  SDValue FVal = Op->getOperand(2);
+  SDLoc DL(Op);
+
+  unsigned Opc = CCVal.getOpcode();
+  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
+  // instruction.
+  if (CCVal.getResNo() == 1 &&
+      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+       Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
+    // Only lower legal XALUO ops.
+    if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
+      return SDValue();
+
+    AArch64CC::CondCode OFCC;
+    SDValue Value, Overflow;
+    std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
+    SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
+
+    return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
+                       CCVal, Overflow);
+  }
+
+  // Lower it the same way as we would lower a SELECT_CC node.
+  ISD::CondCode CC;
+  SDValue LHS, RHS;
+  if (CCVal.getOpcode() == ISD::SETCC) {
+    LHS = CCVal.getOperand(0);
+    RHS = CCVal.getOperand(1);
+    CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
+  } else {
+    LHS = CCVal;
+    RHS = DAG.getConstant(0, CCVal.getValueType());
+    CC = ISD::SETNE;
+  }
+  return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
+}
+
 SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
                                               SelectionDAG &DAG) const {
   // Jump table entries as PC relative offsets. No additional tweaking
@@ -3920,7 +3977,7 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
 
   return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), Op.getOperand(1),
                        Op.getOperand(2), DAG.getConstant(VaListSize, MVT::i32),
-                       8, false, false, MachinePointerInfo(DestSV),
+                       8, false, false, false, MachinePointerInfo(DestSV),
                        MachinePointerInfo(SrcSV));
 }
 
@@ -4989,7 +5046,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
     unsigned Opcode;
     if (EltTy == MVT::i8)
       Opcode = AArch64ISD::DUPLANE8;
-    else if (EltTy == MVT::i16)
+    else if (EltTy == MVT::i16 || EltTy == MVT::f16)
       Opcode = AArch64ISD::DUPLANE16;
     else if (EltTy == MVT::i32 || EltTy == MVT::f32)
       Opcode = AArch64ISD::DUPLANE32;
@@ -6554,6 +6611,59 @@ bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
          VT1.getSizeInBits() <= 32);
 }
 
+bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
+  if (isa<FPExtInst>(Ext))
+    return false;
+
+  // Vector types are not free.
+  if (Ext->getType()->isVectorTy())
+    return false;
+
+  for (const Use &U : Ext->uses()) {
+    // The extension is free if we can fold it with a left shift in an
+    // addressing mode or an arithmetic operation: add, sub, and cmp.
+
+    // Is there a shift?
+    const Instruction *Instr = cast<Instruction>(U.getUser());
+
+    // Is this a constant shift?
+    switch (Instr->getOpcode()) {
+    case Instruction::Shl:
+      if (!isa<ConstantInt>(Instr->getOperand(1)))
+        return false;
+      break;
+    case Instruction::GetElementPtr: {
+      gep_type_iterator GTI = gep_type_begin(Instr);
+      std::advance(GTI, U.getOperandNo());
+      Type *IdxTy = *GTI;
+      // This extension will end up with a shift because of the scaling factor.
+      // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
+      // Get the shift amount based on the scaling factor:
+      // log2(sizeof(IdxTy)) - log2(8).
+      uint64_t ShiftAmt =
+          countTrailingZeros(getDataLayout()->getTypeStoreSizeInBits(IdxTy)) - 3;
+      // Is the constant foldable in the shift of the addressing mode?
+      // I.e., shift amount is between 1 and 4 inclusive.
+      if (ShiftAmt == 0 || ShiftAmt > 4)
+        return false;
+      break;
+    }
+    case Instruction::Trunc:
+      // Check if this is a noop.
+      // trunc(sext ty1 to ty2) to ty1.
+      if (Instr->getType() == Ext->getOperand(0)->getType())
+        continue;
+      // FALL THROUGH.
+    default:
+      return false;
+    }
+
+    // At this point we can use the bfm family, so this extension is free
+    // for that use.
+  }
+  return true;
+}
+
 bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType,
                                           unsigned &RequiredAligment) const {
   if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy())
@@ -6597,7 +6707,17 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
        (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
     return MVT::f128;
 
-  return Size >= 8 ? MVT::i64 : MVT::i32;
+  if (Size >= 8 &&
+      (memOpAlign(SrcAlign, DstAlign, 8) ||
+       (allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast)))
+    return MVT::i64;
+
+  if (Size >= 4 &&
+      (memOpAlign(SrcAlign, DstAlign, 4) ||
+       (allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast)))
+    return MVT::i32;
+
+  return MVT::Other;
 }
 
 // 12-bit optionally shifted immediates are legal for adds.
-- 
cgit v1.1
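
For context on the f16 hunks above: the patch keeps f16 as a storage-only type, so every half-precision operation and every int/f16 conversion is widened to f32, evaluated there, and only the final value is rounded back to f16 (FP_EXTEND before the operation, FP_ROUND after it). Below is a minimal standalone C++ sketch of that promote-compute-round pattern at the source level. It is illustrative only, not part of the patch; it assumes a compiler that provides the _Float16 extension type, and the helper names are made up for the example.

// Illustrative sketch only -- not taken from the patch. Assumes _Float16 support.
#include <cstdint>
#include <cstdio>

// i32 -> f16: convert at f32, then narrow, mirroring the LowerINT_TO_FP hunk
// (the opcode is performed at f32, then FP_ROUND narrows to f16).
static _Float16 int_to_f16(int32_t v) {
  float widened = static_cast<float>(v);   // SINT_TO_FP: i32 -> f32
  return static_cast<_Float16>(widened);   // FP_ROUND:   f32 -> f16
}

// f16 -> i32: widen first, then convert, mirroring the LowerFP_TO_INT hunk
// (FP_EXTEND to f32, then the conversion runs at f32).
static int32_t f16_to_int(_Float16 h) {
  float widened = static_cast<float>(h);   // FP_EXTEND:  f16 -> f32
  return static_cast<int32_t>(widened);    // FP_TO_SINT: f32 -> i32
}

// f16 arithmetic is promoted the same way (the ISD::FADD f16 Promote entry):
// extend both operands, add at f32, round the result back to f16.
static _Float16 f16_add(_Float16 a, _Float16 b) {
  return static_cast<_Float16>(static_cast<float>(a) + static_cast<float>(b));
}

int main() {
  _Float16 h = int_to_f16(1000);        // 1000 is exactly representable in f16
  int sum = f16_to_int(f16_add(h, h));  // 2000 is still exact (f16 holds integers up to 2048)
  std::printf("%d\n", sum);             // prints 2000
  return 0;
}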