diff options
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
| -rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 265 | ||||
| -rw-r--r-- | lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 2 | ||||
| -rw-r--r-- | lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 43 | ||||
| -rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 2 | ||||
| -rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 | ||||
| -rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 73 | ||||
| -rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 13 | ||||
| -rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 6 | ||||
| -rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 4 | ||||
| -rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 100 | ||||
| -rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 120 | ||||
| -rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 15 | ||||
| -rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 6 | ||||
| -rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 24 | ||||
| -rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 7 | ||||
| -rw-r--r-- | lib/CodeGen/SelectionDAG/TargetLowering.cpp | 5 |
16 files changed, 557 insertions, 129 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 1c485a0..d7fa009 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -413,7 +413,7 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) return 0; - // fold (fsub (fadd A, B)) -> (fsub (fneg A), B) + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, Depth + 1)) return V; @@ -2496,8 +2496,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // lanes of the constant together. EVT VT = Vector->getValueType(0); unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); + + // If the splat value has been compressed to a bitlength lower + // than the size of the vector lane, we need to re-expand it to + // the lane size. + if (BitWidth > SplatBitSize) + for (SplatValue = SplatValue.zextOrTrunc(BitWidth); + SplatBitSize < BitWidth; + SplatBitSize = SplatBitSize * 2) + SplatValue |= SplatValue.shl(SplatBitSize); + Constant = APInt::getAllOnesValue(BitWidth); - for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i) + for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); } } @@ -5681,6 +5691,127 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(1), N1)); + // In unsafe math mode, we can fold chains of FADD's of the same value + // into multiplications. This transform is not safe in general because + // we are reducing the number of rounding steps. + if (DAG.getTarget().Options.UnsafeFPMath && + TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && + !N0CFP && !N1CFP) { + if (N0.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); + ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); + + // (fadd (fmul c, x), x) -> (fmul c+1, x) + if (CFP00 && !CFP01 && N0.getOperand(1) == N1) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP00, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, NewCFP); + } + + // (fadd (fmul x, c), x) -> (fmul c+1, x) + if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP01, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, NewCFP); + } + + // (fadd (fadd x, x), x) -> (fmul 3.0, x) + if (!CFP00 && !CFP01 && N0.getOperand(0) == N0.getOperand(1) && + N0.getOperand(0) == N1) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, DAG.getConstantFP(3.0, VT)); + } + + // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x) + if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(1) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP00, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(1), NewCFP); + } + + // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x) + if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP01, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), NewCFP); + } + } + + if (N1.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); + ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); + + // (fadd x, (fmul c, x)) -> (fmul c+1, x) + if (CFP10 && !CFP11 && N1.getOperand(1) == N0) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP10, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, NewCFP); + } + + // (fadd x, (fmul x, c)) -> (fmul c+1, x) + if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP11, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, NewCFP); + } + + // (fadd x, (fadd x, x)) -> (fmul 3.0, x) + if (!CFP10 && !CFP11 && N1.getOperand(0) == N1.getOperand(1) && + N1.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, DAG.getConstantFP(3.0, VT)); + } + + // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x) + if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(1) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP10, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(1), NewCFP); + } + + // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x) + if (CFP11 && !CFP10 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP11, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), NewCFP); + } + } + + // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x) + if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), + DAG.getConstantFP(4.0, VT)); + } + } + // FADD -> FMA combines: if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && @@ -5692,8 +5823,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), N1); } - - // fold (fadd x, (fmul y, z)) -> (fma x, y, z) + + // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, @@ -5867,6 +5998,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); if (N0CFP && N0CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2); @@ -5877,6 +6009,58 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N0CFP && !N1CFP) return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2); + // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N2.getOpcode() == ISD::FMUL && + N0 == N2.getOperand(0) && + N2.getOperand(1).getOpcode() == ISD::ConstantFP) { + return DAG.getNode(ISD::FMUL, dl, VT, N0, + DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1))); + } + + + // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) + if (DAG.getTarget().Options.UnsafeFPMath && + N0.getOpcode() == ISD::FMUL && N1CFP && + N0.getOperand(1).getOpcode() == ISD::ConstantFP) { + return DAG.getNode(ISD::FMA, dl, VT, + N0.getOperand(0), + DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)), + N2); + } + + // (fma x, 1, y) -> (fadd x, y) + // (fma x, -1, y) -> (fadd (fneg x), y) + if (N1CFP) { + if (N1CFP->isExactlyValue(1.0)) + return DAG.getNode(ISD::FADD, dl, VT, N0, N2); + + if (N1CFP->isExactlyValue(-1.0) && + (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { + SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); + AddToWorkList(RHSNeg.getNode()); + return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); + } + } + + // (fma x, c, x) -> (fmul x, (c+1)) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) { + return DAG.getNode(ISD::FMUL, dl, VT, + N0, + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(1.0, VT))); + } + + // (fma x, c, (fneg x)) -> (fmul x, (c-1)) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, dl, VT, + N0, + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(-1.0, VT))); + } + + return SDValue(); } @@ -6225,6 +6409,30 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + if (VT.isVector() && !LegalOperations) { + // If operand is a BUILD_VECTOR node, see if we can constant fold it. + if (N0.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { + SDValue Op = N0.getOperand(i); + if (Op.getOpcode() != ISD::UNDEF && + Op.getOpcode() != ISD::ConstantFP) + break; + EVT EltVT = Op.getValueType(); + SDValue FoldOp = DAG.getNode(ISD::FNEG, N0.getDebugLoc(), EltVT, Op); + if (FoldOp.getOpcode() != ISD::UNDEF && + FoldOp.getOpcode() != ISD::ConstantFP) + break; + Ops.push_back(FoldOp); + AddToWorkList(FoldOp.getNode()); + } + + if (Ops.size() == N0.getNumOperands()) + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + VT, &Ops[0], Ops.size()); + } + } + if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options)) return GetNegatedExpression(N0, DAG, LegalOperations); @@ -6246,6 +6454,17 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { } } + // (fneg (fmul c, x)) -> (fmul -c, x) + if (N0.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); + if (CFP1) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), + DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, + N0.getOperand(1))); + } + } + return SDValue(); } @@ -7876,29 +8095,15 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) return SDValue(); - // If the element type of the input vector is not the same as - // the output element type, make concat_vectors based on input element - // type and then bitcast it to the output vector type. - // - // In another words avoid nodes like this: - // <NODE> v16i8 = concat_vectors v4i16 v4i16 - // Replace it with this one: - // <NODE0> v8i16 = concat_vectors v4i16 v4i16 - // <NODE1> v16i8 = bitcast NODE0 - EVT ItemType = VecIn1.getValueType().getVectorElementType(); - if (ItemType != VT.getVectorElementType()) { - EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), - ItemType, - VecIn1.getValueType().getVectorNumElements()*2); - // Widen the input vector by adding undef values. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, - VecIn1, DAG.getUNDEF(VecIn1.getValueType())); - VecIn1 = DAG.getNode(ISD::BITCAST, dl, VT, VecIn1); - } else - // Widen the input vector by adding undef values. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, - VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + // If the input vector type has a different base type to the output + // vector type, bail out. + if (VecIn1.getValueType().getVectorElementType() != + VT.getVectorElementType()) + return SDValue(); + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); } // If VecIn2 is unused then change it to undef. @@ -8749,7 +8954,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { // to alias with anything but itself. Provides base object and offset as // results. static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, - const GlobalValue *&GV, void *&CV) { + const GlobalValue *&GV, const void *&CV) { // Assume it is a primitive operation. Base = Ptr; Offset = 0; GV = 0; CV = 0; @@ -8774,8 +8979,8 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, // for ConstantSDNodes since the same constant pool entry may be represented // by multiple nodes with different offsets. if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) { - CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal() - : (void *)C->getConstVal(); + CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal() + : (const void *)C->getConstVal(); Offset += C->getOffset(); return false; } @@ -8800,7 +9005,7 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, SDValue Base1, Base2; int64_t Offset1, Offset2; const GlobalValue *GV1, *GV2; - void *CV1, *CV2; + const void *CV1, *CV2; bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1); bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 3e18ea7..b2a2a5c 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -97,7 +97,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = MF->getFrameInfo()->CreateStackObject(TySize, Align, false, - MayNeedSP); + MayNeedSP, AI); } for (; BB != EB; ++BB) diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 4488d27..6d2cdea 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -55,7 +55,8 @@ unsigned InstrEmitter::CountResults(SDNode *Node) { /// /// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding /// the chain and glue. These operands may be implicit on the machine instr. -static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) { +static unsigned countOperands(SDNode *Node, unsigned NumExpUses, + unsigned &NumImpUses) { unsigned N = Node->getNumOperands(); while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) --N; @@ -63,7 +64,8 @@ static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) { --N; // Ignore chain if it exists. // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses. - for (unsigned I = N; I; --I) { + NumImpUses = N - NumExpUses; + for (unsigned I = N; I > NumExpUses; --I) { if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1))) continue; if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1))) @@ -720,7 +722,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); unsigned NumImpUses = 0; - unsigned NodeOperands = countOperands(Node, NumImpUses); + unsigned NodeOperands = + countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses); bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; @@ -870,6 +873,17 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, break; } + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: { + unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ? + TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END; + + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Node->getOperand(1)); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp)) + .addFrameIndex(FI->getIndex()); + break; + } + case ISD::INLINEASM: { unsigned NumOps = Node->getNumOperands(); if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) @@ -890,19 +904,23 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, getZExtValue(); MI->addOperand(MachineOperand::CreateImm(ExtraInfo)); + // Remember to operand index of the group flags. + SmallVector<unsigned, 8> GroupIdx; + // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); - unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + GroupIdx.push_back(MI->getNumOperands()); MI->addOperand(MachineOperand::CreateImm(Flags)); ++i; // Skip the ID value. switch (InlineAsm::getKind(Flags)) { default: llvm_unreachable("Bad flags!"); case InlineAsm::Kind_RegDef: - for (; NumVals; --NumVals, ++i) { + for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); // FIXME: Add dead flags for physical and virtual registers defined. // For now, mark physical register defs as implicit to help fast @@ -913,7 +931,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, break; case InlineAsm::Kind_RegDefEarlyClobber: case InlineAsm::Kind_Clobber: - for (; NumVals; --NumVals, ++i) { + for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true, /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg), @@ -928,9 +946,20 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case InlineAsm::Kind_Mem: // Addressing mode. // The addressing mode has been selected, just add all of the // operands to the machine instruction. - for (; NumVals; --NumVals, ++i) + for (unsigned j = 0; j != NumVals; ++j, ++i) AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); + + // Manually set isTied bits. + if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) { + unsigned DefGroup = 0; + if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) { + unsigned DefIdx = GroupIdx[DefGroup] + 1; + unsigned UseIdx = GroupIdx.back() + 1; + for (unsigned j = 0; j != NumVals; ++j) + MI->tieOperands(DefIdx + j, UseIdx + j); + } + } break; } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 908ebb9..7b34170 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2042,7 +2042,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, DebugLoc dl) { - if (Op0.getValueType() == MVT::i32) { + if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { // simple 32-bit [signed|unsigned] integer to float/double expansion // Get the stack frame index of a 8 byte buffer. diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 94fc976..37f0e60 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -625,6 +625,7 @@ private: SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); SDValue WidenVecRes_VSETCC(SDNode* N); + SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 704f99b..22f8d51 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -64,6 +64,7 @@ class VectorLegalizer { // Implement vselect in terms of XOR, AND, OR when blend is not supported // by the target. SDValue ExpandVSELECT(SDValue Op); + SDValue ExpandSELECT(SDValue Op); SDValue ExpandLoad(SDValue Op); SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); @@ -220,6 +221,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FFLOOR: + case ISD::FMA: case ISD::SIGN_EXTEND_INREG: QueryType = Node->getValueType(0); break; @@ -260,6 +262,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case TargetLowering::Expand: if (Node->getOpcode() == ISD::VSELECT) Result = ExpandVSELECT(Op); + else if (Node->getOpcode() == ISD::SELECT) + Result = ExpandSELECT(Op); else if (Node->getOpcode() == ISD::UINT_TO_FP) Result = ExpandUINT_TO_FLOAT(Op); else if (Node->getOpcode() == ISD::FNEG) @@ -435,6 +439,66 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { return TF; } +SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { + // Lower a select instruction where the condition is a scalar and the + // operands are vectors. Lower this select to VSELECT and implement it + // using XOR AND OR. The selector bit is broadcasted. + EVT VT = Op.getValueType(); + DebugLoc DL = Op.getDebugLoc(); + + SDValue Mask = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + + assert(VT.isVector() && !Mask.getValueType().isVector() + && Op1.getValueType() == Op2.getValueType() && "Invalid type"); + + unsigned NumElem = VT.getVectorNumElements(); + + // If we can't even use the basic vector operations of + // AND,OR,XOR, we will have to scalarize the op. + // Notice that the operation may be 'promoted' which means that it is + // 'bitcasted' to another type which is handled. + // Also, we need to be able to construct a splat vector using BUILD_VECTOR. + if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); + + // Generate a mask operand. + EVT MaskTy = TLI.getSetCCResultType(VT); + assert(MaskTy.isVector() && "Invalid CC type"); + assert(MaskTy.getSizeInBits() == Op1.getValueType().getSizeInBits() + && "Invalid mask size"); + + // What is the size of each element in the vector mask. + EVT BitTy = MaskTy.getScalarType(); + + Mask = DAG.getNode(ISD::SELECT, DL, BitTy, Mask, + DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy), + DAG.getConstant(0, BitTy)); + + // Broadcast the mask so that the entire vector is all-one or all zero. + SmallVector<SDValue, 8> Ops(NumElem, Mask); + Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size()); + + // Bitcast the operands to be the same type as the mask. + // This is needed when we select between FP types because + // the mask is a vector of integers. + Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1); + Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2); + + SDValue AllOnes = DAG.getConstant( + APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy); + SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes); + + Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); + Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask); + SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2); + return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. @@ -449,12 +513,17 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // AND,OR,XOR, we will have to scalarize the op. // Notice that the operation may be 'promoted' which means that it is // 'bitcasted' to another type which is handled. + // This operation also isn't safe with AND, OR, XOR when the boolean + // type is 0/1 as we need an all ones vector constant to mask with. + // FIXME: Sign extend 1 to all ones if thats legal on the target. if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || - TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand) + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || + TLI.getBooleanContents(true) != + TargetLowering::ZeroOrNegativeOneBooleanContent) return DAG.UnrollVectorOp(Op.getNode()); - assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits() + assert(VT.getSizeInBits() == Op1.getValueType().getSizeInBits() && "Invalid mask size"); // Bitcast the operands to be the same type as the mask. // This is needed when we select between FP types because diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 4709202..4095728 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1366,6 +1366,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FTRUNC: Res = WidenVecRes_Unary(N); break; + case ISD::FMA: + Res = WidenVecRes_Ternary(N); + break; } // If Res is null, the sub-method took care of registering the result. @@ -1373,6 +1376,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { SetWidenedVector(SDValue(N, ResNo), Res); } +SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { + // Ternary op widening. + DebugLoc dl = N->getDebugLoc(); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + SDValue InOp3 = GetWidenedVector(N->getOperand(2)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); +} + SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { // Binary op widening. unsigned Opcode = N->getOpcode(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index bf0a437..2b86e36 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -656,6 +656,8 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) { break; case ISD::MERGE_VALUES: case ISD::TokenFactor: + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: case ISD::CopyToReg: case ISD::CopyFromReg: case ISD::EH_LABEL: @@ -1756,6 +1758,7 @@ public: return V; } +#ifndef NDEBUG void dump(ScheduleDAG *DAG) const { // Emulate pop() without clobbering NodeQueueIds. std::vector<SUnit*> DumpQueue = Queue; @@ -1766,6 +1769,7 @@ public: SU->dump(DAG); } } +#endif }; typedef RegReductionPriorityQueue<bu_ls_rr_sort> @@ -1893,6 +1897,7 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { //===----------------------------------------------------------------------===// void RegReductionPQBase::dumpRegPressure() const { +#ifndef NDEBUG for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), E = TRI->regclass_end(); I != E; ++I) { const TargetRegisterClass *RC = *I; @@ -1902,6 +1907,7 @@ void RegReductionPQBase::dumpRegPressure() const { DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] << '\n'); } +#endif } bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 748668c..222dc55 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -643,6 +643,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, } void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { +#ifndef NDEBUG if (!SU->getNode()) { dbgs() << "PHYS REG COPY\n"; return; @@ -659,8 +660,10 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { dbgs() << "\n"; GluedNodes.pop_back(); } +#endif } +#ifndef NDEBUG void ScheduleDAGSDNodes::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { if (SUnit *SU = Sequence[i]) @@ -669,6 +672,7 @@ void ScheduleDAGSDNodes::dumpSchedule() const { dbgs() << "**** NOOP ****\n"; } } +#endif #ifndef NDEBUG /// VerifyScheduledSequence - Verify that all SUnits were scheduled and that diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f4fe892..d85d41b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1097,10 +1097,9 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, "Cannot set target flags on target-independent globals"); // Truncate (with sign-extension) the offset value to the pointer size. - EVT PTy = TLI.getPointerTy(); - unsigned BitWidth = PTy.getSizeInBits(); + unsigned BitWidth = TLI.getPointerTy().getSizeInBits(); if (BitWidth < 64) - Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth)); + Offset = SignExtend64(Offset, BitWidth); const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); if (!GVar) { @@ -2817,6 +2816,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2)) if (CFP->getValueAPF().isZero()) return N1; + } else if (Opcode == ISD::FMUL) { + ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1); + SDValue V = N2; + + // If the first operand isn't the constant, try the second + if (!CFP) { + CFP = dyn_cast<ConstantFPSDNode>(N2); + V = N1; + } + + if (CFP) { + // 0*x --> 0 + if (CFP->isZero()) + return SDValue(CFP,0); + // 1*x --> x + if (CFP->isExactlyValue(1.0)) + return V; + } } } assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); @@ -2935,17 +2952,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // expanding large vector constants. if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) { SDValue Elt = N1.getOperand(N2C->getZExtValue()); - EVT VEltTy = N1.getValueType().getVectorElementType(); - if (Elt.getValueType() != VEltTy) { + + if (VT != Elt.getValueType()) // If the vector element type is not legal, the BUILD_VECTOR operands - // are promoted and implicitly truncated. Make that explicit here. - Elt = getNode(ISD::TRUNCATE, DL, VEltTy, Elt); - } - if (VT != VEltTy) { - // If the vector element type is not legal, the EXTRACT_VECTOR_ELT - // result is implicitly extended. - Elt = getNode(ISD::ANY_EXTEND, DL, VT, Elt); - } + // are promoted and implicitly truncated, and the result implicitly + // extended. Make that explicit here. + Elt = getAnyExtOrTrunc(Elt, DL, VT); + return Elt; } @@ -3923,17 +3936,21 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, AtomicOrdering Ordering, - SynchronizationScope SynchScope) { + SynchronizationScope SynchScope) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + // All atomics are load and store, except for ATMOIC_LOAD and ATOMIC_STORE. // For now, atomics are considered to be volatile always. // FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. - Flags |= MachineMemOperand::MOVolatile; + unsigned Flags = MachineMemOperand::MOVolatile; + if (Opcode != ISD::ATOMIC_STORE) + Flags |= MachineMemOperand::MOLoad; + if (Opcode != ISD::ATOMIC_LOAD) + Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); @@ -3983,17 +4000,17 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Alignment = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - // A monotonic store does not load; a release store "loads" in the sense - // that other stores cannot be sunk past it. + // An atomic store does not load. An atomic load does not store. // (An atomicrmw obviously both loads and stores.) - unsigned Flags = MachineMemOperand::MOStore; - if (Opcode != ISD::ATOMIC_STORE || Ordering > Monotonic) - Flags |= MachineMemOperand::MOLoad; - - // For now, atomics are considered to be volatile always. + // For now, atomics are considered to be volatile always, and they are + // chained as such. // FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. - Flags |= MachineMemOperand::MOVolatile; + unsigned Flags = MachineMemOperand::MOVolatile; + if (Opcode != ISD::ATOMIC_STORE) + Flags |= MachineMemOperand::MOLoad; + if (Opcode != ISD::ATOMIC_LOAD) + Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, @@ -4056,16 +4073,17 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Alignment = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - // A monotonic load does not store; an acquire load "stores" in the sense - // that other loads cannot be hoisted past it. - unsigned Flags = MachineMemOperand::MOLoad; - if (Ordering > Monotonic) - Flags |= MachineMemOperand::MOStore; - - // For now, atomics are considered to be volatile always. + // An atomic store does not load. An atomic load does not store. + // (An atomicrmw obviously both loads and stores.) + // For now, atomics are considered to be volatile always, and they are + // chained as such. // FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. - Flags |= MachineMemOperand::MOVolatile; + unsigned Flags = MachineMemOperand::MOVolatile; + if (Opcode != ISD::ATOMIC_STORE) + Flags |= MachineMemOperand::MOLoad; + if (Opcode != ISD::ATOMIC_LOAD) + Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, @@ -4157,6 +4175,8 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, assert((Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::PREFETCH || + Opcode == ISD::LIFETIME_START || + Opcode == ISD::LIFETIME_END || (Opcode <= INT_MAX && (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && "Opcode is not a memory-accessing opcode!"); @@ -4226,7 +4246,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo, const MDNode *Ranges) { - assert(Chain.getValueType() == MVT::Other && + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(VT); @@ -4284,7 +4304,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), - MMO->isNonTemporal(), + MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; @@ -4303,7 +4323,7 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, - bool isInvariant, unsigned Alignment, + bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo, const MDNode *Ranges) { SDValue Undef = getUNDEF(Ptr.getValueType()); @@ -4332,7 +4352,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, "Load is already a indexed load!"); return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, LD->getChain(), Base, Offset, LD->getPointerInfo(), - LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), + LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), false, LD->getAlignment()); } @@ -4340,7 +4360,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { - assert(Chain.getValueType() == MVT::Other && + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(Val.getValueType()); @@ -4365,7 +4385,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, MachineMemOperand *MMO) { - assert(Chain.getValueType() == MVT::Other && + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); EVT VT = Val.getValueType(); SDVTList VTs = getVTList(MVT::Other); @@ -4394,7 +4414,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, EVT SVT,bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { - assert(Chain.getValueType() == MVT::Other && + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(SVT); @@ -4421,7 +4441,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, MachineMemOperand *MMO) { EVT VT = Val.getValueType(); - assert(Chain.getValueType() == MVT::Other && + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (VT == SVT) return getStore(Chain, dl, Val, Ptr, MMO); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index f3cf758..483b051 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/CallingConv.h" #include "llvm/DebugInfo.h" @@ -825,6 +826,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, GFI = gfi; LibInfo = li; TD = DAG.getTarget().getTargetData(); + Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -1765,6 +1767,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, /// visitBitTestCase - this function produces one "bit test" void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, + uint32_t BranchWeightToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { @@ -1802,8 +1805,10 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, ISD::SETNE); } - addSuccessorWithWeight(SwitchBB, B.TargetBB); - addSuccessorWithWeight(SwitchBB, NextMBB); + // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. + addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight); + // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. + addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -1926,6 +1931,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; + BranchProbabilityInfo *BPI = FuncInfo.BPI; // If any two of the cases has the same destination, and if one value // is the same as the other, but has one bit unset that the other has set, // use bit manipulation to do two compares at once. For example: @@ -1959,8 +1965,12 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, ISD::SETEQ); // Update successor info. - addSuccessorWithWeight(SwitchBB, Small.BB); - addSuccessorWithWeight(SwitchBB, Default); + // Both Small and Big will jump to Small.BB, so we sum up the weights. + addSuccessorWithWeight(SwitchBB, Small.BB, + Small.ExtraWeight + Big.ExtraWeight); + addSuccessorWithWeight(SwitchBB, Default, + // The default destination is the first successor in IR. + BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0); // Insert the true branch. SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, @@ -1978,14 +1988,13 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } // Order cases by weight so the most likely case will be checked first. - BranchProbabilityInfo *BPI = FuncInfo.BPI; + uint32_t UnhandledWeights = 0; if (BPI) { for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) { - uint32_t IWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(), - I->BB->getBasicBlock()); + uint32_t IWeight = I->ExtraWeight; + UnhandledWeights += IWeight; for (CaseItr J = CR.Range.first; J < I; ++J) { - uint32_t JWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(), - J->BB->getBasicBlock()); + uint32_t JWeight = J->ExtraWeight; if (IWeight > JWeight) std::swap(*I, *J); } @@ -2034,10 +2043,12 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, LHS = I->Low; MHS = SV; RHS = I->High; } - uint32_t ExtraWeight = I->ExtraWeight; + // The false weight should be sum of all un-handled cases. + UnhandledWeights -= I->ExtraWeight; CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough, /* me */ CurBlock, - /* trueweight */ ExtraWeight / 2, /* falseweight */ ExtraWeight / 2); + /* trueweight */ I->ExtraWeight, + /* falseweight */ UnhandledWeights); // If emitting the first comparison, just call visitSwitchCase to emit the // code into the current block. Otherwise, push the CaseBlock onto the @@ -2137,13 +2148,28 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, } } + // Calculate weight for each unique destination in CR. + DenseMap<MachineBasicBlock*, uint32_t> DestWeights; + if (FuncInfo.BPI) + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { + DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = + DestWeights.find(I->BB); + if (Itr != DestWeights.end()) + Itr->second += I->ExtraWeight; + else + DestWeights[I->BB] = I->ExtraWeight; + } + // Update successor info. Add one edge to each unique successor. BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(), E = DestBBs.end(); I != E; ++I) { if (!SuccsHandled[(*I)->getNumber()]) { SuccsHandled[(*I)->getNumber()] = true; - addSuccessorWithWeight(JumpTableBB, *I); + DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = + DestWeights.find(*I); + addSuccessorWithWeight(JumpTableBB, *I, + Itr != DestWeights.end() ? Itr->second : 0); } } @@ -2374,7 +2400,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, if (i == count) { assert((count < 3) && "Too much destinations to test!"); - CasesBits.push_back(CaseBits(0, Dest, 0)); + CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/)); count++; } @@ -2383,6 +2409,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, uint64_t lo = (lowValue - lowBound).getZExtValue(); uint64_t hi = (highValue - lowBound).getZExtValue(); + CasesBits[i].ExtraWeight += I->ExtraWeight; for (uint64_t j = lo; j <= hi; j++) { CasesBits[i].Mask |= 1ULL << j; @@ -2410,7 +2437,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, CurMF->insert(BBI, CaseBB); BTC.push_back(BitTestCase(CasesBits[i].Mask, CaseBB, - CasesBits[i].BB)); + CasesBits[i].BB, CasesBits[i].ExtraWeight)); // Put SV in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(SV); @@ -2438,30 +2465,25 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, Clusterifier TheClusterifier; + BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; - TheClusterifier.add(i.getCaseValueEx(), SMBB); + TheClusterifier.add(i.getCaseValueEx(), SMBB, + BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0); } TheClusterifier.optimize(); - BranchProbabilityInfo *BPI = FuncInfo.BPI; size_t numCmps = 0; for (Clusterifier::RangeIterator i = TheClusterifier.begin(), e = TheClusterifier.end(); i != e; ++i, ++numCmps) { Clusterifier::Cluster &C = *i; - unsigned W = 0; - if (BPI) { - W = BPI->getEdgeWeight(SI.getParent(), C.second->getBasicBlock()); - if (!W) - W = 16; - W *= C.first.Weight; - BPI->setEdgeWeight(SI.getParent(), C.second->getBasicBlock(), W); - } + // Update edge weight for the cluster. + unsigned W = C.first.Weight; // FIXME: Currently work with ConstantInt based numbers. // Changing it to APInt based is a pretty heavy for this commit. @@ -4853,7 +4875,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - DAG.getConstant(Idx, MVT::i32)); + DAG.getIntPtrConstant(Idx)); + setValue(&I, Res); + return 0; + } + case Intrinsic::x86_avx_vextractf128_pd_256: + case Intrinsic::x86_avx_vextractf128_ps_256: + case Intrinsic::x86_avx_vextractf128_si_256: + case Intrinsic::x86_avx2_vextracti128: { + DebugLoc dl = getCurDebugLoc(); + EVT DestVT = TLI.getValueType(I.getType()); + uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * + DestVT.getVectorNumElements(); + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, + getValue(I.getArgOperand(0)), + DAG.getIntPtrConstant(Idx)); setValue(&I, Res); return 0; } @@ -5180,14 +5216,40 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { rw==1)); /* write */ return 0; } + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: { + bool IsStart = (Intrinsic == Intrinsic::lifetime_start); + // Stack coloring is not enabled in O0, discard region information. + if (TM.getOptLevel() == CodeGenOpt::None) + return 0; + + SmallVector<Value *, 4> Allocas; + GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD); + + for (SmallVector<Value*, 4>::iterator Object = Allocas.begin(), + E = Allocas.end(); Object != E; ++Object) { + AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); + + // Could not find an Alloca. + if (!LifetimeObject) + continue; + + int FI = FuncInfo.StaticAllocaMap[LifetimeObject]; + + SDValue Ops[2]; + Ops[0] = getRoot(); + Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); + unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); + Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2); + DAG.setRoot(Res); + } + } case Intrinsic::invariant_start: - case Intrinsic::lifetime_start: // Discard region information. setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); return 0; case Intrinsic::invariant_end: - case Intrinsic::lifetime_end: // Discard region information. return 0; case Intrinsic::donothing: @@ -6043,12 +6105,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); - // Remember the HasSideEffect and AlignStack bits as operand 3. + // Remember the HasSideEffect, AlignStack and AsmDialect bits as operand 3. unsigned ExtraInfo = 0; if (IA->hasSideEffects()) ExtraInfo |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; + // Set the asm dialect. + ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, TLI.getPointerTy())); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 4090002..3b7615a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -150,9 +150,11 @@ private: uint64_t Mask; MachineBasicBlock* BB; unsigned Bits; + uint32_t ExtraWeight; - CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits): - Mask(mask), BB(bb), Bits(bits) { } + CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits, + uint32_t Weight): + Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { } }; typedef std::vector<Case> CaseVector; @@ -247,11 +249,13 @@ private: typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock; struct BitTestCase { - BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr): - Mask(M), ThisBB(T), TargetBB(Tr) { } + BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr, + uint32_t Weight): + Mask(M), ThisBB(T), TargetBB(Tr), ExtraWeight(Weight) { } uint64_t Mask; MachineBasicBlock *ThisBB; MachineBasicBlock *TargetBB; + uint32_t ExtraWeight; }; typedef SmallVector<BitTestCase, 3> BitTestInfo; @@ -325,7 +329,7 @@ public: CodeGenOpt::Level ol) : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), - HasTailCall(false), Context(dag.getContext()) { + HasTailCall(false) { } void init(GCFunctionInfo *gfi, AliasAnalysis &aa, @@ -452,6 +456,7 @@ public: void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); void visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, + uint32_t BranchWeightToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 13cd011..75989ad 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -267,6 +267,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STACKRESTORE: return "stackrestore"; case ISD::TRAP: return "trap"; case ISD::DEBUGTRAP: return "debugtrap"; + case ISD::LIFETIME_START: return "lifetime.start"; + case ISD::LIFETIME_END: return "lifetime.end"; // Bit manipulation case ISD::BSWAP: return "bswap"; @@ -331,7 +333,7 @@ void SDNode::dump(const SelectionDAG *G) const { } void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { - OS << (void*)this << ": "; + OS << (const void*)this << ": "; for (unsigned i = 0, e = getNumValues(); i != e; ++i) { if (i) OS << ","; @@ -559,7 +561,7 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, child->printr(OS, G); once.insert(child); } else { // Just the address. FIXME: also print the child's opcode. - OS << (void*)child; + OS << (const void*)child; if (unsigned RN = N->getOperand(i).getResNo()) OS << ":" << RN; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 4e5e3ba..7542941 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -554,7 +554,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #endif { BlockNumber = FuncInfo->MBB->getNumber(); - BlockName = MF->getFunction()->getName().str() + ":" + + BlockName = MF->getName().str() + ":" + FuncInfo->MBB->getBasicBlock()->getName().str(); } DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber @@ -1209,7 +1209,12 @@ SelectionDAGISel::FinishBasicBlock() { CodeGenAndEmitDAG(); } + uint32_t UnhandledWeight = 0; + for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) + UnhandledWeight += SDB->BitTestCases[i].Cases[j].ExtraWeight; + for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { + UnhandledWeight -= SDB->BitTestCases[i].Cases[j].ExtraWeight; // Set the current basic block to the mbb we wish to insert the code into FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); @@ -1217,12 +1222,14 @@ SelectionDAGISel::FinishBasicBlock() { if (j+1 != ej) SDB->visitBitTestCase(SDB->BitTestCases[i], SDB->BitTestCases[i].Cases[j+1].ThisBB, + UnhandledWeight, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], FuncInfo->MBB); else SDB->visitBitTestCase(SDB->BitTestCases[i], SDB->BitTestCases[i].Default, + UnhandledWeight, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], FuncInfo->MBB); @@ -1794,10 +1801,13 @@ WalkChainUsers(const SDNode *ChainedNode, User->getOpcode() == ISD::HANDLENODE) // Root of the graph. continue; - if (User->getOpcode() == ISD::CopyToReg || - User->getOpcode() == ISD::CopyFromReg || - User->getOpcode() == ISD::INLINEASM || - User->getOpcode() == ISD::EH_LABEL) { + unsigned UserOpcode = User->getOpcode(); + if (UserOpcode == ISD::CopyToReg || + UserOpcode == ISD::CopyFromReg || + UserOpcode == ISD::INLINEASM || + UserOpcode == ISD::EH_LABEL || + UserOpcode == ISD::LIFETIME_START || + UserOpcode == ISD::LIFETIME_END) { // If their node ID got reset to -1 then they've already been selected. // Treat them like a MachineOpcode. if (User->getNodeId() == -1) @@ -2213,6 +2223,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::CopyFromReg: case ISD::CopyToReg: case ISD::EH_LABEL: + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: NodeToMatch->setNodeId(-1); // Mark selected. return 0; case ISD::AssertSext: @@ -2981,7 +2993,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && N->getOpcode() != ISD::INTRINSIC_VOID) { N->printrFull(Msg, CurDAG); - Msg << "\nIn function: " << MF->getFunction()->getName(); + Msg << "\nIn function: " << MF->getName(); } else { bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other; unsigned iid = diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 173ffac..3921635 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -14,7 +14,6 @@ #include "ScheduleDAGSDNodes.h" #include "llvm/Constants.h" #include "llvm/DebugInfo.h" -#include "llvm/Function.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -50,7 +49,7 @@ namespace llvm { template<typename EdgeIter> static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) { - return itostr(I - SDNodeIterator::begin((SDNode *) Node)); + return itostr(I - SDNodeIterator::begin((const SDNode *) Node)); } /// edgeTargetsEdgeSource - This method returns true if this outgoing edge @@ -73,7 +72,7 @@ namespace llvm { } static std::string getGraphName(const SelectionDAG *G) { - return G->getMachineFunction().getFunction()->getName(); + return G->getMachineFunction().getName(); } static bool renderGraphFromBottomUp() { @@ -146,7 +145,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, void SelectionDAG::viewGraph(const std::string &Title) { // This code is only for debugging! #ifndef NDEBUG - ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(), + ViewGraph(this, "dag." + getMachineFunction().getName(), false, Title); #else errs() << "SelectionDAG::viewGraph is only available in debug builds on " diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 6820175..dcaa9ba 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -772,7 +772,7 @@ void TargetLowering::computeRegisterProperties() { LegalIntReg = IntReg; } else { RegisterTypeForVT[IntReg] = TransformToType[IntReg] = - (MVT::SimpleValueType)LegalIntReg; + (const MVT::SimpleValueType)LegalIntReg; ValueTypeActions.setTypeAction(IVT, TypePromoteInteger); } } @@ -898,7 +898,6 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { return NULL; } - EVT TargetLowering::getSetCCResultType(EVT VT) const { assert(!VT.isVector() && "No default SetCC type for vectors!"); return PointerTy.SimpleTy; @@ -2441,7 +2440,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0 == N1) { // The sext(setcc()) => setcc() optimization relies on the appropriate // constant being emitted. - uint64_t EqVal; + uint64_t EqVal = 0; switch (getBooleanContents(N0.getValueType().isVector())) { case UndefinedBooleanContent: case ZeroOrOneBooleanContent: |
