diff options
-rw-r--r-- | include/llvm/Target/TargetLowering.h | 7 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 16 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 71 | ||||
-rw-r--r-- | test/CodeGen/X86/imul-lea-2.ll | 15 |
4 files changed, 98 insertions, 11 deletions
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index d0721d3..587666f 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -791,9 +791,10 @@ public: bool isCalledByLegalizer() const { return CalledByLegalizer; } void AddToWorklist(SDNode *N); - SDValue CombineTo(SDNode *N, const std::vector<SDValue> &To); - SDValue CombineTo(SDNode *N, SDValue Res); - SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1); + SDValue CombineTo(SDNode *N, const std::vector<SDValue> &To, + bool AddTo = true); + SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true); + SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true); void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO); }; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 041c500..78d5d40 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -93,14 +93,14 @@ namespace { } SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, - bool AddTo = true); + bool AddTo = true); SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { return CombineTo(N, &Res, 1, AddTo); } SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, - bool AddTo = true) { + bool AddTo = true) { SDValue To[] = { Res0, Res1 }; return CombineTo(N, To, 2, AddTo); } @@ -293,19 +293,19 @@ void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { } SDValue TargetLowering::DAGCombinerInfo:: -CombineTo(SDNode *N, const std::vector<SDValue> &To) { - return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size()); +CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); } SDValue TargetLowering::DAGCombinerInfo:: -CombineTo(SDNode *N, SDValue Res) { - return ((DAGCombiner*)DC)->CombineTo(N, Res); +CombineTo(SDNode *N, SDValue Res, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); } SDValue TargetLowering::DAGCombinerInfo:: -CombineTo(SDNode *N, SDValue Res0, SDValue Res1) { - return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1); +CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); } void TargetLowering::DAGCombinerInfo:: diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4856059..29174b9 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -826,6 +826,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::STORE); + if (Subtarget->is64Bit()) + setTargetDAGCombine(ISD::MUL); computeRegisterProperties(); @@ -8407,6 +8409,74 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, } +/// PerformMulCombine - Optimize a single multiply with constant into two +/// in order to implement it with two cheaper instructions, e.g. +/// LEA + SHL, LEA + LEA. +static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + if (DAG.getMachineFunction(). + getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + return SDValue(); + + if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) + return SDValue(); + + MVT VT = N->getValueType(0); + if (VT != MVT::i64) + return SDValue(); + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!C) + return SDValue(); + uint64_t MulAmt = C->getZExtValue(); + if (isPowerOf2_64(MulAmt) || MulAmt == 3 || MulAmt == 5 || MulAmt == 9) + return SDValue(); + + uint64_t MulAmt1 = 0; + uint64_t MulAmt2 = 0; + if ((MulAmt % 9) == 0) { + MulAmt1 = 9; + MulAmt2 = MulAmt / 9; + } else if ((MulAmt % 5) == 0) { + MulAmt1 = 5; + MulAmt2 = MulAmt / 5; + } else if ((MulAmt % 3) == 0) { + MulAmt1 = 3; + MulAmt2 = MulAmt / 3; + } + if (MulAmt2 && + (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){ + DebugLoc DL = N->getDebugLoc(); + + if (isPowerOf2_64(MulAmt2) && + !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD)) + // If second multiplifer is pow2, issue it first. We want the multiply by + // 3, 5, or 9 to be folded into the addressing mode unless the lone use + // is an add. + std::swap(MulAmt1, MulAmt2); + + SDValue NewMul; + if (isPowerOf2_64(MulAmt1)) + NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(Log2_64(MulAmt1), MVT::i8)); + else + NewMul = DAG.getNode(ISD::MUL, DL, VT, N->getOperand(0), + DAG.getConstant(MulAmt1, VT)); + + if (isPowerOf2_64(MulAmt2)) + NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul, + DAG.getConstant(Log2_64(MulAmt2), MVT::i8)); + else + NewMul = DAG.getNode(ISD::MUL, DL, VT, NewMul, + DAG.getConstant(MulAmt2, VT)); + + // Do not add new nodes to DAG combiner worklist. + DCI.CombineTo(N, NewMul, false); + } + return SDValue(); +} + + /// PerformShiftCombine - Transforms vector shift nodes to use vector shifts /// when possible. static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, @@ -8668,6 +8738,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, return PerformBuildVectorCombine(N, DAG, DCI, Subtarget, *this); case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); + case ISD::MUL: return PerformMulCombine(N, DAG, DCI); case ISD::SHL: case ISD::SRA: case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget); diff --git a/test/CodeGen/X86/imul-lea-2.ll b/test/CodeGen/X86/imul-lea-2.ll new file mode 100644 index 0000000..0a2df1c --- /dev/null +++ b/test/CodeGen/X86/imul-lea-2.ll @@ -0,0 +1,15 @@ +; RUN: llvm-as < %s | llc -march=x86-64 | grep lea | count 3 +; RUN: llvm-as < %s | llc -march=x86-64 | grep shl | count 1 +; RUN: llvm-as < %s | llc -march=x86-64 | not grep imul + +define i64 @t1(i64 %a) nounwind readnone { +entry: + %0 = mul i64 %a, 81 ; <i64> [#uses=1] + ret i64 %0 +} + +define i64 @t2(i64 %a) nounwind readnone { +entry: + %0 = mul i64 %a, 40 ; <i64> [#uses=1] + ret i64 %0 +} |