3 files changed, 152 insertions, 64 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 29846c1..8e4267f 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -377,6 +377,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
   if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), TD))
     return ReplaceInstUsesWith(I, V);
 
+  bool AllowReassociate = I.hasUnsafeAlgebra();
+
   // Simplify mul instructions with a constant RHS.
   if (isa<Constant>(Op1)) {
     // Try to fold constant mul into select arguments.
@@ -389,7 +391,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
         return NV;
 
     ConstantFP *C = dyn_cast<ConstantFP>(Op1);
-    if (C && I.hasUnsafeAlgebra() && C->getValueAPF().isNormal()) {
+    if (C && AllowReassociate && C->getValueAPF().isNormal()) {
       // Let MDC denote an expression in one of these forms:
       // X * C, C/X, X/C, where C is a constant.
       //
@@ -430,7 +432,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
                         BinaryOperator::CreateFAdd(M0, M1) :
                         BinaryOperator::CreateFSub(M0, M1);
             Instruction *RI = cast<Instruction>(R);
-            RI->setHasUnsafeAlgebra(true);
+            RI->copyFastMathFlags(&I);
             return RI;
           }
         }
@@ -438,9 +440,6 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
     }
   }
 
-  if (Value *Op0v = dyn_castFNegVal(Op0))     // -X * -Y = X*Y
-    if (Value *Op1v = dyn_castFNegVal(Op1))
-      return BinaryOperator::CreateFMul(Op0v, Op1v);
 
   // Under unsafe algebra do:
   // X * log2(0.5*Y) = X*log2(Y) - X
@@ -469,36 +468,66 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
     }
   }
 
-  // X * cond ? 1.0 : 0.0 => cond ? X : 0.0
-  if (I.hasNoNaNs() && I.hasNoSignedZeros()) {
-    Value *V0 = I.getOperand(0);
-    Value *V1 = I.getOperand(1);
-    Value *Cond, *SLHS, *SRHS;
-    bool Match = false;
-
-    if (match(V0, m_Select(m_Value(Cond), m_Value(SLHS), m_Value(SRHS)))) {
-      Match = true;
-    } else if (match(V1, m_Select(m_Value(Cond), m_Value(SLHS), 
-                     m_Value(SRHS)))) {
-      Match = true;
-      std::swap(V0, V1);
+  // Handle the symmetric situation in a 2-iteration loop.
+  Value *Opnd0 = Op0;
+  Value *Opnd1 = Op1;
+  for (int i = 0; i < 2; i++) {
+    bool IgnoreZeroSign = I.hasNoSignedZeros();
+    if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) {
+      Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign);
+      Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign);
+
+      // -X * -Y => X*Y
+      if (N1)
+        return BinaryOperator::CreateFMul(N0, N1);
+
+      if (Opnd0->hasOneUse()) {
+        // -X * Y => -(X*Y) (Promote negation as high as possible)
+        Value *T = Builder->CreateFMul(N0, Opnd1);
+        cast<Instruction>(T)->setDebugLoc(I.getDebugLoc());
+        Instruction *Neg = BinaryOperator::CreateFNeg(T);
+        if (I.getFastMathFlags().any()) {
+          cast<Instruction>(T)->copyFastMathFlags(&I);
+          Neg->copyFastMathFlags(&I);
+        }
+        return Neg;
+      }
     }
 
-    if (Match) {
-      ConstantFP *C0 = dyn_cast<ConstantFP>(SLHS);
-      ConstantFP *C1 = dyn_cast<ConstantFP>(SRHS);
-
-      if (C0 && C1 &&
-          ((C0->isZero() && C1->isExactlyValue(1.0)) ||
-           (C1->isZero() && C0->isExactlyValue(1.0)))) {
-        Value *T;
-        if (C0->isZero())
-          T = Builder->CreateSelect(Cond, SLHS, V1);
-        else
-          T = Builder->CreateSelect(Cond, V1, SRHS);
-        return ReplaceInstUsesWith(I, T);
+    // (X*Y) * X => (X*X) * Y where Y != X
+    //  The purpose is two-fold:
+    //   1) to form a power expression (of X).
+    //   2) potentially shorten the critical path: after the transformation,
+    //  the latency of computing Y overlaps with the computation of X*X, so Y
+    //  is in a "less critical" position compared to where it was before the
+    //  transformation.
+    //
+    if (AllowReassociate) {
+      Value *Opnd0_0, *Opnd0_1;
+      if (Opnd0->hasOneUse() &&
+          match(Opnd0, m_FMul(m_Value(Opnd0_0), m_Value(Opnd0_1)))) {
+        Value *Y = 0;
+        if (Opnd0_0 == Opnd1 && Opnd0_1 != Opnd1)
+          Y = Opnd0_1;
+        else if (Opnd0_1 == Opnd1 && Opnd0_0 != Opnd1)
+          Y = Opnd0_0;
+
+        if (Y) {
+          Instruction *T = cast<Instruction>(Builder->CreateFMul(Opnd1, Opnd1));
+          T->copyFastMathFlags(&I);
+          T->setDebugLoc(I.getDebugLoc());
+
+          Instruction *R = BinaryOperator::CreateFMul(T, Y);
+          R->copyFastMathFlags(&I);
+          return R;
+        }
       }
     }
+
+    if (!isa<Constant>(Op1))
+      std::swap(Opnd0, Opnd1);
+    else
+      break;
   }
 
   return Changed ? &I : 0;
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
index 5a1ad5e..88e19e3 100644
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -130,37 +130,6 @@ define double @fail2(double %f1, double %f2) {
 ; CHECK: ret
 }
 
-; rdar://12753946:  x * cond ? 1.0 : 0.0 => cond ? x : 0.0
-define double @select1(i32 %cond, double %x, double %y) {
-  %tobool = icmp ne i32 %cond, 0
-  %cond1 = select i1 %tobool, double 1.000000e+00, double 0.000000e+00
-  %mul = fmul nnan nsz double %cond1, %x
-  %add = fadd double %mul, %y
-  ret double %add
-; CHECK: @select1
-; CHECK: select i1 %tobool, double %x, double 0.000000e+00
-}
-
-define double @select2(i32 %cond, double %x, double %y) {
-  %tobool = icmp ne i32 %cond, 0
-  %cond1 = select i1 %tobool, double 0.000000e+00, double 1.000000e+00
-  %mul = fmul nnan nsz double %cond1, %x
-  %add = fadd double %mul, %y
-  ret double %add
-; CHECK: @select2
-; CHECK: select i1 %tobool, double 0.000000e+00, double %x
-}
-
-define double @select3(i32 %cond, double %x, double %y) {
-  %tobool = icmp ne i32 %cond, 0
-  %cond1 = select i1 %tobool, double 0.000000e+00, double 2.000000e+00
-  %mul = fmul nnan nsz double %cond1, %x
-  %add = fadd double %mul, %y
-  ret double %add
-; CHECK: @select3
-; CHECK: fmul nnan nsz double %cond1, %x
-}
-
 ; =========================================================================
 ;
 ;   Testing-cases about fmul begin
@@ -243,6 +212,25 @@ define float @fmul5(float %f1, float %f2) {
 ; CHECK: fdiv fast float %f1, 0x47E8000000000000
 }
 
+; (X*Y) * X => (X*X) * Y
+define float @fmul6(float %f1, float %f2) {
+  %mul = fmul float %f1, %f2
+  %mul1 = fmul fast float %mul, %f1
+  ret float %mul1
+; CHECK: @fmul6
+; CHECK: fmul fast float %f1, %f1 
+}
+
+; "(X*Y) * X => (X*X) * Y" is disabled if "X*Y" has multiple uses
+define float @fmul7(float %f1, float %f2) {
+  %mul = fmul float %f1, %f2
+  %mul1 = fmul fast float %mul, %f1
+  %add = fadd float %mul1, %mul
+  ret float %add
+; CHECK: @fmul7
+; CHECK: fmul fast float %mul, %f1
+}
+
 ; =========================================================================
 ;
 ;   Testing-cases about negation
@@ -262,8 +250,8 @@ define float @fneg1(float %f1, float %f2) {
 ;   Testing-cases about div
 ;
 ; =========================================================================
-; X/C1 / C2 => X * (1/(C2*C1))
 
+; X/C1 / C2 => X * (1/(C2*C1))
 define float @fdiv1(float %x) {
   %div = fdiv float %x, 0x3FF3333340000000
   %div1 = fdiv fast float %div, 0x4002666660000000
@@ -351,4 +339,3 @@ define float @fdiv9(float %x) {
 ; CHECK: @fdiv9
 ; CHECK: fmul fast float %x, 5.000000e+00
 }
-
diff --git a/test/Transforms/InstCombine/fmul.ll b/test/Transforms/InstCombine/fmul.ll
new file mode 100644
index 0000000..3671b4c
--- /dev/null
+++ b/test/Transforms/InstCombine/fmul.ll
@@ -0,0 +1,72 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; (-0.0 - X) * C => X * -C
+define float @test1(float %x) {
+  %sub = fsub float -0.000000e+00, %x
+  %mul = fmul float %sub, 2.0e+1
+  ret float %mul
+
+; CHECK: @test1
+; CHECK: fmul float %x, -2.000000e+01
+}
+
+; (0.0 - X) * C => X * -C
+define float @test2(float %x) {
+  %sub = fsub nsz float 0.000000e+00, %x
+  %mul = fmul float %sub, 2.0e+1
+  ret float %mul
+
+; CHECK: @test2
+; CHECK: fmul float %x, -2.000000e+01
+}
+
+; (-0.0 - X) * (-0.0 - Y) => X * Y
+define float @test3(float %x, float %y) {
+  %sub1 = fsub float -0.000000e+00, %x
+  %sub2 = fsub float -0.000000e+00, %y
+  %mul = fmul float %sub1, %sub2
+  ret float %mul
+; CHECK: @test3
+; CHECK: fmul float %x, %y
+}
+
+; (0.0 - X) * (0.0 - Y) => X * Y
+define float @test4(float %x, float %y) {
+  %sub1 = fsub nsz float 0.000000e+00, %x
+  %sub2 = fsub nsz float 0.000000e+00, %y
+  %mul = fmul float %sub1, %sub2
+  ret float %mul
+; CHECK: @test4
+; CHECK: fmul float %x, %y
+}
+
+; (-0.0 - X) * Y => -0.0 - (X * Y)
+define float @test5(float %x, float %y) {
+  %sub1 = fsub float -0.000000e+00, %x
+  %mul = fmul float %sub1, %y
+  ret float %mul
+; CHECK: @test5
+; CHECK: %1 = fmul float %x, %y
+; CHECK: %mul = fsub float -0.000000e+00, %1
+}
+
+; (0.0 - X) * Y => -0.0 - (X * Y)
+define float @test6(float %x, float %y) {
+  %sub1 = fsub nsz float 0.000000e+00, %x
+  %mul = fmul float %sub1, %y
+  ret float %mul
+; CHECK: @test6
+; CHECK: %1 = fmul float %x, %y
+; CHECK: %mul = fsub float -0.000000e+00, %1
+}
+
+; "(-0.0 - X) * Y => -0.0 - (X * Y)" is disabled if expression "-0.0 - X"
+; has multiple uses.
+define float @test7(float %x, float %y) {
+  %sub1 = fsub float -0.000000e+00, %x
+  %mul = fmul float %sub1, %y
+  %mul2 = fmul float %mul, %sub1
+  ret float %mul2
+; CHECK: @test7
+; CHECK: fsub float -0.000000e+00, %x
+}