Fix bugs in lowering of FCOPYSIGN nodes.

- FCOPYSIGN nodes that have operands of different types were not handled. - Different code was generated depending on the endianness of the target. Additionally, code is added that emits INS and EXT instructions, if they are supported by target (they are R2 instructions). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154540 91177308-0d34-0410-b5e6-96231b3b80d8
author: Akira Hatanaka <ahatanaka@mips.com> 2012-04-11 22:13:04 +0000
committer: Akira Hatanaka <ahatanaka@mips.com> 2012-04-11 22:13:04 +0000
commit: 056c51e59820c4c36d3b9d2f22acb6fd121b798e (patch)
tree: 641ba94fc250beeb15afed2f3250a2de4555789d
parent: 2ce182c90cd8738bf76e5cdaa21eb6627c6e6aec (diff)
download: external_llvm-056c51e59820c4c36d3b9d2f22acb6fd121b798e.zip
external_llvm-056c51e59820c4c36d3b9d2f22acb6fd121b798e.tar.gz
external_llvm-056c51e59820c4c36d3b9d2f22acb6fd121b798e.tar.bz2
3 files changed, 183 insertions, 95 deletions
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index c772c5d..2b3921d 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -1756,66 +1756,105 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
                       MachinePointerInfo(SV), false, false, 0);
 }
 
-// Called if the size of integer registers is large enough to hold the whole
-// floating point number.
-static SDValue LowerFCOPYSIGNLargeIntReg(SDValue Op, SelectionDAG &DAG) {
-  // FIXME: Use ext/ins instructions if target architecture is Mips32r2.
-  EVT ValTy = Op.getValueType();
-  EVT IntValTy = MVT::getIntegerVT(ValTy.getSizeInBits());
-  uint64_t Mask = (uint64_t)1 << (ValTy.getSizeInBits() - 1);
-  DebugLoc dl = Op.getDebugLoc();
-  SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntValTy, Op.getOperand(0));
-  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntValTy, Op.getOperand(1));
-  SDValue And0 = DAG.getNode(ISD::AND, dl, IntValTy, Op0,
-                             DAG.getConstant(Mask - 1, IntValTy));
-  SDValue And1 = DAG.getNode(ISD::AND, dl, IntValTy, Op1,
-                             DAG.getConstant(Mask, IntValTy));
-  SDValue Result = DAG.getNode(ISD::OR, dl, IntValTy, And0, And1);
-  return DAG.getNode(ISD::BITCAST, dl, ValTy, Result);
-}
+static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+  EVT TyX = Op.getOperand(0).getValueType();
+  EVT TyY = Op.getOperand(1).getValueType();
+  SDValue Const1 = DAG.getConstant(1, MVT::i32);
+  SDValue Const31 = DAG.getConstant(31, MVT::i32);
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue Res;
+
+  // If operand is of type f64, extract the upper 32-bit. Otherwise, bitcast it
+  // to i32.
+  SDValue X = (TyX == MVT::f32) ?
+    DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(0)) :
+    DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
+                Const1);
+  SDValue Y = (TyY == MVT::f32) ?
+    DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(1)) :
+    DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(1),
+                Const1);
+
+  if (HasR2) {
+    // ext  E, Y, 31, 1  ; extract bit31 of Y
+    // ins  X, E, 31, 1  ; insert extracted bit at bit31 of X
+    SDValue E = DAG.getNode(MipsISD::Ext, DL, MVT::i32, Y, Const31, Const1);
+    Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32, E, Const31, Const1, X);
+  } else {
+    // sll SllX, X, 1
+    // srl SrlX, SllX, 1
+    // srl SrlY, Y, 31
+    // sll SllY, SrlX, 31
+    // or  Or, SrlX, SllY
+    SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Const1);
+    SDValue SrlX = DAG.getNode(ISD::SRL, DL, MVT::i32, SllX, Const1);
+    SDValue SrlY = DAG.getNode(ISD::SRL, DL, MVT::i32, Y, Const31);
+    SDValue SllY = DAG.getNode(ISD::SHL, DL, MVT::i32, SrlY, Const31);
+    Res = DAG.getNode(ISD::OR, DL, MVT::i32, SrlX, SllY);
+  }
 
-// Called if the size of integer registers is not large enough to hold the whole
-// floating point number (e.g. f64 & 32-bit integer register).
-static SDValue
-LowerFCOPYSIGNSmallIntReg(SDValue Op, SelectionDAG &DAG, bool isLittle) {
-  // FIXME:
-  //  Use ext/ins instructions if target architecture is Mips32r2.
-  //  Eliminate redundant mfc1 and mtc1 instructions.
-  unsigned LoIdx = 0, HiIdx = 1;
+  if (TyX == MVT::f32)
+    return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), Res);
 
-  if (!isLittle)
-    std::swap(LoIdx, HiIdx);
+  SDValue LowX = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
+                             Op.getOperand(0), DAG.getConstant(0, MVT::i32));
+  return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
+}
 
-  DebugLoc dl = Op.getDebugLoc();
-  SDValue Word0 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
-                              Op.getOperand(0),
-                              DAG.getConstant(LoIdx, MVT::i32));
-  SDValue Hi0 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
-                            Op.getOperand(0), DAG.getConstant(HiIdx, MVT::i32));
-  SDValue Hi1 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
-                            Op.getOperand(1), DAG.getConstant(HiIdx, MVT::i32));
-  SDValue And0 = DAG.getNode(ISD::AND, dl, MVT::i32, Hi0,
-                             DAG.getConstant(0x7fffffff, MVT::i32));
-  SDValue And1 = DAG.getNode(ISD::AND, dl, MVT::i32, Hi1,
-                             DAG.getConstant(0x80000000, MVT::i32));
-  SDValue Word1 = DAG.getNode(ISD::OR, dl, MVT::i32, And0, And1);
-
-  if (!isLittle)
-    std::swap(Word0, Word1);
-
-  return DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64, Word0, Word1);
+static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+  unsigned WidthX = Op.getOperand(0).getValueSizeInBits();
+  unsigned WidthY = Op.getOperand(1).getValueSizeInBits();
+  EVT TyX = MVT::getIntegerVT(WidthX), TyY = MVT::getIntegerVT(WidthY);
+  SDValue Const1 = DAG.getConstant(1, MVT::i32);
+  DebugLoc DL = Op.getDebugLoc();
+
+  // Bitcast to integer nodes.
+  SDValue X = DAG.getNode(ISD::BITCAST, DL, TyX, Op.getOperand(0));
+  SDValue Y = DAG.getNode(ISD::BITCAST, DL, TyY, Op.getOperand(1));
+
+  if (HasR2) {
+    // ext  E, Y, width(Y) - 1, 1  ; extract bit width(Y)-1 of Y
+    // ins  X, E, width(X) - 1, 1  ; insert extracted bit at bit width(X)-1 of X
+    SDValue E = DAG.getNode(MipsISD::Ext, DL, TyY, Y,
+                            DAG.getConstant(WidthY - 1, MVT::i32), Const1);
+
+    if (WidthX > WidthY)
+      E = DAG.getNode(ISD::ZERO_EXTEND, DL, TyX, E);
+    else if (WidthY > WidthX)
+      E = DAG.getNode(ISD::TRUNCATE, DL, TyX, E);
+
+    SDValue I = DAG.getNode(MipsISD::Ins, DL, TyX, E,
+                            DAG.getConstant(WidthX - 1, MVT::i32), Const1, X);
+    return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), I);
+  }
+
+  // (d)sll SllX, X, 1
+  // (d)srl SrlX, SllX, 1
+  // (d)srl SrlY, Y, width(Y)-1
+  // (d)sll SllY, SrlX, width(Y)-1
+  // or     Or, SrlX, SllY
+  SDValue SllX = DAG.getNode(ISD::SHL, DL, TyX, X, Const1);
+  SDValue SrlX = DAG.getNode(ISD::SRL, DL, TyX, SllX, Const1);
+  SDValue SrlY = DAG.getNode(ISD::SRL, DL, TyY, Y,
+                             DAG.getConstant(WidthY - 1, MVT::i32));
+
+  if (WidthX > WidthY)
+    SrlY = DAG.getNode(ISD::ZERO_EXTEND, DL, TyX, SrlY);
+  else if (WidthY > WidthX)
+    SrlY = DAG.getNode(ISD::TRUNCATE, DL, TyX, SrlY);
+
+  SDValue SllY = DAG.getNode(ISD::SHL, DL, TyX, SrlY,
+                             DAG.getConstant(WidthX - 1, MVT::i32));
+  SDValue Or = DAG.getNode(ISD::OR, DL, TyX, SrlX, SllY);
+  return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), Or);
 }
 
 SDValue
 MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
-  EVT Ty = Op.getValueType();
-
-  assert(Ty == MVT::f32 || Ty == MVT::f64);
-
-  if (Ty == MVT::f32 || HasMips64)
-    return LowerFCOPYSIGNLargeIntReg(Op, DAG);
+  if (Subtarget->hasMips64())
+    return LowerFCOPYSIGN64(Op, DAG, Subtarget->hasMips32r2());
 
-  return LowerFCOPYSIGNSmallIntReg(Op, DAG, Subtarget->isLittle());
+  return LowerFCOPYSIGN32(Op, DAG, Subtarget->hasMips32r2());
 }
 
 SDValue MipsTargetLowering::
diff --git a/test/CodeGen/Mips/fcopysign-f32-f64.ll b/test/CodeGen/Mips/fcopysign-f32-f64.ll
new file mode 100644
index 0000000..b36473d
--- /dev/null
+++ b/test/CodeGen/Mips/fcopysign-f32-f64.ll
@@ -0,0 +1,50 @@
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
+; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
+
+declare double @copysign(double, double) nounwind readnone
+
+declare float @copysignf(float, float) nounwind readnone
+
+define float @func2(float %d, double %f) nounwind readnone {
+entry:
+; 64: func2
+; 64: lui  $[[T0:[0-9]+]], 32767
+; 64: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 64: and  $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64: dsrl ${{[0-9]+}}, ${{[0-9]+}}, 63
+; 64: sll  $[[SLL:[0-9]+]], ${{[0-9]+}}, 31
+; 64: or   $[[OR:[0-9]+]], $[[AND0]], $[[SLL]]
+; 64: mtc1 $[[OR]], $f0
+
+; 64R2: dext ${{[0-9]+}}, ${{[0-9]+}}, 63, 1
+; 64R2: ins  $[[INS:[0-9]+]], ${{[0-9]+}}, 31, 1
+; 64R2: mtc1 $[[INS]], $f0
+
+  %add = fadd float %d, 1.000000e+00
+  %conv = fptrunc double %f to float
+  %call = tail call float @copysignf(float %add, float %conv) nounwind readnone
+  ret float %call
+}
+
+define double @func3(double %d, float %f) nounwind readnone {
+entry:
+
+; 64: daddiu $[[T0:[0-9]+]], $zero, 1
+; 64: dsll   $[[T1:[0-9]+]], $[[T0]], 63
+; 64: daddiu $[[MSK0:[0-9]+]], $[[T1]], -1
+; 64: and    $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64: srl    ${{[0-9]+}}, ${{[0-9]+}}, 31
+; 64: dsll   $[[DSLL:[0-9]+]], ${{[0-9]+}}, 63
+; 64: or     $[[OR:[0-9]+]], $[[AND0]], $[[DSLL]]
+; 64: dmtc1  $[[OR]], $f0
+
+; 64R2: ext   ${{[0-9]+}}, ${{[0-9]+}}, 31, 1
+; 64R2: dins  $[[INS:[0-9]+]], ${{[0-9]+}}, 63, 1
+; 64R2: dmtc1 $[[INS]], $f0
+
+  %add = fadd double %d, 1.000000e+00
+  %conv = fpext float %f to double
+  %call = tail call double @copysign(double %add, double %conv) nounwind readnone
+  ret double %call
+}
+
diff --git a/test/CodeGen/Mips/fcopysign.ll b/test/CodeGen/Mips/fcopysign.ll
index e494fe2..1c57eca 100644
--- a/test/CodeGen/Mips/fcopysign.ll
+++ b/test/CodeGen/Mips/fcopysign.ll
@@ -1,40 +1,35 @@
-; RUN: llc  < %s -march=mipsel | FileCheck %s -check-prefix=MIPS32-EL
-; RUN: llc  < %s -march=mips | FileCheck %s -check-prefix=MIPS32-EB
-; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=MIPS64
+; RUN: llc  < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=32
+; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
+; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
 
 define double @func0(double %d0, double %d1) nounwind readnone {
 entry:
-; MIPS32-EL: func0:
-; MIPS32-EL: mfc1 $[[HI0:[0-9]+]], $f15
-; MIPS32-EL: lui $[[MSK1:[0-9]+]], 32768
-; MIPS32-EL: and $[[AND1:[0-9]+]], $[[HI0]], $[[MSK1]]
-; MIPS32-EL: lui $[[T0:[0-9]+]], 32767
-; MIPS32-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; MIPS32-EL: mfc1 $[[HI1:[0-9]+]], $f13
-; MIPS32-EL: and $[[AND0:[0-9]+]], $[[HI1]], $[[MSK0]]
-; MIPS32-EL: or  $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
-; MIPS32-EL: mfc1 $[[LO0:[0-9]+]], $f12
-; MIPS32-EL: mtc1 $[[LO0]], $f0
-; MIPS32-EL: mtc1 $[[OR]], $f1
 ;
-; MIPS32-EB: mfc1 $[[HI1:[0-9]+]], $f14
-; MIPS32-EB: lui $[[MSK1:[0-9]+]], 32768
-; MIPS32-EB: and $[[AND1:[0-9]+]], $[[HI1]], $[[MSK1]]
-; MIPS32-EB: lui $[[T0:[0-9]+]], 32767
-; MIPS32-EB: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; MIPS32-EB: mfc1 $[[HI0:[0-9]+]], $f12
-; MIPS32-EB: and $[[AND0:[0-9]+]], $[[HI0]], $[[MSK0]]
-; MIPS32-EB: or  $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
-; MIPS32-EB: mfc1 $[[LO0:[0-9]+]], $f13
-; MIPS32-EB: mtc1 $[[OR]], $f0
-; MIPS32-EB: mtc1 $[[LO0]], $f1
-
-; MIPS64: dmfc1 $[[R0:[0-9]+]], $f13
-; MIPS64: and $[[R1:[0-9]+]], $[[R0]], ${{[0-9]+}}
-; MIPS64: dmfc1 $[[R2:[0-9]+]], $f12
-; MIPS64: and $[[R3:[0-9]+]], $[[R2]], ${{[0-9]+}}
-; MIPS64: or  $[[R4:[0-9]+]], $[[R3]], $[[R1]]
-; MIPS64: dmtc1 $[[R4]], $f0
+; 32: lui  $[[MSK1:[0-9]+]], 32768
+; 32: and  $[[AND1:[0-9]+]], ${{[0-9]+}}, $[[MSK1]]
+; 32: lui  $[[T0:[0-9]+]], 32767
+; 32: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 32: and  $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 32: or   $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; 32: mtc1 $[[OR]], $f1
+
+; 32R2: ext  $[[EXT:[0-9]+]], ${{[0-9]+}}, 31, 1
+; 32R2: ins  $[[INS:[0-9]+]], $[[EXT]], 31, 1
+; 32R2: mtc1 $[[INS]], $f1
+
+; 64: daddiu $[[T0:[0-9]+]], $zero, 1
+; 64: dsll   $[[MSK1:[0-9]+]], $[[T0]], 63
+; 64: and    $[[AND1:[0-9]+]], ${{[0-9]+}}, $[[MSK1]]
+; 64: daddiu $[[MSK0:[0-9]+]], $[[MSK1]], -1
+; 64: and    $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64: or     $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; 64: dmtc1  $[[OR]], $f0
+
+; 64R2: dext  $[[EXT:[0-9]+]], ${{[0-9]+}}, 63, 1
+; 64R2: dins  $[[INS:[0-9]+]], $[[EXT]], 63, 1
+; 64R2: dmtc1 $[[INS]], $f0
+
   %call = tail call double @copysign(double %d0, double %d1) nounwind readnone
   ret double %call
 }
@@ -43,18 +38,22 @@ declare double @copysign(double, double) nounwind readnone
 
 define float @func1(float %f0, float %f1) nounwind readnone {
 entry:
-; MIPS32-EL: func1:
-; MIPS32-EL: mfc1 $[[ARG1:[0-9]+]], $f14
-; MIPS32-EL: lui $[[MSK1:[0-9]+]], 32768
-; MIPS32-EL: and $[[T3:[0-9]+]], $[[ARG1]], $[[MSK1]]
-; MIPS32-EL: lui $[[T0:[0-9]+]], 32767
-; MIPS32-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; MIPS32-EL: mfc1 $[[ARG0:[0-9]+]], $f12
-; MIPS32-EL: and $[[T2:[0-9]+]], $[[ARG0]], $[[MSK0]]
-; MIPS32-EL: or  $[[T4:[0-9]+]], $[[T2]], $[[T3]]
-; MIPS32-EL: mtc1 $[[T4]], $f0
+
+; 32: lui  $[[MSK1:[0-9]+]], 32768
+; 32: and  $[[AND1:[0-9]+]], ${{[0-9]+}}, $[[MSK1]]
+; 32: lui  $[[T0:[0-9]+]], 32767
+; 32: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 32: and  $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 32: or   $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; 32: mtc1 $[[OR]], $f0
+
+; 32R2: ext  $[[EXT:[0-9]+]], ${{[0-9]+}}, 31, 1
+; 32R2: ins  $[[INS:[0-9]+]], $[[EXT]], 31, 1
+; 32R2: mtc1 $[[INS]], $f0
+
   %call = tail call float @copysignf(float %f0, float %f1) nounwind readnone
   ret float %call
 }
 
 declare float @copysignf(float, float) nounwind readnone
+
author	Akira Hatanaka <ahatanaka@mips.com>	2012-04-11 22:13:04 +0000
committer	Akira Hatanaka <ahatanaka@mips.com>	2012-04-11 22:13:04 +0000
commit	056c51e59820c4c36d3b9d2f22acb6fd121b798e (patch)
tree	641ba94fc250beeb15afed2f3250a2de4555789d
parent	2ce182c90cd8738bf76e5cdaa21eb6627c6e6aec (diff)
download	external_llvm-056c51e59820c4c36d3b9d2f22acb6fd121b798e.zip external_llvm-056c51e59820c4c36d3b9d2f22acb6fd121b798e.tar.gz external_llvm-056c51e59820c4c36d3b9d2f22acb6fd121b798e.tar.bz2