aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2012-08-19 13:06:16 +0000
committerNadav Rotem <nrotem@apple.com>2012-08-19 13:06:16 +0000
commitd60cb11afd709b9f84c58d8335cbc08ad912b236 (patch)
tree8308b69511649b732cda459b7aa7d3dc0e9f2314 /lib
parent51bfb0fe50405bfbb14abf8ceb6ea8d0f67d7057 (diff)
downloadexternal_llvm-d60cb11afd709b9f84c58d8335cbc08ad912b236.zip
external_llvm-d60cb11afd709b9f84c58d8335cbc08ad912b236.tar.gz
external_llvm-d60cb11afd709b9f84c58d8335cbc08ad912b236.tar.bz2
When unsafe math is used, we can use commutative FMAX and FMIN. In some cases
this allows for better code generation. Added a new DAGCombine transformation to convert FMAX and FMIN to FMANC and FMINC, which are commutative. For example: movaps %xmm0, %xmm1 movsd LC(%rip), %xmm0 minsd %xmm1, %xmm0 becomes: minsd LC(%rip), %xmm0 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162187 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp27
-rw-r--r--lib/Target/X86/X86ISelLowering.h3
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td12
-rw-r--r--lib/Target/X86/X86InstrSSE.td20
4 files changed, 59 insertions, 3 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index e59ab80..e4af736 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -11463,6 +11463,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
+ case X86ISD::FMAXC: return "X86ISD::FMAXC";
+ case X86ISD::FMINC: return "X86ISD::FMINC";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
@@ -15368,6 +15370,29 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+/// PerformFMinFMaxCombine - Do target-specific dag combines on X86ISD::FMIN and
+/// X86ISD::FMAX nodes.
+static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);
+
+ // Only perform optimizations if UnsafeMath is used.
+ if (!DAG.getTarget().Options.UnsafeFPMath)
+ return SDValue();
+
+ // If we run in unsafe-math mode, then convert the FMAX and FMIN nodes
+ // into FMINC and MMAXC, which are Commutative operations.
+ unsigned NewOp = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unknown opcode");
+ case X86ISD::FMIN: NewOp = X86ISD::FMINC; break;
+ case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break;
+ }
+
+ return DAG.getNode(NewOp, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(0), N->getOperand(1));
+}
+
+
/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
// FAND(0.0, x) -> 0.0
@@ -15849,6 +15874,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
+ case X86ISD::FMIN:
+ case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 896d067..74f5167 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -195,6 +195,9 @@ namespace llvm {
///
FMAX, FMIN,
+ /// FMAXC, FMINC - Commutative FMIN and FMAX.
+ FMAXC, FMINC,
+
/// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
/// approximation. Note that these typically require refinement
/// in order to obtain suitable precision.
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 1db68c8..ee2d3c4 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -29,6 +29,13 @@ def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
+
+// Commutative and Associative FMIN and FMAX.
+def X86fminc : SDNode<"X86ISD::FMINC", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86fmaxc : SDNode<"X86ISD::FMAXC", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+
def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
@@ -73,11 +80,12 @@ def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
def X86vzmovly : SDNode<"X86ISD::VZEXT_MOVL",
- SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisOpSmallerThanOp<1, 0> ]>>;
def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL",
- SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;
+ SDTypeProfile<1, 1,
+ [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 20dc81e..220c06d 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2872,7 +2872,8 @@ let isCommutable = 0 in {
basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, VEX_4V;
+ basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
+ VEX_4V;
defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>,
VEX_4V, VEX_LIG;
@@ -2923,6 +2924,23 @@ let Constraints = "$src1 = $dst" in {
}
}
+let isCommutable = 1, isCodeGenOnly = 1 in {
+ defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VMAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>, VEX_4V;
+ defm VMINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VMINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>, VEX_4V;
+ let Constraints = "$src1 = $dst" in {
+ defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>;
+ defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>;
+ }
+}
+
/// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the