aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHal Finkel <hfinkel@anl.gov>2013-08-19 23:35:46 +0000
committerHal Finkel <hfinkel@anl.gov>2013-08-19 23:35:46 +0000
commit66d1fa6f4b443ac9f8bcea5d1f71a73ada733a42 (patch)
tree93e47ddf0c8e56f76d676971f3247e9f42d7e49f
parent30cbccb029b01738202bd04341b1cbf68a7814c9 (diff)
downloadexternal_llvm-66d1fa6f4b443ac9f8bcea5d1f71a73ada733a42.zip
external_llvm-66d1fa6f4b443ac9f8bcea5d1f71a73ada733a42.tar.gz
external_llvm-66d1fa6f4b443ac9f8bcea5d1f71a73ada733a42.tar.bz2
Add a llvm.copysign intrinsic
This adds a llvm.copysign intrinsic; We already have Libfunc recognition for copysign (which is turned into the FCOPYSIGN SDAG node). In order to autovectorize calls to copysign in the loop vectorizer, we need a corresponding intrinsic as well. In addition to the expected changes to the language reference, the loop vectorizer, BasicTTI, and the SDAG builder (the intrinsic is transformed into an FCOPYSIGN node, just like the function call), this also adds FCOPYSIGN to a few lists in LegalizeVector{Ops,Types} so that vector copysigns can be expanded. In TargetLoweringBase::initActions, I've made the default action for FCOPYSIGN be Expand for vector types. This seems correct for all in-tree targets, and I think is the right thing to do because, previously, there was no way to generate vector-values FCOPYSIGN nodes (and most targets don't specify an action for vector-typed FCOPYSIGN). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188728 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--docs/LangRef.rst36
-rw-r--r--include/llvm/IR/Intrinsics.td2
-rw-r--r--lib/CodeGen/BasicTargetTransformInfo.cpp1
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp1
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp6
-rw-r--r--lib/CodeGen/TargetLoweringBase.cpp5
-rw-r--r--lib/Target/PowerPC/PPCCTRLoops.cpp6
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp5
-rw-r--r--test/Transforms/LoopVectorize/intrinsic.ll53
10 files changed, 117 insertions, 0 deletions
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index 47c89e8..68ac96f 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -7353,6 +7353,42 @@ Semantics:
This function returns the same values as the libm ``fabs`` functions
would, and handles error conditions in the same way.
+'``llvm.copysign.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.copysign`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+ declare float @llvm.copysign.f32(float %Mag, float %Sgn)
+ declare double @llvm.copysign.f64(double %Mag, double %Sgn)
+ declare x86_fp80 @llvm.copysign.f80(x86_fp80 %Mag, x86_fp80 %Sgn)
+ declare fp128 @llvm.copysign.f128(fp128 %Mag, fp128 %Sgn)
+ declare ppc_fp128 @llvm.copysign.ppcf128(ppc_fp128 %Mag, ppc_fp128 %Sgn)
+
+Overview:
+"""""""""
+
+The '``llvm.copysign.*``' intrinsics return a value with the magnitude of the
+first operand and the sign of the second operand.
+
+Arguments:
+""""""""""
+
+The arguments and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``copysign``
+functions would, and handles error conditions in the same way.
+
'``llvm.floor.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index 856e0a5..c7414e0 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -296,6 +296,8 @@ let Properties = [IntrReadMem] in {
def int_exp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_exp2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_fabs : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_copysign : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_floor : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_ceil : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_trunc : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index ef53235..0883ab0 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -443,6 +443,7 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
case Intrinsic::log10: ISD = ISD::FLOG10; break;
case Intrinsic::log2: ISD = ISD::FLOG2; break;
case Intrinsic::fabs: ISD = ISD::FABS; break;
+ case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break;
case Intrinsic::floor: ISD = ISD::FFLOOR; break;
case Intrinsic::ceil: ISD = ISD::FCEIL; break;
case Intrinsic::trunc: ISD = ISD::FTRUNC; break;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 237a33a..b768f39 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -227,6 +227,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FP_TO_UINT:
case ISD::FNEG:
case ISD::FABS:
+ case ISD::FCOPYSIGN:
case ISD::FSQRT:
case ISD::FSIN:
case ISD::FCOS:
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index bf65319..fd5f977 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -98,6 +98,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ADD:
case ISD::AND:
case ISD::FADD:
+ case ISD::FCOPYSIGN:
case ISD::FDIV:
case ISD::FMUL:
case ISD::FPOW:
@@ -557,6 +558,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SUB:
case ISD::MUL:
case ISD::FADD:
+ case ISD::FCOPYSIGN:
case ISD::FSUB:
case ISD::FMUL:
case ISD::SDIV:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3b9f358..df9293d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4925,6 +4925,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0))));
return 0;
}
+ case Intrinsic::copysign:
+ setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return 0;
case Intrinsic::fma:
setValue(&I, DAG.getNode(ISD::FMA, sdl,
getValue(I.getArgOperand(0)).getValueType(),
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 55125bd..a2ea35e 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -697,6 +697,11 @@ void TargetLoweringBase::initActions() {
// These library functions default to expand.
setOperationAction(ISD::FROUND, (MVT::SimpleValueType)VT, Expand);
+
+ // These operations default to expand for vector types.
+ if (VT >= MVT::FIRST_VECTOR_VALUETYPE &&
+ VT <= MVT::LAST_VECTOR_VALUETYPE)
+ setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
}
// Most targets ignore the @llvm.prefetch intrinsic.
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index d9f1323..4224ae2 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -253,6 +253,12 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
case Intrinsic::sin:
case Intrinsic::cos:
return true;
+ case Intrinsic::copysign:
+ if (CI->getArgOperand(0)->getType()->getScalarType()->
+ isPPC_FP128Ty())
+ return true;
+ else
+ continue; // ISD::FCOPYSIGN is never a library call.
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index e452acd..a7026f6 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1767,6 +1767,7 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
case Intrinsic::log10:
case Intrinsic::log2:
case Intrinsic::fabs:
+ case Intrinsic::copysign:
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::trunc:
@@ -1831,6 +1832,10 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
case LibFunc::fabsf:
case LibFunc::fabsl:
return Intrinsic::fabs;
+ case LibFunc::copysign:
+ case LibFunc::copysignf:
+ case LibFunc::copysignl:
+ return Intrinsic::copysign;
case LibFunc::floor:
case LibFunc::floorf:
case LibFunc::floorl:
diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll
index 566dcc7..216b937 100644
--- a/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/test/Transforms/LoopVectorize/intrinsic.ll
@@ -468,6 +468,59 @@ for.end: ; preds = %for.body, %entry
declare double @llvm.fabs(double) nounwind readnone
+;CHECK-LABEL: @copysign_f32(
+;CHECK: llvm.copysign.v4f32
+;CHECK: ret void
+define void @copysign_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds float* %z, i64 %indvars.iv
+ %1 = load float* %arrayidx1, align 4
+ %call = tail call float @llvm.copysign.f32(float %0, float %1) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.copysign.f32(float, float) nounwind readnone
+
+define void @copysign_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8
+ %arrayidx1 = getelementptr inbounds double* %z, i64 %indvars.iv
+ %1 = load double* %arrayidx, align 8
+ %call = tail call double @llvm.copysign(double %0, double %1) nounwind readnone
+ %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx2, align 8
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.copysign(double, double) nounwind readnone
+
;CHECK-LABEL: @floor_f32(
;CHECK: llvm.floor.v4f32
;CHECK: ret void