author    Joey Gouly <joey.gouly@arm.com>  2013-08-22 15:29:11 +0000
committer Joey Gouly <joey.gouly@arm.com>  2013-08-22 15:29:11 +0000
commit    35eab1db2f21aee9678fe946a5d983a67285e7e4 (patch)
tree      6d6c05c460860ecd13f344a8422c89a3ca05b9c7
parent    3f256c2e0b4417288e3191e67cb16177c8d8b2a7 (diff)
[ARMv8] Add CodeGen support for VSEL.
This uses the ARMcmov pattern that Tim cleaned up in r188995. Thanks to
Simon Tatham for his floating point help!

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189024 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp |  94
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td      |  20
-rw-r--r--  test/CodeGen/ARM/vsel.ll           | 309
3 files changed, 414 insertions(+), 9 deletions(-)
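
As a quick illustration (not part of the patch), this is the source-level pattern the new lowering targets; on an ARMv8 target with +v8fp, a compare-plus-select like this can now become a vcmpe/vsel pair instead of a conditionally executed VMOV. The function name is made up for this sketch:

    // Illustrative only: fcmp ogt + select -> vcmpe.f32 + vselgt.f32.
    float select_larger(float a, float b) {
      return a > b ? a : b;
    }
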
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index ebfa1b1..7021941 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3178,6 +3178,61 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SelectTrue, SelectFalse, ISD::SETNE);
}
+static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) {
+  // Use the true inverse (SETLT -> SETGE, SETNE -> SETEQ): merely swapping
+  // the operands would mishandle 'equal' once the select operands swap.
+  return ISD::getSetCCInverse(CC, /*isInteger=*/true);
+}
+
+static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+ bool &swpCmpOps, bool &swpVselOps) {
+ // Start by selecting the GE condition code for opcodes that return true for
+ // 'equality'
+ if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
+ CC == ISD::SETULE)
+ CondCode = ARMCC::GE;
+
+ // and GT for opcodes that return false for 'equality'.
+ else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
+ CC == ISD::SETULT)
+ CondCode = ARMCC::GT;
+
+ // Since we are constrained to GE/GT, if the opcode contains 'less', we need
+ // to swap the compare operands.
+ if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
+ CC == ISD::SETULT)
+ swpCmpOps = true;
+
+ // Both GT and GE are ordered comparisons, and return false for 'unordered'.
+ // If we have an unordered opcode, we need to swap the operands to the VSEL
+ // instruction (effectively negating the condition).
+ //
+ // This also has the effect of swapping which one of 'less' or 'greater'
+ // returns true, so we also swap the compare operands. It also switches
+ // whether we return true for 'equality', so we compensate by picking the
+ // opposite condition code to our original choice.
+ if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
+ CC == ISD::SETUGT) {
+ swpCmpOps = !swpCmpOps;
+ swpVselOps = !swpVselOps;
+ CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
+ }
+
+ // 'ordered' is 'anything but unordered', so use the VS condition code and
+ // swap the VSEL operands.
+ if (CC == ISD::SETO) {
+ CondCode = ARMCC::VS;
+ swpVselOps = true;
+ }
+
+ // 'unordered or not equal' is 'anything but equal', so use the EQ condition
+ // code and swap the VSEL operands.
+ if (CC == ISD::SETUNE) {
+ CondCode = ARMCC::EQ;
+ swpVselOps = true;
+ }
+}
+
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
@@ -3188,15 +3243,52 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
if (LHS.getValueType() == MVT::i32) {
+ // Try to generate VSEL on ARMv8.
+ // The VSEL instruction can't use all the usual ARM condition
+ // codes: it only has two bits to select the condition code, so it's
+ // constrained to use only GE, GT, VS and EQ.
+ //
+ // To implement all the various ISD::SETXXX opcodes, we sometimes need to
+ // swap the operands of the previous compare instruction (effectively
+ // inverting the compare condition, swapping 'less' and 'greater') and
+ // sometimes need to swap the operands to the VSEL (which inverts the
+  // condition in the sense of firing whenever the previous condition didn't).
+ if (getSubtarget()->hasV8FP() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
+ CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
+ CC = getInverseCCForVSEL(CC);
+ std::swap(TrueVal, FalseVal);
+ }
+ }
+
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
- return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
+ Cmp);
}
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
+ // Try to generate VSEL on ARMv8.
+ if (getSubtarget()->hasV8FP() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
+ bool swpCmpOps = false;
+ bool swpVselOps = false;
+ checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
+
+ if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
+ CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
+ if (swpCmpOps)
+ std::swap(LHS, RHS);
+ if (swpVselOps)
+ std::swap(TrueVal, FalseVal);
+ }
+ }
+
SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
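
For reference, here is a standalone sketch (using hypothetical stand-in enums, not LLVM's types) of the table checkVSELConstraints computes for the ordering predicates; the asserted cases can be checked against the vsel.ll tests added below:

    #include <cassert>

    enum FPPred { OGT, OGE, OLT, OLE, UGT, UGE, ULT, ULE };  // stand-ins
    enum Cond   { GT, GE };

    static void vselConstraints(FPPred CC, Cond &C, bool &swpCmp, bool &swpVsel) {
      swpCmp = swpVsel = false;
      C = (CC == UGE || CC == OGE || CC == OLE || CC == ULE) ? GE : GT;
      if (CC == OLE || CC == ULE || CC == OLT || CC == ULT)
        swpCmp = true;                 // 'less' -> swap the compare operands
      if (CC == ULE || CC == ULT || CC == UGE || CC == UGT) {
        swpCmp  = !swpCmp;             // unordered -> negate via a VSEL swap,
        swpVsel = !swpVsel;            // which re-swaps both earlier choices
        C = (C == GT) ? GE : GT;
      }
    }

    int main() {
      Cond c; bool sc, sv;
      vselConstraints(OGT, c, sc, sv);
      assert(c == GT && !sc && !sv);   // test_vsel32ogt: vselgt s0, s2, s3
      vselConstraints(ULT, c, sc, sv);
      assert(c == GE && !sc && sv);    // test_vsel32ult: vselge s0, s3, s2
      return 0;
    }
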
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index c6b8bc3..b4df4d7 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -333,24 +333,28 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
let D = VFPNeonA8Domain;
}
-multiclass vsel_inst<string op, bits<2> opc> {
- let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
+multiclass vsel_inst<string op, bits<2> opc, int CC> {
+ let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
+ Uses = [CPSR], AddedComplexity = 4 in {
def S : ASbInp<0b11100, opc, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"),
- []>, Requires<[HasV8FP]>;
+ [(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC))]>,
+ Requires<[HasV8FP]>;
def D : ADbInp<0b11100, opc, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"),
- []>, Requires<[HasV8FP]>;
+ [(set DPR:$Dd, (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC))]>,
+ Requires<[HasV8FP]>;
}
}
-defm VSELGT : vsel_inst<"gt", 0b11>;
-defm VSELGE : vsel_inst<"ge", 0b10>;
-defm VSELEQ : vsel_inst<"eq", 0b00>;
-defm VSELVS : vsel_inst<"vs", 0b01>;
+// The CC constants here match ARMCC::CondCodes.
+defm VSELGT : vsel_inst<"gt", 0b11, 12>;
+defm VSELGE : vsel_inst<"ge", 0b10, 10>;
+defm VSELEQ : vsel_inst<"eq", 0b00, 0>;
+defm VSELVS : vsel_inst<"vs", 0b01, 6>;
multiclass vmaxmin_inst<string op, bit opc> {
let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
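
The integer condition-code arguments correspond, at the time of this commit, to the ARMCC::CondCodes values in ARMBaseInfo.h, which mirror the hardware condition encodings. Abbreviated sketch for reference; only the four codes VSEL can encode are spelled out:

    namespace ARMCC {
      enum CondCodes {
        EQ = 0,        // -> VSELEQ
        // NE, HS, LO, MI, PL ...
        VS = 6,        // -> VSELVS
        // VC, HI, LS ...
        GE = 10,       // -> VSELGE
        // LT ...
        GT = 12        // -> VSELGT
        // LE, AL
      };
    }
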
diff --git a/test/CodeGen/ARM/vsel.ll b/test/CodeGen/ARM/vsel.ll
new file mode 100644
index 0000000..f4ee800
--- /dev/null
+++ b/test/CodeGen/ARM/vsel.ll
@@ -0,0 +1,309 @@
+; RUN: llc < %s -mtriple=armv8-linux-gnueabihf -mattr=+v8fp -float-abi=hard | FileCheck %s
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+define void @test_vsel32sgt(i32 %lhs32, i32 %rhs32, float %a, float %b) {
+; CHECK: test_vsel32sgt
+ %tst1 = icmp sgt i32 %lhs32, %rhs32
+ %val1 = select i1 %tst1, float %a, float %b
+ store float %val1, float* @varfloat
+; CHECK: cmp r0, r1
+; CHECK: vselgt.f32 s0, s0, s1
+ ret void
+}
+define void @test_vsel64sgt(i32 %lhs32, i32 %rhs32, double %a, double %b) {
+; CHECK: test_vsel64sgt
+ %tst1 = icmp sgt i32 %lhs32, %rhs32
+ %val1 = select i1 %tst1, double %a, double %b
+ store double %val1, double* @vardouble
+; CHECK: cmp r0, r1
+; CHECK: vselgt.f64 d16, d0, d1
+ ret void
+}
+define void @test_vsel32sge(i32 %lhs32, i32 %rhs32, float %a, float %b) {
+; CHECK: test_vsel32sge
+ %tst1 = icmp sge i32 %lhs32, %rhs32
+ %val1 = select i1 %tst1, float %a, float %b
+ store float %val1, float* @varfloat
+; CHECK: cmp r0, r1
+; CHECK: vselge.f32 s0, s0, s1
+ ret void
+}
+define void @test_vsel64sge(i32 %lhs32, i32 %rhs32, double %a, double %b) {
+; CHECK: test_vsel64sge
+ %tst1 = icmp sge i32 %lhs32, %rhs32
+ %val1 = select i1 %tst1, double %a, double %b
+ store double %val1, double* @vardouble
+; CHECK: cmp r0, r1
+; CHECK: vselge.f64 d16, d0, d1
+ ret void
+}
+define void @test_vsel32eq(i32 %lhs32, i32 %rhs32, float %a, float %b) {
+; CHECK: test_vsel32eq
+ %tst1 = icmp eq i32 %lhs32, %rhs32
+ %val1 = select i1 %tst1, float %a, float %b
+ store float %val1, float* @varfloat
+; CHECK: cmp r0, r1
+; CHECK: vseleq.f32 s0, s0, s1
+ ret void
+}
+define void @test_vsel64eq(i32 %lhs32, i32 %rhs32, double %a, double %b) {
+; CHECK: test_vsel64eq
+ %tst1 = icmp eq i32 %lhs32, %rhs32
+ %val1 = select i1 %tst1, double %a, double %b
+ store double %val1, double* @vardouble
+; CHECK: cmp r0, r1
+; CHECK: vseleq.f64 d16, d0, d1
+ ret void
+}
+define void @test_vsel32slt(i32 %lhs32, i32 %rhs32, float %a, float %b) {
+; CHECK: test_vsel32slt
+ %tst1 = icmp slt i32 %lhs32, %rhs32
+ %val1 = select i1 %tst1, float %a, float %b
+ store float %val1, float* @varfloat
+; CHECK: cmp r0, r1
+; CHECK: vselge.f32 s0, s1, s0
+ ret void
+}
+define void @test_vsel64slt(i32 %lhs32, i32 %rhs32, double %a, double %b) {
+; CHECK: test_vsel64slt
+ %tst1 = icmp slt i32 %lhs32, %rhs32
+ %val1 = select i1 %tst1, double %a, double %b
+ store double %val1, double* @vardouble
+; CHECK: cmp r0, r1
+; CHECK: vselge.f64 d16, d1, d0
+ ret void
+}
+define void @test_vsel32sle(i32 %lhs32, i32 %rhs32, float %a, float %b) {
+; CHECK: test_vsel32sle
+ %tst1 = icmp sle i32 %lhs32, %rhs32
+ %val1 = select i1 %tst1, float %a, float %b
+ store float %val1, float* @varfloat
+; CHECK: cmp r0, r1
+; CHECK: vselgt.f32 s0, s1, s0
+ ret void
+}
+define void @test_vsel64sle(i32 %lhs32, i32 %rhs32, double %a, double %b) {
+; CHECK: test_vsel64sle
+ %tst1 = icmp sle i32 %lhs32, %rhs32
+ %val1 = select i1 %tst1, double %a, double %b
+ store double %val1, double* @vardouble
+; CHECK: cmp r0, r1
+; CHECK: vselgt.f64 d16, d1, d0
+ ret void
+}
+define void @test_vsel32ogt(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32ogt
+ %tst1 = fcmp ogt float %lhs32, %rhs32
+ %val1 = select i1 %tst1, float %a, float %b
+ store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselgt.f32 s0, s2, s3
+ ret void
+}
+define void @test_vsel64ogt(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64ogt
+ %tst1 = fcmp ogt float %lhs32, %rhs32
+ %val1 = select i1 %tst1, double %a, double %b
+ store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselgt.f64 d16, d1, d2
+ ret void
+}
+define void @test_vsel32oge(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32oge
+ %tst1 = fcmp oge float %lhs32, %rhs32
+ %val1 = select i1 %tst1, float %a, float %b
+ store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselge.f32 s0, s2, s3
+ ret void
+}
+define void @test_vsel64oge(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64oge
+ %tst1 = fcmp oge float %lhs32, %rhs32
+ %val1 = select i1 %tst1, double %a, double %b
+ store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselge.f64 d16, d1, d2
+ ret void
+}
+define void @test_vsel32oeq(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32oeq
+ %tst1 = fcmp oeq float %lhs32, %rhs32
+ %val1 = select i1 %tst1, float %a, float %b
+ store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vseleq.f32 s0, s2, s3
+ ret void
+}
+define void @test_vsel64oeq(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64oeq
+ %tst1 = fcmp oeq float %lhs32, %rhs32
+ %val1 = select i1 %tst1, double %a, double %b
+ store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vseleq.f64 d16, d1, d2
+ ret void
+}
+define void @test_vsel32ugt(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32ugt
+ %tst1 = fcmp ugt float %lhs32, %rhs32
+ %val1 = select i1 %tst1, float %a, float %b
+ store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselge.f32 s0, s3, s2
+ ret void
+}
+define void @test_vsel64ugt(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64ugt
+ %tst1 = fcmp ugt float %lhs32, %rhs32
+ %val1 = select i1 %tst1, double %a, double %b
+ store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselge.f64 d16, d2, d1
+ ret void
+}
+define void @test_vsel32uge(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32uge
+ %tst1 = fcmp uge float %lhs32, %rhs32
+ %val1 = select i1 %tst1, float %a, float %b
+ store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselgt.f32 s0, s3, s2
+ ret void
+}
+define void @test_vsel64uge(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64uge
+ %tst1 = fcmp uge float %lhs32, %rhs32
+ %val1 = select i1 %tst1, double %a, double %b
+ store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselgt.f64 d16, d2, d1
+ ret void
+}
+define void @test_vsel32olt(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32olt
+ %tst1 = fcmp olt float %lhs32, %rhs32
+ %val1 = select i1 %tst1, float %a, float %b
+ store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselgt.f32 s0, s2, s3
+ ret void
+}
+define void @test_vsel64olt(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64olt
+ %tst1 = fcmp olt float %lhs32, %rhs32
+ %val1 = select i1 %tst1, double %a, double %b
+ store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselgt.f64 d16, d1, d2
+ ret void
+}
+define void @test_vsel32ult(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32ult
+ %tst1 = fcmp ult float %lhs32, %rhs32
+ %val1 = select i1 %tst1, float %a, float %b
+ store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselge.f32 s0, s3, s2
+ ret void
+}
+define void @test_vsel64ult(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64ult
+ %tst1 = fcmp ult float %lhs32, %rhs32
+ %val1 = select i1 %tst1, double %a, double %b
+ store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselge.f64 d16, d2, d1
+ ret void
+}
+define void @test_vsel32ole(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32ole
+ %tst1 = fcmp ole float %lhs32, %rhs32
+ %val1 = select i1 %tst1, float %a, float %b
+ store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselge.f32 s0, s2, s3
+ ret void
+}
+define void @test_vsel64ole(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64ole
+ %tst1 = fcmp ole float %lhs32, %rhs32
+ %val1 = select i1 %tst1, double %a, double %b
+ store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselge.f64 d16, d1, d2
+ ret void
+}
+define void @test_vsel32ule(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32ule
+ %tst1 = fcmp ule float %lhs32, %rhs32
+ %val1 = select i1 %tst1, float %a, float %b
+ store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselgt.f32 s0, s3, s2
+ ret void
+}
+define void @test_vsel64ule(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64ule
+ %tst1 = fcmp ule float %lhs32, %rhs32
+ %val1 = select i1 %tst1, double %a, double %b
+ store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselgt.f64 d16, d2, d1
+ ret void
+}
+define void @test_vsel32ord(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32ord
+ %tst1 = fcmp ord float %lhs32, %rhs32
+ %val1 = select i1 %tst1, float %a, float %b
+ store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselvs.f32 s0, s3, s2
+ ret void
+}
+define void @test_vsel64ord(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64ord
+ %tst1 = fcmp ord float %lhs32, %rhs32
+ %val1 = select i1 %tst1, double %a, double %b
+ store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselvs.f64 d16, d2, d1
+ ret void
+}
+define void @test_vsel32une(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32une
+ %tst1 = fcmp une float %lhs32, %rhs32
+ %val1 = select i1 %tst1, float %a, float %b
+ store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vseleq.f32 s0, s3, s2
+ ret void
+}
+define void @test_vsel64une(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64une
+ %tst1 = fcmp une float %lhs32, %rhs32
+ %val1 = select i1 %tst1, double %a, double %b
+ store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vseleq.f64 d16, d2, d1
+ ret void
+}
+define void @test_vsel32uno(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32uno
+ %tst1 = fcmp uno float %lhs32, %rhs32
+ %val1 = select i1 %tst1, float %a, float %b
+ store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselvs.f32 s0, s2, s3
+ ret void
+}
+define void @test_vsel64uno(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64uno
+ %tst1 = fcmp uno float %lhs32, %rhs32
+ %val1 = select i1 %tst1, double %a, double %b
+ store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselvs.f64 d16, d1, d2
+ ret void
+}
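
The unordered tests above rely on the identity that an unordered comparison is the negation of the opposite ordered comparison with swapped operands. A small self-check in ordinary C++, independent of the patch:

    #include <cassert>
    #include <cmath>

    // ugt(a, b) is "a > b or unordered"; !(b >= a) has the same truth
    // table under IEEE semantics, which is why test_vsel32ugt expects a
    // swapped vcmpe plus a vselge with swapped operands.
    static bool ugt(float a, float b) { return !(b >= a); }

    int main() {
      assert( ugt(2.0f, 1.0f));    // ordered greater -> true
      assert(!ugt(1.0f, 2.0f));    // ordered less -> false
      assert( ugt(NAN,  1.0f));    // unordered -> true for a u-predicate
      return 0;
    }
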