From 2f2fe417f98406140504ba3bbb65676d4a00ed87 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 9 Nov 2011 03:22:02 +0000 Subject: Add support for encoding immediates in icmp and fcmp. Hopefully, this will remove a fair number of unnecessary materialized constants. rdar://10412592 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144163 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 76 ++++++++++-- test/CodeGen/ARM/fast-isel-cmp-imm.ll | 214 ++++++++++++++++++++++++++++++++++ 2 files changed, 278 insertions(+), 12 deletions(-) create mode 100644 test/CodeGen/ARM/fast-isel-cmp-imm.ll diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 3c6d1e8..23629e7 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1206,16 +1206,42 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, if (isFloat && !Subtarget->hasVFP2()) return false; + // Check to see if the 2nd operand is a constant that we can encode directly + // in the compare. + uint64_t Imm; + int EncodedImm = 0; + bool EncodeImm = false; + bool isNegativeImm = false; + if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) { + if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 || + SrcVT == MVT::i1) { + const APInt &CIVal = ConstInt->getValue(); + + isNegativeImm = CIVal.isNegative(); + Imm = (isNegativeImm) ? (-CIVal).getZExtValue() : CIVal.getZExtValue(); + EncodedImm = (int)Imm; + EncodeImm = isThumb2 ? (ARM_AM::getT2SOImmVal(EncodedImm) != -1) : + (ARM_AM::getSOImmVal(EncodedImm) != -1); + } + } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) { + if (SrcVT == MVT::f32 || SrcVT == MVT::f64) + if (ConstFP->isZero() && !ConstFP->isNegative()) + EncodeImm = true; + } + unsigned CmpOpc; + bool isICmp = true; bool needsExt = false; switch (SrcVT.getSimpleVT().SimpleTy) { default: return false; // TODO: Verify compares. 
case MVT::f32: - CmpOpc = ARM::VCMPES; + isICmp = false; + CmpOpc = EncodeImm ? ARM::VCMPEZS : ARM::VCMPES; break; case MVT::f64: - CmpOpc = ARM::VCMPED; + isICmp = false; + CmpOpc = EncodeImm ? ARM::VCMPEZD : ARM::VCMPED; break; case MVT::i1: case MVT::i8: @@ -1223,30 +1249,56 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, needsExt = true; // Intentional fall-through. case MVT::i32: - CmpOpc = isThumb2 ? ARM::t2CMPrr : ARM::CMPrr; + if (isThumb2) { + if (!EncodeImm) + CmpOpc = ARM::t2CMPrr; + else + CmpOpc = isNegativeImm ? ARM::t2CMNzri : ARM::t2CMPri; + } else { + if (!EncodeImm) + CmpOpc = ARM::CMPrr; + else + CmpOpc = isNegativeImm ? ARM::CMNzri : ARM::CMPri; + } break; } unsigned SrcReg1 = getRegForValue(Src1Value); if (SrcReg1 == 0) return false; - unsigned SrcReg2 = getRegForValue(Src2Value); - if (SrcReg2 == 0) return false; + unsigned SrcReg2; + if (!EncodeImm) { + SrcReg2 = getRegForValue(Src2Value); + if (SrcReg2 == 0) return false; + } // We have i1, i8, or i16, we need to either zero extend or sign extend. 
if (needsExt) { unsigned ResultReg; - EVT DestVT = MVT::i32; - ResultReg = ARMEmitIntExt(SrcVT, SrcReg1, DestVT, isZExt); + ResultReg = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt); if (ResultReg == 0) return false; SrcReg1 = ResultReg; - ResultReg = ARMEmitIntExt(SrcVT, SrcReg2, DestVT, isZExt); - if (ResultReg == 0) return false; - SrcReg2 = ResultReg; + if (!EncodeImm) { + ResultReg = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt); + if (ResultReg == 0) return false; + SrcReg2 = ResultReg; + } } - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) - .addReg(SrcReg1).addReg(SrcReg2)); + if (!EncodeImm) { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(CmpOpc)) + .addReg(SrcReg1).addReg(SrcReg2)); + } else { + MachineInstrBuilder MIB; + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) + .addReg(SrcReg1); + + // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0. + if (isICmp) + MIB.addImm(EncodedImm); + AddOptionalDefs(MIB); + } // For floating point we need to move the result to a comparison register // that we can then use for branches. 
diff --git a/test/CodeGen/ARM/fast-isel-cmp-imm.ll b/test/CodeGen/ARM/fast-isel-cmp-imm.ll new file mode 100644 index 0000000..8cb4722 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-cmp-imm.ll @@ -0,0 +1,214 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB + +define void @t1a(float %a) uwtable ssp { +entry: +; ARM: t1a +; THUMB: t1a + %cmp = fcmp oeq float %a, 0.000000e+00 +; ARM: vcmpe.f32 s0, #0 +; THUMB: vcmpe.f32 s0, #0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +declare void @foo() + +; Shouldn't be able to encode -0.0 imm. +define void @t1b(float %a) uwtable ssp { +entry: +; ARM: t1b +; THUMB: t1b + %cmp = fcmp oeq float %a, -0.000000e+00 +; ARM: vldr.32 +; ARM: vcmpe.f32 s0, s1 +; THUMB: vldr.32 +; THUMB: vcmpe.f32 s0, s1 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t2a(double %a) uwtable ssp { +entry: +; ARM: t2a +; THUMB: t2a + %cmp = fcmp oeq double %a, 0.000000e+00 +; ARM: vcmpe.f64 d16, #0 +; THUMB: vcmpe.f64 d16, #0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +; Shouldn't be able to encode -0.0 imm. 
+define void @t2b(double %a) uwtable ssp { +entry: +; ARM: t2b +; THUMB: t2b + %cmp = fcmp oeq double %a, -0.000000e+00 +; ARM: vldr.64 +; ARM: vcmpe.f64 d16, d17 +; THUMB: vldr.64 +; THUMB: vcmpe.f64 d16, d17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t4(i8 signext %a) uwtable ssp { +entry: +; ARM: t4 +; THUMB: t4 + %cmp = icmp eq i8 %a, -1 +; ARM: cmn r0, #1 +; THUMB: cmn.w r0, #1 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t5(i8 zeroext %a) uwtable ssp { +entry: +; ARM: t5 +; THUMB: t5 + %cmp = icmp eq i8 %a, 1 +; ARM: cmp r0, #1 +; THUMB: cmp r0, #1 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t6(i16 signext %a) uwtable ssp { +entry: +; ARM: t6 +; THUMB: t6 + %cmp = icmp eq i16 %a, -1 +; ARM: cmn r0, #1 +; THUMB: cmn.w r0, #1 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t7(i16 zeroext %a) uwtable ssp { +entry: +; ARM: t7 +; THUMB: t7 + %cmp = icmp eq i16 %a, 1 +; ARM: cmp r0, #1 +; THUMB: cmp r0, #1 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t8(i32 %a) uwtable ssp { +entry: +; ARM: t8 +; THUMB: t8 + %cmp = icmp eq i32 %a, -1 +; ARM: cmn r0, #1 +; THUMB: cmn.w r0, #1 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t9(i32 %a) uwtable ssp { 
+entry: +; ARM: t9 +; THUMB: t9 + %cmp = icmp eq i32 %a, 1 +; ARM: cmp r0, #1 +; THUMB: cmp r0, #1 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t10(i32 %a) uwtable ssp { +entry: +; ARM: t10 +; THUMB: t10 + %cmp = icmp eq i32 %a, 384 +; ARM: cmp r0, #384 +; THUMB: cmp.w r0, #384 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t11(i32 %a) uwtable ssp { +entry: +; ARM: t11 +; THUMB: t11 + %cmp = icmp eq i32 %a, 4096 +; ARM: cmp r0, #4096 +; THUMB: cmp.w r0, #4096 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} -- cgit v1.1