From 2f2fe417f98406140504ba3bbb65676d4a00ed87 Mon Sep 17 00:00:00 2001
From: Chad Rosier <mcrosier@apple.com>
Date: Wed, 9 Nov 2011 03:22:02 +0000
Subject: Add support for encoding immediates in icmp and fcmp.  Hopefully,
 this will remove a fair number of unnecessary materialized constants.
 rdar://10412592

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144163 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMFastISel.cpp        |  76 ++++++++++--
 test/CodeGen/ARM/fast-isel-cmp-imm.ll | 214 ++++++++++++++++++++++++++++++++++
 2 files changed, 278 insertions(+), 12 deletions(-)
 create mode 100644 test/CodeGen/ARM/fast-isel-cmp-imm.ll
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 3c6d1e8..23629e7 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -1206,16 +1206,42 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
   if (isFloat && !Subtarget->hasVFP2())
     return false;
 
+  // Check to see if the 2nd operand is a constant that we can encode directly
+  // in the compare.
+  uint64_t Imm;
+  int EncodedImm = 0;
+  bool EncodeImm = false;
+  bool isNegativeImm = false;
+  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
+    if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
+        SrcVT == MVT::i1) {
+      const APInt &CIVal = ConstInt->getValue();
+
+      isNegativeImm = CIVal.isNegative();
+      Imm = (isNegativeImm) ? (-CIVal).getZExtValue() : CIVal.getZExtValue();
+      EncodedImm = (int)Imm;
+      EncodeImm = isThumb2 ? (ARM_AM::getT2SOImmVal(EncodedImm) != -1) :
+        (ARM_AM::getSOImmVal(EncodedImm) != -1);
+    }
+  } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
+    if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
+      if (ConstFP->isZero() && !ConstFP->isNegative())
+        EncodeImm = true;
+  }
+
   unsigned CmpOpc;
+  bool isICmp = true;
   bool needsExt = false;
   switch (SrcVT.getSimpleVT().SimpleTy) {
     default: return false;
     // TODO: Verify compares.
     case MVT::f32:
-      CmpOpc = ARM::VCMPES;
+      isICmp = false;
+      CmpOpc = EncodeImm ? ARM::VCMPEZS : ARM::VCMPES;
       break;
     case MVT::f64:
-      CmpOpc = ARM::VCMPED;
+      isICmp = false;
+      CmpOpc = EncodeImm ? ARM::VCMPEZD : ARM::VCMPED;
       break;
     case MVT::i1:
     case MVT::i8:
@@ -1223,30 +1249,56 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
       needsExt = true;
     // Intentional fall-through.
     case MVT::i32:
-      CmpOpc = isThumb2 ? ARM::t2CMPrr : ARM::CMPrr;
+      if (isThumb2) {
+        if (!EncodeImm)
+          CmpOpc = ARM::t2CMPrr;
+        else
+          CmpOpc = isNegativeImm ? ARM::t2CMNzri : ARM::t2CMPri;
+      } else {
+        if (!EncodeImm)
+          CmpOpc = ARM::CMPrr;
+        else
+          CmpOpc = isNegativeImm ? ARM::CMNzri : ARM::CMPri;
+      }
       break;
   }
 
   unsigned SrcReg1 = getRegForValue(Src1Value);
   if (SrcReg1 == 0) return false;
 
-  unsigned SrcReg2 = getRegForValue(Src2Value);
-  if (SrcReg2 == 0) return false;
+  unsigned SrcReg2;
+  if (!EncodeImm) {
+    SrcReg2 = getRegForValue(Src2Value);
+    if (SrcReg2 == 0) return false;
+  }
 
   // We have i1, i8, or i16, we need to either zero extend or sign extend.
   if (needsExt) {
     unsigned ResultReg;
-    EVT DestVT = MVT::i32;
-    ResultReg = ARMEmitIntExt(SrcVT, SrcReg1, DestVT, isZExt);
+    ResultReg = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
     if (ResultReg == 0) return false;
     SrcReg1 = ResultReg;
-    ResultReg = ARMEmitIntExt(SrcVT, SrcReg2, DestVT, isZExt);
-    if (ResultReg == 0) return false;
-    SrcReg2 = ResultReg;
+    if (!EncodeImm) {
+      ResultReg = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
+      if (ResultReg == 0) return false;
+      SrcReg2 = ResultReg;
+    }
   }
 
-  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
-                  .addReg(SrcReg1).addReg(SrcReg2));
+  if (!EncodeImm) {
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                            TII.get(CmpOpc))
+                    .addReg(SrcReg1).addReg(SrcReg2));
+  } else {
+    MachineInstrBuilder MIB;
+    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+      .addReg(SrcReg1);
+
+    // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
+    if (isICmp)
+      MIB.addImm(EncodedImm);
+    AddOptionalDefs(MIB);
+  }
 
   // For floating point we need to move the result to a comparison register
   // that we can then use for branches.
diff --git a/test/CodeGen/ARM/fast-isel-cmp-imm.ll b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
new file mode 100644
index 0000000..8cb4722
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
@@ -0,0 +1,214 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+
+define void @t1a(float %a) uwtable ssp {
+entry:
+; ARM: t1a
+; THUMB: t1a
+  %cmp = fcmp oeq float %a, 0.000000e+00
+; ARM: vcmpe.f32 s0, #0
+; THUMB: vcmpe.f32 s0, #0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+declare void @foo()
+
+; -0.0 must not be encoded as the #0 immediate; expect a load and a register compare.
+define void @t1b(float %a) uwtable ssp {
+entry:
+; ARM: t1b
+; THUMB: t1b
+  %cmp = fcmp oeq float %a, -0.000000e+00
+; ARM: vldr.32
+; ARM: vcmpe.f32 s0, s1
+; THUMB: vldr.32
+; THUMB: vcmpe.f32 s0, s1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t2a(double %a) uwtable ssp {
+entry:
+; ARM: t2a
+; THUMB: t2a
+  %cmp = fcmp oeq double %a, 0.000000e+00
+; ARM: vcmpe.f64 d16, #0
+; THUMB: vcmpe.f64 d16, #0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; -0.0 must not be encoded as the #0 immediate; expect a load and a register compare.
+define void @t2b(double %a) uwtable ssp {
+entry:
+; ARM: t2b
+; THUMB: t2b
+  %cmp = fcmp oeq double %a, -0.000000e+00
+; ARM: vldr.64
+; ARM: vcmpe.f64 d16, d17
+; THUMB: vldr.64
+; THUMB: vcmpe.f64 d16, d17
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t4(i8 signext %a) uwtable ssp {
+entry:
+; ARM: t4
+; THUMB: t4
+  %cmp = icmp eq i8 %a, -1
+; ARM: cmn r0, #1
+; THUMB: cmn.w r0, #1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t5(i8 zeroext %a) uwtable ssp {
+entry:
+; ARM: t5
+; THUMB: t5
+  %cmp = icmp eq i8 %a, 1
+; ARM: cmp r0, #1
+; THUMB: cmp r0, #1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t6(i16 signext %a) uwtable ssp {
+entry:
+; ARM: t6
+; THUMB: t6
+  %cmp = icmp eq i16 %a, -1
+; ARM: cmn r0, #1
+; THUMB: cmn.w r0, #1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t7(i16 zeroext %a) uwtable ssp {
+entry:
+; ARM: t7
+; THUMB: t7
+  %cmp = icmp eq i16 %a, 1
+; ARM: cmp r0, #1
+; THUMB: cmp r0, #1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t8(i32 %a) uwtable ssp {
+entry:
+; ARM: t8
+; THUMB: t8
+  %cmp = icmp eq i32 %a, -1
+; ARM: cmn r0, #1
+; THUMB: cmn.w r0, #1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t9(i32 %a) uwtable ssp {
+entry:
+; ARM: t9
+; THUMB: t9
+  %cmp = icmp eq i32 %a, 1
+; ARM: cmp r0, #1
+; THUMB: cmp r0, #1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t10(i32 %a) uwtable ssp {
+entry:
+; ARM: t10
+; THUMB: t10
+  %cmp = icmp eq i32 %a, 384
+; ARM: cmp r0, #384
+; THUMB: cmp.w r0, #384
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t11(i32 %a) uwtable ssp {
+entry:
+; ARM: t11
+; THUMB: t11
+  %cmp = icmp eq i32 %a, 4096
+; ARM: cmp r0, #4096
+; THUMB: cmp.w r0, #4096
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
-- 
cgit v1.1