diff options
| author | Bill Wendling <isanbard@gmail.com> | 2010-03-05 00:24:26 +0000 | 
|---|---|---|
| committer | Bill Wendling <isanbard@gmail.com> | 2010-03-05 00:24:26 +0000 | 
| commit | 37b52ee6d9141293c9aef0d2a4c753300a687536 (patch) | |
| tree | 34077edffbd28f8f34b799b41535c379b342c9c0 | |
| parent | b0812f114b83a32c4b90a4b553c7177c557558b5 (diff) | |
| download | external_llvm-37b52ee6d9141293c9aef0d2a4c753300a687536.zip external_llvm-37b52ee6d9141293c9aef0d2a4c753300a687536.tar.gz external_llvm-37b52ee6d9141293c9aef0d2a4c753300a687536.tar.bz2 | |
Micro-optimization:
This code:
// Example input for the optimization: multiplies x and y in double precision
// and tests the product against zero with a floating-point equality compare.
// Returns the product unchanged when it compares equal to 0.0, otherwise the
// product minus 1.0; in both cases the double value is converted back to
// float on return.
float floatingPointComparison(float x, float y) {
    double product = (double)x * y;
    // This equality test is what turns into the ucomisd + conditional-jump
    // sequence shown in the listings that follow.
    if (product == 0.0)
        return product;
    return product - 1.0;
}
produces this:
_floatingPointComparison:
0000000000000000        cvtss2sd        %xmm1,%xmm1
0000000000000004        cvtss2sd        %xmm0,%xmm0
0000000000000008        mulsd           %xmm1,%xmm0
000000000000000c        pxor            %xmm1,%xmm1
0000000000000010        ucomisd         %xmm1,%xmm0
0000000000000014        jne             0x00000004
0000000000000016        jp              0x00000002
0000000000000018        jmp             0x00000008
000000000000001a        addsd           0x00000006(%rip),%xmm0
0000000000000022        cvtsd2ss        %xmm0,%xmm0
0000000000000026        ret
The "jne/jp/jmp" sequence can be reduced to this instead:
_floatingPointComparison:
0000000000000000        cvtss2sd        %xmm1,%xmm1
0000000000000004        cvtss2sd        %xmm0,%xmm0
0000000000000008        mulsd           %xmm1,%xmm0
000000000000000c        pxor            %xmm1,%xmm1
0000000000000010        ucomisd         %xmm1,%xmm0
0000000000000014        jp              0x00000002
0000000000000016        je              0x00000008
0000000000000018        addsd           0x00000006(%rip),%xmm0
0000000000000020        cvtsd2ss        %xmm0,%xmm0
0000000000000024        ret
for a savings of 2 bytes.
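This transformation applies when we recognize that the jne and the jp both jump
to the same "true" MBB, the unconditional jmp jumps to the "false" MBB, and the
"true" MBB is the fall-through block. In that case we emit a jp to the "true"
MBB followed by a je to the "false" MBB, and let the remaining (not-equal) case
fall through into the "true" MBB, which makes the unconditional jmp unnecessary.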
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@97766 91177308-0d34-0410-b5e6-96231b3b80d8
| -rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 65 | ||||
| -rw-r--r-- | test/CodeGen/X86/jump-opt.ll | 22 | 
2 files changed, 74 insertions, 13 deletions
| diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 39bda04..0d3b54f 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1786,6 +1786,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,                             const SmallVectorImpl<MachineOperand> &Cond) const {    // FIXME this should probably have a DebugLoc operand    DebugLoc dl = DebugLoc::getUnknownLoc(); +    // Shouldn't be a fall through.    assert(TBB && "InsertBranch must not be told to insert a fallthrough");    assert((Cond.size() == 1 || Cond.size() == 0) && @@ -1799,34 +1800,72 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,    }    // Conditional branch. +  const MachineBasicBlock *NextBB = next(&MBB);    unsigned Count = 0;    X86::CondCode CC = (X86::CondCode)Cond[0].getImm(); + +  // In a two-way conditional branch, if the fall-through block is the +  // "false" branch of the conditional jumps, we can cut out the +  // unconditional jump by rearranging the conditional jumps. This saves a +  // few bytes and improves performance. I.e., for COND_NE_OR_P: +  // +  //     JNE L1 +  //     JP  L1 +  //     JMP L2 +  // L1: +  //     ... +  // L2: +  //     ... +  // +  // to: +  //  +  //     JP  L1 +  //     JE  L2 +  // L1: +  //     ... +  // L2: +  //     ... +  // +  // Similarly for COND_NP_OR_E.    switch (CC) { +  default: +    BuildMI(&MBB, dl, get(GetCondBranchFromCond(CC))).addMBB(TBB); +    ++Count; +    break;    case X86::COND_NP_OR_E:      // Synthesize NP_OR_E with two branches. 
-    BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB); -    ++Count; -    BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB); -    ++Count; +    if (FBB && FBB == NextBB) { +      BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB); +      BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(FBB); +      FBB = 0; +    } else { +      BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB); +      BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB); +    } + +    Count += 2;      break;    case X86::COND_NE_OR_P:      // Synthesize NE_OR_P with two branches. -    BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB); -    ++Count; -    BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB); -    ++Count; +    if (FBB && FBB == NextBB) { +      BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB); +      BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(FBB); +      FBB = 0; +    } else { +      BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB); +      BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB); +    } + +    Count += 2;      break; -  default: { -    unsigned Opc = GetCondBranchFromCond(CC); -    BuildMI(&MBB, dl, get(Opc)).addMBB(TBB); -    ++Count; -  }    } +    if (FBB) {      // Two-way Conditional branch. Insert the second branch.      
BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB);      ++Count;    } +    return Count;  } diff --git a/test/CodeGen/X86/jump-opt.ll b/test/CodeGen/X86/jump-opt.ll new file mode 100644 index 0000000..dc32f66 --- /dev/null +++ b/test/CodeGen/X86/jump-opt.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s + +; <rdar://problem/7598384> +define float @test1(float %x, float %y) nounwind readnone optsize ssp { +; CHECK:      jp +; CHECK-NEXT: je +entry: +  %0 = fpext float %x to double +  %1 = fpext float %y to double +  %2 = fmul double %0, %1 +  %3 = fcmp oeq double %2, 0.000000e+00 +  br i1 %3, label %bb2, label %bb1 + +bb1: +  %4 = fadd double %2, -1.000000e+00 +  br label %bb2 + +bb2: +  %.0.in = phi double [ %4, %bb1 ], [ %2, %entry ] +  %.0 = fptrunc double %.0.in to float +  ret float %.0 +} | 
