diff options
author | Evan Cheng <evan.cheng@apple.com> | 2011-03-23 22:52:04 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2011-03-23 22:52:04 +0000 |
commit | 2c339156289d8398bea09c2bb4b735d00d39bdb3 (patch) | |
tree | bd93f3a2bae816c2cce86c73d05bbb958c31136a | |
parent | b141099c14bfa86167b2137e8a9544c6ee805955 (diff) | |
download | external_llvm-2c339156289d8398bea09c2bb4b735d00d39bdb3.zip external_llvm-2c339156289d8398bea09c2bb4b735d00d39bdb3.tar.gz external_llvm-2c339156289d8398bea09c2bb4b735d00d39bdb3.tar.bz2 |
Cmp peephole optimization isn't always safe for signed arithmetics.
int tries = INT_MAX;
while (tries > 0) {
tries--;
}
The check should be:
subs r4, #1
cmp r4, #0
bgt LBB0_1
The subs can set the overflow V bit when r4 is INT_MAX+1 (which loop
canonicalization apparently does in this case). cmp #0 would have cleared
it while not changing the N and Z bits. Since BGT is dependent on the V
bit, i.e. (N == V) && !Z, it is not safe to eliminate the cmp #0.
rdar://9172742
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@128179 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 46 | ||||
-rw-r--r-- | test/CodeGen/ARM/2011-03-23-PeepholeBug.ll | 41 | ||||
-rw-r--r-- | test/CodeGen/ARM/long_shift.ll | 6 |
3 files changed, 88 insertions, 5 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index a56df30..b5f20a4 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1612,11 +1612,51 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, switch (MI->getOpcode()) { default: break; case ARM::ADDri: - case ARM::ANDri: - case ARM::t2ANDri: case ARM::SUBri: case ARM::t2ADDri: - case ARM::t2SUBri: + case ARM::t2SUBri: { + // Scan forward for the use of CPSR, if it's a conditional code requires + // checking of V bit, then this is not safe to do. If we can't find the + // CPSR use (i.e. used in another block), then it's not safe to perform + // the optimization. + bool isSafe = false; + I = CmpInstr; + E = MI->getParent()->end(); + while (!isSafe && ++I != E) { + const MachineInstr &Instr = *I; + for (unsigned IO = 0, EO = Instr.getNumOperands(); + !isSafe && IO != EO; ++IO) { + const MachineOperand &MO = Instr.getOperand(IO); + if (!MO.isReg() || MO.getReg() != ARM::CPSR) + continue; + if (MO.isDef()) { + isSafe = true; + break; + } + // Condition code is after the operand before CPSR. + ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm(); + switch (CC) { + default: + isSafe = true; + break; + case ARMCC::VS: + case ARMCC::VC: + case ARMCC::GE: + case ARMCC::LT: + case ARMCC::GT: + case ARMCC::LE: + return false; + } + } + } + + if (!isSafe) + return false; + + // fallthrough + } + case ARM::ANDri: + case ARM::t2ANDri: // Toggle the optional operand to CPSR. MI->getOperand(5).setReg(ARM::CPSR); MI->getOperand(5).setIsDef(true); diff --git a/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll b/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll new file mode 100644 index 0000000..7c9af6f --- /dev/null +++ b/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll @@ -0,0 +1,41 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s + +; subs r4, #1 +; cmp r4, 0 +; bgt +; cmp cannot be optimized away since it will clear the overflow bit. +; gt / ge, lt, le conditions all depend on V bit. +; rdar://9172742 + +define i32 @t() nounwind { +; CHECK: t: +entry: + br label %bb2 + +bb: ; preds = %bb2 + %0 = tail call i32 @rand() nounwind + %1 = icmp eq i32 %0, 50 + br i1 %1, label %bb3, label %bb1 + +bb1: ; preds = %bb + %tmp = tail call i32 @puts() nounwind + %indvar.next = add i32 %indvar, 1 + br label %bb2 + +bb2: ; preds = %bb1, %entry +; CHECK: bb2 +; CHECK: subs [[REG:r[0-9]+]], #1 +; CHECK: cmp [[REG]], #0 +; CHECK: bgt + %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ] + %tries.0 = sub i32 2147483647, %indvar + %tmp1 = icmp sgt i32 %tries.0, 0 + br i1 %tmp1, label %bb, label %bb3 + +bb3: ; preds = %bb2, %bb + ret i32 0 +} + +declare i32 @rand() + +declare i32 @puts() nounwind diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll index ab14de1..d5aac2e 100644 --- a/test/CodeGen/ARM/long_shift.ll +++ b/test/CodeGen/ARM/long_shift.ll @@ -24,7 +24,8 @@ define i32 @f2(i64 %x, i64 %y) { ; CHECK: f2 ; CHECK: lsr{{.*}}r2 ; CHECK-NEXT: rsb r3, r2, #32 -; CHECK-NEXT: subs r2, r2, #32 +; CHECK-NEXT: sub r2, r2, #32 +; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: orr r0, r0, r1, lsl r3 ; CHECK-NEXT: asrge r0, r1, r2 %a = ashr i64 %x, %y @@ -36,7 +37,8 @@ define i32 @f3(i64 %x, i64 %y) { ; CHECK: f3 ; CHECK: lsr{{.*}}r2 ; CHECK-NEXT: rsb r3, r2, #32 -; CHECK-NEXT: subs r2, r2, #32 +; CHECK-NEXT: sub r2, r2, #32 +; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: orr r0, r0, r1, lsl r3 ; CHECK-NEXT: lsrge r0, r1, r2 %a = lshr i64 %x, %y |