diff options
author | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2012-04-04 18:23:42 +0000 |
---|---|---|
committer | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2012-04-04 18:23:42 +0000 |
commit | c5041cac7d3aeaa7350abadf2a7ada92e8da27dc (patch) | |
tree | eefd36287fd9e7715a6717aa0c9c6f32384e081b | |
parent | 1487cb3a5a0535b343e602e976af538fd87d0df2 (diff) | |
download | external_llvm-c5041cac7d3aeaa7350abadf2a7ada92e8da27dc.zip external_llvm-c5041cac7d3aeaa7350abadf2a7ada92e8da27dc.tar.gz external_llvm-c5041cac7d3aeaa7350abadf2a7ada92e8da27dc.tar.bz2 |
Implement ARMBaseInstrInfo::commuteInstruction() for MOVCCr.
A MOVCCr instruction can be commuted by inverting the condition. This
can help reduce register pressure and remove unnecessary copies in some
cases.
<rdar://problem/11182914>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154033 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 23 | ||||
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.h | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrInfo.td | 3 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrThumb2.td | 2 | ||||
-rw-r--r-- | lib/Target/ARM/README.txt | 5 | ||||
-rw-r--r-- | test/CodeGen/ARM/commute-movcc.ll | 63 | ||||
-rw-r--r-- | test/CodeGen/ARM/machine-cse-cmp.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/ARM/select-imm.ll | 2 |
8 files changed, 95 insertions, 7 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 56cd6e5..c6280f8 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1500,6 +1500,29 @@ int llvm::getMatchingCondBranchOpcode(int Opc) {
   llvm_unreachable("Unknown unconditional branch opcode!");
 }
 
+/// commuteInstruction - Handle commutable instructions.
+MachineInstr *
+ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+  switch (MI->getOpcode()) {
+  case ARM::MOVCCr:
+  case ARM::t2MOVCCr: {
+    // MOVCC can be commuted by inverting the condition.
+    unsigned PredReg = 0;
+    ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
+    // MOVCC AL can't be inverted. Shouldn't happen.
+    if (CC == ARMCC::AL || PredReg != ARM::CPSR)
+      return NULL;
+    MI = TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+    if (!MI)
+      return NULL;
+    // After swapping the MOVCC operands, also invert the condition.
+    MI->getOperand(MI->findFirstPredOperandIdx())
+      .setImm(ARMCC::getOppositeCondition(CC));
+    return MI;
+  }
+  }
+  return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+}
 
 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
 /// instruction is encoded with an 'S' bit is determined by the optional CPSR
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 314e317..2fe8507 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -139,6 +139,8 @@ public:
 
   MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
 
+  MachineInstr *commuteInstruction(MachineInstr*, bool=false) const;
+
   virtual bool produceSameValue(const MachineInstr *MI0,
                                 const MachineInstr *MI1,
                                 const MachineRegisterInfo *MRI) const;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index c0bd237..37d53b0 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -4044,10 +4044,13 @@ def BCCZi64 : PseudoInst<(outs),
 // FIXME: should be able to write a pattern for ARMcmov, but can't use
 // a two-value operand where a dag node expects two operands. :(
 let neverHasSideEffects = 1 in {
+
+let isCommutable = 1 in
 def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
                            4, IIC_iCMOVr,
   [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
                 RegConstraint<"$false = $Rd">;
+
 def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd),
                            (ins GPR:$false, so_reg_imm:$shift, pred:$p),
                            4, IIC_iCMOVsr,
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 63d3a63..41d4e20 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -2872,6 +2872,8 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
 // FIXME: should be able to write a pattern for ARMcmov, but can't use
 // a two-value operand where a dag node expects two operands. :(
 let neverHasSideEffects = 1 in {
+
+let isCommutable = 1 in
 def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
                             (ins rGPR:$false, rGPR:$Rm, pred:$p),
                             4, IIC_iCMOVr,
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 4fcaecf..3eddda8 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -501,11 +501,6 @@ those operations and the ARMv6 scalar versions.
 
 //===---------------------------------------------------------------------===//
 
-ARM::MOVCCr is commutable (by flipping the condition). But we need to implement
-ARMInstrInfo::commuteInstruction() to support it.
-
-//===---------------------------------------------------------------------===//
-
 Split out LDR (literal) from normal ARM LDR instruction. Also consider spliting
 LDR into imm12 and so_reg forms. This allows us to clean up some code. e.g.
 ARMLoadStoreOptimizer does not need to look at LDR (literal) and LDR (so_reg)
diff --git a/test/CodeGen/ARM/commute-movcc.ll b/test/CodeGen/ARM/commute-movcc.ll
new file mode 100644
index 0000000..f53ac17
--- /dev/null
+++ b/test/CodeGen/ARM/commute-movcc.ll
@@ -0,0 +1,63 @@
+; RUN: llc -mtriple=thumbv7-apple-ios -disable-code-place < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios -disable-code-place < %s | FileCheck %s
+
+; LLVM IR optimizers canonicalize icmp+select this way.
+; Make sure that TwoAddressInstructionPass can commute the corresponding
+; MOVCC instructions to avoid excessive copies in one of the if blocks.
+;
+; CHECK: %if.then
+; CHECK-NOT: mov
+; CHECK: movlo
+; CHECK: movlo
+; CHECK-NOT: mov
+
+; CHECK: %if.else
+; CHECK-NOT: mov
+; CHECK: movls
+; CHECK: movls
+; CHECK-NOT: mov
+
+; CHECK: %if.end8
+
+define i32 @f(i32* nocapture %a, i32 %Pref) nounwind ssp {
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %if.end8
+  %i.012 = phi i32 [ 0, %entry ], [ %inc, %if.end8 ]
+  %BestCost.011 = phi i32 [ -1, %entry ], [ %BestCost.1, %if.end8 ]
+  %BestIdx.010 = phi i32 [ 0, %entry ], [ %BestIdx.1, %if.end8 ]
+  %arrayidx = getelementptr inbounds i32* %a, i32 %i.012
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %mul = mul i32 %0, %0
+  %sub = add nsw i32 %i.012, -5
+  %cmp2 = icmp eq i32 %sub, %Pref
+  br i1 %cmp2, label %if.else, label %if.then
+
+if.then: ; preds = %for.body
+  %cmp3 = icmp ult i32 %mul, %BestCost.011
+  %i.0.BestIdx.0 = select i1 %cmp3, i32 %i.012, i32 %BestIdx.010
+  %mul.BestCost.0 = select i1 %cmp3, i32 %mul, i32 %BestCost.011
+  br label %if.end8
+
+if.else: ; preds = %for.body
+  %cmp5 = icmp ugt i32 %mul, %BestCost.011
+  %BestIdx.0.i.0 = select i1 %cmp5, i32 %BestIdx.010, i32 %i.012
+  %BestCost.0.mul = select i1 %cmp5, i32 %BestCost.011, i32 %mul
+  br label %if.end8
+
+if.end8: ; preds = %if.else, %if.then
+  %BestIdx.1 = phi i32 [ %i.0.BestIdx.0, %if.then ], [ %BestIdx.0.i.0, %if.else ]
+  %BestCost.1 = phi i32 [ %mul.BestCost.0, %if.then ], [ %BestCost.0.mul, %if.else ]
+  store i32 %mul, i32* %arrayidx, align 4, !tbaa !0
+  %inc = add i32 %i.012, 1
+  %cmp = icmp eq i32 %inc, 11
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end: ; preds = %if.end8
+  ret i32 %BestIdx.1
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll
index f566974..3ac7d77 100644
--- a/test/CodeGen/ARM/machine-cse-cmp.ll
+++ b/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -10,7 +10,7 @@ entry:
 ; CHECK: cmp
 ; CHECK: moveq
 ; CHECK-NOT: cmp
-; CHECK: moveq
+; CHECK: mov{{eq|ne}}
   %tmp1 = icmp eq i32 %cond1, 0
   %tmp2 = select i1 %tmp1, i32 %x1, i32 %x2
   %tmp3 = select i1 %tmp1, i32 %x2, i32 %x3
diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll
index e927b39..c9ac66a 100644
--- a/test/CodeGen/ARM/select-imm.ll
+++ b/test/CodeGen/ARM/select-imm.ll
@@ -64,7 +64,7 @@ define i32 @t4(i32 %a, i32 %b, i32 %x) nounwind {
 entry:
 ; ARM: t4:
 ; ARM: ldr
-; ARM: movlt
+; ARM: mov{{lt|ge}}
 
 ; ARMT2: t4:
 ; ARMT2: movwlt [[R0:r[0-9]+]], #65365