From 199c4240feedec2f9dbd0d4c4c0a32fa46e50270 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 11 Jan 2010 22:03:29 +0000 Subject: Extend r93152 to work on OR r, r. If the source set bits are known not to overlap, then select as an ADD instead. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@93191 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Instr64bit.td | 7 +++++-- lib/Target/X86/X86InstrInfo.td | 36 +++++++++++++++++++++++++++++------- test/CodeGen/X86/3addr-or.ll | 16 ++++++++++++++++ test/CodeGen/X86/fast-isel.ll | 2 +- 4 files changed, 51 insertions(+), 10 deletions(-) diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index b2aead6..7077cf9 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -1093,7 +1093,7 @@ let isCommutable = 1 in def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (or GR64:$src1, GR64:$src2)), + [(set GR64:$dst, (or_not_add GR64:$src1, GR64:$src2)), (implicit EFLAGS)]>; def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -2125,13 +2125,16 @@ def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), addr:$dst), (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; -// (or x, c) -> (add x, c) if masked bits are known zero. +// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt8:$src2), (implicit EFLAGS)), (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>; def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt32:$src2), (implicit EFLAGS)), (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; +def : Pat<(parallel (or_is_add GR64:$src1, GR64:$src2), + (implicit EFLAGS)), + (ADD64rr GR64:$src1, GR64:$src2)>; // X86 specific add which produces a flag. 
def : Pat<(addc GR64:$src1, GR64:$src2), diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 28c5154..9b69018 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -497,12 +497,28 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); - return false; + else { + unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt KnownZero0, KnownOne0; + CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0); + APInt KnownZero1, KnownOne1; + CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0); + return (~KnownZero0 & ~KnownZero1) == 0; + } }]>; def or_not_add : PatFrag<(ops node:$lhs, node:$rhs),(or node:$lhs, node:$rhs),[{ - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)); - if (!CN) return true; - return !CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) + return !CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); + else { + unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt KnownZero0, KnownOne0; + CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0); + APInt KnownZero1, KnownOne1; + CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0); + return (~KnownZero0 & ~KnownZero1) != 0; + } }]>; // 'shld' and 'shrd' instruction patterns. 
Note that even though these have @@ -1853,12 +1869,12 @@ def OR8rr : I<0x08, MRMDestReg, (outs GR8 :$dst), def OR16rr : I<0x09, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "or{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (or GR16:$src1, GR16:$src2)), + [(set GR16:$dst, (or_not_add GR16:$src1, GR16:$src2)), (implicit EFLAGS)]>, OpSize; def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "or{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (or GR32:$src1, GR32:$src2)), + [(set GR32:$dst, (or_not_add GR32:$src1, GR32:$src2)), (implicit EFLAGS)]>; } @@ -4659,7 +4675,7 @@ def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C32r)>; -// (or x, c) -> (add x, c) if masked bits are known zero. +// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. def : Pat<(parallel (or_is_add GR16:$src1, imm:$src2), (implicit EFLAGS)), (ADD16ri GR16:$src1, imm:$src2)>; @@ -4672,6 +4688,12 @@ def : Pat<(parallel (or_is_add GR16:$src1, i16immSExt8:$src2), def : Pat<(parallel (or_is_add GR32:$src1, i32immSExt8:$src2), (implicit EFLAGS)), (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; +def : Pat<(parallel (or_is_add GR16:$src1, GR16:$src2), + (implicit EFLAGS)), + (ADD16rr GR16:$src1, GR16:$src2)>; +def : Pat<(parallel (or_is_add GR32:$src1, GR32:$src2), + (implicit EFLAGS)), + (ADD32rr GR32:$src1, GR32:$src2)>; //===----------------------------------------------------------------------===// // EFLAGS-defining Patterns diff --git a/test/CodeGen/X86/3addr-or.ll b/test/CodeGen/X86/3addr-or.ll index 395ba46..30a1f36 100644 --- a/test/CodeGen/X86/3addr-or.ll +++ b/test/CodeGen/X86/3addr-or.ll @@ -9,3 +9,19 @@ entry: %1 = or i32 %0, 3 ; <i32> [#uses=1] ret i32 %1 } + +define i64 @test2(i8 %A, i8 %B) nounwind { +; CHECK: test2: +; CHECK: shrq $4 +; CHECK-NOT: movq +; CHECK-NOT: orq +; CHECK: leaq +; CHECK: ret + %C = zext i8 %A to i64 ; <i64> [#uses=1] + %D = shl 
i64 %C, 4 ; <i64> [#uses=1] + %E = and i64 %D, 48 ; <i64> [#uses=1] + %F = zext i8 %B to i64 ; <i64> [#uses=1] + %G = lshr i64 %F, 4 ; <i64> [#uses=1] + %H = or i64 %G, %E ; <i64> [#uses=1] + ret i64 %H +} diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll index 3dcd736..84b3fd7 100644 --- a/test/CodeGen/X86/fast-isel.ll +++ b/test/CodeGen/X86/fast-isel.ll @@ -14,7 +14,7 @@ fast: %t1 = mul i32 %t0, %s %t2 = sub i32 %t1, %s %t3 = and i32 %t2, %s - %t4 = or i32 %t3, %s + %t4 = xor i32 %t3, 3 %t5 = xor i32 %t4, %s %t6 = add i32 %t5, 2 %t7 = getelementptr i32* %y, i32 1 -- cgit v1.1