From 959ddbb5e0e088f4d5c3f8c015de3caf0baa6e6c Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Thu, 11 Apr 2013 04:43:09 +0000 Subject: Enhance bool simplifcation in X86 to handle more cases This patch is revised based on patch from Victor Umansky . More cases are handled in X86's bool simplification, i.e. - SETCC_CARRY - value is truncated to i1 with AND As a by-product, PR5443 is also fixed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179265 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 41 ++++++++-- test/CodeGen/X86/add.ll | 4 +- test/CodeGen/X86/avx-brcond.ll | 150 +++++++++++++++++++++++++++++++++++++ test/CodeGen/X86/brcond.ll | 147 ++++++++++++++++++++++++++++++++++++ 4 files changed, 334 insertions(+), 8 deletions(-) create mode 100644 test/CodeGen/X86/avx-brcond.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 95e7f85..806e72f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15847,6 +15847,7 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { SDValue SetCC; const ConstantSDNode* C = 0; bool needOppositeCond = (CC == X86::COND_E); + bool checkAgainstTrue = false; // Is it a comparison against 1? if ((C = dyn_cast(Op1))) SetCC = Op2; @@ -15855,18 +15856,46 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { else // Quit if all operands are not constants. return SDValue(); - if (C->getZExtValue() == 1) + if (C->getZExtValue() == 1) { needOppositeCond = !needOppositeCond; - else if (C->getZExtValue() != 0) + checkAgainstTrue = true; + } else if (C->getZExtValue() != 0) // Quit if the constant is neither 0 or 1. return SDValue(); - // Skip 'zext' or 'trunc' node. - if (SetCC.getOpcode() == ISD::ZERO_EXTEND || - SetCC.getOpcode() == ISD::TRUNCATE) - SetCC = SetCC.getOperand(0); + bool truncatedToBoolWithAnd = false; + // Skip (zext $x), (trunc $x), or (and $x, 1) node. + while (SetCC.getOpcode() == ISD::ZERO_EXTEND || + SetCC.getOpcode() == ISD::TRUNCATE || + SetCC.getOpcode() == ISD::AND) { + if (SetCC.getOpcode() == ISD::AND) { + int OpIdx = -1; + ConstantSDNode *CS; + if ((CS = dyn_cast(SetCC.getOperand(0))) && + CS->getZExtValue() == 1) + OpIdx = 1; + if ((CS = dyn_cast(SetCC.getOperand(1))) && + CS->getZExtValue() == 1) + OpIdx = 0; + if (OpIdx == -1) + break; + SetCC = SetCC.getOperand(OpIdx); + truncatedToBoolWithAnd = true; + } else + SetCC = SetCC.getOperand(0); + } switch (SetCC.getOpcode()) { + case X86ISD::SETCC_CARRY: + // Since SETCC_CARRY gives output based on R = CF ? ~0 : 0, it's unsafe to + // simplify it if the result of SETCC_CARRY is not canonicalized to 0 or 1, + // i.e. it's a comparison against true but the result of SETCC_CARRY is not + // truncated to i1 using 'and'. + if (checkAgainstTrue && !truncatedToBoolWithAnd) + break; + assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B && + "Invalid use of SETCC_CARRY!"); + // FALL THROUGH case X86ISD::SETCC: // Set the condition code or opposite one if necessary. CC = X86::CondCode(SetCC.getConstantOperandVal(0)); diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll index 03d2e47..5fe08ed 100644 --- a/test/CodeGen/X86/add.ll +++ b/test/CodeGen/X86/add.ll @@ -119,8 +119,8 @@ entry: ; X64: test8: ; X64: addq -; X64-NEXT: sbbq -; X64-NEXT: testb +; X64-NEXT: setb +; X64: ret define i32 @test9(i32 %x, i32 %y) nounwind readnone { %cmp = icmp eq i32 %x, 10 diff --git a/test/CodeGen/X86/avx-brcond.ll b/test/CodeGen/X86/avx-brcond.ll new file mode 100644 index 0000000..d52ae52 --- /dev/null +++ b/test/CodeGen/X86/avx-brcond.ll @@ -0,0 +1,150 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +declare i32 @llvm.x86.avx.ptestz.256(<4 x i64> %p1, <4 x i64> %p2) nounwind +declare i32 @llvm.x86.avx.ptestc.256(<4 x i64> %p1, <4 x i64> %p2) nounwind + +define <4 x float> @test1(<4 x i64> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test1: +; CHECK: vptest +; CHECK-NEXT: jne +; CHECK: ret + + %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind + %one = icmp ne i32 %res, 0 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test3(<4 x i64> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test3: +; CHECK: vptest +; CHECK-NEXT: jne +; CHECK: ret + + %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind + %one = trunc i32 %res to i1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test4(<4 x i64> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test4: +; CHECK: vptest +; CHECK-NEXT: jae +; CHECK: ret + + %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a, <4 x i64> %a) nounwind + %one = icmp ne i32 %res, 0 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test6(<4 x i64> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test6: +; CHECK: vptest +; CHECK-NEXT: jae +; CHECK: ret + + %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a, <4 x i64> %a) nounwind + %one = trunc i32 %res to i1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test7(<4 x i64> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test7: +; CHECK: vptest +; CHECK-NEXT: jne +; CHECK: ret + + %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind + %one = icmp eq i32 %res, 1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test8(<4 x i64> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test8: +; CHECK: vptest +; CHECK-NEXT: je +; CHECK: ret + + %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind + %one = icmp ne i32 %res, 1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + + diff --git a/test/CodeGen/X86/brcond.ll b/test/CodeGen/X86/brcond.ll index 44670c8..bc4032b 100644 --- a/test/CodeGen/X86/brcond.ll +++ b/test/CodeGen/X86/brcond.ll @@ -108,3 +108,150 @@ bb2: ; preds = %entry, %bb1 ret float %.0 } +declare i32 @llvm.x86.sse41.ptestz(<4 x float> %p1, <4 x float> %p2) nounwind +declare i32 @llvm.x86.sse41.ptestc(<4 x float> %p1, <4 x float> %p2) nounwind + +define <4 x float> @test5(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test5: +; CHECK: ptest +; CHECK-NEXT: jne +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp ne i32 %res, 0 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test7(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test7: +; CHECK: ptest +; CHECK-NEXT: jne +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind + %one = trunc i32 %res to i1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test8(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test8: +; CHECK: ptest +; CHECK-NEXT: jae +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp ne i32 %res, 0 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test10(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test10: +; CHECK: ptest +; CHECK-NEXT: jae +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind + %one = trunc i32 %res to i1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test11(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test11: +; CHECK: ptest +; CHECK-NEXT: jne +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp eq i32 %res, 1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test12(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test12: +; CHECK: ptest +; CHECK-NEXT: je +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp ne i32 %res, 1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + -- cgit v1.1