From 0c81dc887cdd494985d275116a91759ddb591df2 Mon Sep 17 00:00:00 2001 From: Duraid Madina Date: Mon, 16 Jan 2006 06:33:38 +0000 Subject: fix division! again!! pattern isel, prepare to die. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@25353 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/IA64/IA64ISelDAGToDAG.cpp | 198 +++++++++++++++++------------------ lib/Target/IA64/IA64InstrInfo.td | 21 ++++ 2 files changed, 118 insertions(+), 101 deletions(-) diff --git a/lib/Target/IA64/IA64ISelDAGToDAG.cpp b/lib/Target/IA64/IA64ISelDAGToDAG.cpp index f819465..929e8f5 100644 --- a/lib/Target/IA64/IA64ISelDAGToDAG.cpp +++ b/lib/Target/IA64/IA64ISelDAGToDAG.cpp @@ -199,11 +199,13 @@ SDOperand IA64DAGToDAGISel::SelectDIV(SDOperand Op) { Chain = TmpF3.getValue(1); TmpF4 = CurDAG->getTargetNode(IA64::FCVTXF, MVT::f64, TmpF2); Chain = TmpF4.getValue(1); - } else { - TmpF3 = CurDAG->getTargetNode(IA64::FCVTXUFS1, MVT::f64, TmpF1); - Chain = TmpF3.getValue(1); - TmpF4 = CurDAG->getTargetNode(IA64::FCVTXUFS1, MVT::f64, TmpF2); - Chain = TmpF4.getValue(1); + } else { // is unsigned + if(isModulus) { /* unsigned integer divides do not need any fcvt.x*f* insns */ + TmpF3 = CurDAG->getTargetNode(IA64::FCVTXUFS1, MVT::f64, TmpF1); + Chain = TmpF3.getValue(1); + TmpF4 = CurDAG->getTargetNode(IA64::FCVTXUFS1, MVT::f64, TmpF2); + Chain = TmpF4.getValue(1); + } } } else { // this is an FP divide/remainder, so we 'leak' some temp @@ -214,116 +216,110 @@ SDOperand IA64DAGToDAGISel::SelectDIV(SDOperand Op) { // we start by computing an approximate reciprocal (good to 9 bits?) // note, this instruction writes _both_ TmpF5 (answer) and TmpPR (predicate) - TmpF5 = CurDAG->getTargetNode(IA64::FRCPAS1, MVT::f64, MVT::i1, + if(isFP) + TmpF5 = CurDAG->getTargetNode(IA64::FRCPAS0, MVT::f64, MVT::i1, TmpF3, TmpF4); + else + TmpF5 = CurDAG->getTargetNode(IA64::FRCPAS1, MVT::f64, MVT::i1, + TmpF3, TmpF4); + TmpPR = TmpF5.getValue(1); Chain = TmpF5.getValue(2); - if(!isModulus) { // if this is a divide, we worry about div-by-zero - SDOperand bogusPR = CurDAG->getTargetNode(IA64::CMPEQ, MVT::i1, - CurDAG->getRegister(IA64::r0, MVT::i64), - CurDAG->getRegister(IA64::r0, MVT::i64)); - Chain = bogusPR.getValue(1); - TmpPR2 = CurDAG->getTargetNode(IA64::TPCMPNE, MVT::i1, bogusPR, - CurDAG->getRegister(IA64::r0, MVT::i64), - CurDAG->getRegister(IA64::r0, MVT::i64), TmpPR); - Chain = TmpPR2.getValue(1); - } - + // we'll need copies of F0 and F1 SDOperand F0 = CurDAG->getRegister(IA64::F0, MVT::f64); SDOperand F1 = CurDAG->getRegister(IA64::F1, MVT::f64); - // now we apply newton's method, thrice! (FIXME: this is ~72 bits of - // precision, don't need this much for f32/i32) - TmpF6 = CurDAG->getTargetNode(IA64::CFNMAS1, MVT::f64, - TmpF4, TmpF5, F1, TmpPR); - Chain = TmpF6.getValue(1); - TmpF7 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, - TmpF3, TmpF5, F0, TmpPR); - Chain = TmpF7.getValue(1); - TmpF8 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, - TmpF6, TmpF6, F0, TmpPR); - Chain = TmpF8.getValue(1); - TmpF9 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, - TmpF6, TmpF7, TmpF7, TmpPR); - Chain = TmpF9.getValue(1); - TmpF10 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, - TmpF6, TmpF5, TmpF5, TmpPR); - Chain = TmpF10.getValue(1); - TmpF11 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, - TmpF8, TmpF9, TmpF9, TmpPR); - Chain = TmpF11.getValue(1); - TmpF12 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, - TmpF8, TmpF10, TmpF10, TmpPR); - Chain = TmpF12.getValue(1); - TmpF13 = CurDAG->getTargetNode(IA64::CFNMAS1, MVT::f64, - TmpF4, TmpF11, TmpF3, TmpPR); - Chain = TmpF13.getValue(1); + SDOperand minusB; + if(isModulus) { // for remainders, it'll be handy to have + // copies of -input_b + minusB = CurDAG->getTargetNode(IA64::SUB, MVT::i64, + CurDAG->getRegister(IA64::r0, MVT::i64), Tmp2); + Chain = minusB.getValue(1); + } - // FIXME: this is unfortunate :( - // the story is that the dest reg of the fnma above and the fma below - // (and therefore possibly the src of the fcvt.fx[u] as well) cannot - // be the same register, or this code breaks if the first argument is - // zero. (e.g. without this hack, 0%8 yields -64, not 0.) - TmpF14 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, - TmpF13, TmpF12, TmpF11, TmpPR); - Chain = TmpF14.getValue(1); + SDOperand TmpE0, TmpY1, TmpE1, TmpY2; - if(isModulus) { // XXX: fragile! fixes _only_ mod, *breaks* div! ! - SDOperand bogus = CurDAG->getTargetNode(IA64::IUSE, MVT::Other, TmpF13); // hack :( - Chain = bogus.getValue(0); // hmmm - } - - if(!isFP) { - // round to an integer - if(isSigned) { - TmpF15 = CurDAG->getTargetNode(IA64::FCVTFXTRUNCS1, MVT::i64, TmpF14); - Chain = TmpF15.getValue(1); - } - else { - TmpF15 = CurDAG->getTargetNode(IA64::FCVTFXUTRUNCS1, MVT::i64, TmpF14); - Chain = TmpF15.getValue(1); - } - } else { - TmpF15 = TmpF14; - // EXERCISE: can you see why TmpF15=TmpF14 does not work here, and - // we really do need the above FMOV? ;) - } - - if(!isModulus) { - if(isFP) { // extra worrying about div-by-zero - // we do a 'conditional fmov' (of the correct result, depending - // on how the frcpa predicate turned out) - SDOperand bogoResult = CurDAG->getTargetNode(IA64::PFMOV, MVT::f64, - TmpF12, TmpPR2); - Chain = bogoResult.getValue(1); - Result = CurDAG->getTargetNode(IA64::CFMOV, MVT::f64, bogoResult, - TmpF15, TmpPR); + TmpE0 = CurDAG->getTargetNode(IA64::CFNMAS1, MVT::f64, + TmpF4, TmpF5, F1, TmpPR); + Chain = TmpE0.getValue(1); + TmpY1 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, + TmpF5, TmpE0, TmpF5, TmpPR); + Chain = TmpY1.getValue(1); + TmpE1 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, + TmpE0, TmpE0, F0, TmpPR); + Chain = TmpE1.getValue(1); + TmpY2 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, + TmpY1, TmpE1, TmpY1, TmpPR); + Chain = TmpY2.getValue(1); + + if(isFP) { // if this is an FP divide, we finish up here and exit early + if(isModulus) + assert(0 && "Sorry, try another FORTRAN compiler."); + + SDOperand TmpE2, TmpY3, TmpQ0, TmpR0; + + TmpE2 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, + TmpE1, TmpE1, F0, TmpPR); + Chain = TmpE2.getValue(1); + TmpY3 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, + TmpY2, TmpE2, TmpY2, TmpPR); + Chain = TmpY3.getValue(1); + TmpQ0 = CurDAG->getTargetNode(IA64::CFMADS1, MVT::f64, // double prec! + Tmp1, TmpY3, F0, TmpPR); + Chain = TmpQ0.getValue(1); + TmpR0 = CurDAG->getTargetNode(IA64::CFNMADS1, MVT::f64, // double prec! + Tmp2, TmpQ0, Tmp1, TmpPR); + Chain = TmpR0.getValue(1); + +// we want Result to have the same target register as the frcpa, so +// we two-address hack it. See the comment "for this to work..." on +// page 48 of Intel application note #245415 + Result = CurDAG->getTargetNode(IA64::TCFMADS0, MVT::f64, // d.p. s0 rndg! + TmpY3, TmpR0, TmpQ0, TmpPR); Chain = Result.getValue(1); - } - else { - Result = CurDAG->getTargetNode(IA64::GETFSIG, MVT::i64, TmpF15); + return Result; // XXX: early exit! + } else { // this is *not* an FP divide, so there's a bit left to do: + + SDOperand TmpQ2, TmpR2, TmpQ3, TmpQ; + + TmpQ2 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64, + TmpF3, TmpY2, F0, TmpPR); + Chain = TmpQ2.getValue(1); + TmpR2 = CurDAG->getTargetNode(IA64::CFNMAS1, MVT::f64, + TmpF4, TmpQ2, TmpF3, TmpPR); + Chain = TmpR2.getValue(1); + +// we want TmpQ3 to have the same target register as the frcpa, so +// we two-address hack it. See the comment "for this to work..." on +// page 48 of Intel application note #245415 + TmpQ3 = CurDAG->getTargetNode(IA64::TCFMAS1, MVT::f64, + TmpR2, TmpR2, TmpY2, TmpQ2, TmpPR); + Chain = TmpQ3.getValue(1); + + if(isSigned) + TmpQ = CurDAG->getTargetNode(IA64::FCVTFXTRUNCS1, MVT::f64, TmpQ3); + else + TmpQ = CurDAG->getTargetNode(IA64::FCVTFXUTRUNCS1, MVT::f64, TmpQ3); + + Chain = TmpQ.getValue(1); + + if(isModulus) { + SDOperand FPminusB = CurDAG->getTargetNode(IA64::SETFSIG, MVT::f64, + minusB); + Chain = FPminusB.getValue(1); + SDOperand Remainder = CurDAG->getTargetNode(IA64::XMAL, MVT::f64, + TmpQ, FPminusB, TmpF1); + Chain = Remainder.getValue(1); + Result = CurDAG->getTargetNode(IA64::GETFSIG, MVT::i64, Remainder); Chain = Result.getValue(1); - } - } else { // this is a modulus - if(!isFP) { - // answer = q * (-b) + a - SDOperand TmpI = CurDAG->getTargetNode(IA64::SUB, MVT::i64, - CurDAG->getRegister(IA64::r0, MVT::i64), Tmp2); - Chain = TmpI.getValue(1); - SDOperand TmpF = CurDAG->getTargetNode(IA64::SETFSIG, MVT::f64, TmpI); - Chain = TmpF.getValue(1); - SDOperand ModulusResult = CurDAG->getTargetNode(IA64::XMAL, MVT::f64, - TmpF15, TmpF, TmpF1); - Chain = ModulusResult.getValue(1); - Result = CurDAG->getTargetNode(IA64::GETFSIG, MVT::i64, ModulusResult); + } else { // just an integer divide + Result = CurDAG->getTargetNode(IA64::GETFSIG, MVT::i64, TmpQ); Chain = Result.getValue(1); - } else { // FP modulus! The horror... the horror.... - assert(0 && "sorry, no FP modulus just yet!\n!\n"); } - } - return Result; + return Result; + } // wasn't an FP divide } // Select - Convert the specified operand from a target-independent to a diff --git a/lib/Target/IA64/IA64InstrInfo.td b/lib/Target/IA64/IA64InstrInfo.td index f18b451..fc72ad5 100644 --- a/lib/Target/IA64/IA64InstrInfo.td +++ b/lib/Target/IA64/IA64InstrInfo.td @@ -589,6 +589,15 @@ def FNEGABS : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src), "fnegabs $dst = $src;;", [(set FP:$dst, (fneg (fabs FP:$src)))]>; +let isTwoAddress=1 in { +def TCFMAS1 : AForm<0x03, 0x0b, + (ops FP:$dst, FP:$bogussrc, FP:$src1, FP:$src2, FP:$src3, PR:$qp), + "($qp) fma.s1 $dst = $src1, $src2, $src3;;">; +def TCFMADS0 : AForm<0x03, 0x0b, + (ops FP:$dst, FP:$bogussrc, FP:$src1, FP:$src2, FP:$src3, PR:$qp), + "($qp) fma.d.s0 $dst = $src1, $src2, $src3;;">; +} + def CFMAS1 : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp), "($qp) fma.s1 $dst = $src1, $src2, $src3;;">; @@ -596,6 +605,18 @@ def CFNMAS1 : AForm<0x03, 0x0b, (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp), "($qp) fnma.s1 $dst = $src1, $src2, $src3;;">; +def CFMADS1 : AForm<0x03, 0x0b, + (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp), + "($qp) fma.d.s1 $dst = $src1, $src2, $src3;;">; +def CFMADS0 : AForm<0x03, 0x0b, + (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp), + "($qp) fma.d.s0 $dst = $src1, $src2, $src3;;">; +def CFNMADS1 : AForm<0x03, 0x0b, + (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp), + "($qp) fnma.d.s1 $dst = $src1, $src2, $src3;;">; + +def FRCPAS0 : AForm<0x03, 0x0b, (ops FP:$dstFR, PR:$dstPR, FP:$src1, FP:$src2), + "frcpa.s0 $dstFR, $dstPR = $src1, $src2;;">; def FRCPAS1 : AForm<0x03, 0x0b, (ops FP:$dstFR, PR:$dstPR, FP:$src1, FP:$src2), "frcpa.s1 $dstFR, $dstPR = $src1, $src2;;">; -- cgit v1.1