about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Duraid Madina <duraid@octopus.com.au> 2006-01-16 06:33:38 +0000
committer Duraid Madina <duraid@octopus.com.au> 2006-01-16 06:33:38 +0000
commit 0c81dc887cdd494985d275116a91759ddb591df2 (patch)
tree c5c622b038c232bab12dd4de5089f971b4ba5c9c
parent 99cf50937d55381fbcdb506e61fb46ade774e7ee (diff)
download external_llvm-0c81dc887cdd494985d275116a91759ddb591df2.zip
external_llvm-0c81dc887cdd494985d275116a91759ddb591df2.tar.gz
external_llvm-0c81dc887cdd494985d275116a91759ddb591df2.tar.bz2
fix division! again!! pattern isel, prepare to die.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@25353 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/IA64/IA64ISelDAGToDAG.cpp 198
-rw-r--r-- lib/Target/IA64/IA64InstrInfo.td 21
2 files changed, 118 insertions, 101 deletions
diff --git a/lib/Target/IA64/IA64ISelDAGToDAG.cpp b/lib/Target/IA64/IA64ISelDAGToDAG.cpp
index f819465..929e8f5 100644
--- a/lib/Target/IA64/IA64ISelDAGToDAG.cpp
+++ b/lib/Target/IA64/IA64ISelDAGToDAG.cpp
@@ -199,11 +199,13 @@ SDOperand IA64DAGToDAGISel::SelectDIV(SDOperand Op) {
Chain = TmpF3.getValue(1);
TmpF4 = CurDAG->getTargetNode(IA64::FCVTXF, MVT::f64, TmpF2);
Chain = TmpF4.getValue(1);
- } else {
- TmpF3 = CurDAG->getTargetNode(IA64::FCVTXUFS1, MVT::f64, TmpF1);
- Chain = TmpF3.getValue(1);
- TmpF4 = CurDAG->getTargetNode(IA64::FCVTXUFS1, MVT::f64, TmpF2);
- Chain = TmpF4.getValue(1);
+ } else { // is unsigned
+ if(isModulus) { /* unsigned integer divides do not need any fcvt.x*f* insns */
+ TmpF3 = CurDAG->getTargetNode(IA64::FCVTXUFS1, MVT::f64, TmpF1);
+ Chain = TmpF3.getValue(1);
+ TmpF4 = CurDAG->getTargetNode(IA64::FCVTXUFS1, MVT::f64, TmpF2);
+ Chain = TmpF4.getValue(1);
+ }
}
} else { // this is an FP divide/remainder, so we 'leak' some temp
@@ -214,116 +216,110 @@ SDOperand IA64DAGToDAGISel::SelectDIV(SDOperand Op) {
// we start by computing an approximate reciprocal (good to 9 bits?)
// note, this instruction writes _both_ TmpF5 (answer) and TmpPR (predicate)
- TmpF5 = CurDAG->getTargetNode(IA64::FRCPAS1, MVT::f64, MVT::i1,
+ if(isFP)
+ TmpF5 = CurDAG->getTargetNode(IA64::FRCPAS0, MVT::f64, MVT::i1,
TmpF3, TmpF4);
+ else
+ TmpF5 = CurDAG->getTargetNode(IA64::FRCPAS1, MVT::f64, MVT::i1,
+ TmpF3, TmpF4);
+
TmpPR = TmpF5.getValue(1);
Chain = TmpF5.getValue(2);
- if(!isModulus) { // if this is a divide, we worry about div-by-zero
- SDOperand bogusPR = CurDAG->getTargetNode(IA64::CMPEQ, MVT::i1,
- CurDAG->getRegister(IA64::r0, MVT::i64),
- CurDAG->getRegister(IA64::r0, MVT::i64));
- Chain = bogusPR.getValue(1);
- TmpPR2 = CurDAG->getTargetNode(IA64::TPCMPNE, MVT::i1, bogusPR,
- CurDAG->getRegister(IA64::r0, MVT::i64),
- CurDAG->getRegister(IA64::r0, MVT::i64), TmpPR);
- Chain = TmpPR2.getValue(1);
- }
-
+ // we'll need copies of F0 and F1
SDOperand F0 = CurDAG->getRegister(IA64::F0, MVT::f64);
SDOperand F1 = CurDAG->getRegister(IA64::F1, MVT::f64);
- // now we apply newton's method, thrice! (FIXME: this is ~72 bits of
- // precision, don't need this much for f32/i32)
- TmpF6 = CurDAG->getTargetNode(IA64::CFNMAS1, MVT::f64,
- TmpF4, TmpF5, F1, TmpPR);
- Chain = TmpF6.getValue(1);
- TmpF7 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
- TmpF3, TmpF5, F0, TmpPR);
- Chain = TmpF7.getValue(1);
- TmpF8 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
- TmpF6, TmpF6, F0, TmpPR);
- Chain = TmpF8.getValue(1);
- TmpF9 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
- TmpF6, TmpF7, TmpF7, TmpPR);
- Chain = TmpF9.getValue(1);
- TmpF10 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
- TmpF6, TmpF5, TmpF5, TmpPR);
- Chain = TmpF10.getValue(1);
- TmpF11 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
- TmpF8, TmpF9, TmpF9, TmpPR);
- Chain = TmpF11.getValue(1);
- TmpF12 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
- TmpF8, TmpF10, TmpF10, TmpPR);
- Chain = TmpF12.getValue(1);
- TmpF13 = CurDAG->getTargetNode(IA64::CFNMAS1, MVT::f64,
- TmpF4, TmpF11, TmpF3, TmpPR);
- Chain = TmpF13.getValue(1);
+ SDOperand minusB;
+ if(isModulus) { // for remainders, it'll be handy to have
+ // copies of -input_b
+ minusB = CurDAG->getTargetNode(IA64::SUB, MVT::i64,
+ CurDAG->getRegister(IA64::r0, MVT::i64), Tmp2);
+ Chain = minusB.getValue(1);
+ }
- // FIXME: this is unfortunate :(
- // the story is that the dest reg of the fnma above and the fma below
- // (and therefore possibly the src of the fcvt.fx[u] as well) cannot
- // be the same register, or this code breaks if the first argument is
- // zero. (e.g. without this hack, 0%8 yields -64, not 0.)
- TmpF14 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
- TmpF13, TmpF12, TmpF11, TmpPR);
- Chain = TmpF14.getValue(1);
+ SDOperand TmpE0, TmpY1, TmpE1, TmpY2;
- if(isModulus) { // XXX: fragile! fixes _only_ mod, *breaks* div! !
- SDOperand bogus = CurDAG->getTargetNode(IA64::IUSE, MVT::Other, TmpF13); // hack :(
- Chain = bogus.getValue(0); // hmmm
- }
-
- if(!isFP) {
- // round to an integer
- if(isSigned) {
- TmpF15 = CurDAG->getTargetNode(IA64::FCVTFXTRUNCS1, MVT::i64, TmpF14);
- Chain = TmpF15.getValue(1);
- }
- else {
- TmpF15 = CurDAG->getTargetNode(IA64::FCVTFXUTRUNCS1, MVT::i64, TmpF14);
- Chain = TmpF15.getValue(1);
- }
- } else {
- TmpF15 = TmpF14;
- // EXERCISE: can you see why TmpF15=TmpF14 does not work here, and
- // we really do need the above FMOV? ;)
- }
-
- if(!isModulus) {
- if(isFP) { // extra worrying about div-by-zero
- // we do a 'conditional fmov' (of the correct result, depending
- // on how the frcpa predicate turned out)
- SDOperand bogoResult = CurDAG->getTargetNode(IA64::PFMOV, MVT::f64,
- TmpF12, TmpPR2);
- Chain = bogoResult.getValue(1);
- Result = CurDAG->getTargetNode(IA64::CFMOV, MVT::f64, bogoResult,
- TmpF15, TmpPR);
+ TmpE0 = CurDAG->getTargetNode(IA64::CFNMAS1, MVT::f64,
+ TmpF4, TmpF5, F1, TmpPR);
+ Chain = TmpE0.getValue(1);
+ TmpY1 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
+ TmpF5, TmpE0, TmpF5, TmpPR);
+ Chain = TmpY1.getValue(1);
+ TmpE1 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
+ TmpE0, TmpE0, F0, TmpPR);
+ Chain = TmpE1.getValue(1);
+ TmpY2 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
+ TmpY1, TmpE1, TmpY1, TmpPR);
+ Chain = TmpY2.getValue(1);
+
+ if(isFP) { // if this is an FP divide, we finish up here and exit early
+ if(isModulus)
+ assert(0 && "Sorry, try another FORTRAN compiler.");
+
+ SDOperand TmpE2, TmpY3, TmpQ0, TmpR0;
+
+ TmpE2 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
+ TmpE1, TmpE1, F0, TmpPR);
+ Chain = TmpE2.getValue(1);
+ TmpY3 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
+ TmpY2, TmpE2, TmpY2, TmpPR);
+ Chain = TmpY3.getValue(1);
+ TmpQ0 = CurDAG->getTargetNode(IA64::CFMADS1, MVT::f64, // double prec!
+ Tmp1, TmpY3, F0, TmpPR);
+ Chain = TmpQ0.getValue(1);
+ TmpR0 = CurDAG->getTargetNode(IA64::CFNMADS1, MVT::f64, // double prec!
+ Tmp2, TmpQ0, Tmp1, TmpPR);
+ Chain = TmpR0.getValue(1);
+
+// we want Result to have the same target register as the frcpa, so
+// we two-address hack it. See the comment "for this to work..." on
+// page 48 of Intel application note #245415
+ Result = CurDAG->getTargetNode(IA64::TCFMADS0, MVT::f64, // d.p. s0 rndg!
+ TmpY3, TmpR0, TmpQ0, TmpPR);
Chain = Result.getValue(1);
- }
- else {
- Result = CurDAG->getTargetNode(IA64::GETFSIG, MVT::i64, TmpF15);
+ return Result; // XXX: early exit!
+ } else { // this is *not* an FP divide, so there's a bit left to do:
+
+ SDOperand TmpQ2, TmpR2, TmpQ3, TmpQ;
+
+ TmpQ2 = CurDAG->getTargetNode(IA64::CFMAS1, MVT::f64,
+ TmpF3, TmpY2, F0, TmpPR);
+ Chain = TmpQ2.getValue(1);
+ TmpR2 = CurDAG->getTargetNode(IA64::CFNMAS1, MVT::f64,
+ TmpF4, TmpQ2, TmpF3, TmpPR);
+ Chain = TmpR2.getValue(1);
+
+// we want TmpQ3 to have the same target register as the frcpa, so
+// we two-address hack it. See the comment "for this to work..." on
+// page 48 of Intel application note #245415
+ TmpQ3 = CurDAG->getTargetNode(IA64::TCFMAS1, MVT::f64,
+ TmpR2, TmpR2, TmpY2, TmpQ2, TmpPR);
+ Chain = TmpQ3.getValue(1);
+
+ if(isSigned)
+ TmpQ = CurDAG->getTargetNode(IA64::FCVTFXTRUNCS1, MVT::f64, TmpQ3);
+ else
+ TmpQ = CurDAG->getTargetNode(IA64::FCVTFXUTRUNCS1, MVT::f64, TmpQ3);
+
+ Chain = TmpQ.getValue(1);
+
+ if(isModulus) {
+ SDOperand FPminusB = CurDAG->getTargetNode(IA64::SETFSIG, MVT::f64,
+ minusB);
+ Chain = FPminusB.getValue(1);
+ SDOperand Remainder = CurDAG->getTargetNode(IA64::XMAL, MVT::f64,
+ TmpQ, FPminusB, TmpF1);
+ Chain = Remainder.getValue(1);
+ Result = CurDAG->getTargetNode(IA64::GETFSIG, MVT::i64, Remainder);
Chain = Result.getValue(1);
- }
- } else { // this is a modulus
- if(!isFP) {
- // answer = q * (-b) + a
- SDOperand TmpI = CurDAG->getTargetNode(IA64::SUB, MVT::i64,
- CurDAG->getRegister(IA64::r0, MVT::i64), Tmp2);
- Chain = TmpI.getValue(1);
- SDOperand TmpF = CurDAG->getTargetNode(IA64::SETFSIG, MVT::f64, TmpI);
- Chain = TmpF.getValue(1);
- SDOperand ModulusResult = CurDAG->getTargetNode(IA64::XMAL, MVT::f64,
- TmpF15, TmpF, TmpF1);
- Chain = ModulusResult.getValue(1);
- Result = CurDAG->getTargetNode(IA64::GETFSIG, MVT::i64, ModulusResult);
+ } else { // just an integer divide
+ Result = CurDAG->getTargetNode(IA64::GETFSIG, MVT::i64, TmpQ);
Chain = Result.getValue(1);
- } else { // FP modulus! The horror... the horror....
- assert(0 && "sorry, no FP modulus just yet!\n!\n");
}
- }
- return Result;
+ return Result;
+ } // wasn't an FP divide
}
// Select - Convert the specified operand from a target-independent to a
diff --git a/lib/Target/IA64/IA64InstrInfo.td b/lib/Target/IA64/IA64InstrInfo.td
index f18b451..fc72ad5 100644
--- a/lib/Target/IA64/IA64InstrInfo.td
+++ b/lib/Target/IA64/IA64InstrInfo.td
@@ -589,6 +589,15 @@ def FNEGABS : AForm_DAG<0x03, 0x0b, (ops FP:$dst, FP:$src),
"fnegabs $dst = $src;;",
[(set FP:$dst, (fneg (fabs FP:$src)))]>;
+let isTwoAddress=1 in {
+def TCFMAS1 : AForm<0x03, 0x0b,
+ (ops FP:$dst, FP:$bogussrc, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fma.s1 $dst = $src1, $src2, $src3;;">;
+def TCFMADS0 : AForm<0x03, 0x0b,
+ (ops FP:$dst, FP:$bogussrc, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fma.d.s0 $dst = $src1, $src2, $src3;;">;
+}
+
def CFMAS1 : AForm<0x03, 0x0b,
(ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
"($qp) fma.s1 $dst = $src1, $src2, $src3;;">;
@@ -596,6 +605,18 @@ def CFNMAS1 : AForm<0x03, 0x0b,
(ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
"($qp) fnma.s1 $dst = $src1, $src2, $src3;;">;
+def CFMADS1 : AForm<0x03, 0x0b,
+ (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fma.d.s1 $dst = $src1, $src2, $src3;;">;
+def CFMADS0 : AForm<0x03, 0x0b,
+ (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fma.d.s0 $dst = $src1, $src2, $src3;;">;
+def CFNMADS1 : AForm<0x03, 0x0b,
+ (ops FP:$dst, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fnma.d.s1 $dst = $src1, $src2, $src3;;">;
+
+def FRCPAS0 : AForm<0x03, 0x0b, (ops FP:$dstFR, PR:$dstPR, FP:$src1, FP:$src2),
+ "frcpa.s0 $dstFR, $dstPR = $src1, $src2;;">;
def FRCPAS1 : AForm<0x03, 0x0b, (ops FP:$dstFR, PR:$dstPR, FP:$src1, FP:$src2),
"frcpa.s1 $dstFR, $dstPR = $src1, $src2;;">;