diff options
author | Nate Begeman <natebegeman@mac.com> | 2005-07-15 00:38:55 +0000 |
---|---|---|
committer | Nate Begeman <natebegeman@mac.com> | 2005-07-15 00:38:55 +0000 |
commit | 16b04f3d5e2a602333a7f64d66e3b78bc1679110 (patch) | |
tree | a6561cdbace9f9af7cfb23b63a5991926d1f3b42 | |
parent | 6c7cb2903836905f5ac483e8cd5382e541fa73f3 (diff) | |
download | external_llvm-16b04f3d5e2a602333a7f64d66e3b78bc1679110.zip external_llvm-16b04f3d5e2a602333a7f64d66e3b78bc1679110.tar.gz external_llvm-16b04f3d5e2a602333a7f64d66e3b78bc1679110.tar.bz2 |
Get closer to fully working scalar FP in SSE registers. This gets the
SingleSource tests working, as well as Olden/power.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@22441 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86.td | 2 |
-rw-r--r-- | lib/Target/X86/X86ISelPattern.cpp | 58 |
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 45 |
3 files changed, 53 insertions, 52 deletions
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index afa3ff7..874391d 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -61,7 +61,7 @@ def IntelAsmWriter : AsmWriter { def X86 : Target { // Specify the callee saved registers. - let CalleeSavedRegisters = [ESI, EDI, EBX, EBP, XMM4, XMM5, XMM6, XMM7]; + let CalleeSavedRegisters = [ESI, EDI, EBX, EBP]; // Yes, pointers are 32-bits in size. let PointerType = i32; diff --git a/lib/Target/X86/X86ISelPattern.cpp b/lib/Target/X86/X86ISelPattern.cpp index 6bd8e27..2230766 100644 --- a/lib/Target/X86/X86ISelPattern.cpp +++ b/lib/Target/X86/X86ISelPattern.cpp @@ -1687,9 +1687,9 @@ void ISel::EmitSelectCC(SDOperand Cond, MVT::ValueType SVT, /*missing*/0, /*missing*/0, X86::FCMOVB , X86::FCMOVBE, X86::FCMOVA , X86::FCMOVAE, X86::FCMOVP , X86::FCMOVNP }; - static const unsigned SSE_CMOVTAB[] = { + static const int SSE_CMOVTAB[] = { 0 /* CMPEQSS */, 4 /* CMPNEQSS */, 1 /* CMPLTSS */, 2 /* CMPLESS */, - 2 /* CMPLESS */, 1 /* CMPLTSS */, /*missing*/0, /*missing*/0, + 1 /* CMPLTSS */, 2 /* CMPLESS */, /*missing*/0, /*missing*/0, /*missing*/0, /*missing*/0, /*missing*/0, /*missing*/0 }; @@ -1761,33 +1761,12 @@ void ISel::EmitSelectCC(SDOperand Cond, MVT::ValueType SVT, // There's no SSE equivalent of FCMOVE. In some cases we can fake it up, in // Others we will have to do the PowerPC thing and generate an MBB for the // true and false values and select between them with a PHI. - if (X86ScalarSSE) { - if (CondCode != NOT_SET) { - unsigned CMPSOpc = (SVT == MVT::f64) ? 
X86::CMPSDrr : X86::CMPSSrr; - unsigned CMPSImm = SSE_CMOVTAB[CondCode]; - // FIXME check for min - // FIXME check for max - // FIXME check for reverse - unsigned LHS = SelectExpr(Cond.getOperand(0)); - unsigned RHS = SelectExpr(Cond.getOperand(1)); - // emit compare mask - unsigned MaskReg = MakeReg(SVT); - BuildMI(BB, CMPSOpc, 3, MaskReg).addReg(LHS).addReg(RHS).addImm(CMPSImm); - // emit and with mask - unsigned TrueMask = MakeReg(SVT); - unsigned AndOpc = (SVT == MVT::f32) ? X86::ANDPSrr : X86::ANDPDrr; - BuildMI(BB, AndOpc, 2, TrueMask).addReg(RTrue).addReg(MaskReg); - // emit and with inverse mask - unsigned FalseMask = MakeReg(SVT); - unsigned AndnOpc = (SVT == MVT::f32) ? X86::ANDNPSrr : X86::ANDNPDrr; - BuildMI(BB, AndnOpc, 2, FalseMask).addReg(RFalse).addReg(MaskReg); - // emit or into dest reg - unsigned OROpc = (SVT == MVT::f32) ? X86::ORPSrr : X86::ORPDrr; - BuildMI(BB, OROpc, 2, RDest).addReg(TrueMask).addReg(FalseMask); - return; + if (X86ScalarSSE && (SVT == MVT::f32 || SVT == MVT::f64)) { + if (0 && CondCode != NOT_SET) { + // FIXME: check for min and max } else { - // do the test and branch thing - // Get the condition into the zero flag. + // FIXME: emit a direct compare and branch rather than setting a cond reg + // and testing it. unsigned CondReg = SelectExpr(Cond); BuildMI(BB, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg); @@ -2184,6 +2163,11 @@ unsigned ISel::SelectExpr(SDOperand N) { Tmp1 = SelectExpr(N.getOperand(0)); BuildMI(BB, X86::CVTSS2SDrr, 1, Result).addReg(Tmp1); return Result; + case ISD::FP_ROUND: + assert(X86ScalarSSE && "Scalar SSE FP must be enabled to use f32"); + Tmp1 = SelectExpr(N.getOperand(0)); + BuildMI(BB, X86::CVTSD2SSrr, 1, Result).addReg(Tmp1); + return Result; case ISD::CopyFromReg: Select(N.getOperand(0)); if (Result == 1) { @@ -2482,9 +2466,9 @@ unsigned ISel::SelectExpr(SDOperand N) { // CVTSD2SI instructions. 
if (ISD::FP_TO_SINT == N.getOpcode() && X86ScalarSSE) { if (MVT::f32 == N.getOperand(0).getValueType()) { - BuildMI(BB, X86::CVTSS2SIrr, 1, Result).addReg(Tmp1); + BuildMI(BB, X86::CVTTSS2SIrr, 1, Result).addReg(Tmp1); } else if (MVT::f64 == N.getOperand(0).getValueType()) { - BuildMI(BB, X86::CVTSD2SIrr, 1, Result).addReg(Tmp1); + BuildMI(BB, X86::CVTTSD2SIrr, 1, Result).addReg(Tmp1); } else { assert(0 && "Not an f32 or f64?"); abort(); @@ -4485,8 +4469,18 @@ void ISel::Select(SDOperand N) { SelectAddress(N.getOperand(2), AM); Select(N.getOperand(0)); } - addFullAddress(BuildMI(BB, X86::MOV32mi, 4+1), - AM).addGlobalAddress(GA->getGlobal()); + GlobalValue *GV = GA->getGlobal(); + // For Darwin, external and weak symbols are indirect, so we want to load + // the value at address GV, not the value of GV itself. + if (Subtarget->getIndirectExternAndWeakGlobals() && + (GV->hasWeakLinkage() || GV->isExternal())) { + Tmp1 = MakeReg(MVT::i32); + BuildMI(BB, X86::MOV32rm, 4, Tmp1).addReg(0).addZImm(1).addReg(0) + .addGlobalAddress(GV, false, 0); + addFullAddress(BuildMI(BB, X86::MOV32mr, 4+1),AM).addReg(Tmp1); + } else { + addFullAddress(BuildMI(BB, X86::MOV32mi, 4+1),AM).addGlobalAddress(GV); + } return; } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 1376d8f..53a82ba 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -20,6 +20,9 @@ class X86MemOperand<ValueType Ty> : Operand<Ty> { let NumMIOperands = 4; let PrintMethod = "printMemoryOperand"; } +def SSECC : Operand<i8> { + let PrintMethod = "printSSECC"; +} def i8mem : X86MemOperand<i8>; def i16mem : X86MemOperand<i16>; @@ -188,7 +191,7 @@ def JG : IBr<0x8F, (ops i32imm:$dst), "jg $dst">, TB; let isCall = 1 in // All calls clobber the non-callee saved registers... 
let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, - XMM0, XMM1, XMM2, XMM3] in { + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7] in { def CALLpcrel32 : I<0xE8, RawFrm, (ops calltarget:$dst), "call $dst">; def CALL32r : I<0xFF, MRM2r, (ops R32:$dst), "call {*}$dst">; def CALL32m : I<0xFF, MRM2m, (ops i32mem:$dst), "call {*}$dst">; @@ -1425,17 +1428,21 @@ def MOVAPDrm: I<0x28, MRMSrcMem, (ops RXMM:$dst, f64mem:$src), def MOVAPDmr: I<0x29, MRMDestMem, (ops f64mem:$dst, RXMM:$src), "movapd {$src, $dst|$dst, $src}">, TB, OpSize; -def CVTSD2SIrr: I<0x2D, MRMSrcReg, (ops R32:$dst, RXMM:$src), - "cvtsd2si {$src, $dst|$dst, $src}">, XD; -def CVTSD2SIrm: I<0x2D, MRMSrcMem, (ops R32:$dst, f64mem:$src), - "cvtsd2si {$src, $dst|$dst, $src}">, XD; -def CVTSS2SIrr: I<0x2D, MRMSrcReg, (ops R32:$dst, RXMM:$src), - "cvtss2si {$src, $dst|$dst, $src}">, XS; -def CVTSS2SIrm: I<0x2D, MRMSrcMem, (ops R32:$dst, f32mem:$src), - "cvtss2si {$src, $dst|$dst, $src}">, XS; -def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops R32:$dst, RXMM:$src), +def CVTTSD2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, RXMM:$src), + "cvttsd2si {$src, $dst|$dst, $src}">, XD; +def CVTTSD2SIrm: I<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src), + "cvttsd2si {$src, $dst|$dst, $src}">, XD; +def CVTTSS2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, RXMM:$src), + "cvttss2si {$src, $dst|$dst, $src}">, XS; +def CVTTSS2SIrm: I<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src), + "cvttss2si {$src, $dst|$dst, $src}">, XS; +def CVTSD2SSrr: I<0x5A, MRMSrcReg, (ops RXMM:$dst, RXMM:$src), + "cvtsd2ss {$src, $dst|$dst, $src}">, XS; +def CVTSD2SSrm: I<0x5A, MRMSrcMem, (ops RXMM:$dst, f64mem:$src), + "cvtsd2ss {$src, $dst|$dst, $src}">, XS; +def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops RXMM:$dst, RXMM:$src), "cvtss2sd {$src, $dst|$dst, $src}">, XD; -def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops R32:$dst, f32mem:$src), +def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops RXMM:$dst, f32mem:$src), "cvtss2sd {$src, $dst|$dst, $src}">, XD; def CVTSI2SSrr: I<0x2A, 
MRMSrcReg, (ops R32:$dst, RXMM:$src), "cvtsi2ss {$src, $dst|$dst, $src}">, XS; @@ -1515,17 +1522,17 @@ def SUBSDrr : I<0x5C, MRMSrcReg, (ops RXMM:$dst, RXMM:$src1, RXMM:$src), "subsd {$src, $dst|$dst, $src}">, XD; def CMPSSrr : I<0xC2, MRMSrcReg, - (ops RXMM:$dst, RXMM:$src1, RXMM:$src, i8imm:$pred), - "cmpss {$src, $dst, $pred|$dst, $src, $pred}">, XS; + (ops RXMM:$dst, RXMM:$src1, RXMM:$src, SSECC:$cc), + "cmp${cc}ss {$src, $dst|$dst, $src}">, XS; def CMPSSrm : I<0xC2, MRMSrcMem, - (ops RXMM:$dst, RXMM:$src1, f32mem:$src, i8imm:$pred), - "cmpss {$src, $dst, $pred|$dst, $src, $pred}">, XS; + (ops RXMM:$dst, RXMM:$src1, f32mem:$src, SSECC:$cc), + "cmp${cc}ss {$src, $dst|$dst, $src}">, XS; def CMPSDrr : I<0xC2, MRMSrcReg, - (ops RXMM:$dst, RXMM:$src1, RXMM:$src, i8imm:$pred), - "cmpss {$src, $dst, $pred|$dst, $src, $pred}">, XD; + (ops RXMM:$dst, RXMM:$src1, RXMM:$src, SSECC:$cc), + "cmp${cc}sd {$src, $dst|$dst, $src}">, XD; def CMPSDrm : I<0xC2, MRMSrcMem, - (ops RXMM:$dst, RXMM:$src1, f64mem:$src, i8imm:$pred), - "cmpss {$src, $dst, $pred|$dst, $src, $pred}">, XD; + (ops RXMM:$dst, RXMM:$src1, f64mem:$src, SSECC:$cc), + "cmp${cc}sd {$src, $dst|$dst, $src}">, XD; } //===----------------------------------------------------------------------===// |