aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/X86/X86InstrInfo.cpp
diff options
context:
space:
mode:
authorJakob Stoklund Olesen <stoklund@2pi.dk>2010-03-25 17:25:00 +0000
committerJakob Stoklund Olesen <stoklund@2pi.dk>2010-03-25 17:25:00 +0000
commit352aa503faee6c58e9cdb5054cc5ec1d90c696b4 (patch)
tree69c7938e0883c8745b56a81eb9aa556e74cf044b /lib/Target/X86/X86InstrInfo.cpp
parente4614f7e84a7219164e9992fff855dee3816a08e (diff)
downloadexternal_llvm-352aa503faee6c58e9cdb5054cc5ec1d90c696b4.zip
external_llvm-352aa503faee6c58e9cdb5054cc5ec1d90c696b4.tar.gz
external_llvm-352aa503faee6c58e9cdb5054cc5ec1d90c696b4.tar.bz2
Add a late SSEDomainFix pass that twiddles SSE instructions to avoid domain crossings.
On Nehalem and newer CPUs there is a 2 cycle latency penalty on using a register in a different domain than where it was defined. Some instructions have equvivalents for different domains, like por/orps/orpd. The SSEDomainFix pass tries to minimize the number of domain crossings by changing between equvivalent opcodes where possible. This is a work in progress, in particular the pass doesn't do anything yet. SSE instructions are tagged with their execution domain in TableGen using the last two bits of TSFlags. Note that not all instructions are tagged correctly. Life just isn't that simple. The SSE execution domain issue is very similar to the ARM NEON/VFP pipeline issue handled by NEONMoveFixPass. This pass may become target independent to handle both. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99524 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/X86InstrInfo.cpp')
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp43
1 files changed, 43 insertions, 0 deletions
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 2323f57..eeb020b 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3658,3 +3658,46 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
X86FI->setGlobalBaseReg(GlobalBaseReg);
return GlobalBaseReg;
}
+
+X86InstrInfo::SSEDomain X86InstrInfo::GetSSEDomain(const MachineInstr *MI,
+ const unsigned *&equiv) const {
+ // These are the replaceable SSE instructions. Some of these have Int variants
+ // that we don't include here. We don't want to replace instructions selected
+ // by intrinsics.
+ static const unsigned ReplaceableInstrs[][3] = {
+ //PackedInt PackedSingle PackedDouble
+ { X86::MOVDQAmr, X86::MOVAPSmr, X86::MOVAPDmr },
+ { X86::MOVDQArm, X86::MOVAPSrm, X86::MOVAPDrm },
+ { X86::MOVDQArr, X86::MOVAPSrr, X86::MOVAPDrr },
+ { X86::MOVDQUmr, X86::MOVUPSmr, X86::MOVUPDmr },
+ { X86::MOVDQUrm, X86::MOVUPSrm, X86::MOVUPDrm },
+ { X86::MOVNTDQmr, X86::MOVNTPSmr, X86::MOVNTPDmr },
+ { X86::PANDNrm, X86::ANDNPSrm, X86::ANDNPDrm },
+ { X86::PANDNrr, X86::ANDNPSrr, X86::ANDNPDrr },
+ { X86::PANDrm, X86::ANDPSrm, X86::ANDPDrm },
+ { X86::PANDrr, X86::ANDPSrr, X86::ANDPDrr },
+ { X86::PORrm, X86::ORPSrm, X86::ORPDrm },
+ { X86::PORrr, X86::ORPSrr, X86::ORPDrr },
+ { X86::PUNPCKHQDQrm, X86::UNPCKHPSrm, X86::UNPCKHPDrm },
+ { X86::PUNPCKHQDQrr, X86::UNPCKHPSrr, X86::UNPCKHPDrr },
+ { X86::PUNPCKLQDQrm, X86::UNPCKLPSrm, X86::UNPCKLPDrm },
+ { X86::PUNPCKLQDQrr, X86::UNPCKLPSrr, X86::UNPCKLPDrr },
+ { X86::PXORrm, X86::XORPSrm, X86::XORPDrm },
+ { X86::PXORrr, X86::XORPSrr, X86::XORPDrr },
+ };
+
+ const SSEDomain domain =
+ SSEDomain((MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3);
+ if (domain == NotSSEDomain)
+ return domain;
+
+ // Linear search FTW!
+ const unsigned opc = MI->getOpcode();
+ for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
+ if (ReplaceableInstrs[i][domain-1] == opc) {
+ equiv = ReplaceableInstrs[i];
+ return domain;
+ }
+ equiv = 0;
+ return domain;
+}