aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/ARM
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2011-04-19 01:21:49 +0000
committerEvan Cheng <evan.cheng@apple.com>2011-04-19 01:21:49 +0000
commit75b41f1540f35ef0fd5e4a52c1840f1a19debb03 (patch)
tree8d3559137ef944320f4082f3ccfa2d8f765938bd /lib/Target/ARM
parent775c3e58246befc4a6fd943e9db37a1cfb32cce2 (diff)
downloadexternal_llvm-75b41f1540f35ef0fd5e4a52c1840f1a19debb03.zip
external_llvm-75b41f1540f35ef0fd5e4a52c1840f1a19debb03.tar.gz
external_llvm-75b41f1540f35ef0fd5e4a52c1840f1a19debb03.tar.bz2
Change A9 scheduling itineraries VLD* / VST* entries default to "aligned". That
is, it assumes addresses are 64-bit aligned (which should be the more common case). If the alignment is found not to be aligned, then getOperandLatency() would adjust the operand latency computation by one to compensate for it. rdar://9294833 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129742 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp202
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td343
2 files changed, 373 insertions, 172 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 0789279..54f51cc 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2222,6 +2222,101 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
}
+ if (DefAlign < 8 && Subtarget.isCortexA9())
+ switch (DefTID.getOpcode()) {
+ default: break;
+ case ARM::VLD1q8:
+ case ARM::VLD1q16:
+ case ARM::VLD1q32:
+ case ARM::VLD1q64:
+ case ARM::VLD1q8_UPD:
+ case ARM::VLD1q16_UPD:
+ case ARM::VLD1q32_UPD:
+ case ARM::VLD1q64_UPD:
+ case ARM::VLD2d8:
+ case ARM::VLD2d16:
+ case ARM::VLD2d32:
+ case ARM::VLD2q8:
+ case ARM::VLD2q16:
+ case ARM::VLD2q32:
+ case ARM::VLD2d8_UPD:
+ case ARM::VLD2d16_UPD:
+ case ARM::VLD2d32_UPD:
+ case ARM::VLD2q8_UPD:
+ case ARM::VLD2q16_UPD:
+ case ARM::VLD2q32_UPD:
+ case ARM::VLD3d8:
+ case ARM::VLD3d16:
+ case ARM::VLD3d32:
+ case ARM::VLD1d64T:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD1d64T_UPD:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD1d64Q:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ case ARM::VLD1d64Q_UPD:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16:
+ case ARM::VLD1DUPq32:
+ case ARM::VLD1DUPq8_UPD:
+ case ARM::VLD1DUPq16_UPD:
+ case ARM::VLD1DUPq32_UPD:
+ case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16:
+ case ARM::VLD2DUPd32:
+ case ARM::VLD2DUPd8_UPD:
+ case ARM::VLD2DUPd16_UPD:
+ case ARM::VLD2DUPd32_UPD:
+ case ARM::VLD4DUPd8:
+ case ARM::VLD4DUPd16:
+ case ARM::VLD4DUPd32:
+ case ARM::VLD4DUPd8_UPD:
+ case ARM::VLD4DUPd16_UPD:
+ case ARM::VLD4DUPd32_UPD:
+ case ARM::VLD1LNd8:
+ case ARM::VLD1LNd16:
+ case ARM::VLD1LNd32:
+ case ARM::VLD1LNd8_UPD:
+ case ARM::VLD1LNd16_UPD:
+ case ARM::VLD1LNd32_UPD:
+ case ARM::VLD2LNd8:
+ case ARM::VLD2LNd16:
+ case ARM::VLD2LNd32:
+ case ARM::VLD2LNq16:
+ case ARM::VLD2LNq32:
+ case ARM::VLD2LNd8_UPD:
+ case ARM::VLD2LNd16_UPD:
+ case ARM::VLD2LNd32_UPD:
+ case ARM::VLD2LNq16_UPD:
+ case ARM::VLD2LNq32_UPD:
+ case ARM::VLD4LNd8:
+ case ARM::VLD4LNd16:
+ case ARM::VLD4LNd32:
+ case ARM::VLD4LNq16:
+ case ARM::VLD4LNq32:
+ case ARM::VLD4LNd8_UPD:
+ case ARM::VLD4LNd16_UPD:
+ case ARM::VLD4LNd32_UPD:
+ case ARM::VLD4LNq16_UPD:
+ case ARM::VLD4LNq32_UPD:
+ // If the address is not 64-bit aligned, the latencies of these
+ // instructions increases by one.
+ ++Latency;
+ break;
+ }
+
return Latency;
}
@@ -2288,6 +2383,113 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
}
+ if (DefAlign < 8 && Subtarget.isCortexA9())
+ switch (DefTID.getOpcode()) {
+ default: break;
+ case ARM::VLD1q8Pseudo:
+ case ARM::VLD1q16Pseudo:
+ case ARM::VLD1q32Pseudo:
+ case ARM::VLD1q64Pseudo:
+ case ARM::VLD1q8Pseudo_UPD:
+ case ARM::VLD1q16Pseudo_UPD:
+ case ARM::VLD1q32Pseudo_UPD:
+ case ARM::VLD1q64Pseudo_UPD:
+ case ARM::VLD2d8Pseudo:
+ case ARM::VLD2d16Pseudo:
+ case ARM::VLD2d32Pseudo:
+ case ARM::VLD2q8Pseudo:
+ case ARM::VLD2q16Pseudo:
+ case ARM::VLD2q32Pseudo:
+ case ARM::VLD2d8Pseudo_UPD:
+ case ARM::VLD2d16Pseudo_UPD:
+ case ARM::VLD2d32Pseudo_UPD:
+ case ARM::VLD2q8Pseudo_UPD:
+ case ARM::VLD2q16Pseudo_UPD:
+ case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD3d8Pseudo:
+ case ARM::VLD3d16Pseudo:
+ case ARM::VLD3d32Pseudo:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD3d8Pseudo_UPD:
+ case ARM::VLD3d16Pseudo_UPD:
+ case ARM::VLD3d32Pseudo_UPD:
+ case ARM::VLD1d64TPseudo_UPD:
+ case ARM::VLD3q8Pseudo_UPD:
+ case ARM::VLD3q16Pseudo_UPD:
+ case ARM::VLD3q32Pseudo_UPD:
+ case ARM::VLD3q8oddPseudo:
+ case ARM::VLD3q16oddPseudo:
+ case ARM::VLD3q32oddPseudo:
+ case ARM::VLD3q8oddPseudo_UPD:
+ case ARM::VLD3q16oddPseudo_UPD:
+ case ARM::VLD3q32oddPseudo_UPD:
+ case ARM::VLD4d8Pseudo:
+ case ARM::VLD4d16Pseudo:
+ case ARM::VLD4d32Pseudo:
+ case ARM::VLD1d64QPseudo:
+ case ARM::VLD4d8Pseudo_UPD:
+ case ARM::VLD4d16Pseudo_UPD:
+ case ARM::VLD4d32Pseudo_UPD:
+ case ARM::VLD1d64QPseudo_UPD:
+ case ARM::VLD4q8Pseudo_UPD:
+ case ARM::VLD4q16Pseudo_UPD:
+ case ARM::VLD4q32Pseudo_UPD:
+ case ARM::VLD4q8oddPseudo:
+ case ARM::VLD4q16oddPseudo:
+ case ARM::VLD4q32oddPseudo:
+ case ARM::VLD4q8oddPseudo_UPD:
+ case ARM::VLD4q16oddPseudo_UPD:
+ case ARM::VLD4q32oddPseudo_UPD:
+ case ARM::VLD1DUPq8Pseudo:
+ case ARM::VLD1DUPq16Pseudo:
+ case ARM::VLD1DUPq32Pseudo:
+ case ARM::VLD1DUPq8Pseudo_UPD:
+ case ARM::VLD1DUPq16Pseudo_UPD:
+ case ARM::VLD1DUPq32Pseudo_UPD:
+ case ARM::VLD2DUPd8Pseudo:
+ case ARM::VLD2DUPd16Pseudo:
+ case ARM::VLD2DUPd32Pseudo:
+ case ARM::VLD2DUPd8Pseudo_UPD:
+ case ARM::VLD2DUPd16Pseudo_UPD:
+ case ARM::VLD2DUPd32Pseudo_UPD:
+ case ARM::VLD4DUPd8Pseudo:
+ case ARM::VLD4DUPd16Pseudo:
+ case ARM::VLD4DUPd32Pseudo:
+ case ARM::VLD4DUPd8Pseudo_UPD:
+ case ARM::VLD4DUPd16Pseudo_UPD:
+ case ARM::VLD4DUPd32Pseudo_UPD:
+ case ARM::VLD1LNq8Pseudo:
+ case ARM::VLD1LNq16Pseudo:
+ case ARM::VLD1LNq32Pseudo:
+ case ARM::VLD1LNq8Pseudo_UPD:
+ case ARM::VLD1LNq16Pseudo_UPD:
+ case ARM::VLD1LNq32Pseudo_UPD:
+ case ARM::VLD2LNd8Pseudo:
+ case ARM::VLD2LNd16Pseudo:
+ case ARM::VLD2LNd32Pseudo:
+ case ARM::VLD2LNq16Pseudo:
+ case ARM::VLD2LNq32Pseudo:
+ case ARM::VLD2LNd8Pseudo_UPD:
+ case ARM::VLD2LNd16Pseudo_UPD:
+ case ARM::VLD2LNd32Pseudo_UPD:
+ case ARM::VLD2LNq16Pseudo_UPD:
+ case ARM::VLD2LNq32Pseudo_UPD:
+ case ARM::VLD4LNd8Pseudo:
+ case ARM::VLD4LNd16Pseudo:
+ case ARM::VLD4LNd32Pseudo:
+ case ARM::VLD4LNq16Pseudo:
+ case ARM::VLD4LNq32Pseudo:
+ case ARM::VLD4LNd8Pseudo_UPD:
+ case ARM::VLD4LNd16Pseudo_UPD:
+ case ARM::VLD4LNd32Pseudo_UPD:
+ case ARM::VLD4LNq16Pseudo_UPD:
+ case ARM::VLD4LNq32Pseudo_UPD:
+ // If the address is not 64-bit aligned, the latencies of these
+ // instructions increases by one.
+ ++Latency;
+ break;
+ }
+
return Latency;
}
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 82c6735..6714be0 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -741,189 +741,188 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
// NEON
// VLD1
- // FIXME: Conservatively assume insufficent alignment.
InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [2, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
// VLD1x2
InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [2, 2, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1]>,
// VLD1x3
InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [2, 2, 3, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 1]>,
// VLD1x4
InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [2, 2, 3, 3, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 1]>,
// VLD1u
InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [2, 2, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 2, 1]>,
// VLD1x2u
InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [2, 2, 2, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 2, 1]>,
// VLD1x3u
InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [2, 2, 3, 2, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 1]>,
// VLD1x4u
InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [2, 2, 3, 3, 2, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 2, 1]>,
//
// VLD1ln
InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [4, 1, 1, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
//
// VLD1lnu
InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [4, 2, 1, 1, 1, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 2, 1, 1, 1, 1]>,
//
// VLD1dup
InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [3, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1]>,
//
// VLD1dupu
InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [3, 2, 1, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1, 1]>,
//
// VLD2
InstrItinData<IIC_VLD2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [3, 3, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1]>,
//
// VLD2x2
InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [3, 4, 3, 4, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 3, 2, 3, 1]>,
//
// VLD2ln
InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [4, 4, 1, 1, 1, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 1, 1, 1, 1]>,
//
// VLD2u
InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [3, 3, 2, 1, 1, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 2, 1, 1, 1]>,
//
// VLD2x2u
InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [3, 4, 3, 4, 2, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 3, 2, 3, 2, 1]>,
//
// VLD2lnu
InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [4, 4, 2, 1, 1, 1, 1, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 2, 1, 1, 1, 1, 1]>,
//
// VLD2dup
InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [3, 3, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1]>,
//
// VLD2dupu
InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [3, 3, 2, 1, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 2, 1, 1]>,
//
// VLD3
InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
- InstrStage<4, [A9_NPipe], 0>,
- InstrStage<4, [A9_LSUnit]>],
- [4, 4, 5, 1]>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 1]>,
//
// VLD3ln
InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -938,10 +937,10 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
- InstrStage<4, [A9_NPipe], 0>,
- InstrStage<4, [A9_LSUnit]>],
- [4, 4, 5, 2, 1]>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 2, 1]>,
//
// VLD3lnu
InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -974,108 +973,108 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
- InstrStage<4, [A9_NPipe], 0>,
- InstrStage<4, [A9_LSUnit]>],
- [4, 4, 5, 5, 1]>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 4, 1]>,
//
// VLD4ln
InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
- InstrStage<5, [A9_NPipe], 0>,
- InstrStage<5, [A9_LSUnit]>],
- [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
//
// VLD4u
InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
- InstrStage<4, [A9_NPipe], 0>,
- InstrStage<4, [A9_LSUnit]>],
- [4, 4, 5, 5, 2, 1]>,
+ InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 4, 2, 1]>,
//
// VLD4lnu
InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
- InstrStage<5, [A9_NPipe], 0>,
- InstrStage<5, [A9_LSUnit]>],
- [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
//
// VLD4dup
InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [3, 3, 4, 4, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 3, 3, 1]>,
//
// VLD4dupu
InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [3, 3, 4, 4, 2, 1, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 3, 3, 2, 1, 1]>,
//
// VST1
InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[1, 1, 1]>,
//
// VST1x2
InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[1, 1, 1, 1]>,
//
// VST1x3
InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[1, 1, 1, 1, 2]>,
//
// VST1x4
InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[1, 1, 1, 1, 2, 2]>,
//
// VST1u
InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[2, 1, 1, 1, 1]>,
//
// VST1x2u
InstrItinData<IIC_VST1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[2, 1, 1, 1, 1, 1]>,
//
// VST1x3u
@@ -1083,44 +1082,44 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[2, 1, 1, 1, 1, 1, 2]>,
//
// VST1x4u
InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[2, 1, 1, 1, 1, 1, 2, 2]>,
//
// VST1ln
InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[1, 1, 1]>,
//
// VST1lnu
InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[2, 1, 1, 1, 1]>,
//
// VST2
InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[1, 1, 1, 1]>,
//
// VST2x2
@@ -1136,9 +1135,9 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[2, 1, 1, 1, 1, 1]>,
//
// VST2x2u
@@ -1154,36 +1153,36 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[1, 1, 1, 1]>,
//
// VST2lnu
InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[2, 1, 1, 1, 1, 1]>,
//
// VST3
InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[1, 1, 1, 1, 2]>,
//
// VST3u
InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[2, 1, 1, 1, 1, 1, 2]>,
//
// VST3ln
@@ -1208,36 +1207,36 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[1, 1, 1, 1, 2, 2]>,
//
// VST4u
InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[2, 1, 1, 1, 1, 1, 2, 2]>,
//
// VST4ln
InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[1, 1, 1, 1, 2, 2]>,
//
// VST4lnu
InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[2, 1, 1, 1, 1, 1, 2, 2]>,
//