aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorSebastian Pop <spop@codeaurora.org>2012-03-05 17:39:52 +0000
committerSebastian Pop <spop@codeaurora.org>2012-03-05 17:39:52 +0000
commit74bebde7c4e2d1cfd4a16c19ce3c87521df67639 (patch)
tree07fd9dda4ac83ae4cf874a76cc41b70a61720229 /lib
parent43ec0f4921e315dd9507be7467e633a837ad23db (diff)
downloadexternal_llvm-74bebde7c4e2d1cfd4a16c19ce3c87521df67639.zip
external_llvm-74bebde7c4e2d1cfd4a16c19ce3c87521df67639.tar.gz
external_llvm-74bebde7c4e2d1cfd4a16c19ce3c87521df67639.tar.bz2
updated patch for the ARM fused multiply add/sub
In this update: - I assumed neon2 does not imply vfpv4, but neon and vfpv4 imply neon2. - I kept setting .fpu=neon-vfpv4 code attribute because that is what the assembler understands. Patch by Ana Pazos <apazos@codeaurora.org> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@152036 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/ARM/ARM.td8
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp4
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td6
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td24
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td32
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp2
-rw-r--r--lib/Target/ARM/ARMSubtarget.h4
7 files changed, 41 insertions, 39 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 16af8cf..b05fe62 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -38,9 +38,9 @@ def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
"Enable NEON instructions",
[FeatureVFP3]>;
-def FeatureNEONVFP4 : SubtargetFeature<"neon-vfpv4", "HasNEONVFPv4", "true",
- "Enable NEON-VFP4 instructions",
- [FeatureVFP4, FeatureNEON]>;
+def FeatureNEON2 : SubtargetFeature<"neon2", "HasNEON2", "true",
+ "Enable Advanced SIMD2 instructions",
+ [FeatureNEON]>;
def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
"Enable Thumb2 instructions">;
def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
@@ -76,6 +76,8 @@ def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
"true",
"Use NEON for single precision FP">;
+// Allow more precision in FP computation
+def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">;
// Disable 32-bit to 16-bit narrowing for experimentation.
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 9d8c97a..4ec19cc 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -732,10 +732,10 @@ void ARMAsmPrinter::emitAttributes() {
if (Subtarget->hasNEON() && emitFPU) {
/* NEON is not exactly a VFP architecture, but GAS emit one of
* neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
- if (Subtarget->hasNEONVFP4())
+ if (Subtarget->hasNEON2())
AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon-vfpv4");
else
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
/* If emitted for NEON, omit from VFP below, since you can have both
* NEON and VFP in build attributes but only one .fpu */
emitFPU = false;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 6f510ba..0b1406e 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -184,9 +184,9 @@ def HasVFP4 : Predicate<"Subtarget->hasVFP4()">,
def NoVFP4 : Predicate<"!Subtarget->hasVFP4()">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicate<"FeatureNEON">;
-def HasNEONVFP4 : Predicate<"Subtarget->hasNEONVFP4()">,
- AssemblerPredicate<"FeatureNEONVFP4">;
-def NoNEONVFP4 : Predicate<"!Subtarget->hasNEONVFP4()">;
+def HasNEON2 : Predicate<"Subtarget->hasNEON2()">,
+ AssemblerPredicate<"FeatureNEON2">;
+def NoNEON2 : Predicate<"!Subtarget->hasNEON2()">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 387e16d..17fe808 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -4060,10 +4060,10 @@ defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
v2f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
+ Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
v4f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
+ Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
@@ -4118,10 +4118,10 @@ defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
v2f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
+ Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
v4f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
+ Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
@@ -4174,19 +4174,19 @@ defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
v2f32, fmul_su, fadd_mlx>,
- Requires<[HasNEONVFP4]>;
+ Requires<[HasNEON2,FPContractions]>;
def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
v4f32, fmul_su, fadd_mlx>,
- Requires<[HasNEONVFP4]>;
+ Requires<[HasNEON2,FPContractions]>;
// Fused Vector Multiply Subtract (floating-point)
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
v2f32, fmul_su, fsub_mlx>,
- Requires<[HasNEONVFP4]>;
+ Requires<[HasNEON2,FPContractions]>;
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
v4f32, fmul_su, fsub_mlx>,
- Requires<[HasNEONVFP4]>;
+ Requires<[HasNEON2,FPContractions]>;
// Vector Subtract Operations.
@@ -5541,13 +5541,13 @@ def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
- Requires<[HasNEONVFP4, UseNEONForFP]>;
+ Requires<[HasNEON2, UseNEONForFP,FPContractions]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
- Requires<[HasNEONVFP4, UseNEONForFP]>;
+ Requires<[HasNEON2, UseNEONForFP,FPContractions]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index bf32b49..aa10af7 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -1030,7 +1030,7 @@ def VFMAD : ADbI<0b11101, 0b10, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,FPContractions]>;
def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1038,17 +1038,17 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP4,DontUseNEONForFP]> {
+ Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines.
}
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,FPContractions]>;
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP4,DontUseNEONForFP]>;
+ Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>;
def VFMSD : ADbI<0b11101, 0b10, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -1056,7 +1056,7 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,FPContractions]>;
def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1064,17 +1064,17 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP4,DontUseNEONForFP]> {
+ Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines.
}
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,FPContractions]>;
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP4,DontUseNEONForFP]>;
+ Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>;
def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -1082,7 +1082,7 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,FPContractions]>;
def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1090,17 +1090,17 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP4,DontUseNEONForFP]> {
+ Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines.
}
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,FPContractions]>;
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
(VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP4,DontUseNEONForFP]>;
+ Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>;
def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -1108,24 +1108,24 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,FPContractions]>;
def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP4,DontUseNEONForFP]> {
+ Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines.
}
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
(VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,FPContractions]>;
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
(VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP4,DontUseNEONForFP]>;
+ Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>;
//===----------------------------------------------------------------------===//
// FP Conditional moves.
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 64b2b62..1bd6f1c 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -49,7 +49,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
, HasVFPv3(false)
, HasVFPv4(false)
, HasNEON(false)
- , HasNEONVFPv4(false)
+ , HasNEON2(false)
, UseNEONForSinglePrecisionFP(false)
, SlowFPVMLx(false)
, HasVMLxForwarding(false)
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index c94795f..3d9c03d 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -51,7 +51,7 @@ protected:
bool HasVFPv3;
bool HasVFPv4;
bool HasNEON;
- bool HasNEONVFPv4;
+ bool HasNEON2;
/// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
/// specified. Use the method useNEONForSinglePrecisionFP() to
@@ -205,7 +205,7 @@ protected:
bool hasVFP3() const { return HasVFPv3; }
bool hasVFP4() const { return HasVFPv4; }
bool hasNEON() const { return HasNEON; }
- bool hasNEONVFP4() const { return HasNEONVFPv4; }
+ bool hasNEON2() const { return HasNEON2 || (HasNEON && HasVFPv4); }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }