about | summary | refs | log | tree | commit | diff | stats
path: root/lib/Target
diff options
context:
space:
mode:
author: David Greene <greened@obbligato.org> 2011-03-02 17:23:43 +0000
committer: David Greene <greened@obbligato.org> 2011-03-02 17:23:43 +0000
commit: a20244d1bab2ae6dad1c82c16670d7eb1b3a9087 (patch)
tree: 08a97db15ba97dc695ecea19935051bc89e3937c /lib/Target
parent: 31c488c8bdef56618e45a5356a7c6770d13e1241 (diff)
download: external_llvm-a20244d1bab2ae6dad1c82c16670d7eb1b3a9087.zip
external_llvm-a20244d1bab2ae6dad1c82c16670d7eb1b3a9087.tar.gz
external_llvm-a20244d1bab2ae6dad1c82c16670d7eb1b3a9087.tar.bz2
[AVX] Fix mask predicates for 256-bit UNPCKLPS/D and implement
missing patterns for them. Add a SIMD test subdirectory to hold tests
for SIMD instruction selection correctness and quality.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126845 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- lib/Target/X86/Utils/X86ShuffleDecode.cpp 25
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 67
-rw-r--r-- lib/Target/X86/X86InstrFragmentsSIMD.td 2
-rw-r--r-- lib/Target/X86/X86InstrSSE.td 8
4 files changed, 78 insertions, 24 deletions
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index eeb83c1..cd06060 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -165,12 +165,25 @@ void DecodeUNPCKLPDMask(unsigned NElts,
/// datatypes and vector widths.
void DecodeUNPCKLPMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask) {
-
- unsigned NElts = VT.getVectorNumElements();
-
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(i); // Reads from dest
- ShuffleMask.push_back(i+NElts); // Reads from src
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Handle vector lengths > 128 bits. Define a "section" as a set of
+ // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+ // sections.
+ unsigned NumSections = VT.getSizeInBits() / 128;
+ if (NumSections == 0 ) NumSections = 1; // Handle MMX
+ unsigned NumSectionElts = NumElts / NumSections;
+
+ unsigned Start = 0;
+ unsigned End = NumSectionElts / 2;
+ for (unsigned s = 0; s < NumSections; ++s) {
+ for (unsigned i = Start; i != End; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i+NumSectionElts); // Reads from src/src2
+ }
+ // Process the next 128 bits.
+ Start += NumSectionElts;
+ End += NumSectionElts;
}
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 09ec69d..722202d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3173,7 +3173,8 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
unsigned NumElems = N->getValueType(0).getVectorNumElements();
- if (NumElems != 2 && NumElems != 4)
+ if ((NumElems != 2 && NumElems != 4)
+ || N->getValueType(0).getSizeInBits() > 128)
return false;
for (unsigned i = 0; i < NumElems/2; ++i)
@@ -3195,19 +3196,36 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
return false;
- for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
- int BitI = Mask[i];
- int BitI1 = Mask[i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (V2IsSplat) {
- if (!isUndefOrEqual(BitI1, NumElts))
- return false;
- } else {
- if (!isUndefOrEqual(BitI1, j + NumElts))
+ // Handle vector lengths > 128 bits. Define a "section" as a set of
+ // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+ // sections.
+ unsigned NumSections = VT.getSizeInBits() / 128;
+ if (NumSections == 0 ) NumSections = 1; // Handle MMX
+ unsigned NumSectionElts = NumElts / NumSections;
+
+ unsigned Start = 0;
+ unsigned End = NumSectionElts;
+ for (unsigned s = 0; s < NumSections; ++s) {
+ for (unsigned i = Start, j = s * NumSectionElts;
+ i != End;
+ i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
return false;
+ if (V2IsSplat) {
+ if (!isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j + NumElts))
+ return false;
+ }
}
+ // Process the next 128 bits.
+ Start += NumSectionElts;
+ End += NumSectionElts;
}
+
return true;
}
@@ -3255,14 +3273,27 @@ static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
return false;
- for (int i = 0, j = 0; i != NumElems; i += 2, ++j) {
- int BitI = Mask[i];
- int BitI1 = Mask[i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (!isUndefOrEqual(BitI1, j))
- return false;
+ // Handle vector lengths > 128 bits. Define a "section" as a set of
+ // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+ // sections.
+ unsigned NumSections = VT.getSizeInBits() / 128;
+ if (NumSections == 0 ) NumSections = 1; // Handle MMX
+ unsigned NumSectionElts = NumElems / NumSections;
+
+ for (unsigned s = 0; s < NumSections; ++s) {
+ for (unsigned i = s * NumSectionElts, j = s * NumSectionElts;
+ i != NumSectionElts * (s + 1);
+ i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
}
+
return true;
}
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 5016c0f..3cbfac1 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -132,6 +132,8 @@ def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
+def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>;
+def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>;
def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index b912949..45e9051 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5622,11 +5622,15 @@ def : Pat<(X86Movddup (bc_v2f64
// Shuffle with UNPCKLPS
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
(VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
(UNPCKLPSrm VR128:$src1, addr:$src2)>;
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
(VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
(UNPCKLPSrr VR128:$src1, VR128:$src2)>;
@@ -5644,11 +5648,15 @@ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
// Shuffle with UNPCKLPD
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
(VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
(UNPCKLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
(VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
(UNPCKLPDrr VR128:$src1, VR128:$src2)>;