about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Nadav Rotem <nadav.rotem@intel.com> 2012-07-18 08:14:48 +0000
committer: Nadav Rotem <nadav.rotem@intel.com> 2012-07-18 08:14:48 +0000
commit: fd34c110cf34b0d0734922330dd8d9dbf827e7fa (patch)
tree: e1ac0bd94995e7b05f78eaf614e14d3e192f97c6
parent: a0f14afee16ca976fef79c64df9a678e7f26cf43 (diff)
download: external_llvm-fd34c110cf34b0d0734922330dd8d9dbf827e7fa.zip
external_llvm-fd34c110cf34b0d0734922330dd8d9dbf827e7fa.tar.gz
external_llvm-fd34c110cf34b0d0734922330dd8d9dbf827e7fa.tar.bz2
The vbroadcast family of instructions has 'fallback patterns' for the case where
the load source operand is used by multiple nodes. The v2i64 broadcast was emulated by shuffling the two lower i32 elements to the upper two. We had a bug in the immediate used for the broadcast: an immediate of 0 copies only i32 element 0 into all four positions, so just the low 32 bits of the 64-bit value were replicated. Replace 0 with 0x44. 0x44 means [01|00|01|00], which selects i32 elements 0 and 1 for both the lower and the upper 64-bit lane and therefore corresponds to the correct lane. Patch by Michael Kuperstein.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160430 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/X86/X86InstrSSE.td 14
1 files changed, 8 insertions, 6 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 10cc483..c2d169a 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7781,11 +7781,12 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
(VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
(VPSHUFDri
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd), 0),
- sub_xmm),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd),
+ 0x44),
+ sub_xmm),
(VPSHUFDri
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd),
- 0), 1)>;
+ 0x44), 1)>;
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
(VPSHUFDri
@@ -7801,11 +7802,12 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
(VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
(VPSHUFDri
- (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd), 0),
- sub_xmm),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd),
+ 0x44),
+ sub_xmm),
(VPSHUFDri
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd),
- 0), 1)>;
+ 0x44), 1)>;
}
}