aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBill Wendling <isanbard@gmail.com>2010-10-04 20:24:01 +0000
committerBill Wendling <isanbard@gmail.com>2010-10-04 20:24:01 +0000
commit6cf6c79e8281835898cd105a30efedd44b554000 (patch)
tree371a460697778f20e7dfd82a3b557b429ec739c4
parent3f55c24df9527de345f6cc960944840a7a101c6a (diff)
downloadexternal_llvm-6cf6c79e8281835898cd105a30efedd44b554000.zip
external_llvm-6cf6c79e8281835898cd105a30efedd44b554000.tar.gz
external_llvm-6cf6c79e8281835898cd105a30efedd44b554000.tar.bz2
The pshufw instruction came about in MMX2 when SSE was introduced. Don't place
it in with the SSSE3 instructions. Steward! Could you place this chair by the aft sun deck? I'm trying to get away from the Astors. They are such boors! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@115552 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/IntrinsicsX86.td2
-rw-r--r--lib/Target/X86/X86InstrMMX.td6
-rw-r--r--lib/VMCore/AutoUpgrade.cpp44
-rw-r--r--test/Assembler/AutoUpgradeMMXIntrinsics.ll3
4 files changed, 44 insertions, 11 deletions
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index 2b18e7f..4946220 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -630,7 +630,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_ssse3_pshuf_b_128 : GCCBuiltin<"__builtin_ia32_pshufb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_ssse3_pshuf_w : GCCBuiltin<"__builtin_ia32_pshufw">,
+ def int_x86_sse_pshuf_w : GCCBuiltin<"__builtin_ia32_pshufw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i8_ty],
[IntrNoMem]>;
}
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index c78099e..bb2165a 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -342,13 +342,13 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, i8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
- (int_x86_ssse3_pshuf_w VR64:$src1, imm:$src2))]>;
+ (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
(outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
- (int_x86_ssse3_pshuf_w (load_mmx addr:$src1),
- imm:$src2))]>;
+ (int_x86_sse_pshuf_w (load_mmx addr:$src1),
+ imm:$src2))]>;
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index 5b5955d..64b0b51 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -528,6 +528,16 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
// or 0.
NewFn = 0;
return true;
+ } else if (Name.compare(5, 17, "x86.ssse3.pshuf.w", 17) == 0) {
+ // This is an SSE/MMX instruction.
+ const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
+ NewFn =
+ cast<Function>(M->getOrInsertFunction("llvm.x86.sse.pshuf.w",
+ X86_MMXTy,
+ X86_MMXTy,
+ Type::getInt8Ty(F->getContext()),
+ (Type*)0));
+ return true;
}
break;
@@ -631,22 +641,23 @@ static void ConstructNewCallInst(Function *NewFn, CallInst *OldCI,
NewCI->setTailCall(OldCI->isTailCall());
NewCI->setCallingConv(OldCI->getCallingConv());
- // Handle any uses of the old CallInst.
+ // Handle any uses of the old CallInst. If the type has changed, add a cast.
if (!OldCI->use_empty()) {
- // If the type has changed, add a cast.
- Instruction *I = OldCI;
if (OldCI->getType() != NewCI->getType()) {
Function *OldFn = OldCI->getCalledFunction();
CastInst *RetCast =
CastInst::Create(CastInst::getCastOpcode(NewCI, true,
OldFn->getReturnType(), true),
NewCI, OldFn->getReturnType(), NewCI->getName(),OldCI);
- I = RetCast;
+
+ // Replace all uses of the old call with the new cast which has the
+ // correct type.
+ OldCI->replaceAllUsesWith(RetCast);
+ } else {
+ OldCI->replaceAllUsesWith(NewCI);
}
- // Replace all uses of the old call with the new cast which has the
- // correct type.
- OldCI->replaceAllUsesWith(I);
}
+
// Clean up the old call now that it has been completely upgraded.
OldCI->eraseFromParent();
}
@@ -1150,6 +1161,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
ConstructNewCallInst(NewFn, CI, Operands, 3);
break;
}
+ case Intrinsic::x86_sse_pshuf_w: {
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
+
+ // Cast the operand to the X86 MMX type.
+ Value *Operands[2];
+ Operands[0] =
+ Builder.CreateBitCast(CI->getArgOperand(0),
+ NewFn->getFunctionType()->getParamType(0),
+ "upgraded.");
+ Operands[1] =
+ Builder.CreateTrunc(CI->getArgOperand(1),
+ Type::getInt8Ty(C),
+ "upgraded.");
+
+ ConstructNewCallInst(NewFn, CI, Operands, 2);
+ break;
+ }
+
#if 0
case Intrinsic::x86_mmx_cvtsi32_si64: {
// The return type needs to be changed.
diff --git a/test/Assembler/AutoUpgradeMMXIntrinsics.ll b/test/Assembler/AutoUpgradeMMXIntrinsics.ll
index 898582e..54120ff 100644
--- a/test/Assembler/AutoUpgradeMMXIntrinsics.ll
+++ b/test/Assembler/AutoUpgradeMMXIntrinsics.ll
@@ -4,6 +4,7 @@
; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<2 x i32\\\>}
; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<4 x i16\\\>}
; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<8 x i8\\\>}
+; RUN: grep {llvm\\.x86\\.sse\\.pshuf\\.w} %t | not grep i32
; Addition
declare <8 x i8> @llvm.x86.mmx.padd.b(<8 x i8>, <8 x i8>) nounwind readnone
@@ -207,6 +208,7 @@ declare void @llvm.x86.mmx.movnt.dq(i32*, <1 x i64>) nounwind readnone
declare <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64>, <1 x i64>, i8) nounwind readnone
declare i32 @llvm.x86.mmx.pextr.w(<1 x i64>, i32) nounwind readnone
declare <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64>, i32, i32) nounwind readnone
+declare <4 x i16> @llvm.x86.ssse3.pshuf.w(<4 x i16>, i32) nounwind readnone
define void @misc(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D,
<2 x i32> %E, <2 x i32> %F, <1 x i64> %G, <1 x i64> %H,
i32* %I, i8 %J, i16 %K, i32 %L) {
@@ -216,5 +218,6 @@ define void @misc(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D,
%r2 = call <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> %G, <1 x i64> %H, i8 %J)
%r3 = call i32 @llvm.x86.mmx.pextr.w(<1 x i64> %G, i32 37)
%r4 = call <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> %G, i32 37, i32 927)
+ %r5 = call <4 x i16> @llvm.x86.ssse3.pshuf.w(<4 x i16> %C, i32 37)
ret void
}