diff options
author | Evan Cheng <evan.cheng@apple.com> | 2008-05-24 02:14:05 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2008-05-24 02:14:05 +0000 |
commit | df0b78da33a0e486fa892e10e2c2a0e2ed644a97 (patch) | |
tree | 7348b915bbb3da181d246933c11375d67eed6d5a | |
parent | 48d91af0c37fa7216107b2b4e6b28b8fe1e8b22d (diff) | |
download | external_llvm-df0b78da33a0e486fa892e10e2c2a0e2ed644a97.zip external_llvm-df0b78da33a0e486fa892e10e2c2a0e2ed644a97.tar.gz external_llvm-df0b78da33a0e486fa892e10e2c2a0e2ed644a97.tar.bz2 |
Eliminate the x86.sse2.movs.d, x86.sse2.shuf.pd, x86.sse2.unpckh.pd, and x86.sse2.unpckl.pd intrinsics. Calls to them in existing bitcode are auto-upgraded into shufflevector instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@51531 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/IntrinsicsX86.td | 12 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 16 | ||||
-rw-r--r-- | lib/VMCore/AutoUpgrade.cpp | 48 | ||||
-rw-r--r-- | test/Bitcode/sse2_movs_d.ll | 2 | ||||
-rw-r--r-- | test/Bitcode/sse2_movs_d.ll.bc | bin | 0 -> 476 bytes | |||
-rw-r--r-- | test/Bitcode/sse2_shuf_pd.ll | 2 | ||||
-rw-r--r-- | test/Bitcode/sse2_shuf_pd.ll.bc | bin | 0 -> 584 bytes | |||
-rw-r--r-- | test/Bitcode/sse2_unpck_pd.ll | 3 | ||||
-rw-r--r-- | test/Bitcode/sse2_unpck_pd.ll.bc | bin | 0 -> 572 bytes |
9 files changed, 50 insertions, 33 deletions
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index 8af50c1..47c8e19 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -504,18 +504,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Shuffles. // FIXME: Temporary workarounds since 2-wide shuffle is broken. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_sse2_movs_d : GCCBuiltin<"__builtin_ia32_movsd">, - Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, - llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_sse2_shuf_pd : GCCBuiltin<"__builtin_ia32_shufpd">, - Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, - llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_sse2_unpckh_pd : GCCBuiltin<"__builtin_ia32_unpckhpd">, - Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, - llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_sse2_unpckl_pd : GCCBuiltin<"__builtin_ia32_unpcklpd">, - Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, - llvm_v2f64_ty], [IntrNoMem]>; def int_x86_sse2_punpckh_qdq : GCCBuiltin<"__builtin_ia32_punpckhqdq128">, Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 6f4eede..8e6d9df 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3051,22 +3051,6 @@ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>; // FIXME: Temporary workaround since 2-wide shuffle is broken. 
-def : Pat<(int_x86_sse2_movs_d VR128:$src1, VR128:$src2), - (v2f64 (MOVLPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>; -def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, VR128:$src2, imm:$src3), - (v2f64 (SHUFPDrri VR128:$src1, VR128:$src2, imm:$src3))>, - Requires<[HasSSE2]>; -def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, (memop addr:$src2),imm:$src3), - (v2f64 (SHUFPDrmi VR128:$src1, addr:$src2, imm:$src3))>, - Requires<[HasSSE2]>; -def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, VR128:$src2), - (v2f64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>; -def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, (memop addr:$src2)), - (v2f64 (UNPCKHPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>; -def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, VR128:$src2), - (v2f64 (UNPCKLPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>; -def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, (memop addr:$src2)), - (v2f64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>; def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, VR128:$src2), (v2i64 (PUNPCKHQDQrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>; def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, (memop addr:$src2)), diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index 3814639..0d6ae43 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -150,7 +150,11 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { return true; } else if (Name.compare(5,17,"x86.sse2.loadh.pd",17) == 0 || Name.compare(5,17,"x86.sse2.loadl.pd",17) == 0 || - Name.compare(5,16,"x86.sse2.movl.dq",16) == 0) { + Name.compare(5,16,"x86.sse2.movl.dq",16) == 0 || + Name.compare(5,15,"x86.sse2.movs.d",15) == 0 || + Name.compare(5,16,"x86.sse2.shuf.pd",16) == 0 || + Name.compare(5,18,"x86.sse2.unpckh.pd",18) == 0 || + Name.compare(5,18,"x86.sse2.unpckl.pd",18) == 0 ) { // Calls to these intrinsics are transformed into ShuffleVector's. 
NewFn = 0; return true; @@ -187,17 +191,28 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { if (!NewFn) { bool isLoadH = false, isLoadL = false, isMovL = false; + bool isMovSD = false, isShufPD = false; + bool isUnpckhPD = false, isUnpcklPD = false; if (strcmp(F->getNameStart(), "llvm.x86.sse2.loadh.pd") == 0) isLoadH = true; else if (strcmp(F->getNameStart(), "llvm.x86.sse2.loadl.pd") == 0) isLoadL = true; else if (strcmp(F->getNameStart(), "llvm.x86.sse2.movl.dq") == 0) isMovL = true; - - if (isLoadH || isLoadL || isMovL) { + else if (strcmp(F->getNameStart(), "llvm.x86.sse2.movs.d") == 0) + isMovSD = true; + else if (strcmp(F->getNameStart(), "llvm.x86.sse2.shuf.pd") == 0) + isShufPD = true; + else if (strcmp(F->getNameStart(), "llvm.x86.sse2.unpckh.pd") == 0) + isUnpckhPD = true; + else if (strcmp(F->getNameStart(), "llvm.x86.sse2.unpckl.pd") == 0) + isUnpcklPD = true; + + if (isLoadH || isLoadL || isMovL || isMovSD || isShufPD || + isUnpckhPD || isUnpcklPD) { std::vector<Constant*> Idxs; Value *Op0 = CI->getOperand(1); - ShuffleVectorInst *SI; + ShuffleVectorInst *SI = NULL; if (isLoadH || isLoadL) { Value *Op1 = UndefValue::get(Op0->getType()); Value *Addr = new BitCastInst(CI->getOperand(2), @@ -216,7 +231,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { } Value *Mask = ConstantVector::get(Idxs); SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI); - } else { + } else if (isMovL) { Constant *Zero = ConstantInt::get(Type::Int32Ty, 0); Idxs.push_back(Zero); Idxs.push_back(Zero); @@ -231,8 +246,31 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Idxs.push_back(ConstantInt::get(Type::Int32Ty, 3)); Value *Mask = ConstantVector::get(Idxs); SI = new ShuffleVectorInst(ZeroV, Op0, Mask, "upgraded.", CI); + } else if (isMovSD || isUnpckhPD || isUnpcklPD) { + Value *Op1 = CI->getOperand(2); + if (isMovSD) { + Idxs.push_back(ConstantInt::get(Type::Int32Ty, 2)); + 
Idxs.push_back(ConstantInt::get(Type::Int32Ty, 1)); + } else if (isUnpckhPD) { + Idxs.push_back(ConstantInt::get(Type::Int32Ty, 1)); + Idxs.push_back(ConstantInt::get(Type::Int32Ty, 3)); + } else { + Idxs.push_back(ConstantInt::get(Type::Int32Ty, 0)); + Idxs.push_back(ConstantInt::get(Type::Int32Ty, 2)); + } + Value *Mask = ConstantVector::get(Idxs); + SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI); + } else if (isShufPD) { + Value *Op1 = CI->getOperand(2); + unsigned MaskVal = cast<ConstantInt>(CI->getOperand(3))->getZExtValue(); + Idxs.push_back(ConstantInt::get(Type::Int32Ty, MaskVal & 1)); + Idxs.push_back(ConstantInt::get(Type::Int32Ty, ((MaskVal >> 1) & 1)+2)); + Value *Mask = ConstantVector::get(Idxs); + SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI); } + assert(SI && "Unexpected!"); + // Handle any uses of the old CallInst. if (!CI->use_empty()) // Replace all uses of the old call with the new cast which has the diff --git a/test/Bitcode/sse2_movs_d.ll b/test/Bitcode/sse2_movs_d.ll new file mode 100644 index 0000000..25a35b6 --- /dev/null +++ b/test/Bitcode/sse2_movs_d.ll @@ -0,0 +1,2 @@ +; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.movs.d} +; RUN: llvm-dis < %s.bc | grep shufflevector diff --git a/test/Bitcode/sse2_movs_d.ll.bc b/test/Bitcode/sse2_movs_d.ll.bc Binary files differ new file mode 100644 index 0000000..719d529 --- /dev/null +++ b/test/Bitcode/sse2_movs_d.ll.bc diff --git a/test/Bitcode/sse2_shuf_pd.ll b/test/Bitcode/sse2_shuf_pd.ll new file mode 100644 index 0000000..5829edb --- /dev/null +++ b/test/Bitcode/sse2_shuf_pd.ll @@ -0,0 +1,2 @@ +; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.shuf.pd} +; RUN: llvm-dis < %s.bc | grep shufflevector diff --git a/test/Bitcode/sse2_shuf_pd.ll.bc b/test/Bitcode/sse2_shuf_pd.ll.bc Binary files differ new file mode 100644 index 0000000..832c39e --- /dev/null +++ b/test/Bitcode/sse2_shuf_pd.ll.bc diff --git a/test/Bitcode/sse2_unpck_pd.ll b/test/Bitcode/sse2_unpck_pd.ll new file
mode 100644 index 0000000..f4e5d54 --- /dev/null +++ b/test/Bitcode/sse2_unpck_pd.ll @@ -0,0 +1,3 @@ +; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.unpckh.pd} +; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.unpckl.pd} +; RUN: llvm-dis < %s.bc | grep shufflevector diff --git a/test/Bitcode/sse2_unpck_pd.ll.bc b/test/Bitcode/sse2_unpck_pd.ll.bc Binary files differ new file mode 100644 index 0000000..4fb829c --- /dev/null +++ b/test/Bitcode/sse2_unpck_pd.ll.bc |