aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDale Johannesen <dalej@apple.com>2008-10-10 23:51:03 +0000
committerDale Johannesen <dalej@apple.com>2008-10-10 23:51:03 +0000
commite397acce9db154d8355eb78e701622383f551074 (patch)
treec60537a248a27cda5986b92569ee623ba2be01ec
parentfc19fbd2ca2759eafaea4d174a814f9236f2d0e8 (diff)
downloadexternal_llvm-e397acce9db154d8355eb78e701622383f551074.zip
external_llvm-e397acce9db154d8355eb78e701622383f551074.tar.gz
external_llvm-e397acce9db154d8355eb78e701622383f551074.tar.bz2
Fix SSE4.1 roundss, roundsd. While the instructions have
the same pattern as roundpd/roundps, the Intel compiler builtins do not: rounds* has an extra operand. Fixes gcc.target/i386/sse4_1-rounds[sd]-[1234].c git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@57370 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/IntrinsicsX86.td4
-rw-r--r--lib/Target/X86/X86InstrSSE.td90
2 files changed, 54 insertions, 40 deletions
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index 682eb51..20b5563 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -685,13 +685,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// FP rounding ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_round_ss : GCCBuiltin<"__builtin_ia32_roundss">,
- Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty,
+ Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse41_round_ps : GCCBuiltin<"__builtin_ia32_roundps">,
Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse41_round_sd : GCCBuiltin<"__builtin_ia32_roundsd">,
- Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
+ Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse41_round_pd : GCCBuiltin<"__builtin_ia32_roundpd">,
Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 35348b6..6d0d768 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3169,29 +3169,11 @@ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
// SSE4.1 Instructions
//===----------------------------------------------------------------------===//
-multiclass sse41_fp_unop_rm<bits<8> opcss, bits<8> opcps,
- bits<8> opcsd, bits<8> opcpd,
+multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
string OpcodeStr,
- Intrinsic F32Int,
Intrinsic V4F32Int,
- Intrinsic F64Int,
Intrinsic V2F64Int> {
// Intrinsic operation, reg.
- def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (F32Int VR128:$src1, imm:$src2))]>,
- OpSize;
-
- // Intrinsic operation, mem.
- def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
- (outs VR128:$dst), (ins ssmem:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (F32Int sse_load_f32:$src1, imm:$src2))]>,
- OpSize;
-
// Vector intrinsic operation, reg
def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
@@ -3209,22 +3191,6 @@ multiclass sse41_fp_unop_rm<bits<8> opcss, bits<8> opcps,
(V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
OpSize;
- // Intrinsic operation, reg.
- def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (F64Int VR128:$src1, imm:$src2))]>,
- OpSize;
-
- // Intrinsic operation, mem.
- def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
- (outs VR128:$dst), (ins sdmem:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (F64Int sse_load_f64:$src1, imm:$src2))]>,
- OpSize;
-
// Vector intrinsic operation, reg
def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
@@ -3243,10 +3209,58 @@ multiclass sse41_fp_unop_rm<bits<8> opcss, bits<8> opcps,
OpSize;
}
+let Constraints = "$src1 = $dst" in {
+multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr,
+ Intrinsic F32Int,
+ Intrinsic F64Int> {
+ // Intrinsic operation, reg.
+ def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
+ (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, mem.
+ def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
+ (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, reg.
+ def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
+ (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, mem.
+ def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
+ (outs VR128:$dst),
+ (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
+ OpSize;
+}
+}
+
// FP round - roundss, roundps, roundsd, roundpd
-defm ROUND : sse41_fp_unop_rm<0x0A, 0x08, 0x0B, 0x09, "round",
- int_x86_sse41_round_ss, int_x86_sse41_round_ps,
- int_x86_sse41_round_sd, int_x86_sse41_round_pd>;
+defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round",
+ int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
+defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
+ int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,