Diffstat (limited to 'test/CodeGen/X86/fold-vex.ll')
-rw-r--r-- | test/CodeGen/X86/fold-vex.ll | 39
1 files changed, 27 insertions, 12 deletions
diff --git a/test/CodeGen/X86/fold-vex.ll b/test/CodeGen/X86/fold-vex.ll
index 2bb5b44..5a8b1d8 100644
--- a/test/CodeGen/X86/fold-vex.ll
+++ b/test/CodeGen/X86/fold-vex.ll
@@ -1,16 +1,31 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s
+; Use CPU parameters to ensure that a CPU-specific attribute is not overriding the AVX definition.
-;CHECK: @test
-; No need to load from memory. The operand will be loaded as part of th AND instr.
-;CHECK-NOT: vmovaps
-;CHECK: vandps
-;CHECK: ret
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-avx | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx -mattr=-avx | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -mattr=-avx | FileCheck %s --check-prefix=SSE
-define void @test1(<8 x i32>* %p0, <8 x i32> %in1) nounwind {
-entry:
-  %in0 = load <8 x i32>* %p0, align 2
-  %a = and <8 x i32> %in0, %in1
-  store <8 x i32> %a, <8 x i32>* undef
-  ret void
+; No need to load unaligned operand from memory using an explicit instruction with AVX.
+; The operand should be folded into the AND instr.
+
+; With SSE, folding memory operands into math/logic ops requires 16-byte alignment
+; unless specially configured on some CPUs such as AMD Family 10H.
+
+define <4 x i32> @test1(<4 x i32>* %p0, <4 x i32> %in1) nounwind {
+  %in0 = load <4 x i32>* %p0, align 2
+  %a = and <4 x i32> %in0, %in1
+  ret <4 x i32> %a
+
+; CHECK-LABEL: @test1
+; CHECK-NOT: vmovups
+; CHECK: vandps (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: ret
+
+; SSE-LABEL: @test1
+; SSE: movups (%rdi), %xmm1
+; SSE-NEXT: andps %xmm1, %xmm0
+; SSE-NEXT: ret
 }
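
For context, the new comments in the patched test describe when an SSE memory operand can be folded directly into the logic op: only when the load is known 16-byte aligned. The sketch below is a minimal companion case in the same style, not part of this commit; the function name @test_aligned and the expected SSE output are assumptions illustrating the aligned case rather than checks taken from the LLVM test suite.

; Hypothetical aligned variant (assumed, not from this commit): with align 16,
; SSE should be able to fold the load into andps, so no separate movups/movaps
; instruction is expected before the logic op.
define <4 x i32> @test_aligned(<4 x i32>* %p0, <4 x i32> %in1) nounwind {
  %in0 = load <4 x i32>* %p0, align 16
  %a = and <4 x i32> %in0, %in1
  ret <4 x i32> %a

; SSE-LABEL: @test_aligned
; SSE-NOT: movups
; SSE: andps (%rdi), %xmm0
; SSE-NEXT: ret
}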