aboutsummaryrefslogtreecommitdiffstats
path: root/test/CodeGen
diff options
context:
space:
mode:
author Stephen Hines <srhines@google.com> 2014-10-17 20:14:58 +0000
committer Gerrit Code Review <noreply-gerritcodereview@google.com> 2014-10-17 20:14:58 +0000
commit 281cc67b6ac794b1eb8232e6efca366d870dad43 (patch)
tree ac7eadad1814ff2f989e716c1ed0bd8fd1557e9a /test/CodeGen
parent 06d8c96f9e7be298f0e1372ccb8dea18fe149afa (diff)
parent bfc2d688b591c574c0cc788348c74545ce894efa (diff)
download external_llvm-281cc67b6ac794b1eb8232e6efca366d870dad43.zip
external_llvm-281cc67b6ac794b1eb8232e6efca366d870dad43.tar.gz
external_llvm-281cc67b6ac794b1eb8232e6efca366d870dad43.tar.bz2
Merge "Bring in fixes for Cortex-A53 errata + build updates."
Diffstat (limited to 'test/CodeGen')
-rw-r--r-- test/CodeGen/AArch64/a57-csel.ll 11
-rw-r--r-- test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll 323
-rw-r--r-- test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll 534
-rw-r--r-- test/CodeGen/AArch64/remat.ll 16
-rw-r--r-- test/CodeGen/X86/critical-anti-dep-breaker.ll 28
5 files changed, 912 insertions, 0 deletions
diff --git a/test/CodeGen/AArch64/a57-csel.ll b/test/CodeGen/AArch64/a57-csel.ll
new file mode 100644
index 0000000..9d16d1a
--- /dev/null
+++ b/test/CodeGen/AArch64/a57-csel.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -mcpu=cortex-a57 -aarch64-enable-early-ifcvt=false | FileCheck %s
+
+; Check that the select is expanded into a branch sequence.
+define i64 @f(i64 %a, i64 %b, i64* %c, i64 %d, i64 %e) {
+ ; CHECK: cbz
+ %x0 = load i64* %c
+ %x1 = icmp eq i64 %x0, 0
+ %x2 = select i1 %x1, i64 %a, i64 %b
+ %x3 = add i64 %x2, %d
+ ret i64 %x3
+}
diff --git a/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll b/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll
new file mode 100644
index 0000000..fb229fc
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll
@@ -0,0 +1,323 @@
+; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
+; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD
+
+; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so
+; our test strategy is to:
+; * Force the pass to always perform register swapping even if the dest register is of the
+; correct color already (-aarch64-a57-fp-load-balancing-force-all)
+; * Force the pass to ignore all hints it obtained from regalloc
+; (-aarch64-a57-fp-load-balancing-override), and run it twice: once always hinting even, once always hinting odd.
+;
+; We then use regex magic to check that in the two cases the register allocation is
+; different; this is what gives us the testing coverage and distinguishes cases where
+; the pass has done some work versus accidental regalloc.
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+; Non-overlapping groups - shouldn't need any changing at all.
+
+; CHECK-LABEL: f1:
+; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
+; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
+; CHECK: fmadd [[x]]
+; CHECK: fmsub [[x]]
+; CHECK: fmadd [[x]]
+; CHECK: str [[x]]
+
+define void @f1(double* nocapture readonly %p, double* nocapture %q) #0 {
+entry:
+ %0 = load double* %p, align 8
+ %arrayidx1 = getelementptr inbounds double* %p, i64 1
+ %1 = load double* %arrayidx1, align 8
+ %arrayidx2 = getelementptr inbounds double* %p, i64 2
+ %2 = load double* %arrayidx2, align 8
+ %arrayidx3 = getelementptr inbounds double* %p, i64 3
+ %3 = load double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double* %p, i64 4
+ %4 = load double* %arrayidx4, align 8
+ %mul = fmul fast double %0, %1
+ %add = fadd fast double %mul, %4
+ %mul5 = fmul fast double %1, %2
+ %add6 = fadd fast double %mul5, %add
+ %mul7 = fmul fast double %1, %3
+ %sub = fsub fast double %add6, %mul7
+ %mul8 = fmul fast double %2, %3
+ %add9 = fadd fast double %mul8, %sub
+ store double %add9, double* %q, align 8
+ %arrayidx11 = getelementptr inbounds double* %p, i64 5
+ %5 = load double* %arrayidx11, align 8
+ %arrayidx12 = getelementptr inbounds double* %p, i64 6
+ %6 = load double* %arrayidx12, align 8
+ %arrayidx13 = getelementptr inbounds double* %p, i64 7
+ %7 = load double* %arrayidx13, align 8
+ %mul15 = fmul fast double %6, %7
+ %mul16 = fmul fast double %0, %5
+ %add17 = fadd fast double %mul16, %mul15
+ %mul18 = fmul fast double %5, %6
+ %add19 = fadd fast double %mul18, %add17
+ %arrayidx20 = getelementptr inbounds double* %q, i64 1
+ store double %add19, double* %arrayidx20, align 8
+ ret void
+}
+
+; Overlapping groups - coloring needed.
+
+; CHECK-LABEL: f2:
+; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
+; CHECK-EVEN: fmul [[y:d[0-9]*[13579]]]
+; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
+; CHECK-ODD: fmul [[y:d[0-9]*[02468]]]
+; CHECK: fmadd [[x]]
+; CHECK: fmadd [[y]]
+; CHECK: fmsub [[x]]
+; CHECK: fmadd [[y]]
+; CHECK: fmadd [[x]]
+; CHECK: stp [[x]], [[y]]
+
+define void @f2(double* nocapture readonly %p, double* nocapture %q) #0 {
+entry:
+ %0 = load double* %p, align 8
+ %arrayidx1 = getelementptr inbounds double* %p, i64 1
+ %1 = load double* %arrayidx1, align 8
+ %arrayidx2 = getelementptr inbounds double* %p, i64 2
+ %2 = load double* %arrayidx2, align 8
+ %arrayidx3 = getelementptr inbounds double* %p, i64 3
+ %3 = load double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double* %p, i64 4
+ %4 = load double* %arrayidx4, align 8
+ %arrayidx5 = getelementptr inbounds double* %p, i64 5
+ %5 = load double* %arrayidx5, align 8
+ %arrayidx6 = getelementptr inbounds double* %p, i64 6
+ %6 = load double* %arrayidx6, align 8
+ %arrayidx7 = getelementptr inbounds double* %p, i64 7
+ %7 = load double* %arrayidx7, align 8
+ %mul = fmul fast double %0, %1
+ %add = fadd fast double %mul, %7
+ %mul8 = fmul fast double %5, %6
+ %mul9 = fmul fast double %1, %2
+ %add10 = fadd fast double %mul9, %add
+ %mul11 = fmul fast double %3, %4
+ %add12 = fadd fast double %mul11, %mul8
+ %mul13 = fmul fast double %1, %3
+ %sub = fsub fast double %add10, %mul13
+ %mul14 = fmul fast double %4, %5
+ %add15 = fadd fast double %mul14, %add12
+ %mul16 = fmul fast double %2, %3
+ %add17 = fadd fast double %mul16, %sub
+ store double %add17, double* %q, align 8
+ %arrayidx19 = getelementptr inbounds double* %q, i64 1
+ store double %add15, double* %arrayidx19, align 8
+ ret void
+}
+
+; Dest register is live on block exit - fixup needed.
+
+; CHECK-LABEL: f3:
+; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
+; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
+; CHECK: fmadd [[x]]
+; CHECK: fmsub [[x]]
+; CHECK: fmadd [[y:d[0-9]+]], {{.*}}, [[x]]
+; CHECK: str [[y]]
+
+define void @f3(double* nocapture readonly %p, double* nocapture %q) #0 {
+entry:
+ %0 = load double* %p, align 8
+ %arrayidx1 = getelementptr inbounds double* %p, i64 1
+ %1 = load double* %arrayidx1, align 8
+ %arrayidx2 = getelementptr inbounds double* %p, i64 2
+ %2 = load double* %arrayidx2, align 8
+ %arrayidx3 = getelementptr inbounds double* %p, i64 3
+ %3 = load double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double* %p, i64 4
+ %4 = load double* %arrayidx4, align 8
+ %mul = fmul fast double %0, %1
+ %add = fadd fast double %mul, %4
+ %mul5 = fmul fast double %1, %2
+ %add6 = fadd fast double %mul5, %add
+ %mul7 = fmul fast double %1, %3
+ %sub = fsub fast double %add6, %mul7
+ %mul8 = fmul fast double %2, %3
+ %add9 = fadd fast double %mul8, %sub
+ %cmp = fcmp oeq double %3, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void bitcast (void (...)* @g to void ()*)() #2
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ store double %add9, double* %q, align 8
+ ret void
+}
+
+declare void @g(...) #1
+
+; Single precision version of f2.
+
+; CHECK-LABEL: f4:
+; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
+; CHECK-EVEN: fmul [[y:s[0-9]*[13579]]]
+; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
+; CHECK-ODD: fmul [[y:s[0-9]*[02468]]]
+; CHECK: fmadd [[x]]
+; CHECK: fmadd [[y]]
+; CHECK: fmsub [[x]]
+; CHECK: fmadd [[y]]
+; CHECK: fmadd [[x]]
+; CHECK: stp [[x]], [[y]]
+
+define void @f4(float* nocapture readonly %p, float* nocapture %q) #0 {
+entry:
+ %0 = load float* %p, align 4
+ %arrayidx1 = getelementptr inbounds float* %p, i64 1
+ %1 = load float* %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds float* %p, i64 2
+ %2 = load float* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds float* %p, i64 3
+ %3 = load float* %arrayidx3, align 4
+ %arrayidx4 = getelementptr inbounds float* %p, i64 4
+ %4 = load float* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds float* %p, i64 5
+ %5 = load float* %arrayidx5, align 4
+ %arrayidx6 = getelementptr inbounds float* %p, i64 6
+ %6 = load float* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds float* %p, i64 7
+ %7 = load float* %arrayidx7, align 4
+ %mul = fmul fast float %0, %1
+ %add = fadd fast float %mul, %7
+ %mul8 = fmul fast float %5, %6
+ %mul9 = fmul fast float %1, %2
+ %add10 = fadd fast float %mul9, %add
+ %mul11 = fmul fast float %3, %4
+ %add12 = fadd fast float %mul11, %mul8
+ %mul13 = fmul fast float %1, %3
+ %sub = fsub fast float %add10, %mul13
+ %mul14 = fmul fast float %4, %5
+ %add15 = fadd fast float %mul14, %add12
+ %mul16 = fmul fast float %2, %3
+ %add17 = fadd fast float %mul16, %sub
+ store float %add17, float* %q, align 4
+ %arrayidx19 = getelementptr inbounds float* %q, i64 1
+ store float %add15, float* %arrayidx19, align 4
+ ret void
+}
+
+; Single precision version of f3
+
+; CHECK-LABEL: f5:
+; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
+; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
+; CHECK: fmadd [[x]]
+; CHECK: fmsub [[x]]
+; CHECK: fmadd [[y:s[0-9]+]], {{.*}}, [[x]]
+; CHECK: str [[y]]
+
+define void @f5(float* nocapture readonly %p, float* nocapture %q) #0 {
+entry:
+ %0 = load float* %p, align 4
+ %arrayidx1 = getelementptr inbounds float* %p, i64 1
+ %1 = load float* %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds float* %p, i64 2
+ %2 = load float* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds float* %p, i64 3
+ %3 = load float* %arrayidx3, align 4
+ %arrayidx4 = getelementptr inbounds float* %p, i64 4
+ %4 = load float* %arrayidx4, align 4
+ %mul = fmul fast float %0, %1
+ %add = fadd fast float %mul, %4
+ %mul5 = fmul fast float %1, %2
+ %add6 = fadd fast float %mul5, %add
+ %mul7 = fmul fast float %1, %3
+ %sub = fsub fast float %add6, %mul7
+ %mul8 = fmul fast float %2, %3
+ %add9 = fadd fast float %mul8, %sub
+ %cmp = fcmp oeq float %3, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void bitcast (void (...)* @g to void ()*)() #2
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ store float %add9, float* %q, align 4
+ ret void
+}
+
+; Test that regmask clobbering stops a chain sequence.
+
+; CHECK-LABEL: f6:
+; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
+; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
+; CHECK: fmadd [[x]]
+; CHECK: fmsub [[x]]
+; CHECK: fmadd d0, {{.*}}, [[x]]
+; CHECK: bl hh
+; CHECK: str d0
+
+define void @f6(double* nocapture readonly %p, double* nocapture %q) #0 {
+entry:
+ %0 = load double* %p, align 8
+ %arrayidx1 = getelementptr inbounds double* %p, i64 1
+ %1 = load double* %arrayidx1, align 8
+ %arrayidx2 = getelementptr inbounds double* %p, i64 2
+ %2 = load double* %arrayidx2, align 8
+ %arrayidx3 = getelementptr inbounds double* %p, i64 3
+ %3 = load double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double* %p, i64 4
+ %4 = load double* %arrayidx4, align 8
+ %mul = fmul fast double %0, %1
+ %add = fadd fast double %mul, %4
+ %mul5 = fmul fast double %1, %2
+ %add6 = fadd fast double %mul5, %add
+ %mul7 = fmul fast double %1, %3
+ %sub = fsub fast double %add6, %mul7
+ %mul8 = fmul fast double %2, %3
+ %add9 = fadd fast double %mul8, %sub
+ %call = tail call double @hh(double %add9) #2
+ store double %call, double* %q, align 8
+ ret void
+}
+
+declare double @hh(double) #1
+
+; Check that we correctly deal with repeated operands.
+; The following testcase creates:
+; %D1<def> = FADDDrr %D0<kill>, %D0
+; We'll get a crash if we naively look at the first operand, remove it
+; from the substitution list then look at the second operand.
+; CHECK-LABEL: f7:
+; CHECK: fmadd [[x:d[0-9]+]]
+; CHECK: fadd d1, [[x]], [[x]]
+
+define void @f7(double* nocapture readonly %p, double* nocapture %q) #0 {
+entry:
+ %0 = load double* %p, align 8
+ %arrayidx1 = getelementptr inbounds double* %p, i64 1
+ %1 = load double* %arrayidx1, align 8
+ %arrayidx2 = getelementptr inbounds double* %p, i64 2
+ %2 = load double* %arrayidx2, align 8
+ %arrayidx3 = getelementptr inbounds double* %p, i64 3
+ %3 = load double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double* %p, i64 4
+ %4 = load double* %arrayidx4, align 8
+ %mul = fmul fast double %0, %1
+ %add = fadd fast double %mul, %4
+ %mul5 = fmul fast double %1, %2
+ %add6 = fadd fast double %mul5, %add
+ %mul7 = fmul fast double %1, %3
+ %sub = fsub fast double %add6, %mul7
+ %mul8 = fmul fast double %2, %3
+ %add9 = fadd fast double %mul8, %sub
+ %add10 = fadd fast double %add9, %add9
+ call void @hhh(double 0.0, double %add10)
+ ret void
+}
+
+declare void @hhh(double, double)
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
diff --git a/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll b/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll
new file mode 100644
index 0000000..64d91ee
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll
@@ -0,0 +1,534 @@
+; REQUIRES: asserts
+; The regression tests need to test for order of emitted instructions, and
+; therefore, the tests are a bit fragile/reliant on instruction scheduling. The
+; test cases have been minimized as much as possible, but still most of the test
+; cases could break if instruction scheduling heuristics for cortex-a53 change.
+; RUN: llc < %s -mcpu=cortex-a53 -aarch64-fix-cortex-a53-835769=1 -stats 2>&1 \
+; RUN: | FileCheck %s --check-prefix CHECK
+; RUN: llc < %s -mcpu=cortex-a53 -aarch64-fix-cortex-a53-835769=0 -stats 2>&1 \
+; RUN: | FileCheck %s --check-prefix CHECK-NOWORKAROUND
+; The following run lines are just to verify whether or not this pass runs by
+; default for given CPUs. Given the fragility of the tests, this is only run on
+; a test case where the scheduler has no freedom at all to reschedule the
+; instructions, so the potentially massively different scheduling heuristics
+; will not break the test case.
+; RUN: llc < %s -mcpu=generic | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED
+; RUN: llc < %s -mcpu=cortex-a53 | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED
+; RUN: llc < %s -mcpu=cortex-a57 | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED
+; RUN: llc < %s -mcpu=cyclone | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+define i64 @f_load_madd_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
+entry:
+ %0 = load i64* %c, align 8
+ %mul = mul nsw i64 %0, %b
+ %add = add nsw i64 %mul, %a
+ ret i64 %add
+}
+; CHECK-LABEL: f_load_madd_64:
+; CHECK: ldr
+; CHECK-NEXT: nop
+; CHECK-NEXT: madd
+; CHECK-NOWORKAROUND-LABEL: f_load_madd_64:
+; CHECK-NOWORKAROUND: ldr
+; CHECK-NOWORKAROUND-NEXT: madd
+; CHECK-BASIC-PASS-DISABLED-LABEL: f_load_madd_64:
+; CHECK-BASIC-PASS-DISABLED: ldr
+; CHECK-BASIC-PASS-DISABLED-NEXT: madd
+
+
+define i32 @f_load_madd_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
+entry:
+ %0 = load i32* %c, align 4
+ %mul = mul nsw i32 %0, %b
+ %add = add nsw i32 %mul, %a
+ ret i32 %add
+}
+; CHECK-LABEL: f_load_madd_32:
+; CHECK: ldr
+; CHECK-NEXT: madd
+; CHECK-NOWORKAROUND-LABEL: f_load_madd_32:
+; CHECK-NOWORKAROUND: ldr
+; CHECK-NOWORKAROUND-NEXT: madd
+
+
+define i64 @f_load_msub_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
+entry:
+ %0 = load i64* %c, align 8
+ %mul = mul nsw i64 %0, %b
+ %sub = sub nsw i64 %a, %mul
+ ret i64 %sub
+}
+; CHECK-LABEL: f_load_msub_64:
+; CHECK: ldr
+; CHECK-NEXT: nop
+; CHECK-NEXT: msub
+; CHECK-NOWORKAROUND-LABEL: f_load_msub_64:
+; CHECK-NOWORKAROUND: ldr
+; CHECK-NOWORKAROUND-NEXT: msub
+
+
+define i32 @f_load_msub_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
+entry:
+ %0 = load i32* %c, align 4
+ %mul = mul nsw i32 %0, %b
+ %sub = sub nsw i32 %a, %mul
+ ret i32 %sub
+}
+; CHECK-LABEL: f_load_msub_32:
+; CHECK: ldr
+; CHECK-NEXT: msub
+; CHECK-NOWORKAROUND-LABEL: f_load_msub_32:
+; CHECK-NOWORKAROUND: ldr
+; CHECK-NOWORKAROUND-NEXT: msub
+
+
+define i64 @f_load_mul_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
+entry:
+ %0 = load i64* %c, align 8
+ %mul = mul nsw i64 %0, %b
+ ret i64 %mul
+}
+; CHECK-LABEL: f_load_mul_64:
+; CHECK: ldr
+; CHECK-NEXT: mul
+; CHECK-NOWORKAROUND-LABEL: f_load_mul_64:
+; CHECK-NOWORKAROUND: ldr
+; CHECK-NOWORKAROUND-NEXT: mul
+
+
+define i32 @f_load_mul_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
+entry:
+ %0 = load i32* %c, align 4
+ %mul = mul nsw i32 %0, %b
+ ret i32 %mul
+}
+; CHECK-LABEL: f_load_mul_32:
+; CHECK: ldr
+; CHECK-NEXT: mul
+; CHECK-NOWORKAROUND-LABEL: f_load_mul_32:
+; CHECK-NOWORKAROUND: ldr
+; CHECK-NOWORKAROUND-NEXT: mul
+
+
+define i64 @f_load_mneg_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
+entry:
+ %0 = load i64* %c, align 8
+ %mul = sub i64 0, %b
+ %sub = mul i64 %0, %mul
+ ret i64 %sub
+}
+; CHECK-LABEL: f_load_mneg_64:
+; CHECK-NOWORKAROUND-LABEL: f_load_mneg_64:
+; FIXME: only add further checks here once LLVM actually produces
+; mneg instructions
+; FIXME-CHECK: ldr
+; FIXME-CHECK-NEXT: nop
+; FIXME-CHECK-NEXT: mneg
+; FIXME-CHECK-NOWORKAROUND: ldr
+; FIXME-CHECK-NOWORKAROUND-NEXT: mneg
+
+
+define i32 @f_load_mneg_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
+entry:
+ %0 = load i32* %c, align 4
+ %mul = sub i32 0, %b
+ %sub = mul i32 %0, %mul
+ ret i32 %sub
+}
+; CHECK-LABEL: f_load_mneg_32:
+; CHECK-NOWORKAROUND-LABEL: f_load_mneg_32:
+; FIXME: only add further checks here once LLVM actually produces
+; mneg instructions
+; FIXME-CHECK: ldr
+; FIXME-CHECK-NEXT: mneg
+; FIXME-CHECK-NOWORKAROUND: ldr
+; FIXME-CHECK-NOWORKAROUND-NEXT: mneg
+
+
+define i64 @f_load_smaddl(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
+entry:
+ %conv = sext i32 %b to i64
+ %conv1 = sext i32 %c to i64
+ %mul = mul nsw i64 %conv1, %conv
+ %add = add nsw i64 %mul, %a
+ %0 = load i32* %d, align 4
+ %conv2 = sext i32 %0 to i64
+ %add3 = add nsw i64 %add, %conv2
+ ret i64 %add3
+}
+; CHECK-LABEL: f_load_smaddl:
+; CHECK: ldrsw
+; CHECK-NEXT: nop
+; CHECK-NEXT: smaddl
+; CHECK-NOWORKAROUND-LABEL: f_load_smaddl:
+; CHECK-NOWORKAROUND: ldrsw
+; CHECK-NOWORKAROUND-NEXT: smaddl
+
+
+define i64 @f_load_smsubl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
+entry:
+ %conv = sext i32 %b to i64
+ %conv1 = sext i32 %c to i64
+ %mul = mul nsw i64 %conv1, %conv
+ %sub = sub i64 %a, %mul
+ %0 = load i32* %d, align 4
+ %conv2 = sext i32 %0 to i64
+ %add = add nsw i64 %sub, %conv2
+ ret i64 %add
+}
+; CHECK-LABEL: f_load_smsubl_64:
+; CHECK: ldrsw
+; CHECK-NEXT: nop
+; CHECK-NEXT: smsubl
+; CHECK-NOWORKAROUND-LABEL: f_load_smsubl_64:
+; CHECK-NOWORKAROUND: ldrsw
+; CHECK-NOWORKAROUND-NEXT: smsubl
+
+
+define i64 @f_load_smull(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
+entry:
+ %conv = sext i32 %b to i64
+ %conv1 = sext i32 %c to i64
+ %mul = mul nsw i64 %conv1, %conv
+ %0 = load i32* %d, align 4
+ %conv2 = sext i32 %0 to i64
+ %div = sdiv i64 %mul, %conv2
+ ret i64 %div
+}
+; CHECK-LABEL: f_load_smull:
+; CHECK: ldrsw
+; CHECK-NEXT: smull
+; CHECK-NOWORKAROUND-LABEL: f_load_smull:
+; CHECK-NOWORKAROUND: ldrsw
+; CHECK-NOWORKAROUND-NEXT: smull
+
+
+define i64 @f_load_smnegl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
+entry:
+ %conv = sext i32 %b to i64
+ %conv1 = sext i32 %c to i64
+ %mul = sub nsw i64 0, %conv
+ %sub = mul i64 %conv1, %mul
+ %0 = load i32* %d, align 4
+ %conv2 = sext i32 %0 to i64
+ %div = sdiv i64 %sub, %conv2
+ ret i64 %div
+}
+; CHECK-LABEL: f_load_smnegl_64:
+; CHECK-NOWORKAROUND-LABEL: f_load_smnegl_64:
+; FIXME: only add further checks here once LLVM actually produces
+; smnegl instructions
+
+
+define i64 @f_load_umaddl(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
+entry:
+ %conv = zext i32 %b to i64
+ %conv1 = zext i32 %c to i64
+ %mul = mul i64 %conv1, %conv
+ %add = add i64 %mul, %a
+ %0 = load i32* %d, align 4
+ %conv2 = zext i32 %0 to i64
+ %add3 = add i64 %add, %conv2
+ ret i64 %add3
+}
+; CHECK-LABEL: f_load_umaddl:
+; CHECK: ldr
+; CHECK-NEXT: nop
+; CHECK-NEXT: umaddl
+; CHECK-NOWORKAROUND-LABEL: f_load_umaddl:
+; CHECK-NOWORKAROUND: ldr
+; CHECK-NOWORKAROUND-NEXT: umaddl
+
+
+define i64 @f_load_umsubl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
+entry:
+ %conv = zext i32 %b to i64
+ %conv1 = zext i32 %c to i64
+ %mul = mul i64 %conv1, %conv
+ %sub = sub i64 %a, %mul
+ %0 = load i32* %d, align 4
+ %conv2 = zext i32 %0 to i64
+ %add = add i64 %sub, %conv2
+ ret i64 %add
+}
+; CHECK-LABEL: f_load_umsubl_64:
+; CHECK: ldr
+; CHECK-NEXT: nop
+; CHECK-NEXT: umsubl
+; CHECK-NOWORKAROUND-LABEL: f_load_umsubl_64:
+; CHECK-NOWORKAROUND: ldr
+; CHECK-NOWORKAROUND-NEXT: umsubl
+
+
+define i64 @f_load_umull(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
+entry:
+ %conv = zext i32 %b to i64
+ %conv1 = zext i32 %c to i64
+ %mul = mul i64 %conv1, %conv
+ %0 = load i32* %d, align 4
+ %conv2 = zext i32 %0 to i64
+ %div = udiv i64 %mul, %conv2
+ ret i64 %div
+}
+; CHECK-LABEL: f_load_umull:
+; CHECK: ldr
+; CHECK-NEXT: umull
+; CHECK-NOWORKAROUND-LABEL: f_load_umull:
+; CHECK-NOWORKAROUND: ldr
+; CHECK-NOWORKAROUND-NEXT: umull
+
+
+define i64 @f_load_umnegl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
+entry:
+ %conv = zext i32 %b to i64
+ %conv1 = zext i32 %c to i64
+ %mul = sub nsw i64 0, %conv
+ %sub = mul i64 %conv1, %mul
+ %0 = load i32* %d, align 4
+ %conv2 = zext i32 %0 to i64
+ %div = udiv i64 %sub, %conv2
+ ret i64 %div
+}
+; CHECK-LABEL: f_load_umnegl_64:
+; CHECK-NOWORKAROUND-LABEL: f_load_umnegl_64:
+; FIXME: only add further checks here once LLVM actually produces
+; umnegl instructions
+
+
+define i64 @f_store_madd_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
+entry:
+ %0 = load i64* %cp, align 8
+ store i64 %a, i64* %e, align 8
+ %mul = mul nsw i64 %0, %b
+ %add = add nsw i64 %mul, %a
+ ret i64 %add
+}
+; CHECK-LABEL: f_store_madd_64:
+; CHECK: str
+; CHECK-NEXT: nop
+; CHECK-NEXT: madd
+; CHECK-NOWORKAROUND-LABEL: f_store_madd_64:
+; CHECK-NOWORKAROUND: str
+; CHECK-NOWORKAROUND-NEXT: madd
+
+
+define i32 @f_store_madd_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
+entry:
+ %0 = load i32* %cp, align 4
+ store i32 %a, i32* %e, align 4
+ %mul = mul nsw i32 %0, %b
+ %add = add nsw i32 %mul, %a
+ ret i32 %add
+}
+; CHECK-LABEL: f_store_madd_32:
+; CHECK: str
+; CHECK-NEXT: madd
+; CHECK-NOWORKAROUND-LABEL: f_store_madd_32:
+; CHECK-NOWORKAROUND: str
+; CHECK-NOWORKAROUND-NEXT: madd
+
+
+define i64 @f_store_msub_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
+entry:
+ %0 = load i64* %cp, align 8
+ store i64 %a, i64* %e, align 8
+ %mul = mul nsw i64 %0, %b
+ %sub = sub nsw i64 %a, %mul
+ ret i64 %sub
+}
+; CHECK-LABEL: f_store_msub_64:
+; CHECK: str
+; CHECK-NEXT: nop
+; CHECK-NEXT: msub
+; CHECK-NOWORKAROUND-LABEL: f_store_msub_64:
+; CHECK-NOWORKAROUND: str
+; CHECK-NOWORKAROUND-NEXT: msub
+
+
+define i32 @f_store_msub_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
+entry:
+ %0 = load i32* %cp, align 4
+ store i32 %a, i32* %e, align 4
+ %mul = mul nsw i32 %0, %b
+ %sub = sub nsw i32 %a, %mul
+ ret i32 %sub
+}
+; CHECK-LABEL: f_store_msub_32:
+; CHECK: str
+; CHECK-NEXT: msub
+; CHECK-NOWORKAROUND-LABEL: f_store_msub_32:
+; CHECK-NOWORKAROUND: str
+; CHECK-NOWORKAROUND-NEXT: msub
+
+
+define i64 @f_store_mul_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
+entry:
+ %0 = load i64* %cp, align 8
+ store i64 %a, i64* %e, align 8
+ %mul = mul nsw i64 %0, %b
+ ret i64 %mul
+}
+; CHECK-LABEL: f_store_mul_64:
+; CHECK: str
+; CHECK-NEXT: mul
+; CHECK-NOWORKAROUND-LABEL: f_store_mul_64:
+; CHECK-NOWORKAROUND: str
+; CHECK-NOWORKAROUND-NEXT: mul
+
+
+define i32 @f_store_mul_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
+entry:
+ %0 = load i32* %cp, align 4
+ store i32 %a, i32* %e, align 4
+ %mul = mul nsw i32 %0, %b
+ ret i32 %mul
+}
+; CHECK-LABEL: f_store_mul_32:
+; CHECK: str
+; CHECK-NEXT: mul
+; CHECK-NOWORKAROUND-LABEL: f_store_mul_32:
+; CHECK-NOWORKAROUND: str
+; CHECK-NOWORKAROUND-NEXT: mul
+
+
+define i64 @f_prefetch_madd_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
+entry:
+ %0 = load i64* %cp, align 8
+ %1 = bitcast i64* %e to i8*
+ tail call void @llvm.prefetch(i8* %1, i32 0, i32 0, i32 1)
+ %mul = mul nsw i64 %0, %b
+ %add = add nsw i64 %mul, %a
+ ret i64 %add
+}
+; CHECK-LABEL: f_prefetch_madd_64:
+; CHECK: prfm
+; CHECK-NEXT: nop
+; CHECK-NEXT: madd
+; CHECK-NOWORKAROUND-LABEL: f_prefetch_madd_64:
+; CHECK-NOWORKAROUND: prfm
+; CHECK-NOWORKAROUND-NEXT: madd
+
+declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) #2
+
+define i32 @f_prefetch_madd_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
+entry:
+ %0 = load i32* %cp, align 4
+ %1 = bitcast i32* %e to i8*
+ tail call void @llvm.prefetch(i8* %1, i32 1, i32 0, i32 1)
+ %mul = mul nsw i32 %0, %b
+ %add = add nsw i32 %mul, %a
+ ret i32 %add
+}
+; CHECK-LABEL: f_prefetch_madd_32:
+; CHECK: prfm
+; CHECK-NEXT: madd
+; CHECK-NOWORKAROUND-LABEL: f_prefetch_madd_32:
+; CHECK-NOWORKAROUND: prfm
+; CHECK-NOWORKAROUND-NEXT: madd
+
+define i64 @f_prefetch_msub_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
+entry:
+ %0 = load i64* %cp, align 8
+ %1 = bitcast i64* %e to i8*
+ tail call void @llvm.prefetch(i8* %1, i32 0, i32 1, i32 1)
+ %mul = mul nsw i64 %0, %b
+ %sub = sub nsw i64 %a, %mul
+ ret i64 %sub
+}
+; CHECK-LABEL: f_prefetch_msub_64:
+; CHECK: prfm
+; CHECK-NEXT: nop
+; CHECK-NEXT: msub
+; CHECK-NOWORKAROUND-LABEL: f_prefetch_msub_64:
+; CHECK-NOWORKAROUND: prfm
+; CHECK-NOWORKAROUND-NEXT: msub
+
+define i32 @f_prefetch_msub_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
+entry:
+ %0 = load i32* %cp, align 4
+ %1 = bitcast i32* %e to i8*
+ tail call void @llvm.prefetch(i8* %1, i32 1, i32 1, i32 1)
+ %mul = mul nsw i32 %0, %b
+ %sub = sub nsw i32 %a, %mul
+ ret i32 %sub
+}
+; CHECK-LABEL: f_prefetch_msub_32:
+; CHECK: prfm
+; CHECK-NEXT: msub
+; CHECK-NOWORKAROUND-LABEL: f_prefetch_msub_32:
+; CHECK-NOWORKAROUND: prfm
+; CHECK-NOWORKAROUND-NEXT: msub
+
+define i64 @f_prefetch_mul_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
+entry:
+ %0 = load i64* %cp, align 8
+ %1 = bitcast i64* %e to i8*
+ tail call void @llvm.prefetch(i8* %1, i32 0, i32 3, i32 1)
+ %mul = mul nsw i64 %0, %b
+ ret i64 %mul
+}
+; CHECK-LABEL: f_prefetch_mul_64:
+; CHECK: prfm
+; CHECK-NEXT: mul
+; CHECK-NOWORKAROUND-LABEL: f_prefetch_mul_64:
+; CHECK-NOWORKAROUND: prfm
+; CHECK-NOWORKAROUND-NEXT: mul
+
+define i32 @f_prefetch_mul_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
+entry:
+ %0 = load i32* %cp, align 4
+ %1 = bitcast i32* %e to i8*
+ tail call void @llvm.prefetch(i8* %1, i32 1, i32 3, i32 1)
+ %mul = mul nsw i32 %0, %b
+ ret i32 %mul
+}
+; CHECK-LABEL: f_prefetch_mul_32:
+; CHECK: prfm
+; CHECK-NEXT: mul
+; CHECK-NOWORKAROUND-LABEL: f_prefetch_mul_32:
+; CHECK-NOWORKAROUND: prfm
+; CHECK-NOWORKAROUND-NEXT: mul
+
+define i64 @fall_through(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
+entry:
+ %0 = load i64* %c, align 8
+ br label %block1
+
+block1:
+ %mul = mul nsw i64 %0, %b
+ %add = add nsw i64 %mul, %a
+ %tmp = ptrtoint i8* blockaddress(@fall_through, %block1) to i64
+ %ret = add nsw i64 %tmp, %add
+ ret i64 %ret
+}
+; CHECK-LABEL: fall_through:
+; CHECK: ldr
+; CHECK-NEXT: nop
+; CHECK-NEXT: .Ltmp
+; CHECK-NEXT: BB
+; CHECK-NEXT: madd
+; CHECK-NOWORKAROUND-LABEL: fall_through:
+; CHECK-NOWORKAROUND: ldr
+; CHECK-NOWORKAROUND-NEXT: .Ltmp
+; CHECK-NOWORKAROUND-NEXT: BB
+; CHECK-NOWORKAROUND-NEXT: madd
+
+; No checks for this, just check it doesn't crash
+define i32 @crash_check(i8** nocapture readnone %data) #0 {
+entry:
+ br label %while.cond
+
+while.cond:
+ br label %while.cond
+}
+
+attributes #0 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
+
+; CHECK-LABEL: ... Statistics Collected ...
+; CHECK: 11 aarch64-fix-cortex-a53-835769 - Number of Nops added to work around erratum 835769
diff --git a/test/CodeGen/AArch64/remat.ll b/test/CodeGen/AArch64/remat.ll
new file mode 100644
index 0000000..32b3ed2
--- /dev/null
+++ b/test/CodeGen/AArch64/remat.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a57 -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a53 -o - %s | FileCheck %s
+
+%X = type { i64, i64, i64 }
+declare void @f(%X*)
+define void @t() {
+entry:
+ %tmp = alloca %X
+ call void @f(%X* %tmp)
+; CHECK: add x0, sp, #8
+; CHECK-NOT: mov
+ call void @f(%X* %tmp)
+; CHECK: add x0, sp, #8
+; CHECK-NOT: mov
+ ret void
+}
diff --git a/test/CodeGen/X86/critical-anti-dep-breaker.ll b/test/CodeGen/X86/critical-anti-dep-breaker.ll
new file mode 100644
index 0000000..32d3f49
--- /dev/null
+++ b/test/CodeGen/X86/critical-anti-dep-breaker.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic -post-RA-scheduler=1 -break-anti-dependencies=critical | FileCheck %s
+
+; PR20308 ( http://llvm.org/bugs/show_bug.cgi?id=20308 )
+; The critical-anti-dependency-breaker must not use register def information from a kill inst.
+; This test case expects such an instruction to appear as a comment with def info for RDI.
+; With default register allocation and scheduling there is an anti-dependency (WAR) hazard on RAX.
+; The post-RA-scheduler and critical-anti-dependency breaker can eliminate that hazard using R10.
+; That is the first free register that isn't used as a param in the call to "@Image".
+
+@PartClass = external global i32
+@NullToken = external global i64
+
+; CHECK-LABEL: Part_Create:
+; CHECK-DAG: # kill: RDI<def>
+; CHECK-DAG: movq PartClass@GOTPCREL(%rip), %r10
+define i32 @Part_Create(i64* %Anchor, i32 %TypeNum, i32 %F, i32 %Z, i32* %Status, i64* %PartTkn) {
+ %PartObj = alloca i64*, align 8
+ %Vchunk = alloca i64, align 8
+ %1 = load i64* @NullToken, align 4
+ store i64 %1, i64* %Vchunk, align 8
+ %2 = load i32* @PartClass, align 4
+ call i32 @Image(i64* %Anchor, i32 %2, i32 0, i32 0, i32* %Status, i64* %PartTkn, i64** %PartObj)
+ call i32 @Create(i64* %Anchor)
+ ret i32 %2
+}
+
+declare i32 @Image(i64*, i32, i32, i32, i32*, i64*, i64**)
+declare i32 @Create(i64*)