author    Stephen Hines <srhines@google.com>  2014-07-21 00:45:20 -0700
committer Stephen Hines <srhines@google.com>  2014-07-21 00:45:20 -0700
commit    c6a4f5e819217e1e12c458aed8e7b122e23a3a58 (patch)
tree      81b7dd2bb4370a392f31d332a566c903b5744764 /test/CodeGen/AArch64
parent    19c6fbb3e8aaf74093afa08013134b61fa08f245 (diff)
Update LLVM for rebase to r212749.
Includes a cherry-pick of: r212948 - fixes a small issue with atomic calls
Change-Id: Ib97bd980b59f18142a69506400911a6009d9df18
Diffstat (limited to 'test/CodeGen/AArch64')
-rw-r--r--  test/CodeGen/AArch64/aarch64-address-type-promotion-assertion.ll | 55
-rw-r--r--  test/CodeGen/AArch64/aarch64-address-type-promotion.ll | 28
-rw-r--r--  test/CodeGen/AArch64/addsub_ext.ll | 2
-rw-r--r--  test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll | 6
-rw-r--r--  test/CodeGen/AArch64/arm64-AnInfiniteLoopInDAGCombine.ll (renamed from test/CodeGen/AArch64/arm64-2014-04-16-AnInfiniteLoopInDAGCombine.ll) | 0
-rw-r--r--  test/CodeGen/AArch64/arm64-EXT-undef-mask.ll (renamed from test/CodeGen/AArch64/arm64-2014-04-29-EXT-undef-mask.ll) | 0
-rw-r--r--  test/CodeGen/AArch64/arm64-aapcs.ll | 8
-rw-r--r--  test/CodeGen/AArch64/arm64-abi.ll | 27
-rw-r--r--  test/CodeGen/AArch64/arm64-ands-bad-peephole.ll | 10
-rw-r--r--  test/CodeGen/AArch64/arm64-arith.ll | 8
-rw-r--r--  test/CodeGen/AArch64/arm64-atomic-128.ll | 9
-rw-r--r--  test/CodeGen/AArch64/arm64-atomic.ll | 14
-rw-r--r--  test/CodeGen/AArch64/arm64-build-vector.ll | 24
-rw-r--r--  test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll | 24
-rw-r--r--  test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll | 29
-rw-r--r--  test/CodeGen/AArch64/arm64-convert-v4f64.ll | 33
-rw-r--r--  test/CodeGen/AArch64/arm64-cse.ll | 2
-rw-r--r--  test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll | 3
-rw-r--r--  test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll | 16
-rw-r--r--  test/CodeGen/AArch64/arm64-early-ifcvt.ll | 2
-rw-r--r--  test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll | 13
-rw-r--r--  test/CodeGen/AArch64/arm64-fp128.ll | 2
-rw-r--r--  test/CodeGen/AArch64/arm64-frame-index.ll | 2
-rw-r--r--  test/CodeGen/AArch64/arm64-misched-basic-A53.ll | 79
-rw-r--r--  test/CodeGen/AArch64/arm64-misched-basic-A57.ll | 112
-rw-r--r--  test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll | 7
-rw-r--r--  test/CodeGen/AArch64/arm64-neon-copy.ll | 16
-rw-r--r--  test/CodeGen/AArch64/arm64-neon-select_cc.ll | 4
-rw-r--r--  test/CodeGen/AArch64/arm64-shrink-v1i64.ll | 14
-rw-r--r--  test/CodeGen/AArch64/arm64-sqshl-uqshl-i64Contant.ll (renamed from test/CodeGen/AArch64/arm64-2014-04-28-sqshl-uqshl-i64Contant.ll) | 0
-rw-r--r--  test/CodeGen/AArch64/arm64-vcvt.ll | 12
-rw-r--r--  test/CodeGen/AArch64/arm64-vshift.ll | 9
-rw-r--r--  test/CodeGen/AArch64/arm64-xaluo.ll | 2
-rw-r--r--  test/CodeGen/AArch64/atomic-ops.ll | 25
-rw-r--r--  test/CodeGen/AArch64/blockaddress.ll | 4
-rw-r--r--  test/CodeGen/AArch64/branch-relax-asm.ll | 35
-rw-r--r--  test/CodeGen/AArch64/breg.ll | 2
-rw-r--r--  test/CodeGen/AArch64/cmpxchg-idioms.ll | 93
-rw-r--r--  test/CodeGen/AArch64/compiler-ident.ll | 12
-rw-r--r--  test/CodeGen/AArch64/complex-fp-to-int.ll | 141
-rw-r--r--  test/CodeGen/AArch64/complex-int-to-fp.ll | 164
-rw-r--r--  test/CodeGen/AArch64/directcond.ll | 4
-rw-r--r--  test/CodeGen/AArch64/f16-convert.ll | 254
-rw-r--r--  test/CodeGen/AArch64/fast-isel-mul.ll | 40
-rw-r--r--  test/CodeGen/AArch64/flags-multiuse.ll | 2
-rw-r--r--  test/CodeGen/AArch64/funcptr_cast.ll | 13
-rw-r--r--  test/CodeGen/AArch64/global-merge-1.ll | 26
-rw-r--r--  test/CodeGen/AArch64/global-merge-2.ll | 51
-rw-r--r--  test/CodeGen/AArch64/global-merge-3.ll | 51
-rw-r--r--  test/CodeGen/AArch64/global-merge-4.ll | 73
-rw-r--r--  test/CodeGen/AArch64/global-merge.ll | 30
-rw-r--r--  test/CodeGen/AArch64/i128-fast-isel-fallback.ll | 18
-rw-r--r--  test/CodeGen/AArch64/inlineasm-ldr-pseudo.ll | 26
-rw-r--r--  test/CodeGen/AArch64/jump-table.ll | 6
-rw-r--r--  test/CodeGen/AArch64/ldst-opt.ll | 468
-rw-r--r--  test/CodeGen/AArch64/lit.local.cfg | 3
-rw-r--r--  test/CodeGen/AArch64/memcpy-f128.ll | 19
-rw-r--r--  test/CodeGen/AArch64/mul_pow2.ll | 123
-rw-r--r--  test/CodeGen/AArch64/regress-tail-livereg.ll | 14
-rw-r--r--  test/CodeGen/AArch64/trunc-v1i64.ll | 63
-rw-r--r--  test/CodeGen/AArch64/tst-br.ll | 2
61 files changed, 2196 insertions, 138 deletions
diff --git a/test/CodeGen/AArch64/aarch64-address-type-promotion-assertion.ll b/test/CodeGen/AArch64/aarch64-address-type-promotion-assertion.ll
new file mode 100644
index 0000000..2df9c37
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-address-type-promotion-assertion.ll
@@ -0,0 +1,55 @@
+; RUN: llc -O3 -mcpu=cortex-a53 -mtriple=aarch64--linux-gnu %s -o - | FileCheck %s
+; PR20188: don't crash when merging sexts.
+
+; CHECK: foo:
+define void @foo() unnamed_addr align 2 {
+entry:
+ br label %invoke.cont145
+
+invoke.cont145:
+ %or.cond = and i1 undef, false
+ br i1 %or.cond, label %if.then274, label %invoke.cont145
+
+if.then274:
+ %0 = load i32* null, align 4
+ br i1 undef, label %invoke.cont291, label %if.else313
+
+invoke.cont291:
+ %idxprom.i.i.i605 = sext i32 %0 to i64
+ %arrayidx.i.i.i607 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i605
+ %idxprom.i.i.i596 = sext i32 %0 to i64
+ %arrayidx.i.i.i598 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i596
+ br label %if.end356
+
+if.else313:
+ %cmp314 = fcmp olt double undef, 0.000000e+00
+ br i1 %cmp314, label %invoke.cont317, label %invoke.cont353
+
+invoke.cont317:
+ br i1 undef, label %invoke.cont326, label %invoke.cont334
+
+invoke.cont326:
+ %idxprom.i.i.i587 = sext i32 %0 to i64
+ %arrayidx.i.i.i589 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i587
+ %sub329 = fsub fast double undef, undef
+ br label %invoke.cont334
+
+invoke.cont334:
+ %lo.1 = phi double [ %sub329, %invoke.cont326 ], [ undef, %invoke.cont317 ]
+ br i1 undef, label %invoke.cont342, label %if.end356
+
+invoke.cont342:
+ %idxprom.i.i.i578 = sext i32 %0 to i64
+ %arrayidx.i.i.i580 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i578
+ br label %if.end356
+
+invoke.cont353:
+ %idxprom.i.i.i572 = sext i32 %0 to i64
+ %arrayidx.i.i.i574 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i572
+ br label %if.end356
+
+if.end356:
+ %lo.2 = phi double [ 0.000000e+00, %invoke.cont291 ], [ %lo.1, %invoke.cont342 ], [ undef, %invoke.cont353 ], [ %lo.1, %invoke.cont334 ]
+ call void null(i32 %0, double %lo.2)
+ unreachable
+}
diff --git a/test/CodeGen/AArch64/aarch64-address-type-promotion.ll b/test/CodeGen/AArch64/aarch64-address-type-promotion.ll
new file mode 100644
index 0000000..ee90d19
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-address-type-promotion.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -o - | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64"
+target triple = "arm64-apple-macosx10.9"
+
+; Check that sexts get promoted above adds.
+define void @foo(i32* nocapture %a, i32 %i) {
+entry:
+; CHECK-LABEL: _foo:
+; CHECK: add
+; CHECK-NEXT: ldp
+; CHECK-NEXT: add
+; CHECK-NEXT: str
+; CHECK-NEXT: ret
+ %add = add nsw i32 %i, 1
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds i32* %a, i64 %idxprom
+ %0 = load i32* %arrayidx, align 4
+ %add1 = add nsw i32 %i, 2
+ %idxprom2 = sext i32 %add1 to i64
+ %arrayidx3 = getelementptr inbounds i32* %a, i64 %idxprom2
+ %1 = load i32* %arrayidx3, align 4
+ %add4 = add nsw i32 %1, %0
+ %idxprom5 = sext i32 %i to i64
+ %arrayidx6 = getelementptr inbounds i32* %a, i64 %idxprom5
+ store i32 %add4, i32* %arrayidx6, align 4
+ ret void
+}
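
The promotion being checked can be sketched in plain IR (a hypothetical before/after, not part of the patch): because the adds are marked nsw, the sign extension may legally be hoisted above them, so

  %add = add nsw i32 %i, 1
  %idxprom = sext i32 %add to i64

becomes

  %promoted = sext i32 %i to i64
  %idxprom = add nsw i64 %promoted, 1

letting both loads index off a single sign-extended base, which is what allows the ldp pairing checked above.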
diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll
index a2266b1..ceea8a0 100644
--- a/test/CodeGen/AArch64/addsub_ext.ll
+++ b/test/CodeGen/AArch64/addsub_ext.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck %s
@var8 = global i8 0
@var16 = global i16 0
diff --git a/test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll b/test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll
index d1840d3..7da2d2c 100644
--- a/test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll
+++ b/test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll
@@ -2,14 +2,14 @@
; RUN: llc -mtriple=arm64-linux-gnu -relocation-model=pic < %s | FileCheck %s --check-prefix=CHECK-LINUX
; <rdar://problem/11392109>
-define hidden void @t() optsize ssp {
+define hidden void @t(i64* %addr) optsize ssp {
entry:
- store i64 zext (i32 ptrtoint (i64 (i32)* @x to i32) to i64), i64* undef, align 8
+ store i64 zext (i32 ptrtoint (i64 (i32)* @x to i32) to i64), i64* %addr, align 8
; CHECK: adrp x{{[0-9]+}}, _x@GOTPAGE
; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, _x@GOTPAGEOFF]
; CHECK-NEXT: and x{{[0-9]+}}, x{{[0-9]+}}, #0xffffffff
; CHECK-NEXT: str x{{[0-9]+}}, [x{{[0-9]+}}]
- unreachable
+ ret void
}
declare i64 @x(i32) optsize
diff --git a/test/CodeGen/AArch64/arm64-2014-04-16-AnInfiniteLoopInDAGCombine.ll b/test/CodeGen/AArch64/arm64-AnInfiniteLoopInDAGCombine.ll
index a73b707..a73b707 100644
--- a/test/CodeGen/AArch64/arm64-2014-04-16-AnInfiniteLoopInDAGCombine.ll
+++ b/test/CodeGen/AArch64/arm64-AnInfiniteLoopInDAGCombine.ll
diff --git a/test/CodeGen/AArch64/arm64-2014-04-29-EXT-undef-mask.ll b/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll
index 1b2d543..1b2d543 100644
--- a/test/CodeGen/AArch64/arm64-2014-04-29-EXT-undef-mask.ll
+++ b/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll
diff --git a/test/CodeGen/AArch64/arm64-aapcs.ll b/test/CodeGen/AArch64/arm64-aapcs.ll
index b713f0d..ccf1371 100644
--- a/test/CodeGen/AArch64/arm64-aapcs.ll
+++ b/test/CodeGen/AArch64/arm64-aapcs.ll
@@ -101,3 +101,11 @@ define fp128 @test_fp128([8 x float] %arg0, fp128 %arg1) {
; CHECK: ldr {{q[0-9]+}}, [sp]
ret fp128 %arg1
}
+
+; Check that VPR arguments can be correctly passed on the stack.
+define <2 x double> @test_vreg_stack([8 x <2 x double>], <2 x double> %varg_stack) {
+entry:
+; CHECK-LABEL: test_vreg_stack:
+; CHECK: ldr {{q[0-9]+}}, [sp]
+ ret <2 x double> %varg_stack;
+}
diff --git a/test/CodeGen/AArch64/arm64-abi.ll b/test/CodeGen/AArch64/arm64-abi.ll
index e2de434..a955029 100644
--- a/test/CodeGen/AArch64/arm64-abi.ll
+++ b/test/CodeGen/AArch64/arm64-abi.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s
+; RUN: llc < %s -debug -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s
; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s
+; REQUIRES: asserts
target triple = "arm64-apple-darwin"
; rdar://9932559
@@ -8,15 +9,15 @@ entry:
; CHECK-LABEL: i8i16callee:
; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5.
; They are i8, i16, i8 and i8.
-; CHECK: ldrsb {{w[0-9]+}}, [sp, #5]
-; CHECK: ldrsh {{w[0-9]+}}, [sp, #2]
-; CHECK: ldrsb {{w[0-9]+}}, [sp]
-; CHECK: ldrsb {{w[0-9]+}}, [sp, #4]
+; CHECK-DAG: ldrsb {{w[0-9]+}}, [sp, #5]
+; CHECK-DAG: ldrsb {{w[0-9]+}}, [sp, #4]
+; CHECK-DAG: ldrsh {{w[0-9]+}}, [sp, #2]
+; CHECK-DAG: ldrsb {{w[0-9]+}}, [sp]
; FAST-LABEL: i8i16callee:
-; FAST: ldrb {{w[0-9]+}}, [sp, #5]
-; FAST: ldrb {{w[0-9]+}}, [sp, #4]
-; FAST: ldrh {{w[0-9]+}}, [sp, #2]
-; FAST: ldrb {{w[0-9]+}}, [sp]
+; FAST-DAG: ldrsb {{w[0-9]+}}, [sp, #5]
+; FAST-DAG: ldrsb {{w[0-9]+}}, [sp, #4]
+; FAST-DAG: ldrsh {{w[0-9]+}}, [sp, #2]
+; FAST-DAG: ldrsb {{w[0-9]+}}, [sp]
%conv = sext i8 %a4 to i64
%conv3 = sext i16 %a5 to i64
%conv8 = sext i8 %b1 to i64
@@ -44,10 +45,10 @@ entry:
; CHECK: i8i16caller
; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5.
; They are i8, i16, i8 and i8.
-; CHECK: strb {{w[0-9]+}}, [sp, #5]
-; CHECK: strb {{w[0-9]+}}, [sp, #4]
-; CHECK: strh {{w[0-9]+}}, [sp, #2]
-; CHECK: strb {{w[0-9]+}}, [sp]
+; CHECK-DAG: strb {{w[0-9]+}}, [sp, #5]
+; CHECK-DAG: strb {{w[0-9]+}}, [sp, #4]
+; CHECK-DAG: strh {{w[0-9]+}}, [sp, #2]
+; CHECK-DAG: strb {{w[0-9]+}}, [sp]
; CHECK: bl
; FAST: i8i16caller
; FAST: strb {{w[0-9]+}}, [sp]
diff --git a/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll b/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll
index 34d6287..38661a5 100644
--- a/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll
+++ b/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll
@@ -1,4 +1,4 @@
-; RUN: llc %s -o - | FileCheck %s
+; RUN: llc %s -o - -aarch64-atomic-cfg-tidy=0 | FileCheck %s
; Check that ANDS (tst) is not merged with ADD when the immediate
; is not 0.
; <rdar://problem/16693089>
@@ -8,18 +8,18 @@ target triple = "arm64-apple-ios"
; CHECK-LABEL: tst1:
; CHECK: add [[REG:w[0-9]+]], w{{[0-9]+}}, #1
; CHECK: tst [[REG]], #0x1
-define void @tst1() {
+define void @tst1(i1 %tst, i32 %true) {
entry:
- br i1 undef, label %for.end, label %for.body
+ br i1 %tst, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%result.09 = phi i32 [ %add2.result.0, %for.body ], [ 1, %entry ]
%i.08 = phi i32 [ %inc, %for.body ], [ 2, %entry ]
%and = and i32 %i.08, 1
%cmp1 = icmp eq i32 %and, 0
- %add2.result.0 = select i1 %cmp1, i32 undef, i32 %result.09
+ %add2.result.0 = select i1 %cmp1, i32 %true, i32 %result.09
%inc = add nsw i32 %i.08, 1
- %cmp = icmp slt i32 %i.08, undef
+ %cmp = icmp slt i32 %i.08, %true
br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
for.cond.for.end_crit_edge: ; preds = %for.body
diff --git a/test/CodeGen/AArch64/arm64-arith.ll b/test/CodeGen/AArch64/arm64-arith.ll
index ed9b569..f36e706 100644
--- a/test/CodeGen/AArch64/arm64-arith.ll
+++ b/test/CodeGen/AArch64/arm64-arith.ll
@@ -260,3 +260,11 @@ define i64 @f3(i64 %a) nounwind readnone ssp {
%res = mul nsw i64 %a, 17
ret i64 %res
}
+
+define i32 @f4(i32 %a) nounwind readnone ssp {
+; CHECK-LABEL: f4:
+; CHECK-NEXT: add w0, w0, w0, lsl #1
+; CHECK-NEXT: ret
+ %res = mul i32 %a, 3
+ ret i32 %res
+}
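
The new f4 test relies on strength-reducing a multiply by 3 into x + (x << 1), which fits AArch64's shifted-register add form. A scalar sketch of the equivalence (illustrative IR, not from the patch):

  %twice = shl i32 %a, 1        ; 2*x
  %res = add i32 %a, %twice     ; x + 2*x == 3*x

This is exactly what "add w0, w0, w0, lsl #1" computes in a single instruction.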
diff --git a/test/CodeGen/AArch64/arm64-atomic-128.ll b/test/CodeGen/AArch64/arm64-atomic-128.ll
index 3b43aa1..3377849 100644
--- a/test/CodeGen/AArch64/arm64-atomic-128.ll
+++ b/test/CodeGen/AArch64/arm64-atomic-128.ll
@@ -13,7 +13,8 @@ define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
; CHECK: stxp [[SCRATCH_RES:w[0-9]+]], x4, x5, [x[[ADDR]]]
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
; CHECK: [[DONE]]:
- %val = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
+ %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
+ %val = extractvalue { i128, i1 } %pair, 0
ret i128 %val
}
@@ -21,8 +22,10 @@ define void @fetch_and_nand(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_nand:
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
; CHECK: ldxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK-DAG: bic [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
-; CHECK-DAG: bic [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
+; CHECK-DAG: and [[TMP_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
+; CHECK-DAG: and [[TMP_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
+; CHECK-DAG: mvn [[SCRATCH_REGLO:x[0-9]+]], [[TMP_REGLO]]
+; CHECK-DAG: mvn [[SCRATCH_REGHI:x[0-9]+]], [[TMP_REGHI]]
; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
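
The CHECK changes above track the corrected expansion of atomicrmw nand: nand(x, y) is ~(x & y), so the exclusive-load/store loop must AND and then invert (and + mvn), rather than the old bic, which computed x & ~y. A sketch of the value computed inside the loop (illustrative IR, not literal patch content):

  %and = and i128 %old, %bits
  %new = xor i128 %and, -1      ; ~(%old & %bits), the value stored by stlxp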
diff --git a/test/CodeGen/AArch64/arm64-atomic.ll b/test/CodeGen/AArch64/arm64-atomic.ll
index aa9b284..b56f91d 100644
--- a/test/CodeGen/AArch64/arm64-atomic.ll
+++ b/test/CodeGen/AArch64/arm64-atomic.ll
@@ -10,7 +10,8 @@ define i32 @val_compare_and_swap(i32* %p) {
; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[NEWVAL_REG]], [x0]
; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
; CHECK: [[LABEL2]]:
- %val = cmpxchg i32* %p, i32 7, i32 4 acquire acquire
+ %pair = cmpxchg i32* %p, i32 7, i32 4 acquire acquire
+ %val = extractvalue { i32, i1 } %pair, 0
ret i32 %val
}
@@ -25,7 +26,8 @@ define i64 @val_compare_and_swap_64(i64* %p) {
; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], x[[NEWVAL_REG]], [x0]
; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
; CHECK: [[LABEL2]]:
- %val = cmpxchg i64* %p, i64 7, i64 4 monotonic monotonic
+ %pair = cmpxchg i64* %p, i64 7, i64 4 monotonic monotonic
+ %val = extractvalue { i64, i1 } %pair, 0
ret i64 %val
}
@@ -33,7 +35,8 @@ define i32 @fetch_and_nand(i32* %p) {
; CHECK-LABEL: fetch_and_nand:
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
; CHECK: ldxr w[[DEST_REG:[0-9]+]], [x0]
-; CHECK: and [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], #0xfffffff8
+; CHECK: mvn [[TMP_REG:w[0-9]+]], w[[DEST_REG]]
+; CHECK: orr [[SCRATCH2_REG:w[0-9]+]], [[TMP_REG]], #0xfffffff8
; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]]
; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
@@ -46,8 +49,9 @@ define i64 @fetch_and_nand_64(i64* %p) {
; CHECK-LABEL: fetch_and_nand_64:
; CHECK: mov x[[ADDR:[0-9]+]], x0
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxr [[DEST_REG:x[0-9]+]], [x[[ADDR]]]
-; CHECK: and [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], #0xfffffffffffffff8
+; CHECK: ldaxr x[[DEST_REG:[0-9]+]], [x[[ADDR]]]
+; CHECK: mvn w[[TMP_REG:[0-9]+]], w[[DEST_REG]]
+; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], x[[TMP_REG]], #0xfffffffffffffff8
; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]]
; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
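
Each cmpxchg update in this file follows the same IR migration: cmpxchg now returns a { value, success } pair, so tests that only want the loaded value add an extractvalue. A minimal sketch of the new form (hypothetical function, not from the patch):

define i32 @cas_old_value(i32* %p, i32 %want, i32 %new) {
  %pair = cmpxchg i32* %p, i32 %want, i32 %new acquire acquire
  %old = extractvalue { i32, i1 } %pair, 0   ; previously the direct result
  ret i32 %old
}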
diff --git a/test/CodeGen/AArch64/arm64-build-vector.ll b/test/CodeGen/AArch64/arm64-build-vector.ll
index c109263..d0f6db0 100644
--- a/test/CodeGen/AArch64/arm64-build-vector.ll
+++ b/test/CodeGen/AArch64/arm64-build-vector.ll
@@ -33,3 +33,27 @@ define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind {
%4 = insertelement <4 x float> %3, float %d, i32 3
ret <4 x float> %4
}
+
+define <8 x i16> @build_all_zero(<8 x i16> %a) #1 {
+; CHECK-LABEL: build_all_zero:
+; CHECK: movz w[[GREG:[0-9]+]], #0xae80
+; CHECK-NEXT: fmov s[[FREG:[0-9]+]], w[[GREG]]
+; CHECK-NEXT: mul.8h v0, v0, v[[FREG]]
+ %b = add <8 x i16> %a, <i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>
+ %c = mul <8 x i16> %b, <i16 -20864, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>
+ ret <8 x i16> %c
+}
+
+; There is an optimization in the DAG combiner like the following:
+; fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
+; -> (BUILD_VECTOR A, B, ..., C, D, ...)
+; This case checks that when A,B and C,D have different types, there is no
+; assertion failure.
+define <8 x i16> @concat_2_build_vector(<4 x i16> %in0) {
+; CHECK-LABEL: concat_2_build_vector:
+; CHECK: movi
+ %vshl_n = shl <4 x i16> %in0, <i16 8, i16 8, i16 8, i16 8>
+ %vshl_n2 = shl <4 x i16> %vshl_n, <i16 9, i16 9, i16 9, i16 9>
+ %shuffle.i = shufflevector <4 x i16> %vshl_n2, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %shuffle.i
+}
\ No newline at end of file
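
A schematic of the fold being guarded, in illustrative SelectionDAG notation (hypothetical nodes, not part of the patch):

  t1 = BUILD_VECTOR A, B
  t2 = BUILD_VECTOR C, D
  t3 = concat_vectors t1, t2
  ; folds to: t3 = BUILD_VECTOR A, B, C, D -- only when the operand types agree

When the BUILD_VECTOR operands have different types, the combine must skip the fold rather than assert, which is what concat_2_build_vector exercises.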
diff --git a/test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll b/test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll
deleted file mode 100644
index d862b1e..0000000
--- a/test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
-
-; CHECK: fptosi_1
-; CHECK: fcvtzs.2d
-; CHECK: xtn.2s
-; CHECK: ret
-define void @fptosi_1() nounwind noinline ssp {
-entry:
- %0 = fptosi <2 x double> undef to <2 x i32>
- store <2 x i32> %0, <2 x i32>* undef, align 8
- ret void
-}
-
-; CHECK: fptoui_1
-; CHECK: fcvtzu.2d
-; CHECK: xtn.2s
-; CHECK: ret
-define void @fptoui_1() nounwind noinline ssp {
-entry:
- %0 = fptoui <2 x double> undef to <2 x i32>
- store <2 x i32> %0, <2 x i32>* undef, align 8
- ret void
-}
-
diff --git a/test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll b/test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll
deleted file mode 100644
index daaf1e0..0000000
--- a/test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
-
-define <2 x double> @f1(<2 x i32> %v) nounwind readnone {
-; CHECK-LABEL: f1:
-; CHECK: sshll.2d v0, v0, #0
-; CHECK-NEXT: scvtf.2d v0, v0
-; CHECK-NEXT: ret
- %conv = sitofp <2 x i32> %v to <2 x double>
- ret <2 x double> %conv
-}
-define <2 x double> @f2(<2 x i32> %v) nounwind readnone {
-; CHECK-LABEL: f2:
-; CHECK: ushll.2d v0, v0, #0
-; CHECK-NEXT: ucvtf.2d v0, v0
-; CHECK-NEXT: ret
- %conv = uitofp <2 x i32> %v to <2 x double>
- ret <2 x double> %conv
-}
-
-; CHECK: autogen_SD19655
-; CHECK: scvtf
-; CHECK: ret
-define void @autogen_SD19655() {
- %T = load <2 x i64>* undef
- %F = sitofp <2 x i64> undef to <2 x float>
- store <2 x float> %F, <2 x float>* undef
- ret void
-}
-
diff --git a/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
new file mode 100644
index 0000000..7123e5e
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -march=arm64 | FileCheck %s
+
+
+define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) {
+; CHECK: fptosi_v4f64_to_v4i16
+; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v1.2d
+; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v0.2d
+; CHECK-DAG: xtn v[[LHS_NA:[0-9]+]].2s, v[[LHS]].2d
+; CHECK-DAG: xtn v[[RHS_NA:[0-9]+]].2s, v[[RHS]].2d
+; CHECK: uzp1 v0.4h, v[[RHS_NA]].4h, v[[LHS_NA]].4h
+ %tmp1 = load <4 x double>* %ptr
+ %tmp2 = fptosi <4 x double> %tmp1 to <4 x i16>
+ ret <4 x i16> %tmp2
+}
+
+define <8 x i8> @fptosi_v4f64_to_v4i8(<8 x double>* %ptr) {
+; CHECK: fptosi_v4f64_to_v4i8
+; CHECK-DAG: fcvtzs v[[CONV3:[0-9]+]].2d, v3.2d
+; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d
+; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d
+; CHECK-DAG: fcvtzs v[[CONV0:[0-9]+]].2d, v0.2d
+; CHECK-DAG: xtn v[[NA3:[0-9]+]].2s, v[[CONV3]].2d
+; CHECK-DAG: xtn v[[NA2:[0-9]+]].2s, v[[CONV2]].2d
+; CHECK-DAG: xtn v[[NA1:[0-9]+]].2s, v[[CONV1]].2d
+; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
+; CHECK-DAG: uzp1 v[[TMP1:[0-9]+]].4h, v[[CONV2]].4h, v[[CONV3]].4h
+; CHECK-DAG: uzp1 v[[TMP2:[0-9]+]].4h, v[[CONV0]].4h, v[[CONV1]].4h
+; CHECK: uzp1 v0.8b, v[[TMP2]].8b, v[[TMP1]].8b
+ %tmp1 = load <8 x double>* %ptr
+ %tmp2 = fptosi <8 x double> %tmp1 to <8 x i8>
+ ret <8 x i8> %tmp2
+}
+
diff --git a/test/CodeGen/AArch64/arm64-cse.ll b/test/CodeGen/AArch64/arm64-cse.ll
index bb14c89..5d62cfe 100644
--- a/test/CodeGen/AArch64/arm64-cse.ll
+++ b/test/CodeGen/AArch64/arm64-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 < %s | FileCheck %s
+; RUN: llc -O3 < %s -aarch64-atomic-cfg-tidy=0 | FileCheck %s
target triple = "arm64-apple-ios"
; rdar://12462006
diff --git a/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll b/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll
index 2cf0135..6eed48b 100644
--- a/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll
+++ b/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll
@@ -1,5 +1,8 @@
; RUN: llc -mcpu=cyclone < %s | FileCheck %s
+; r208640 broke ppc64/Linux self-hosting; xfailing while this is worked on.
+; XFAIL: *
+
target datalayout = "e-i64:64-n32:64-S128"
target triple = "arm64-apple-ios"
diff --git a/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll b/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll
index 2e4b658..ce132c6 100644
--- a/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll
+++ b/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll
@@ -13,12 +13,12 @@ target triple = "arm64-apple-ios"
; CHECK-LABEL: XX:
; CHECK: ldr
-define void @XX(%class.A* %K) {
+define i32 @XX(%class.A* %K, i1 %tst, i32* %addr, %class.C** %ppC, %class.C* %pC) {
entry:
- br i1 undef, label %if.then, label %lor.rhs.i
+ br i1 %tst, label %if.then, label %lor.rhs.i
lor.rhs.i: ; preds = %entry
- %tmp = load i32* undef, align 4
+ %tmp = load i32* %addr, align 4
%y.i.i.i = getelementptr inbounds %class.A* %K, i64 0, i32 1
%tmp1 = load i64* %y.i.i.i, align 8
%U.sroa.3.8.extract.trunc.i = trunc i64 %tmp1 to i32
@@ -30,17 +30,17 @@ lor.rhs.i: ; preds = %entry
%add16.i = add nsw i32 %add12.i, %div15.i
%rem.i.i = srem i32 %add16.i, %tmp
%idxprom = sext i32 %rem.i.i to i64
- %arrayidx = getelementptr inbounds %class.C** undef, i64 %idxprom
- %tobool533 = icmp eq %class.C* undef, null
+ %arrayidx = getelementptr inbounds %class.C** %ppC, i64 %idxprom
+ %tobool533 = icmp eq %class.C* %pC, null
br i1 %tobool533, label %while.end, label %while.body
if.then: ; preds = %entry
- unreachable
+ ret i32 42
while.body: ; preds = %lor.rhs.i
- unreachable
+ ret i32 5
while.end: ; preds = %lor.rhs.i
%tmp3 = load %class.C** %arrayidx, align 8
- unreachable
+ ret i32 50
}
diff --git a/test/CodeGen/AArch64/arm64-early-ifcvt.ll b/test/CodeGen/AArch64/arm64-early-ifcvt.ll
index 17d783a..44150c2 100644
--- a/test/CodeGen/AArch64/arm64-early-ifcvt.ll
+++ b/test/CodeGen/AArch64/arm64-early-ifcvt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -stress-early-ifcvt | FileCheck %s
+; RUN: llc < %s -stress-early-ifcvt -aarch64-atomic-cfg-tidy=0 | FileCheck %s
target triple = "arm64-apple-macosx"
; CHECK: mm2
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
index a3d5f6c..1152988 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
@@ -133,3 +133,16 @@ define void @t8() {
call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 4, i32 1, i1 false)
ret void
}
+
+define void @test_distant_memcpy(i8* %dst) {
+; ARM64-LABEL: test_distant_memcpy:
+; ARM64: mov [[ARRAY:x[0-9]+]], sp
+; ARM64: movz [[OFFSET:x[0-9]+]], #0x1f40
+; ARM64: add x[[ADDR:[0-9]+]], [[ARRAY]], [[OFFSET]]
+; ARM64: ldrb [[BYTE:w[0-9]+]], [x[[ADDR]]]
+; ARM64: strb [[BYTE]], [x0]
+ %array = alloca i8, i32 8192
+ %elem = getelementptr i8* %array, i32 8000
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %elem, i64 1, i32 1, i1 false)
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-fp128.ll b/test/CodeGen/AArch64/arm64-fp128.ll
index 57bbb93..b1d5010 100644
--- a/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/test/CodeGen/AArch64/arm64-fp128.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone -aarch64-atomic-cfg-tidy=0 < %s | FileCheck %s
@lhs = global fp128 zeroinitializer, align 16
@rhs = global fp128 zeroinitializer, align 16
diff --git a/test/CodeGen/AArch64/arm64-frame-index.ll b/test/CodeGen/AArch64/arm64-frame-index.ll
index 4a91ff3..321f335 100644
--- a/test/CodeGen/AArch64/arm64-frame-index.ll
+++ b/test/CodeGen/AArch64/arm64-frame-index.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -mtriple=arm64-apple-ios < %s | FileCheck %s
+; RUN: llc -march=arm64 -mtriple=arm64-apple-ios -aarch64-atomic-cfg-tidy=0 < %s | FileCheck %s
; rdar://11935841
define void @t1() nounwind ssp {
diff --git a/test/CodeGen/AArch64/arm64-misched-basic-A53.ll b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
index f88bd6a..bc7ed7f 100644
--- a/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
+++ b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
@@ -122,3 +122,82 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) {
}
declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*)
+
+; Regression Test for PR20057.
+;
+; Cortex-A53 machine model stalls on A53UnitFPMDS contention. Instructions that
+; are otherwise ready are jammed in the pending queue.
+; CHECK: ********** MI Scheduling **********
+; CHECK: testResourceConflict
+; CHECK: *** Final schedule for BB#0 ***
+; CHECK: BRK
+; CHECK: ********** INTERVALS **********
+define void @testResourceConflict(float* %ptr) {
+entry:
+ %add1 = fadd float undef, undef
+ %mul2 = fmul float undef, undef
+ %add3 = fadd float %mul2, undef
+ %mul4 = fmul float undef, %add3
+ %add5 = fadd float %mul4, undef
+ %sub6 = fsub float 0.000000e+00, undef
+ %sub7 = fsub float %add5, undef
+ %div8 = fdiv float 1.000000e+00, undef
+ %mul9 = fmul float %div8, %sub7
+ %mul14 = fmul float %sub6, %div8
+ %mul10 = fsub float -0.000000e+00, %mul14
+ %mul15 = fmul float undef, %div8
+ %mul11 = fsub float -0.000000e+00, %mul15
+ %mul12 = fmul float 0.000000e+00, %div8
+ %mul13 = fmul float %add1, %mul9
+ %mul21 = fmul float %add5, %mul11
+ %add22 = fadd float %mul13, %mul21
+ store float %add22, float* %ptr, align 4
+ %mul28 = fmul float %add1, %mul10
+ %mul33 = fmul float %add5, %mul12
+ %add34 = fadd float %mul33, %mul28
+ store float %add34, float* %ptr, align 4
+ %mul240 = fmul float undef, %mul9
+ %add246 = fadd float %mul240, undef
+ store float %add246, float* %ptr, align 4
+ %mul52 = fmul float undef, %mul10
+ %mul57 = fmul float undef, %mul12
+ %add58 = fadd float %mul57, %mul52
+ store float %add58, float* %ptr, align 4
+ %mul27 = fmul float 0.000000e+00, %mul9
+ %mul81 = fmul float undef, %mul10
+ %add82 = fadd float %mul27, %mul81
+ store float %add82, float* %ptr, align 4
+ call void @llvm.trap()
+ unreachable
+}
+
+declare void @llvm.trap()
+
+; Regression test for PR20057: "permanent hazard".
+; Resource contention on LDST.
+; CHECK: ********** MI Scheduling **********
+; CHECK: testLdStConflict
+; CHECK: *** Final schedule for BB#1 ***
+; CHECK: LD4Fourv2d
+; CHECK: STRQui
+; CHECK: ********** INTERVALS **********
+define void @testLdStConflict() {
+entry:
+ br label %loop
+
+loop:
+ %0 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i8(i8* null)
+ %ptr = bitcast i8* undef to <2 x i64>*
+ store <2 x i64> zeroinitializer, <2 x i64>* %ptr, align 4
+ %ptr1 = bitcast i8* undef to <2 x i64>*
+ store <2 x i64> zeroinitializer, <2 x i64>* %ptr1, align 4
+ %ptr2 = bitcast i8* undef to <2 x i64>*
+ store <2 x i64> zeroinitializer, <2 x i64>* %ptr2, align 4
+ %ptr3 = bitcast i8* undef to <2 x i64>*
+ store <2 x i64> zeroinitializer, <2 x i64>* %ptr3, align 4
+ %ptr4 = bitcast i8* undef to <2 x i64>*
+ store <2 x i64> zeroinitializer, <2 x i64>* %ptr4, align 4
+ br label %loop
+}
+
+declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i8(i8*)
diff --git a/test/CodeGen/AArch64/arm64-misched-basic-A57.ll b/test/CodeGen/AArch64/arm64-misched-basic-A57.ll
new file mode 100644
index 0000000..238474a
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-misched-basic-A57.ll
@@ -0,0 +1,112 @@
+; REQUIRES: asserts
+;
+; The Cortex-A57 machine model will avoid scheduling load instructions in
+; succession because loads on the A57 have a latency of 4 cycles and they all
+; issue to the same pipeline. Instead, it will move other instructions between
+; the loads to avoid unnecessary stalls. The generic machine model schedules the
+; 4 loads consecutively for this case, which causes stalls.
+;
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+; CHECK: ********** MI Scheduling **********
+; CHECK: main:BB#2
+; CHECK: LDR
+; CHECK: Latency : 4
+; CHECK: *** Final schedule for BB#2 ***
+; CHECK: LDR
+; CHECK: LDR
+; CHECK-NOT: LDR
+; CHECK: {{.*}}
+; CHECK: ********** MI Scheduling **********
+
+@main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4
+@main.y = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2], align 4
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ %x = alloca [8 x i32], align 4
+ %y = alloca [8 x i32], align 4
+ %i = alloca i32, align 4
+ %xx = alloca i32, align 4
+ %yy = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = bitcast [8 x i32]* %x to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false)
+ %1 = bitcast [8 x i32]* %y to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false)
+ store i32 0, i32* %xx, align 4
+ store i32 0, i32* %yy, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %2 = load i32* %i, align 4
+ %cmp = icmp slt i32 %2, 8
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %3 = load i32* %yy, align 4
+ %4 = load i32* %i, align 4
+ %idxprom = sext i32 %4 to i64
+ %arrayidx = getelementptr inbounds [8 x i32]* %x, i32 0, i64 %idxprom
+ %5 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %5, 1
+ store i32 %add, i32* %xx, align 4
+ %6 = load i32* %xx, align 4
+ %add1 = add nsw i32 %6, 12
+ store i32 %add1, i32* %xx, align 4
+ %7 = load i32* %xx, align 4
+ %add2 = add nsw i32 %7, 23
+ store i32 %add2, i32* %xx, align 4
+ %8 = load i32* %xx, align 4
+ %add3 = add nsw i32 %8, 34
+ store i32 %add3, i32* %xx, align 4
+ %9 = load i32* %i, align 4
+ %idxprom4 = sext i32 %9 to i64
+ %arrayidx5 = getelementptr inbounds [8 x i32]* %y, i32 0, i64 %idxprom4
+ %10 = load i32* %arrayidx5, align 4
+
+ %add4 = add nsw i32 %9, %add
+ %add5 = add nsw i32 %10, %add1
+ %add6 = add nsw i32 %add4, %add5
+
+ %add7 = add nsw i32 %9, %add3
+ %add8 = add nsw i32 %10, %add4
+ %add9 = add nsw i32 %add7, %add8
+
+ %add10 = add nsw i32 %9, %add6
+ %add11 = add nsw i32 %10, %add7
+ %add12 = add nsw i32 %add10, %add11
+
+ %add13 = add nsw i32 %9, %add9
+ %add14 = add nsw i32 %10, %add10
+ %add15 = add nsw i32 %add13, %add14
+
+ store i32 %add15, i32* %xx, align 4
+
+ %div = sdiv i32 %4, %5
+
+ store i32 %div, i32* %yy, align 4
+
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %11 = load i32* %i, align 4
+ %inc = add nsw i32 %11, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %12 = load i32* %xx, align 4
+ %13 = load i32* %yy, align 4
+ %add67 = add nsw i32 %12, %13
+ ret i32 %add67
+}
+
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll b/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll
index 97bfb5c..07373cc 100644
--- a/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll
+++ b/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll
@@ -6,9 +6,10 @@
;
; CHECK: ********** MI Scheduling **********
; CHECK: shiftable
-; CHECK: *** Final schedule for BB#0 ***
-; CHECK: ADDXrr %vreg0, %vreg2
-; CHECK: ADDXrs %vreg0, %vreg2, 5
+; CHECK: SU(2): %vreg2<def> = SUBXri %vreg1, 20, 0
+; CHECK: Successors:
+; CHECK-NEXT: val SU(4): Latency=1 Reg=%vreg2
+; CHECK-NEXT: val SU(3): Latency=2 Reg=%vreg2
; CHECK: ********** INTERVALS **********
define i64 @shiftable(i64 %A, i64 %B) {
%tmp0 = sub i64 %B, 20
diff --git a/test/CodeGen/AArch64/arm64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll
index cfc2ebf..1cfba82 100644
--- a/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -842,7 +842,7 @@ define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
; CHECK-LABEL: testDUP.v1i8:
-; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+; CHECK: dup v0.8b, v0.b[0]
%b = extractelement <1 x i8> %a, i32 0
%c = insertelement <8 x i8> undef, i8 %b, i32 0
%d = insertelement <8 x i8> %c, i8 %b, i32 1
@@ -857,7 +857,7 @@ define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
; CHECK-LABEL: testDUP.v1i16:
-; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+; CHECK: dup v0.8h, v0.h[0]
%b = extractelement <1 x i16> %a, i32 0
%c = insertelement <8 x i16> undef, i16 %b, i32 0
%d = insertelement <8 x i16> %c, i16 %b, i32 1
@@ -872,7 +872,7 @@ define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
; CHECK-LABEL: testDUP.v1i32:
-; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
+; CHECK: dup v0.4s, v0.s[0]
%b = extractelement <1 x i32> %a, i32 0
%c = insertelement <4 x i32> undef, i32 %b, i32 0
%d = insertelement <4 x i32> %c, i32 %b, i32 1
@@ -1411,35 +1411,35 @@ define <16 x i8> @concat_vector_v16i8_const() {
define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
; CHECK-LABEL: concat_vector_v4i16:
-; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
+; CHECK: dup v0.4h, v0.h[0]
%r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
ret <4 x i16> %r
}
define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
; CHECK-LABEL: concat_vector_v4i32:
-; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
+; CHECK: dup v0.4s, v0.s[0]
%r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %r
}
define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
; CHECK-LABEL: concat_vector_v8i8:
-; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+; CHECK: dup v0.8b, v0.b[0]
%r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
ret <8 x i8> %r
}
define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
; CHECK-LABEL: concat_vector_v8i16:
-; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+; CHECK: dup v0.8h, v0.h[0]
%r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %r
}
define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
; CHECK-LABEL: concat_vector_v16i8:
-; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
+; CHECK: dup v0.16b, v0.b[0]
%r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %r
}
diff --git a/test/CodeGen/AArch64/arm64-neon-select_cc.ll b/test/CodeGen/AArch64/arm64-neon-select_cc.ll
index 255b90d..95c582a 100644
--- a/test/CodeGen/AArch64/arm64-neon-select_cc.ll
+++ b/test/CodeGen/AArch64/arm64-neon-select_cc.ll
@@ -136,8 +136,8 @@ define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d )
define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) {
; CHECK-LABEL: test_select_cc_v1f32:
-; CHECK: fcmp s0, s1
-; CHECK-NEXT: fcsel s0, s2, s3, eq
+; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
+; CHECK-NEXT: bsl [[MASK]].8b, v2.8b, v3.8b
%cmp31 = fcmp oeq float %a, %b
%e = select i1 %cmp31, <1 x float> %c, <1 x float> %d
ret <1 x float> %e
diff --git a/test/CodeGen/AArch64/arm64-shrink-v1i64.ll b/test/CodeGen/AArch64/arm64-shrink-v1i64.ll
new file mode 100644
index 0000000..f31a570
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-shrink-v1i64.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=arm64 < %s
+
+; The DAGCombiner tries to do the following shrink:
+; Convert x+y to (VT)((SmallVT)x+(SmallVT)y)
+; But currently it can't handle vector types and will trigger an assertion failure
+; when it tries to generate an add mixing a vector type and a scalar type.
+; This test checks that no such assertion failure happens.
+define <1 x i64> @dotest(<1 x i64> %in0) {
+entry:
+ %0 = add <1 x i64> %in0, %in0
+ %vshl_n = shl <1 x i64> %0, <i64 32>
+ %vsra_n = ashr <1 x i64> %vshl_n, <i64 32>
+ ret <1 x i64> %vsra_n
+}
diff --git a/test/CodeGen/AArch64/arm64-2014-04-28-sqshl-uqshl-i64Contant.ll b/test/CodeGen/AArch64/arm64-sqshl-uqshl-i64Contant.ll
index 3949b85..3949b85 100644
--- a/test/CodeGen/AArch64/arm64-2014-04-28-sqshl-uqshl-i64Contant.ll
+++ b/test/CodeGen/AArch64/arm64-sqshl-uqshl-i64Contant.ll
diff --git a/test/CodeGen/AArch64/arm64-vcvt.ll b/test/CodeGen/AArch64/arm64-vcvt.ll
index 8c9e4e9..6570f0e 100644
--- a/test/CodeGen/AArch64/arm64-vcvt.ll
+++ b/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -665,19 +665,19 @@ define <2 x double> @ucvtf_2dc(<2 x i64> %A) nounwind {
;CHECK-LABEL: autogen_SD28458:
;CHECK: fcvt
;CHECK: ret
-define void @autogen_SD28458() {
- %Tr53 = fptrunc <8 x double> undef to <8 x float>
- store <8 x float> %Tr53, <8 x float>* undef
+define void @autogen_SD28458(<8 x double> %val.f64, <8 x float>* %addr.f32) {
+ %Tr53 = fptrunc <8 x double> %val.f64 to <8 x float>
+ store <8 x float> %Tr53, <8 x float>* %addr.f32
ret void
}
;CHECK-LABEL: autogen_SD19225:
;CHECK: fcvt
;CHECK: ret
-define void @autogen_SD19225() {
- %A = load <8 x float>* undef
+define void @autogen_SD19225(<8 x double>* %addr.f64, <8 x float>* %addr.f32) {
+ %A = load <8 x float>* %addr.f32
%Tr53 = fpext <8 x float> %A to <8 x double>
- store <8 x double> %Tr53, <8 x double>* undef
+ store <8 x double> %Tr53, <8 x double>* %addr.f64
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-vshift.ll b/test/CodeGen/AArch64/arm64-vshift.ll
index 82ae486..65bd50c 100644
--- a/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/test/CodeGen/AArch64/arm64-vshift.ll
@@ -1313,6 +1313,15 @@ define <8 x i8> @uqshli8b(<8 x i8>* %A) nounwind {
ret <8 x i8> %tmp3
}
+define <8 x i8> @uqshli8b_1(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: uqshli8b_1:
+;CHECK: movi.8b [[REG:v[0-9]+]], #0x8
+;CHECK: uqshl.8b v0, v0, [[REG]]
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
+ ret <8 x i8> %tmp3
+}
+
define <4 x i16> @uqshli4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: uqshli4h:
;CHECK: uqshl.4h v0, {{v[0-9]+}}, #1
diff --git a/test/CodeGen/AArch64/arm64-xaluo.ll b/test/CodeGen/AArch64/arm64-xaluo.ll
index 6cffbde..0c300de 100644
--- a/test/CodeGen/AArch64/arm64-xaluo.ll
+++ b/test/CodeGen/AArch64/arm64-xaluo.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-atomic-cfg-tidy=0 | FileCheck %s
;
; Get the actual value of the overflow bit.
diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll
index 58b5d1d..26301b9 100644
--- a/test/CodeGen/AArch64/atomic-ops.ll
+++ b/test/CodeGen/AArch64/atomic-ops.ll
@@ -878,7 +878,9 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i8:
- %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
+ %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
+ %old = extractvalue { i8, i1 } %pair, 0
+
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
@@ -889,8 +891,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
; function there.
; CHECK-NEXT: cmp w[[OLD]], w0
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
- ; As above, w1 is a reasonable guess.
-; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
+; CHECK: stxrb [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
; CHECK-NOT: dmb
@@ -900,7 +901,9 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i16:
- %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst
+ %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst
+ %old = extractvalue { i16, i1 } %pair, 0
+
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
@@ -911,8 +914,7 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
; function there.
; CHECK-NEXT: cmp w[[OLD]], w0
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
- ; As above, w1 is a reasonable guess.
-; CHECK: stlxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
+; CHECK: stlxrh [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
; CHECK-NOT: dmb
@@ -922,7 +924,9 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i32:
- %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic
+ %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic
+ %old = extractvalue { i32, i1 } %pair, 0
+
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
@@ -933,8 +937,7 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
; function there.
; CHECK-NEXT: cmp w[[OLD]], w0
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
- ; As above, w1 is a reasonable guess.
-; CHECK: stlxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
+; CHECK: stlxr [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
; CHECK-NOT: dmb
@@ -944,7 +947,9 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i64:
- %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic
+ %pair = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic
+ %old = extractvalue { i64, i1 } %pair, 0
+
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
diff --git a/test/CodeGen/AArch64/blockaddress.ll b/test/CodeGen/AArch64/blockaddress.ll
index 1eec4cc..3a5dbdc 100644
--- a/test/CodeGen/AArch64/blockaddress.ll
+++ b/test/CodeGen/AArch64/blockaddress.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s
@addr = global i8* null
diff --git a/test/CodeGen/AArch64/branch-relax-asm.ll b/test/CodeGen/AArch64/branch-relax-asm.ll
new file mode 100644
index 0000000..7409c84
--- /dev/null
+++ b/test/CodeGen/AArch64/branch-relax-asm.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=aarch64-apple-ios7.0 -disable-block-placement -aarch64-tbz-offset-bits=4 -o - %s | FileCheck %s
+define i32 @test_asm_length(i32 %in) {
+; CHECK-LABEL: test_asm_length:
+
+ ; It would be more natural to use just one "tbnz %false" here, but if the
+ ; number of instructions in the asm is counted reasonably, that block is out
+ ; of the limited range we gave tbz. So branch relaxation has to invert the
+ ; condition.
+; CHECK: tbz w0, #0, [[TRUE:LBB[0-9]+_[0-9]+]]
+; CHECK: b [[FALSE:LBB[0-9]+_[0-9]+]]
+
+; CHECK: [[TRUE]]:
+; CHECK: orr w0, wzr, #0x4
+; CHECK: nop
+; CHECK: nop
+; CHECK: nop
+; CHECK: nop
+; CHECK: nop
+; CHECK: nop
+; CHECK: ret
+
+; CHECK: [[FALSE]]:
+; CHECK: ret
+
+ %val = and i32 %in, 1
+ %tst = icmp eq i32 %val, 0
+ br i1 %tst, label %true, label %false
+
+true:
+ call void asm sideeffect "nop\0A\09nop\0A\09nop\0A\09nop\0A\09nop\0A\09nop", ""()
+ ret i32 4
+
+false:
+ ret i32 0
+}
diff --git a/test/CodeGen/AArch64/breg.ll b/test/CodeGen/AArch64/breg.ll
index 591f483..9524044 100644
--- a/test/CodeGen/AArch64/breg.ll
+++ b/test/CodeGen/AArch64/breg.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck %s
@stored_label = global i8* null
diff --git a/test/CodeGen/AArch64/cmpxchg-idioms.ll b/test/CodeGen/AArch64/cmpxchg-idioms.ll
new file mode 100644
index 0000000..0c008c2
--- /dev/null
+++ b/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -0,0 +1,93 @@
+; RUN: llc -mtriple=aarch64-apple-ios7.0 -o - %s | FileCheck %s
+
+define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) {
+; CHECK-LABEL: test_return:
+
+; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxr [[LOADED:w[0-9]+]], [x0]
+; CHECK: cmp [[LOADED]], w1
+; CHECK: b.ne [[FAILED:LBB[0-9]+_[0-9]+]]
+
+; CHECK: stlxr [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x0]
+; CHECK: cbnz [[STATUS]], [[LOOP]]
+
+; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: orr w0, wzr, #0x1
+; CHECK: ret
+
+; CHECK: [[FAILED]]:
+; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: mov w0, wzr
+; CHECK: ret
+
+ %pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
+ %success = extractvalue { i32, i1 } %pair, 1
+ %conv = zext i1 %success to i32
+ ret i32 %conv
+}
+
+define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) {
+; CHECK-LABEL: test_return_bool:
+
+; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxrb [[LOADED:w[0-9]+]], [x0]
+; CHECK: cmp [[LOADED]], w1, uxtb
+; CHECK: b.ne [[FAILED:LBB[0-9]+_[0-9]+]]
+
+; CHECK: stlxrb [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x0]
+; CHECK: cbnz [[STATUS]], [[LOOP]]
+
+; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
+ ; FIXME: DAG combine should be able to deal with this.
+; CHECK: orr [[TMP:w[0-9]+]], wzr, #0x1
+; CHECK: eor w0, [[TMP]], #0x1
+; CHECK: ret
+
+; CHECK: [[FAILED]]:
+; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: mov [[TMP:w[0-9]+]], wzr
+; CHECK: eor w0, [[TMP]], #0x1
+; CHECK: ret
+
+ %pair = cmpxchg i8* %value, i8 %oldValue, i8 %newValue acq_rel monotonic
+ %success = extractvalue { i8, i1 } %pair, 1
+ %failure = xor i1 %success, 1
+ ret i1 %failure
+}
+
+define void @test_conditional(i32* %p, i32 %oldval, i32 %newval) {
+; CHECK-LABEL: test_conditional:
+
+; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxr [[LOADED:w[0-9]+]], [x0]
+; CHECK: cmp [[LOADED]], w1
+; CHECK: b.ne [[FAILED:LBB[0-9]+_[0-9]+]]
+
+; CHECK: stlxr [[STATUS:w[0-9]+]], w2, [x0]
+; CHECK: cbnz [[STATUS]], [[LOOP]]
+
+; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: b _bar
+
+; CHECK: [[FAILED]]:
+; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: b _baz
+
+ %pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
+ %success = extractvalue { i32, i1 } %pair, 1
+ br i1 %success, label %true, label %false
+
+true:
+ tail call void @bar() #2
+ br label %end
+
+false:
+ tail call void @baz() #2
+ br label %end
+
+end:
+ ret void
+}
+
+declare void @bar()
+declare void @baz()
diff --git a/test/CodeGen/AArch64/compiler-ident.ll b/test/CodeGen/AArch64/compiler-ident.ll
new file mode 100644
index 0000000..0350571
--- /dev/null
+++ b/test/CodeGen/AArch64/compiler-ident.ll
@@ -0,0 +1,12 @@
+; RUN: llc -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
+
+; ModuleID = 'compiler-ident.c'
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; CHECK: .ident "some LLVM version"
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"some LLVM version"}
+
diff --git a/test/CodeGen/AArch64/complex-fp-to-int.ll b/test/CodeGen/AArch64/complex-fp-to-int.ll
new file mode 100644
index 0000000..13cf762
--- /dev/null
+++ b/test/CodeGen/AArch64/complex-fp-to-int.ll
@@ -0,0 +1,141 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define <2 x i64> @test_v2f32_to_signed_v2i64(<2 x float> %in) {
+; CHECK-LABEL: test_v2f32_to_signed_v2i64:
+; CHECK: fcvtl [[VAL64:v[0-9]+]].2d, v0.2s
+; CHECK: fcvtzs.2d v0, [[VAL64]]
+
+ %val = fptosi <2 x float> %in to <2 x i64>
+ ret <2 x i64> %val
+}
+
+define <2 x i64> @test_v2f32_to_unsigned_v2i64(<2 x float> %in) {
+; CHECK-LABEL: test_v2f32_to_unsigned_v2i64:
+; CHECK: fcvtl [[VAL64:v[0-9]+]].2d, v0.2s
+; CHECK: fcvtzu.2d v0, [[VAL64]]
+
+ %val = fptoui <2 x float> %in to <2 x i64>
+ ret <2 x i64> %val
+}
+
+define <2 x i16> @test_v2f32_to_signed_v2i16(<2 x float> %in) {
+; CHECK-LABEL: test_v2f32_to_signed_v2i16:
+; CHECK: fcvtzs.2s v0, v0
+
+ %val = fptosi <2 x float> %in to <2 x i16>
+ ret <2 x i16> %val
+}
+
+define <2 x i16> @test_v2f32_to_unsigned_v2i16(<2 x float> %in) {
+; CHECK-LABEL: test_v2f32_to_unsigned_v2i16:
+; CHECK: fcvtzs.2s v0, v0
+
+ %val = fptoui <2 x float> %in to <2 x i16>
+ ret <2 x i16> %val
+}
+
+define <2 x i8> @test_v2f32_to_signed_v2i8(<2 x float> %in) {
+; CHECK-LABEL: test_v2f32_to_signed_v2i8:
+; CHECK: fcvtzs.2s v0, v0
+
+ %val = fptosi <2 x float> %in to <2 x i8>
+ ret <2 x i8> %val
+}
+
+define <2 x i8> @test_v2f32_to_unsigned_v2i8(<2 x float> %in) {
+; CHECK-LABEL: test_v2f32_to_unsigned_v2i8:
+; CHECK: fcvtzs.2s v0, v0
+
+ %val = fptoui <2 x float> %in to <2 x i8>
+ ret <2 x i8> %val
+}
+
+define <4 x i16> @test_v4f32_to_signed_v4i16(<4 x float> %in) {
+; CHECK-LABEL: test_v4f32_to_signed_v4i16:
+; CHECK: fcvtzs.4s [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.4h v0, [[VAL64]]
+
+ %val = fptosi <4 x float> %in to <4 x i16>
+ ret <4 x i16> %val
+}
+
+define <4 x i16> @test_v4f32_to_unsigned_v4i16(<4 x float> %in) {
+; CHECK-LABEL: test_v4f32_to_unsigned_v4i16:
+; CHECK: fcvtzu.4s [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.4h v0, [[VAL64]]
+
+ %val = fptoui <4 x float> %in to <4 x i16>
+ ret <4 x i16> %val
+}
+
+define <4 x i8> @test_v4f32_to_signed_v4i8(<4 x float> %in) {
+; CHECK-LABEL: test_v4f32_to_signed_v4i8:
+; CHECK: fcvtzs.4s [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.4h v0, [[VAL64]]
+
+ %val = fptosi <4 x float> %in to <4 x i8>
+ ret <4 x i8> %val
+}
+
+define <4 x i8> @test_v4f32_to_unsigned_v4i8(<4 x float> %in) {
+; CHECK-LABEL: test_v4f32_to_unsigned_v4i8:
+; CHECK: fcvtzs.4s [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.4h v0, [[VAL64]]
+
+ %val = fptoui <4 x float> %in to <4 x i8>
+ ret <4 x i8> %val
+}
+
+define <2 x i32> @test_v2f64_to_signed_v2i32(<2 x double> %in) {
+; CHECK-LABEL: test_v2f64_to_signed_v2i32:
+; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.2s v0, [[VAL64]]
+
+ %val = fptosi <2 x double> %in to <2 x i32>
+ ret <2 x i32> %val
+}
+
+define <2 x i32> @test_v2f64_to_unsigned_v2i32(<2 x double> %in) {
+; CHECK-LABEL: test_v2f64_to_unsigned_v2i32:
+; CHECK: fcvtzu.2d [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.2s v0, [[VAL64]]
+
+ %val = fptoui <2 x double> %in to <2 x i32>
+ ret <2 x i32> %val
+}
+
+define <2 x i16> @test_v2f64_to_signed_v2i16(<2 x double> %in) {
+; CHECK-LABEL: test_v2f64_to_signed_v2i16:
+; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.2s v0, [[VAL64]]
+
+ %val = fptosi <2 x double> %in to <2 x i16>
+ ret <2 x i16> %val
+}
+
+define <2 x i16> @test_v2f64_to_unsigned_v2i16(<2 x double> %in) {
+; CHECK-LABEL: test_v2f64_to_unsigned_v2i16:
+; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.2s v0, [[VAL64]]
+
+ %val = fptoui <2 x double> %in to <2 x i16>
+ ret <2 x i16> %val
+}
+
+define <2 x i8> @test_v2f64_to_signed_v2i8(<2 x double> %in) {
+; CHECK-LABEL: test_v2f64_to_signed_v2i8:
+; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.2s v0, [[VAL64]]
+
+ %val = fptosi <2 x double> %in to <2 x i8>
+ ret <2 x i8> %val
+}
+
+define <2 x i8> @test_v2f64_to_unsigned_v2i8(<2 x double> %in) {
+; CHECK-LABEL: test_v2f64_to_unsigned_v2i8:
+; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0
+; CHECK: xtn.2s v0, [[VAL64]]
+
+ %val = fptoui <2 x double> %in to <2 x i8>
+ ret <2 x i8> %val
+}
diff --git a/test/CodeGen/AArch64/complex-int-to-fp.ll b/test/CodeGen/AArch64/complex-int-to-fp.ll
new file mode 100644
index 0000000..5c943f9
--- /dev/null
+++ b/test/CodeGen/AArch64/complex-int-to-fp.ll
@@ -0,0 +1,164 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+; CHECK: autogen_SD19655
+; CHECK: scvtf
+; CHECK: ret
+define void @autogen_SD19655(<2 x i64>* %addr, <2 x float>* %addrfloat) {
+ %T = load <2 x i64>* %addr
+ %F = sitofp <2 x i64> %T to <2 x float>
+ store <2 x float> %F, <2 x float>* %addrfloat
+ ret void
+}
+
+define <2 x double> @test_signed_v2i32_to_v2f64(<2 x i32> %v) nounwind readnone {
+; CHECK-LABEL: test_signed_v2i32_to_v2f64:
+; CHECK: sshll.2d [[VAL64:v[0-9]+]], v0, #0
+; CHECK-NEXT: scvtf.2d v0, [[VAL64]]
+; CHECK-NEXT: ret
+ %conv = sitofp <2 x i32> %v to <2 x double>
+ ret <2 x double> %conv
+}
+
+define <2 x double> @test_unsigned_v2i32_to_v2f64(<2 x i32> %v) nounwind readnone {
+; CHECK-LABEL: test_unsigned_v2i32_to_v2f64
+; CHECK: ushll.2d [[VAL64:v[0-9]+]], v0, #0
+; CHECK-NEXT: ucvtf.2d v0, [[VAL64]]
+; CHECK-NEXT: ret
+ %conv = uitofp <2 x i32> %v to <2 x double>
+ ret <2 x double> %conv
+}
+
+define <2 x double> @test_signed_v2i16_to_v2f64(<2 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_signed_v2i16_to_v2f64:
+; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #16
+; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #16
+; CHECK: sshll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0
+; CHECK: scvtf.2d v0, [[VAL64]]
+
+ %conv = sitofp <2 x i16> %v to <2 x double>
+ ret <2 x double> %conv
+}
+define <2 x double> @test_unsigned_v2i16_to_v2f64(<2 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_unsigned_v2i16_to_v2f64
+; CHECK: movi d[[MASK:[0-9]+]], #0x00ffff0000ffff
+; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]]
+; CHECK: ushll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0
+; CHECK: ucvtf.2d v0, [[VAL64]]
+
+ %conv = uitofp <2 x i16> %v to <2 x double>
+ ret <2 x double> %conv
+}
+
+define <2 x double> @test_signed_v2i8_to_v2f64(<2 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_signed_v2i8_to_v2f64:
+; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #24
+; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #24
+; CHECK: sshll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0
+; CHECK: scvtf.2d v0, [[VAL64]]
+
+ %conv = sitofp <2 x i8> %v to <2 x double>
+ ret <2 x double> %conv
+}
+define <2 x double> @test_unsigned_v2i8_to_v2f64(<2 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_unsigned_v2i8_to_v2f64:
+; CHECK: movi d[[MASK:[0-9]+]], #0x0000ff000000ff
+; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]]
+; CHECK: ushll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0
+; CHECK: ucvtf.2d v0, [[VAL64]]
+
+ %conv = uitofp <2 x i8> %v to <2 x double>
+ ret <2 x double> %conv
+}
+
+define <2 x float> @test_signed_v2i64_to_v2f32(<2 x i64> %v) nounwind readnone {
+; CHECK-LABEL: test_signed_v2i64_to_v2f32:
+; CHECK: scvtf.2d [[VAL64:v[0-9]+]], v0
+; CHECK: fcvtn v0.2s, [[VAL64]].2d
+
+ %conv = sitofp <2 x i64> %v to <2 x float>
+ ret <2 x float> %conv
+}
+define <2 x float> @test_unsigned_v2i64_to_v2f32(<2 x i64> %v) nounwind readnone {
+; CHECK-LABEL: test_unsigned_v2i64_to_v2f32:
+; CHECK: ucvtf.2d [[VAL64:v[0-9]+]], v0
+; CHECK: fcvtn v0.2s, [[VAL64]].2d
+
+ %conv = uitofp <2 x i64> %v to <2 x float>
+ ret <2 x float> %conv
+}
+
+define <2 x float> @test_signed_v2i16_to_v2f32(<2 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_signed_v2i16_to_v2f32:
+; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #16
+; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #16
+; CHECK: scvtf.2s v0, [[VAL32]]
+
+ %conv = sitofp <2 x i16> %v to <2 x float>
+ ret <2 x float> %conv
+}
+define <2 x float> @test_unsigned_v2i16_to_v2f32(<2 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_unsigned_v2i16_to_v2f32:
+; CHECK: movi d[[MASK:[0-9]+]], #0x00ffff0000ffff
+; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]]
+; CHECK: ucvtf.2s v0, [[VAL32]]
+
+ %conv = uitofp <2 x i16> %v to <2 x float>
+ ret <2 x float> %conv
+}
+
+define <2 x float> @test_signed_v2i8_to_v2f32(<2 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_signed_v2i8_to_v2f32:
+; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #24
+; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #24
+; CHECK: scvtf.2s v0, [[VAL32]]
+
+ %conv = sitofp <2 x i8> %v to <2 x float>
+ ret <2 x float> %conv
+}
+define <2 x float> @test_unsigned_v2i8_to_v2f32(<2 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_unsigned_v2i8_to_v2f32:
+; CHECK: movi d[[MASK:[0-9]+]], #0x0000ff000000ff
+; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]]
+; CHECK: ucvtf.2s v0, [[VAL32]]
+
+ %conv = uitofp <2 x i8> %v to <2 x float>
+ ret <2 x float> %conv
+}
+
+define <4 x float> @test_signed_v4i16_to_v4f32(<4 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_signed_v4i16_to_v4f32:
+; CHECK: sshll.4s [[VAL32:v[0-9]+]], v0, #0
+; CHECK: scvtf.4s v0, [[VAL32]]
+
+ %conv = sitofp <4 x i16> %v to <4 x float>
+ ret <4 x float> %conv
+}
+
+define <4 x float> @test_unsigned_v4i16_to_v4f32(<4 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_unsigned_v4i16_to_v4f32:
+; CHECK: ushll.4s [[VAL32:v[0-9]+]], v0, #0
+; CHECK: ucvtf.4s v0, [[VAL32]]
+
+ %conv = uitofp <4 x i16> %v to <4 x float>
+ ret <4 x float> %conv
+}
+
+define <4 x float> @test_signed_v4i8_to_v4f32(<4 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_signed_v4i8_to_v4f32:
+; CHECK: shl.4h [[TMP:v[0-9]+]], v0, #8
+; CHECK: sshr.4h [[VAL16:v[0-9]+]], [[TMP]], #8
+; CHECK: sshll.4s [[VAL32:v[0-9]+]], [[VAL16]], #0
+; CHECK: scvtf.4s v0, [[VAL32]]
+
+ %conv = sitofp <4 x i8> %v to <4 x float>
+ ret <4 x float> %conv
+}
+define <4 x float> @test_unsigned_v4i8_to_v4f32(<4 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_unsigned_v4i8_to_v4f32:
+; CHECK: bic.4h v0, #0xff, lsl #8
+; CHECK: ushll.4s [[VAL32:v[0-9]+]], v0, #0
+; CHECK: ucvtf.4s v0, [[VAL32]]
+
+ %conv = uitofp <4 x i8> %v to <4 x float>
+ ret <4 x float> %conv
+}
diff --git a/test/CodeGen/AArch64/directcond.ll b/test/CodeGen/AArch64/directcond.ll
index 1b51928..fbea4a6 100644
--- a/test/CodeGen/AArch64/directcond.ll
+++ b/test/CodeGen/AArch64/directcond.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 -aarch64-atomic-cfg-tidy=0 | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -aarch64-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-NOFP %s
define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) {
; CHECK-LABEL: test_select_i32:
diff --git a/test/CodeGen/AArch64/f16-convert.ll b/test/CodeGen/AArch64/f16-convert.ll
new file mode 100644
index 0000000..6fabdc5
--- /dev/null
+++ b/test/CodeGen/AArch64/f16-convert.ll
@@ -0,0 +1,254 @@
+; RUN: llc < %s -mtriple=arm64-apple-ios -asm-verbose=false | FileCheck %s
+
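+; These tests check that the half-to-float/double loads (and the stores of
+; converted halves) fold each AArch64 addressing mode: plain base register,
+; register offset with sxtw or lsl #1 scaling, scaled immediate offsets
+; (ldr/str), and unscaled negative offsets (ldur/stur).
+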
+define float @load0(i16* nocapture readonly %a) nounwind {
+; CHECK-LABEL: load0:
+; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0]
+; CHECK-NEXT: fcvt s0, [[HREG]]
+; CHECK-NEXT: ret
+
+ %tmp = load i16* %a, align 2
+ %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ ret float %tmp1
+}
+
+define double @load1(i16* nocapture readonly %a) nounwind {
+; CHECK-LABEL: load1:
+; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0]
+; CHECK-NEXT: fcvt d0, [[HREG]]
+; CHECK-NEXT: ret
+
+ %tmp = load i16* %a, align 2
+ %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ %conv = fpext float %tmp1 to double
+ ret double %conv
+}
+
+define float @load2(i16* nocapture readonly %a, i32 %i) nounwind {
+; CHECK-LABEL: load2:
+; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, w1, sxtw #1]
+; CHECK-NEXT: fcvt s0, [[HREG]]
+; CHECK-NEXT: ret
+
+ %idxprom = sext i32 %i to i64
+ %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
+ %tmp = load i16* %arrayidx, align 2
+ %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ ret float %tmp1
+}
+
+define double @load3(i16* nocapture readonly %a, i32 %i) nounwind {
+; CHECK-LABEL: load3:
+; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, w1, sxtw #1]
+; CHECK-NEXT: fcvt d0, [[HREG]]
+; CHECK-NEXT: ret
+
+ %idxprom = sext i32 %i to i64
+ %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
+ %tmp = load i16* %arrayidx, align 2
+ %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ %conv = fpext float %tmp1 to double
+ ret double %conv
+}
+
+define float @load4(i16* nocapture readonly %a, i64 %i) nounwind {
+; CHECK-LABEL: load4:
+; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, x1, lsl #1]
+; CHECK-NEXT: fcvt s0, [[HREG]]
+; CHECK-NEXT: ret
+
+ %arrayidx = getelementptr inbounds i16* %a, i64 %i
+ %tmp = load i16* %arrayidx, align 2
+ %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ ret float %tmp1
+}
+
+define double @load5(i16* nocapture readonly %a, i64 %i) nounwind {
+; CHECK-LABEL: load5:
+; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, x1, lsl #1]
+; CHECK-NEXT: fcvt d0, [[HREG]]
+; CHECK-NEXT: ret
+
+ %arrayidx = getelementptr inbounds i16* %a, i64 %i
+ %tmp = load i16* %arrayidx, align 2
+ %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ %conv = fpext float %tmp1 to double
+ ret double %conv
+}
+
+define float @load6(i16* nocapture readonly %a) nounwind {
+; CHECK-LABEL: load6:
+; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, #20]
+; CHECK-NEXT: fcvt s0, [[HREG]]
+; CHECK-NEXT: ret
+
+ %arrayidx = getelementptr inbounds i16* %a, i64 10
+ %tmp = load i16* %arrayidx, align 2
+ %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ ret float %tmp1
+}
+
+define double @load7(i16* nocapture readonly %a) nounwind {
+; CHECK-LABEL: load7:
+; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, #20]
+; CHECK-NEXT: fcvt d0, [[HREG]]
+; CHECK-NEXT: ret
+
+ %arrayidx = getelementptr inbounds i16* %a, i64 10
+ %tmp = load i16* %arrayidx, align 2
+ %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ %conv = fpext float %tmp1 to double
+ ret double %conv
+}
+
+define float @load8(i16* nocapture readonly %a) nounwind {
+; CHECK-LABEL: load8:
+; CHECK-NEXT: ldur [[HREG:h[0-9]+]], [x0, #-20]
+; CHECK-NEXT: fcvt s0, [[HREG]]
+; CHECK-NEXT: ret
+
+ %arrayidx = getelementptr inbounds i16* %a, i64 -10
+ %tmp = load i16* %arrayidx, align 2
+ %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ ret float %tmp1
+}
+
+define double @load9(i16* nocapture readonly %a) nounwind {
+; CHECK-LABEL: load9:
+; CHECK-NEXT: ldur [[HREG:h[0-9]+]], [x0, #-20]
+; CHECK-NEXT: fcvt d0, [[HREG]]
+; CHECK-NEXT: ret
+
+ %arrayidx = getelementptr inbounds i16* %a, i64 -10
+ %tmp = load i16* %arrayidx, align 2
+ %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ %conv = fpext float %tmp1 to double
+ ret double %conv
+}
+
+define void @store0(i16* nocapture %a, float %val) nounwind {
+; CHECK-LABEL: store0:
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: str h0, [x0]
+; CHECK-NEXT: ret
+
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+ store i16 %tmp, i16* %a, align 2
+ ret void
+}
+
+define void @store1(i16* nocapture %a, double %val) nounwind {
+; CHECK-LABEL: store1:
+; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: str h0, [x0]
+; CHECK-NEXT: ret
+
+ %conv = fptrunc double %val to float
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+ store i16 %tmp, i16* %a, align 2
+ ret void
+}
+
+define void @store2(i16* nocapture %a, i32 %i, float %val) nounwind {
+; CHECK-LABEL: store2:
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: str h0, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+ %idxprom = sext i32 %i to i64
+ %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
+ store i16 %tmp, i16* %arrayidx, align 2
+ ret void
+}
+
+define void @store3(i16* nocapture %a, i32 %i, double %val) nounwind {
+; CHECK-LABEL: store3:
+; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: str h0, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+
+ %conv = fptrunc double %val to float
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+ %idxprom = sext i32 %i to i64
+ %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
+ store i16 %tmp, i16* %arrayidx, align 2
+ ret void
+}
+
+define void @store4(i16* nocapture %a, i64 %i, float %val) nounwind {
+; CHECK-LABEL: store4:
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: str h0, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+ %arrayidx = getelementptr inbounds i16* %a, i64 %i
+ store i16 %tmp, i16* %arrayidx, align 2
+ ret void
+}
+
+define void @store5(i16* nocapture %a, i64 %i, double %val) nounwind {
+; CHECK-LABEL: store5:
+; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: str h0, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+
+ %conv = fptrunc double %val to float
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+ %arrayidx = getelementptr inbounds i16* %a, i64 %i
+ store i16 %tmp, i16* %arrayidx, align 2
+ ret void
+}
+
+define void @store6(i16* nocapture %a, float %val) nounwind {
+; CHECK-LABEL: store6:
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: str h0, [x0, #20]
+; CHECK-NEXT: ret
+
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+ %arrayidx = getelementptr inbounds i16* %a, i64 10
+ store i16 %tmp, i16* %arrayidx, align 2
+ ret void
+}
+
+define void @store7(i16* nocapture %a, double %val) nounwind {
+; CHECK-LABEL: store7:
+; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: str h0, [x0, #20]
+; CHECK-NEXT: ret
+
+ %conv = fptrunc double %val to float
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+ %arrayidx = getelementptr inbounds i16* %a, i64 10
+ store i16 %tmp, i16* %arrayidx, align 2
+ ret void
+}
+
+define void @store8(i16* nocapture %a, float %val) nounwind {
+; CHECK-LABEL: store8:
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: stur h0, [x0, #-20]
+; CHECK-NEXT: ret
+
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+ %arrayidx = getelementptr inbounds i16* %a, i64 -10
+ store i16 %tmp, i16* %arrayidx, align 2
+ ret void
+}
+
+define void @store9(i16* nocapture %a, double %val) nounwind {
+; CHECK-LABEL: store9:
+; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: stur h0, [x0, #-20]
+; CHECK-NEXT: ret
+
+ %conv = fptrunc double %val to float
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+ %arrayidx = getelementptr inbounds i16* %a, i64 -10
+ store i16 %tmp, i16* %arrayidx, align 2
+ ret void
+}
+
+declare i16 @llvm.convert.to.fp16(float) nounwind readnone
+declare float @llvm.convert.from.fp16(i16) nounwind readnone
diff --git a/test/CodeGen/AArch64/fast-isel-mul.ll b/test/CodeGen/AArch64/fast-isel-mul.ll
new file mode 100644
index 0000000..d02c67f
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-mul.ll
@@ -0,0 +1,40 @@
+; RUN: llc -fast-isel -fast-isel-abort -mtriple=aarch64 -o - %s | FileCheck %s
+
+@var8 = global i8 0
+@var16 = global i16 0
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_mul8(i8 %lhs, i8 %rhs) {
+; CHECK-LABEL: test_mul8:
+; CHECK: mul w0, w0, w1
+ %prod = mul i8 %lhs, %rhs
+ store i8 %prod, i8* @var8
+ ret void
+}
+
+define void @test_mul16(i16 %lhs, i16 %rhs) {
+; CHECK-LABEL: test_mul16:
+; CHECK: mul w0, w0, w1
+ %prod = mul i16 %lhs, %rhs
+ store i16 %prod, i16* @var16
+ ret void
+}
+
+define void @test_mul32(i32 %lhs, i32 %rhs) {
+; CHECK-LABEL: test_mul32:
+; CHECK: mul w0, w0, w1
+ %prod = mul i32 %lhs, %rhs
+ store i32 %prod, i32* @var32
+ ret void
+}
+
+define void @test_mul64(i64 %lhs, i64 %rhs) {
+; CHECK-LABEL: test_mul64:
+; CHECK: mul x0, x0, x1
+ %prod = mul i64 %lhs, %rhs
+ store i64 %prod, i64* @var64
+ ret void
+}
diff --git a/test/CodeGen/AArch64/flags-multiuse.ll b/test/CodeGen/AArch64/flags-multiuse.ll
index c9b0b9f..77bbcdd 100644
--- a/test/CodeGen/AArch64/flags-multiuse.ll
+++ b/test/CodeGen/AArch64/flags-multiuse.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -o - %s | FileCheck %s
; LLVM should be able to cope with multiple uses of the same flag-setting
; instruction at different points of a routine. Either by rematerializing the
diff --git a/test/CodeGen/AArch64/funcptr_cast.ll b/test/CodeGen/AArch64/funcptr_cast.ll
new file mode 100644
index 0000000..a00b7bc
--- /dev/null
+++ b/test/CodeGen/AArch64/funcptr_cast.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+define i8 @test() {
+; CHECK-LABEL: @test
+; CHECK: adrp {{x[0-9]+}}, foo
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, :lo12:foo
+; CHECK: ldrb w0, [{{x[0-9]+}}]
+entry:
+ %0 = load i8* bitcast (void (...)* @foo to i8*), align 1
+ ret i8 %0
+}
+
+declare void @foo(...)
diff --git a/test/CodeGen/AArch64/global-merge-1.ll b/test/CodeGen/AArch64/global-merge-1.ll
new file mode 100644
index 0000000..68aba5e
--- /dev/null
+++ b/test/CodeGen/AArch64/global-merge-1.ll
@@ -0,0 +1,26 @@
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -enable-global-merge -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -enable-global-merge -global-merge-on-external -o - | FileCheck %s
+
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -global-merge-on-external -o - | FileCheck %s
+
+; RUN: llc %s -mtriple=aarch64-apple-ios -enable-global-merge -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+; RUN: llc %s -mtriple=aarch64-apple-ios -enable-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+
+@m = internal global i32 0, align 4
+@n = internal global i32 0, align 4
+
+define void @f1(i32 %a1, i32 %a2) {
+;CHECK-APPLE-IOS: adrp x8, __MergedGlobals@PAGE
+;CHECK-APPLE-IOS-NOT: adrp
+;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals@PAGEOFF
+ store i32 %a1, i32* @m, align 4
+ store i32 %a2, i32* @n, align 4
+ ret void
+}
+
+;CHECK: .type _MergedGlobals,@object // @_MergedGlobals
+;CHECK: .local _MergedGlobals
+;CHECK: .comm _MergedGlobals,8,8
+
+;CHECK-APPLE-IOS: .zerofill __DATA,__bss,__MergedGlobals,8,3 ; @_MergedGlobals
diff --git a/test/CodeGen/AArch64/global-merge-2.ll b/test/CodeGen/AArch64/global-merge-2.ll
new file mode 100644
index 0000000..a773566
--- /dev/null
+++ b/test/CodeGen/AArch64/global-merge-2.ll
@@ -0,0 +1,51 @@
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -enable-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-apple-ios -enable-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+
+@x = global i32 0, align 4
+@y = global i32 0, align 4
+@z = global i32 0, align 4
+
+define void @f1(i32 %a1, i32 %a2) {
+;CHECK-APPLE-IOS-LABEL: _f1:
+;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE
+;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF
+;CHECK-APPLE-IOS-NOT: adrp
+ store i32 %a1, i32* @x, align 4
+ store i32 %a2, i32* @y, align 4
+ ret void
+}
+
+define void @g1(i32 %a1, i32 %a2) {
+;CHECK-APPLE-IOS-LABEL: _g1:
+;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE
+;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF
+;CHECK-APPLE-IOS-NOT: adrp
+ store i32 %a1, i32* @y, align 4
+ store i32 %a2, i32* @z, align 4
+ ret void
+}
+
+;CHECK: .type _MergedGlobals_x,@object // @_MergedGlobals_x
+;CHECK: .globl _MergedGlobals_x
+;CHECK: .align 3
+;CHECK: _MergedGlobals_x:
+;CHECK: .size _MergedGlobals_x, 12
+
+;CHECK: .globl x
+;CHECK: x = _MergedGlobals_x
+;CHECK: .globl y
+;CHECK: y = _MergedGlobals_x+4
+;CHECK: .globl z
+;CHECK: z = _MergedGlobals_x+8
+
+;CHECK-APPLE-IOS: .globl __MergedGlobals_x ; @_MergedGlobals_x
+;CHECK-APPLE-IOS: .zerofill __DATA,__common,__MergedGlobals_x,12,3
+
+;CHECK-APPLE-IOS: .globl _x
+;CHECK-APPLE-IOS: _x = __MergedGlobals_x
+;CHECK-APPLE-IOS: .globl _y
+;CHECK-APPLE-IOS: _y = __MergedGlobals_x+4
+;CHECK-APPLE-IOS: .globl _z
+;CHECK-APPLE-IOS: _z = __MergedGlobals_x+8
+;CHECK-APPLE-IOS: .subsections_via_symbols
diff --git a/test/CodeGen/AArch64/global-merge-3.ll b/test/CodeGen/AArch64/global-merge-3.ll
new file mode 100644
index 0000000..d455d40
--- /dev/null
+++ b/test/CodeGen/AArch64/global-merge-3.ll
@@ -0,0 +1,51 @@
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -enable-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-apple-ios -enable-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+
+@x = global [1000 x i32] zeroinitializer, align 1
+@y = global [1000 x i32] zeroinitializer, align 1
+@z = internal global i32 1, align 4
+
+define void @f1(i32 %a1, i32 %a2, i32 %a3) {
+;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE
+;CHECK-APPLE-IOS-NOT: adrp
+;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF
+;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_y@PAGE
+;CHECK-APPLE-IOS: add x9, x9, __MergedGlobals_y@PAGEOFF
+ %x3 = getelementptr inbounds [1000 x i32]* @x, i32 0, i64 3
+ %y3 = getelementptr inbounds [1000 x i32]* @y, i32 0, i64 3
+ store i32 %a1, i32* %x3, align 4
+ store i32 %a2, i32* %y3, align 4
+ store i32 %a3, i32* @z, align 4
+ ret void
+}
+
+;CHECK: .type _MergedGlobals_x,@object // @_MergedGlobals_x
+;CHECK: .globl _MergedGlobals_x
+;CHECK: .align 4
+;CHECK: _MergedGlobals_x:
+;CHECK: .size _MergedGlobals_x, 4004
+
+;CHECK: .type _MergedGlobals_y,@object // @_MergedGlobals_y
+;CHECK: .globl _MergedGlobals_y
+;CHECK: _MergedGlobals_y:
+;CHECK: .size _MergedGlobals_y, 4000
+
+;CHECK-APPLE-IOS: .globl __MergedGlobals_x ; @_MergedGlobals_x
+;CHECK-APPLE-IOS: .align 4
+;CHECK-APPLE-IOS: __MergedGlobals_x:
+;CHECK-APPLE-IOS: .long 1
+;CHECK-APPLE-IOS: .space 4000
+
+;CHECK-APPLE-IOS: .globl __MergedGlobals_y ; @_MergedGlobals_y
+;CHECK-APPLE-IOS: .zerofill __DATA,__common,__MergedGlobals_y,4000,4
+
+;CHECK: .globl x
+;CHECK: x = _MergedGlobals_x+4
+;CHECK: .globl y
+;CHECK: y = _MergedGlobals_y
+
+;CHECK-APPLE-IOS:.globl _x
+;CHECK-APPLE-IOS: _x = __MergedGlobals_x+4
+;CHECK-APPLE-IOS:.globl _y
+;CHECK-APPLE-IOS: _y = __MergedGlobals_y
diff --git a/test/CodeGen/AArch64/global-merge-4.ll b/test/CodeGen/AArch64/global-merge-4.ll
new file mode 100644
index 0000000..a525ccd
--- /dev/null
+++ b/test/CodeGen/AArch64/global-merge-4.ll
@@ -0,0 +1,73 @@
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -o - | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
+target triple = "arm64-apple-ios7.0.0"
+
+@bar = internal global [5 x i32] zeroinitializer, align 4
+@baz = internal global [5 x i32] zeroinitializer, align 4
+@foo = internal global [5 x i32] zeroinitializer, align 4
+
+; Function Attrs: nounwind ssp
+define internal void @initialize() #0 {
+ %1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+ store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4
+ %2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+ store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4
+ %3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+ store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4
+ %4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+ store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4
+ %5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+ store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4
+ %6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+ store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4
+ %7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+ store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4
+ %8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+ store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4
+ %9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+ store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4
+ %10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
+ store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4
+ ret void
+}
+
+declare i32 @calc(...)
+
+; Function Attrs: nounwind ssp
+define internal void @calculate() #0 {
+ %1 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4
+ %2 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4
+ %3 = mul nsw i32 %2, %1
+ store i32 %3, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0), align 4
+ %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4
+ %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4
+ %6 = mul nsw i32 %5, %4
+ store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 1), align 4
+ %7 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4
+ %8 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4
+ %9 = mul nsw i32 %8, %7
+ store i32 %9, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 2), align 4
+ %10 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4
+ %11 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4
+ %12 = mul nsw i32 %11, %10
+ store i32 %12, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 3), align 4
+ %13 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4
+ %14 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4
+ %15 = mul nsw i32 %14, %13
+ store i32 %15, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 4), align 4
+ ret void
+}
+
+; Function Attrs: nounwind readnone ssp
+define internal i32* @returnFoo() #1 {
+ ret i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0)
+}
+
+;CHECK: .type _MergedGlobals,@object // @_MergedGlobals
+;CHECK: .local _MergedGlobals
+;CHECK: .comm _MergedGlobals,60,16
+
+attributes #0 = { nounwind ssp }
+attributes #1 = { nounwind readnone ssp }
+attributes #2 = { nounwind }
diff --git a/test/CodeGen/AArch64/global-merge.ll b/test/CodeGen/AArch64/global-merge.ll
new file mode 100644
index 0000000..aed1dc4
--- /dev/null
+++ b/test/CodeGen/AArch64/global-merge.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck --check-prefix=NO-MERGE %s
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O0 -global-merge-on-external=true | FileCheck --check-prefix=NO-MERGE %s
+
+; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE
+; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 -global-merge-on-external=true | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE
+
+; FIXME: add O1/O2 test for aarch64-none-linux-gnu and aarch64-apple-ios
+
+@m = internal global i32 0, align 4
+@n = internal global i32 0, align 4
+
+define void @f1(i32 %a1, i32 %a2) {
+; CHECK-LABEL: f1:
+; CHECK: adrp x{{[0-9]+}}, _MergedGlobals
+; CHECK-NOT: adrp
+
+; CHECK-APPLE-IOS-LABEL: f1:
+; CHECK-APPLE-IOS: adrp x{{[0-9]+}}, __MergedGlobals
+; CHECK-APPLE-IOS-NOT: adrp
+ store i32 %a1, i32* @m, align 4
+ store i32 %a2, i32* @n, align 4
+ ret void
+}
+
+; CHECK: .local _MergedGlobals
+; CHECK: .comm _MergedGlobals,8,8
+; NO-MERGE-NOT: .local _MergedGlobals
+
+; CHECK-APPLE-IOS: .zerofill __DATA,__bss,__MergedGlobals,8,3
+; CHECK-APPLE-IOS-NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,8,3
diff --git a/test/CodeGen/AArch64/i128-fast-isel-fallback.ll b/test/CodeGen/AArch64/i128-fast-isel-fallback.ll
new file mode 100644
index 0000000..1cffbf3
--- /dev/null
+++ b/test/CodeGen/AArch64/i128-fast-isel-fallback.ll
@@ -0,0 +1,18 @@
+; RUN: llc -O0 -mtriple=arm64-apple-ios7.0 -mcpu=generic < %s | FileCheck %s
+
+; Function Attrs: nounwind ssp
+define void @test1() {
+ %1 = sext i32 0 to i128
+ call void @test2(i128 %1)
+ ret void
+
+; The i128 is 0, so we can test that it is propagated into the pair of x
+; registers that make up the i128.
+
+; CHECK: mov x0, xzr
+; CHECK: mov x1, x0
+; CHECK: bl _test2
+
+}
+
+declare void @test2(i128)
diff --git a/test/CodeGen/AArch64/inlineasm-ldr-pseudo.ll b/test/CodeGen/AArch64/inlineasm-ldr-pseudo.ll
new file mode 100644
index 0000000..645214a
--- /dev/null
+++ b/test/CodeGen/AArch64/inlineasm-ldr-pseudo.ll
@@ -0,0 +1,26 @@
+; This test must use -filetype=obj because when we output assembly, the
+; current code path bypasses the parser and just writes the raw text out to
+; the Streamer. The inline asm needs to actually be parsed to demonstrate
+; the bug; going the asm->obj route does not show the issue.
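+; (Background on the pseudo being parsed: "ldr Xd, =imm" either expands to a
+; move-immediate, as with movz in @foo where 0xabcd fits in one instruction,
+; or places the constant in a literal pool after the function and loads it
+; pc-relatively, as in @bar.)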
+; RUN: llc -mtriple=aarch64 < %s -filetype=obj | llvm-objdump -arch=aarch64 -d - | FileCheck %s
+
+; CHECK-LABEL: foo:
+; CHECK: a0 79 95 d2 movz x0, #0xabcd
+; CHECK: c0 03 5f d6 ret
+define i32 @foo() nounwind {
+entry:
+ %0 = tail call i32 asm sideeffect "ldr $0,=0xabcd", "=r"() nounwind
+ ret i32 %0
+}
+; CHECK-LABEL: bar:
+; CHECK: 40 00 00 58 ldr x0, #8
+; CHECK: c0 03 5f d6 ret
+; Make sure the constant pool entry comes after the return
+; CHECK-LABEL: $d.1:
+define i32 @bar() nounwind {
+entry:
+ %0 = tail call i32 asm sideeffect "ldr $0,=0x10001", "=r"() nounwind
+ ret i32 %0
+}
+
index 1dfb789..69fbd99 100644
--- a/test/CodeGen/AArch64/jump-table.ll
+++ b/test/CodeGen/AArch64/jump-table.ll
@@ -1,6 +1,6 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
-; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -o - %s | FileCheck --check-prefix=CHECK-PIC %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-LARGE %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -aarch64-atomic-cfg-tidy=0 -o - %s | FileCheck --check-prefix=CHECK-PIC %s
define i32 @test_jumptable(i32 %in) {
; CHECK: test_jumptable
diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll
index 1ce5c95..e4f4295 100644
--- a/test/CodeGen/AArch64/ldst-opt.ll
+++ b/test/CodeGen/AArch64/ldst-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -o - %s | FileCheck %s
; This file contains tests for the AArch64 load/store optimizer.
@@ -166,6 +166,217 @@ bar:
; Check the following transform:
;
+; add x8, x8, #16
+; ...
+; ldr X, [x8]
+; ->
+; ldr X, [x8, #16]!
+;
+; with X being either w0, x0, s0, d0 or q0.
+
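+; Background on the addressing modes (not extra checks): "[x8, #16]!" is
+; pre-indexed writeback, so the base register is updated before the access,
+; while "[x8], #16" in the post-indexed tests further down updates it after
+; the access; in both forms the separate add/sub of the base is folded away.
+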
+%pre.struct.i32 = type { i32, i32, i32}
+%pre.struct.i64 = type { i32, i64, i64}
+%pre.struct.i128 = type { i32, <2 x i64>, <2 x i64>}
+%pre.struct.float = type { i32, float, float}
+%pre.struct.double = type { i32, double, double}
+
+define i32 @load-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond,
+ %pre.struct.i32* %load2) nounwind {
+; CHECK-LABEL: load-pre-indexed-word2
+; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #4]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.i32** %this
+ %gep1 = getelementptr inbounds %pre.struct.i32* %load1, i64 0, i32 1
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.i32* %load2, i64 0, i32 2
+ br label %return
+return:
+ %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ %ret = load i32* %retptr
+ ret i32 %ret
+}
+
+define i64 @load-pre-indexed-doubleword2(%pre.struct.i64** %this, i1 %cond,
+ %pre.struct.i64* %load2) nounwind {
+; CHECK-LABEL: load-pre-indexed-doubleword2
+; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #8]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.i64** %this
+ %gep1 = getelementptr inbounds %pre.struct.i64* %load1, i64 0, i32 1
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.i64* %load2, i64 0, i32 2
+ br label %return
+return:
+ %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ %ret = load i64* %retptr
+ ret i64 %ret
+}
+
+define <2 x i64> @load-pre-indexed-quadword2(%pre.struct.i128** %this, i1 %cond,
+ %pre.struct.i128* %load2) nounwind {
+; CHECK-LABEL: load-pre-indexed-quadword2
+; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #16]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.i128** %this
+ %gep1 = getelementptr inbounds %pre.struct.i128* %load1, i64 0, i32 1
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.i128* %load2, i64 0, i32 2
+ br label %return
+return:
+ %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ %ret = load <2 x i64>* %retptr
+ ret <2 x i64> %ret
+}
+
+define float @load-pre-indexed-float2(%pre.struct.float** %this, i1 %cond,
+ %pre.struct.float* %load2) nounwind {
+; CHECK-LABEL: load-pre-indexed-float2
+; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #4]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.float** %this
+ %gep1 = getelementptr inbounds %pre.struct.float* %load1, i64 0, i32 1
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.float* %load2, i64 0, i32 2
+ br label %return
+return:
+ %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ %ret = load float* %retptr
+ ret float %ret
+}
+
+define double @load-pre-indexed-double2(%pre.struct.double** %this, i1 %cond,
+ %pre.struct.double* %load2) nounwind {
+; CHECK-LABEL: load-pre-indexed-double2
+; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #8]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.double** %this
+ %gep1 = getelementptr inbounds %pre.struct.double* %load1, i64 0, i32 1
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.double* %load2, i64 0, i32 2
+ br label %return
+return:
+ %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ %ret = load double* %retptr
+ ret double %ret
+}
+
+; Check the following transform:
+;
+; add x8, x8, #16
+; ...
+; str X, [x8]
+; ->
+; str X, [x8, #16]!
+;
+; with X being either w0, x0, s0, d0 or q0.
+
+define void @store-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond,
+ %pre.struct.i32* %load2,
+ i32 %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-word2
+; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.i32** %this
+ %gep1 = getelementptr inbounds %pre.struct.i32* %load1, i64 0, i32 1
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.i32* %load2, i64 0, i32 2
+ br label %return
+return:
+ %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ store i32 %val, i32* %retptr
+ ret void
+}
+
+define void @store-pre-indexed-doubleword2(%pre.struct.i64** %this, i1 %cond,
+ %pre.struct.i64* %load2,
+ i64 %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-doubleword2
+; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #8]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.i64** %this
+ %gep1 = getelementptr inbounds %pre.struct.i64* %load1, i64 0, i32 1
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.i64* %load2, i64 0, i32 2
+ br label %return
+return:
+ %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ store i64 %val, i64* %retptr
+ ret void
+}
+
+define void @store-pre-indexed-quadword2(%pre.struct.i128** %this, i1 %cond,
+ %pre.struct.i128* %load2,
+ <2 x i64> %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-quadword2
+; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #16]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.i128** %this
+ %gep1 = getelementptr inbounds %pre.struct.i128* %load1, i64 0, i32 1
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.i128* %load2, i64 0, i32 2
+ br label %return
+return:
+ %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ store <2 x i64> %val, <2 x i64>* %retptr
+ ret void
+}
+
+define void @store-pre-indexed-float2(%pre.struct.float** %this, i1 %cond,
+ %pre.struct.float* %load2,
+ float %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-float2
+; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #4]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.float** %this
+ %gep1 = getelementptr inbounds %pre.struct.float* %load1, i64 0, i32 1
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.float* %load2, i64 0, i32 2
+ br label %return
+return:
+ %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ store float %val, float* %retptr
+ ret void
+}
+
+define void @store-pre-indexed-double2(%pre.struct.double** %this, i1 %cond,
+ %pre.struct.double* %load2,
+ double %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-double2
+; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #8]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.double** %this
+ %gep1 = getelementptr inbounds %pre.struct.double* %load1, i64 0, i32 1
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.double* %load2, i64 0, i32 2
+ br label %return
+return:
+ %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ store double %val, double* %retptr
+ ret void
+}
+
+; Check the following transform:
+;
; ldr X, [x20]
; ...
; add x20, x20, #32
@@ -294,8 +505,263 @@ exit:
ret void
}
+; Check the following transform:
+;
+; str X, [x20]
+; ...
+; add x20, x20, #32
+; ->
+; str X, [x20], #32
+;
+; with X being either w0, x0, s0, d0 or q0.
+
+define void @store-post-indexed-word(i32* %array, i64 %count, i32 %val) nounwind {
+; CHECK-LABEL: store-post-indexed-word
+; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #16
+entry:
+ %gep1 = getelementptr i32* %array, i64 2
+ br label %body
+
+body:
+ %iv2 = phi i32* [ %gep3, %body ], [ %gep1, %entry ]
+ %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
+ %gep2 = getelementptr i32* %iv2, i64 -1
+ %load = load i32* %gep2
+ call void @use-word(i32 %load)
+ store i32 %val, i32* %iv2
+ %iv.next = add i64 %iv, -4
+ %gep3 = getelementptr i32* %iv2, i64 4
+ %cond = icmp eq i64 %iv.next, 0
+ br i1 %cond, label %exit, label %body
+
+exit:
+ ret void
+}
+
+define void @store-post-indexed-doubleword(i64* %array, i64 %count, i64 %val) nounwind {
+; CHECK-LABEL: store-post-indexed-doubleword
+; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #32
+entry:
+ %gep1 = getelementptr i64* %array, i64 2
+ br label %body
+
+body:
+ %iv2 = phi i64* [ %gep3, %body ], [ %gep1, %entry ]
+ %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
+ %gep2 = getelementptr i64* %iv2, i64 -1
+ %load = load i64* %gep2
+ call void @use-doubleword(i64 %load)
+ store i64 %val, i64* %iv2
+ %iv.next = add i64 %iv, -4
+ %gep3 = getelementptr i64* %iv2, i64 4
+ %cond = icmp eq i64 %iv.next, 0
+ br i1 %cond, label %exit, label %body
+
+exit:
+ ret void
+}
+
+define void @store-post-indexed-quadword(<2 x i64>* %array, i64 %count, <2 x i64> %val) nounwind {
+; CHECK-LABEL: store-post-indexed-quadword
+; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #64
+entry:
+ %gep1 = getelementptr <2 x i64>* %array, i64 2
+ br label %body
+
+body:
+ %iv2 = phi <2 x i64>* [ %gep3, %body ], [ %gep1, %entry ]
+ %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
+ %gep2 = getelementptr <2 x i64>* %iv2, i64 -1
+ %load = load <2 x i64>* %gep2
+ call void @use-quadword(<2 x i64> %load)
+ store <2 x i64> %val, <2 x i64>* %iv2
+ %iv.next = add i64 %iv, -4
+ %gep3 = getelementptr <2 x i64>* %iv2, i64 4
+ %cond = icmp eq i64 %iv.next, 0
+ br i1 %cond, label %exit, label %body
+
+exit:
+ ret void
+}
+
+define void @store-post-indexed-float(float* %array, i64 %count, float %val) nounwind {
+; CHECK-LABEL: store-post-indexed-float
+; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #16
+entry:
+ %gep1 = getelementptr float* %array, i64 2
+ br label %body
+
+body:
+ %iv2 = phi float* [ %gep3, %body ], [ %gep1, %entry ]
+ %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
+ %gep2 = getelementptr float* %iv2, i64 -1
+ %load = load float* %gep2
+ call void @use-float(float %load)
+ store float %val, float* %iv2
+ %iv.next = add i64 %iv, -4
+ %gep3 = getelementptr float* %iv2, i64 4
+ %cond = icmp eq i64 %iv.next, 0
+ br i1 %cond, label %exit, label %body
+
+exit:
+ ret void
+}
+
+define void @store-post-indexed-double(double* %array, i64 %count, double %val) nounwind {
+; CHECK-LABEL: store-post-indexed-double
+; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #32
+entry:
+ %gep1 = getelementptr double* %array, i64 2
+ br label %body
+
+body:
+ %iv2 = phi double* [ %gep3, %body ], [ %gep1, %entry ]
+ %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
+ %gep2 = getelementptr double* %iv2, i64 -1
+ %load = load double* %gep2
+ call void @use-double(double %load)
+ store double %val, double* %iv2
+ %iv.next = add i64 %iv, -4
+ %gep3 = getelementptr double* %iv2, i64 4
+ %cond = icmp eq i64 %iv.next, 0
+ br i1 %cond, label %exit, label %body
+
+exit:
+ ret void
+}
+
declare void @use-word(i32)
declare void @use-doubleword(i64)
declare void @use-quadword(<2 x i64>)
declare void @use-float(float)
declare void @use-double(double)
+
+; Check the following transform:
+;
+; (ldr|str) X, [x20]
+; ...
+; sub x20, x20, #16
+; ->
+; (ldr|str) X, [x20], #-16
+;
+; with X being either w0, x0, s0, d0 or q0.
+
+define void @post-indexed-sub-word(i32* %a, i32* %b, i64 %count) nounwind {
+; CHECK-LABEL: post-indexed-sub-word
+; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #-8
+; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #-8
+ br label %for.body
+for.body:
+ %phi1 = phi i32* [ %gep4, %for.body ], [ %b, %0 ]
+ %phi2 = phi i32* [ %gep3, %for.body ], [ %a, %0 ]
+ %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
+ %gep1 = getelementptr i32* %phi1, i64 -1
+ %load1 = load i32* %gep1
+ %gep2 = getelementptr i32* %phi2, i64 -1
+ store i32 %load1, i32* %gep2
+ %load2 = load i32* %phi1
+ store i32 %load2, i32* %phi2
+ %dec.i = add nsw i64 %i, -1
+ %gep3 = getelementptr i32* %phi2, i64 -2
+ %gep4 = getelementptr i32* %phi1, i64 -2
+ %cond = icmp sgt i64 %dec.i, 0
+ br i1 %cond, label %for.body, label %end
+end:
+ ret void
+}
+
+define void @post-indexed-sub-doubleword(i64* %a, i64* %b, i64 %count) nounwind {
+; CHECK-LABEL: post-indexed-sub-doubleword
+; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-16
+; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-16
+ br label %for.body
+for.body:
+ %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
+ %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
+ %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
+ %gep1 = getelementptr i64* %phi1, i64 -1
+ %load1 = load i64* %gep1
+ %gep2 = getelementptr i64* %phi2, i64 -1
+ store i64 %load1, i64* %gep2
+ %load2 = load i64* %phi1
+ store i64 %load2, i64* %phi2
+ %dec.i = add nsw i64 %i, -1
+ %gep3 = getelementptr i64* %phi2, i64 -2
+ %gep4 = getelementptr i64* %phi1, i64 -2
+ %cond = icmp sgt i64 %dec.i, 0
+ br i1 %cond, label %for.body, label %end
+end:
+ ret void
+}
+
+define void @post-indexed-sub-quadword(<2 x i64>* %a, <2 x i64>* %b, i64 %count) nounwind {
+; CHECK-LABEL: post-indexed-sub-quadword
+; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}], #-32
+; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #-32
+ br label %for.body
+for.body:
+ %phi1 = phi <2 x i64>* [ %gep4, %for.body ], [ %b, %0 ]
+ %phi2 = phi <2 x i64>* [ %gep3, %for.body ], [ %a, %0 ]
+ %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
+ %gep1 = getelementptr <2 x i64>* %phi1, i64 -1
+ %load1 = load <2 x i64>* %gep1
+ %gep2 = getelementptr <2 x i64>* %phi2, i64 -1
+ store <2 x i64> %load1, <2 x i64>* %gep2
+ %load2 = load <2 x i64>* %phi1
+ store <2 x i64> %load2, <2 x i64>* %phi2
+ %dec.i = add nsw i64 %i, -1
+ %gep3 = getelementptr <2 x i64>* %phi2, i64 -2
+ %gep4 = getelementptr <2 x i64>* %phi1, i64 -2
+ %cond = icmp sgt i64 %dec.i, 0
+ br i1 %cond, label %for.body, label %end
+end:
+ ret void
+}
+
+define void @post-indexed-sub-float(float* %a, float* %b, i64 %count) nounwind {
+; CHECK-LABEL: post-indexed-sub-float
+; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}], #-8
+; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #-8
+ br label %for.body
+for.body:
+ %phi1 = phi float* [ %gep4, %for.body ], [ %b, %0 ]
+ %phi2 = phi float* [ %gep3, %for.body ], [ %a, %0 ]
+ %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
+ %gep1 = getelementptr float* %phi1, i64 -1
+ %load1 = load float* %gep1
+ %gep2 = getelementptr float* %phi2, i64 -1
+ store float %load1, float* %gep2
+ %load2 = load float* %phi1
+ store float %load2, float* %phi2
+ %dec.i = add nsw i64 %i, -1
+ %gep3 = getelementptr float* %phi2, i64 -2
+ %gep4 = getelementptr float* %phi1, i64 -2
+ %cond = icmp sgt i64 %dec.i, 0
+ br i1 %cond, label %for.body, label %end
+end:
+ ret void
+}
+
+define void @post-indexed-sub-double(double* %a, double* %b, i64 %count) nounwind {
+; CHECK-LABEL: post-indexed-sub-double
+; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}], #-16
+; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #-16
+ br label %for.body
+for.body:
+ %phi1 = phi double* [ %gep4, %for.body ], [ %b, %0 ]
+ %phi2 = phi double* [ %gep3, %for.body ], [ %a, %0 ]
+ %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
+ %gep1 = getelementptr double* %phi1, i64 -1
+ %load1 = load double* %gep1
+ %gep2 = getelementptr double* %phi2, i64 -1
+ store double %load1, double* %gep2
+ %load2 = load double* %phi1
+ store double %load2, double* %phi2
+ %dec.i = add nsw i64 %i, -1
+ %gep3 = getelementptr double* %phi2, i64 -2
+ %gep4 = getelementptr double* %phi1, i64 -2
+ %cond = icmp sgt i64 %dec.i, 0
+ br i1 %cond, label %for.body, label %end
+end:
+ ret void
+}
diff --git a/test/CodeGen/AArch64/lit.local.cfg b/test/CodeGen/AArch64/lit.local.cfg
index 77493d8..125995c 100644
--- a/test/CodeGen/AArch64/lit.local.cfg
+++ b/test/CodeGen/AArch64/lit.local.cfg
@@ -2,8 +2,7 @@ import re
config.suffixes = ['.ll']
-targets = set(config.root.targets_to_build.split())
-if not 'AArch64' in targets:
+if not 'AArch64' in config.root.targets:
config.unsupported = True
# For now we don't test arm64-win32.
diff --git a/test/CodeGen/AArch64/memcpy-f128.ll b/test/CodeGen/AArch64/memcpy-f128.ll
new file mode 100644
index 0000000..76db297
--- /dev/null
+++ b/test/CodeGen/AArch64/memcpy-f128.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=aarch64 -mtriple=aarch64-linux-gnu | FileCheck %s
+
+%structA = type { i128 }
+@stubA = internal unnamed_addr constant %structA zeroinitializer, align 8
+
+; Make sure we don't hit llvm_unreachable.
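+; (The 48-byte copy from the i128-typed constant is lowered with q-register
+; loads and stores, as the CHECK lines show; the point here is only that llc
+; finishes without asserting.)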
+
+define void @test1() {
+; CHECK-LABEL: @test1
+; CHECK: adrp
+; CHECK: ldr q0
+; CHECK: str q0
+; CHECK: ret
+entry:
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* bitcast (%structA* @stubA to i8*), i64 48, i32 8, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
diff --git a/test/CodeGen/AArch64/mul_pow2.ll b/test/CodeGen/AArch64/mul_pow2.ll
new file mode 100644
index 0000000..efc0ec8
--- /dev/null
+++ b/test/CodeGen/AArch64/mul_pow2.ll
@@ -0,0 +1,123 @@
+; RUN: llc < %s -march=aarch64 | FileCheck %s
+
+; Convert mul x, pow2 to shift.
+; Convert mul x, pow2 +/- 1 to shift + add/sub.
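+;
+; For example, the identities behind the expected code (matching the CHECK
+; lines below rather than prescribing the lowering):
+;   x * 3  == (x << 1) + x  ->  add w0, w0, w0, lsl #1
+;   x * 7  == (x << 3) - x  ->  lsl #3 then sub
+;   x * -8 == -(x << 3)     ->  neg w0, w0, lsl #3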
+
+define i32 @test2(i32 %x) {
+; CHECK-LABEL: test2
+; CHECK: lsl w0, w0, #1
+
+ %mul = shl nsw i32 %x, 1
+ ret i32 %mul
+}
+
+define i32 @test3(i32 %x) {
+; CHECK-LABEL: test3
+; CHECK: add w0, w0, w0, lsl #1
+
+ %mul = mul nsw i32 %x, 3
+ ret i32 %mul
+}
+
+define i32 @test4(i32 %x) {
+; CHECK-LABEL: test4
+; CHECK: lsl w0, w0, #2
+
+ %mul = shl nsw i32 %x, 2
+ ret i32 %mul
+}
+
+define i32 @test5(i32 %x) {
+; CHECK-LABEL: test5
+; CHECK: add w0, w0, w0, lsl #2
+
+ %mul = mul nsw i32 %x, 5
+ ret i32 %mul
+}
+
+define i32 @test7(i32 %x) {
+; CHECK-LABEL: test7
+; CHECK: lsl {{w[0-9]+}}, w0, #3
+; CHECK: sub w0, {{w[0-9]+}}, w0
+
+ %mul = mul nsw i32 %x, 7
+ ret i32 %mul
+}
+
+define i32 @test8(i32 %x) {
+; CHECK-LABEL: test8
+; CHECK: lsl w0, w0, #3
+
+ %mul = shl nsw i32 %x, 3
+ ret i32 %mul
+}
+
+define i32 @test9(i32 %x) {
+; CHECK-LABEL: test9
+; CHECK: add w0, w0, w0, lsl #3
+
+ %mul = mul nsw i32 %x, 9
+ ret i32 %mul
+}
+
+; Convert mul x, -pow2 to shift.
+; Convert mul x, -(pow2 +/- 1) to shift + add/sub.
+
+define i32 @ntest2(i32 %x) {
+; CHECK-LABEL: ntest2
+; CHECK: neg w0, w0, lsl #1
+
+ %mul = mul nsw i32 %x, -2
+ ret i32 %mul
+}
+
+define i32 @ntest3(i32 %x) {
+; CHECK-LABEL: ntest3
+; CHECK: add {{w[0-9]+}}, w0, w0, lsl #1
+; CHECK: neg w0, {{w[0-9]+}}
+
+ %mul = mul nsw i32 %x, -3
+ ret i32 %mul
+}
+
+define i32 @ntest4(i32 %x) {
+; CHECK-LABEL: ntest4
+; CHECK: neg w0, w0, lsl #2
+
+ %mul = mul nsw i32 %x, -4
+ ret i32 %mul
+}
+
+define i32 @ntest5(i32 %x) {
+; CHECK-LABEL: ntest5
+; CHECK: add {{w[0-9]+}}, w0, w0, lsl #2
+; CHECK: neg w0, {{w[0-9]+}}
+ %mul = mul nsw i32 %x, -5
+ ret i32 %mul
+}
+
+define i32 @ntest7(i32 %x) {
+; CHECK-LABEL: ntest7
+; CHECK: sub w0, w0, w0, lsl #3
+
+ %mul = mul nsw i32 %x, -7
+ ret i32 %mul
+}
+
+define i32 @ntest8(i32 %x) {
+; CHECK-LABEL: ntest8
+; CHECK: neg w0, w0, lsl #3
+
+ %mul = mul nsw i32 %x, -8
+ ret i32 %mul
+}
+
+define i32 @ntest9(i32 %x) {
+; CHECK-LABEL: ntest9
+; CHECK: add {{w[0-9]+}}, w0, w0, lsl #3
+; CHECK: neg w0, {{w[0-9]+}}
+
+ %mul = mul nsw i32 %x, -9
+ ret i32 %mul
+}
diff --git a/test/CodeGen/AArch64/regress-tail-livereg.ll b/test/CodeGen/AArch64/regress-tail-livereg.ll
index e32ac84..03c3f33 100644
--- a/test/CodeGen/AArch64/regress-tail-livereg.ll
+++ b/test/CodeGen/AArch64/regress-tail-livereg.ll
@@ -17,3 +17,17 @@ define void @foo() {
; CHECK: br {{x([0-79]|1[0-8])}}
ret void
}
+
+; No matter how tempting it is, LLVM should not use x30 since that'll be
+; restored to its incoming value before the "br".
+define void @test_x30_tail() {
+; CHECK-LABEL: test_x30_tail:
+; CHECK: mov [[DEST:x[0-9]+]], x30
+; CHECK: br [[DEST]]
+ %addr = call i8* @llvm.returnaddress(i32 0)
+ %faddr = bitcast i8* %addr to void()*
+ tail call void %faddr()
+ ret void
+}
+
+declare i8* @llvm.returnaddress(i32)
diff --git a/test/CodeGen/AArch64/trunc-v1i64.ll b/test/CodeGen/AArch64/trunc-v1i64.ll
new file mode 100644
index 0000000..159b8e0
--- /dev/null
+++ b/test/CodeGen/AArch64/trunc-v1i64.ll
@@ -0,0 +1,63 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
+
+; An optimization in DAG Combiner to fold
+; (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...))
+; will generate nodes like:
+; v1i32 trunc v1i64, v1i16 trunc v1i64, v1i8 trunc v1i64.
+; And such nodes will be defaultly scalarized in type legalization. But such
+; scalarization will cause an assertion failure, as v1i64 is a legal type in
+; AArch64. We change the default behaviour from be scalarized to be widen.
+
+; FIXME: Currently an XTN is generated for v1i32, but it could be optimized
+; away so that, just as for v1i16 and v1i8, no XTN is generated.
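+; (One reading of the asymmetry: after widening, the v1i32 trunc becomes a
+; v2i64 -> v2i32 truncate, which lowers to the xtn.2s checked below, whereas
+; the widened i16/i8 cases are handled without a separate narrowing step.)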
+
+define <2 x i32> @test_v1i32_0(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i32_0:
+; CHECK: xtn v0.2s, v0.2d
+ %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 0, i32 undef>
+ %2 = trunc <2 x i64> %1 to <2 x i32>
+ ret <2 x i32> %2
+}
+
+define <2 x i32> @test_v1i32_1(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i32_1:
+; CHECK: xtn v0.2s, v0.2d
+; CHECK-NEXT: dup v0.2s, v0.s[0]
+ %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 undef, i32 0>
+ %2 = trunc <2 x i64> %1 to <2 x i32>
+ ret <2 x i32> %2
+}
+
+define <4 x i16> @test_v1i16_0(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i16_0:
+; CHECK-NOT: xtn
+ %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+ %2 = trunc <4 x i64> %1 to <4 x i16>
+ ret <4 x i16> %2
+}
+
+define <4 x i16> @test_v1i16_1(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i16_1:
+; CHECK-NOT: xtn
+; CHECK: dup v0.4h, v0.h[0]
+ %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 undef>
+ %2 = trunc <4 x i64> %1 to <4 x i16>
+ ret <4 x i16> %2
+}
+
+define <8 x i8> @test_v1i8_0(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i8_0:
+; CHECK-NOT: xtn
+ %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %2 = trunc <8 x i64> %1 to <8 x i8>
+ ret <8 x i8> %2
+}
+
+define <8 x i8> @test_v1i8_1(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i8_1:
+; CHECK-NOT: xtn
+; CHECK: dup v0.8b, v0.b[0]
+ %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %2 = trunc <8 x i64> %1 to <8 x i8>
+ ret <8 x i8> %2
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/tst-br.ll b/test/CodeGen/AArch64/tst-br.ll
index 8a2fe26..5dc7b5d 100644
--- a/test/CodeGen/AArch64/tst-br.ll
+++ b/test/CodeGen/AArch64/tst-br.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 -aarch64-atomic-cfg-tidy=0 | FileCheck %s
; We've got the usual issues with LLVM reordering blocks here. The
; tests are correct for the current order, but who knows when that