diff options
author | Stephen Hines <srhines@google.com> | 2014-07-21 00:45:20 -0700 |
---|---|---|
committer | Stephen Hines <srhines@google.com> | 2014-07-21 00:45:20 -0700 |
commit | c6a4f5e819217e1e12c458aed8e7b122e23a3a58 (patch) | |
tree | 81b7dd2bb4370a392f31d332a566c903b5744764 /test/CodeGen/AArch64 | |
parent | 19c6fbb3e8aaf74093afa08013134b61fa08f245 (diff) | |
download | external_llvm-c6a4f5e819217e1e12c458aed8e7b122e23a3a58.zip external_llvm-c6a4f5e819217e1e12c458aed8e7b122e23a3a58.tar.gz external_llvm-c6a4f5e819217e1e12c458aed8e7b122e23a3a58.tar.bz2 |
Update LLVM for rebase to r212749.
Includes a cherry-pick of:
r212948 - fixes a small issue with atomic calls
Change-Id: Ib97bd980b59f18142a69506400911a6009d9df18
Diffstat (limited to 'test/CodeGen/AArch64')
61 files changed, 2196 insertions, 138 deletions
diff --git a/test/CodeGen/AArch64/aarch64-address-type-promotion-assertion.ll b/test/CodeGen/AArch64/aarch64-address-type-promotion-assertion.ll new file mode 100644 index 0000000..2df9c37 --- /dev/null +++ b/test/CodeGen/AArch64/aarch64-address-type-promotion-assertion.ll @@ -0,0 +1,55 @@ +; RUN: llc -O3 -mcpu=cortex-a53 -mtriple=aarch64--linux-gnu %s -o - | FileCheck %s +; PR20188: don't crash when merging sexts. + +; CHECK: foo: +define void @foo() unnamed_addr align 2 { +entry: + br label %invoke.cont145 + +invoke.cont145: + %or.cond = and i1 undef, false + br i1 %or.cond, label %if.then274, label %invoke.cont145 + +if.then274: + %0 = load i32* null, align 4 + br i1 undef, label %invoke.cont291, label %if.else313 + +invoke.cont291: + %idxprom.i.i.i605 = sext i32 %0 to i64 + %arrayidx.i.i.i607 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i605 + %idxprom.i.i.i596 = sext i32 %0 to i64 + %arrayidx.i.i.i598 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i596 + br label %if.end356 + +if.else313: + %cmp314 = fcmp olt double undef, 0.000000e+00 + br i1 %cmp314, label %invoke.cont317, label %invoke.cont353 + +invoke.cont317: + br i1 undef, label %invoke.cont326, label %invoke.cont334 + +invoke.cont326: + %idxprom.i.i.i587 = sext i32 %0 to i64 + %arrayidx.i.i.i589 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i587 + %sub329 = fsub fast double undef, undef + br label %invoke.cont334 + +invoke.cont334: + %lo.1 = phi double [ %sub329, %invoke.cont326 ], [ undef, %invoke.cont317 ] + br i1 undef, label %invoke.cont342, label %if.end356 + +invoke.cont342: + %idxprom.i.i.i578 = sext i32 %0 to i64 + %arrayidx.i.i.i580 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i578 + br label %if.end356 + +invoke.cont353: + %idxprom.i.i.i572 = sext i32 %0 to i64 + %arrayidx.i.i.i574 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i572 + br label %if.end356 + +if.end356: + %lo.2 = phi double [ 0.000000e+00, %invoke.cont291 ], [ %lo.1, 
%invoke.cont342 ], [ undef, %invoke.cont353 ], [ %lo.1, %invoke.cont334 ] + call void null(i32 %0, double %lo.2) + unreachable +} diff --git a/test/CodeGen/AArch64/aarch64-address-type-promotion.ll b/test/CodeGen/AArch64/aarch64-address-type-promotion.ll new file mode 100644 index 0000000..ee90d19 --- /dev/null +++ b/test/CodeGen/AArch64/aarch64-address-type-promotion.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -o - | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64" +target triple = "arm64-apple-macosx10.9" + +; Check that sexts get promoted above adds. +define void @foo(i32* nocapture %a, i32 %i) { +entry: +; CHECK-LABEL: _foo: +; CHECK: add +; CHECK-NEXT: ldp +; CHECK-NEXT: add +; CHECK-NEXT: str +; CHECK-NEXT: ret + %add = add nsw i32 %i, 1 + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i32* %a, i64 %idxprom + %0 = load i32* %arrayidx, align 4 + %add1 = add nsw i32 %i, 2 + %idxprom2 = sext i32 %add1 to i64 + %arrayidx3 = getelementptr inbounds i32* %a, i64 %idxprom2 + %1 = load i32* %arrayidx3, align 4 + %add4 = add nsw i32 %1, %0 + %idxprom5 = sext i32 %i to i64 + %arrayidx6 = getelementptr inbounds i32* %a, i64 %idxprom5 + store i32 %add4, i32* %arrayidx6, align 4 + ret void +} diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll index a2266b1..ceea8a0 100644 --- a/test/CodeGen/AArch64/addsub_ext.ll +++ b/test/CodeGen/AArch64/addsub_ext.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck %s @var8 = global i8 0 @var16 = global i16 0 diff --git a/test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll b/test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll index d1840d3..7da2d2c 100644 --- a/test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll +++ 
b/test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll @@ -2,14 +2,14 @@ ; RUN: llc -mtriple=arm64-linux-gnu -relocation-model=pic < %s | FileCheck %s --check-prefix=CHECK-LINUX ; <rdar://problem/11392109> -define hidden void @t() optsize ssp { +define hidden void @t(i64* %addr) optsize ssp { entry: - store i64 zext (i32 ptrtoint (i64 (i32)* @x to i32) to i64), i64* undef, align 8 + store i64 zext (i32 ptrtoint (i64 (i32)* @x to i32) to i64), i64* %addr, align 8 ; CHECK: adrp x{{[0-9]+}}, _x@GOTPAGE ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, _x@GOTPAGEOFF] ; CHECK-NEXT: and x{{[0-9]+}}, x{{[0-9]+}}, #0xffffffff ; CHECK-NEXT: str x{{[0-9]+}}, [x{{[0-9]+}}] - unreachable + ret void } declare i64 @x(i32) optsize diff --git a/test/CodeGen/AArch64/arm64-2014-04-16-AnInfiniteLoopInDAGCombine.ll b/test/CodeGen/AArch64/arm64-AnInfiniteLoopInDAGCombine.ll index a73b707..a73b707 100644 --- a/test/CodeGen/AArch64/arm64-2014-04-16-AnInfiniteLoopInDAGCombine.ll +++ b/test/CodeGen/AArch64/arm64-AnInfiniteLoopInDAGCombine.ll diff --git a/test/CodeGen/AArch64/arm64-2014-04-29-EXT-undef-mask.ll b/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll index 1b2d543..1b2d543 100644 --- a/test/CodeGen/AArch64/arm64-2014-04-29-EXT-undef-mask.ll +++ b/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll diff --git a/test/CodeGen/AArch64/arm64-aapcs.ll b/test/CodeGen/AArch64/arm64-aapcs.ll index b713f0d..ccf1371 100644 --- a/test/CodeGen/AArch64/arm64-aapcs.ll +++ b/test/CodeGen/AArch64/arm64-aapcs.ll @@ -101,3 +101,11 @@ define fp128 @test_fp128([8 x float] %arg0, fp128 %arg1) { ; CHECK: ldr {{q[0-9]+}}, [sp] ret fp128 %arg1 } + +; Check if VPR can be correctly pass by stack. 
+define <2 x double> @test_vreg_stack([8 x <2 x double>], <2 x double> %varg_stack) { +entry: +; CHECK-LABEL: test_vreg_stack: +; CHECK: ldr {{q[0-9]+}}, [sp] + ret <2 x double> %varg_stack; +} diff --git a/test/CodeGen/AArch64/arm64-abi.ll b/test/CodeGen/AArch64/arm64-abi.ll index e2de434..a955029 100644 --- a/test/CodeGen/AArch64/arm64-abi.ll +++ b/test/CodeGen/AArch64/arm64-abi.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s +; RUN: llc < %s -debug -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s ; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s +; REQUIRES: asserts target triple = "arm64-apple-darwin" ; rdar://9932559 @@ -8,15 +9,15 @@ entry: ; CHECK-LABEL: i8i16callee: ; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5. ; They are i8, i16, i8 and i8. -; CHECK: ldrsb {{w[0-9]+}}, [sp, #5] -; CHECK: ldrsh {{w[0-9]+}}, [sp, #2] -; CHECK: ldrsb {{w[0-9]+}}, [sp] -; CHECK: ldrsb {{w[0-9]+}}, [sp, #4] +; CHECK-DAG: ldrsb {{w[0-9]+}}, [sp, #5] +; CHECK-DAG: ldrsb {{w[0-9]+}}, [sp, #4] +; CHECK-DAG: ldrsh {{w[0-9]+}}, [sp, #2] +; CHECK-DAG: ldrsb {{w[0-9]+}}, [sp] ; FAST-LABEL: i8i16callee: -; FAST: ldrb {{w[0-9]+}}, [sp, #5] -; FAST: ldrb {{w[0-9]+}}, [sp, #4] -; FAST: ldrh {{w[0-9]+}}, [sp, #2] -; FAST: ldrb {{w[0-9]+}}, [sp] +; FAST-DAG: ldrsb {{w[0-9]+}}, [sp, #5] +; FAST-DAG: ldrsb {{w[0-9]+}}, [sp, #4] +; FAST-DAG: ldrsh {{w[0-9]+}}, [sp, #2] +; FAST-DAG: ldrsb {{w[0-9]+}}, [sp] %conv = sext i8 %a4 to i64 %conv3 = sext i16 %a5 to i64 %conv8 = sext i8 %b1 to i64 @@ -44,10 +45,10 @@ entry: ; CHECK: i8i16caller ; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5. ; They are i8, i16, i8 and i8. 
-; CHECK: strb {{w[0-9]+}}, [sp, #5] -; CHECK: strb {{w[0-9]+}}, [sp, #4] -; CHECK: strh {{w[0-9]+}}, [sp, #2] -; CHECK: strb {{w[0-9]+}}, [sp] +; CHECK-DAG: strb {{w[0-9]+}}, [sp, #5] +; CHECK-DAG: strb {{w[0-9]+}}, [sp, #4] +; CHECK-DAG: strh {{w[0-9]+}}, [sp, #2] +; CHECK-DAG: strb {{w[0-9]+}}, [sp] ; CHECK: bl ; FAST: i8i16caller ; FAST: strb {{w[0-9]+}}, [sp] diff --git a/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll b/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll index 34d6287..38661a5 100644 --- a/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll +++ b/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll @@ -1,4 +1,4 @@ -; RUN: llc %s -o - | FileCheck %s +; RUN: llc %s -o - -aarch64-atomic-cfg-tidy=0 | FileCheck %s ; Check that ANDS (tst) is not merged with ADD when the immediate ; is not 0. ; <rdar://problem/16693089> @@ -8,18 +8,18 @@ target triple = "arm64-apple-ios" ; CHECK-LABEL: tst1: ; CHECK: add [[REG:w[0-9]+]], w{{[0-9]+}}, #1 ; CHECK: tst [[REG]], #0x1 -define void @tst1() { +define void @tst1(i1 %tst, i32 %true) { entry: - br i1 undef, label %for.end, label %for.body + br i1 %tst, label %for.end, label %for.body for.body: ; preds = %for.body, %entry %result.09 = phi i32 [ %add2.result.0, %for.body ], [ 1, %entry ] %i.08 = phi i32 [ %inc, %for.body ], [ 2, %entry ] %and = and i32 %i.08, 1 %cmp1 = icmp eq i32 %and, 0 - %add2.result.0 = select i1 %cmp1, i32 undef, i32 %result.09 + %add2.result.0 = select i1 %cmp1, i32 %true, i32 %result.09 %inc = add nsw i32 %i.08, 1 - %cmp = icmp slt i32 %i.08, undef + %cmp = icmp slt i32 %i.08, %true br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge for.cond.for.end_crit_edge: ; preds = %for.body diff --git a/test/CodeGen/AArch64/arm64-arith.ll b/test/CodeGen/AArch64/arm64-arith.ll index ed9b569..f36e706 100644 --- a/test/CodeGen/AArch64/arm64-arith.ll +++ b/test/CodeGen/AArch64/arm64-arith.ll @@ -260,3 +260,11 @@ define i64 @f3(i64 %a) nounwind readnone ssp { %res = mul nsw i64 %a, 17 ret i64 %res } + 
+define i32 @f4(i32 %a) nounwind readnone ssp { +; CHECK-LABEL: f4: +; CHECK-NEXT: add w0, w0, w0, lsl #1 +; CHECK-NEXT: ret + %res = mul i32 %a, 3 + ret i32 %res +} diff --git a/test/CodeGen/AArch64/arm64-atomic-128.ll b/test/CodeGen/AArch64/arm64-atomic-128.ll index 3b43aa1..3377849 100644 --- a/test/CodeGen/AArch64/arm64-atomic-128.ll +++ b/test/CodeGen/AArch64/arm64-atomic-128.ll @@ -13,7 +13,8 @@ define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) { ; CHECK: stxp [[SCRATCH_RES:w[0-9]+]], x4, x5, [x[[ADDR]]] ; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] ; CHECK: [[DONE]]: - %val = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire + %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire + %val = extractvalue { i128, i1 } %pair, 0 ret i128 %val } @@ -21,8 +22,10 @@ define void @fetch_and_nand(i128* %p, i128 %bits) { ; CHECK-LABEL: fetch_and_nand: ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: ; CHECK: ldxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0] -; CHECK-DAG: bic [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2 -; CHECK-DAG: bic [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3 +; CHECK-DAG: and [[TMP_REGLO:x[0-9]+]], [[DEST_REGLO]], x2 +; CHECK-DAG: and [[TMP_REGHI:x[0-9]+]], [[DEST_REGHI]], x3 +; CHECK-DAG: mvn [[SCRATCH_REGLO:x[0-9]+]], [[TMP_REGLO]] +; CHECK-DAG: mvn [[SCRATCH_REGHI:x[0-9]+]], [[TMP_REGHI]] ; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] ; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] diff --git a/test/CodeGen/AArch64/arm64-atomic.ll b/test/CodeGen/AArch64/arm64-atomic.ll index aa9b284..b56f91d 100644 --- a/test/CodeGen/AArch64/arm64-atomic.ll +++ b/test/CodeGen/AArch64/arm64-atomic.ll @@ -10,7 +10,8 @@ define i32 @val_compare_and_swap(i32* %p) { ; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[NEWVAL_REG]], [x0] ; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] ; CHECK: [[LABEL2]]: - %val = cmpxchg i32* %p, i32 7, i32 4 acquire acquire + %pair = cmpxchg i32* %p, i32 7, i32 4 acquire acquire + 
%val = extractvalue { i32, i1 } %pair, 0 ret i32 %val } @@ -25,7 +26,8 @@ define i64 @val_compare_and_swap_64(i64* %p) { ; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], x[[NEWVAL_REG]], [x0] ; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] ; CHECK: [[LABEL2]]: - %val = cmpxchg i64* %p, i64 7, i64 4 monotonic monotonic + %pair = cmpxchg i64* %p, i64 7, i64 4 monotonic monotonic + %val = extractvalue { i64, i1 } %pair, 0 ret i64 %val } @@ -33,7 +35,8 @@ define i32 @fetch_and_nand(i32* %p) { ; CHECK-LABEL: fetch_and_nand: ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: ; CHECK: ldxr w[[DEST_REG:[0-9]+]], [x0] -; CHECK: and [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], #0xfffffff8 +; CHECK: mvn [[TMP_REG:w[0-9]+]], w[[DEST_REG]] +; CHECK: orr [[SCRATCH2_REG:w[0-9]+]], [[TMP_REG]], #0xfffffff8 ; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]] ; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0] ; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] @@ -46,8 +49,9 @@ define i64 @fetch_and_nand_64(i64* %p) { ; CHECK-LABEL: fetch_and_nand_64: ; CHECK: mov x[[ADDR:[0-9]+]], x0 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: -; CHECK: ldaxr [[DEST_REG:x[0-9]+]], [x[[ADDR]]] -; CHECK: and [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], #0xfffffffffffffff8 +; CHECK: ldaxr x[[DEST_REG:[0-9]+]], [x[[ADDR]]] +; CHECK: mvn w[[TMP_REG:[0-9]+]], w[[DEST_REG]] +; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], x[[TMP_REG]], #0xfffffffffffffff8 ; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]] ; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] diff --git a/test/CodeGen/AArch64/arm64-build-vector.ll b/test/CodeGen/AArch64/arm64-build-vector.ll index c109263..d0f6db0 100644 --- a/test/CodeGen/AArch64/arm64-build-vector.ll +++ b/test/CodeGen/AArch64/arm64-build-vector.ll @@ -33,3 +33,27 @@ define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind { %4 = insertelement <4 x float> %3, float %d, i32 3 ret <4 x float> %4 } + +define <8 x i16> @build_all_zero(<8 x i16> %a) #1 { +; CHECK-LABEL: build_all_zero: +; CHECK: movz 
w[[GREG:[0-9]+]], #0xae80 +; CHECK-NEXT: fmov s[[FREG:[0-9]+]], w[[GREG]] +; CHECK-NEXT: mul.8h v0, v0, v[[FREG]] + %b = add <8 x i16> %a, <i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef> + %c = mul <8 x i16> %b, <i16 -20864, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef> + ret <8 x i16> %c +} + +; There is an optimization in DAG Combiner as following: +; fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) +; -> (BUILD_VECTOR A, B, ..., C, D, ...) +; This case checks when A,B and C,D are different types, there should be no +; assertion failure. +define <8 x i16> @concat_2_build_vector(<4 x i16> %in0) { +; CHECK-LABEL: concat_2_build_vector: +; CHECK: movi + %vshl_n = shl <4 x i16> %in0, <i16 8, i16 8, i16 8, i16 8> + %vshl_n2 = shl <4 x i16> %vshl_n, <i16 9, i16 9, i16 9, i16 9> + %shuffle.i = shufflevector <4 x i16> %vshl_n2, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i16> %shuffle.i +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll b/test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll deleted file mode 100644 index d862b1e..0000000 --- a/test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll +++ /dev/null @@ -1,24 +0,0 @@ -; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s - -; CHECK: fptosi_1 -; CHECK: fcvtzs.2d -; CHECK: xtn.2s -; CHECK: ret -define void @fptosi_1() nounwind noinline ssp { -entry: - %0 = fptosi <2 x double> undef to <2 x i32> - store <2 x i32> %0, <2 x i32>* undef, align 8 - ret void -} - -; CHECK: fptoui_1 -; CHECK: fcvtzu.2d -; CHECK: xtn.2s -; CHECK: ret -define void @fptoui_1() nounwind noinline ssp { -entry: - %0 = fptoui <2 x double> undef to <2 x i32> - store <2 x i32> %0, <2 x i32>* undef, align 8 - ret void -} - diff --git a/test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll b/test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll deleted file mode 100644 index daaf1e0..0000000 --- a/test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll +++ /dev/null @@ -1,29 +0,0 @@ -; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s - -define <2 x double> @f1(<2 x i32> %v) nounwind readnone { -; CHECK-LABEL: f1: -; CHECK: sshll.2d v0, v0, #0 -; CHECK-NEXT: scvtf.2d v0, v0 -; CHECK-NEXT: ret - %conv = sitofp <2 x i32> %v to <2 x double> - ret <2 x double> %conv -} -define <2 x double> @f2(<2 x i32> %v) nounwind readnone { -; CHECK-LABEL: f2: -; CHECK: ushll.2d v0, v0, #0 -; CHECK-NEXT: ucvtf.2d v0, v0 -; CHECK-NEXT: ret - %conv = uitofp <2 x i32> %v to <2 x double> - ret <2 x double> %conv -} - -; CHECK: autogen_SD19655 -; CHECK: scvtf -; CHECK: ret -define void @autogen_SD19655() { - %T = load <2 x i64>* undef - %F = sitofp <2 x i64> undef to <2 x float> - store <2 x float> %F, <2 x float>* undef - ret void -} - diff --git a/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/test/CodeGen/AArch64/arm64-convert-v4f64.ll new file mode 100644 index 0000000..7123e5e --- 
/dev/null +++ b/test/CodeGen/AArch64/arm64-convert-v4f64.ll @@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -march=arm64 | FileCheck %s + + +define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) { +; CHECK: fptosi_v4f64_to_v4i16 +; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v1.2d +; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v0.2d +; CHECK-DAG: xtn v[[LHS_NA:[0-9]+]].2s, v[[LHS]].2d +; CHECK-DAG: xtn v[[RHS_NA:[0-9]+]].2s, v[[RHS]].2d +; CHECK: uzp1 v0.4h, v[[RHS_NA]].4h, v[[LHS_NA]].4h + %tmp1 = load <4 x double>* %ptr + %tmp2 = fptosi <4 x double> %tmp1 to <4 x i16> + ret <4 x i16> %tmp2 +} + +define <8 x i8> @fptosi_v4f64_to_v4i8(<8 x double>* %ptr) { +; CHECK: fptosi_v4f64_to_v4i8 +; CHECK-DAG: fcvtzs v[[CONV3:[0-9]+]].2d, v3.2d +; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d +; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d +; CHECK-DAG: fcvtzs v[[CONV0:[0-9]+]].2d, v0.2d +; CHECK-DAG: xtn v[[NA3:[0-9]+]].2s, v[[CONV3]].2d +; CHECK-DAG: xtn v[[NA2:[0-9]+]].2s, v[[CONV2]].2d +; CHECK-DAG: xtn v[[NA1:[0-9]+]].2s, v[[CONV1]].2d +; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d +; CHECK-DAG: uzp1 v[[TMP1:[0-9]+]].4h, v[[CONV2]].4h, v[[CONV3]].4h +; CHECK-DAG: uzp1 v[[TMP2:[0-9]+]].4h, v[[CONV0]].4h, v[[CONV1]].4h +; CHECK: uzp1 v0.8b, v[[TMP2]].8b, v[[TMP1]].8b + %tmp1 = load <8 x double>* %ptr + %tmp2 = fptosi <8 x double> %tmp1 to <8 x i8> + ret <8 x i8> %tmp2 +} + diff --git a/test/CodeGen/AArch64/arm64-cse.ll b/test/CodeGen/AArch64/arm64-cse.ll index bb14c89..5d62cfe 100644 --- a/test/CodeGen/AArch64/arm64-cse.ll +++ b/test/CodeGen/AArch64/arm64-cse.ll @@ -1,4 +1,4 @@ -; RUN: llc -O3 < %s | FileCheck %s +; RUN: llc -O3 < %s -aarch64-atomic-cfg-tidy=0 | FileCheck %s target triple = "arm64-apple-ios" ; rdar://12462006 diff --git a/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll b/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll index 2cf0135..6eed48b 100644 --- a/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll 
+++ b/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll @@ -1,5 +1,8 @@ ; RUN: llc -mcpu=cyclone < %s | FileCheck %s +; r208640 broke ppc64/Linux self-hosting; xfailing while this is worked on. +; XFAIL: * + target datalayout = "e-i64:64-n32:64-S128" target triple = "arm64-apple-ios" diff --git a/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll b/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll index 2e4b658..ce132c6 100644 --- a/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll +++ b/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll @@ -13,12 +13,12 @@ target triple = "arm64-apple-ios" ; CHECK-LABEL: XX: ; CHECK: ldr -define void @XX(%class.A* %K) { +define i32 @XX(%class.A* %K, i1 %tst, i32* %addr, %class.C** %ppC, %class.C* %pC) { entry: - br i1 undef, label %if.then, label %lor.rhs.i + br i1 %tst, label %if.then, label %lor.rhs.i lor.rhs.i: ; preds = %entry - %tmp = load i32* undef, align 4 + %tmp = load i32* %addr, align 4 %y.i.i.i = getelementptr inbounds %class.A* %K, i64 0, i32 1 %tmp1 = load i64* %y.i.i.i, align 8 %U.sroa.3.8.extract.trunc.i = trunc i64 %tmp1 to i32 @@ -30,17 +30,17 @@ lor.rhs.i: ; preds = %entry %add16.i = add nsw i32 %add12.i, %div15.i %rem.i.i = srem i32 %add16.i, %tmp %idxprom = sext i32 %rem.i.i to i64 - %arrayidx = getelementptr inbounds %class.C** undef, i64 %idxprom - %tobool533 = icmp eq %class.C* undef, null + %arrayidx = getelementptr inbounds %class.C** %ppC, i64 %idxprom + %tobool533 = icmp eq %class.C* %pC, null br i1 %tobool533, label %while.end, label %while.body if.then: ; preds = %entry - unreachable + ret i32 42 while.body: ; preds = %lor.rhs.i - unreachable + ret i32 5 while.end: ; preds = %lor.rhs.i %tmp3 = load %class.C** %arrayidx, align 8 - unreachable + ret i32 50 } diff --git a/test/CodeGen/AArch64/arm64-early-ifcvt.ll b/test/CodeGen/AArch64/arm64-early-ifcvt.ll index 17d783a..44150c2 100644 --- a/test/CodeGen/AArch64/arm64-early-ifcvt.ll +++ 
b/test/CodeGen/AArch64/arm64-early-ifcvt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -stress-early-ifcvt | FileCheck %s +; RUN: llc < %s -stress-early-ifcvt -aarch64-atomic-cfg-tidy=0 | FileCheck %s target triple = "arm64-apple-macosx" ; CHECK: mm2 diff --git a/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll index a3d5f6c..1152988 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll @@ -133,3 +133,16 @@ define void @t8() { call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 4, i32 1, i1 false) ret void } + +define void @test_distant_memcpy(i8* %dst) { +; ARM64-LABEL: test_distant_memcpy: +; ARM64: mov [[ARRAY:x[0-9]+]], sp +; ARM64: movz [[OFFSET:x[0-9]+]], #0x1f40 +; ARM64: add x[[ADDR:[0-9]+]], [[ARRAY]], [[OFFSET]] +; ARM64: ldrb [[BYTE:w[0-9]+]], [x[[ADDR]]] +; ARM64: strb [[BYTE]], [x0] + %array = alloca i8, i32 8192 + %elem = getelementptr i8* %array, i32 8000 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %elem, i64 1, i32 1, i1 false) + ret void +} diff --git a/test/CodeGen/AArch64/arm64-fp128.ll b/test/CodeGen/AArch64/arm64-fp128.ll index 57bbb93..b1d5010 100644 --- a/test/CodeGen/AArch64/arm64-fp128.ll +++ b/test/CodeGen/AArch64/arm64-fp128.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone < %s | FileCheck %s +; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone -aarch64-atomic-cfg-tidy=0 < %s | FileCheck %s @lhs = global fp128 zeroinitializer, align 16 @rhs = global fp128 zeroinitializer, align 16 diff --git a/test/CodeGen/AArch64/arm64-frame-index.ll b/test/CodeGen/AArch64/arm64-frame-index.ll index 4a91ff3..321f335 100644 --- a/test/CodeGen/AArch64/arm64-frame-index.ll +++ b/test/CodeGen/AArch64/arm64-frame-index.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 
-mtriple=arm64-apple-ios < %s | FileCheck %s +; RUN: llc -march=arm64 -mtriple=arm64-apple-ios -aarch64-atomic-cfg-tidy=0 < %s | FileCheck %s ; rdar://11935841 define void @t1() nounwind ssp { diff --git a/test/CodeGen/AArch64/arm64-misched-basic-A53.ll b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll index f88bd6a..bc7ed7f 100644 --- a/test/CodeGen/AArch64/arm64-misched-basic-A53.ll +++ b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll @@ -122,3 +122,82 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) { } declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*) + +; Regression Test for PR20057. +; +; Cortex-A53 machine model stalls on A53UnitFPMDS contention. Instructions that +; are otherwise ready are jammed in the pending queue. +; CHECK: ********** MI Scheduling ********** +; CHECK: testResourceConflict +; CHECK: *** Final schedule for BB#0 *** +; CHECK: BRK +; CHECK: ********** INTERVALS ********** +define void @testResourceConflict(float* %ptr) { +entry: + %add1 = fadd float undef, undef + %mul2 = fmul float undef, undef + %add3 = fadd float %mul2, undef + %mul4 = fmul float undef, %add3 + %add5 = fadd float %mul4, undef + %sub6 = fsub float 0.000000e+00, undef + %sub7 = fsub float %add5, undef + %div8 = fdiv float 1.000000e+00, undef + %mul9 = fmul float %div8, %sub7 + %mul14 = fmul float %sub6, %div8 + %mul10 = fsub float -0.000000e+00, %mul14 + %mul15 = fmul float undef, %div8 + %mul11 = fsub float -0.000000e+00, %mul15 + %mul12 = fmul float 0.000000e+00, %div8 + %mul13 = fmul float %add1, %mul9 + %mul21 = fmul float %add5, %mul11 + %add22 = fadd float %mul13, %mul21 + store float %add22, float* %ptr, align 4 + %mul28 = fmul float %add1, %mul10 + %mul33 = fmul float %add5, %mul12 + %add34 = fadd float %mul33, %mul28 + store float %add34, float* %ptr, align 4 + %mul240 = fmul float undef, %mul9 + %add246 = fadd float %mul240, undef + store float %add246, float* %ptr, align 4 + %mul52 = fmul float undef, %mul10 
+ %mul57 = fmul float undef, %mul12 + %add58 = fadd float %mul57, %mul52 + store float %add58, float* %ptr, align 4 + %mul27 = fmul float 0.000000e+00, %mul9 + %mul81 = fmul float undef, %mul10 + %add82 = fadd float %mul27, %mul81 + store float %add82, float* %ptr, align 4 + call void @llvm.trap() + unreachable +} + +declare void @llvm.trap() + +; Regression test for PR20057: "permanent hazard"' +; Resource contention on LDST. +; CHECK: ********** MI Scheduling ********** +; CHECK: testLdStConflict +; CHECK: *** Final schedule for BB#1 *** +; CHECK: LD4Fourv2d +; CHECK: STRQui +; CHECK: ********** INTERVALS ********** +define void @testLdStConflict() { +entry: + br label %loop + +loop: + %0 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i8(i8* null) + %ptr = bitcast i8* undef to <2 x i64>* + store <2 x i64> zeroinitializer, <2 x i64>* %ptr, align 4 + %ptr1 = bitcast i8* undef to <2 x i64>* + store <2 x i64> zeroinitializer, <2 x i64>* %ptr1, align 4 + %ptr2 = bitcast i8* undef to <2 x i64>* + store <2 x i64> zeroinitializer, <2 x i64>* %ptr2, align 4 + %ptr3 = bitcast i8* undef to <2 x i64>* + store <2 x i64> zeroinitializer, <2 x i64>* %ptr3, align 4 + %ptr4 = bitcast i8* undef to <2 x i64>* + store <2 x i64> zeroinitializer, <2 x i64>* %ptr4, align 4 + br label %loop +} + +declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i8(i8*) diff --git a/test/CodeGen/AArch64/arm64-misched-basic-A57.ll b/test/CodeGen/AArch64/arm64-misched-basic-A57.ll new file mode 100644 index 0000000..238474a --- /dev/null +++ b/test/CodeGen/AArch64/arm64-misched-basic-A57.ll @@ -0,0 +1,112 @@ +; REQUIRES: asserts +; +; The Cortext-A57 machine model will avoid scheduling load instructions in +; succession because loads on the A57 have a latency of 4 cycles and they all +; issue to the same pipeline. Instead, it will move other instructions between +; the loads to avoid unnecessary stalls. 
The generic machine model schedules 4 +; loads consecutively for this case and will cause stalls. +; +; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s +; CHECK: ********** MI Scheduling ********** +; CHECK: main:BB#2 +; CHECK LDR +; CHECK Latency : 4 +; CHECK: *** Final schedule for BB#2 *** +; CHECK: LDR +; CHECK: LDR +; CHECK-NOT: LDR +; CHECK: {{.*}} +; CHECK: ********** MI Scheduling ********** + +@main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4 +@main.y = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2], align 4 + +; Function Attrs: nounwind +define i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + %x = alloca [8 x i32], align 4 + %y = alloca [8 x i32], align 4 + %i = alloca i32, align 4 + %xx = alloca i32, align 4 + %yy = alloca i32, align 4 + store i32 0, i32* %retval + %0 = bitcast [8 x i32]* %x to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false) + %1 = bitcast [8 x i32]* %y to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false) + store i32 0, i32* %xx, align 4 + store i32 0, i32* %yy, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %2 = load i32* %i, align 4 + %cmp = icmp slt i32 %2, 8 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %3 = load i32* %yy, align 4 + %4 = load i32* %i, align 4 + %idxprom = sext i32 %4 to i64 + %arrayidx = getelementptr inbounds [8 x i32]* %x, i32 0, i64 %idxprom + %5 = load i32* %arrayidx, align 4 + %add = add nsw i32 %5, 1 + store i32 %add, i32* %xx, align 4 + %6 = load i32* %xx, align 4 + %add1 = add nsw i32 %6, 12 + store i32 %add1, i32* %xx, align 4 + %7 = load i32* %xx, align 4 + %add2 = 
add nsw i32 %7, 23 + store i32 %add2, i32* %xx, align 4 + %8 = load i32* %xx, align 4 + %add3 = add nsw i32 %8, 34 + store i32 %add3, i32* %xx, align 4 + %9 = load i32* %i, align 4 + %idxprom4 = sext i32 %9 to i64 + %arrayidx5 = getelementptr inbounds [8 x i32]* %y, i32 0, i64 %idxprom4 + %10 = load i32* %arrayidx5, align 4 + + %add4 = add nsw i32 %9, %add + %add5 = add nsw i32 %10, %add1 + %add6 = add nsw i32 %add4, %add5 + + %add7 = add nsw i32 %9, %add3 + %add8 = add nsw i32 %10, %add4 + %add9 = add nsw i32 %add7, %add8 + + %add10 = add nsw i32 %9, %add6 + %add11 = add nsw i32 %10, %add7 + %add12 = add nsw i32 %add10, %add11 + + %add13 = add nsw i32 %9, %add9 + %add14 = add nsw i32 %10, %add10 + %add15 = add nsw i32 %add13, %add14 + + store i32 %add15, i32* %xx, align 4 + + %div = sdiv i32 %4, %5 + + store i32 %div, i32* %yy, align 4 + + br label %for.inc + +for.inc: ; preds = %for.body + %11 = load i32* %i, align 4 + %inc = add nsw i32 %11, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %12 = load i32* %xx, align 4 + %13 = load i32* %yy, align 4 + %add67 = add nsw i32 %12, %13 + ret i32 %add67 +} + + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } diff --git a/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll b/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll index 97bfb5c..07373cc 100644 --- a/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll +++ b/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll @@ -6,9 +6,10 @@ ; ; CHECK: ********** MI Scheduling ********** ; CHECK: shiftable -; CHECK: *** Final schedule for BB#0 *** -; CHECK: ADDXrr 
%vreg0, %vreg2 -; CHECK: ADDXrs %vreg0, %vreg2, 5 +; CHECK: SU(2): %vreg2<def> = SUBXri %vreg1, 20, 0 +; CHECK: Successors: +; CHECK-NEXT: val SU(4): Latency=1 Reg=%vreg2 +; CHECK-NEXT: val SU(3): Latency=2 Reg=%vreg2 ; CHECK: ********** INTERVALS ********** define i64 @shiftable(i64 %A, i64 %B) { %tmp0 = sub i64 %B, 20 diff --git a/test/CodeGen/AArch64/arm64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll index cfc2ebf..1cfba82 100644 --- a/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -842,7 +842,7 @@ define <2 x i64> @scalar_to_vector.v2i64(i64 %a) { define <8 x i8> @testDUP.v1i8(<1 x i8> %a) { ; CHECK-LABEL: testDUP.v1i8: -; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} +; CHECK: dup v0.8b, v0.b[0] %b = extractelement <1 x i8> %a, i32 0 %c = insertelement <8 x i8> undef, i8 %b, i32 0 %d = insertelement <8 x i8> %c, i8 %b, i32 1 @@ -857,7 +857,7 @@ define <8 x i8> @testDUP.v1i8(<1 x i8> %a) { define <8 x i16> @testDUP.v1i16(<1 x i16> %a) { ; CHECK-LABEL: testDUP.v1i16: -; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} +; CHECK: dup v0.8h, v0.h[0] %b = extractelement <1 x i16> %a, i32 0 %c = insertelement <8 x i16> undef, i16 %b, i32 0 %d = insertelement <8 x i16> %c, i16 %b, i32 1 @@ -872,7 +872,7 @@ define <8 x i16> @testDUP.v1i16(<1 x i16> %a) { define <4 x i32> @testDUP.v1i32(<1 x i32> %a) { ; CHECK-LABEL: testDUP.v1i32: -; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}} +; CHECK: dup v0.4s, v0.s[0] %b = extractelement <1 x i32> %a, i32 0 %c = insertelement <4 x i32> undef, i32 %b, i32 0 %d = insertelement <4 x i32> %c, i32 %b, i32 1 @@ -1411,35 +1411,35 @@ define <16 x i8> @concat_vector_v16i8_const() { define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) { ; CHECK-LABEL: concat_vector_v4i16: -; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} +; CHECK: dup v0.4h, v0.h[0] %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer ret <4 x i16> %r } define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) { ; CHECK-LABEL: 
concat_vector_v4i32: -; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}} +; CHECK: dup v0.4s, v0.s[0] %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %r } define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) { ; CHECK-LABEL: concat_vector_v8i8: -; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} +; CHECK: dup v0.8b, v0.b[0] %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer ret <8 x i8> %r } define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) { ; CHECK-LABEL: concat_vector_v8i16: -; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} +; CHECK: dup v0.8h, v0.h[0] %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer ret <8 x i16> %r } define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) { ; CHECK-LABEL: concat_vector_v16i8: -; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}} +; CHECK: dup v0.16b, v0.b[0] %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer ret <16 x i8> %r } diff --git a/test/CodeGen/AArch64/arm64-neon-select_cc.ll b/test/CodeGen/AArch64/arm64-neon-select_cc.ll index 255b90d..95c582a 100644 --- a/test/CodeGen/AArch64/arm64-neon-select_cc.ll +++ b/test/CodeGen/AArch64/arm64-neon-select_cc.ll @@ -136,8 +136,8 @@ define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d ) define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) { ; CHECK-LABEL: test_select_cc_v1f32: -; CHECK: fcmp s0, s1 -; CHECK-NEXT: fcsel s0, s2, s3, eq +; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s +; CHECK-NEXT: bsl [[MASK]].8b, v2.8b, v3.8b %cmp31 = fcmp oeq float %a, %b %e = select i1 %cmp31, <1 x float> %c, <1 x float> %d ret <1 x float> %e diff --git a/test/CodeGen/AArch64/arm64-shrink-v1i64.ll b/test/CodeGen/AArch64/arm64-shrink-v1i64.ll new file mode 100644 index 0000000..f31a570 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-shrink-v1i64.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=arm64 < %s + +; The DAGCombiner tries to do following shrink: +; Convert x+y to 
(VT)((SmallVT)x+(SmallVT)y) +; But currently it can't handle vector type and will trigger an assertion failure +; when it tries to generate an add mixed using vector type and scaler type. +; This test checks that such assertion failur should not happen. +define <1 x i64> @dotest(<1 x i64> %in0) { +entry: + %0 = add <1 x i64> %in0, %in0 + %vshl_n = shl <1 x i64> %0, <i64 32> + %vsra_n = ashr <1 x i64> %vshl_n, <i64 32> + ret <1 x i64> %vsra_n +} diff --git a/test/CodeGen/AArch64/arm64-2014-04-28-sqshl-uqshl-i64Contant.ll b/test/CodeGen/AArch64/arm64-sqshl-uqshl-i64Contant.ll index 3949b85..3949b85 100644 --- a/test/CodeGen/AArch64/arm64-2014-04-28-sqshl-uqshl-i64Contant.ll +++ b/test/CodeGen/AArch64/arm64-sqshl-uqshl-i64Contant.ll diff --git a/test/CodeGen/AArch64/arm64-vcvt.ll b/test/CodeGen/AArch64/arm64-vcvt.ll index 8c9e4e9..6570f0e 100644 --- a/test/CodeGen/AArch64/arm64-vcvt.ll +++ b/test/CodeGen/AArch64/arm64-vcvt.ll @@ -665,19 +665,19 @@ define <2 x double> @ucvtf_2dc(<2 x i64> %A) nounwind { ;CHECK-LABEL: autogen_SD28458: ;CHECK: fcvt ;CHECK: ret -define void @autogen_SD28458() { - %Tr53 = fptrunc <8 x double> undef to <8 x float> - store <8 x float> %Tr53, <8 x float>* undef +define void @autogen_SD28458(<8 x double> %val.f64, <8 x float>* %addr.f32) { + %Tr53 = fptrunc <8 x double> %val.f64 to <8 x float> + store <8 x float> %Tr53, <8 x float>* %addr.f32 ret void } ;CHECK-LABEL: autogen_SD19225: ;CHECK: fcvt ;CHECK: ret -define void @autogen_SD19225() { - %A = load <8 x float>* undef +define void @autogen_SD19225(<8 x double>* %addr.f64, <8 x float>* %addr.f32) { + %A = load <8 x float>* %addr.f32 %Tr53 = fpext <8 x float> %A to <8 x double> - store <8 x double> %Tr53, <8 x double>* undef + store <8 x double> %Tr53, <8 x double>* %addr.f64 ret void } diff --git a/test/CodeGen/AArch64/arm64-vshift.ll b/test/CodeGen/AArch64/arm64-vshift.ll index 82ae486..65bd50c 100644 --- a/test/CodeGen/AArch64/arm64-vshift.ll +++ b/test/CodeGen/AArch64/arm64-vshift.ll @@ 
-1313,6 +1313,15 @@ define <8 x i8> @uqshli8b(<8 x i8>* %A) nounwind { ret <8 x i8> %tmp3 } +define <8 x i8> @uqshli8b_1(<8 x i8>* %A) nounwind { +;CHECK-LABEL: uqshli8b_1: +;CHECK: movi.8b [[REG:v[0-9]+]], #0x8 +;CHECK: uqshl.8b v0, v0, [[REG]] + %tmp1 = load <8 x i8>* %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>) + ret <8 x i8> %tmp3 +} + define <4 x i16> @uqshli4h(<4 x i16>* %A) nounwind { ;CHECK-LABEL: uqshli4h: ;CHECK: uqshl.4h v0, {{v[0-9]+}}, #1 diff --git a/test/CodeGen/AArch64/arm64-xaluo.ll b/test/CodeGen/AArch64/arm64-xaluo.ll index 6cffbde..0c300de 100644 --- a/test/CodeGen/AArch64/arm64-xaluo.ll +++ b/test/CodeGen/AArch64/arm64-xaluo.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-atomic-cfg-tidy=0 | FileCheck %s ; ; Get the actual value of the overflow bit. diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll index 58b5d1d..26301b9 100644 --- a/test/CodeGen/AArch64/atomic-ops.ll +++ b/test/CodeGen/AArch64/atomic-ops.ll @@ -878,7 +878,9 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i8: - %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire + %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire + %old = extractvalue { i8, i1 } %pair, 0 + ; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 @@ -889,8 +891,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { ; function there. ; CHECK-NEXT: cmp w[[OLD]], w0 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] - ; As above, w1 is a reasonable guess. 
-; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] +; CHECK: stxrb [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] ; CHECK-NOT: dmb @@ -900,7 +901,9 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i16: - %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst + %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst + %old = extractvalue { i16, i1 } %pair, 0 + ; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 @@ -911,8 +914,7 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { ; function there. ; CHECK-NEXT: cmp w[[OLD]], w0 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] - ; As above, w1 is a reasonable guess. -; CHECK: stlxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] +; CHECK: stlxrh [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] ; CHECK-NOT: dmb @@ -922,7 +924,9 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32: - %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic + %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic + %old = extractvalue { i32, i1 } %pair, 0 + ; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 @@ -933,8 +937,7 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { ; function there. ; CHECK-NEXT: cmp w[[OLD]], w0 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] - ; As above, w1 is a reasonable guess. 
-; CHECK: stlxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] +; CHECK: stlxr [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] ; CHECK-NOT: dmb @@ -944,7 +947,9 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i64: - %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic + %pair = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic + %old = extractvalue { i64, i1 } %pair, 0 + ; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 diff --git a/test/CodeGen/AArch64/blockaddress.ll b/test/CodeGen/AArch64/blockaddress.ll index 1eec4cc..3a5dbdc 100644 --- a/test/CodeGen/AArch64/blockaddress.ll +++ b/test/CodeGen/AArch64/blockaddress.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s @addr = global i8* null diff --git a/test/CodeGen/AArch64/branch-relax-asm.ll b/test/CodeGen/AArch64/branch-relax-asm.ll new file mode 100644 index 0000000..7409c84 --- /dev/null +++ b/test/CodeGen/AArch64/branch-relax-asm.ll @@ -0,0 +1,35 @@ +; RUN: llc -mtriple=aarch64-apple-ios7.0 -disable-block-placement -aarch64-tbz-offset-bits=4 -o - %s | FileCheck %s +define i32 @test_asm_length(i32 %in) { +; CHECK-LABEL: test_asm_length: + + ; It would be more natural to use just one "tbnz %false" here, but if the + ; number of instructions in the asm is counted reasonably, that block is out + ; of the 
limited range we gave tbz. So branch relaxation has to invert the + ; condition. +; CHECK: tbz w0, #0, [[TRUE:LBB[0-9]+_[0-9]+]] +; CHECK: b [[FALSE:LBB[0-9]+_[0-9]+]] + +; CHECK: [[TRUE]]: +; CHECK: orr w0, wzr, #0x4 +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: ret + +; CHECK: [[FALSE]]: +; CHECK: ret + + %val = and i32 %in, 1 + %tst = icmp eq i32 %val, 0 + br i1 %tst, label %true, label %false + +true: + call void asm sideeffect "nop\0A\09nop\0A\09nop\0A\09nop\0A\09nop\0A\09nop", ""() + ret i32 4 + +false: + ret i32 0 +} diff --git a/test/CodeGen/AArch64/breg.ll b/test/CodeGen/AArch64/breg.ll index 591f483..9524044 100644 --- a/test/CodeGen/AArch64/breg.ll +++ b/test/CodeGen/AArch64/breg.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck %s @stored_label = global i8* null diff --git a/test/CodeGen/AArch64/cmpxchg-idioms.ll b/test/CodeGen/AArch64/cmpxchg-idioms.ll new file mode 100644 index 0000000..0c008c2 --- /dev/null +++ b/test/CodeGen/AArch64/cmpxchg-idioms.ll @@ -0,0 +1,93 @@ +; RUN: llc -mtriple=aarch64-apple-ios7.0 -o - %s | FileCheck %s + +define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) { +; CHECK-LABEL: test_return: + +; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxr [[LOADED:w[0-9]+]], [x0] +; CHECK: cmp [[LOADED]], w1 +; CHECK: b.ne [[FAILED:LBB[0-9]+_[0-9]+]] + +; CHECK: stlxr [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x0] +; CHECK: cbnz [[STATUS]], [[LOOP]] + +; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: orr w0, wzr, #0x1 +; CHECK: ret + +; CHECK: [[FAILED]]: +; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: mov w0, wzr +; CHECK: ret + + %pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst + %success = extractvalue { i32, i1 } %pair, 1 + %conv = zext i1 %success to i32 + ret i32 %conv +} + +define i1 
@test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) { +; CHECK-LABEL: test_return_bool: + +; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxrb [[LOADED:w[0-9]+]], [x0] +; CHECK: cmp [[LOADED]], w1, uxtb +; CHECK: b.ne [[FAILED:LBB[0-9]+_[0-9]+]] + +; CHECK: stlxrb [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x0] +; CHECK: cbnz [[STATUS]], [[LOOP]] + +; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} + ; FIXME: DAG combine should be able to deal with this. +; CHECK: orr [[TMP:w[0-9]+]], wzr, #0x1 +; CHECK: eor w0, [[TMP]], #0x1 +; CHECK: ret + +; CHECK: [[FAILED]]: +; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: mov [[TMP:w[0-9]+]], wzr +; CHECK: eor w0, [[TMP]], #0x1 +; CHECK: ret + + %pair = cmpxchg i8* %value, i8 %oldValue, i8 %newValue acq_rel monotonic + %success = extractvalue { i8, i1 } %pair, 1 + %failure = xor i1 %success, 1 + ret i1 %failure +} + +define void @test_conditional(i32* %p, i32 %oldval, i32 %newval) { +; CHECK-LABEL: test_conditional: + +; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxr [[LOADED:w[0-9]+]], [x0] +; CHECK: cmp [[LOADED]], w1 +; CHECK: b.ne [[FAILED:LBB[0-9]+_[0-9]+]] + +; CHECK: stlxr [[STATUS:w[0-9]+]], w2, [x0] +; CHECK: cbnz [[STATUS]], [[LOOP]] + +; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: b _bar + +; CHECK: [[FAILED]]: +; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: b _baz + + %pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst + %success = extractvalue { i32, i1 } %pair, 1 + br i1 %success, label %true, label %false + +true: + tail call void @bar() #2 + br label %end + +false: + tail call void @baz() #2 + br label %end + +end: + ret void +} + +declare void @bar() +declare void @baz() diff --git a/test/CodeGen/AArch64/compiler-ident.ll b/test/CodeGen/AArch64/compiler-ident.ll new file mode 100644 index 0000000..0350571 --- /dev/null +++ b/test/CodeGen/AArch64/compiler-ident.ll @@ -0,0 +1,12 @@ +; RUN: llc -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s + +; ModuleID = 'compiler-ident.c' +target 
datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +; CHECK: .ident "some LLVM version" + +!llvm.ident = !{!0} + +!0 = metadata !{metadata !"some LLVM version"} + diff --git a/test/CodeGen/AArch64/complex-fp-to-int.ll b/test/CodeGen/AArch64/complex-fp-to-int.ll new file mode 100644 index 0000000..13cf762 --- /dev/null +++ b/test/CodeGen/AArch64/complex-fp-to-int.ll @@ -0,0 +1,141 @@ +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s + +define <2 x i64> @test_v2f32_to_signed_v2i64(<2 x float> %in) { +; CHECK-LABEL: test_v2f32_to_signed_v2i64: +; CHECK: fcvtl [[VAL64:v[0-9]+]].2d, v0.2s +; CHECK: fcvtzs.2d v0, [[VAL64]] + + %val = fptosi <2 x float> %in to <2 x i64> + ret <2 x i64> %val +} + +define <2 x i64> @test_v2f32_to_unsigned_v2i64(<2 x float> %in) { +; CHECK-LABEL: test_v2f32_to_unsigned_v2i64: +; CHECK: fcvtl [[VAL64:v[0-9]+]].2d, v0.2s +; CHECK: fcvtzu.2d v0, [[VAL64]] + + %val = fptoui <2 x float> %in to <2 x i64> + ret <2 x i64> %val +} + +define <2 x i16> @test_v2f32_to_signed_v2i16(<2 x float> %in) { +; CHECK-LABEL: test_v2f32_to_signed_v2i16: +; CHECK: fcvtzs.2s v0, v0 + + %val = fptosi <2 x float> %in to <2 x i16> + ret <2 x i16> %val +} + +define <2 x i16> @test_v2f32_to_unsigned_v2i16(<2 x float> %in) { +; CHECK-LABEL: test_v2f32_to_unsigned_v2i16: +; CHECK: fcvtzs.2s v0, v0 + + %val = fptoui <2 x float> %in to <2 x i16> + ret <2 x i16> %val +} + +define <2 x i8> @test_v2f32_to_signed_v2i8(<2 x float> %in) { +; CHECK-LABEL: test_v2f32_to_signed_v2i8: +; CHECK: fcvtzs.2s v0, v0 + + %val = fptosi <2 x float> %in to <2 x i8> + ret <2 x i8> %val +} + +define <2 x i8> @test_v2f32_to_unsigned_v2i8(<2 x float> %in) { +; CHECK-LABEL: test_v2f32_to_unsigned_v2i8: +; CHECK: fcvtzs.2s v0, v0 + + %val = fptoui <2 x float> %in to <2 x i8> + ret <2 x i8> %val +} + +define <4 x i16> @test_v4f32_to_signed_v4i16(<4 x float> %in) { +; CHECK-LABEL: test_v4f32_to_signed_v4i16: +; CHECK: fcvtzs.4s 
[[VAL64:v[0-9]+]], v0 +; CHECK: xtn.4h v0, [[VAL64]] + + %val = fptosi <4 x float> %in to <4 x i16> + ret <4 x i16> %val +} + +define <4 x i16> @test_v4f32_to_unsigned_v4i16(<4 x float> %in) { +; CHECK-LABEL: test_v4f32_to_unsigned_v4i16: +; CHECK: fcvtzu.4s [[VAL64:v[0-9]+]], v0 +; CHECK: xtn.4h v0, [[VAL64]] + + %val = fptoui <4 x float> %in to <4 x i16> + ret <4 x i16> %val +} + +define <4 x i8> @test_v4f32_to_signed_v4i8(<4 x float> %in) { +; CHECK-LABEL: test_v4f32_to_signed_v4i8: +; CHECK: fcvtzs.4s [[VAL64:v[0-9]+]], v0 +; CHECK: xtn.4h v0, [[VAL64]] + + %val = fptosi <4 x float> %in to <4 x i8> + ret <4 x i8> %val +} + +define <4 x i8> @test_v4f32_to_unsigned_v4i8(<4 x float> %in) { +; CHECK-LABEL: test_v4f32_to_unsigned_v4i8: +; CHECK: fcvtzs.4s [[VAL64:v[0-9]+]], v0 +; CHECK: xtn.4h v0, [[VAL64]] + + %val = fptoui <4 x float> %in to <4 x i8> + ret <4 x i8> %val +} + +define <2 x i32> @test_v2f64_to_signed_v2i32(<2 x double> %in) { +; CHECK-LABEL: test_v2f64_to_signed_v2i32: +; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0 +; CHECK: xtn.2s v0, [[VAL64]] + + %val = fptosi <2 x double> %in to <2 x i32> + ret <2 x i32> %val +} + +define <2 x i32> @test_v2f64_to_unsigned_v2i32(<2 x double> %in) { +; CHECK-LABEL: test_v2f64_to_unsigned_v2i32: +; CHECK: fcvtzu.2d [[VAL64:v[0-9]+]], v0 +; CHECK: xtn.2s v0, [[VAL64]] + + %val = fptoui <2 x double> %in to <2 x i32> + ret <2 x i32> %val +} + +define <2 x i16> @test_v2f64_to_signed_v2i16(<2 x double> %in) { +; CHECK-LABEL: test_v2f64_to_signed_v2i16: +; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0 +; CHECK: xtn.2s v0, [[VAL64]] + + %val = fptosi <2 x double> %in to <2 x i16> + ret <2 x i16> %val +} + +define <2 x i16> @test_v2f64_to_unsigned_v2i16(<2 x double> %in) { +; CHECK-LABEL: test_v2f64_to_unsigned_v2i16: +; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0 +; CHECK: xtn.2s v0, [[VAL64]] + + %val = fptoui <2 x double> %in to <2 x i16> + ret <2 x i16> %val +} + +define <2 x i8> @test_v2f64_to_signed_v2i8(<2 x double> %in) { +; 
CHECK-LABEL: test_v2f64_to_signed_v2i8: +; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0 +; CHECK: xtn.2s v0, [[VAL64]] + + %val = fptosi <2 x double> %in to <2 x i8> + ret <2 x i8> %val +} + +define <2 x i8> @test_v2f64_to_unsigned_v2i8(<2 x double> %in) { +; CHECK-LABEL: test_v2f64_to_unsigned_v2i8: +; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0 +; CHECK: xtn.2s v0, [[VAL64]] + + %val = fptoui <2 x double> %in to <2 x i8> + ret <2 x i8> %val +} diff --git a/test/CodeGen/AArch64/complex-int-to-fp.ll b/test/CodeGen/AArch64/complex-int-to-fp.ll new file mode 100644 index 0000000..5c943f9 --- /dev/null +++ b/test/CodeGen/AArch64/complex-int-to-fp.ll @@ -0,0 +1,164 @@ +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s + +; CHECK: autogen_SD19655 +; CHECK: scvtf +; CHECK: ret +define void @autogen_SD19655(<2 x i64>* %addr, <2 x float>* %addrfloat) { + %T = load <2 x i64>* %addr + %F = sitofp <2 x i64> %T to <2 x float> + store <2 x float> %F, <2 x float>* %addrfloat + ret void +} + +define <2 x double> @test_signed_v2i32_to_v2f64(<2 x i32> %v) nounwind readnone { +; CHECK-LABEL: test_signed_v2i32_to_v2f64: +; CHECK: sshll.2d [[VAL64:v[0-9]+]], v0, #0 +; CHECK-NEXT: scvtf.2d v0, [[VAL64]] +; CHECK-NEXT: ret + %conv = sitofp <2 x i32> %v to <2 x double> + ret <2 x double> %conv +} + +define <2 x double> @test_unsigned_v2i32_to_v2f64(<2 x i32> %v) nounwind readnone { +; CHECK-LABEL: test_unsigned_v2i32_to_v2f64 +; CHECK: ushll.2d [[VAL64:v[0-9]+]], v0, #0 +; CHECK-NEXT: ucvtf.2d v0, [[VAL64]] +; CHECK-NEXT: ret + %conv = uitofp <2 x i32> %v to <2 x double> + ret <2 x double> %conv +} + +define <2 x double> @test_signed_v2i16_to_v2f64(<2 x i16> %v) nounwind readnone { +; CHECK-LABEL: test_signed_v2i16_to_v2f64: +; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #16 +; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #16 +; CHECK: sshll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0 +; CHECK: scvtf.2d v0, [[VAL64]] + + %conv = sitofp <2 x i16> %v to <2 x double> + ret <2 x double> %conv +} 
+define <2 x double> @test_unsigned_v2i16_to_v2f64(<2 x i16> %v) nounwind readnone { +; CHECK-LABEL: test_unsigned_v2i16_to_v2f64 +; CHECK: movi d[[MASK:[0-9]+]], #0x00ffff0000ffff +; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]] +; CHECK: ushll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0 +; CHECK: ucvtf.2d v0, [[VAL64]] + + %conv = uitofp <2 x i16> %v to <2 x double> + ret <2 x double> %conv +} + +define <2 x double> @test_signed_v2i8_to_v2f64(<2 x i8> %v) nounwind readnone { +; CHECK-LABEL: test_signed_v2i8_to_v2f64: +; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #24 +; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #24 +; CHECK: sshll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0 +; CHECK: scvtf.2d v0, [[VAL64]] + + %conv = sitofp <2 x i8> %v to <2 x double> + ret <2 x double> %conv +} +define <2 x double> @test_unsigned_v2i8_to_v2f64(<2 x i8> %v) nounwind readnone { +; CHECK-LABEL: test_unsigned_v2i8_to_v2f64 +; CHECK: movi d[[MASK:[0-9]+]], #0x0000ff000000ff +; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]] +; CHECK: ushll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0 +; CHECK: ucvtf.2d v0, [[VAL64]] + + %conv = uitofp <2 x i8> %v to <2 x double> + ret <2 x double> %conv +} + +define <2 x float> @test_signed_v2i64_to_v2f32(<2 x i64> %v) nounwind readnone { +; CHECK-LABEL: test_signed_v2i64_to_v2f32: +; CHECK: scvtf.2d [[VAL64:v[0-9]+]], v0 +; CHECK: fcvtn v0.2s, [[VAL64]].2d + + %conv = sitofp <2 x i64> %v to <2 x float> + ret <2 x float> %conv +} +define <2 x float> @test_unsigned_v2i64_to_v2f32(<2 x i64> %v) nounwind readnone { +; CHECK-LABEL: test_unsigned_v2i64_to_v2f32 +; CHECK: ucvtf.2d [[VAL64:v[0-9]+]], v0 +; CHECK: fcvtn v0.2s, [[VAL64]].2d + + %conv = uitofp <2 x i64> %v to <2 x float> + ret <2 x float> %conv +} + +define <2 x float> @test_signed_v2i16_to_v2f32(<2 x i16> %v) nounwind readnone { +; CHECK-LABEL: test_signed_v2i16_to_v2f32: +; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #16 +; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #16 +; CHECK: scvtf.2s v0, [[VAL32]] + + %conv = sitofp <2 x i16> 
%v to <2 x float> + ret <2 x float> %conv +} +define <2 x float> @test_unsigned_v2i16_to_v2f32(<2 x i16> %v) nounwind readnone { +; CHECK-LABEL: test_unsigned_v2i16_to_v2f32 +; CHECK: movi d[[MASK:[0-9]+]], #0x00ffff0000ffff +; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]] +; CHECK: ucvtf.2s v0, [[VAL32]] + + %conv = uitofp <2 x i16> %v to <2 x float> + ret <2 x float> %conv +} + +define <2 x float> @test_signed_v2i8_to_v2f32(<2 x i8> %v) nounwind readnone { +; CHECK-LABEL: test_signed_v2i8_to_v2f32: +; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #24 +; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #24 +; CHECK: scvtf.2s v0, [[VAL32]] + + %conv = sitofp <2 x i8> %v to <2 x float> + ret <2 x float> %conv +} +define <2 x float> @test_unsigned_v2i8_to_v2f32(<2 x i8> %v) nounwind readnone { +; CHECK-LABEL: test_unsigned_v2i8_to_v2f32 +; CHECK: movi d[[MASK:[0-9]+]], #0x0000ff000000ff +; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]] +; CHECK: ucvtf.2s v0, [[VAL32]] + + %conv = uitofp <2 x i8> %v to <2 x float> + ret <2 x float> %conv +} + +define <4 x float> @test_signed_v4i16_to_v4f32(<4 x i16> %v) nounwind readnone { +; CHECK-LABEL: test_signed_v4i16_to_v4f32: +; CHECK: sshll.4s [[VAL32:v[0-9]+]], v0, #0 +; CHECK: scvtf.4s v0, [[VAL32]] + + %conv = sitofp <4 x i16> %v to <4 x float> + ret <4 x float> %conv +} + +define <4 x float> @test_unsigned_v4i16_to_v4f32(<4 x i16> %v) nounwind readnone { +; CHECK-LABEL: test_unsigned_v4i16_to_v4f32 +; CHECK: ushll.4s [[VAL32:v[0-9]+]], v0, #0 +; CHECK: ucvtf.4s v0, [[VAL32]] + + %conv = uitofp <4 x i16> %v to <4 x float> + ret <4 x float> %conv +} + +define <4 x float> @test_signed_v4i8_to_v4f32(<4 x i8> %v) nounwind readnone { +; CHECK-LABEL: test_signed_v4i8_to_v4f32: +; CHECK: shl.4h [[TMP:v[0-9]+]], v0, #8 +; CHECK: sshr.4h [[VAL16:v[0-9]+]], [[TMP]], #8 +; CHECK: sshll.4s [[VAL32:v[0-9]+]], [[VAL16]], #0 +; CHECK: scvtf.4s v0, [[VAL32]] + + %conv = sitofp <4 x i8> %v to <4 x float> + ret <4 x float> %conv +} +define <4 x float> 
@test_unsigned_v4i8_to_v4f32(<4 x i8> %v) nounwind readnone { +; CHECK-LABEL: test_unsigned_v4i8_to_v4f32 +; CHECK: bic.4h v0, #0xff, lsl #8 +; CHECK: ushll.4s [[VAL32:v[0-9]+]], v0, #0 +; CHECK: ucvtf.4s v0, [[VAL32]] + + %conv = uitofp <4 x i8> %v to <4 x float> + ret <4 x float> %conv +} diff --git a/test/CodeGen/AArch64/directcond.ll b/test/CodeGen/AArch64/directcond.ll index 1b51928..fbea4a6 100644 --- a/test/CodeGen/AArch64/directcond.ll +++ b/test/CodeGen/AArch64/directcond.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 -aarch64-atomic-cfg-tidy=0 | FileCheck %s --check-prefix=CHECK +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -aarch64-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-NOFP %s define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) { ; CHECK-LABEL: test_select_i32: diff --git a/test/CodeGen/AArch64/f16-convert.ll b/test/CodeGen/AArch64/f16-convert.ll new file mode 100644 index 0000000..6fabdc5 --- /dev/null +++ b/test/CodeGen/AArch64/f16-convert.ll @@ -0,0 +1,254 @@ +; RUN: llc < %s -mtriple=arm64-apple-ios -asm-verbose=false | FileCheck %s + +define float @load0(i16* nocapture readonly %a) nounwind { +; CHECK-LABEL: load0: +; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0] +; CHECK-NEXT: fcvt s0, [[HREG]] +; CHECK-NEXT: ret + + %tmp = load i16* %a, align 2 + %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp) + ret float %tmp1 +} + +define double @load1(i16* nocapture readonly %a) nounwind { +; CHECK-LABEL: load1: +; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0] +; CHECK-NEXT: fcvt d0, [[HREG]] +; CHECK-NEXT: ret + + %tmp = load i16* %a, align 2 + %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp) + %conv = fpext float %tmp1 to 
double + ret double %conv +} + +define float @load2(i16* nocapture readonly %a, i32 %i) nounwind { +; CHECK-LABEL: load2: +; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, w1, sxtw #1] +; CHECK-NEXT: fcvt s0, [[HREG]] +; CHECK-NEXT: ret + + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom + %tmp = load i16* %arrayidx, align 2 + %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp) + ret float %tmp1 +} + +define double @load3(i16* nocapture readonly %a, i32 %i) nounwind { +; CHECK-LABEL: load3: +; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, w1, sxtw #1] +; CHECK-NEXT: fcvt d0, [[HREG]] +; CHECK-NEXT: ret + + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom + %tmp = load i16* %arrayidx, align 2 + %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp) + %conv = fpext float %tmp1 to double + ret double %conv +} + +define float @load4(i16* nocapture readonly %a, i64 %i) nounwind { +; CHECK-LABEL: load4: +; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, x1, lsl #1] +; CHECK-NEXT: fcvt s0, [[HREG]] +; CHECK-NEXT: ret + + %arrayidx = getelementptr inbounds i16* %a, i64 %i + %tmp = load i16* %arrayidx, align 2 + %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp) + ret float %tmp1 +} + +define double @load5(i16* nocapture readonly %a, i64 %i) nounwind { +; CHECK-LABEL: load5: +; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, x1, lsl #1] +; CHECK-NEXT: fcvt d0, [[HREG]] +; CHECK-NEXT: ret + + %arrayidx = getelementptr inbounds i16* %a, i64 %i + %tmp = load i16* %arrayidx, align 2 + %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp) + %conv = fpext float %tmp1 to double + ret double %conv +} + +define float @load6(i16* nocapture readonly %a) nounwind { +; CHECK-LABEL: load6: +; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, #20] +; CHECK-NEXT: fcvt s0, [[HREG]] +; CHECK-NEXT: ret + + %arrayidx = getelementptr inbounds i16* %a, i64 10 + %tmp = load i16* %arrayidx, align 2 + %tmp1 = tail call float 
@llvm.convert.from.fp16(i16 %tmp) + ret float %tmp1 +} + +define double @load7(i16* nocapture readonly %a) nounwind { +; CHECK-LABEL: load7: +; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, #20] +; CHECK-NEXT: fcvt d0, [[HREG]] +; CHECK-NEXT: ret + + %arrayidx = getelementptr inbounds i16* %a, i64 10 + %tmp = load i16* %arrayidx, align 2 + %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp) + %conv = fpext float %tmp1 to double + ret double %conv +} + +define float @load8(i16* nocapture readonly %a) nounwind { +; CHECK-LABEL: load8: +; CHECK-NEXT: ldur [[HREG:h[0-9]+]], [x0, #-20] +; CHECK-NEXT: fcvt s0, [[HREG]] +; CHECK-NEXT: ret + + %arrayidx = getelementptr inbounds i16* %a, i64 -10 + %tmp = load i16* %arrayidx, align 2 + %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp) + ret float %tmp1 +} + +define double @load9(i16* nocapture readonly %a) nounwind { +; CHECK-LABEL: load9: +; CHECK-NEXT: ldur [[HREG:h[0-9]+]], [x0, #-20] +; CHECK-NEXT: fcvt d0, [[HREG]] +; CHECK-NEXT: ret + + %arrayidx = getelementptr inbounds i16* %a, i64 -10 + %tmp = load i16* %arrayidx, align 2 + %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp) + %conv = fpext float %tmp1 to double + ret double %conv +} + +define void @store0(i16* nocapture %a, float %val) nounwind { +; CHECK-LABEL: store0: +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: str h0, [x0] +; CHECK-NEXT: ret + + %tmp = tail call i16 @llvm.convert.to.fp16(float %val) + store i16 %tmp, i16* %a, align 2 + ret void +} + +define void @store1(i16* nocapture %a, double %val) nounwind { +; CHECK-LABEL: store1: +; CHECK-NEXT: fcvt h0, d0 +; CHECK-NEXT: str h0, [x0] +; CHECK-NEXT: ret + + %conv = fptrunc double %val to float + %tmp = tail call i16 @llvm.convert.to.fp16(float %conv) + store i16 %tmp, i16* %a, align 2 + ret void +} + +define void @store2(i16* nocapture %a, i32 %i, float %val) nounwind { +; CHECK-LABEL: store2: +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: str h0, [x0, w1, sxtw #1] +; CHECK-NEXT: ret + + %tmp = tail 
call i16 @llvm.convert.to.fp16(float %val) + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom + store i16 %tmp, i16* %arrayidx, align 2 + ret void +} + +define void @store3(i16* nocapture %a, i32 %i, double %val) nounwind { +; CHECK-LABEL: store3: +; CHECK-NEXT: fcvt h0, d0 +; CHECK-NEXT: str h0, [x0, w1, sxtw #1] +; CHECK-NEXT: ret + + %conv = fptrunc double %val to float + %tmp = tail call i16 @llvm.convert.to.fp16(float %conv) + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom + store i16 %tmp, i16* %arrayidx, align 2 + ret void +} + +define void @store4(i16* nocapture %a, i64 %i, float %val) nounwind { +; CHECK-LABEL: store4: +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: str h0, [x0, x1, lsl #1] +; CHECK-NEXT: ret + + %tmp = tail call i16 @llvm.convert.to.fp16(float %val) + %arrayidx = getelementptr inbounds i16* %a, i64 %i + store i16 %tmp, i16* %arrayidx, align 2 + ret void +} + +define void @store5(i16* nocapture %a, i64 %i, double %val) nounwind { +; CHECK-LABEL: store5: +; CHECK-NEXT: fcvt h0, d0 +; CHECK-NEXT: str h0, [x0, x1, lsl #1] +; CHECK-NEXT: ret + + %conv = fptrunc double %val to float + %tmp = tail call i16 @llvm.convert.to.fp16(float %conv) + %arrayidx = getelementptr inbounds i16* %a, i64 %i + store i16 %tmp, i16* %arrayidx, align 2 + ret void +} + +define void @store6(i16* nocapture %a, float %val) nounwind { +; CHECK-LABEL: store6: +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: str h0, [x0, #20] +; CHECK-NEXT: ret + + %tmp = tail call i16 @llvm.convert.to.fp16(float %val) + %arrayidx = getelementptr inbounds i16* %a, i64 10 + store i16 %tmp, i16* %arrayidx, align 2 + ret void +} + +define void @store7(i16* nocapture %a, double %val) nounwind { +; CHECK-LABEL: store7: +; CHECK-NEXT: fcvt h0, d0 +; CHECK-NEXT: str h0, [x0, #20] +; CHECK-NEXT: ret + + %conv = fptrunc double %val to float + %tmp = tail call i16 @llvm.convert.to.fp16(float %conv) + %arrayidx = getelementptr 
inbounds i16* %a, i64 10 + store i16 %tmp, i16* %arrayidx, align 2 + ret void +} + +define void @store8(i16* nocapture %a, float %val) nounwind { +; CHECK-LABEL: store8: +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: stur h0, [x0, #-20] +; CHECK-NEXT: ret + + %tmp = tail call i16 @llvm.convert.to.fp16(float %val) + %arrayidx = getelementptr inbounds i16* %a, i64 -10 + store i16 %tmp, i16* %arrayidx, align 2 + ret void +} + +define void @store9(i16* nocapture %a, double %val) nounwind { +; CHECK-LABEL: store9: +; CHECK-NEXT: fcvt h0, d0 +; CHECK-NEXT: stur h0, [x0, #-20] +; CHECK-NEXT: ret + + %conv = fptrunc double %val to float + %tmp = tail call i16 @llvm.convert.to.fp16(float %conv) + %arrayidx = getelementptr inbounds i16* %a, i64 -10 + store i16 %tmp, i16* %arrayidx, align 2 + ret void +} + +declare i16 @llvm.convert.to.fp16(float) nounwind readnone +declare float @llvm.convert.from.fp16(i16) nounwind readnone diff --git a/test/CodeGen/AArch64/fast-isel-mul.ll b/test/CodeGen/AArch64/fast-isel-mul.ll new file mode 100644 index 0000000..d02c67f --- /dev/null +++ b/test/CodeGen/AArch64/fast-isel-mul.ll @@ -0,0 +1,40 @@ +; RUN: llc -fast-isel -fast-isel-abort -mtriple=aarch64 -o - %s | FileCheck %s + +@var8 = global i8 0 +@var16 = global i16 0 +@var32 = global i32 0 +@var64 = global i64 0 + +define void @test_mul8(i8 %lhs, i8 %rhs) { +; CHECK-LABEL: test_mul8: +; CHECK: mul w0, w0, w1 +; %lhs = load i8* @var8 +; %rhs = load i8* @var8 + %prod = mul i8 %lhs, %rhs + store i8 %prod, i8* @var8 + ret void +} + +define void @test_mul16(i16 %lhs, i16 %rhs) { +; CHECK-LABEL: test_mul16: +; CHECK: mul w0, w0, w1 + %prod = mul i16 %lhs, %rhs + store i16 %prod, i16* @var16 + ret void +} + +define void @test_mul32(i32 %lhs, i32 %rhs) { +; CHECK-LABEL: test_mul32: +; CHECK: mul w0, w0, w1 + %prod = mul i32 %lhs, %rhs + store i32 %prod, i32* @var32 + ret void +} + +define void @test_mul64(i64 %lhs, i64 %rhs) { +; CHECK-LABEL: test_mul64: +; CHECK: mul x0, x0, x1 + %prod = mul i64 
%lhs, %rhs + store i64 %prod, i64* @var64 + ret void +} diff --git a/test/CodeGen/AArch64/flags-multiuse.ll b/test/CodeGen/AArch64/flags-multiuse.ll index c9b0b9f..77bbcdd 100644 --- a/test/CodeGen/AArch64/flags-multiuse.ll +++ b/test/CodeGen/AArch64/flags-multiuse.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -o - %s | FileCheck %s ; LLVM should be able to cope with multiple uses of the same flag-setting ; instruction at different points of a routine. Either by rematerializing the diff --git a/test/CodeGen/AArch64/funcptr_cast.ll b/test/CodeGen/AArch64/funcptr_cast.ll new file mode 100644 index 0000000..a00b7bc --- /dev/null +++ b/test/CodeGen/AArch64/funcptr_cast.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +define i8 @test() { +; CHECK-LABEL: @test +; CHECK: adrp {{x[0-9]+}}, foo +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, :lo12:foo +; CHECK: ldrb w0, [{{x[0-9]+}}] +entry: + %0 = load i8* bitcast (void (...)* @foo to i8*), align 1 + ret i8 %0 +} + +declare void @foo(...) 
diff --git a/test/CodeGen/AArch64/global-merge-1.ll b/test/CodeGen/AArch64/global-merge-1.ll new file mode 100644 index 0000000..68aba5e --- /dev/null +++ b/test/CodeGen/AArch64/global-merge-1.ll @@ -0,0 +1,26 @@ +; RUN: llc %s -mtriple=aarch64-none-linux-gnu -enable-global-merge -o - | FileCheck %s +; RUN: llc %s -mtriple=aarch64-none-linux-gnu -enable-global-merge -global-merge-on-external -o - | FileCheck %s + +; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -o - | FileCheck %s +; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -global-merge-on-external -o - | FileCheck %s + +; RUN: llc %s -mtriple=aarch64-apple-ios -enable-global-merge -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS +; RUN: llc %s -mtriple=aarch64-apple-ios -enable-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS + +@m = internal global i32 0, align 4 +@n = internal global i32 0, align 4 + +define void @f1(i32 %a1, i32 %a2) { +;CHECK-APPLE-IOS: adrp x8, __MergedGlobals@PAGE +;CHECK-APPLE-IOS-NOT: adrp +;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals@PAGEOFF + store i32 %a1, i32* @m, align 4 + store i32 %a2, i32* @n, align 4 + ret void +} + +;CHECK: .type _MergedGlobals,@object // @_MergedGlobals +;CHECK: .local _MergedGlobals +;CHECK: .comm _MergedGlobals,8,8 + +;CHECK-APPLE-IOS: .zerofill __DATA,__bss,__MergedGlobals,8,3 ; @_MergedGlobals diff --git a/test/CodeGen/AArch64/global-merge-2.ll b/test/CodeGen/AArch64/global-merge-2.ll new file mode 100644 index 0000000..a773566 --- /dev/null +++ b/test/CodeGen/AArch64/global-merge-2.ll @@ -0,0 +1,51 @@ +; RUN: llc %s -mtriple=aarch64-none-linux-gnu -enable-global-merge -global-merge-on-external -o - | FileCheck %s +; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -global-merge-on-external -o - | FileCheck %s +; RUN: llc %s -mtriple=aarch64-apple-ios -enable-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS + +@x = global 
i32 0, align 4 +@y = global i32 0, align 4 +@z = global i32 0, align 4 + +define void @f1(i32 %a1, i32 %a2) { +;CHECK-APPLE-IOS-LABEL: _f1: +;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE +;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF +;CHECK-APPLE-IOS-NOT: adrp + store i32 %a1, i32* @x, align 4 + store i32 %a2, i32* @y, align 4 + ret void +} + +define void @g1(i32 %a1, i32 %a2) { +;CHECK-APPLE-IOS-LABEL: _g1: +;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE +;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF +;CHECK-APPLE-IOS-NOT: adrp + store i32 %a1, i32* @y, align 4 + store i32 %a2, i32* @z, align 4 + ret void +} + +;CHECK: .type _MergedGlobals_x,@object // @_MergedGlobals_x +;CHECK: .globl _MergedGlobals_x +;CHECK: .align 3 +;CHECK: _MergedGlobals_x: +;CHECK: .size _MergedGlobals_x, 12 + +;CHECK: .globl x +;CHECK: x = _MergedGlobals_x +;CHECK: .globl y +;CHECK: y = _MergedGlobals_x+4 +;CHECK: .globl z +;CHECK: z = _MergedGlobals_x+8 + +;CHECK-APPLE-IOS: .globl __MergedGlobals_x ; @_MergedGlobals_x +;CHECK-APPLE-IOS: .zerofill __DATA,__common,__MergedGlobals_x,12,3 + +;CHECK-APPLE-IOS: .globl _x +;CHECK-APPLE-IOS: _x = __MergedGlobals_x +;CHECK-APPLE-IOS: .globl _y +;CHECK-APPLE-IOS: _y = __MergedGlobals_x+4 +;CHECK-APPLE-IOS: .globl _z +;CHECK-APPLE-IOS: _z = __MergedGlobals_x+8 +;CHECK-APPLE-IOS: .subsections_via_symbols diff --git a/test/CodeGen/AArch64/global-merge-3.ll b/test/CodeGen/AArch64/global-merge-3.ll new file mode 100644 index 0000000..d455d40 --- /dev/null +++ b/test/CodeGen/AArch64/global-merge-3.ll @@ -0,0 +1,51 @@ +; RUN: llc %s -mtriple=aarch64-none-linux-gnu -enable-global-merge -global-merge-on-external -o - | FileCheck %s +; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -global-merge-on-external -o - | FileCheck %s +; RUN: llc %s -mtriple=aarch64-apple-ios -enable-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS + +@x = global [1000 x i32] zeroinitializer, align 
1 +@y = global [1000 x i32] zeroinitializer, align 1 +@z = internal global i32 1, align 4 + +define void @f1(i32 %a1, i32 %a2, i32 %a3) { +;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE +;CHECK-APPLE-IOS-NOT: adrp +;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF +;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_y@PAGE +;CHECK-APPLE-IOS: add x9, x9, __MergedGlobals_y@PAGEOFF + %x3 = getelementptr inbounds [1000 x i32]* @x, i32 0, i64 3 + %y3 = getelementptr inbounds [1000 x i32]* @y, i32 0, i64 3 + store i32 %a1, i32* %x3, align 4 + store i32 %a2, i32* %y3, align 4 + store i32 %a3, i32* @z, align 4 + ret void +} + +;CHECK: .type _MergedGlobals_x,@object // @_MergedGlobals_x +;CHECK: .globl _MergedGlobals_x +;CHECK: .align 4 +;CHECK: _MergedGlobals_x: +;CHECK: .size _MergedGlobals_x, 4004 + +;CHECK: .type _MergedGlobals_y,@object // @_MergedGlobals_y +;CHECK: .globl _MergedGlobals_y +;CHECK: _MergedGlobals_y: +;CHECK: .size _MergedGlobals_y, 4000 + +;CHECK-APPLE-IOS: .globl __MergedGlobals_x ; @_MergedGlobals_x +;CHECK-APPLE-IOS: .align 4 +;CHECK-APPLE-IOS: __MergedGlobals_x: +;CHECK-APPLE-IOS: .long 1 +;CHECK-APPLE-IOS: .space 4000 + +;CHECK-APPLE-IOS: .globl __MergedGlobals_y ; @_MergedGlobals_y +;CHECK-APPLE-IOS: .zerofill __DATA,__common,__MergedGlobals_y,4000,4 + +;CHECK: .globl x +;CHECK: x = _MergedGlobals_x+4 +;CHECK: .globl y +;CHECK: y = _MergedGlobals_y + +;CHECK-APPLE-IOS:.globl _x +;CHECK-APPLE-IOS: _x = __MergedGlobals_x+4 +;CHECK-APPLE-IOS:.globl _y +;CHECK-APPLE-IOS: _y = __MergedGlobals_y diff --git a/test/CodeGen/AArch64/global-merge-4.ll b/test/CodeGen/AArch64/global-merge-4.ll new file mode 100644 index 0000000..a525ccd --- /dev/null +++ b/test/CodeGen/AArch64/global-merge-4.ll @@ -0,0 +1,73 @@ +; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -o - | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" +target triple = 
"arm64-apple-ios7.0.0" + +@bar = internal global [5 x i32] zeroinitializer, align 4 +@baz = internal global [5 x i32] zeroinitializer, align 4 +@foo = internal global [5 x i32] zeroinitializer, align 4 + +; Function Attrs: nounwind ssp +define internal void @initialize() #0 { + %1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4 + %2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4 + %3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4 + %4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4 + %5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4 + %6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4 + %7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4 + %8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4 + %9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4 + %10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4 + ret void +} + +declare i32 @calc(...) 
+ +; Function Attrs: nounwind ssp +define internal void @calculate() #0 { + %1 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4 + %2 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4 + %3 = mul nsw i32 %2, %1 + store i32 %3, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0), align 4 + %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4 + %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4 + %6 = mul nsw i32 %5, %4 + store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 1), align 4 + %7 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4 + %8 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4 + %9 = mul nsw i32 %8, %7 + store i32 %9, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 2), align 4 + %10 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4 + %11 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4 + %12 = mul nsw i32 %11, %10 + store i32 %12, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 3), align 4 + %13 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4 + %14 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4 + %15 = mul nsw i32 %14, %13 + store i32 %15, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 4), align 4 + ret void +} + +; Function Attrs: nounwind readnone ssp +define internal i32* @returnFoo() #1 { + ret i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0) +} + +;CHECK: .type _MergedGlobals,@object // @_MergedGlobals +;CHECK: .local _MergedGlobals +;CHECK: .comm _MergedGlobals,60,16 + +attributes #0 = { nounwind ssp } +attributes #1 = { nounwind readnone ssp } +attributes #2 = { nounwind } diff --git a/test/CodeGen/AArch64/global-merge.ll b/test/CodeGen/AArch64/global-merge.ll new file mode 100644 index 0000000..aed1dc4 --- /dev/null 
+++ b/test/CodeGen/AArch64/global-merge.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck --check-prefix=NO-MERGE %s +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O0 -global-merge-on-external=true | FileCheck --check-prefix=NO-MERGE %s + +; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE +; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 -global-merge-on-external=true | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE + +; FIXME: add O1/O2 test for aarch64-none-linux-gnu and aarch64-apple-ios + +@m = internal global i32 0, align 4 +@n = internal global i32 0, align 4 + +define void @f1(i32 %a1, i32 %a2) { +; CHECK-LABEL: f1: +; CHECK: adrp x{{[0-9]+}}, _MergedGlobals +; CHECK-NOT: adrp + +; CHECK-APPLE-IOS-LABEL: f1: +; CHECK-APPLE-IOS: adrp x{{[0-9]+}}, __MergedGlobals +; CHECK-APPLE-IOS-NOT: adrp + store i32 %a1, i32* @m, align 4 + store i32 %a2, i32* @n, align 4 + ret void +} + +; CHECK: .local _MergedGlobals +; CHECK: .comm _MergedGlobals,8,8 +; NO-MERGE-NOT: .local _MergedGlobals + +; CHECK-APPLE-IOS: .zerofill __DATA,__bss,__MergedGlobals,8,3 +; CHECK-APPLE-IOS-NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,8,3 diff --git a/test/CodeGen/AArch64/i128-fast-isel-fallback.ll b/test/CodeGen/AArch64/i128-fast-isel-fallback.ll new file mode 100644 index 0000000..1cffbf3 --- /dev/null +++ b/test/CodeGen/AArch64/i128-fast-isel-fallback.ll @@ -0,0 +1,18 @@ +; RUN: llc -O0 -mtriple=arm64-apple-ios7.0 -mcpu=generic < %s | FileCheck %s + +; Function Attrs: nounwind ssp +define void @test1() { + %1 = sext i32 0 to i128 + call void @test2(i128 %1) + ret void + +; The i128 is 0 so we can test to make sure it is propagated into the x +; registers that make up the i128 pair + +; CHECK: mov x0, xzr +; CHECK: mov x1, x0 +; CHECK: bl _test2 + +} + +declare void @test2(i128) diff --git a/test/CodeGen/AArch64/inlineasm-ldr-pseudo.ll b/test/CodeGen/AArch64/inlineasm-ldr-pseudo.ll
new file mode 100644 index 0000000..645214a --- /dev/null +++ b/test/CodeGen/AArch64/inlineasm-ldr-pseudo.ll @@ -0,0 +1,26 @@ +; We actually need to use -filetype=obj in this test because if we output +; assembly, the current code path will bypass the parser and just write the +; raw text out to the Streamer. We need to actually parse the inlineasm to +; demonstrate the bug. Going the asm->obj route does not show the issue. +; RUN: llc -mtriple=aarch64 < %s -filetype=obj | llvm-objdump -arch=aarch64 -d - | FileCheck %s + +; CHECK-LABEL: foo: +; CHECK: a0 79 95 d2 movz x0, #0xabcd +; CHECK: c0 03 5f d6 ret +define i32 @foo() nounwind { +entry: + %0 = tail call i32 asm sideeffect "ldr $0,=0xabcd", "=r"() nounwind + ret i32 %0 +} +; CHECK-LABEL: bar: +; CHECK: 40 00 00 58 ldr x0, #8 +; CHECK: c0 03 5f d6 ret +; Make sure the constant pool entry comes after the return +; CHECK-LABEL: $d.1: +define i32 @bar() nounwind { +entry: + %0 = tail call i32 asm sideeffect "ldr $0,=0x10001", "=r"() nounwind + ret i32 %0 +} + + diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll index 1dfb789..69fbd99 100644 --- a/test/CodeGen/AArch64/jump-table.ll +++ b/test/CodeGen/AArch64/jump-table.ll @@ -1,6 +1,6 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -o - %s | FileCheck --check-prefix=CHECK-PIC %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck %s +; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-LARGE %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -aarch64-atomic-cfg-tidy=0 -o - %s 
| FileCheck --check-prefix=CHECK-PIC %s define i32 @test_jumptable(i32 %in) { ; CHECK: test_jumptable diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll index 1ce5c95..e4f4295 100644 --- a/test/CodeGen/AArch64/ldst-opt.ll +++ b/test/CodeGen/AArch64/ldst-opt.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -o - %s | FileCheck %s ; This file contains tests for the AArch64 load/store optimizer. @@ -166,6 +166,217 @@ bar: ; Check the following transform: ; +; add x8, x8, #16 +; ... +; ldr X, [x8] +; -> +; ldr X, [x8, #16]! +; +; with X being either w0, x0, s0, d0 or q0. + +%pre.struct.i32 = type { i32, i32, i32} +%pre.struct.i64 = type { i32, i64, i64} +%pre.struct.i128 = type { i32, <2 x i64>, <2 x i64>} +%pre.struct.float = type { i32, float, float} +%pre.struct.double = type { i32, double, double} + +define i32 @load-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond, + %pre.struct.i32* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-word2 +; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #4]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i32** %this + %gep1 = getelementptr inbounds %pre.struct.i32* %load1, i64 0, i32 1 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i32* %load2, i64 0, i32 2 + br label %return +return: + %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load i32* %retptr + ret i32 %ret +} + +define i64 @load-pre-indexed-doubleword2(%pre.struct.i64** %this, i1 %cond, + %pre.struct.i64* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-doubleword2 +; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #8]! 
+ br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i64** %this + %gep1 = getelementptr inbounds %pre.struct.i64* %load1, i64 0, i32 1 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i64* %load2, i64 0, i32 2 + br label %return +return: + %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load i64* %retptr + ret i64 %ret +} + +define <2 x i64> @load-pre-indexed-quadword2(%pre.struct.i128** %this, i1 %cond, + %pre.struct.i128* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-quadword2 +; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #16]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i128** %this + %gep1 = getelementptr inbounds %pre.struct.i128* %load1, i64 0, i32 1 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i128* %load2, i64 0, i32 2 + br label %return +return: + %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load <2 x i64>* %retptr + ret <2 x i64> %ret +} + +define float @load-pre-indexed-float2(%pre.struct.float** %this, i1 %cond, + %pre.struct.float* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-float2 +; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #4]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.float** %this + %gep1 = getelementptr inbounds %pre.struct.float* %load1, i64 0, i32 1 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.float* %load2, i64 0, i32 2 + br label %return +return: + %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load float* %retptr + ret float %ret +} + +define double @load-pre-indexed-double2(%pre.struct.double** %this, i1 %cond, + %pre.struct.double* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-double2 +; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #8]! 
+ br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.double** %this + %gep1 = getelementptr inbounds %pre.struct.double* %load1, i64 0, i32 1 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.double* %load2, i64 0, i32 2 + br label %return +return: + %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load double* %retptr + ret double %ret +} + +; Check the following transform: +; +; add x8, x8, #16 +; ... +; str X, [x8] +; -> +; str X, [x8, #16]! +; +; with X being either w0, x0, s0, d0 or q0. + +define void @store-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond, + %pre.struct.i32* %load2, + i32 %val) nounwind { +; CHECK-LABEL: store-pre-indexed-word2 +; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i32** %this + %gep1 = getelementptr inbounds %pre.struct.i32* %load1, i64 0, i32 1 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i32* %load2, i64 0, i32 2 + br label %return +return: + %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] + store i32 %val, i32* %retptr + ret void +} + +define void @store-pre-indexed-doubleword2(%pre.struct.i64** %this, i1 %cond, + %pre.struct.i64* %load2, + i64 %val) nounwind { +; CHECK-LABEL: store-pre-indexed-doubleword2 +; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #8]! 
+ br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i64** %this + %gep1 = getelementptr inbounds %pre.struct.i64* %load1, i64 0, i32 1 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i64* %load2, i64 0, i32 2 + br label %return +return: + %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] + store i64 %val, i64* %retptr + ret void +} + +define void @store-pre-indexed-quadword2(%pre.struct.i128** %this, i1 %cond, + %pre.struct.i128* %load2, + <2 x i64> %val) nounwind { +; CHECK-LABEL: store-pre-indexed-quadword2 +; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #16]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i128** %this + %gep1 = getelementptr inbounds %pre.struct.i128* %load1, i64 0, i32 1 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i128* %load2, i64 0, i32 2 + br label %return +return: + %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] + store <2 x i64> %val, <2 x i64>* %retptr + ret void +} + +define void @store-pre-indexed-float2(%pre.struct.float** %this, i1 %cond, + %pre.struct.float* %load2, + float %val) nounwind { +; CHECK-LABEL: store-pre-indexed-float2 +; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #4]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.float** %this + %gep1 = getelementptr inbounds %pre.struct.float* %load1, i64 0, i32 1 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.float* %load2, i64 0, i32 2 + br label %return +return: + %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] + store float %val, float* %retptr + ret void +} + +define void @store-pre-indexed-double2(%pre.struct.double** %this, i1 %cond, + %pre.struct.double* %load2, + double %val) nounwind { +; CHECK-LABEL: store-pre-indexed-double2 +; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #8]! 
+ br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.double** %this + %gep1 = getelementptr inbounds %pre.struct.double* %load1, i64 0, i32 1 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.double* %load2, i64 0, i32 2 + br label %return +return: + %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] + store double %val, double* %retptr + ret void +} + +; Check the following transform: +; ; ldr X, [x20] ; ... ; add x20, x20, #32 @@ -294,8 +505,263 @@ exit: ret void } +; Check the following transform: +; +; str X, [x20] +; ... +; add x20, x20, #32 +; -> +; str X, [x20], #32 +; +; with X being either w0, x0, s0, d0 or q0. + +define void @store-post-indexed-word(i32* %array, i64 %count, i32 %val) nounwind { +; CHECK-LABEL: store-post-indexed-word +; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #16 +entry: + %gep1 = getelementptr i32* %array, i64 2 + br label %body + +body: + %iv2 = phi i32* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr i32* %iv2, i64 -1 + %load = load i32* %gep2 + call void @use-word(i32 %load) + store i32 %val, i32* %iv2 + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr i32* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + +define void @store-post-indexed-doubleword(i64* %array, i64 %count, i64 %val) nounwind { +; CHECK-LABEL: store-post-indexed-doubleword +; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #32 +entry: + %gep1 = getelementptr i64* %array, i64 2 + br label %body + +body: + %iv2 = phi i64* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr i64* %iv2, i64 -1 + %load = load i64* %gep2 + call void @use-doubleword(i64 %load) + store i64 %val, i64* %iv2 + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr i64* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + 
+exit: + ret void +} + +define void @store-post-indexed-quadword(<2 x i64>* %array, i64 %count, <2 x i64> %val) nounwind { +; CHECK-LABEL: store-post-indexed-quadword +; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #64 +entry: + %gep1 = getelementptr <2 x i64>* %array, i64 2 + br label %body + +body: + %iv2 = phi <2 x i64>* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr <2 x i64>* %iv2, i64 -1 + %load = load <2 x i64>* %gep2 + call void @use-quadword(<2 x i64> %load) + store <2 x i64> %val, <2 x i64>* %iv2 + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr <2 x i64>* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + +define void @store-post-indexed-float(float* %array, i64 %count, float %val) nounwind { +; CHECK-LABEL: store-post-indexed-float +; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #16 +entry: + %gep1 = getelementptr float* %array, i64 2 + br label %body + +body: + %iv2 = phi float* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr float* %iv2, i64 -1 + %load = load float* %gep2 + call void @use-float(float %load) + store float %val, float* %iv2 + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr float* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + +define void @store-post-indexed-double(double* %array, i64 %count, double %val) nounwind { +; CHECK-LABEL: store-post-indexed-double +; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #32 +entry: + %gep1 = getelementptr double* %array, i64 2 + br label %body + +body: + %iv2 = phi double* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr double* %iv2, i64 -1 + %load = load double* %gep2 + call void @use-double(double %load) + store double %val, double* %iv2 + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr 
double* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + declare void @use-word(i32) declare void @use-doubleword(i64) declare void @use-quadword(<2 x i64>) declare void @use-float(float) declare void @use-double(double) + +; Check the following transform: +; +; (ldr|str) X, [x20] +; ... +; sub x20, x20, #16 +; -> +; (ldr|str) X, [x20], #-16 +; +; with X being either w0, x0, s0, d0 or q0. + +define void @post-indexed-sub-word(i32* %a, i32* %b, i64 %count) nounwind { +; CHECK-LABEL: post-indexed-sub-word +; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #-8 +; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #-8 + br label %for.body +for.body: + %phi1 = phi i32* [ %gep4, %for.body ], [ %b, %0 ] + %phi2 = phi i32* [ %gep3, %for.body ], [ %a, %0 ] + %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ] + %gep1 = getelementptr i32* %phi1, i64 -1 + %load1 = load i32* %gep1 + %gep2 = getelementptr i32* %phi2, i64 -1 + store i32 %load1, i32* %gep2 + %load2 = load i32* %phi1 + store i32 %load2, i32* %phi2 + %dec.i = add nsw i64 %i, -1 + %gep3 = getelementptr i32* %phi2, i64 -2 + %gep4 = getelementptr i32* %phi1, i64 -2 + %cond = icmp sgt i64 %dec.i, 0 + br i1 %cond, label %for.body, label %end +end: + ret void +} + +define void @post-indexed-sub-doubleword(i64* %a, i64* %b, i64 %count) nounwind { +; CHECK-LABEL: post-indexed-sub-doubleword +; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-16 +; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-16 + br label %for.body +for.body: + %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ] + %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ] + %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ] + %gep1 = getelementptr i64* %phi1, i64 -1 + %load1 = load i64* %gep1 + %gep2 = getelementptr i64* %phi2, i64 -1 + store i64 %load1, i64* %gep2 + %load2 = load i64* %phi1 + store i64 %load2, i64* %phi2 + %dec.i = add nsw i64 %i, -1 + %gep3 = getelementptr i64* %phi2, i64 -2 + %gep4 = getelementptr i64* %phi1, i64 -2 + 
%cond = icmp sgt i64 %dec.i, 0 + br i1 %cond, label %for.body, label %end +end: + ret void +} + +define void @post-indexed-sub-quadword(<2 x i64>* %a, <2 x i64>* %b, i64 %count) nounwind { +; CHECK-LABEL: post-indexed-sub-quadword +; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}], #-32 +; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #-32 + br label %for.body +for.body: + %phi1 = phi <2 x i64>* [ %gep4, %for.body ], [ %b, %0 ] + %phi2 = phi <2 x i64>* [ %gep3, %for.body ], [ %a, %0 ] + %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ] + %gep1 = getelementptr <2 x i64>* %phi1, i64 -1 + %load1 = load <2 x i64>* %gep1 + %gep2 = getelementptr <2 x i64>* %phi2, i64 -1 + store <2 x i64> %load1, <2 x i64>* %gep2 + %load2 = load <2 x i64>* %phi1 + store <2 x i64> %load2, <2 x i64>* %phi2 + %dec.i = add nsw i64 %i, -1 + %gep3 = getelementptr <2 x i64>* %phi2, i64 -2 + %gep4 = getelementptr <2 x i64>* %phi1, i64 -2 + %cond = icmp sgt i64 %dec.i, 0 + br i1 %cond, label %for.body, label %end +end: + ret void +} + +define void @post-indexed-sub-float(float* %a, float* %b, i64 %count) nounwind { +; CHECK-LABEL: post-indexed-sub-float +; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}], #-8 +; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #-8 + br label %for.body +for.body: + %phi1 = phi float* [ %gep4, %for.body ], [ %b, %0 ] + %phi2 = phi float* [ %gep3, %for.body ], [ %a, %0 ] + %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ] + %gep1 = getelementptr float* %phi1, i64 -1 + %load1 = load float* %gep1 + %gep2 = getelementptr float* %phi2, i64 -1 + store float %load1, float* %gep2 + %load2 = load float* %phi1 + store float %load2, float* %phi2 + %dec.i = add nsw i64 %i, -1 + %gep3 = getelementptr float* %phi2, i64 -2 + %gep4 = getelementptr float* %phi1, i64 -2 + %cond = icmp sgt i64 %dec.i, 0 + br i1 %cond, label %for.body, label %end +end: + ret void +} + +define void @post-indexed-sub-double(double* %a, double* %b, i64 %count) nounwind { +; CHECK-LABEL: post-indexed-sub-double +; CHECK: ldr d{{[0-9]+}}, 
[x{{[0-9]+}}], #-16 +; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #-16 + br label %for.body +for.body: + %phi1 = phi double* [ %gep4, %for.body ], [ %b, %0 ] + %phi2 = phi double* [ %gep3, %for.body ], [ %a, %0 ] + %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ] + %gep1 = getelementptr double* %phi1, i64 -1 + %load1 = load double* %gep1 + %gep2 = getelementptr double* %phi2, i64 -1 + store double %load1, double* %gep2 + %load2 = load double* %phi1 + store double %load2, double* %phi2 + %dec.i = add nsw i64 %i, -1 + %gep3 = getelementptr double* %phi2, i64 -2 + %gep4 = getelementptr double* %phi1, i64 -2 + %cond = icmp sgt i64 %dec.i, 0 + br i1 %cond, label %for.body, label %end +end: + ret void +} diff --git a/test/CodeGen/AArch64/lit.local.cfg b/test/CodeGen/AArch64/lit.local.cfg index 77493d8..125995c 100644 --- a/test/CodeGen/AArch64/lit.local.cfg +++ b/test/CodeGen/AArch64/lit.local.cfg @@ -2,8 +2,7 @@ import re config.suffixes = ['.ll'] -targets = set(config.root.targets_to_build.split()) -if not 'AArch64' in targets: +if not 'AArch64' in config.root.targets: config.unsupported = True # For now we don't test arm64-win32. diff --git a/test/CodeGen/AArch64/memcpy-f128.ll b/test/CodeGen/AArch64/memcpy-f128.ll new file mode 100644 index 0000000..76db297 --- /dev/null +++ b/test/CodeGen/AArch64/memcpy-f128.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -march=aarch64 -mtriple=aarch64-linux-gnu | FileCheck %s + +%structA = type { i128 } +@stubA = internal unnamed_addr constant %structA zeroinitializer, align 8 + +; Make sure we don't hit llvm_unreachable. 
+ +define void @test1() { +; CHECK-LABEL: @test1 +; CHECK: adrp +; CHECK: ldr q0 +; CHECK: str q0 +; CHECK: ret +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* bitcast (%structA* @stubA to i8*), i64 48, i32 8, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) diff --git a/test/CodeGen/AArch64/mul_pow2.ll b/test/CodeGen/AArch64/mul_pow2.ll new file mode 100644 index 0000000..efc0ec8 --- /dev/null +++ b/test/CodeGen/AArch64/mul_pow2.ll @@ -0,0 +1,123 @@ +; RUN: llc < %s -march=aarch64 | FileCheck %s + +; Convert mul x, pow2 to shift. +; Convert mul x, pow2 +/- 1 to shift + add/sub. + +define i32 @test2(i32 %x) { +; CHECK-LABEL: test2 +; CHECK: lsl w0, w0, #1 + + %mul = shl nsw i32 %x, 1 + ret i32 %mul +} + +define i32 @test3(i32 %x) { +; CHECK-LABEL: test3 +; CHECK: add w0, w0, w0, lsl #1 + + %mul = mul nsw i32 %x, 3 + ret i32 %mul +} + +define i32 @test4(i32 %x) { +; CHECK-LABEL: test4 +; CHECK: lsl w0, w0, #2 + + %mul = shl nsw i32 %x, 2 + ret i32 %mul +} + +define i32 @test5(i32 %x) { +; CHECK-LABEL: test5 +; CHECK: add w0, w0, w0, lsl #2 + + + %mul = mul nsw i32 %x, 5 + ret i32 %mul +} + +define i32 @test7(i32 %x) { +; CHECK-LABEL: test7 +; CHECK: lsl {{w[0-9]+}}, w0, #3 +; CHECK: sub w0, {{w[0-9]+}}, w0 + + %mul = mul nsw i32 %x, 7 + ret i32 %mul +} + +define i32 @test8(i32 %x) { +; CHECK-LABEL: test8 +; CHECK: lsl w0, w0, #3 + + %mul = shl nsw i32 %x, 3 + ret i32 %mul +} + +define i32 @test9(i32 %x) { +; CHECK-LABEL: test9 +; CHECK: add w0, w0, w0, lsl #3 + + %mul = mul nsw i32 %x, 9 + ret i32 %mul +} + +; Convert mul x, -pow2 to shift. +; Convert mul x, -(pow2 +/- 1) to shift + add/sub. 
+ +define i32 @ntest2(i32 %x) { +; CHECK-LABEL: ntest2 +; CHECK: neg w0, w0, lsl #1 + + %mul = mul nsw i32 %x, -2 + ret i32 %mul +} + +define i32 @ntest3(i32 %x) { +; CHECK-LABEL: ntest3 +; CHECK: add {{w[0-9]+}}, w0, w0, lsl #1 +; CHECK: neg w0, {{w[0-9]+}} + + %mul = mul nsw i32 %x, -3 + ret i32 %mul +} + +define i32 @ntest4(i32 %x) { +; CHECK-LABEL: ntest4 +; CHECK:neg w0, w0, lsl #2 + + %mul = mul nsw i32 %x, -4 + ret i32 %mul +} + +define i32 @ntest5(i32 %x) { +; CHECK-LABEL: ntest5 +; CHECK: add {{w[0-9]+}}, w0, w0, lsl #2 +; CHECK: neg w0, {{w[0-9]+}} + %mul = mul nsw i32 %x, -5 + ret i32 %mul +} + +define i32 @ntest7(i32 %x) { +; CHECK-LABEL: ntest7 +; CHECK: sub w0, w0, w0, lsl #3 + + %mul = mul nsw i32 %x, -7 + ret i32 %mul +} + +define i32 @ntest8(i32 %x) { +; CHECK-LABEL: ntest8 +; CHECK: neg w0, w0, lsl #3 + + %mul = mul nsw i32 %x, -8 + ret i32 %mul +} + +define i32 @ntest9(i32 %x) { +; CHECK-LABEL: ntest9 +; CHECK: add {{w[0-9]+}}, w0, w0, lsl #3 +; CHECK: neg w0, {{w[0-9]+}} + + %mul = mul nsw i32 %x, -9 + ret i32 %mul +} diff --git a/test/CodeGen/AArch64/regress-tail-livereg.ll b/test/CodeGen/AArch64/regress-tail-livereg.ll index e32ac84..03c3f33 100644 --- a/test/CodeGen/AArch64/regress-tail-livereg.ll +++ b/test/CodeGen/AArch64/regress-tail-livereg.ll @@ -17,3 +17,17 @@ define void @foo() { ; CHECK: br {{x([0-79]|1[0-8])}} ret void } + +; No matter how tempting it is, LLVM should not use x30 since that'll be +; restored to its incoming value before the "br". 
+define void @test_x30_tail() { +; CHECK-LABEL: test_x30_tail: +; CHECK: mov [[DEST:x[0-9]+]], x30 +; CHECK: br [[DEST]] + %addr = call i8* @llvm.returnaddress(i32 0) + %faddr = bitcast i8* %addr to void()* + tail call void %faddr() + ret void +} + +declare i8* @llvm.returnaddress(i32) diff --git a/test/CodeGen/AArch64/trunc-v1i64.ll b/test/CodeGen/AArch64/trunc-v1i64.ll new file mode 100644 index 0000000..159b8e0 --- /dev/null +++ b/test/CodeGen/AArch64/trunc-v1i64.ll @@ -0,0 +1,63 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s + +; An optimization in DAG Combiner to fold +; (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)) +; will generate nodes like: +; v1i32 trunc v1i64, v1i16 trunc v1i64, v1i8 trunc v1i64. +; And such nodes will be defaultly scalarized in type legalization. But such +; scalarization will cause an assertion failure, as v1i64 is a legal type in +; AArch64. We change the default behaviour from be scalarized to be widen. + +; FIXME: Currently XTN is generated for v1i32, but it can be optimized. +; Just like v1i16 and v1i8, there is no XTN generated. 
+ +define <2 x i32> @test_v1i32_0(<1 x i64> %in0) { +; CHECK-LABEL: test_v1i32_0: +; CHECK: xtn v0.2s, v0.2d + %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 0, i32 undef> + %2 = trunc <2 x i64> %1 to <2 x i32> + ret <2 x i32> %2 +} + +define <2 x i32> @test_v1i32_1(<1 x i64> %in0) { +; CHECK-LABEL: test_v1i32_1: +; CHECK: xtn v0.2s, v0.2d +; CHECK-NEXT: dup v0.2s, v0.s[0] + %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 undef, i32 0> + %2 = trunc <2 x i64> %1 to <2 x i32> + ret <2 x i32> %2 +} + +define <4 x i16> @test_v1i16_0(<1 x i64> %in0) { +; CHECK-LABEL: test_v1i16_0: +; CHECK-NOT: xtn + %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> + %2 = trunc <4 x i64> %1 to <4 x i16> + ret <4 x i16> %2 +} + +define <4 x i16> @test_v1i16_1(<1 x i64> %in0) { +; CHECK-LABEL: test_v1i16_1: +; CHECK-NOT: xtn +; CHECK: dup v0.4h, v0.h[0] + %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 undef> + %2 = trunc <4 x i64> %1 to <4 x i16> + ret <4 x i16> %2 +} + +define <8 x i8> @test_v1i8_0(<1 x i64> %in0) { +; CHECK-LABEL: test_v1i8_0: +; CHECK-NOT: xtn + %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %2 = trunc <8 x i64> %1 to <8 x i8> + ret <8 x i8> %2 +} + +define <8 x i8> @test_v1i8_1(<1 x i64> %in0) { +; CHECK-LABEL: test_v1i8_1: +; CHECK-NOT: xtn +; CHECK: dup v0.8b, v0.b[0] + %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %2 = trunc <8 x i64> %1 to <8 x i8> + ret <8 x i8> %2 +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/tst-br.ll b/test/CodeGen/AArch64/tst-br.ll index 8a2fe26..5dc7b5d 100644 --- a/test/CodeGen/AArch64/tst-br.ll +++ b/test/CodeGen/AArch64/tst-br.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 -aarch64-atomic-cfg-tidy=0 | FileCheck %s ; We've got the usual issues with LLVM reordering blocks here. The ; tests are correct for the current order, but who knows when that |