aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBill Wendling <isanbard@gmail.com>2013-11-26 12:29:45 +0000
committerBill Wendling <isanbard@gmail.com>2013-11-26 12:29:45 +0000
commite40ef6a9fc96c74f7df5681a070246ea990499eb (patch)
tree37793d334acfaf1564f24e16004af59fa226219b
parent5943d4e3eea9ad5ef55618c075262337463aafa9 (diff)
downloadexternal_llvm-e40ef6a9fc96c74f7df5681a070246ea990499eb.zip
external_llvm-e40ef6a9fc96c74f7df5681a070246ea990499eb.tar.gz
external_llvm-e40ef6a9fc96c74f7df5681a070246ea990499eb.tar.bz2
Merging r195713:
------------------------------------------------------------------------ r195713 | kevinqin | 2013-11-25 18:33:42 -0800 (Mon, 25 Nov 2013) | 1 line [AArch64]Implement 128 bit register copy with NEON. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@195758 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp36
-rw-r--r--test/CodeGen/AArch64/neon-perm.ll3
2 files changed, 22 insertions, 17 deletions
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 706d0b0..180110a 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -114,23 +114,25 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} else if (AArch64::FPR128RegClass.contains(DestReg)) {
assert(AArch64::FPR128RegClass.contains(SrcReg));
- // FIXME: there's no good way to do this, at least without NEON:
- // + There's no single move instruction for q-registers
- // + We can't create a spill slot and use normal STR/LDR because stack
- // allocation has already happened
- // + We can't go via X-registers with FMOV because register allocation has
- // already happened.
- // This may not be efficient, but at least it works.
- BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
- .addReg(SrcReg)
- .addReg(AArch64::XSP)
- .addImm(0x1ff & -16);
-
- BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
- .addReg(AArch64::XSP, RegState::Define)
- .addReg(AArch64::XSP)
- .addImm(16);
- return;
+ // If NEON is enable, we use ORR to implement this copy.
+ // If NEON isn't available, emit STR and LDR to handle this.
+ if(getSubTarget().hasNEON()) {
+ BuildMI(MBB, I, DL, get(AArch64::ORRvvv_16B), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg);
+ return;
+ } else {
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
+ .addReg(SrcReg)
+ .addReg(AArch64::XSP)
+ .addImm(0x1ff & -16);
+
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
+ .addReg(AArch64::XSP, RegState::Define)
+ .addReg(AArch64::XSP)
+ .addImm(16);
+ return;
+ }
} else {
llvm_unreachable("Unknown register class in copyPhysReg");
}
diff --git a/test/CodeGen/AArch64/neon-perm.ll b/test/CodeGen/AArch64/neon-perm.ll
index 4e1756e..fa4d54d 100644
--- a/test/CodeGen/AArch64/neon-perm.ll
+++ b/test/CodeGen/AArch64/neon-perm.ll
@@ -238,6 +238,7 @@ entry:
define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vuzp2q_s64:
; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %shuffle.i
@@ -294,6 +295,7 @@ entry:
define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vuzp2q_u64:
; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %shuffle.i
@@ -318,6 +320,7 @@ entry:
define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) {
; CHECK: test_vuzp2q_f64:
; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
ret <2 x double> %shuffle.i