diff options
author | Bob Wilson <bob.wilson@apple.com> | 2009-07-08 18:11:30 +0000 |
---|---|---|
committer | Bob Wilson <bob.wilson@apple.com> | 2009-07-08 18:11:30 +0000 |
commit | 205a5ca6cfabc6cd408634a2fa7f2529956cc2cf (patch) | |
tree | 06c7383f63d684dc927ea6abcf10511efa8b1ee9 | |
parent | ab7c09b6b6f4516a631fd6788918c237c83939af (diff) | |
download | external_llvm-205a5ca6cfabc6cd408634a2fa7f2529956cc2cf.zip external_llvm-205a5ca6cfabc6cd408634a2fa7f2529956cc2cf.tar.gz external_llvm-205a5ca6cfabc6cd408634a2fa7f2529956cc2cf.tar.bz2 |
Implement NEON vld1 instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@75019 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/ARM/ARMInstrFormats.td | 5 |
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 23 |
-rw-r--r-- | lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 4 |
-rw-r--r-- | test/CodeGen/ARM/vld1.ll | 67 |
4 files changed, 98 insertions, 1 deletion
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index b9868cc..c7ef149 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1035,6 +1035,11 @@ class NI<dag oops, dag iops, string asm, list<dag> pattern> : NeonI<oops, iops, AddrModeNone, IndexModeNone, asm, "", pattern> { } +class NLdSt<dag oops, dag iops, string asm, list<dag> pattern> + : NeonI<oops, iops, AddrMode6, IndexModeNone, asm, "", pattern> { + let Inst{31-24} = 0b11110100; +} + class NDataI<dag oops, dag iops, string asm, string cstr, list<dag> pattern> : NeonI<oops, iops, AddrModeNone, IndexModeNone, asm, cstr, pattern> { let Inst{31-25} = 0b1111001; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index a62597b..e8d3f58 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -111,6 +111,29 @@ def VSTRQ : NI<(outs), (ins QPR:$src, GPR:$addr), [(store (v2f64 QPR:$src), GPR:$addr)]>; +// VLD1 : Vector Load (multiple single elements) +class VLD1D<string OpcodeStr, ValueType Ty, Intrinsic IntOp> + : NLdSt<(outs DPR:$dst), (ins addrmode6:$addr), + !strconcat(OpcodeStr, "\t${dst:dregsingle}, $addr"), + [(set DPR:$dst, (Ty (IntOp addrmode6:$addr, 1)))]>; +class VLD1Q<string OpcodeStr, ValueType Ty, Intrinsic IntOp> + : NLdSt<(outs QPR:$dst), (ins addrmode6:$addr), + !strconcat(OpcodeStr, "\t${dst:dregpair}, $addr"), + [(set QPR:$dst, (Ty (IntOp addrmode6:$addr, 1)))]>; + +def VLD1d8 : VLD1D<"vld1.8", v8i8, int_arm_neon_vldi>; +def VLD1d16 : VLD1D<"vld1.16", v4i16, int_arm_neon_vldi>; +def VLD1d32 : VLD1D<"vld1.32", v2i32, int_arm_neon_vldi>; +def VLD1df : VLD1D<"vld1.32", v2f32, int_arm_neon_vldf>; +def VLD1d64 : VLD1D<"vld1.64", v1i64, int_arm_neon_vldi>; + +def VLD1q8 : VLD1Q<"vld1.8", v16i8, int_arm_neon_vldi>; +def VLD1q16 : VLD1Q<"vld1.16", v8i16, int_arm_neon_vldi>; +def VLD1q32 : VLD1Q<"vld1.32", v4i32, int_arm_neon_vldi>; +def VLD1qf : VLD1Q<"vld1.32", v4f32, 
int_arm_neon_vldf>; +def VLD1q64 : VLD1Q<"vld1.64", v2i64, int_arm_neon_vldi>; + + //===----------------------------------------------------------------------===// // NEON pattern fragments //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 434a19a..532e3cc 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -298,8 +298,10 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, unsigned DRegLo = TRI->getSubReg(Reg, 5); // arm_dsubreg_0 unsigned DRegHi = TRI->getSubReg(Reg, 6); // arm_dsubreg_1 O << '{' - << TRI->getAsmName(DRegLo) << "-" << TRI->getAsmName(DRegHi) + << TRI->getAsmName(DRegLo) << ',' << TRI->getAsmName(DRegHi) << '}'; + } else if (Modifier && strcmp(Modifier, "dregsingle") == 0) { + O << '{' << TRI->getAsmName(Reg) << '}'; } else { O << TRI->getAsmName(Reg); } diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll new file mode 100644 index 0000000..161cb71 --- /dev/null +++ b/test/CodeGen/ARM/vld1.ll @@ -0,0 +1,67 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t +; RUN: grep {vld1\\.8} %t | count 2 +; RUN: grep {vld1\\.16} %t | count 2 +; RUN: grep {vld1\\.32} %t | count 4 +; RUN: grep {vld1\\.64} %t | count 2 + +define <8 x i8> @vld1i8(i8* %A) nounwind { + %tmp1 = call <8 x i8> @llvm.arm.neon.vldi.v8i8(i8* %A, i32 1) + ret <8 x i8> %tmp1 +} + +define <4 x i16> @vld1i16(i16* %A) nounwind { + %tmp1 = call <4 x i16> @llvm.arm.neon.vldi.v4i16(i16* %A, i32 1) + ret <4 x i16> %tmp1 +} + +define <2 x i32> @vld1i32(i32* %A) nounwind { + %tmp1 = call <2 x i32> @llvm.arm.neon.vldi.v2i32(i32* %A, i32 1) + ret <2 x i32> %tmp1 +} + +define <2 x float> @vld1f(float* %A) nounwind { + %tmp1 = call <2 x float> @llvm.arm.neon.vldf.v2f32(float* %A, i32 1) + ret <2 x float> %tmp1 +} + +define <1 x i64> @vld1i64(i64* %A) nounwind { + %tmp1 
= call <1 x i64> @llvm.arm.neon.vldi.v1i64(i64* %A, i32 1) + ret <1 x i64> %tmp1 +} + +define <16 x i8> @vld1Qi8(i8* %A) nounwind { + %tmp1 = call <16 x i8> @llvm.arm.neon.vldi.v16i8(i8* %A, i32 1) + ret <16 x i8> %tmp1 +} + +define <8 x i16> @vld1Qi16(i16* %A) nounwind { + %tmp1 = call <8 x i16> @llvm.arm.neon.vldi.v8i16(i16* %A, i32 1) + ret <8 x i16> %tmp1 +} + +define <4 x i32> @vld1Qi32(i32* %A) nounwind { + %tmp1 = call <4 x i32> @llvm.arm.neon.vldi.v4i32(i32* %A, i32 1) + ret <4 x i32> %tmp1 +} + +define <4 x float> @vld1Qf(float* %A) nounwind { + %tmp1 = call <4 x float> @llvm.arm.neon.vldf.v4f32(float* %A, i32 1) + ret <4 x float> %tmp1 +} + +define <2 x i64> @vld1Qi64(i64* %A) nounwind { + %tmp1 = call <2 x i64> @llvm.arm.neon.vldi.v2i64(i64* %A, i32 1) + ret <2 x i64> %tmp1 +} + +declare <8 x i8> @llvm.arm.neon.vldi.v8i8(i8*, i32) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vldi.v4i16(i16*, i32) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vldi.v2i32(i32*, i32) nounwind readnone +declare <2 x float> @llvm.arm.neon.vldf.v2f32(float*, i32) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vldi.v1i64(i64*, i32) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vldi.v16i8(i8*, i32) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vldi.v8i16(i16*, i32) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vldi.v4i32(i32*, i32) nounwind readnone +declare <4 x float> @llvm.arm.neon.vldf.v4f32(float*, i32) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vldi.v2i64(i64*, i32) nounwind readnone |