diff options
author | Jim Grosbach <grosbach@apple.com> | 2012-01-24 18:53:13 +0000 |
---|---|---|
committer | Jim Grosbach <grosbach@apple.com> | 2012-01-24 18:53:13 +0000 |
commit | 88a54de799240d5de2e79dfff4671ad5653e7ceb (patch) | |
tree | a90a918de56d79828d2a83d1a7d6209a5ebaa2c5 | |
parent | 4f8dc7b17accf4f2ec953b80b2cc79786207492e (diff) | |
download | external_llvm-88a54de799240d5de2e79dfff4671ad5653e7ceb.zip external_llvm-88a54de799240d5de2e79dfff4671ad5653e7ceb.tar.gz external_llvm-88a54de799240d5de2e79dfff4671ad5653e7ceb.tar.bz2 |
NEON VST4(one lane) assembly parsing and encoding.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148836 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 49 | ||||
-rw-r--r-- | lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 99 | ||||
-rw-r--r-- | test/MC/ARM/neon-vst-encoding.s | 44 |
3 files changed, 181 insertions, 11 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 702de8b..4040db9 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -6364,6 +6364,55 @@ def VLD4qWB_register_Asm_32 :
                   (ins VecListFourQ:$list, addrmode6:$addr,
                        rGPR:$Rm, pred:$p)>;
 
+// VST4 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+               (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+               (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+               (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+               (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+               (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VST4LNdWB_fixed_Asm_8 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+               (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdWB_fixed_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+               (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdWB_fixed_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+               (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqWB_fixed_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+               (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqWB_fixed_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+               (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdWB_register_Asm_8 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+                  (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
+                       rGPR:$Rm, pred:$p)>;
+def VST4LNdWB_register_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+                  (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
+                       rGPR:$Rm, pred:$p)>;
+def VST4LNdWB_register_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+                  (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
+                       rGPR:$Rm, pred:$p)>;
+def VST4LNqWB_register_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+                  (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
+                       rGPR:$Rm, pred:$p)>;
+def VST4LNqWB_register_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+                  (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
+                       rGPR:$Rm, pred:$p)>;
+
 // VST4 multiple structure pseudo-instructions. These need special handling for
 // the vector operands that the normal instructions don't yet model.
 
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 9bc4e60..a3657db 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -5273,6 +5273,23 @@ static unsigned getRealVSTOpcode(unsigned Opc, unsigned &Spacing) {
   case ARM::VST3qAsm_16: Spacing = 2; return ARM::VST3q16;
   case ARM::VST3qAsm_32: Spacing = 2; return ARM::VST3q32;
 
+  // VST4LN
+  case ARM::VST4LNdWB_fixed_Asm_8:  Spacing = 1; return ARM::VST4LNd8_UPD;
+  case ARM::VST4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD;
+  case ARM::VST4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD;
+  case ARM::VST4LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VST4LNq16_UPD;
+  case ARM::VST4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD;
+  case ARM::VST4LNdWB_register_Asm_8:  Spacing = 1; return ARM::VST4LNd8_UPD;
+  case ARM::VST4LNdWB_register_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD;
+  case ARM::VST4LNdWB_register_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD;
+  case ARM::VST4LNqWB_register_Asm_16: Spacing = 2; return ARM::VST4LNq16_UPD;
+  case ARM::VST4LNqWB_register_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD;
+  case ARM::VST4LNdAsm_8:  Spacing = 1; return ARM::VST4LNd8;
+  case ARM::VST4LNdAsm_16: Spacing = 1; return ARM::VST4LNd16;
+  case ARM::VST4LNdAsm_32: Spacing = 1; return ARM::VST4LNd32;
+  case ARM::VST4LNqAsm_16: Spacing = 2; return ARM::VST4LNq16;
+  case ARM::VST4LNqAsm_32: Spacing = 2; return ARM::VST4LNq32;
+
   // VST4
   case ARM::VST4dWB_fixed_Asm_8: Spacing = 1; return ARM::VST4d8_UPD;
   case ARM::VST4dWB_fixed_Asm_16: Spacing = 1; return ARM::VST4d16_UPD;
@@ -5493,6 +5510,34 @@ processInstruction(MCInst &Inst,
     return true;
   }
 
+  case ARM::VST4LNdWB_register_Asm_8:
+  case ARM::VST4LNdWB_register_Asm_16:
+  case ARM::VST4LNdWB_register_Asm_32:
+  case ARM::VST4LNqWB_register_Asm_16:
+  case ARM::VST4LNqWB_register_Asm_32: {
+    MCInst TmpInst;
+    // Shuffle the operands around so the lane index operand is in the
+    // right place.
+    unsigned Spacing;
+    TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+    TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+    TmpInst.addOperand(Inst.getOperand(2)); // Rn
+    TmpInst.addOperand(Inst.getOperand(3)); // alignment
+    TmpInst.addOperand(Inst.getOperand(4)); // Rm
+    TmpInst.addOperand(Inst.getOperand(0)); // Vd
+    TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+                                            Spacing));
+    TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+                                            Spacing * 2));
+    TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+                                            Spacing * 3));
+    TmpInst.addOperand(Inst.getOperand(1)); // lane
+    TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+    TmpInst.addOperand(Inst.getOperand(6));
+    Inst = TmpInst;
+    return true;
+  }
+
   case ARM::VST1LNdWB_fixed_Asm_8:
   case ARM::VST1LNdWB_fixed_Asm_16:
   case ARM::VST1LNdWB_fixed_Asm_32: {
@@ -5563,6 +5608,34 @@ processInstruction(MCInst &Inst,
     return true;
   }
 
+  case ARM::VST4LNdWB_fixed_Asm_8:
+  case ARM::VST4LNdWB_fixed_Asm_16:
+  case ARM::VST4LNdWB_fixed_Asm_32:
+  case ARM::VST4LNqWB_fixed_Asm_16:
+  case ARM::VST4LNqWB_fixed_Asm_32: {
+    MCInst TmpInst;
+    // Shuffle the operands around so the lane index operand is in the
+    // right place.
+    unsigned Spacing;
+    TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+    TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+    TmpInst.addOperand(Inst.getOperand(2)); // Rn
+    TmpInst.addOperand(Inst.getOperand(3)); // alignment
+    TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+    TmpInst.addOperand(Inst.getOperand(0)); // Vd
+    TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+                                            Spacing));
+    TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+                                            Spacing * 2));
+    TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+                                            Spacing * 3));
+    TmpInst.addOperand(Inst.getOperand(1)); // lane
+    TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+    TmpInst.addOperand(Inst.getOperand(5));
+    Inst = TmpInst;
+    return true;
+  }
+
   case ARM::VST1LNdAsm_8:
   case ARM::VST1LNdAsm_16:
   case ARM::VST1LNdAsm_32: {
@@ -5627,6 +5700,32 @@ processInstruction(MCInst &Inst,
     return true;
   }
 
+  case ARM::VST4LNdAsm_8:
+  case ARM::VST4LNdAsm_16:
+  case ARM::VST4LNdAsm_32:
+  case ARM::VST4LNqAsm_16:
+  case ARM::VST4LNqAsm_32: {
+    MCInst TmpInst;
+    // Shuffle the operands around so the lane index operand is in the
+    // right place.
+    unsigned Spacing;
+    TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+    TmpInst.addOperand(Inst.getOperand(2)); // Rn
+    TmpInst.addOperand(Inst.getOperand(3)); // alignment
+    TmpInst.addOperand(Inst.getOperand(0)); // Vd
+    TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+                                            Spacing));
+    TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+                                            Spacing * 2));
+    TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+                                            Spacing * 3));
+    TmpInst.addOperand(Inst.getOperand(1)); // lane
+    TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+    TmpInst.addOperand(Inst.getOperand(5));
+    Inst = TmpInst;
+    return true;
+  }
+
   // Handle NEON VLD complex aliases.
   case ARM::VLD1LNdWB_register_Asm_8:
   case ARM::VLD1LNdWB_register_Asm_16:
diff --git a/test/MC/ARM/neon-vst-encoding.s b/test/MC/ARM/neon-vst-encoding.s
index ba2620f..2b14d37 100644
--- a/test/MC/ARM/neon-vst-encoding.s
+++ b/test/MC/ARM/neon-vst-encoding.s
@@ -210,17 +210,39 @@
 @ CHECK: vst3.32 {d5[1], d7[1], d9[1]}, [r4]! @ encoding: [0xcd,0x5a,0x84,0xf4]
 
 
-@ vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
-@ vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0]
-@ vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
-@ vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
-@ vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
-
-@ FIXME: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ encoding: [0x3f,0x03,0xc0,0xf4]
-@ FIXME: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xc0,0xf4]
-@ FIXME: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] @ encoding: [0xaf,0x0b,0xc0,0xf4]
-@ FIXME: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64] @ encoding: [0xff,0x17,0xc0,0xf4]
-@ FIXME: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xc0,0xf4]
+        vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1]
+        vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2]
+        vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3]
+        vst4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7]
+        vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]
+
+        vst4.s8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
+        vst4.s16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]!
+        vst4.s32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]!
+        vst4.u16 {d17[1], d19[1], d21[1], d23[1]}, [r7]!
+        vst4.u32 {d16[1], d18[1], d20[1], d22[1]}, [r8]!
+
+        vst4.p8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8
+        vst4.p16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7
+        vst4.f32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5
+        vst4.i16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3
+        vst4.i32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4
+
+@ CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1] @ encoding: [0x2f,0x03,0xc1,0xf4]
+@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2] @ encoding: [0x4f,0x07,0xc2,0xf4]
+@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3] @ encoding: [0x8f,0x0b,0xc3,0xf4]
+@ CHECK: vst4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7] @ encoding: [0x6f,0x17,0xc7,0xf4]
+@ CHECK: vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8] @ encoding: [0xcf,0x0b,0xc8,0xf4]
+@ CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]! @ encoding: [0x3d,0x03,0xc1,0xf4]
+@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]! @ encoding: [0x5d,0x07,0xc2,0xf4]
+@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]! @ encoding: [0xad,0x0b,0xc3,0xf4]
+@ CHECK: vst4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7]! @ encoding: [0x6d,0x17,0xc7,0xf4]
+@ CHECK: vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]! @ encoding: [0xcd,0x0b,0xc8,0xf4]
+@ CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8 @ encoding: [0x38,0x03,0xc1,0xf4]
+@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7 @ encoding: [0x47,0x07,0xc2,0xf4]
+@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5 @ encoding: [0x95,0x0b,0xc3,0xf4]
+@ CHECK: vst4.16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3 @ encoding: [0x63,0x07,0xc6,0xf4]
+@ CHECK: vst4.32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4 @ encoding: [0xc4,0x1b,0xc9,0xf4]
 
 
 @ Spot-check additional size-suffix aliases. |