diff options
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 91 | ||||
-rw-r--r-- | lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 101 | ||||
-rw-r--r-- | lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 22 | ||||
-rw-r--r-- | lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 4 | ||||
-rw-r--r-- | test/MC/ARM/neon-vld-encoding.s | 41 |
5 files changed, 253 insertions, 6 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 4040db9..eea25a6 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -171,6 +171,27 @@ def VecListTwoQAllLanes : RegisterOperand<DPR, "printVectorListTwoSpacedAllLanes"> { let ParserMatchClass = VecListTwoQAllLanesAsmOperand; } +// Register list of three D registers, with "all lanes" subscripting. +def VecListThreeDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListThreeDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListThreeDAllLanes : RegisterOperand<DPR, + "printVectorListThreeAllLanes"> { + let ParserMatchClass = VecListThreeDAllLanesAsmOperand; +} +// Register list of three D registers spaced by 2 (three sequential Q regs). +def VecListThreeQAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListThreeQAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListThreeQAllLanes : RegisterOperand<DPR, + "printVectorListThreeSpacedAllLanes"> { + let ParserMatchClass = VecListThreeQAllLanesAsmOperand; +} + // Register list of one D register, with byte lane subscripting. def VecListOneDByteIndexAsmOperand : AsmOperandClass { @@ -1433,9 +1454,9 @@ def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>; def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>; // ...with double-spaced registers (not used for codegen): -def VLD3DUPd8x2 : VLD3DUP<{0,0,1,?}, "8">; -def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">; -def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">; +def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">; +def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">; +def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">; // ...with address register writeback: class VLD3DUPWB<bits<4> op7_4, string Dt> @@ -1451,9 +1472,9 @@ def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">; def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">; def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">; -def VLD3DUPd8x2_UPD : VLD3DUPWB<{0,0,1,0}, "8">; -def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">; -def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">; +def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8">; +def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">; +def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">; def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>; def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>; @@ -6036,6 +6057,64 @@ def VST2LNqWB_register_Asm_32 : (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; +// VLD3 all-lanes pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. +def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + +def VLD3DUPdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + // VLD3 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index a3657db..20b2e85 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -1132,6 +1132,16 @@ public: return VectorList.Count == 2; } + bool isVecListThreeDAllLanes() const { + if (!isSingleSpacedVectorAllLanes()) return false; + return VectorList.Count == 3; + } + + bool isVecListThreeQAllLanes() const { + if (!isDoubleSpacedVectorAllLanes()) return false; + return VectorList.Count == 3; + } + bool isSingleSpacedVectorIndexed() const { return Kind == k_VectorListIndexed && !VectorList.isDoubleSpaced; } @@ -5343,6 +5353,26 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) { case ARM::VLD2LNqAsm_16: Spacing = 2; return ARM::VLD2LNq16; case ARM::VLD2LNqAsm_32: Spacing = 2; return ARM::VLD2LNq32; + // VLD3DUP + case ARM::VLD3DUPdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD; + case ARM::VLD3DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD; + case ARM::VLD3DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD; + case ARM::VLD3DUPqWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPq8_UPD; + case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPq16_UPD; + case ARM::VLD3DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD; + case ARM::VLD3DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD; + case ARM::VLD3DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD; + case ARM::VLD3DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD; + case ARM::VLD3DUPqWB_register_Asm_8: Spacing = 2; return ARM::VLD3DUPq8_UPD; + case ARM::VLD3DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD3DUPq16_UPD; + case ARM::VLD3DUPqWB_register_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD; + case ARM::VLD3DUPdAsm_8: Spacing = 1; return ARM::VLD3DUPd8; + case ARM::VLD3DUPdAsm_16: Spacing = 1; return ARM::VLD3DUPd16; + case ARM::VLD3DUPdAsm_32: Spacing = 1; return ARM::VLD3DUPd32; + case ARM::VLD3DUPqAsm_8: Spacing = 2; return ARM::VLD3DUPq8; + case ARM::VLD3DUPqAsm_16: Spacing = 2; return ARM::VLD3DUPq16; + case ARM::VLD3DUPqAsm_32: Spacing = 2; return ARM::VLD3DUPq32; + // VLD3LN case ARM::VLD3LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3LNd8_UPD; case ARM::VLD3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD; @@ -6061,6 +6091,77 @@ processInstruction(MCInst &Inst, return true; } + // VLD3DUP single 3-element structure to all lanes instructions. + case ARM::VLD3DUPdAsm_8: + case ARM::VLD3DUPdAsm_16: + case ARM::VLD3DUPdAsm_32: + case ARM::VLD3DUPqAsm_8: + case ARM::VLD3DUPqAsm_16: + case ARM::VLD3DUPqAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3DUPdWB_fixed_Asm_8: + case ARM::VLD3DUPdWB_fixed_Asm_16: + case ARM::VLD3DUPdWB_fixed_Asm_32: + case ARM::VLD3DUPqWB_fixed_Asm_8: + case ARM::VLD3DUPqWB_fixed_Asm_16: + case ARM::VLD3DUPqWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3DUPdWB_register_Asm_8: + case ARM::VLD3DUPdWB_register_Asm_16: + case ARM::VLD3DUPdWB_register_Asm_32: + case ARM::VLD3DUPqWB_register_Asm_8: + case ARM::VLD3DUPqWB_register_Asm_16: + case ARM::VLD3DUPqWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // VLD3 multiple 3-element structure instructions. case ARM::VLD3dAsm_8: case ARM::VLD3dAsm_16: diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 27d5de8..455a487 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -1067,6 +1067,17 @@ void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[]}"; } +void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}"; +} + void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with @@ -1086,6 +1097,17 @@ void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI, << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}"; } +void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}"; +} + void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum, raw_ostream &O) { diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 4af116c..ffcc79b 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -139,10 +139,14 @@ public: raw_ostream &O); void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum, diff --git a/test/MC/ARM/neon-vld-encoding.s b/test/MC/ARM/neon-vld-encoding.s index ba9b218..de6b8fb 100644 --- a/test/MC/ARM/neon-vld-encoding.s +++ b/test/MC/ARM/neon-vld-encoding.s @@ -336,6 +336,47 @@ @ CHECK: vld3.32 {d5[0], d7[0], d9[0]}, [r4]! @ encoding: [0x4d,0x5a,0xa4,0xf4] + vld3.8 {d16[], d17[], d18[]}, [r1] + vld3.16 {d16[], d17[], d18[]}, [r2] + vld3.32 {d16[], d17[], d18[]}, [r3] + vld3.8 {d17[], d19[], d21[]}, [r7] + vld3.16 {d17[], d19[], d21[]}, [r7] + vld3.32 {d16[], d18[], d20[]}, [r8] + + vld3.s8 {d16[], d17[], d18[]}, [r1]! + vld3.s16 {d16[], d17[], d18[]}, [r2]! + vld3.s32 {d16[], d17[], d18[]}, [r3]! + vld3.u8 {d17[], d19[], d21[]}, [r7]! + vld3.u16 {d17[], d19[], d21[]}, [r7]! + vld3.u32 {d16[], d18[], d20[]}, [r8]! + + vld3.p8 {d16[], d17[], d18[]}, [r1], r8 + vld3.p16 {d16[], d17[], d18[]}, [r2], r7 + vld3.f32 {d16[], d17[], d18[]}, [r3], r5 + vld3.i8 {d16[], d18[], d20[]}, [r6], r3 + vld3.i16 {d16[], d18[], d20[]}, [r6], r3 + vld3.i32 {d17[], d19[], d21[]}, [r9], r4 + +@ CHECK: vld3.8 {d16[], d17[], d18[]}, [r1] @ encoding: [0x0f,0x0e,0xe1,0xf4] +@ CHECK: vld3.16 {d16[], d17[], d18[]}, [r2] @ encoding: [0x4f,0x0e,0xe2,0xf4] +@ CHECK: vld3.32 {d16[], d17[], d18[]}, [r3] @ encoding: [0x8f,0x0e,0xe3,0xf4] +@ CHECK: vld3.8 {d17[], d19[], d21[]}, [r7] @ encoding: [0x2f,0x1e,0xe7,0xf4] +@ CHECK: vld3.16 {d17[], d19[], d21[]}, [r7] @ encoding: [0x6f,0x1e,0xe7,0xf4] +@ CHECK: vld3.32 {d16[], d18[], d20[]}, [r8] @ encoding: [0xaf,0x0e,0xe8,0xf4] +@ CHECK: vld3.8 {d16[], d17[], d18[]}, [r1]! @ encoding: [0x0d,0x0e,0xe1,0xf4] +@ CHECK: vld3.16 {d16[], d17[], d18[]}, [r2]! @ encoding: [0x4d,0x0e,0xe2,0xf4] +@ CHECK: vld3.32 {d16[], d17[], d18[]}, [r3]! @ encoding: [0x8d,0x0e,0xe3,0xf4] +@ CHECK: vld3.8 {d17[], d18[], d19[]}, [r7]! @ encoding: [0x2d,0x1e,0xe7,0xf4] +@ CHECK: vld3.16 {d17[], d18[], d19[]}, [r7]! @ encoding: [0x6d,0x1e,0xe7,0xf4] +@ CHECK: vld3.32 {d16[], d18[], d20[]}, [r8]! @ encoding: [0xad,0x0e,0xe8,0xf4] +@ CHECK: vld3.8 {d16[], d17[], d18[]}, [r1], r8 @ encoding: [0x08,0x0e,0xe1,0xf4] +@ CHECK: vld3.16 {d16[], d17[], d18[]}, [r2], r7 @ encoding: [0x47,0x0e,0xe2,0xf4] +@ CHECK: vld3.32 {d16[], d17[], d18[]}, [r3], r5 @ encoding: [0x85,0x0e,0xe3,0xf4] +@ CHECK: vld3.8 {d16[], d18[], d20[]}, [r6], r3 @ encoding: [0x23,0x0e,0xe6,0xf4] +@ CHECK: vld3.16 {d16[], d18[], d20[]}, [r6], r3 @ encoding: [0x63,0x0e,0xe6,0xf4] +@ CHECK: vld3.32 {d17[], d19[], d21[]}, [r9], r4 @ encoding: [0xa4,0x1e,0xe9,0xf4] + + vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1] vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2] vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3] |