diff options
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 78 | ||||
-rw-r--r-- | lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 109 | ||||
-rw-r--r-- | lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 26 | ||||
-rw-r--r-- | lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 4 | ||||
-rw-r--r-- | test/MC/ARM/neon-vld-encoding.s | 40 |
5 files changed, 255 insertions, 2 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index f78fa2c..d597a12 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -191,6 +191,25 @@ def VecListThreeQAllLanes : RegisterOperand<DPR, "printVectorListThreeSpacedAllLanes"> { let ParserMatchClass = VecListThreeQAllLanesAsmOperand; } +// Register list of four D registers, with "all lanes" subscripting. +def VecListFourDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListFourDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> { + let ParserMatchClass = VecListFourDAllLanesAsmOperand; +} +// Register list of four D registers spaced by 2 (four sequential Q regs). +def VecListFourQAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListFourQAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListFourQAllLanes : RegisterOperand<DPR, + "printVectorListFourSpacedAllLanes"> { + let ParserMatchClass = VecListFourQAllLanesAsmOperand; +} // Register list of one D register, with byte lane subscripting. @@ -6333,6 +6352,65 @@ def VST3qWB_register_Asm_32 : (ins VecListThreeQ:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; +// VLD4 all-lanes pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. +def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + +def VLD4DUPdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + // VLD4 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 20b2e85..94604c9 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -1142,6 +1142,16 @@ public: return VectorList.Count == 3; } + bool isVecListFourDAllLanes() const { + if (!isSingleSpacedVectorAllLanes()) return false; + return VectorList.Count == 4; + } + + bool isVecListFourQAllLanes() const { + if (!isDoubleSpacedVectorAllLanes()) return false; + return VectorList.Count == 4; + } + bool isSingleSpacedVectorIndexed() const { return Kind == k_VectorListIndexed && !VectorList.isDoubleSpaced; } @@ -5427,6 +5437,26 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) { case ARM::VLD4LNqAsm_16: Spacing = 2; return ARM::VLD4LNq16; case ARM::VLD4LNqAsm_32: Spacing = 2; return ARM::VLD4LNq32; + // VLD4DUP + case ARM::VLD4DUPdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4DUPd8_UPD; + case ARM::VLD4DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4DUPd16_UPD; + case ARM::VLD4DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD; + case ARM::VLD4DUPqWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4DUPq8_UPD; + case ARM::VLD4DUPqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4DUPq16_UPD; + case ARM::VLD4DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4DUPq32_UPD; + case ARM::VLD4DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD4DUPd8_UPD; + case ARM::VLD4DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD4DUPd16_UPD; + case ARM::VLD4DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD; + case ARM::VLD4DUPqWB_register_Asm_8: Spacing = 2; return ARM::VLD4DUPq8_UPD; + case ARM::VLD4DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD4DUPq16_UPD; + case ARM::VLD4DUPqWB_register_Asm_32: Spacing = 2; return ARM::VLD4DUPq32_UPD; + case ARM::VLD4DUPdAsm_8: Spacing = 1; return ARM::VLD4DUPd8; + case ARM::VLD4DUPdAsm_16: Spacing = 1; return ARM::VLD4DUPd16; + case ARM::VLD4DUPdAsm_32: Spacing = 1; return ARM::VLD4DUPd32; + case ARM::VLD4DUPqAsm_8: Spacing = 2; return ARM::VLD4DUPq8; + case ARM::VLD4DUPqAsm_16: Spacing = 2; return ARM::VLD4DUPq16; + case ARM::VLD4DUPqAsm_32: Spacing = 2; return ARM::VLD4DUPq32; + // VLD4 case ARM::VLD4dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD; case ARM::VLD4dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD; @@ -6233,7 +6263,84 @@ processInstruction(MCInst &Inst, return true; } - // VLD4 multiple 3-element structure instructions. + // VLD4DUP single 3-element structure to all lanes instructions. + case ARM::VLD4DUPdAsm_8: + case ARM::VLD4DUPdAsm_16: + case ARM::VLD4DUPdAsm_32: + case ARM::VLD4DUPqAsm_8: + case ARM::VLD4DUPqAsm_16: + case ARM::VLD4DUPqAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4DUPdWB_fixed_Asm_8: + case ARM::VLD4DUPdWB_fixed_Asm_16: + case ARM::VLD4DUPdWB_fixed_Asm_32: + case ARM::VLD4DUPqWB_fixed_Asm_8: + case ARM::VLD4DUPqWB_fixed_Asm_16: + case ARM::VLD4DUPqWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4DUPdWB_register_Asm_8: + case ARM::VLD4DUPdWB_register_Asm_16: + case ARM::VLD4DUPdWB_register_Asm_32: + case ARM::VLD4DUPqWB_register_Asm_8: + case ARM::VLD4DUPqWB_register_Asm_16: + case ARM::VLD4DUPqWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // VLD4 multiple 4-element structure instructions. case ARM::VLD4dAsm_8: case ARM::VLD4dAsm_16: case ARM::VLD4dAsm_32: diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 455a487..aa60e5a 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -1078,6 +1078,18 @@ void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI, << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}"; } +void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}"; +} + void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with @@ -1105,7 +1117,19 @@ void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI, // sort order is guaranteed because they're all of the form D<n>. O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}"; + << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[]}"; +} + +void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "[]}"; } void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI, diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index ffcc79b..f63157f 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -141,12 +141,16 @@ public: raw_ostream &O); void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListFourSpacedAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum, diff --git a/test/MC/ARM/neon-vld-encoding.s b/test/MC/ARM/neon-vld-encoding.s index de6b8fb..3cc6bf1 100644 --- a/test/MC/ARM/neon-vld-encoding.s +++ b/test/MC/ARM/neon-vld-encoding.s @@ -412,6 +412,46 @@ @ CHECK: vld4.32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4 @ encoding: [0xc4,0x1b,0xe9,0xf4] + vld4.8 {d16[], d17[], d18[], d19[]}, [r1] + vld4.16 {d16[], d17[], d18[], d19[]}, [r2] + vld4.32 {d16[], d17[], d18[], d19[]}, [r3] + vld4.8 {d17[], d19[], d21[], d23[]}, [r7] + vld4.16 {d17[], d19[], d21[], d23[]}, [r7] + vld4.32 {d16[], d18[], d20[], d22[]}, [r8] + + vld4.s8 {d16[], d17[], d18[], d19[]}, [r1]! + vld4.s16 {d16[], d17[], d18[], d19[]}, [r2]! + vld4.s32 {d16[], d17[], d18[], d19[]}, [r3]! + vld4.u8 {d17[], d19[], d21[], d23[]}, [r7]! + vld4.u16 {d17[], d19[], d21[], d23[]}, [r7]! + vld4.u32 {d16[], d18[], d20[], d22[]}, [r8]! + + vld4.p8 {d16[], d17[], d18[], d19[]}, [r1], r8 + vld4.p16 {d16[], d17[], d18[], d19[]}, [r2], r7 + vld4.f32 {d16[], d17[], d18[], d19[]}, [r3], r5 + vld4.i8 {d16[], d18[], d20[], d22[]}, [r6], r3 + vld4.i16 {d16[], d18[], d20[], d22[]}, [r6], r3 + vld4.i32 {d17[], d19[], d21[], d23[]}, [r9], r4 + +@ CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1] @ encoding: [0x0f,0x0f,0xe1,0xf4] +@ CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r2] @ encoding: [0x4f,0x0f,0xe2,0xf4] +@ CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r3] @ encoding: [0x8f,0x0f,0xe3,0xf4] +@ CHECK: vld4.8 {d17[], d19[], d21[], d23[]}, [r7] @ encoding: [0x2f,0x1f,0xe7,0xf4] +@ CHECK: vld4.16 {d17[], d19[], d21[], d23[]}, [r7] @ encoding: [0x6f,0x1f,0xe7,0xf4] +@ CHECK: vld4.32 {d16[], d18[], d20[], d22[]}, [r8] @ encoding: [0xaf,0x0f,0xe8,0xf4] +@ CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1]! @ encoding: [0x0d,0x0f,0xe1,0xf4] +@ CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r2]! @ encoding: [0x4d,0x0f,0xe2,0xf4] +@ CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r3]! @ encoding: [0x8d,0x0f,0xe3,0xf4] +@ CHECK: vld4.8 {d17[], d18[], d19[], d20[]}, [r7]! @ encoding: [0x2d,0x1f,0xe7,0xf4] +@ CHECK: vld4.16 {d17[], d18[], d19[], d20[]}, [r7]! @ encoding: [0x6d,0x1f,0xe7,0xf4] +@ CHECK: vld4.32 {d16[], d18[], d20[], d22[]}, [r8]! @ encoding: [0xad,0x0f,0xe8,0xf4] +@ CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1], r8 @ encoding: [0x08,0x0f,0xe1,0xf4] +@ CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r2], r7 @ encoding: [0x47,0x0f,0xe2,0xf4] +@ CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r3], r5 @ encoding: [0x85,0x0f,0xe3,0xf4] +@ CHECK: vld4.8 {d16[], d18[], d20[], d22[]}, [r6], r3 @ encoding: [0x23,0x0f,0xe6,0xf4] +@ CHECK: vld4.16 {d16[], d18[], d20[], d22[]}, [r6], r3 @ encoding: [0x63,0x0f,0xe6,0xf4] +@ CHECK: vld4.32 {d17[], d19[], d21[], d23[]}, [r9], r4 @ encoding: [0xa4,0x1f,0xe9,0xf4] + @ Handle 'Q' registers in register lists as if the sub-reg D regs were @ specified instead. vld1.8 {q3}, [r9] |