diff options
author | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2012-10-26 23:39:46 +0000 |
---|---|---|
committer | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2012-10-26 23:39:46 +0000 |
commit | 17f42e02a10bd4d43e4ba904c640224de2c48f51 (patch) | |
tree | 6ca6ca5b16bf1df4d8a4d09f6b8b871514a60361 /lib | |
parent | 61fac6810ff8686c5302dedb35d52cc662a983fe (diff) | |
download | external_llvm-17f42e02a10bd4d43e4ba904c640224de2c48f51.zip external_llvm-17f42e02a10bd4d43e4ba904c640224de2c48f51.tar.gz external_llvm-17f42e02a10bd4d43e4ba904c640224de2c48f51.tar.bz2 |
Revert r163298 "Optimize codegen for VSETLNi{8,16,32} operating on Q registers."
Keep the integer_insertelement test case; the new coalescer can handle
this kind of lane insertion without help from pseudo-instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166835 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/ARM/ARMExpandPseudoInsts.cpp | 51 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 32 |
2 files changed, 17 insertions, 66 deletions
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index c130b2e..f7c8a9c 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1208,57 +1208,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandLaneOp(MBBI); return true; - case ARM::VSETLNi8Q: - case ARM::VSETLNi16Q: { - // Expand VSETLNs acting on a Q register to equivalent VSETLNs acting - // on the respective D register. - - unsigned QReg = MI.getOperand(1).getReg(); - unsigned QLane = MI.getOperand(3).getImm(); - - unsigned NewOpcode, DLane, DSubReg; - switch (Opcode) { - default: llvm_unreachable("Invalid opcode!"); - case ARM::VSETLNi8Q: - // 4 possible 8-bit lanes per DPR: - NewOpcode = ARM::VSETLNi8; - DLane = QLane % 8; - DSubReg = (QLane / 8) ? ARM::dsub_1 : ARM::dsub_0; - break; - case ARM::VSETLNi16Q: - // 4 possible 16-bit lanes per DPR. - NewOpcode = ARM::VSETLNi16; - DLane = QLane % 4; - DSubReg = (QLane / 4) ? ARM::dsub_1 : ARM::dsub_0; - break; - } - - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpcode)); - - unsigned DReg = TRI->getSubReg(QReg, DSubReg); - - MIB.addReg(DReg, RegState::Define); // Output DPR - MIB.addReg(DReg); // Input DPR - MIB.addOperand(MI.getOperand(2)); // Input GPR - MIB.addImm(DLane); // Lane - - // Add the predicate operands. - MIB.addOperand(MI.getOperand(4)); - MIB.addOperand(MI.getOperand(5)); - - if (MI.getOperand(1).isKill()) // Add an implicit kill for the Q register. - MIB->addRegisterKilled(QReg, TRI, true); - // And an implicit def of the output register (which should always be the - // same as the input register). 
- MIB->addRegisterDefined(QReg, TRI); - - TransferImpOps(MI, MIB, MIB); - - MI.eraseFromParent(); - return true; - } - case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true; case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index ede4def..3cf213c 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5140,23 +5140,25 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V), GPR:$R, imm:$lane))]> { let Inst{21} = lane{0}; } - -def VSETLNi8Q : PseudoNeonI<(outs QPR:$V), - (ins QPR:$src1, GPR:$R, VectorIndex8:$lane), - IIC_VMOVISL, "", - [(set QPR:$V, (vector_insert (v16i8 QPR:$src1), - GPR:$R, imm:$lane))]>; -def VSETLNi16Q : PseudoNeonI<(outs QPR:$V), - (ins QPR:$src1, GPR:$R, VectorIndex16:$lane), - IIC_VMOVISL, "", - [(set QPR:$V, (vector_insert (v8i16 QPR:$src1), - GPR:$R, imm:$lane))]>; } - +def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), + (v16i8 (INSERT_SUBREG QPR:$src1, + (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1, + (DSubReg_i8_reg imm:$lane))), + GPR:$src2, (SubReg_i8_lane imm:$lane))), + (DSubReg_i8_reg imm:$lane)))>; +def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane), + (v8i16 (INSERT_SUBREG QPR:$src1, + (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, + (DSubReg_i16_reg imm:$lane))), + GPR:$src2, (SubReg_i16_lane imm:$lane))), + (DSubReg_i16_reg imm:$lane)))>; def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane), - (v4i32 (INSERT_SUBREG QPR:$src1, - GPR:$src2, - (SSubReg_f32_reg imm:$lane)))>; + (v4i32 (INSERT_SUBREG QPR:$src1, + (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1, + (DSubReg_i32_reg imm:$lane))), + GPR:$src2, (SubReg_i32_lane imm:$lane))), + (DSubReg_i32_reg imm:$lane)))>; def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)), (INSERT_SUBREG 
(v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)), |