diff options
author | Daniel Sanders <daniel.sanders@imgtec.com> | 2013-12-01 15:54:07 +0000 |
---|---|---|
committer | Daniel Sanders <daniel.sanders@imgtec.com> | 2013-12-01 15:54:07 +0000 |
commit | 102f231863034e18863333bf850f8037b46e6947 (patch) | |
tree | 3a663926a2222874d2aef35b5dd0c531a5ab6cc9 | |
parent | ff4b604f961aa9b9ec2f05a5c31885b19fa636e4 (diff) | |
download | external_llvm-102f231863034e18863333bf850f8037b46e6947.zip external_llvm-102f231863034e18863333bf850f8037b46e6947.tar.gz external_llvm-102f231863034e18863333bf850f8037b46e6947.tar.bz2 |
Merged r195973:
------------------------------------------------------------------------
r195973 | dsanders | 2013-11-30 13:47:57 +0000 (Sat, 30 Nov 2013) | 5 lines
[mips][msa] MSA loads and stores have a 10-bit offset. Account for this when lowering FrameIndex.
This prevents the compiler from emitting invalid ld.[bhwd] and st.[bhwd] instructions
when the stack frame is between 512 and 32,768 bytes in size.
------------------------------------------------------------------------
Review of this commit by Matheus Almeida revealed that it is still possible to
emit invalid code (when the offset is not a multiple of the element size).
However, we agreed that this commit still represents an improvement since it
fixes many cases that previously emitted invalid code, and does not cause any
cases that previously emitted valid code to emit invalid code.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@196049 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/Mips/MipsSERegisterInfo.cpp | 52 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/frameindex.ll | 85 |
2 files changed, 132 insertions, 5 deletions
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index 2be054e..2d44084 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -62,6 +62,24 @@ MipsSERegisterInfo::intRegClass(unsigned Size) const { return &Mips::GPR64RegClass; } +/// Determine whether a given opcode is an MSA load/store (supporting 10-bit +/// offsets) or a non-MSA load/store (supporting 16-bit offsets). +static inline bool isMSALoadOrStore(const unsigned Opcode) { + switch (Opcode) { + case Mips::LD_B: + case Mips::LD_H: + case Mips::LD_W: + case Mips::LD_D: + case Mips::ST_B: + case Mips::ST_H: + case Mips::ST_W: + case Mips::ST_D: + return true; + default: + return false; + } +} + void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, @@ -111,18 +129,42 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); - // If MI is not a debug value, make sure Offset fits in the 16-bit immediate - // field. if (!MI.isDebugValue()) { - if (!isInt<16>(Offset)) { + // Make sure Offset fits within the field available. + // For MSA instructions, this is a 10-bit signed immediate, otherwise it is + // a 16-bit signed immediate. + unsigned OffsetBitSize = isMSALoadOrStore(MI.getOpcode()) ? 10 : 16; + + if (OffsetBitSize == 10 && !isInt<10>(Offset) && isInt<16>(Offset)) { + // If we have an offset that needs to fit into a signed 10-bit immediate + // and doesn't, but does fit into 16-bits then use an ADDiu + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = II->getDebugLoc(); + unsigned ADDiu = Subtarget.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; + const TargetRegisterClass *RC = + Subtarget.isABI_N64() ? 
&Mips::GPR64RegClass : &Mips::GPR32RegClass; + MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo(); + unsigned Reg = RegInfo.createVirtualRegister(RC); + const MipsSEInstrInfo &TII = + *static_cast<const MipsSEInstrInfo *>( + MBB.getParent()->getTarget().getInstrInfo()); + BuildMI(MBB, II, DL, TII.get(ADDiu), Reg).addReg(FrameReg).addImm(Offset); + + FrameReg = Reg; + Offset = 0; + IsKill = true; + } else if (!isInt<16>(Offset)) { + // Otherwise split the offset into 16-bit pieces and add it in multiple + // instructions. MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = II->getDebugLoc(); unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned NewImm; + unsigned NewImm = 0; const MipsSEInstrInfo &TII = *static_cast<const MipsSEInstrInfo *>( MBB.getParent()->getTarget().getInstrInfo()); - unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL, &NewImm); + unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL, + OffsetBitSize == 16 ? &NewImm : NULL); BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(FrameReg) .addReg(Reg, RegState::Kill); diff --git a/test/CodeGen/Mips/msa/frameindex.ll b/test/CodeGen/Mips/msa/frameindex.ll new file mode 100644 index 0000000..3088e1b --- /dev/null +++ b/test/CodeGen/Mips/msa/frameindex.ll @@ -0,0 +1,85 @@ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32-AE -check-prefix=MIPS32-BE %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32-AE -check-prefix=MIPS32-LE %s + +define void @loadstore_v16i8_near() nounwind { + ; MIPS32-AE: loadstore_v16i8_near: + + %1 = alloca <16 x i8> + %2 = load volatile <16 x i8>* %1 + ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0($sp) + store volatile <16 x i8> %2, <16 x i8>* %1 + ; MIPS32-AE: st.b [[R1]], 0($sp) + + ret void + ; MIPS32-AE: .size loadstore_v16i8_near +} + +define void @loadstore_v16i8_just_under_simm10() nounwind { + ; MIPS32-AE: loadstore_v16i8_just_under_simm10: + + %1 = alloca <16 x i8> + %2 = alloca [496 
x i8] ; Push the frame right up to 512 bytes + + %3 = load volatile <16 x i8>* %1 + ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 496($sp) + store volatile <16 x i8> %3, <16 x i8>* %1 + ; MIPS32-AE: st.b [[R1]], 496($sp) + + ret void + ; MIPS32-AE: .size loadstore_v16i8_just_under_simm10 +} + +define void @loadstore_v16i8_just_over_simm10() nounwind { + ; MIPS32-AE: loadstore_v16i8_just_over_simm10: + + %1 = alloca <16 x i8> + %2 = alloca [497 x i8] ; Push the frame just over 512 bytes + + %3 = load volatile <16 x i8>* %1 + ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 512 + ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]]) + store volatile <16 x i8> %3, <16 x i8>* %1 + ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 512 + ; MIPS32-AE: st.b [[R1]], 0([[BASE]]) + + ret void + ; MIPS32-AE: .size loadstore_v16i8_just_over_simm10 +} + +define void @loadstore_v16i8_just_under_simm16() nounwind { + ; MIPS32-AE: loadstore_v16i8_just_under_simm16: + + %1 = alloca <16 x i8> + %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes + + %3 = load volatile <16 x i8>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]]) + store volatile <16 x i8> %3, <16 x i8>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: st.b [[R1]], 0([[BASE]]) + + ret void + ; MIPS32-AE: .size loadstore_v16i8_just_under_simm16 +} + +define void @loadstore_v16i8_just_over_simm16() nounwind { + ; MIPS32-AE: loadstore_v16i8_just_over_simm16: + + %1 = alloca <16 x i8> + %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes + + %3 = load volatile <16 x i8>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]]) + store volatile <16 x i8> %3, <16 x i8>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, 
[[R2]] + ; MIPS32-AE: st.b [[R1]], 0([[BASE]]) + + ret void + ; MIPS32-AE: .size loadstore_v16i8_just_over_simm16 +} |