diff options
author | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2012-01-07 01:47:05 +0000 |
---|---|---|
committer | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2012-01-07 01:47:05 +0000 |
commit | 45ca7c6336f174fae3a9521d5161a498ca27fd13 (patch) | |
tree | 7885c9d2cea7f10d4f0eff077b8d19543159cbfe | |
parent | f231a6dc7f251859af61677991b9c70ade6e1bfa (diff) | |
download | external_llvm-45ca7c6336f174fae3a9521d5161a498ca27fd13.zip external_llvm-45ca7c6336f174fae3a9521d5161a498ca27fd13.tar.gz external_llvm-45ca7c6336f174fae3a9521d5161a498ca27fd13.tar.bz2 |
Use movw+movt in ARMFastISel::ARMMaterializeGV.
This eliminates a lot of constant pool entries for -O0 builds of code
with many global variable accesses.
This speeds up -O0 codegen of consumer-typeset by 2x because the
constant island pass no longer has to look at thousands of constant pool
entries.
<rdar://problem/10629774>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147712 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/ARM/ARMFastISel.cpp | 79 | ||||
-rw-r--r-- | test/CodeGen/ARM/fast-isel-intrinsic.ll | 30 | ||||
-rw-r--r-- | test/CodeGen/ARM/fast-isel.ll | 12 |
3 files changed, 78 insertions, 43 deletions
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 7ce5099..56e3d49 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -617,40 +617,61 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { // TODO: Need more magic for ARM PIC. if (!isThumb2 && (RelocM == Reloc::PIC_)) return 0; - // MachineConstantPool wants an explicit alignment. - unsigned Align = TD.getPrefTypeAlignment(GV->getType()); - if (Align == 0) { - // TODO: Figure out if this is correct. - Align = TD.getTypeAllocSize(GV->getType()); - } - - // Grab index. - unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8); - unsigned Id = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id, - ARMCP::CPValue, - PCAdj); - unsigned Idx = MCP.getConstantPoolIndex(CPV, Align); - - // Load value. - MachineInstrBuilder MIB; unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); - if (isThumb2) { - unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) - .addConstantPoolIndex(Idx); - if (RelocM == Reloc::PIC_) - MIB.addImm(Id); + + // Use movw+movt when possible, it avoids constant pool entries. + if (Subtarget->isTargetDarwin() && Subtarget->useMovt()) { + unsigned Opc; + switch (RelocM) { + case Reloc::PIC_: + Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel; + break; + case Reloc::DynamicNoPIC: + Opc = isThumb2 ? ARM::t2MOV_ga_dyn : ARM::MOV_ga_dyn; + break; + default: + Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm; + break; + } + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), + DestReg).addGlobalAddress(GV)); } else { - // The extra immediate is for addrmode2. - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), - DestReg) - .addConstantPoolIndex(Idx) - .addImm(0); + // MachineConstantPool wants an explicit alignment. + unsigned Align = TD.getPrefTypeAlignment(GV->getType()); + if (Align == 0) { + // TODO: Figure out if this is correct. + Align = TD.getTypeAllocSize(GV->getType()); + } + + // Grab index. + unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : + (Subtarget->isThumb() ? 4 : 8); + unsigned Id = AFI->createPICLabelUId(); + ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id, + ARMCP::CPValue, + PCAdj); + unsigned Idx = MCP.getConstantPoolIndex(CPV, Align); + + // Load value. + MachineInstrBuilder MIB; + if (isThumb2) { + unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic; + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + .addConstantPoolIndex(Idx); + if (RelocM == Reloc::PIC_) + MIB.addImm(Id); + } else { + // The extra immediate is for addrmode2. + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), + DestReg) + .addConstantPoolIndex(Idx) + .addImm(0); + } + AddOptionalDefs(MIB); } - AddOptionalDefs(MIB); if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) { + MachineInstrBuilder MIB; unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); if (isThumb2) MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll index f2ba04f..5bc35ee 100644 --- a/test/CodeGen/ARM/fast-isel-intrinsic.ll +++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -6,14 +6,16 @@ define void @t1() nounwind ssp { ; ARM: t1 -; ARM: ldr r0, LCPI0_0 +; ARM: movw r0, :lower16:_message1 +; ARM: movt r0, :upper16:_message1 ; ARM: add r0, r0, #5 ; ARM: movw r1, #64 ; ARM: movw r2, #10 ; ARM: uxtb r1, r1 ; ARM: bl _memset ; THUMB: t1 -; THUMB: ldr.n r0, LCPI0_0 +; THUMB: movw r0, :lower16:_message1 +; THUMB: movt r0, :upper16:_message1 ; THUMB: adds r0, #5 ; THUMB: movs r1, #64 ; THUMB: movt r1, #0 @@ -29,7 +31,8 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind define void @t2() nounwind ssp { ; ARM: t2 -; ARM: ldr r0, LCPI1_0 +; ARM: movw r0, :lower16:L_temp$non_lazy_ptr +; ARM: movt r0, :upper16:L_temp$non_lazy_ptr ; ARM: ldr r0, [r0] ; ARM: add r1, r0, #4 ; ARM: add r0, r0, #16 @@ -39,7 +42,8 @@ define void @t2() nounwind ssp { ; ARM: ldr r1, [sp] @ 4-byte Reload ; ARM: bl _memcpy ; THUMB: t2 -; THUMB: ldr.n r0, LCPI1_0 +; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr +; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr ; THUMB: ldr r0, [r0] ; THUMB: adds r1, r0, #4 ; THUMB: adds r0, #16 @@ -55,7 +59,8 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, define void @t3() nounwind ssp { ; ARM: t3 -; ARM: ldr r0, LCPI2_0 +; ARM: movw r0, :lower16:L_temp$non_lazy_ptr +; ARM: movt r0, :upper16:L_temp$non_lazy_ptr ; ARM: ldr r0, [r0] ; ARM: add r1, r0, #4 ; ARM: add r0, r0, #16 @@ -63,7 +68,8 @@ define void @t3() nounwind ssp { ; ARM: mov r0, r1 ; ARM: bl _memmove ; THUMB: t3 -; THUMB: ldr.n r0, LCPI2_0 +; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr +; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr ; THUMB: ldr r0, [r0] ; THUMB: adds r1, r0, #4 ; THUMB: adds r0, #16 @@ -77,9 +83,11 @@ define void @t3() nounwind ssp { define void @t4() nounwind ssp { ; ARM: t4 -; ARM: ldr r0, LCPI3_0 +; ARM: movw r0, :lower16:L_temp$non_lazy_ptr +; ARM: movt r0, :upper16:L_temp$non_lazy_ptr ; ARM: ldr r0, [r0] -; ARM: ldr r1, LCPI3_1 +; ARM: movw r1, :lower16:L_temp$non_lazy_ptr +; ARM: movt r1, :upper16:L_temp$non_lazy_ptr ; ARM: ldr r1, [r1] ; ARM: ldr r2, [r1, #16] ; ARM: str r2, [r0, #4] @@ -88,9 +96,11 @@ define void @t4() nounwind ssp { ; ARM: ldrh r1, [r1, #24] ; ARM: strh r1, [r0, #12] ; ARM: bx lr -; THUMB: ldr.n r0, LCPI3_0 +; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr +; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr ; THUMB: ldr r0, [r0] -; THUMB: ldr.n r1, LCPI3_1 +; THUMB: movw r1, :lower16:L_temp$non_lazy_ptr +; THUMB: movt r1, :upper16:L_temp$non_lazy_ptr ; THUMB: ldr r1, [r1] ; THUMB: ldr r2, [r1, #16] ; THUMB: str r2, [r0, #4] diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll index 9f33aea..c8e0211 100644 --- a/test/CodeGen/ARM/fast-isel.ll +++ b/test/CodeGen/ARM/fast-isel.ll @@ -142,19 +142,23 @@ define void @test4() { store i32 %b, i32* @test4g ret void -; THUMB: ldr.n r0, LCPI4_1 +; THUMB: movw r0, :lower16:L_test4g$non_lazy_ptr +; THUMB: movt r0, :upper16:L_test4g$non_lazy_ptr ; THUMB: ldr r0, [r0] ; THUMB: ldr r0, [r0] ; THUMB: adds r0, #1 -; THUMB: ldr.n r1, LCPI4_0 +; THUMB: movw r1, :lower16:L_test4g$non_lazy_ptr +; THUMB: movt r1, :upper16:L_test4g$non_lazy_ptr ; THUMB: ldr r1, [r1] ; THUMB: str r0, [r1] -; ARM: ldr r0, LCPI4_1 +; ARM: movw r0, :lower16:L_test4g$non_lazy_ptr +; ARM: movt r0, :upper16:L_test4g$non_lazy_ptr ; ARM: ldr r0, [r0] ; ARM: ldr r0, [r0] ; ARM: add r0, r0, #1 -; ARM: ldr r1, LCPI4_0 +; ARM: movw r1, :lower16:L_test4g$non_lazy_ptr +; ARM: movt r1, :upper16:L_test4g$non_lazy_ptr ; ARM: ldr r1, [r1] ; ARM: str r0, [r1] } |