diff options
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 3 | ||||
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 72 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrInfo.td | 8 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrThumb2.td | 24 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleA9.td | 4 | ||||
-rw-r--r-- | test/CodeGen/ARM/fabss.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/ARM/fadds.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/ARM/fdivs.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/ARM/fmacs.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/ARM/fmscs.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/ARM/fmuls.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/ARM/shifter_operand.ll | 2 |
12 files changed, 31 insertions, 96 deletions
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index d34a52d..0ffb4da 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -454,9 +454,6 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, return; unsigned DefIdx = Use->getOperand(OpIdx).getResNo(); - if (Use->isMachineOpcode()) - // Adjust the use operand index by num of defs. - OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs(); int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx); if (Latency >= 0) dep.setLatency(Latency); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 51db677..1c89b97 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1823,8 +1823,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, // This may be a def / use of a variable_ops instruction, the operand // latency might be determinable dynamically. Let the target try to // figure it out. - int DefCycle = -1; bool LdmBypass = false; + int DefCycle = -1; switch (DefTID.getOpcode()) { default: DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); @@ -1922,38 +1922,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, ? (*DefMI->memoperands_begin())->getAlignment() : 0; unsigned UseAlign = UseMI->hasOneMemOperand() ? (*UseMI->memoperands_begin())->getAlignment() : 0; - int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign, - UseTID, UseIdx, UseAlign); - - if (Latency > 1 && - (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { - // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] - // variants are one cycle cheaper. - switch (DefTID.getOpcode()) { - default: break; - case ARM::LDRrs: - case ARM::LDRBrs: { - unsigned ShOpVal = DefMI->getOperand(3).getImm(); - unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); - if (ShImm == 0 || - (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) - --Latency; - break; - } - case ARM::t2LDRs: - case ARM::t2LDRBs: - case ARM::t2LDRHs: - case ARM::t2LDRSHs: { - // Thumb2 mode: lsl only. - unsigned ShAmt = DefMI->getOperand(3).getImm(); - if (ShAmt == 0 || ShAmt == 2) - --Latency; - break; - } - } - } - - return Latency; + return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign, + UseTID, UseIdx, UseAlign); } int @@ -1977,40 +1947,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode); unsigned UseAlign = !UseMN->memoperands_empty() ? (*UseMN->memoperands_begin())->getAlignment() : 0; - int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign, - UseTID, UseIdx, UseAlign); - - if (Latency > 1 && - (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { - // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] - // variants are one cycle cheaper. - switch (DefTID.getOpcode()) { - default: break; - case ARM::LDRrs: - case ARM::LDRBrs: { - unsigned ShOpVal = - cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); - unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); - if (ShImm == 0 || - (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) - --Latency; - break; - } - case ARM::t2LDRs: - case ARM::t2LDRBs: - case ARM::t2LDRHs: - case ARM::t2LDRSHs: { - // Thumb2 mode: lsl only. - unsigned ShAmt = - cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); - if (ShAmt == 0 || ShAmt == 2) - --Latency; - break; - } - } - } - - return Latency; + return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign, + UseTID, UseIdx, UseAlign); } bool ARMBaseInstrInfo:: diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index ffd4962..0974890 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1438,13 +1438,13 @@ def RFE : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, GPR:$base), // Load -defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si, +defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_i, IIC_iLoad_r, UnOpFrag<(load node:$Src)>>; -defm LDRB : AI_ldr1<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si, +defm LDRB : AI_ldr1<1, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_r, UnOpFrag<(zextloadi8 node:$Src)>>; -defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si, +defm STR : AI_str1<0, "str", IIC_iStore_i, IIC_iStore_r, BinOpFrag<(store node:$LHS, node:$RHS)>>; -defm STRB : AI_str1<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si, +defm STRB : AI_str1<1, "strb", IIC_iStore_bh_i, IIC_iStore_bh_r, BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; // Special LDR for loads from non-pc-relative constpools. diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 21b8347..a209fb5 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -574,7 +574,7 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, /// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns. multiclass T2I_ld<bit signed, bits<2> opcod, string opc, - InstrItinClass iii, InstrItinClass iis, PatFrag opnode> { + InstrItinClass iii, InstrItinClass iir, PatFrag opnode> { def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr), iii, opc, ".w\t$dst, $addr", [(set GPR:$dst, (opnode t2addrmode_imm12:$addr))]> { @@ -599,7 +599,7 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, let Inst{10} = 1; // The P bit. let Inst{8} = 0; // The W bit. } - def s : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), iis, + def s : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), iir, opc, ".w\t$dst, $addr", [(set GPR:$dst, (opnode t2addrmode_so_reg:$addr))]> { let Inst{31-27} = 0b11111; @@ -626,7 +626,7 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, /// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns. multiclass T2I_st<bits<2> opcod, string opc, - InstrItinClass iii, InstrItinClass iis, PatFrag opnode> { + InstrItinClass iii, InstrItinClass iir, PatFrag opnode> { def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr), iii, opc, ".w\t$src, $addr", [(opnode GPR:$src, t2addrmode_imm12:$addr)]> { @@ -647,7 +647,7 @@ multiclass T2I_st<bits<2> opcod, string opc, let Inst{10} = 1; // The P bit. let Inst{8} = 0; // The W bit. } - def s : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), iis, + def s : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), iir, opc, ".w\t$src, $addr", [(opnode GPR:$src, t2addrmode_so_reg:$addr)]> { let Inst{31-27} = 0b11111; @@ -916,19 +916,19 @@ def t2UDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi, // Load let canFoldAsLoad = 1, isReMaterializable = 1 in -defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, +defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_r, UnOpFrag<(load node:$Src)>>; // Loads with zero extension -defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, +defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_r, UnOpFrag<(zextloadi16 node:$Src)>>; -defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, +defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_r, UnOpFrag<(zextloadi8 node:$Src)>>; // Loads with sign extension -defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, +defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_r, UnOpFrag<(sextloadi16 node:$Src)>>; -defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, +defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_r, UnOpFrag<(sextloadi8 node:$Src)>>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { @@ -1070,11 +1070,11 @@ def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>; def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>; // Store -defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, +defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_r, BinOpFrag<(store node:$LHS, node:$RHS)>>; -defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_si, +defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_r, BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; -defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si, +defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_r, BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; // Store doubleword diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 20aa641..548bc7c 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -574,7 +574,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<9, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe]>], - [8, 1, 1, 1]>, + [8, 0, 1, 1]>, // // Double-precision FP MAC InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -582,7 +582,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<10, [A9_DRegsN], 0, Reserved>, InstrStage<2, [A9_NPipe]>], - [9, 1, 1, 1]>, + [9, 0, 1, 1]>, // // Single-precision FP DIV InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll index f03282b..dfc1e0a 100644 --- a/test/CodeGen/ARM/fabss.ll +++ b/test/CodeGen/ARM/fabss.ll @@ -24,4 +24,4 @@ declare float @fabsf(float) ; CORTEXA8: test: ; CORTEXA8: vabs.f32 d1, d1 ; CORTEXA9: test: -; CORTEXA9: vabs.f32 s1, s1 +; CORTEXA9: vabs.f32 s0, s0 diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll index 749690e..113f0e2 100644 --- a/test/CodeGen/ARM/fadds.ll +++ b/test/CodeGen/ARM/fadds.ll @@ -20,4 +20,4 @@ entry: ; CORTEXA8: test: ; CORTEXA8: vadd.f32 d0, d1, d0 ; CORTEXA9: test: -; CORTEXA9: vadd.f32 s0, s1, s0 +; CORTEXA9: vadd.f32 s0, s0, s1 diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll index 0c31495..9af1217 100644 --- a/test/CodeGen/ARM/fdivs.ll +++ b/test/CodeGen/ARM/fdivs.ll @@ -20,4 +20,4 @@ entry: ; CORTEXA8: test: ; CORTEXA8: vdiv.f32 s0, s1, s0 ; CORTEXA9: test: -; CORTEXA9: vdiv.f32 s0, s1, s0 +; CORTEXA9: vdiv.f32 s0, s0, s1 diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll index f8b47b5..c4ceca9 100644 --- a/test/CodeGen/ARM/fmacs.ll +++ b/test/CodeGen/ARM/fmacs.ll @@ -21,4 +21,4 @@ entry: ; CORTEXA8: test: ; CORTEXA8: vmul.f32 d0, d1, d0 ; CORTEXA9: test: -; CORTEXA9: vmla.f32 s2, s1, s0 +; CORTEXA9: vmla.f32 s0, s1, s2 diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll index 7a70543..19359a1 100644 --- a/test/CodeGen/ARM/fmscs.ll +++ b/test/CodeGen/ARM/fmscs.ll @@ -19,6 +19,6 @@ entry: ; NFP0: vnmls.f32 s2, s1, s0 ; CORTEXA8: test: -; CORTEXA8: vnmls.f32 s2, s1, s0 +; CORTEXA8: vnmls.f32 s1, s2, s0 ; CORTEXA9: test: -; CORTEXA9: vnmls.f32 s2, s1, s0 +; CORTEXA9: vnmls.f32 s0, s1, s2 diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll index ef4e3e5..bfafd20 100644 --- a/test/CodeGen/ARM/fmuls.ll +++ b/test/CodeGen/ARM/fmuls.ll @@ -20,4 +20,4 @@ entry: ; CORTEXA8: test: ; CORTEXA8: vmul.f32 d0, d1, d0 ; CORTEXA9: test: -; CORTEXA9: vmul.f32 s0, s1, s0 +; CORTEXA9: vmul.f32 s0, s0, s1 diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll index 01e3a92..897fb1a 100644 --- a/test/CodeGen/ARM/shifter_operand.ll +++ b/test/CodeGen/ARM/shifter_operand.ll @@ -36,8 +36,8 @@ entry: ; lsl #2 is free ; A9: test3: -; A9: ldr r0, [r0, r2, lsl #2] ; A9: ldr r1, [r1, r2, lsl #2] +; A9: ldr r0, [r0, r2, lsl #2] %tmp1 = shl i32 %offset, 2 %tmp2 = add i32 %base, %tmp1 %tmp3 = inttoptr i32 %tmp2 to i32* |