[fast-isel] Unaligned loads of floats are not supported. Therefore, convert to a regular

load and then move the result from a GPR to a FPR. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@146502 91177308-0d34-0410-b5e6-96231b3b80d8
author: Chad Rosier <mcrosier@apple.com> 2011-12-13 19:22:14 +0000
committer: Chad Rosier <mcrosier@apple.com> 2011-12-13 19:22:14 +0000
commit: 8a9bce978fa4ca60d3a0ba42a1d44c41463a3c33 (patch)
tree: 16921e540ea3413636954f8c94c4aa19202df488
parent: 21e1b7a13c0b1201b51304c9801a66a9f7dd7871 (diff)
download: external_llvm-8a9bce978fa4ca60d3a0ba42a1d44c41463a3c33.zip
external_llvm-8a9bce978fa4ca60d3a0ba42a1d44c41463a3c33.tar.gz
external_llvm-8a9bce978fa4ca60d3a0ba42a1d44c41463a3c33.tar.bz2
2 files changed, 53 insertions, 7 deletions
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index d3ebf36..0a4faa2 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -178,8 +178,8 @@ class ARMFastISel : public FastISel {
     bool isLoadTypeLegal(Type *Ty, MVT &VT);
     bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                     bool isZExt);
-    bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, bool isZExt,
-                     bool allocReg);
+    bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, unsigned Alignment = 0,
+                     bool isZExt = true, bool allocReg = true);
                      
     bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
                       unsigned Alignment = 0);
@@ -965,10 +965,11 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
 }
 
 bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
-                              bool isZExt = true, bool allocReg = true) {
+                              unsigned Alignment, bool isZExt, bool allocReg) {
   assert(VT.isSimple() && "Non-simple types are invalid here!");
   unsigned Opc;
   bool useAM3 = false;
+  bool needVMOV = false;
   TargetRegisterClass *RC;  
   switch (VT.getSimpleVT().SimpleTy) {
     // This is mostly going to be Neon/vector support.
@@ -1014,10 +1015,23 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
       RC = ARM::GPRRegisterClass;
       break;
     case MVT::f32:
-      Opc = ARM::VLDRS;
-      RC = TLI.getRegClassFor(VT);
+      // Unaligned loads need special handling. Floats require word-alignment.
+      if (Alignment && Alignment < 4) {
+        needVMOV = true;
+        VT = MVT::i32;
+        Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
+        RC = ARM::GPRRegisterClass;
+      } else {
+        Opc = ARM::VLDRS;
+        RC = TLI.getRegClassFor(VT);
+      }
       break;
     case MVT::f64:
+      if (Alignment && Alignment < 4) {
+        // FIXME: Unaligned loads need special handling.  Doublewords require
+        // word-alignment.
+        return false;
+      }
       Opc = ARM::VLDRD;
       RC = TLI.getRegClassFor(VT);
       break;
@@ -1032,6 +1046,16 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                     TII.get(Opc), ResultReg);
   AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
+
+  // If we had an unaligned load of a float we've converted it to an regular
+  // load.  Now we must move from the GRP to the FP register.
+  if (needVMOV) {
+    unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                            TII.get(ARM::VMOVSR), MoveReg)
+                    .addReg(ResultReg));
+    ResultReg = MoveReg;
+  }
   return true;
 }
 
@@ -1050,7 +1074,8 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
   if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
 
   unsigned ResultReg;
-  if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
+  if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
+    return false;
   UpdateValueMap(I, ResultReg);
   return true;
 }
@@ -2477,7 +2502,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
   if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
   
   unsigned ResultReg = MI->getOperand(0).getReg();
-  if (!ARMEmitLoad(VT, ResultReg, Addr, isZExt, false))
+  if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
     return false;
   MI->eraseFromParent();
   return true;
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 0697460..7fc0804 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -198,3 +198,24 @@ entry:
   ret void
 }
 
+; Check unaligned loads of floats
+%class.TAlignTest = type <{ i16, float }>
+
+define zeroext i1 @test6(%class.TAlignTest* %this) nounwind align 2 {
+entry:
+; ARM: @test6
+; THUMB: @test6
+  %0 = alloca %class.TAlignTest*, align 4
+  store %class.TAlignTest* %this, %class.TAlignTest** %0, align 4
+  %1 = load %class.TAlignTest** %0
+  %2 = getelementptr inbounds %class.TAlignTest* %1, i32 0, i32 1
+  %3 = load float* %2, align 1
+  %4 = fcmp une float %3, 0.000000e+00
+; ARM: ldr r0, [r0, #2]
+; ARM: vmov s0, r0
+; ARM: vcmpe.f32 s0, #0
+; THUMB: ldr.w r0, [r0, #2]
+; THUMB: vmov s0, r0
+; THUMB: vcmpe.f32 s0, #0
+  ret i1 %4
+}
+\ No newline at end of file
author	Chad Rosier <mcrosier@apple.com>	2011-12-13 19:22:14 +0000
committer	Chad Rosier <mcrosier@apple.com>	2011-12-13 19:22:14 +0000
commit	8a9bce978fa4ca60d3a0ba42a1d44c41463a3c33 (patch)
tree	16921e540ea3413636954f8c94c4aa19202df488
parent	21e1b7a13c0b1201b51304c9801a66a9f7dd7871 (diff)
download	external_llvm-8a9bce978fa4ca60d3a0ba42a1d44c41463a3c33.zip external_llvm-8a9bce978fa4ca60d3a0ba42a1d44c41463a3c33.tar.gz external_llvm-8a9bce978fa4ca60d3a0ba42a1d44c41463a3c33.tar.bz2