-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp        |  2
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td            |  3
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td            | 34
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td             | 13
-rw-r--r--  test/CodeGen/ARM/unaligned_load_store.ll  | 66
5 files changed, 99 insertions(+), 19 deletions(-)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index c66618a..77181cf 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -8808,6 +8808,8 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
     case MVT::i16:
     case MVT::i32:
       return true;
+    case MVT::f64:
+      return Subtarget->hasNEON();
     // FIXME: VLD1 etc with standard alignment is legal.
     }
 }
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index fbd7e7b..76c897c 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -242,6 +242,9 @@ def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
 def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || "
                                 "Subtarget->isTargetDarwin()">;
 
+def IsLE : Predicate<"TLI.isLittleEndian()">;
+def IsBE : Predicate<"TLI.isBigEndian()">;
+
 //===----------------------------------------------------------------------===//
 // ARM Flag Definitions.
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 3134088..048d340 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -398,6 +398,27 @@ def VecListFourQWordIndexed : Operand<i32> {
   let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
 }
 
+def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() == 2;
+}]>;
+def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+                                 (store node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getAlignment() == 2;
+}]>;
+def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() == 1;
+}]>;
+def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+                                (store node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getAlignment() == 1;
+}]>;
+def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+                                    (store node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getAlignment() < 4;
+}]>;
+
 //===----------------------------------------------------------------------===//
 // NEON-specific DAG Nodes.
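
For reference, a minimal IR sketch of what the new alignment fragments match (illustrative only, not part of the commit; it uses the same typed-pointer IR syntax as the tests below). Each access is classified purely by its align attribute:

  define void @frags(double* %p, double* %q) nounwind {
  entry:
    %h = load double* %p, align 2          ; matches hword_alignedload (and non_word_alignedload)
    store double %h, double* %q, align 2   ; matches hword_alignedstore
    %b = load double* %q, align 1          ; matches byte_alignedload (and non_word_alignedload)
    store double %b, double* %p, align 1   ; matches byte_alignedstore
    ret void
  }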
@@ -2238,6 +2259,19 @@ def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
 
 } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
 
+// Use vld1/vst1 for unaligned f64 load / store
+def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
+          (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
+          (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
+def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
+          (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
+          (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
+def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
+          (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
+def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
+          (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;
+
 //===----------------------------------------------------------------------===//
 // NEON pattern fragments
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index e3ab304..eb7eaa6 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -61,6 +61,15 @@ def vfp_f64imm : Operand<f64>,
   let ParserMatchClass = FPImmOperand;
 }
 
+def alignedload32 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+def alignedstore32 : PatFrag<(ops node:$val, node:$ptr),
+                             (store node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+
 // The VCVT to/from fixed-point instructions encode the 'fbits' operand
 // (the number of fixed bits) differently than it appears in the assembly
 // source. It's encoded as "Size - fbits" where Size is the size of the
@@ -86,7 +95,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
 
 def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
                  IIC_fpLoad64, "vldr", "\t$Dd, $addr",
-                 [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>;
+                 [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>;
 
 def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
                  IIC_fpLoad32, "vldr", "\t$Sd, $addr",
@@ -100,7 +109,7 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
 
 def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
                  IIC_fpStore64, "vstr", "\t$Dd, $addr",
-                 [(store (f64 DPR:$Dd), addrmode5:$addr)]>;
+                 [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>;
 
 def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
                  IIC_fpStore32, "vstr", "\t$Sd, $addr",
diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll
index a8237c6..869b926 100644
--- a/test/CodeGen/ARM/unaligned_load_store.ll
+++ b/test/CodeGen/ARM/unaligned_load_store.ll
@@ -1,25 +1,25 @@
-; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=GENERIC
-; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=DARWIN_V6
-; RUN: llc < %s -mtriple=armv6-apple-darwin -arm-strict-align | FileCheck %s -check-prefix=GENERIC
-; RUN: llc < %s -mtriple=armv6-linux | FileCheck %s -check-prefix=GENERIC
+; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED
+; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 -arm-strict-align -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED
+; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=UNALIGNED
 
 ; rdar://7113725
+; rdar://12091029
 
 define void @t(i8* nocapture %a, i8* nocapture %b) nounwind {
 entry:
-; GENERIC: t:
-; GENERIC: ldrb [[R2:r[0-9]+]]
-; GENERIC: ldrb [[R3:r[0-9]+]]
-; GENERIC: ldrb [[R12:r[0-9]+]]
-; GENERIC: ldrb [[R1:r[0-9]+]]
-; GENERIC: strb [[R1]]
-; GENERIC: strb [[R12]]
-; GENERIC: strb [[R3]]
-; GENERIC: strb [[R2]]
-
-; DARWIN_V6: t:
-; DARWIN_V6: ldr r1
-; DARWIN_V6: str r1
+; EXPANDED: t:
+; EXPANDED: ldrb [[R2:r[0-9]+]]
+; EXPANDED: ldrb [[R3:r[0-9]+]]
+; EXPANDED: ldrb [[R12:r[0-9]+]]
+; EXPANDED: ldrb [[R1:r[0-9]+]]
+; EXPANDED: strb [[R1]]
+; EXPANDED: strb [[R12]]
+; EXPANDED: strb [[R3]]
+; EXPANDED: strb [[R2]]
+
+; UNALIGNED: t:
+; UNALIGNED: ldr r1
+; UNALIGNED: str r1
 
   %__src1.i = bitcast i8* %b to i32*              ; <i32*> [#uses=1]
   %__dest2.i = bitcast i8* %a to i32*             ; <i32*> [#uses=1]
@@ -27,3 +27,35 @@ entry:
   store i32 %tmp.i, i32* %__dest2.i, align 1
   ret void
 }
+
+define void @hword(double* %a, double* %b) nounwind {
+entry:
+; EXPANDED: hword:
+; EXPANDED-NOT: vld1
+; EXPANDED: ldrh
+; EXPANDED-NOT: vst1
+; EXPANDED: strh
+
+; UNALIGNED: hword:
+; UNALIGNED: vld1.16
+; UNALIGNED: vst1.16
+  %tmp = load double* %a, align 2
+  store double %tmp, double* %b, align 2
+  ret void
+}
+
+define void @byte(double* %a, double* %b) nounwind {
+entry:
+; EXPANDED: byte:
+; EXPANDED-NOT: vld1
+; EXPANDED: ldrb
+; EXPANDED-NOT: vst1
+; EXPANDED: strb
+
+; UNALIGNED: byte:
+; UNALIGNED: vld1.8
+; UNALIGNED: vst1.8
+  %tmp = load double* %a, align 1
+  store double %tmp, double* %b, align 1
+  ret void
+}
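
For contrast, a word-aligned f64 access still satisfies the alignedload32/alignedstore32 fragments added to ARMInstrVFP.td above, so it should keep selecting vldr/vstr rather than vld1/vst1. A sketch in the style of the tests above (illustrative only, not part of the commit):

  define void @word(double* %a, double* %b) nounwind {
  entry:
    ; align 4 meets the >= 4 requirement of alignedload32/alignedstore32,
    ; so the plain vldr/vstr patterns still apply here
    %tmp = load double* %a, align 4
    store double %tmp, double* %b, align 4
    ret void
  }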