Add support for getting & setting the FPSCR application register on ARM when VFP is enabled.

Add support for using the FPSCR in conjunction with the vcvtr instruction, for controlling fp to int rounding. Add support for the FLT_ROUNDS_ node now that the FPSCR is exposed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@110152 91177308-0d34-0410-b5e6-96231b3b80d8
author: Nate Begeman <natebegeman@mac.com> 2010-08-03 21:31:55 +0000
committer: Nate Begeman <natebegeman@mac.com> 2010-08-03 21:31:55 +0000
commit: d1fb583128c6682bb8a7c74eafa810a9270cc8df (patch)
tree: d009b76a09bad20fb173a61081cecd9a4ac8cb6a
parent: 6fc24467e91a2c515fa5347e90071573c454bcca (diff)
download: external_llvm-d1fb583128c6682bb8a7c74eafa810a9270cc8df.zip
external_llvm-d1fb583128c6682bb8a7c74eafa810a9270cc8df.tar.gz
external_llvm-d1fb583128c6682bb8a7c74eafa810a9270cc8df.tar.bz2
5 files changed, 53 insertions, 20 deletions
diff --git a/include/llvm/IntrinsicsARM.td b/include/llvm/IntrinsicsARM.td
index 7e909b7..2589de2 100644
--- a/include/llvm/IntrinsicsARM.td
+++ b/include/llvm/IntrinsicsARM.td
@@ -36,6 +36,20 @@ let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".
 }
 
 //===----------------------------------------------------------------------===//
+// VFP
+
+let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".
+  def int_arm_get_fpscr : GCCBuiltin<"__builtin_arm_get_fpscr">, 
+                         Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+  def int_arm_set_fpscr : GCCBuiltin<"__builtin_arm_set_fpscr">, 
+                         Intrinsic<[], [llvm_i32_ty], [IntrWriteMem]>;
+  def int_arm_vcvtr     : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
+                                    [IntrNoMem]>;
+  def int_arm_vcvtru    : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
+                                    [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
 // Advanced SIMD (NEON)
 
 let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 31439b4..cd37633 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -176,6 +176,7 @@ getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
   Reserved.set(ARM::SP);
   Reserved.set(ARM::PC);
+  Reserved.set(ARM::FPSCR);
   if (STI.isTargetDarwin() || hasFP(MF))
     Reserved.set(FramePtr);
   // Some targets reserve R9.
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 571dc2e..08054cb 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -474,10 +474,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   }
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 
-  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
+  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
     // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
     // iff target supports vfp2.
     setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
+    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+  }
 
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -2764,6 +2766,24 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
   return DAG.getMergeValues(Ops, 2, dl);
 }
 
+SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 
+                                            SelectionDAG &DAG) const {
+  // The rounding mode is in bits 23:22 of the FPSCR.
+  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
+  // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
+  // so that the shift + and get folded into a bitfield extract.
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+                              DAG.getConstant(Intrinsic::arm_get_fpscr,
+                                              MVT::i32));
+  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, 
+                                  DAG.getConstant(1U << 22, MVT::i32));
+  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
+                              DAG.getConstant(22, MVT::i32));
+  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, 
+                     DAG.getConstant(3, MVT::i32));
+}
+
 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
                          const ARMSubtarget *ST) {
   EVT VT = N->getValueType(0);
@@ -3705,6 +3725,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
   case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+  case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
   }
   return SDValue();
 }
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 5007f96..df15303 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -82,7 +82,7 @@ namespace llvm {
 
       MEMBARRIER,   // Memory barrier
       SYNCBARRIER,  // Memory sync barrier
-
+      
       VCEQ,         // Vector compare equal.
       VCGE,         // Vector compare greater than or equal.
       VCGEU,        // Vector compare unsigned greater than or equal.
@@ -342,6 +342,7 @@ namespace llvm {
     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
 
     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                             CallingConv::ID CallConv, bool isVarArg,
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 84c23e1..d155444 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -420,34 +420,35 @@ def VTOUIZS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
 
 // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
 // For disassembly only.
-
+let Uses = [FPSCR] in {
 def VTOSIRD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011,
                        (outs SPR:$dst), (ins DPR:$a),
                  IIC_fpCVTDI, "vcvtr", ".s32.f64\t$dst, $a",
-                 [/* For disassembly only; pattern left blank */]> {
+                 [(set SPR:$dst, (int_arm_vcvtr (f64 DPR:$a)))]> {
   let Inst{7} = 0; // Z bit
 }
 
 def VTOSIRS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010,
                         (outs SPR:$dst), (ins SPR:$a),
                  IIC_fpCVTSI, "vcvtr", ".s32.f32\t$dst, $a",
-                 [/* For disassembly only; pattern left blank */]> {
+                 [(set SPR:$dst, (int_arm_vcvtr SPR:$a))]> {
   let Inst{7} = 0; // Z bit
 }
 
 def VTOUIRD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011,
                        (outs SPR:$dst), (ins DPR:$a),
                  IIC_fpCVTDI, "vcvtr", ".u32.f64\t$dst, $a",
-                 [/* For disassembly only; pattern left blank */]> {
+                 [(set SPR:$dst, (int_arm_vcvtru (f64 DPR:$a)))]> {
   let Inst{7} = 0; // Z bit
 }
 
 def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
                         (outs SPR:$dst), (ins SPR:$a),
                  IIC_fpCVTSI, "vcvtr", ".u32.f32\t$dst, $a",
-                 [/* For disassembly only; pattern left blank */]> {
+                 [(set SPR:$dst, (int_arm_vcvtru SPR:$a))]> {
   let Inst{7} = 0; // Z bit
 }
+}
 
 // Convert between floating-point and fixed-point
 // Data type for fixed-point naming convention:
@@ -654,32 +655,27 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
 }
 
 // FPSCR <-> GPR (for disassembly only)
-
-let neverHasSideEffects = 1 in {
-let Uses = [FPSCR] in {
-def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
-                 "\t$dst, fpscr",
-             [/* For disassembly only; pattern left blank */]> {
+let hasSideEffects = 1, Uses = [FPSCR] in
+def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT,
+                 "vmrs", "\t$dst, fpscr",
+             [(set GPR:$dst, (int_arm_get_fpscr))]> {
   let Inst{27-20} = 0b11101111;
   let Inst{19-16} = 0b0001;
   let Inst{11-8}  = 0b1010;
   let Inst{7}     = 0;
   let Inst{4}     = 1;
 }
-}
 
-let Defs = [FPSCR] in {
-def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr",
-                 "\tfpscr, $src",
-             [/* For disassembly only; pattern left blank */]> {
+let Defs = [FPSCR] in 
+def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, 
+                 "vmsr", "\tfpscr, $src",
+             [(int_arm_set_fpscr GPR:$src)]> {
   let Inst{27-20} = 0b11101110;
   let Inst{19-16} = 0b0001;
   let Inst{11-8}  = 0b1010;
   let Inst{7}     = 0;
   let Inst{4}     = 1;
 }
-}
-} // neverHasSideEffects
 
 // Materialize FP immediates. VFP3 only.
 let isReMaterializable = 1 in {
author	Nate Begeman <natebegeman@mac.com>	2010-08-03 21:31:55 +0000
committer	Nate Begeman <natebegeman@mac.com>	2010-08-03 21:31:55 +0000
commit	d1fb583128c6682bb8a7c74eafa810a9270cc8df (patch)
tree	d009b76a09bad20fb173a61081cecd9a4ac8cb6a
parent	6fc24467e91a2c515fa5347e90071573c454bcca (diff)
download	external_llvm-d1fb583128c6682bb8a7c74eafa810a9270cc8df.zip external_llvm-d1fb583128c6682bb8a7c74eafa810a9270cc8df.tar.gz external_llvm-d1fb583128c6682bb8a7c74eafa810a9270cc8df.tar.bz2