Diffstat (limited to 'lib/Target/R600')
-rw-r--r--  lib/Target/R600/AMDGPU.td                            |   5
-rw-r--r--  lib/Target/R600/AMDGPUAsmPrinter.cpp                 |  10
-rw-r--r--  lib/Target/R600/AMDGPUISelDAGToDAG.cpp               | 127
-rw-r--r--  lib/Target/R600/AMDGPUISelLowering.cpp               |   3
-rw-r--r--  lib/Target/R600/AMDGPUInstrInfo.cpp                  |  30
-rw-r--r--  lib/Target/R600/AMDGPUInstrInfo.h                    |  13
-rw-r--r--  lib/Target/R600/AMDGPUInstructions.td                |  22
-rw-r--r--  lib/Target/R600/AMDGPUIntrinsics.td                  |   1
-rw-r--r--  lib/Target/R600/AMDGPUPromoteAlloca.cpp              |  13
-rw-r--r--  lib/Target/R600/AMDGPURegisterInfo.cpp               |   5
-rw-r--r--  lib/Target/R600/AMDGPURegisterInfo.h                 |   3
-rw-r--r--  lib/Target/R600/AMDGPUSubtarget.cpp                  |   2
-rw-r--r--  lib/Target/R600/AMDGPUSubtarget.h                    |   9
-rw-r--r--  lib/Target/R600/AMDGPUTargetMachine.cpp              |  30
-rw-r--r--  lib/Target/R600/AMDGPUTargetMachine.h                |  10
-rw-r--r--  lib/Target/R600/AMDGPUTargetTransformInfo.cpp        |   5
-rw-r--r--  lib/Target/R600/AMDILCFGStructurizer.cpp             |  26
-rw-r--r--  lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp        |   7
-rw-r--r--  lib/Target/R600/EvergreenInstructions.td             |   5
-rw-r--r--  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp    |  16
-rw-r--r--  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h      |   2
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp  |  58
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h    |   4
-rw-r--r--  lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp   |   4
-rw-r--r--  lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp     |   1
-rw-r--r--  lib/Target/R600/Processors.td                        |   8
-rw-r--r--  lib/Target/R600/R600ClauseMergePass.cpp              |   2
-rw-r--r--  lib/Target/R600/R600ISelLowering.cpp                 |  10
-rw-r--r--  lib/Target/R600/R600InstrInfo.cpp                    |  14
-rw-r--r--  lib/Target/R600/R600OptimizeVectorRegisters.cpp      |   4
-rw-r--r--  lib/Target/R600/R600RegisterInfo.cpp                 |  10
-rw-r--r--  lib/Target/R600/R600RegisterInfo.h                   |   2
-rw-r--r--  lib/Target/R600/SIFixSGPRLiveRanges.cpp              |   1
-rw-r--r--  lib/Target/R600/SIFoldOperands.cpp                   |   3
-rw-r--r--  lib/Target/R600/SIISelLowering.cpp                   |  53
-rw-r--r--  lib/Target/R600/SIInsertWaits.cpp                    |   6
-rw-r--r--  lib/Target/R600/SIInstrFormats.td                    |  11
-rw-r--r--  lib/Target/R600/SIInstrInfo.cpp                      | 112
-rw-r--r--  lib/Target/R600/SIInstrInfo.h                        |   6
-rw-r--r--  lib/Target/R600/SIInstrInfo.td                       | 569
-rw-r--r--  lib/Target/R600/SIInstructions.td                    | 486
-rw-r--r--  lib/Target/R600/SILoadStoreOptimizer.cpp             |   5
-rw-r--r--  lib/Target/R600/SIRegisterInfo.cpp                   |  84
-rw-r--r--  lib/Target/R600/SIRegisterInfo.h                     |   9
-rw-r--r--  lib/Target/R600/SIRegisterInfo.td                    |   7
-rw-r--r--  lib/Target/R600/SIShrinkInstructions.cpp             |   8
46 files changed, 1102 insertions(+), 719 deletions(-)
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
index a7d48b3..e5d5ce2 100644
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -103,6 +103,11 @@ def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
"true",
"Enable spilling of VGPRs to scratch memory">;
+def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
+ "SGPRInitBug",
+ "true",
+ "VI SGPR initilization bug requiring a fixed SGPR allocation size">;
+
class SubtargetFeatureFetchLimit <string Value> :
SubtargetFeature <"fetch"#Value,
"TexVTXClauseSize",
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index 92bc314..d911014 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -105,8 +105,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SetupMachineFunction(MF);
- EmitFunctionHeader();
-
MCContext &Context = getObjFileLowering().getContext();
const MCSectionELF *ConfigSection =
Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
@@ -129,7 +127,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
HexLines.clear();
DisasmLineMaxLen = 0;
- OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
EmitFunctionBody();
if (isVerbose()) {
@@ -339,6 +336,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.NumVGPR = MaxVGPR + 1;
ProgInfo.NumSGPR = MaxSGPR + 1;
+ if (STM.hasSGPRInitBug()) {
+ if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG)
+ llvm_unreachable("Too many SGPRs used with the SGPR init bug");
+
+ ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+ }
+
ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
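A worked example of the clamp above (illustrative numbers, not from this commit): on parts with the bug, a kernel that uses fewer SGPRs still reports the fixed count, so the block count computed below becomes constant as well.

    // Sketch, assuming FIXED_SGPR_COUNT_FOR_INIT_BUG == 80 (see AMDGPUSubtarget.h).
    unsigned NumSGPR = 50;                    // SGPRs the kernel actually uses
    const unsigned Fixed = 80;                // FIXED_SGPR_COUNT_FOR_INIT_BUG
    assert(NumSGPR <= Fixed && "Too many SGPRs used with the SGPR init bug");
    NumSGPR = Fixed;                          // always report the fixed allocation
    unsigned SGPRBlocks = (NumSGPR - 1) / 8;  // == 9, independent of real usage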
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index b5ab703..7341cd9 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -95,7 +95,8 @@ private:
SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
SDValue &TFE) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &Offset) const;
+ SDValue &SOffset, SDValue &Offset, SDValue &GLC,
+ SDValue &SLC, SDValue &TFE) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
SDValue &SLC) const;
@@ -120,6 +121,11 @@ private:
SDNode *SelectADD_SUB_I64(SDNode *N);
SDNode *SelectDIV_SCALE(SDNode *N);
+ SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
+ uint32_t Offset, uint32_t Width);
+ SDNode *SelectS_BFEFromShifts(SDNode *N);
+ SDNode *SelectS_BFE(SDNode *N);
+
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
@@ -519,21 +525,11 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
bool Signed = Opc == AMDGPUISD::BFE_I32;
- // Transformation function, pack the offset and width of a BFE into
- // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
- // source, bits [5:0] contain the offset and bits [22:16] the width.
-
uint32_t OffsetVal = Offset->getZExtValue();
uint32_t WidthVal = Width->getZExtValue();
- uint32_t PackedVal = OffsetVal | WidthVal << 16;
-
- SDValue PackedOffsetWidth = CurDAG->getTargetConstant(PackedVal, MVT::i32);
- return CurDAG->getMachineNode(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
- SDLoc(N),
- MVT::i32,
- N->getOperand(0),
- PackedOffsetWidth);
+ return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
+ N->getOperand(0), OffsetVal, WidthVal);
}
case AMDGPUISD::DIV_SCALE: {
@@ -547,6 +543,14 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
case ISD::ADDRSPACECAST:
return SelectAddrSpaceCast(N);
+ case ISD::AND:
+ case ISD::SRL:
+ case ISD::SRA:
+ if (N->getValueType(0) != MVT::i32 ||
+ Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ break;
+
+ return SelectS_BFE(N);
}
return SelectCode(N);
@@ -966,8 +970,9 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset,
- SDValue &Offset) const {
- SDValue Ptr, Offen, Idxen, Addr64, GLC, SLC, TFE;
+ SDValue &Offset, SDValue &GLC,
+ SDValue &SLC, SDValue &TFE) const {
+ SDValue Ptr, Offen, Idxen, Addr64;
SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
GLC, SLC, TFE);
@@ -991,8 +996,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &Offset,
SDValue &SLC) const {
SLC = CurDAG->getTargetConstant(0, MVT::i1);
+ SDValue GLC, TFE;
- return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset);
+ return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
@@ -1147,6 +1153,95 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
}
+SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
+ uint32_t Offset, uint32_t Width) {
+ // Transformation function, pack the offset and width of a BFE into
+ // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
+ // source, bits [5:0] contain the offset and bits [22:16] the width.
+ uint32_t PackedVal = Offset | (Width << 16);
+ SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, MVT::i32);
+
+ return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
+}
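A quick illustration of the packed constant, with values chosen here for the example: extracting 16 bits starting at bit 8 packs as follows.

    uint32_t Offset = 8, Width = 16;             // example field
    uint32_t PackedVal = Offset | (Width << 16); // == 0x00100008
    // bits [5:0] = 8 (offset), bits [22:16] = 16 (width)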
+
+SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
+ // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
+ // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
+ // Predicate: 0 < b <= c < 32
+
+ const SDValue &Shl = N->getOperand(0);
+ ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+
+ if (B && C) {
+ uint32_t BVal = B->getZExtValue();
+ uint32_t CVal = C->getZExtValue();
+
+ if (0 < BVal && BVal <= CVal && CVal < 32) {
+ bool Signed = N->getOpcode() == ISD::SRA;
+ unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
+
+ return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
+ CVal - BVal, 32 - CVal);
+ }
+ }
+ return SelectCode(N);
+}
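The predicate makes the rewrite sound: shifting left by b discards the top b bits, and the right shift by c then yields the (32 - c)-bit field that starts at bit (c - b) of the original value. An illustrative check, not part of the commit:

    uint32_t x = 0xAABBCCDD;
    uint32_t ViaShifts = (x << 8) >> 24;      // b = 8, c = 24 -> 0xBB
    uint32_t ViaBFE = (x >> (24 - 8)) & 0xFF; // offset 16, width 8 -> 0xBB
    assert(ViaShifts == ViaBFE);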
+
+SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::AND:
+ if (N->getOperand(0).getOpcode() == ISD::SRL) {
+ // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
+ // Predicate: isMask(mask)
+ const SDValue &Srl = N->getOperand(0);
+ ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
+
+ if (Shift && Mask) {
+ uint32_t ShiftVal = Shift->getZExtValue();
+ uint32_t MaskVal = Mask->getZExtValue();
+
+ if (isMask_32(MaskVal)) {
+ uint32_t WidthVal = countPopulation(MaskVal);
+
+ return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
+ ShiftVal, WidthVal);
+ }
+ }
+ }
+ break;
+ case ISD::SRL:
+ if (N->getOperand(0).getOpcode() == ISD::AND) {
+ // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
+ // Predicate: isMask(mask >> b)
+ const SDValue &And = N->getOperand(0);
+ ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
+
+ if (Shift && Mask) {
+ uint32_t ShiftVal = Shift->getZExtValue();
+ uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
+
+ if (isMask_32(MaskVal)) {
+ uint32_t WidthVal = countPopulation(MaskVal);
+
+ return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
+ ShiftVal, WidthVal);
+ }
+ }
+ } else if (N->getOperand(0).getOpcode() == ISD::SHL)
+ return SelectS_BFEFromShifts(N);
+ break;
+ case ISD::SRA:
+ if (N->getOperand(0).getOpcode() == ISD::SHL)
+ return SelectS_BFEFromShifts(N);
+ break;
+ }
+
+ return SelectCode(N);
+}
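Both AND forms depend on the (possibly pre-shifted) mask being a contiguous run of low bits, so isMask_32 gates the match and the popcount gives the field width. For example (illustrative values):

    uint32_t x = 0x12345678;
    uint32_t ViaAndSrl = (x >> 4) & 0xFF; // matched as a bitfield extract
    // isMask_32(0xFF) is true and countPopulation(0xFF) == 8, so the
    // selected S_BFE_U32 uses offset 4 and width 8, yielding 0x67.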
+
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 4707279..62a33fa 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -885,9 +885,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return LowerIntrinsicIABS(Op, DAG);
case AMDGPUIntrinsic::AMDGPU_lrp:
return LowerIntrinsicLRP(Op, DAG);
- case AMDGPUIntrinsic::AMDGPU_fract:
- case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
- return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_clamp:
case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
index f4de2d6..f0f10ca 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
void AMDGPUInstrInfo::anchor() {}
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUGenInstrInfo(-1,-1), RI(st), ST(st) { }
+ : AMDGPUGenInstrInfo(-1, -1), ST(st) {}
const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
return RI;
@@ -152,26 +152,22 @@ bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const
return true;
}
-
-MachineInstr *
-AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const {
// TODO: Implement this function
return nullptr;
}
-MachineInstr*
-AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr *LoadMI) const {
+MachineInstr *
+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const {
// TODO: Implement this function
return nullptr;
}
-bool
-AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
+bool AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ ArrayRef<unsigned> Ops) const {
// TODO: Implement this function
return false;
}
@@ -360,8 +356,8 @@ static enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
}
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
- int MCOp = AMDGPU::getMCOpcode(Opcode,
- AMDGPUSubtargetToSISubtarget(RI.ST.getGeneration()));
+ int MCOp = AMDGPU::getMCOpcode(
+ Opcode, AMDGPUSubtargetToSISubtarget(ST.getGeneration()));
// -1 means that Opcode is already a native instruction.
if (MCOp == -1)
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index 202183c..07042b5 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -85,14 +85,13 @@ public:
const TargetRegisterInfo *TRI) const override;
protected:
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
int FrameIndex) const override;
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
MachineInstr *LoadMI) const override;
+
public:
/// \returns the smallest register index that will be accessed by an indirect
/// read or write or -1 if indirect addressing is not used by this program.
@@ -103,7 +102,7 @@ public:
int getIndirectIndexEnd(const MachineFunction &MF) const;
bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const override;
+ ArrayRef<unsigned> Ops) const override;
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr *> &NewMIs) const override;
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 849b241..4d08201 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -578,22 +578,20 @@ class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
// Bitfield extract patterns
-/*
-
-XXX: The BFE pattern is not working correctly because the XForm is not being
-applied.
+def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
+ return isMask_32(N->getZExtValue());
+}]>;
-def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>;
-def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}],
- SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(countTrailingOnes(N->getZExtValue()), MVT::i32);}]>>;
+def IMMPopCount : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()),
+ MVT::i32);
+}]>;
-class BFEPattern <Instruction BFE> : Pat <
- (and (srl i32:$x, legalshift32:$y), bfemask:$z),
- (BFE $x, $y, $z)
+class BFEPattern <Instruction BFE, Instruction MOV> : Pat <
+ (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
+ (BFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
>;
-*/
-
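The IMMPopCount transform turns the matched mask into the width operand of the pattern above; a sketch of the two helpers it relies on (both from llvm/Support/MathExtras.h), with an example mask:

    uint32_t Mask = 0x7F;                    // low 7 bits set
    assert(isMask_32(Mask));                 // contiguous from bit 0, so it matches
    uint32_t Width = countPopulation(Mask);  // == 7, materialized via the MOV operand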
// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : Pat <
(rotr i32:$src0, i32:$src1),
diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td
index eee9c29..ab489cd 100644
--- a/lib/Target/R600/AMDGPUIntrinsics.td
+++ b/lib/Target/R600/AMDGPUIntrinsics.td
@@ -68,6 +68,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_barrier_local : Intrinsic<[], [], []>;
def int_AMDGPU_barrier_global : Intrinsic<[], [], []>;
}
diff --git a/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
index b81fef4..175dcd8 100644
--- a/lib/Target/R600/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "amdgpu-promote-alloca"
@@ -87,7 +88,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
continue;
if (Use->getParent()->getParent() == &F)
LocalMemAvailable -=
- Mod->getDataLayout()->getTypeAllocSize(GVTy->getElementType());
+ Mod->getDataLayout().getTypeAllocSize(GVTy->getElementType());
}
}
}
@@ -276,8 +277,8 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
// value from the reqd_work_group_size function attribute if it is
// available.
unsigned WorkGroupSize = 256;
- int AllocaSize = WorkGroupSize *
- Mod->getDataLayout()->getTypeAllocSize(AllocaTy);
+ int AllocaSize =
+ WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);
if (AllocaSize > LocalMemAvailable) {
DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
@@ -294,9 +295,9 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
DEBUG(dbgs() << "Promoting alloca to local memory\n");
LocalMemAvailable -= AllocaSize;
+ Type *GVTy = ArrayType::get(I.getAllocatedType(), 256);
GlobalVariable *GV = new GlobalVariable(
- *Mod, ArrayType::get(I.getAllocatedType(), 256), false,
- GlobalValue::ExternalLinkage, 0, I.getName(), 0,
+ *Mod, GVTy, false, GlobalValue::ExternalLinkage, 0, I.getName(), 0,
GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
FunctionType *FTy = FunctionType::get(
@@ -332,7 +333,7 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
Indices.push_back(TID);
- Value *Offset = Builder.CreateGEP(GV, Indices);
+ Value *Offset = Builder.CreateGEP(GVTy, GV, Indices);
I.mutateType(Offset->getType());
I.replaceAllUsesWith(Offset);
I.eraseFromParent();
diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp
index 57b054b..3ca0eca 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.cpp
+++ b/lib/Target/R600/AMDGPURegisterInfo.cpp
@@ -17,10 +17,7 @@
using namespace llvm;
-AMDGPURegisterInfo::AMDGPURegisterInfo(const AMDGPUSubtarget &st)
-: AMDGPUGenRegisterInfo(0),
- ST(st)
- { }
+AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {}
//===----------------------------------------------------------------------===//
// Function handling callbacks - Functions are a seldom used feature of GPUS, so
diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h
index f27576a..cfd800b 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.h
+++ b/lib/Target/R600/AMDGPURegisterInfo.h
@@ -30,9 +30,8 @@ class TargetInstrInfo;
struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
static const MCPhysReg CalleeSavedReg;
- const AMDGPUSubtarget &ST;
- AMDGPURegisterInfo(const AMDGPUSubtarget &st);
+ AMDGPURegisterInfo();
BitVector getReservedRegs(const MachineFunction &MF) const override {
assert(!"Unimplemented"); return BitVector();
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index 70c8525..0ead652 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -70,7 +70,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true),
EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false),
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
- EnableVGPRSpilling(false),
+ EnableVGPRSpilling(false), SGPRInitBug(false),
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)
0),
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 1b0122c..403a3e4 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -44,6 +44,10 @@ public:
VOLCANIC_ISLANDS,
};
+ enum {
+ FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
+ };
+
private:
std::string DevName;
bool Is64bit;
@@ -66,6 +70,7 @@ private:
bool CFALUBug;
int LocalMemorySize;
bool EnableVGPRSpilling;
+ bool SGPRInitBug;
AMDGPUFrameLowering FrameLowering;
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
@@ -206,6 +211,10 @@ public:
return LocalMemorySize;
}
+ bool hasSGPRInitBug() const {
+ return SGPRInitBug;
+ }
+
unsigned getAmdKernelCodeChipID() const;
bool enableMachineScheduler() const override {
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index a862f3c..cb95835 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -71,10 +71,10 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
TargetOptions Options, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OptLevel)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
- DL(computeDataLayout(TT)),
- TLOF(new TargetLoweringObjectFileELF()),
- Subtarget(TT, CPU, FS, *this), IntrinsicInfo() {
+ : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM,
+ OptLevel),
+ TLOF(new TargetLoweringObjectFileELF()), Subtarget(TT, CPU, FS, *this),
+ IntrinsicInfo() {
setRequiresStructuredCFG(true);
initAsmInfo();
}
@@ -118,7 +118,7 @@ public:
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
return createR600MachineScheduler(C);
return nullptr;
@@ -174,7 +174,7 @@ void AMDGPUPassConfig::addIRPasses() {
}
void AMDGPUPassConfig::addCodeGenPrepare() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
if (ST.isPromoteAllocaEnabled()) {
addPass(createAMDGPUPromoteAlloca(ST));
addPass(createSROAPass());
@@ -184,7 +184,7 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
bool
AMDGPUPassConfig::addPreISel() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
addPass(createFlattenCFGPass());
if (ST.IsIRStructurizerEnabled())
addPass(createStructurizeCFGPass());
@@ -211,7 +211,7 @@ void R600PassConfig::addPreRegAlloc() {
}
void R600PassConfig::addPreSched2() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
addPass(createR600EmitClauseMarkers(), false);
if (ST.isIfCvtEnabled())
addPass(&IfConverterID, false);
@@ -251,15 +251,15 @@ bool GCNPassConfig::addInstSelector() {
}
void GCNPassConfig::addPreRegAlloc() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
- // Don't do this with no optimizations since it throws away debug info by
- // merging nonadjacent loads.
+ // Don't do this with no optimizations since it throws away debug info by
+ // merging nonadjacent loads.
- // This should be run after scheduling, but before register allocation. It
- // also need extra copies to the address operand to be eliminated.
- initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
- insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
+ // This should be run after scheduling, but before register allocation. It
+ // also needs extra copies to the address operand to be eliminated.
+ initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
+ insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
}
addPass(createSIShrinkInstructionsPass(), false);
addPass(createSIFixSGPRLiveRangesPass(), false);
diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h
index a691536..785c119 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.h
+++ b/lib/Target/R600/AMDGPUTargetMachine.h
@@ -30,7 +30,6 @@ namespace llvm {
class AMDGPUTargetMachine : public LLVMTargetMachine {
private:
- const DataLayout DL;
protected:
TargetLoweringObjectFile *TLOF;
@@ -42,12 +41,9 @@ public:
StringRef CPU, TargetOptions Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
~AMDGPUTargetMachine();
- // FIXME: This is currently broken, the DataLayout needs to move to
- // the target machine.
- const DataLayout *getDataLayout() const override {
- return &DL;
- }
- const AMDGPUSubtarget *getSubtargetImpl() const override {
+
+ const AMDGPUSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override {
return &Subtarget;
}
const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
diff --git a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
index 68f4600..96edc41 100644
--- a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
@@ -36,13 +37,15 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
// TODO: Do we want runtime unrolling?
for (const BasicBlock *BB : L->getBlocks()) {
+ const DataLayout &DL = BB->getModule()->getDataLayout();
for (const Instruction &I : *BB) {
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
continue;
const Value *Ptr = GEP->getPointerOperand();
- const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr));
+ const AllocaInst *Alloca =
+ dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
if (Alloca) {
// We want to do whatever we can to limit the number of alloca
// instructions that make it through to the code generator. allocas
diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
index ee6e8ec..ee6551b 100644
--- a/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -10,8 +10,8 @@
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
-#include "R600InstrInfo.h"
#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallVector.h"
@@ -30,6 +30,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include <deque>
using namespace llvm;
@@ -165,6 +166,7 @@ public:
TRI = &TII->getRegisterInfo();
DEBUG(MF.dump(););
OrderedBlks.clear();
+ Visited.clear();
FuncRep = &MF;
MLI = &getAnalysis<MachineLoopInfo>();
DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
@@ -621,7 +623,7 @@ DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
++It) {
MachineInstr *instr = &(*It);
- if (instr->getDebugLoc().isUnknown() == false)
+ if (!instr->getDebugLoc().isUnknown())
DL = instr->getDebugLoc();
}
return DL;
@@ -1075,21 +1077,19 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
}
int AMDGPUCFGStructurizer::loopendPatternMatch() {
- std::vector<MachineLoop *> NestedLoops;
- for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end(); It != E;
- ++It)
- for (MachineLoop *ML : depth_first(*It))
- NestedLoops.push_back(ML);
+ std::deque<MachineLoop *> NestedLoops;
+ for (auto &It: *MLI)
+ for (MachineLoop *ML : depth_first(It))
+ NestedLoops.push_front(ML);
if (NestedLoops.size() == 0)
return 0;
- // Process nested loop outside->inside, so "continue" to a outside loop won't
- // be mistaken as "break" of the current loop.
+ // Process nested loops outside->inside (we did push_front),
+ // so "continue" to an outside loop won't be mistaken as "break"
+ // of the current loop.
int Num = 0;
- for (std::vector<MachineLoop *>::reverse_iterator It = NestedLoops.rbegin(),
- E = NestedLoops.rend(); It != E; ++It) {
- MachineLoop *ExaminedLoop = *It;
+ for (MachineLoop *ExaminedLoop : NestedLoops) {
if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
continue;
DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
@@ -1611,7 +1611,7 @@ void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
- if (UseContinueLogical == false) {
+ if (!UseContinueLogical) {
int BranchOpcode =
TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
getBranchZeroOpcode(OldOpcode);
diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
index 3b4ba1a..49f0f23 100644
--- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
@@ -46,10 +46,9 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
/// }
public:
- AMDGPUAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &_MII,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(STI), Parser(Parser) {
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td
index 9f9472c..5560146 100644
--- a/lib/Target/R600/EvergreenInstructions.td
+++ b/lib/Target/R600/EvergreenInstructions.td
@@ -287,9 +287,8 @@ def BFE_INT_eg : R600_3OP <0x5, "BFE_INT",
VecALU
>;
-// XXX: This pattern is broken, disabling for now. See comment in
-// AMDGPUInstructions.td for more info.
-// def : BFEPattern <BFE_UINT_eg>;
+def : BFEPattern <BFE_UINT_eg, MOV_IMM_I32>;
+
def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
[(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
VecALU
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index b66ed10..d62fd3f 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -99,6 +99,12 @@ void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo,
printU8ImmDecOperand(MI, OpNo, O);
}
+void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " gds";
+}
+
void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
if (MI->getOperand(OpNo).getImm())
@@ -208,6 +214,16 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
O << Type << '[' << RegIdx << ':' << (RegIdx + NumRegs - 1) << ']';
}
+void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3)
+ O << "_e64 ";
+ else
+ O << "_e32 ";
+
+ printOperand(MI, OpNo, O);
+}
+
void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, raw_ostream &O) {
int32_t SImm = static_cast<int32_t>(Imm);
if (SImm >= -16 && SImm <= 64) {
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
index 1d43c7a..5289718 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -44,10 +44,12 @@ private:
void printDSOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printDSOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printDSOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printGDS(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printGLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printTFE(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printRegOperand(unsigned RegNo, raw_ostream &O);
+ void printVOPDst(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printImmediate32(uint32_t I, raw_ostream &O);
void printImmediate64(uint64_t I, raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 83403ba..fb2deef 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -17,6 +17,7 @@
#include "InstPrinter/AMDGPUInstPrinter.h"
#include "SIDefines.h"
#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -72,50 +73,19 @@ static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
return new AMDGPUInstPrinter(MAI, MII, MRI);
}
-static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
- MCContext &Ctx) {
- if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
- return createSIMCCodeEmitter(MCII, MRI, STI, Ctx);
- } else {
- return createR600MCCodeEmitter(MCII, MRI, STI);
- }
-}
-
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &_OS, MCCodeEmitter *_Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, false);
-}
-
extern "C" void LLVMInitializeR600TargetMC() {
+ for (Target *T : {&TheAMDGPUTarget, &TheGCNTarget}) {
+ RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T);
+
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createAMDGPUMCCodeGenInfo);
+ TargetRegistry::RegisterMCInstrInfo(*T, createAMDGPUMCInstrInfo);
+ TargetRegistry::RegisterMCRegInfo(*T, createAMDGPUMCRegisterInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo);
+ TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter);
+ TargetRegistry::RegisterMCAsmBackend(*T, createAMDGPUAsmBackend);
+ }
- RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
- RegisterMCAsmInfo<AMDGPUMCAsmInfo> Z(TheGCNTarget);
-
- TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheGCNTarget, createAMDGPUMCCodeGenInfo);
-
- TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheGCNTarget, createAMDGPUMCInstrInfo);
-
- TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheGCNTarget, createAMDGPUMCRegisterInfo);
-
- TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheGCNTarget, createAMDGPUMCSubtargetInfo);
-
- TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheGCNTarget, createAMDGPUMCInstPrinter);
-
- TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createAMDGPUMCCodeEmitter);
-
- TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheGCNTarget, createAMDGPUAsmBackend);
-
- TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheGCNTarget, createMCStreamer);
+ TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget,
+ createR600MCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createSIMCCodeEmitter);
}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
index bc8cd53..23f0196 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -16,6 +16,7 @@
#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H
#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
#include "llvm/ADT/StringRef.h"
namespace llvm {
@@ -34,11 +35,10 @@ extern Target TheGCNTarget;
MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI);
+ MCContext &Ctx);
MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI,
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index 8a555ff..fa25f59 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -81,8 +81,8 @@ enum FCInstr {
};
MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
return new R600MCCodeEmitter(MCII, MRI);
}
diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
index 7e23772..760aa37 100644
--- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -72,7 +72,6 @@ public:
MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new SIMCCodeEmitter(MCII, MRI, Ctx);
}
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
index fb5aa61..82c6d13 100644
--- a/lib/Target/R600/Processors.td
+++ b/lib/Target/R600/Processors.td
@@ -119,8 +119,12 @@ def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>;
// Volcanic Islands
//===----------------------------------------------------------------------===//
-def : ProcessorModel<"tonga", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
+def : ProcessorModel<"tonga", SIQuarterSpeedModel,
+ [FeatureVolcanicIslands, FeatureSGPRInitBug]
+>;
-def : ProcessorModel<"iceland", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
+def : ProcessorModel<"iceland", SIQuarterSpeedModel,
+ [FeatureVolcanicIslands, FeatureSGPRInitBug]
+>;
def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
diff --git a/lib/Target/R600/R600ClauseMergePass.cpp b/lib/Target/R600/R600ClauseMergePass.cpp
index f07be00..3cb9021 100644
--- a/lib/Target/R600/R600ClauseMergePass.cpp
+++ b/lib/Target/R600/R600ClauseMergePass.cpp
@@ -14,11 +14,11 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
-#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index c738611..a34e2dc 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -837,6 +837,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::AMDGPU_rsq:
// XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_fract:
+ case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
+ return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
}
// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
break;
@@ -1479,8 +1483,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
// Lower loads constant address space global variable loads
if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
- isa<GlobalVariable>(
- GetUnderlyingObject(LoadNode->getMemOperand()->getValue()))) {
+ isa<GlobalVariable>(GetUnderlyingObject(
+ LoadNode->getMemOperand()->getValue(), *getDataLayout()))) {
SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
@@ -1867,7 +1871,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
SelectCC.getOperand(0), // LHS
SelectCC.getOperand(1), // RHS
DAG.getConstant(-1, MVT::i32), // True
- DAG.getConstant(0, MVT::i32), // Flase
+ DAG.getConstant(0, MVT::i32), // False
SelectCC.getOperand(4)); // CC
break;
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 653fd0d..5f0bdf3 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -29,9 +29,7 @@ using namespace llvm;
#include "AMDGPUGenDFAPacketizer.inc"
R600InstrInfo::R600InstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUInstrInfo(st),
- RI(st)
- { }
+ : AMDGPUInstrInfo(st), RI() {}
const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
return RI;
@@ -268,9 +266,8 @@ int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
return getOperandIdx(Opcode, OpTable[SrcNum]);
}
-#define SRC_SEL_ROWS 11
int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
- static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
+ static const unsigned SrcSelTable[][2] = {
{AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
{AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
{AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
@@ -284,14 +281,13 @@ int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
{AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
};
- for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
- if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
- return getOperandIdx(Opcode, SrcSelTable[i][1]);
+ for (const auto &Row : SrcSelTable) {
+ if (getOperandIdx(Opcode, Row[0]) == (int)SrcIdx) {
+ return getOperandIdx(Opcode, Row[1]);
}
}
return -1;
}
-#undef SRC_SEL_ROWS
SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr *MI) const {
diff --git a/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
index 742c0e0..0c06ccc 100644
--- a/lib/Target/R600/R600OptimizeVectorRegisters.cpp
+++ b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
@@ -27,10 +27,9 @@
/// to reduce MOV count.
//===----------------------------------------------------------------------===//
-#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
-#include "R600InstrInfo.h"
#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -38,6 +37,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp
index dc95675..fb0359c 100644
--- a/lib/Target/R600/R600RegisterInfo.cpp
+++ b/lib/Target/R600/R600RegisterInfo.cpp
@@ -20,14 +20,16 @@
using namespace llvm;
-R600RegisterInfo::R600RegisterInfo(const AMDGPUSubtarget &st)
-: AMDGPURegisterInfo(st)
- { RCW.RegWeight = 0; RCW.WeightLimit = 0;}
+R600RegisterInfo::R600RegisterInfo() : AMDGPURegisterInfo() {
+ RCW.RegWeight = 0;
+ RCW.WeightLimit = 0;
+}
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(ST.getInstrInfo());
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
Reserved.set(AMDGPU::ZERO);
Reserved.set(AMDGPU::HALF);
diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h
index f1a8a41..9713e60 100644
--- a/lib/Target/R600/R600RegisterInfo.h
+++ b/lib/Target/R600/R600RegisterInfo.h
@@ -24,7 +24,7 @@ class AMDGPUSubtarget;
struct R600RegisterInfo : public AMDGPURegisterInfo {
RegClassWeight RCW;
- R600RegisterInfo(const AMDGPUSubtarget &st);
+ R600RegisterInfo();
BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/lib/Target/R600/SIFixSGPRLiveRanges.cpp b/lib/Target/R600/SIFixSGPRLiveRanges.cpp
index f34c375..0c54446 100644
--- a/lib/Target/R600/SIFixSGPRLiveRanges.cpp
+++ b/lib/Target/R600/SIFixSGPRLiveRanges.cpp
@@ -54,6 +54,7 @@
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/lib/Target/R600/SIFoldOperands.cpp b/lib/Target/R600/SIFoldOperands.cpp
index ae4b05d..7ba5a6d 100644
--- a/lib/Target/R600/SIFoldOperands.cpp
+++ b/lib/Target/R600/SIFoldOperands.cpp
@@ -17,9 +17,10 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "si-fold-operands"
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 7d794b8..bd0c3c2 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -172,16 +172,12 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
setOperationAction(ISD::UDIV, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
- // We only support LOAD/STORE and vector manipulation ops for vectors
- // with > 4 elements.
- MVT VecTypes[] = {
- MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32
- };
-
setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
setOperationAction(ISD::SELECT, MVT::i1, Promote);
- for (MVT VT : VecTypes) {
+ // We only support LOAD/STORE and vector manipulation ops for vectors
+ // with > 4 elements.
+ for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch(Op) {
case ISD::LOAD:
@@ -206,10 +202,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
}
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FDIV, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Custom);
@@ -932,6 +928,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
+
+ case AMDGPUIntrinsic::AMDGPU_fract:
+ case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
+ return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1),
+ DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1)));
+
default:
return AMDGPUTargetLowering::LowerOperation(Op, DAG);
}
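The legacy fract intrinsics are now expanded on SI with the identity fract(x) = x - floor(x); e.g. fract(2.75) = 2.75 - 2.0 = 0.75. A minimal scalar sketch of the same expansion:

    #include <cmath>
    float fract(float X) { return X - std::floor(X); } // fract(2.75f) == 0.75f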
@@ -1346,6 +1348,35 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
return SDValue();
}
+/// \brief Return true if the given offset Size in bytes can be folded into
+/// the immediate offsets of a memory instruction for the given address space.
+static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
+ const AMDGPUSubtarget &STI) {
+ switch (AS) {
+ case AMDGPUAS::GLOBAL_ADDRESS: {
+ // MUBUF instructions have a 12-bit offset in bytes.
+ return isUInt<12>(OffsetSize);
+ }
+ case AMDGPUAS::CONSTANT_ADDRESS: {
+ // SMRD instructions have an 8-bit offset in dwords on SI and
+ // a 20-bit offset in bytes on VI.
+ if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return isUInt<20>(OffsetSize);
+ else
+ return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
+ }
+ case AMDGPUAS::LOCAL_ADDRESS:
+ case AMDGPUAS::REGION_ADDRESS: {
+ // The single offset versions have a 16-bit offset in bytes.
+ return isUInt<16>(OffsetSize);
+ }
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ // Indirect register addressing does not use any offsets.
+ default:
+ return 0;
+ }
+}
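Boundary values for the limits above, shown for illustration: a GLOBAL (MUBUF) offset folds up to 4095 bytes; a CONSTANT (SMRD) offset folds up to 1020 bytes on SI (an 8-bit dword count) and up to 2^20 - 1 bytes on VI; LOCAL/REGION (DS) offsets fold up to 65535 bytes.

    // e.g. on SI: 256 bytes  -> 256/4  == 64,  fits in 8 bits, foldable;
    //             2048 bytes -> 2048/4 == 512, needs 10 bits, not foldable.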
+
// (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
// This is a variant of
@@ -1377,13 +1408,10 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
if (!CAdd)
return SDValue();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
-
// If the resulting offset is too large, we can't fold it into the addressing
// mode offset.
APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue();
- if (!TII->canFoldOffset(Offset.getZExtValue(), AddrSpace))
+ if (!canFoldOffset(Offset.getZExtValue(), AddrSpace, *Subtarget))
return SDValue();
SelectionDAG &DAG = DCI.DAG;
@@ -1595,6 +1623,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case AMDGPUISD::UMAX:
case AMDGPUISD::UMIN: {
if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
+ N->getValueType(0) != MVT::f64 &&
getTargetMachine().getOptLevel() > CodeGenOpt::None)
return performMin3Max3Combine(N, DCI);
break;
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index 50f20ac..90a37f1 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -259,7 +259,8 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
return;
}
- if (TRI->ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
+ AMDGPUSubtarget::VOLCANIC_ISLANDS) {
// Any occurence of consecutive VMEM or SMEM instructions forms a VMEM
// or SMEM clause, respectively.
//
@@ -412,7 +413,8 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
- if (TRI->ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <
+ AMDGPUSubtarget::VOLCANIC_ISLANDS)
return;
// There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index c90c741..4167590 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -83,6 +83,9 @@ class Enc64 {
int Size = 8;
}
+class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
+def VOPDstVCC : VOPDstOperand <VCCReg>;
+
let Uses = [EXEC] in {
class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
@@ -96,7 +99,7 @@ class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
}
class VOPCCommon <dag ins, string asm, list<dag> pattern> :
- VOPAnyCommon <(outs VCCReg:$dst), ins, asm, pattern> {
+ VOPAnyCommon <(outs VOPDstVCC:$dst), ins, asm, pattern> {
let DisableEncoding = "$dst";
let VOPC = 1;
@@ -577,6 +580,12 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> :
let DS = 1;
let UseNamedOperandTable = 1;
let DisableEncoding = "$m0";
+
+ // Most instructions load and store data, so set this as the default.
+ let mayLoad = 1;
+ let mayStore = 1;
+
+ let hasSideEffects = 0;
let SchedRW = [WriteLDS];
}
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 4f1e5ad..ba98ad7 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -28,7 +28,7 @@
using namespace llvm;
SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUInstrInfo(st), RI(st) {}
+ : AMDGPUInstrInfo(st), RI() {}
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
@@ -120,12 +120,20 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
if (Load0->getOperand(0) != Load1->getOperand(0))
return false;
+ const ConstantSDNode *Load0Offset =
+ dyn_cast<ConstantSDNode>(Load0->getOperand(1));
+ const ConstantSDNode *Load1Offset =
+ dyn_cast<ConstantSDNode>(Load1->getOperand(1));
+
+ if (!Load0Offset || !Load1Offset)
+ return false;
+
// Check chain.
if (findChainOperand(Load0) != findChainOperand(Load1))
return false;
- Offset0 = cast<ConstantSDNode>(Load0->getOperand(1))->getZExtValue();
- Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue();
+ Offset0 = Load0Offset->getZExtValue();
+ Offset1 = Load1Offset->getZExtValue();
return true;
}
@@ -418,7 +426,9 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
}
-unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
+unsigned SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
+ const unsigned Opcode = MI.getOpcode();
+
int NewOpc;
// Try to map original to commuted opcode
@@ -583,10 +593,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
unsigned InputPtrReg =
TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
- static const unsigned TIDIGRegs[3] = {
- TIDIGXReg, TIDIGYReg, TIDIGZReg
- };
- for (unsigned Reg : TIDIGRegs) {
+ for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
if (!Entry.isLiveIn(Reg))
Entry.addLiveIn(Reg);
}
@@ -720,6 +727,26 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MI->eraseFromParent();
break;
}
+
+ case AMDGPU::V_CNDMASK_B64_PSEUDO: {
+ unsigned Dst = MI->getOperand(0).getReg();
+ unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
+ unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
+ unsigned Src0 = MI->getOperand(1).getReg();
+ unsigned Src1 = MI->getOperand(2).getReg();
+ const MachineOperand &SrcCond = MI->getOperand(3);
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
+ .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
+ .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
+ .addOperand(SrcCond);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
+ .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
+ .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
+ .addOperand(SrcCond);
+ MI->eraseFromParent();
+ break;
+ }
}
return true;
}
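Since there is no native 64-bit conditional move, the new pseudo expands into two V_CNDMASK_B32_e64 instructions on the sub0/sub1 halves with a shared condition. Per lane, the expansion computes the following (an illustrative sketch, not the in-tree code):

    #include <cstdint>
    uint64_t CndMaskB64(uint64_t Src0, uint64_t Src1, bool Cond) {
      uint32_t Lo = Cond ? uint32_t(Src1) : uint32_t(Src0);             // sub0 half
      uint32_t Hi = Cond ? uint32_t(Src1 >> 32) : uint32_t(Src0 >> 32); // sub1 half
      return (uint64_t(Hi) << 32) | Lo;
    }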
@@ -792,7 +819,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
}
if (MI)
- MI->setDesc(get(commuteOpcode(MI->getOpcode())));
+ MI->setDesc(get(commuteOpcode(*MI)));
return MI;
}
@@ -1172,32 +1199,6 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
return RI.opCanUseInlineConstant(OpInfo.OperandType);
}
-bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) const {
- switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS: {
- // MUBUF instructions a 12-bit offset in bytes.
- return isUInt<12>(OffsetSize);
- }
- case AMDGPUAS::CONSTANT_ADDRESS: {
- // SMRD instructions have an 8-bit offset in dwords on SI and
- // a 20-bit offset in bytes on VI.
- if (RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- return isUInt<20>(OffsetSize);
- else
- return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
- }
- case AMDGPUAS::LOCAL_ADDRESS:
- case AMDGPUAS::REGION_ADDRESS: {
- // The single offset versions have a 16-bit offset in bytes.
- return isUInt<16>(OffsetSize);
- }
- case AMDGPUAS::PRIVATE_ADDRESS:
- // Indirect register addressing does not use any offsets.
- default:
- return 0;
- }
-}
-
bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
int Op32 = AMDGPU::getVOPe32(Opcode);
if (Op32 == -1)
@@ -1405,6 +1406,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
@@ -1423,6 +1425,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
+ case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
}
}
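
The two new rows extend the scalar-to-vector opcode table. A toy version of the lookup, with hypothetical enum values standing in for the real AMDGPU opcode enums:

#include <cstdio>

enum Opcode { S_BFM_B32, S_FLBIT_I32, V_BFM_B32_e64, V_FFBH_I32_e64, NO_VALU = -1 };

// Each SALU opcode maps to the VALU opcode with matching semantics,
// or NO_VALU when there is no vector equivalent.
int toVALU(Opcode SOp) {
  switch (SOp) {
  case S_BFM_B32:   return V_BFM_B32_e64;   // bitfield mask
  case S_FLBIT_I32: return V_FFBH_I32_e64;  // signed find-first-high
  default:          return NO_VALU;
  }
}

int main() { printf("%d\n", toVALU(S_BFM_B32)); return 0; }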
@@ -1865,12 +1868,15 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
MachineInstr *Addr64 =
BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
.addOperand(*VData)
- .addOperand(*SRsrc)
.addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
// This will be replaced later
// with the new value of vaddr.
+ .addOperand(*SRsrc)
.addOperand(*SOffset)
- .addOperand(*Offset);
+ .addOperand(*Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0); // tfe
MI->removeFromParent();
MI = Addr64;
@@ -1914,14 +1920,20 @@ void SIInstrInfo::splitSMRD(MachineInstr *MI,
// The SMRD has an 8-bit offset in dwords on SI and a 20-bit offset in bytes
// on VI.
+
+ bool IsKill = SBase->isKill();
if (OffOp) {
- bool isVI = RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
+ bool isVI =
+ MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
+ AMDGPUSubtarget::VOLCANIC_ISLANDS;
unsigned OffScale = isVI ? 1 : 4;
// Handle the _IMM variant
unsigned LoOffset = OffOp->getImm() * OffScale;
unsigned HiOffset = LoOffset + HalfSize;
Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo)
- .addOperand(*SBase)
+ // Use addReg instead of addOperand
+ // to make sure kill flag is cleared.
+ .addReg(SBase->getReg(), 0, SBase->getSubReg())
.addImm(LoOffset / OffScale);
if (!isUInt<20>(HiOffset) || (!isVI && !isUInt<8>(HiOffset / OffScale))) {
@@ -1930,25 +1942,28 @@ void SIInstrInfo::splitSMRD(MachineInstr *MI,
BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR)
.addImm(HiOffset); // The offset in register is in bytes.
Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
.addReg(OffsetSGPR);
} else {
Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi)
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
.addImm(HiOffset / OffScale);
}
} else {
// Handle the _SGPR variant
MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff);
Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo)
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), 0, SBase->getSubReg())
.addOperand(*SOff);
unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
.addOperand(*SOff)
.addImm(HalfSize);
Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp))
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
.addReg(OffsetSGPR);
}
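
Kill flags aside, the offset arithmetic of the split is simple: the low half keeps the original offset, the high half adds HalfSize, and SI scales the encoded immediate by dwords (OffScale). A standalone model with assumed example values:

#include <cstdio>

int main() {
  const unsigned HalfSize = 16;           // bytes covered by each half load
  const bool IsVI = false;                // SI encodes the immediate in dwords
  const unsigned OffScale = IsVI ? 1 : 4;
  const unsigned Imm = 2;                 // original encoded offset

  unsigned LoOffset = Imm * OffScale;       // low half, in bytes
  unsigned HiOffset = LoOffset + HalfSize;  // high half, in bytes
  // If HiOffset / OffScale no longer fits the immediate field, the code
  // above materializes the offset in an SGPR instead.
  printf("lo enc=%u, hi enc=%u\n", LoOffset / OffScale, HiOffset / OffScale);
  return 0;
}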
@@ -2003,7 +2018,8 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
  // SMRD instructions take a dword offset on SI and a byte offset on VI
// and MUBUF instructions always take a byte offset.
ImmOffset = MI->getOperand(2).getImm();
- if (RI.ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
+ if (MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <=
+ AMDGPUSubtarget::SEA_ISLANDS)
ImmOffset <<= 2;
RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
@@ -2043,13 +2059,15 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
.addImm(AMDGPU::sub3);
MI->setDesc(get(NewOpcode));
if (MI->getOperand(2).isReg()) {
- MI->getOperand(2).setReg(MI->getOperand(1).getReg());
+ MI->getOperand(2).setReg(SRsrc);
} else {
- MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
+ MI->getOperand(2).ChangeToRegister(SRsrc, false);
}
- MI->getOperand(1).setReg(SRsrc);
MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0));
MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // glc
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // slc
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // tfe
const TargetRegisterClass *NewDstRC =
RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass);
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 12dc3f3..a9aa99f 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -114,7 +114,7 @@ public:
// register. If there is no hardware instruction that can store to \p
// DstRC, then AMDGPU::COPY is returned.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
- unsigned commuteOpcode(unsigned Opcode) const;
+ unsigned commuteOpcode(const MachineInstr &MI) const;
MachineInstr *commuteInstruction(MachineInstr *MI,
bool NewMI = false) const override;
@@ -218,10 +218,6 @@ public:
bool isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
const MachineOperand &MO) const;
- /// \brief Return true if the given offset Size in bytes can be folded into
- /// the immediate offsets of a memory instruction for the given address space.
- bool canFoldOffset(unsigned OffsetSize, unsigned AS) const;
-
/// \brief Return true if this 64-bit VALU instruction has a 32-bit encoding.
/// This function will return false if you pass it a 32-bit instruction.
bool hasVALU32BitEncoding(unsigned Opcode) const;
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index e2747dc..d603ecb 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -264,6 +264,9 @@ def ds_offset0 : Operand<i8> {
def ds_offset1 : Operand<i8> {
let PrintMethod = "printDSOffset1";
}
+def gds : Operand <i1> {
+ let PrintMethod = "printGDS";
+}
def glc : Operand <i1> {
let PrintMethod = "printGLC";
}
@@ -284,6 +287,8 @@ def ClampMod : Operand <i1> {
} // End OperandType = "OPERAND_IMMEDIATE"
+def VOPDstS64 : VOPDstOperand <SReg_64>;
+
//===----------------------------------------------------------------------===//
// Complex patterns
//===----------------------------------------------------------------------===//
@@ -292,7 +297,7 @@ def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
-def MUBUFAddr64 : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
+def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
@@ -315,6 +320,7 @@ def SIOperand {
def SRCMODS {
int NONE = 0;
+ int NEG = 1;
}
def DSTCLAMP {
@@ -516,7 +522,7 @@ multiclass SOP2_64_32 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
class SOPC_Helper <bits<7> op, RegisterOperand rc, ValueType vt,
string opName, PatLeaf cond> : SOPC <
op, (outs SCCReg:$dst), (ins rc:$src0, rc:$src1),
- opName#" $dst, $src0, $src1", []>;
+ opName#" $src0, $src1", []>;
class SOPC_32<bits<7> op, string opName, PatLeaf cond = COND_NULL>
: SOPC_Helper<op, SSrc_32, i32, opName, cond>;
@@ -637,9 +643,9 @@ class getNumSrcArgs<ValueType Src1, ValueType Src2> {
// Returns the register class to use for the destination of VOP[123C]
// instructions for the given VT.
class getVALUDstForVT<ValueType VT> {
- RegisterClass ret = !if(!eq(VT.Size, 32), VGPR_32,
- !if(!eq(VT.Size, 64), VReg_64,
- SReg_64)); // else VT == i1
+ RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
+ !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
+ VOPDstOperand<SReg_64>)); // else VT == i1
}
// Returns the register class to use for source 0 of VOP[12C]
@@ -717,7 +723,7 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
class getAsm32 <int NumSrcArgs> {
string src1 = ", $src1";
string src2 = ", $src2";
- string ret = " $dst, $src0"#
+ string ret = "$dst, $src0"#
!if(!eq(NumSrcArgs, 1), "", src1)#
!if(!eq(NumSrcArgs, 3), src2, "");
}
@@ -733,7 +739,7 @@ class getAsm64 <int NumSrcArgs, bit HasModifiers> {
string ret =
!if(!eq(HasModifiers, 0),
getAsm32<NumSrcArgs>.ret,
- " $dst, "#src0#src1#src2#"$clamp"#"$omod");
+ "$dst, "#src0#src1#src2#"$clamp"#"$omod");
}
@@ -745,7 +751,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field ValueType Src0VT = ArgVT[1];
field ValueType Src1VT = ArgVT[2];
field ValueType Src2VT = ArgVT[3];
- field RegisterClass DstRC = getVALUDstForVT<DstVT>.ret;
+ field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
field RegisterClass Src1RC32 = getVOPSrc1ForVT<Src1VT>.ret;
field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
@@ -761,7 +767,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
HasModifiers>.ret;
- field string Asm32 = "_e32"#getAsm32<NumSrcArgs>.ret;
+ field string Asm32 = getAsm32<NumSrcArgs>.ret;
field string Asm64 = getAsm64<NumSrcArgs, HasModifiers>.ret;
}
@@ -788,22 +794,27 @@ def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> {
def VOP_I1_F32_I32 : VOPProfile <[i1, f32, i32, untyped]> {
let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- let Asm64 = " $dst, $src0_modifiers, $src1";
+ let Asm64 = "$dst, $src0_modifiers, $src1";
}
def VOP_I1_F64_I32 : VOPProfile <[i1, f64, i32, untyped]> {
let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- let Asm64 = " $dst, $src0_modifiers, $src1";
+ let Asm64 = "$dst, $src0_modifiers, $src1";
}
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
+def VOP_CNDMASK : VOPProfile <[i32, i32, i32, untyped]> {
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VCCReg:$src2);
+ let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, SSrc_64:$src2);
+ let Asm64 = "$dst, $src0, $src1, $src2";
+}
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
def VOP_MADK : VOPProfile <[f32, f32, f32, f32]> {
field dag Ins = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$src2);
- field string Asm = " $dst, $src0, $vsrc1, $src2";
+ field string Asm = "$dst, $src0, $vsrc1, $src2";
}
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
@@ -835,23 +846,28 @@ class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
field bits<9> src0;
}
+class VOP1_Real_si <string opName, vop1 op, dag outs, dag ins, string asm> :
+ VOP1<op.SI, outs, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI>;
+
+class VOP1_Real_vi <string opName, vop1 op, dag outs, dag ins, string asm> :
+ VOP1<op.VI, outs, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI>;
+
multiclass VOP1_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
string opName> {
def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
- def _si : VOP1<op.SI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
- def _vi : VOP1<op.VI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI>;
+ def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
+
+ def _vi : VOP1_Real_vi <opName, op, outs, ins, asm>;
}
multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
string opName> {
def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
- def _si : VOP1<op.SI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
- // No VI instruction. This class is for SI only.
+ def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
}
class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
@@ -862,13 +878,20 @@ class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
let isCodeGenOnly = 1;
}
+class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :
+ VOP2 <op.SI, outs, ins, opName#asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI>;
+
+class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
+  VOP2 <op.VI, outs, ins, opName#asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI>;
+
multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
string opName, string revOp> {
def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
- def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
+ def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
}
multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
@@ -876,10 +899,10 @@ multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
- def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
- def _vi : VOP2 <op.VI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI>;
+ def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
+
+ def _vi : VOP2_Real_vi <opName, op, outs, ins, asm>;
+
}
class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> {
@@ -1047,9 +1070,10 @@ multiclass VOP3b_3_m <vop op, dag outs, dag ins, string asm,
multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
list<dag> pattern, string opName,
- bit HasMods, bit defExec> {
+ bit HasMods, bit defExec, string revOp> {
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
+ VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
VOP3DisableFields<1, 0, HasMods> {
@@ -1086,7 +1110,7 @@ multiclass VOP1_Helper <vop1 op, string opName, dag outs,
defm _e32 : VOP1_m <op, outs, ins32, opName#asm32, pat32, opName>;
- defm _e64 : VOP3_1_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName, HasMods>;
+ defm _e64 : VOP3_1_m <op, outs, ins64, opName#asm64, pat64, opName, HasMods>;
}
multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
@@ -1121,7 +1145,7 @@ multiclass VOP2_Helper <vop2 op, string opName, dag outs,
defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
defm _e64 : VOP3_2_m <op,
- outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
+ outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
>;
}
@@ -1145,7 +1169,7 @@ multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
string revOp = opName> {
defm _e32 : VOP2SI_m <op, P.Outs, P.Ins32, P.Asm32, [], opName, revOp>;
- defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#"_e64"#P.Asm64,
+ defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#P.Asm64,
!if(P.HasModifiers,
[(set P.DstVT:$dst,
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
@@ -1163,7 +1187,7 @@ multiclass VOP2b_Helper <vop2 op, string opName, dag outs,
defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
defm _e64 : VOP3b_2_m <op,
- outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
+ outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
>;
}
@@ -1189,7 +1213,7 @@ multiclass VOP2_VI3_Helper <vop23 op, string opName, dag outs,
string revOp, bit HasMods> {
defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOp>;
- defm _e64 : VOP3_2_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName,
+ defm _e64 : VOP3_2_m <op, outs, ins64, opName#asm64, pat64, opName,
revOp, HasMods>;
}
@@ -1235,28 +1259,30 @@ class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
}
multiclass VOPC_m <vopc op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, bit DefExec> {
+ string opName, bit DefExec, string revOpName = ""> {
def "" : VOPC_Pseudo <outs, ins, pattern, opName>;
def _si : VOPC<op.SI, ins, asm, []>,
SIMCInstr <opName#"_e32", SISubtarget.SI> {
let Defs = !if(DefExec, [EXEC], []);
+ let hasSideEffects = DefExec;
}
def _vi : VOPC<op.VI, ins, asm, []>,
SIMCInstr <opName#"_e32", SISubtarget.VI> {
let Defs = !if(DefExec, [EXEC], []);
+ let hasSideEffects = DefExec;
}
}
multiclass VOPC_Helper <vopc op, string opName,
dag ins32, string asm32, list<dag> pat32,
dag out64, dag ins64, string asm64, list<dag> pat64,
- bit HasMods, bit DefExec> {
+ bit HasMods, bit DefExec, string revOp> {
defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>;
- defm _e64 : VOP3_C_m <op, out64, ins64, opName#"_e64"#asm64, pat64,
- opName, HasMods, DefExec>;
+ defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
+ opName, HasMods, DefExec, revOp>;
}
// Special case for class instructions which only have modifiers on
@@ -1264,20 +1290,21 @@ multiclass VOPC_Helper <vopc op, string opName,
multiclass VOPC_Class_Helper <vopc op, string opName,
dag ins32, string asm32, list<dag> pat32,
dag out64, dag ins64, string asm64, list<dag> pat64,
- bit HasMods, bit DefExec> {
+ bit HasMods, bit DefExec, string revOp> {
defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>;
- defm _e64 : VOP3_C_m <op, out64, ins64, opName#"_e64"#asm64, pat64,
- opName, HasMods, DefExec>,
+ defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
+ opName, HasMods, DefExec, revOp>,
VOP3DisableModFields<1, 0, 0>;
}
multiclass VOPCInst <vopc op, string opName,
VOPProfile P, PatLeaf cond = COND_NULL,
+ string revOp = opName,
bit DefExec = 0> : VOPC_Helper <
op, opName,
P.Ins32, P.Asm32, [],
- (outs SReg_64:$dst), P.Ins64, P.Asm64,
+ (outs VOPDstS64:$dst), P.Ins64, P.Asm64,
!if(P.HasModifiers,
[(set i1:$dst,
(setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
@@ -1285,54 +1312,55 @@ multiclass VOPCInst <vopc op, string opName,
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
cond))],
[(set i1:$dst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]),
- P.HasModifiers, DefExec
+ P.HasModifiers, DefExec, revOp
>;
multiclass VOPCClassInst <vopc op, string opName, VOPProfile P,
bit DefExec = 0> : VOPC_Class_Helper <
op, opName,
P.Ins32, P.Asm32, [],
- (outs SReg_64:$dst), P.Ins64, P.Asm64,
+ (outs VOPDstS64:$dst), P.Ins64, P.Asm64,
!if(P.HasModifiers,
[(set i1:$dst,
(AMDGPUfp_class (P.Src0VT (VOP3Mods0Clamp0OMod P.Src0VT:$src0, i32:$src0_modifiers)), P.Src1VT:$src1))],
[(set i1:$dst, (AMDGPUfp_class P.Src0VT:$src0, P.Src1VT:$src1))]),
- P.HasModifiers, DefExec
+ P.HasModifiers, DefExec, opName
>;
-multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_F32_F32_F32, cond>;
+multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_F32_F32_F32, cond, revOp>;
-multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_F64_F64_F64, cond>;
+multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_F64_F64_F64, cond, revOp>;
-multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_I32_I32_I32, cond>;
+multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_I32_I32_I32, cond, revOp>;
-multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_I64_I64_I64, cond>;
+multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_I64_I64_I64, cond, revOp>;
multiclass VOPCX <vopc op, string opName, VOPProfile P,
- PatLeaf cond = COND_NULL>
- : VOPCInst <op, opName, P, cond, 1>;
+ PatLeaf cond = COND_NULL,
+ string revOp = "">
+ : VOPCInst <op, opName, P, cond, revOp, 1>;
-multiclass VOPCX_F32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_F32_F32_F32, cond>;
+multiclass VOPCX_F32 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_F32_F32_F32, COND_NULL, revOp>;
-multiclass VOPCX_F64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_F64_F64_F64, cond>;
+multiclass VOPCX_F64 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_F64_F64_F64, COND_NULL, revOp>;
-multiclass VOPCX_I32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_I32_I32_I32, cond>;
+multiclass VOPCX_I32 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_I32_I32_I32, COND_NULL, revOp>;
-multiclass VOPCX_I64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_I64_I64_I64, cond>;
+multiclass VOPCX_I64 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_I64_I64_I64, COND_NULL, revOp>;
multiclass VOP3_Helper <vop3 op, string opName, dag outs, dag ins, string asm,
list<dag> pat, int NumSrcArgs, bit HasMods> : VOP3_m <
- op, outs, ins, opName#asm, pat, opName, NumSrcArgs, HasMods
+ op, outs, ins, opName#" "#asm, pat, opName, NumSrcArgs, HasMods
>;
multiclass VOPC_CLASS_F32 <vopc op, string opName> :
@@ -1349,7 +1377,7 @@ multiclass VOPCX_CLASS_F64 <vopc op, string opName> :
multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag> : VOP3_Helper <
- op, opName, P.Outs, P.Ins64, P.Asm64,
+ op, opName, (outs P.DstRC.RegClass:$dst), P.Ins64, P.Asm64,
!if(!eq(P.NumSrcArgs, 3),
!if(P.HasModifiers,
[(set P.DstVT:$dst,
@@ -1381,7 +1409,7 @@ multiclass VOP3_VCC_Inst <vop3 op, string opName,
VOPProfile P,
SDPatternOperator node = null_frag> : VOP3_Helper <
op, opName,
- P.Outs,
+ (outs P.DstRC.RegClass:$dst),
(ins InputModsNoDefault:$src0_modifiers, P.Src0RC64:$src0,
InputModsNoDefault:$src1_modifiers, P.Src1RC64:$src1,
InputModsNoDefault:$src2_modifiers, P.Src2RC64:$src2,
@@ -1483,10 +1511,8 @@ class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
DSe_vi <op>,
SIMCInstr <opName, SISubtarget.VI>;
-class DS_1A_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
- DS <outs, ins, asm, []>,
- DSe <op>,
- SIMCInstr <opName, SISubtarget.SI> {
+class DS_Off16_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
+  DS_Real_si <op, opName, outs, ins, asm> {
  // A single load interprets the two i8imm operands as a single i16 offset.
bits<16> offset;
@@ -1494,10 +1520,8 @@ class DS_1A_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
let offset1 = offset{15-8};
}
-class DS_1A_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
- DS <outs, ins, asm, []>,
- DSe_vi <op>,
- SIMCInstr <opName, SISubtarget.VI> {
+class DS_Off16_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
+ DS_Real_vi <op, opName, outs, ins, asm> {
  // A single load interprets the two i8imm operands as a single i16 offset.
bits<16> offset;
@@ -1505,180 +1529,168 @@ class DS_1A_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
let offset1 = offset{15-8};
}
-multiclass DS_1A_Load_m <bits<8> op, string opName, dag outs, dag ins, string asm,
- list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
+multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs rc:$vdst),
+ dag ins = (ins VGPR_32:$addr, ds_offset:$offset, gds:$gds, M0Reg:$m0),
+ string asm = opName#" $vdst, $addr"#"$offset$gds"> {
- let data0 = 0, data1 = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let data0 = 0, data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_1A_Load_m <
- op,
- asm,
- (outs regClass:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0),
- asm#" $vdst, $addr"#"$offset",
- []>;
-
-multiclass DS_Load2_m <bits<8> op, string opName, dag outs, dag ins, string asm,
- list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
-
- let data0 = 0, data1 = 0 in {
- def _si : DS_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs rc:$vdst),
+ dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
+ gds:$gds, M0Reg:$m0),
+ string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let data0 = 0, data1 = 0 in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Load2_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_Load2_m <
- op,
- asm,
- (outs regClass:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
- M0Reg:$m0),
- asm#" $vdst, $addr"#"$offset0"#"$offset1",
- []>;
-
-multiclass DS_1A_Store_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
-
- let data1 = 0, vdst = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds,
+ M0Reg:$m0),
+ string asm = opName#" $addr, $data0"#"$offset$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<opName, 0>;
+
+ let data1 = 0, vdst = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_1A_Store_m <
- op,
- asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, regClass:$data0, ds_offset:$offset, M0Reg:$m0),
- asm#" $addr, $data0"#"$offset",
- []>;
-
-multiclass DS_Store_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
-
- let vdst = 0 in {
- def _si : DS_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
+ ds_offset0:$offset0, ds_offset1:$offset1, gds:$gds, M0Reg:$m0),
+ string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let vdst = 0 in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_Store_m <
- op,
- asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, regClass:$data0, regClass:$data1,
- ds_offset0:$offset0, ds_offset1:$offset1, M0Reg:$m0),
- asm#" $addr, $data0, $data1"#"$offset0"#"$offset1",
- []>;
-
-// 1 address, 1 data.
-multiclass DS_1A1D_RET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
- let mayLoad = 1, mayStore = 1,
- hasPostISelHook = 1 // Adjusted to no return version.
- in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 1>;
-
- let data1 = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A1D_RET <bits<8> op, string opName, RegisterClass rc,
+ string noRetOp = "",
+ dag outs = (outs rc:$vdst),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds,
+ M0Reg:$m0),
+ string asm = opName#" $vdst, $addr, $data0"#"$offset$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 1>;
+
+ let data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = ""> : DS_1A1D_RET_m <
- op, asm,
- (outs rc:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
- asm#" $vdst, $addr, $data0"#"$offset", [], noRetOp>;
-
-// 1 address, 2 data.
-multiclass DS_1A2D_RET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
- let mayLoad = 1, mayStore = 1,
- hasPostISelHook = 1 // Adjusted to no return version.
- in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 1>;
-
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A2D_RET_m <bits<8> op, string opName, RegisterClass rc,
+ string noRetOp = "", dag ins,
+ dag outs = (outs rc:$vdst),
+ string asm = opName#" $vdst, $addr, $data0, $data1"#"$offset"#"$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 1>;
+
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
multiclass DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = ""> : DS_1A2D_RET_m <
- op, asm,
- (outs rc:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
- asm#" $vdst, $addr, $data0, $data1"#"$offset",
- [], noRetOp>;
-
-// 1 address, 2 data.
-multiclass DS_1A2D_NORET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
- let mayLoad = 1, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 0>;
+ string noRetOp = "", RegisterClass src = rc> :
+ DS_1A2D_RET_m <op, asm, rc, noRetOp,
+ (ins VGPR_32:$addr, src:$data0, src:$data1,
+ ds_offset:$offset, gds:$gds, M0Reg:$m0)
+>;
- let vdst = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A2D_NORET <bits<8> op, string opName, RegisterClass rc,
+ string noRetOp = opName,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
+ ds_offset:$offset, gds:$gds, M0Reg:$m0),
+ string asm = opName#" $addr, $data0, $data1"#"$offset"#"$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 0>;
+
+ let vdst = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = asm> : DS_1A2D_NORET_m <
- op, asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
- asm#" $addr, $data0, $data1"#"$offset",
- [], noRetOp>;
+multiclass DS_0A_RET <bits<8> op, string opName,
+ dag outs = (outs VGPR_32:$vdst),
+ dag ins = (ins ds_offset:$offset, gds:$gds, M0Reg:$m0),
+ string asm = opName#" $vdst"#"$offset"#"$gds"> {
-// 1 address, 1 data.
-multiclass DS_1A1D_NORET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
let mayLoad = 1, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 0>;
+ def "" : DS_Pseudo <opName, outs, ins, []>;
- let data1 = 0, vdst = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
- }
+ let addr = 0, data0 = 0, data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ } // end addr = 0, data0 = 0, data1 = 0
+ } // end mayLoad = 1, mayStore = 1
}
-multiclass DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = asm> : DS_1A1D_NORET_m <
- op, asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
- asm#" $addr, $data0"#"$offset",
- [], noRetOp>;
+multiclass DS_1A_RET_GDS <bits<8> op, string opName,
+ dag outs = (outs VGPR_32:$vdst),
+ dag ins = (ins VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0),
+ string asm = opName#" $vdst, $addr"#"$offset gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let data0 = 0, data1 = 0, gds = 1 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ } // end data0 = 0, data1 = 0, gds = 1
+}
+
+multiclass DS_1A_GDS <bits<8> op, string opName,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, M0Reg:$m0),
+ string asm = opName#" $addr gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let vdst = 0, data0 = 0, data1 = 0, offset0 = 0, offset1 = 0, gds = 1 in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
+  } // end vdst = 0, data0 = 0, data1 = 0, offset0 = 0, offset1 = 0, gds = 1
+}
+
+multiclass DS_1A <bits<8> op, string opName,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0, gds:$gds),
+ string asm = opName#" $addr"#"$offset"#"$gds"> {
+
+ let mayLoad = 1, mayStore = 1 in {
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let vdst = 0, data0 = 0, data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ } // let vdst = 0, data0 = 0, data1 = 0
+ } // end mayLoad = 1, mayStore = 1
+}
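
The DS_Off16_Real_* classes above split a single 16-bit offset across the two 8-bit offset0/offset1 encoding fields. The bit slicing, modeled in plain C++:

#include <cassert>
#include <cstdint>

int main() {
  uint16_t offset = 0x1234;                // single 16-bit DS offset
  uint8_t offset0 = offset & 0xff;         // offset{7-0}
  uint8_t offset1 = (offset >> 8) & 0xff;  // offset{15-8}
  assert((uint16_t)((offset1 << 8) | offset0) == offset);
  return 0;
}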
//===----------------------------------------------------------------------===//
// MTBUF classes
@@ -1861,14 +1873,14 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
defm _ADDR64 : MUBUFAtomicAddr64_m <
op, name#"_addr64", (outs),
(ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
- mbuf_offset:$offset, SCSrc_32:$soffset, slc:$slc),
+ SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#"$slc", [], 0
>;
defm _OFFSET : MUBUFAtomicOffset_m <
op, name#"_offset", (outs),
- (ins rc:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
- SCSrc_32:$soffset, slc:$slc),
+ (ins rc:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset, mbuf_offset:$offset,
+ slc:$slc),
name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", [], 0
>;
} // glc = 0
@@ -1880,7 +1892,7 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
defm _RTN_ADDR64 : MUBUFAtomicAddr64_m <
op, name#"_rtn_addr64", (outs rc:$vdata),
(ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr,
- mbuf_offset:$offset, SSrc_32:$soffset, slc:$slc),
+ SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#" glc"#"$slc",
[(set vt:$vdata,
(atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset,
@@ -1889,8 +1901,8 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
defm _RTN_OFFSET : MUBUFAtomicOffset_m <
op, name#"_rtn_offset", (outs rc:$vdata),
- (ins rc:$vdata_in, SReg_128:$srsrc, mbuf_offset:$offset,
- SCSrc_32:$soffset, slc:$slc),
+ (ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, slc:$slc),
name#" $vdata, $srsrc, $soffset"#"$offset"#" glc $slc",
[(set vt:$vdata,
(atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
@@ -1909,9 +1921,8 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
let mayLoad = 1, mayStore = 0 in {
let offen = 0, idxen = 0, vaddr = 0 in {
defm _OFFSET : MUBUF_m <op, name#"_offset", (outs regClass:$vdata),
- (ins SReg_128:$srsrc,
- mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
- slc:$slc, tfe:$tfe),
+ (ins SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
[(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc,
i32:$soffset, i16:$offset,
@@ -1920,7 +1931,7 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
let offen = 1, idxen = 0 in {
defm _OFFEN : MUBUF_m <op, name#"_offen", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VGPR_32:$vaddr,
+ (ins VGPR_32:$vaddr, SReg_128:$srsrc,
SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc,
tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
@@ -1928,45 +1939,48 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
let offen = 0, idxen = 1 in {
defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VGPR_32:$vaddr,
- mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
+ (ins VGPR_32:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
slc:$slc, tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
}
let offen = 1, idxen = 1 in {
defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_64:$vaddr,
- SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ (ins VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
}
- let offen = 0, idxen = 0, glc = 0, slc = 0, tfe = 0 in {
+ let offen = 0, idxen = 0 in {
defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_64:$vaddr,
- SCSrc_32:$soffset, mbuf_offset:$offset),
- name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset",
+ (ins VReg_64:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset,
+ glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#
+ "$glc"#"$slc"#"$tfe",
[(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
i64:$vaddr, i32:$soffset,
- i16:$offset)))]>;
+ i16:$offset, i1:$glc, i1:$slc,
+ i1:$tfe)))]>;
}
}
}
multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass,
- ValueType store_vt, SDPatternOperator st> {
+ ValueType store_vt = i32, SDPatternOperator st = null_frag> {
let mayLoad = 0, mayStore = 1 in {
defm : MUBUF_m <op, name, (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset,
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
mbuf_offset:$offset, offen:$offen, idxen:$idxen, glc:$glc, slc:$slc,
tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset"#"$offen"#"$idxen"#"$offset"#
- "$glc"#"$slc"#"$tfe", []>;
+ "$glc"#"$slc"#"$tfe", []>;
let offen = 0, idxen = 0, vaddr = 0 in {
defm _OFFSET : MUBUF_m <op, name#"_offset",(outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
- SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
+ (ins vdataClass:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
[(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>;
@@ -1974,21 +1988,40 @@ multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass,
let offen = 1, idxen = 0 in {
defm _OFFEN : MUBUF_m <op, name#"_offen", (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset,
- mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
+ slc:$slc, tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#
"$glc"#"$slc"#"$tfe", []>;
} // end offen = 1, idxen = 0
- let offen = 0, idxen = 0, glc = 0, slc = 0, tfe = 0 in {
+ let offen = 0, idxen = 1 in {
+ defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs),
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
+ slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+ }
+
+ let offen = 1, idxen = 1 in {
+ defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs),
+ (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+ }
+
+ let offen = 0, idxen = 0 in {
defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc,
- VReg_64:$vaddr, SCSrc_32:$soffset,
- mbuf_offset:$offset),
- name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset",
+ (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc,
+ tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset addr64"#
+ "$offset"#"$glc"#"$slc"#"$tfe",
[(st store_vt:$vdata,
(MUBUFAddr64 v4i32:$srsrc, i64:$vaddr,
- i32:$soffset, i16:$offset))]>;
+ i32:$soffset, i16:$offset,
+ i1:$glc, i1:$slc, i1:$tfe))]>;
}
} // End mayLoad = 0, mayStore = 1
}
@@ -2182,15 +2215,6 @@ def getVOPe32 : InstrMapping {
let ValueCols = [["4"]];
}
-// Maps an original opcode to its commuted version
-def getCommuteRev : InstrMapping {
- let FilterClass = "VOP2_REV";
- let RowFields = ["RevOp"];
- let ColFields = ["IsOrig"];
- let KeyCol = ["1"];
- let ValueCols = [["0"]];
-}
-
def getMaskedMIMGOp : InstrMapping {
let FilterClass = "MIMG_Mask";
let RowFields = ["Op"];
@@ -2208,6 +2232,33 @@ def getCommuteOrig : InstrMapping {
let ValueCols = [["1"]];
}
+// Maps an original opcode to its commuted version
+def getCommuteRev : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
+
+def getCommuteCmpOrig : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+// Maps an original opcode to its commuted version
+def getCommuteCmpRev : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
+
+
def getMCOpcodeGen : InstrMapping {
let FilterClass = "SIMCInstr";
let RowFields = ["PseudoInstr"];
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 4f72e99..95b2470 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -28,6 +28,8 @@ def SendMsgImm : Operand<i32> {
def isGCN : Predicate<"Subtarget->getGeneration() "
">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
+def isSI : Predicate<"Subtarget->getGeneration() "
+ "== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
def isSICI : Predicate<
"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
@@ -153,7 +155,9 @@ defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32",
>;
defm S_FLBIT_I32_B64 : SOP1_32_64 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
-defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32", []>;
+defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32",
+ [(set i32:$dst, (int_AMDGPU_flbit_i32 i32:$src0))]
+>;
defm S_FLBIT_I32_I64 : SOP1_32_64 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>;
defm S_SEXT_I32_I8 : SOP1_32 <sop1<0x19, 0x16>, "s_sext_i32_i8",
[(set i32:$dst, (sext_inreg i32:$src0, i8))]
@@ -304,7 +308,8 @@ defm S_ASHR_I64 : SOP2_64_32 <sop2<0x23, 0x21>, "s_ashr_i64",
>;
} // End Defs = [SCC]
-defm S_BFM_B32 : SOP2_32 <sop2<0x24, 0x22>, "s_bfm_b32", []>;
+defm S_BFM_B32 : SOP2_32 <sop2<0x24, 0x22>, "s_bfm_b32",
+ [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
defm S_BFM_B64 : SOP2_64 <sop2<0x25, 0x23>, "s_bfm_b64", []>;
defm S_MUL_I32 : SOP2_32 <sop2<0x26, 0x24>, "s_mul_i32",
[(set i32:$dst, (mul i32:$src0, i32:$src1))]
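
The new s_bfm_b32 pattern selects AMDGPUbfm, whose result is a bitfield mask: $src0 consecutive ones shifted left by $src1 (assumed semantics, with both operands taken modulo 32 as on the hardware). A scalar model:

#include <cassert>
#include <cstdint>

// Bitfield mask: 'width' ones starting at bit 'offset'.
uint32_t bfm(uint32_t width, uint32_t offset) {
  width &= 31;
  offset &= 31;
  return ((1u << width) - 1u) << offset;
}

int main() {
  assert(bfm(4, 8) == 0x00000f00u);
  assert(bfm(0, 0) == 0u);
  return 0;
}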
@@ -505,31 +510,30 @@ def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> {
// VOPC Instructions
//===----------------------------------------------------------------------===//
-let isCompare = 1 in {
+let isCompare = 1, isCommutable = 1 in {
defm V_CMP_F_F32 : VOPC_F32 <vopc<0x0, 0x40>, "v_cmp_f_f32">;
-defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1, 0x41>, "v_cmp_lt_f32", COND_OLT>;
+defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1, 0x41>, "v_cmp_lt_f32", COND_OLT, "v_cmp_gt_f32">;
defm V_CMP_EQ_F32 : VOPC_F32 <vopc<0x2, 0x42>, "v_cmp_eq_f32", COND_OEQ>;
-defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3, 0x43>, "v_cmp_le_f32", COND_OLE>;
+defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3, 0x43>, "v_cmp_le_f32", COND_OLE, "v_cmp_ge_f32">;
defm V_CMP_GT_F32 : VOPC_F32 <vopc<0x4, 0x44>, "v_cmp_gt_f32", COND_OGT>;
defm V_CMP_LG_F32 : VOPC_F32 <vopc<0x5, 0x45>, "v_cmp_lg_f32", COND_ONE>;
defm V_CMP_GE_F32 : VOPC_F32 <vopc<0x6, 0x46>, "v_cmp_ge_f32", COND_OGE>;
defm V_CMP_O_F32 : VOPC_F32 <vopc<0x7, 0x47>, "v_cmp_o_f32", COND_O>;
defm V_CMP_U_F32 : VOPC_F32 <vopc<0x8, 0x48>, "v_cmp_u_f32", COND_UO>;
-defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9, 0x49>, "v_cmp_nge_f32", COND_ULT>;
+defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9, 0x49>, "v_cmp_nge_f32", COND_ULT, "v_cmp_nle_f32">;
defm V_CMP_NLG_F32 : VOPC_F32 <vopc<0xa, 0x4a>, "v_cmp_nlg_f32", COND_UEQ>;
-defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb, 0x4b>, "v_cmp_ngt_f32", COND_ULE>;
+defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb, 0x4b>, "v_cmp_ngt_f32", COND_ULE, "v_cmp_nlt_f32">;
defm V_CMP_NLE_F32 : VOPC_F32 <vopc<0xc, 0x4c>, "v_cmp_nle_f32", COND_UGT>;
defm V_CMP_NEQ_F32 : VOPC_F32 <vopc<0xd, 0x4d>, "v_cmp_neq_f32", COND_UNE>;
defm V_CMP_NLT_F32 : VOPC_F32 <vopc<0xe, 0x4e>, "v_cmp_nlt_f32", COND_UGE>;
defm V_CMP_TRU_F32 : VOPC_F32 <vopc<0xf, 0x4f>, "v_cmp_tru_f32">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_F32 : VOPCX_F32 <vopc<0x10, 0x50>, "v_cmpx_f_f32">;
-defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11, 0x51>, "v_cmpx_lt_f32">;
+defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11, 0x51>, "v_cmpx_lt_f32", "v_cmpx_gt_f32">;
defm V_CMPX_EQ_F32 : VOPCX_F32 <vopc<0x12, 0x52>, "v_cmpx_eq_f32">;
-defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13, 0x53>, "v_cmpx_le_f32">;
+defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13, 0x53>, "v_cmpx_le_f32", "v_cmpx_ge_f32">;
defm V_CMPX_GT_F32 : VOPCX_F32 <vopc<0x14, 0x54>, "v_cmpx_gt_f32">;
defm V_CMPX_LG_F32 : VOPCX_F32 <vopc<0x15, 0x55>, "v_cmpx_lg_f32">;
defm V_CMPX_GE_F32 : VOPCX_F32 <vopc<0x16, 0x56>, "v_cmpx_ge_f32">;
@@ -543,233 +547,207 @@ defm V_CMPX_NEQ_F32 : VOPCX_F32 <vopc<0x1d, 0x5d>, "v_cmpx_neq_f32">;
defm V_CMPX_NLT_F32 : VOPCX_F32 <vopc<0x1e, 0x5e>, "v_cmpx_nlt_f32">;
defm V_CMPX_TRU_F32 : VOPCX_F32 <vopc<0x1f, 0x5f>, "v_cmpx_tru_f32">;
-} // End hasSideEffects = 1
defm V_CMP_F_F64 : VOPC_F64 <vopc<0x20, 0x60>, "v_cmp_f_f64">;
-defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21, 0x61>, "v_cmp_lt_f64", COND_OLT>;
+defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21, 0x61>, "v_cmp_lt_f64", COND_OLT, "v_cmp_gt_f64">;
defm V_CMP_EQ_F64 : VOPC_F64 <vopc<0x22, 0x62>, "v_cmp_eq_f64", COND_OEQ>;
-defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23, 0x63>, "v_cmp_le_f64", COND_OLE>;
+defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23, 0x63>, "v_cmp_le_f64", COND_OLE, "v_cmp_ge_f64">;
defm V_CMP_GT_F64 : VOPC_F64 <vopc<0x24, 0x64>, "v_cmp_gt_f64", COND_OGT>;
defm V_CMP_LG_F64 : VOPC_F64 <vopc<0x25, 0x65>, "v_cmp_lg_f64", COND_ONE>;
defm V_CMP_GE_F64 : VOPC_F64 <vopc<0x26, 0x66>, "v_cmp_ge_f64", COND_OGE>;
defm V_CMP_O_F64 : VOPC_F64 <vopc<0x27, 0x67>, "v_cmp_o_f64", COND_O>;
defm V_CMP_U_F64 : VOPC_F64 <vopc<0x28, 0x68>, "v_cmp_u_f64", COND_UO>;
-defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29, 0x69>, "v_cmp_nge_f64", COND_ULT>;
+defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29, 0x69>, "v_cmp_nge_f64", COND_ULT, "v_cmp_nle_f64">;
defm V_CMP_NLG_F64 : VOPC_F64 <vopc<0x2a, 0x6a>, "v_cmp_nlg_f64", COND_UEQ>;
-defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b, 0x6b>, "v_cmp_ngt_f64", COND_ULE>;
+defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b, 0x6b>, "v_cmp_ngt_f64", COND_ULE, "v_cmp_nlt_f64">;
defm V_CMP_NLE_F64 : VOPC_F64 <vopc<0x2c, 0x6c>, "v_cmp_nle_f64", COND_UGT>;
defm V_CMP_NEQ_F64 : VOPC_F64 <vopc<0x2d, 0x6d>, "v_cmp_neq_f64", COND_UNE>;
defm V_CMP_NLT_F64 : VOPC_F64 <vopc<0x2e, 0x6e>, "v_cmp_nlt_f64", COND_UGE>;
defm V_CMP_TRU_F64 : VOPC_F64 <vopc<0x2f, 0x6f>, "v_cmp_tru_f64">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_F64 : VOPCX_F64 <vopc<0x30, 0x70>, "v_cmpx_f_f64">;
-defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31, 0x71>, "v_cmpx_lt_f64">;
+defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31, 0x71>, "v_cmpx_lt_f64", "v_cmpx_gt_f64">;
defm V_CMPX_EQ_F64 : VOPCX_F64 <vopc<0x32, 0x72>, "v_cmpx_eq_f64">;
-defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33, 0x73>, "v_cmpx_le_f64">;
+defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33, 0x73>, "v_cmpx_le_f64", "v_cmpx_ge_f64">;
defm V_CMPX_GT_F64 : VOPCX_F64 <vopc<0x34, 0x74>, "v_cmpx_gt_f64">;
defm V_CMPX_LG_F64 : VOPCX_F64 <vopc<0x35, 0x75>, "v_cmpx_lg_f64">;
defm V_CMPX_GE_F64 : VOPCX_F64 <vopc<0x36, 0x76>, "v_cmpx_ge_f64">;
defm V_CMPX_O_F64 : VOPCX_F64 <vopc<0x37, 0x77>, "v_cmpx_o_f64">;
defm V_CMPX_U_F64 : VOPCX_F64 <vopc<0x38, 0x78>, "v_cmpx_u_f64">;
-defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39, 0x79>, "v_cmpx_nge_f64">;
+defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39, 0x79>, "v_cmpx_nge_f64", "v_cmpx_nle_f64">;
defm V_CMPX_NLG_F64 : VOPCX_F64 <vopc<0x3a, 0x7a>, "v_cmpx_nlg_f64">;
-defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b, 0x7b>, "v_cmpx_ngt_f64">;
+defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b, 0x7b>, "v_cmpx_ngt_f64", "v_cmpx_nlt_f64">;
defm V_CMPX_NLE_F64 : VOPCX_F64 <vopc<0x3c, 0x7c>, "v_cmpx_nle_f64">;
defm V_CMPX_NEQ_F64 : VOPCX_F64 <vopc<0x3d, 0x7d>, "v_cmpx_neq_f64">;
defm V_CMPX_NLT_F64 : VOPCX_F64 <vopc<0x3e, 0x7e>, "v_cmpx_nlt_f64">;
defm V_CMPX_TRU_F64 : VOPCX_F64 <vopc<0x3f, 0x7f>, "v_cmpx_tru_f64">;
-} // End hasSideEffects = 1
let SubtargetPredicate = isSICI in {
defm V_CMPS_F_F32 : VOPC_F32 <vopc<0x40>, "v_cmps_f_f32">;
-defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "v_cmps_lt_f32">;
+defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "v_cmps_lt_f32", COND_NULL, "v_cmps_gt_f32">;
defm V_CMPS_EQ_F32 : VOPC_F32 <vopc<0x42>, "v_cmps_eq_f32">;
-defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "v_cmps_le_f32">;
+defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "v_cmps_le_f32", COND_NULL, "v_cmps_ge_f32">;
defm V_CMPS_GT_F32 : VOPC_F32 <vopc<0x44>, "v_cmps_gt_f32">;
defm V_CMPS_LG_F32 : VOPC_F32 <vopc<0x45>, "v_cmps_lg_f32">;
defm V_CMPS_GE_F32 : VOPC_F32 <vopc<0x46>, "v_cmps_ge_f32">;
defm V_CMPS_O_F32 : VOPC_F32 <vopc<0x47>, "v_cmps_o_f32">;
defm V_CMPS_U_F32 : VOPC_F32 <vopc<0x48>, "v_cmps_u_f32">;
-defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "v_cmps_nge_f32">;
+defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "v_cmps_nge_f32", COND_NULL, "v_cmps_nle_f32">;
defm V_CMPS_NLG_F32 : VOPC_F32 <vopc<0x4a>, "v_cmps_nlg_f32">;
-defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "v_cmps_ngt_f32">;
+defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "v_cmps_ngt_f32", COND_NULL, "v_cmps_nlt_f32">;
defm V_CMPS_NLE_F32 : VOPC_F32 <vopc<0x4c>, "v_cmps_nle_f32">;
defm V_CMPS_NEQ_F32 : VOPC_F32 <vopc<0x4d>, "v_cmps_neq_f32">;
defm V_CMPS_NLT_F32 : VOPC_F32 <vopc<0x4e>, "v_cmps_nlt_f32">;
defm V_CMPS_TRU_F32 : VOPC_F32 <vopc<0x4f>, "v_cmps_tru_f32">;
-let hasSideEffects = 1 in {
defm V_CMPSX_F_F32 : VOPCX_F32 <vopc<0x50>, "v_cmpsx_f_f32">;
-defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "v_cmpsx_lt_f32">;
+defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "v_cmpsx_lt_f32", "v_cmpsx_gt_f32">;
defm V_CMPSX_EQ_F32 : VOPCX_F32 <vopc<0x52>, "v_cmpsx_eq_f32">;
-defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "v_cmpsx_le_f32">;
+defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "v_cmpsx_le_f32", "v_cmpsx_ge_f32">;
defm V_CMPSX_GT_F32 : VOPCX_F32 <vopc<0x54>, "v_cmpsx_gt_f32">;
defm V_CMPSX_LG_F32 : VOPCX_F32 <vopc<0x55>, "v_cmpsx_lg_f32">;
defm V_CMPSX_GE_F32 : VOPCX_F32 <vopc<0x56>, "v_cmpsx_ge_f32">;
defm V_CMPSX_O_F32 : VOPCX_F32 <vopc<0x57>, "v_cmpsx_o_f32">;
defm V_CMPSX_U_F32 : VOPCX_F32 <vopc<0x58>, "v_cmpsx_u_f32">;
-defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "v_cmpsx_nge_f32">;
+defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "v_cmpsx_nge_f32", "v_cmpsx_nle_f32">;
defm V_CMPSX_NLG_F32 : VOPCX_F32 <vopc<0x5a>, "v_cmpsx_nlg_f32">;
-defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "v_cmpsx_ngt_f32">;
+defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "v_cmpsx_ngt_f32", "v_cmpsx_nlt_f32">;
defm V_CMPSX_NLE_F32 : VOPCX_F32 <vopc<0x5c>, "v_cmpsx_nle_f32">;
defm V_CMPSX_NEQ_F32 : VOPCX_F32 <vopc<0x5d>, "v_cmpsx_neq_f32">;
defm V_CMPSX_NLT_F32 : VOPCX_F32 <vopc<0x5e>, "v_cmpsx_nlt_f32">;
defm V_CMPSX_TRU_F32 : VOPCX_F32 <vopc<0x5f>, "v_cmpsx_tru_f32">;
-} // End hasSideEffects = 1
defm V_CMPS_F_F64 : VOPC_F64 <vopc<0x60>, "v_cmps_f_f64">;
-defm V_CMPS_LT_F64 : VOPC_F64 <vopc<0x61>, "v_cmps_lt_f64">;
+defm V_CMPS_LT_F64 : VOPC_F64 <vopc<0x61>, "v_cmps_lt_f64", COND_NULL, "v_cmps_gt_f64">;
defm V_CMPS_EQ_F64 : VOPC_F64 <vopc<0x62>, "v_cmps_eq_f64">;
-defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "v_cmps_le_f64">;
+defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "v_cmps_le_f64", COND_NULL, "v_cmps_ge_f64">;
defm V_CMPS_GT_F64 : VOPC_F64 <vopc<0x64>, "v_cmps_gt_f64">;
defm V_CMPS_LG_F64 : VOPC_F64 <vopc<0x65>, "v_cmps_lg_f64">;
defm V_CMPS_GE_F64 : VOPC_F64 <vopc<0x66>, "v_cmps_ge_f64">;
defm V_CMPS_O_F64 : VOPC_F64 <vopc<0x67>, "v_cmps_o_f64">;
defm V_CMPS_U_F64 : VOPC_F64 <vopc<0x68>, "v_cmps_u_f64">;
-defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "v_cmps_nge_f64">;
+defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "v_cmps_nge_f64", COND_NULL, "v_cmps_nle_f64">;
defm V_CMPS_NLG_F64 : VOPC_F64 <vopc<0x6a>, "v_cmps_nlg_f64">;
-defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "v_cmps_ngt_f64">;
+defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "v_cmps_ngt_f64", COND_NULL, "v_cmps_nlt_f64">;
defm V_CMPS_NLE_F64 : VOPC_F64 <vopc<0x6c>, "v_cmps_nle_f64">;
defm V_CMPS_NEQ_F64 : VOPC_F64 <vopc<0x6d>, "v_cmps_neq_f64">;
defm V_CMPS_NLT_F64 : VOPC_F64 <vopc<0x6e>, "v_cmps_nlt_f64">;
defm V_CMPS_TRU_F64 : VOPC_F64 <vopc<0x6f>, "v_cmps_tru_f64">;
-let hasSideEffects = 1, Defs = [EXEC] in {
-
-defm V_CMPSX_F_F64 : VOPC_F64 <vopc<0x70>, "v_cmpsx_f_f64">;
-defm V_CMPSX_LT_F64 : VOPC_F64 <vopc<0x71>, "v_cmpsx_lt_f64">;
-defm V_CMPSX_EQ_F64 : VOPC_F64 <vopc<0x72>, "v_cmpsx_eq_f64">;
-defm V_CMPSX_LE_F64 : VOPC_F64 <vopc<0x73>, "v_cmpsx_le_f64">;
-defm V_CMPSX_GT_F64 : VOPC_F64 <vopc<0x74>, "v_cmpsx_gt_f64">;
-defm V_CMPSX_LG_F64 : VOPC_F64 <vopc<0x75>, "v_cmpsx_lg_f64">;
-defm V_CMPSX_GE_F64 : VOPC_F64 <vopc<0x76>, "v_cmpsx_ge_f64">;
-defm V_CMPSX_O_F64 : VOPC_F64 <vopc<0x77>, "v_cmpsx_o_f64">;
-defm V_CMPSX_U_F64 : VOPC_F64 <vopc<0x78>, "v_cmpsx_u_f64">;
-defm V_CMPSX_NGE_F64 : VOPC_F64 <vopc<0x79>, "v_cmpsx_nge_f64">;
-defm V_CMPSX_NLG_F64 : VOPC_F64 <vopc<0x7a>, "v_cmpsx_nlg_f64">;
-defm V_CMPSX_NGT_F64 : VOPC_F64 <vopc<0x7b>, "v_cmpsx_ngt_f64">;
-defm V_CMPSX_NLE_F64 : VOPC_F64 <vopc<0x7c>, "v_cmpsx_nle_f64">;
-defm V_CMPSX_NEQ_F64 : VOPC_F64 <vopc<0x7d>, "v_cmpsx_neq_f64">;
-defm V_CMPSX_NLT_F64 : VOPC_F64 <vopc<0x7e>, "v_cmpsx_nlt_f64">;
-defm V_CMPSX_TRU_F64 : VOPC_F64 <vopc<0x7f>, "v_cmpsx_tru_f64">;
-
-} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMPSX_F_F64 : VOPCX_F64 <vopc<0x70>, "v_cmpsx_f_f64">;
+defm V_CMPSX_LT_F64 : VOPCX_F64 <vopc<0x71>, "v_cmpsx_lt_f64", "v_cmpsx_gt_f64">;
+defm V_CMPSX_EQ_F64 : VOPCX_F64 <vopc<0x72>, "v_cmpsx_eq_f64">;
+defm V_CMPSX_LE_F64 : VOPCX_F64 <vopc<0x73>, "v_cmpsx_le_f64", "v_cmpsx_ge_f64">;
+defm V_CMPSX_GT_F64 : VOPCX_F64 <vopc<0x74>, "v_cmpsx_gt_f64">;
+defm V_CMPSX_LG_F64 : VOPCX_F64 <vopc<0x75>, "v_cmpsx_lg_f64">;
+defm V_CMPSX_GE_F64 : VOPCX_F64 <vopc<0x76>, "v_cmpsx_ge_f64">;
+defm V_CMPSX_O_F64 : VOPCX_F64 <vopc<0x77>, "v_cmpsx_o_f64">;
+defm V_CMPSX_U_F64 : VOPCX_F64 <vopc<0x78>, "v_cmpsx_u_f64">;
+defm V_CMPSX_NGE_F64 : VOPCX_F64 <vopc<0x79>, "v_cmpsx_nge_f64", "v_cmpsx_nle_f64">;
+defm V_CMPSX_NLG_F64 : VOPCX_F64 <vopc<0x7a>, "v_cmpsx_nlg_f64">;
+defm V_CMPSX_NGT_F64 : VOPCX_F64 <vopc<0x7b>, "v_cmpsx_ngt_f64", "v_cmpsx_nlt_f64">;
+defm V_CMPSX_NLE_F64 : VOPCX_F64 <vopc<0x7c>, "v_cmpsx_nle_f64">;
+defm V_CMPSX_NEQ_F64 : VOPCX_F64 <vopc<0x7d>, "v_cmpsx_neq_f64">;
+defm V_CMPSX_NLT_F64 : VOPCX_F64 <vopc<0x7e>, "v_cmpsx_nlt_f64">;
+defm V_CMPSX_TRU_F64 : VOPCX_F64 <vopc<0x7f>, "v_cmpsx_tru_f64">;
} // End SubtargetPredicate = isSICI
defm V_CMP_F_I32 : VOPC_I32 <vopc<0x80, 0xc0>, "v_cmp_f_i32">;
-defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81, 0xc1>, "v_cmp_lt_i32", COND_SLT>;
+defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81, 0xc1>, "v_cmp_lt_i32", COND_SLT, "v_cmp_gt_i32">;
defm V_CMP_EQ_I32 : VOPC_I32 <vopc<0x82, 0xc2>, "v_cmp_eq_i32", COND_EQ>;
-defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83, 0xc3>, "v_cmp_le_i32", COND_SLE>;
+defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83, 0xc3>, "v_cmp_le_i32", COND_SLE, "v_cmp_ge_i32">;
defm V_CMP_GT_I32 : VOPC_I32 <vopc<0x84, 0xc4>, "v_cmp_gt_i32", COND_SGT>;
defm V_CMP_NE_I32 : VOPC_I32 <vopc<0x85, 0xc5>, "v_cmp_ne_i32", COND_NE>;
defm V_CMP_GE_I32 : VOPC_I32 <vopc<0x86, 0xc6>, "v_cmp_ge_i32", COND_SGE>;
defm V_CMP_T_I32 : VOPC_I32 <vopc<0x87, 0xc7>, "v_cmp_t_i32">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_I32 : VOPCX_I32 <vopc<0x90, 0xd0>, "v_cmpx_f_i32">;
-defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91, 0xd1>, "v_cmpx_lt_i32">;
+defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91, 0xd1>, "v_cmpx_lt_i32", "v_cmpx_gt_i32">;
defm V_CMPX_EQ_I32 : VOPCX_I32 <vopc<0x92, 0xd2>, "v_cmpx_eq_i32">;
-defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93, 0xd3>, "v_cmpx_le_i32">;
+defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93, 0xd3>, "v_cmpx_le_i32", "v_cmpx_ge_i32">;
defm V_CMPX_GT_I32 : VOPCX_I32 <vopc<0x94, 0xd4>, "v_cmpx_gt_i32">;
defm V_CMPX_NE_I32 : VOPCX_I32 <vopc<0x95, 0xd5>, "v_cmpx_ne_i32">;
defm V_CMPX_GE_I32 : VOPCX_I32 <vopc<0x96, 0xd6>, "v_cmpx_ge_i32">;
defm V_CMPX_T_I32 : VOPCX_I32 <vopc<0x97, 0xd7>, "v_cmpx_t_i32">;
-} // End hasSideEffects = 1
defm V_CMP_F_I64 : VOPC_I64 <vopc<0xa0, 0xe0>, "v_cmp_f_i64">;
-defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1, 0xe1>, "v_cmp_lt_i64", COND_SLT>;
+defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1, 0xe1>, "v_cmp_lt_i64", COND_SLT, "v_cmp_gt_i64">;
defm V_CMP_EQ_I64 : VOPC_I64 <vopc<0xa2, 0xe2>, "v_cmp_eq_i64", COND_EQ>;
-defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3, 0xe3>, "v_cmp_le_i64", COND_SLE>;
+defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3, 0xe3>, "v_cmp_le_i64", COND_SLE, "v_cmp_ge_i64">;
defm V_CMP_GT_I64 : VOPC_I64 <vopc<0xa4, 0xe4>, "v_cmp_gt_i64", COND_SGT>;
defm V_CMP_NE_I64 : VOPC_I64 <vopc<0xa5, 0xe5>, "v_cmp_ne_i64", COND_NE>;
defm V_CMP_GE_I64 : VOPC_I64 <vopc<0xa6, 0xe6>, "v_cmp_ge_i64", COND_SGE>;
defm V_CMP_T_I64 : VOPC_I64 <vopc<0xa7, 0xe7>, "v_cmp_t_i64">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_I64 : VOPCX_I64 <vopc<0xb0, 0xf0>, "v_cmpx_f_i64">;
-defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1, 0xf1>, "v_cmpx_lt_i64">;
+defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1, 0xf1>, "v_cmpx_lt_i64", "v_cmpx_gt_i64">;
defm V_CMPX_EQ_I64 : VOPCX_I64 <vopc<0xb2, 0xf2>, "v_cmpx_eq_i64">;
-defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3, 0xf3>, "v_cmpx_le_i64">;
+defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3, 0xf3>, "v_cmpx_le_i64", "v_cmpx_ge_i64">;
defm V_CMPX_GT_I64 : VOPCX_I64 <vopc<0xb4, 0xf4>, "v_cmpx_gt_i64">;
defm V_CMPX_NE_I64 : VOPCX_I64 <vopc<0xb5, 0xf5>, "v_cmpx_ne_i64">;
defm V_CMPX_GE_I64 : VOPCX_I64 <vopc<0xb6, 0xf6>, "v_cmpx_ge_i64">;
defm V_CMPX_T_I64 : VOPCX_I64 <vopc<0xb7, 0xf7>, "v_cmpx_t_i64">;
-} // End hasSideEffects = 1
defm V_CMP_F_U32 : VOPC_I32 <vopc<0xc0, 0xc8>, "v_cmp_f_u32">;
-defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1, 0xc9>, "v_cmp_lt_u32", COND_ULT>;
+defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1, 0xc9>, "v_cmp_lt_u32", COND_ULT, "v_cmp_gt_u32">;
defm V_CMP_EQ_U32 : VOPC_I32 <vopc<0xc2, 0xca>, "v_cmp_eq_u32", COND_EQ>;
-defm V_CMP_LE_U32 : VOPC_I32 <vopc<0xc3, 0xcb>, "v_cmp_le_u32", COND_ULE>;
+defm V_CMP_LE_U32 : VOPC_I32 <vopc<0xc3, 0xcb>, "v_cmp_le_u32", COND_ULE, "v_cmp_ge_u32">;
defm V_CMP_GT_U32 : VOPC_I32 <vopc<0xc4, 0xcc>, "v_cmp_gt_u32", COND_UGT>;
defm V_CMP_NE_U32 : VOPC_I32 <vopc<0xc5, 0xcd>, "v_cmp_ne_u32", COND_NE>;
defm V_CMP_GE_U32 : VOPC_I32 <vopc<0xc6, 0xce>, "v_cmp_ge_u32", COND_UGE>;
defm V_CMP_T_U32 : VOPC_I32 <vopc<0xc7, 0xcf>, "v_cmp_t_u32">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_U32 : VOPCX_I32 <vopc<0xd0, 0xd8>, "v_cmpx_f_u32">;
-defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1, 0xd9>, "v_cmpx_lt_u32">;
+defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1, 0xd9>, "v_cmpx_lt_u32", "v_cmpx_gt_u32">;
defm V_CMPX_EQ_U32 : VOPCX_I32 <vopc<0xd2, 0xda>, "v_cmpx_eq_u32">;
-defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3, 0xdb>, "v_cmpx_le_u32">;
+defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3, 0xdb>, "v_cmpx_le_u32", "v_cmpx_ge_u32">;
defm V_CMPX_GT_U32 : VOPCX_I32 <vopc<0xd4, 0xdc>, "v_cmpx_gt_u32">;
defm V_CMPX_NE_U32 : VOPCX_I32 <vopc<0xd5, 0xdd>, "v_cmpx_ne_u32">;
defm V_CMPX_GE_U32 : VOPCX_I32 <vopc<0xd6, 0xde>, "v_cmpx_ge_u32">;
defm V_CMPX_T_U32 : VOPCX_I32 <vopc<0xd7, 0xdf>, "v_cmpx_t_u32">;
-} // End hasSideEffects = 1
defm V_CMP_F_U64 : VOPC_I64 <vopc<0xe0, 0xe8>, "v_cmp_f_u64">;
-defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1, 0xe9>, "v_cmp_lt_u64", COND_ULT>;
+defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1, 0xe9>, "v_cmp_lt_u64", COND_ULT, "v_cmp_gt_u64">;
defm V_CMP_EQ_U64 : VOPC_I64 <vopc<0xe2, 0xea>, "v_cmp_eq_u64", COND_EQ>;
-defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3, 0xeb>, "v_cmp_le_u64", COND_ULE>;
+defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3, 0xeb>, "v_cmp_le_u64", COND_ULE, "v_cmp_ge_u64">;
defm V_CMP_GT_U64 : VOPC_I64 <vopc<0xe4, 0xec>, "v_cmp_gt_u64", COND_UGT>;
defm V_CMP_NE_U64 : VOPC_I64 <vopc<0xe5, 0xed>, "v_cmp_ne_u64", COND_NE>;
defm V_CMP_GE_U64 : VOPC_I64 <vopc<0xe6, 0xee>, "v_cmp_ge_u64", COND_UGE>;
defm V_CMP_T_U64 : VOPC_I64 <vopc<0xe7, 0xef>, "v_cmp_t_u64">;
-let hasSideEffects = 1 in {
-
defm V_CMPX_F_U64 : VOPCX_I64 <vopc<0xf0, 0xf8>, "v_cmpx_f_u64">;
-defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1, 0xf9>, "v_cmpx_lt_u64">;
+defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1, 0xf9>, "v_cmpx_lt_u64", "v_cmpx_gt_u64">;
defm V_CMPX_EQ_U64 : VOPCX_I64 <vopc<0xf2, 0xfa>, "v_cmpx_eq_u64">;
-defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3, 0xfb>, "v_cmpx_le_u64">;
+defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3, 0xfb>, "v_cmpx_le_u64", "v_cmpx_ge_u64">;
defm V_CMPX_GT_U64 : VOPCX_I64 <vopc<0xf4, 0xfc>, "v_cmpx_gt_u64">;
defm V_CMPX_NE_U64 : VOPCX_I64 <vopc<0xf5, 0xfd>, "v_cmpx_ne_u64">;
defm V_CMPX_GE_U64 : VOPCX_I64 <vopc<0xf6, 0xfe>, "v_cmpx_ge_u64">;
defm V_CMPX_T_U64 : VOPCX_I64 <vopc<0xf7, 0xff>, "v_cmpx_t_u64">;
-} // End hasSideEffects = 1
+} // End isCompare = 1, isCommutable = 1
defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <vopc<0x88, 0x10>, "v_cmp_class_f32">;
-
-let hasSideEffects = 1 in {
defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <vopc<0x98, 0x11>, "v_cmpx_class_f32">;
-} // End hasSideEffects = 1
-
defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <vopc<0xa8, 0x12>, "v_cmp_class_f64">;
-
-let hasSideEffects = 1 in {
defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <vopc<0xb8, 0x13>, "v_cmpx_class_f64">;
-} // End hasSideEffects = 1
-
-} // End isCompare = 1
//===----------------------------------------------------------------------===//
// DS Instructions
//===----------------------------------------------------------------------===//
-
defm DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VGPR_32>;
defm DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VGPR_32>;
defm DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VGPR_32>;
@@ -782,12 +760,26 @@ defm DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VGPR_32>;
defm DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VGPR_32>;
defm DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VGPR_32>;
defm DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VGPR_32>;
-defm DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
+defm DS_MSKOR_B32 : DS_1A2D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
+let mayLoad = 0 in {
+defm DS_WRITE_B32 : DS_1A1D_NORET <0xd, "ds_write_b32", VGPR_32>;
+defm DS_WRITE2_B32 : DS_1A1D_Off8_NORET <0xe, "ds_write2_b32", VGPR_32>;
+defm DS_WRITE2ST64_B32 : DS_1A1D_Off8_NORET <0xf, "ds_write2st64_b32", VGPR_32>;
+}
defm DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VGPR_32>;
defm DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VGPR_32>;
-defm DS_MIN_F32 : DS_1A1D_NORET <0x12, "ds_min_f32", VGPR_32>;
-defm DS_MAX_F32 : DS_1A1D_NORET <0x13, "ds_max_f32", VGPR_32>;
-
+defm DS_MIN_F32 : DS_1A2D_NORET <0x12, "ds_min_f32", VGPR_32>;
+defm DS_MAX_F32 : DS_1A2D_NORET <0x13, "ds_max_f32", VGPR_32>;
+
+defm DS_GWS_INIT : DS_1A_GDS <0x19, "ds_gws_init">;
+defm DS_GWS_SEMA_V : DS_1A_GDS <0x1a, "ds_gws_sema_v">;
+defm DS_GWS_SEMA_BR : DS_1A_GDS <0x1b, "ds_gws_sema_br">;
+defm DS_GWS_SEMA_P : DS_1A_GDS <0x1c, "ds_gws_sema_p">;
+defm DS_GWS_BARRIER : DS_1A_GDS <0x1d, "ds_gws_barrier">;
+let mayLoad = 0 in {
+defm DS_WRITE_B8 : DS_1A1D_NORET <0x1e, "ds_write_b8", VGPR_32>;
+defm DS_WRITE_B16 : DS_1A1D_NORET <0x1f, "ds_write_b16", VGPR_32>;
+}
defm DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
defm DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
defm DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
@@ -800,20 +792,34 @@ defm DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VGPR_32, "ds_max_u32"
defm DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VGPR_32, "ds_and_b32">;
defm DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VGPR_32, "ds_or_b32">;
defm DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">;
-defm DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
+defm DS_MSKOR_RTN_B32 : DS_1A2D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VGPR_32>;
-//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2_b32">;
-//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2st64_b32">;
+defm DS_WRXCHG2_RTN_B32 : DS_1A2D_RET <
+ 0x2e, "ds_wrxchg2_rtn_b32", VReg_64, "", VGPR_32
+>;
+defm DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_RET <
+ 0x2f, "ds_wrxchg2st64_rtn_b32", VReg_64, "", VGPR_32
+>;
defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">;
defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
-defm DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
-defm DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
-
+defm DS_MIN_RTN_F32 : DS_1A2D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
+defm DS_MAX_RTN_F32 : DS_1A2D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
let SubtargetPredicate = isCI in {
defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
} // End isCI
-
-
+defm DS_SWIZZLE_B32 : DS_1A_RET <0x35, "ds_swizzle_b32", VGPR_32>;
+let mayStore = 0 in {
+defm DS_READ_B32 : DS_1A_RET <0x36, "ds_read_b32", VGPR_32>;
+defm DS_READ2_B32 : DS_1A_Off8_RET <0x37, "ds_read2_b32", VReg_64>;
+defm DS_READ2ST64_B32 : DS_1A_Off8_RET <0x38, "ds_read2st64_b32", VReg_64>;
+defm DS_READ_I8 : DS_1A_RET <0x39, "ds_read_i8", VGPR_32>;
+defm DS_READ_U8 : DS_1A_RET <0x3a, "ds_read_u8", VGPR_32>;
+defm DS_READ_I16 : DS_1A_RET <0x3b, "ds_read_i16", VGPR_32>;
+defm DS_READ_U16 : DS_1A_RET <0x3c, "ds_read_u16", VGPR_32>;
+}
+defm DS_CONSUME : DS_0A_RET <0x3d, "ds_consume">;
+defm DS_APPEND : DS_0A_RET <0x3e, "ds_append">;
+defm DS_ORDERED_COUNT : DS_1A_RET_GDS <0x3f, "ds_ordered_count">;
defm DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>;
defm DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>;
defm DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>;
@@ -826,7 +832,12 @@ defm DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>;
defm DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>;
defm DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>;
defm DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>;
-defm DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
+defm DS_MSKOR_B64 : DS_1A2D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
+let mayLoad = 0 in {
+defm DS_WRITE_B64 : DS_1A1D_NORET <0x4d, "ds_write_b64", VReg_64>;
+defm DS_WRITE2_B64 : DS_1A1D_Off8_NORET <0x4e, "ds_write2_b64", VReg_64>;
+defm DS_WRITE2ST64_B64 : DS_1A1D_Off8_NORET <0x4f, "ds_write2st64_b64", VReg_64>;
+}
defm DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>;
defm DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>;
defm DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>;
@@ -844,57 +855,88 @@ defm DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64"
defm DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">;
defm DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">;
defm DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">;
-defm DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
+defm DS_MSKOR_RTN_B64 : DS_1A2D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">;
-//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2_b64">;
-//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2st64_b64">;
+defm DS_WRXCHG2_RTN_B64 : DS_1A2D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_128, "ds_wrxchg2_b64", VReg_64>;
+defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_RET <0x6f, "ds_wrxchg2st64_rtn_b64", VReg_128, "ds_wrxchg2st64_b64", VReg_64>;
defm DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
defm DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
defm DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_rtn_f64", VReg_64, "ds_min_f64">;
defm DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_rtn_f64", VReg_64, "ds_max_f64">;
+let mayStore = 0 in {
+defm DS_READ_B64 : DS_1A_RET <0x76, "ds_read_b64", VReg_64>;
+defm DS_READ2_B64 : DS_1A_Off8_RET <0x77, "ds_read2_b64", VReg_128>;
+defm DS_READ2ST64_B64 : DS_1A_Off8_RET <0x78, "ds_read2st64_b64", VReg_128>;
+}
+
+defm DS_ADD_SRC2_U32 : DS_1A <0x80, "ds_add_src2_u32">;
+defm DS_SUB_SRC2_U32 : DS_1A <0x81, "ds_sub_src2_u32">;
+defm DS_RSUB_SRC2_U32 : DS_1A <0x82, "ds_rsub_src2_u32">;
+defm DS_INC_SRC2_U32 : DS_1A <0x83, "ds_inc_src2_u32">;
+defm DS_DEC_SRC2_U32 : DS_1A <0x84, "ds_dec_src2_u32">;
+defm DS_MIN_SRC2_I32 : DS_1A <0x85, "ds_min_src2_i32">;
+defm DS_MAX_SRC2_I32 : DS_1A <0x86, "ds_max_src2_i32">;
+defm DS_MIN_SRC2_U32 : DS_1A <0x87, "ds_min_src2_u32">;
+defm DS_MAX_SRC2_U32 : DS_1A <0x88, "ds_max_src2_u32">;
+defm DS_AND_SRC2_B32 : DS_1A <0x89, "ds_and_src2_b32">;
+defm DS_OR_SRC2_B32 : DS_1A <0x8a, "ds_or_src2_b32">;
+defm DS_XOR_SRC2_B32 : DS_1A <0x8b, "ds_xor_src2_b32">;
+defm DS_WRITE_SRC2_B32 : DS_1A <0x8c, "ds_write_src2_b32">;
+
+defm DS_MIN_SRC2_F32 : DS_1A <0x92, "ds_min_src2_f32">;
+defm DS_MAX_SRC2_F32 : DS_1A <0x93, "ds_max_src2_f32">;
+
+defm DS_ADD_SRC2_U64 : DS_1A <0xc0, "ds_add_src2_u64">;
+defm DS_SUB_SRC2_U64 : DS_1A <0xc1, "ds_sub_src2_u64">;
+defm DS_RSUB_SRC2_U64 : DS_1A <0xc2, "ds_rsub_src2_u64">;
+defm DS_INC_SRC2_U64 : DS_1A <0xc3, "ds_inc_src2_u64">;
+defm DS_DEC_SRC2_U64 : DS_1A <0xc4, "ds_dec_src2_u64">;
+defm DS_MIN_SRC2_I64 : DS_1A <0xc5, "ds_min_src2_i64">;
+defm DS_MAX_SRC2_I64 : DS_1A <0xc6, "ds_max_src2_i64">;
+defm DS_MIN_SRC2_U64 : DS_1A <0xc7, "ds_min_src2_u64">;
+defm DS_MAX_SRC2_U64 : DS_1A <0xc8, "ds_max_src2_u64">;
+defm DS_AND_SRC2_B64 : DS_1A <0xc9, "ds_and_src2_b64">;
+defm DS_OR_SRC2_B64 : DS_1A <0xca, "ds_or_src2_b64">;
+defm DS_XOR_SRC2_B64 : DS_1A <0xcb, "ds_xor_src2_b64">;
+defm DS_WRITE_SRC2_B64 : DS_1A <0xcc, "ds_write_src2_b64">;
+
+defm DS_MIN_SRC2_F64 : DS_1A <0xd2, "ds_min_src2_f64">;
+defm DS_MAX_SRC2_F64 : DS_1A <0xd3, "ds_max_src2_f64">;
+
//let SubtargetPredicate = isCI in {
// DS_CONDXCHG32_RTN_B64
// DS_CONDXCHG32_RTN_B128
//} // End isCI
-// TODO: _SRC2_* forms
-
-defm DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "ds_write_b32", VGPR_32>;
-defm DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "ds_write_b8", VGPR_32>;
-defm DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "ds_write_b16", VGPR_32>;
-defm DS_WRITE_B64 : DS_Store_Helper <0x00000004d, "ds_write_b64", VReg_64>;
-
-defm DS_READ_B32 : DS_Load_Helper <0x00000036, "ds_read_b32", VGPR_32>;
-defm DS_READ_I8 : DS_Load_Helper <0x00000039, "ds_read_i8", VGPR_32>;
-defm DS_READ_U8 : DS_Load_Helper <0x0000003a, "ds_read_u8", VGPR_32>;
-defm DS_READ_I16 : DS_Load_Helper <0x0000003b, "ds_read_i16", VGPR_32>;
-defm DS_READ_U16 : DS_Load_Helper <0x0000003c, "ds_read_u16", VGPR_32>;
-defm DS_READ_B64 : DS_Load_Helper <0x00000076, "ds_read_b64", VReg_64>;
-
-// 2 forms.
-defm DS_WRITE2_B32 : DS_Store2_Helper <0x0000000E, "ds_write2_b32", VGPR_32>;
-defm DS_WRITE2ST64_B32 : DS_Store2_Helper <0x0000000F, "ds_write2st64_b32", VGPR_32>;
-defm DS_WRITE2_B64 : DS_Store2_Helper <0x0000004E, "ds_write2_b64", VReg_64>;
-defm DS_WRITE2ST64_B64 : DS_Store2_Helper <0x0000004F, "ds_write2st64_b64", VReg_64>;
-
-defm DS_READ2_B32 : DS_Load2_Helper <0x00000037, "ds_read2_b32", VReg_64>;
-defm DS_READ2ST64_B32 : DS_Load2_Helper <0x00000038, "ds_read2st64_b32", VReg_64>;
-defm DS_READ2_B64 : DS_Load2_Helper <0x00000075, "ds_read2_b64", VReg_128>;
-defm DS_READ2ST64_B64 : DS_Load2_Helper <0x00000076, "ds_read2st64_b64", VReg_128>;
-
//===----------------------------------------------------------------------===//
// MUBUF Instructions
//===----------------------------------------------------------------------===//
-//def BUFFER_LOAD_FORMAT_X : MUBUF_ <mubuf<0x00>, "buffer_load_format_x", []>;
-//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <mubuf<0x01>, "buffer_load_format_xy", []>;
-//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <mubuf<0x02>, "buffer_load_format_xyz", []>;
-defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <mubuf<0x03>, "buffer_load_format_xyzw", VReg_128>;
-//def BUFFER_STORE_FORMAT_X : MUBUF_ <mubuf<0x04>, "buffer_store_format_x", []>;
-//def BUFFER_STORE_FORMAT_XY : MUBUF_ <mubuf<0x05>, "buffer_store_format_xy", []>;
-//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <mubuf<0x06>, "buffer_store_format_xyz", []>;
-//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <mubuf<0x07>, "buffer_store_format_xyzw", []>;
+defm BUFFER_LOAD_FORMAT_X : MUBUF_Load_Helper <
+ mubuf<0x00>, "buffer_load_format_x", VGPR_32
+>;
+defm BUFFER_LOAD_FORMAT_XY : MUBUF_Load_Helper <
+ mubuf<0x01>, "buffer_load_format_xy", VReg_64
+>;
+defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Load_Helper <
+ mubuf<0x02>, "buffer_load_format_xyz", VReg_96
+>;
+defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <
+ mubuf<0x03>, "buffer_load_format_xyzw", VReg_128
+>;
+defm BUFFER_STORE_FORMAT_X : MUBUF_Store_Helper <
+ mubuf<0x04>, "buffer_store_format_x", VGPR_32
+>;
+defm BUFFER_STORE_FORMAT_XY : MUBUF_Store_Helper <
+ mubuf<0x05>, "buffer_store_format_xy", VReg_64
+>;
+defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Store_Helper <
+ mubuf<0x06>, "buffer_store_format_xyz", VReg_96
+>;
+defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Store_Helper <
+ mubuf<0x07>, "buffer_store_format_xyzw", VReg_128
+>;
defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <
mubuf<0x08, 0x10>, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global
>;
@@ -1418,13 +1460,17 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
// VOP2 Instructions
//===----------------------------------------------------------------------===//
-defm V_CNDMASK_B32_e64 : VOP3_m_nomods <vop3<0x100>, (outs VGPR_32:$dst),
- (ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2),
- "v_cndmask_b32_e64 $dst, $src0, $src1, $src2",
- [(set i32:$dst, (select i1:$src2, i32:$src1, i32:$src0))],
- "v_cndmask_b32_e64", 3
->;
+multiclass V_CNDMASK <vop2 op, string name> {
+ defm _e32 : VOP2_m <
+ op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins32, VOP_CNDMASK.Asm32, [],
+ name, name>;
+
+ defm _e64 : VOP3_m <
+ op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins64,
+ name#!cast<string>(VOP_CNDMASK.Asm64), [], name, 3>;
+}
+defm V_CNDMASK_B32 : V_CNDMASK<vop2<0x0>, "v_cndmask_b32">;
let isCommutable = 1 in {
defm V_ADD_F32 : VOP2Inst <vop2<0x3, 0x1>, "v_add_f32",
@@ -1568,8 +1614,8 @@ defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32",
>;
} // End isCommutable = 1
-defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", VOP_I32_I32_I32,
- AMDGPUbfm
+defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32",
+ VOP_I32_I32_I32
>;
defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32",
VOP_I32_I32_I32
@@ -1638,14 +1684,12 @@ defm V_CUBEMA_F32 : VOP3Inst <vop3<0x147, 0x1c7>, "v_cubema_f32",
VOP_F32_F32_F32_F32
>;
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
defm V_BFE_U32 : VOP3Inst <vop3<0x148, 0x1c8>, "v_bfe_u32",
VOP_I32_I32_I32_I32, AMDGPUbfe_u32
>;
defm V_BFE_I32 : VOP3Inst <vop3<0x149, 0x1c9>, "v_bfe_i32",
VOP_I32_I32_I32_I32, AMDGPUbfe_i32
>;
-}
defm V_BFI_B32 : VOP3Inst <vop3<0x14a, 0x1ca>, "v_bfi_b32",
VOP_I32_I32_I32_I32, AMDGPUbfi
@@ -1833,6 +1877,11 @@ defm V_ASHRREV_I64 : VOP3Inst <vop3<0, 0x291>, "v_ashrrev_i64",
//===----------------------------------------------------------------------===//
let isCodeGenOnly = 1, isPseudo = 1 in {
+// For use in patterns
+def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$dst),
+ (ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", []
+>;
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
// 64-bit vector move instruction. This is mainly used by the SIFoldOperands
// pass to enable folding of inline immediates.
@@ -2049,7 +2098,7 @@ def : Pat <
/* int_SI_vs_load_input */
def : Pat<
(SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
- (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
+ (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0)
>;
/* int_SI_export */
@@ -2196,6 +2245,11 @@ def : Pat <
(V_BCNT_U32_B32_e64 $popcnt, $val)
>;
+def : Pat <
+ (i32 (select i1:$src0, i32:$src1, i32:$src2)),
+ (V_CNDMASK_B32_e64 $src2, $src1, $src0)
+>;
+
/********** ======================= **********/
/********** Image sampling patterns **********/
/********** ======================= **********/
@@ -2738,7 +2792,7 @@ def : Ext32Pat <anyext>;
// Offset in an 32Bit VGPR
def : Pat <
(SIload_constant v4i32:$sbase, i32:$voff),
- (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0, 0)
+ (BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, 0, 0, 0, 0, 0)
>;
// The multiplication scales from [0,1] to the unsigned integer range
@@ -2781,7 +2835,7 @@ def : ROTRPattern <V_ALIGNBIT_B32>;
class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
(vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
- (inst (i1 0), $ptr, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;
def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
@@ -2799,12 +2853,12 @@ def : DSReadPat <DS_READ_B64, v2i32, local_load_aligned8bytes>;
def : Pat <
(v2i32 (local_load (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
i8:$offset1))),
- (DS_READ2_B32 (i1 0), $ptr, $offset0, $offset1, (S_MOV_B32 -1))
+ (DS_READ2_B32 $ptr, $offset0, $offset1, (i1 0), (S_MOV_B32 -1))
>;
class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
- (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, $value, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;
def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
@@ -2819,14 +2873,14 @@ def : DSWritePat <DS_WRITE_B64, v2i32, local_store_aligned8bytes>;
def : Pat <
(local_store v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
i8:$offset1)),
- (DS_WRITE2_B32 (i1 0), $ptr, (EXTRACT_SUBREG $value, sub0),
- (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
- (S_MOV_B32 -1))
+ (DS_WRITE2_B32 $ptr, (EXTRACT_SUBREG $value, sub0),
+ (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
+ (i1 0), (S_MOV_B32 -1))
>;
class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
- (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, $value, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;
// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
@@ -2842,13 +2896,13 @@ class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
class DSAtomicIncRetPat<DS inst, ValueType vt,
Instruction LoadImm, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
- (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;
class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
- (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;
@@ -2898,8 +2952,9 @@ def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
PatFrag constant_ld> {
def : Pat <
- (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset))),
- (Instr_ADDR64 $srsrc, $vaddr, $soffset, $offset)
+ (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
+ (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
>;
}
@@ -2916,7 +2971,7 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32, constant_load>;
class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
(vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr,
i32:$soffset, u16imm:$offset))),
- (Instr $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
+ (Instr $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>;
@@ -2935,7 +2990,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
(vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset,
imm:$offset, 0, 0, imm:$glc, imm:$slc,
imm:$tfe)),
- (offset $rsrc, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+ (offset $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
(as_i1imm $slc), (as_i1imm $tfe))
>;
@@ -2943,7 +2998,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
(vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
imm:$offset, 1, 0, imm:$glc, imm:$slc,
imm:$tfe)),
- (offen $rsrc, $vaddr, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
+ (offen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
(as_i1imm $tfe))
>;
@@ -2951,7 +3006,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
(vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
imm:$offset, 0, 1, imm:$glc, imm:$slc,
imm:$tfe)),
- (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+ (idxen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
(as_i1imm $slc), (as_i1imm $tfe))
>;
@@ -2959,7 +3014,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
(vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset,
imm:$offset, 1, 1, imm:$glc, imm:$slc,
imm:$tfe)),
- (bothen $rsrc, $vaddr, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
+ (bothen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
(as_i1imm $tfe))
>;
}
@@ -2974,7 +3029,7 @@ defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_
class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
(st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
u16imm:$offset)),
- (Instr $value, $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
+ (Instr $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;
def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;
@@ -3104,26 +3159,26 @@ multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, SI_INDIRECT_DST I
// 1. Extract with offset
def : Pat<
- (vector_extract vt:$vec, (add i32:$idx, imm:$off)),
- (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off))
+ (eltvt (vector_extract vt:$vec, (add i32:$idx, imm:$off))),
+ (SI_INDIRECT_SRC $vec, $idx, imm:$off)
>;
// 2. Extract without offset
def : Pat<
- (vector_extract vt:$vec, i32:$idx),
- (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0))
+ (eltvt (vector_extract vt:$vec, i32:$idx)),
+ (SI_INDIRECT_SRC $vec, $idx, 0)
>;
// 3. Insert with offset
def : Pat<
(vector_insert vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)),
- (IndDst (IMPLICIT_DEF), $vec, $idx, imm:$off, $val)
+ (IndDst $vec, $idx, imm:$off, $val)
>;
// 4. Insert without offset
def : Pat<
(vector_insert vt:$vec, eltvt:$val, i32:$idx),
- (IndDst (IMPLICIT_DEF), $vec, $idx, 0, $val)
+ (IndDst $vec, $idx, 0, $val)
>;
}
@@ -3269,6 +3324,89 @@ def : Pat <
(V_CNDMASK_B32_e64 $src0, $src1, $src2)
>;
+multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
+ def : Pat <
+ (vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)),
+ (BFM $a, $b)
+ >;
+
+ def : Pat <
+ (vt (add (vt (shl 1, vt:$a)), -1)),
+ (BFM $a, (MOV 0))
+ >;
+}
+
+defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
+// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
+
+def : BFEPattern <V_BFE_U32, S_MOV_B32>;
+
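A quick scalar model makes the two patterns above concrete. This is an illustrative sketch only: the helper name is hypothetical, and the semantics ((1 << a) - 1) << b are read directly off the DAG patterns, not from hardware documentation.

#include <cstdint>

// Hypothetical scalar model of what the BFM patterns compute: a run of
// 'a' one-bits shifted left by 'b' (assumes a < 32), e.g. bfm32(4, 8)
// == 0xF00. The second pattern is the b == 0 case, hence (BFM $a, (MOV 0)).
static uint32_t bfm32(uint32_t a, uint32_t b) {
  return ((1u << a) - 1u) << b;
}
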
+//===----------------------------------------------------------------------===//
+// Fract Patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isSI] in {
+
+// V_FRACT is buggy on SI, so the F32 version is never used and (x - floor(x))
+// is used instead. However, SI doesn't have V_FLOOR_F64, so the most efficient
+// way to implement f64 floor is using V_FRACT_F64.
+// The workaround for the V_FRACT bug is:
+// fract(x) = isnan(x) ? x : min(V_FRACT(x), 0.99999999999999999)
+
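As a rough scalar model of the workaround (an illustrative sketch only: the function name is made up, x - floor(x) stands in for V_FRACT_F64, and 0x3fefffffffffffff encodes the largest double strictly below 1.0, matching the V_MOV_B64_PSEUDO constant in the patterns below):

#include <cmath>

// Sketch of the SI fract workaround: NaN passes through unchanged,
// otherwise the fract result is clamped to just below 1.0.
static double si_fract_f64(double x) {
  const double kAlmostOne = 0x1.fffffffffffffp-1; // bits 0x3fefffffffffffff
  return std::isnan(x) ? x : std::fmin(x - std::floor(x), kAlmostOne);
}
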
+// Convert (x + (-floor(x))) to fract(x)
+def : Pat <
+ (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
+ (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
+ (V_CNDMASK_B64_PSEUDO
+ $x,
+ (V_MIN_F64
+ SRCMODS.NONE,
+ (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
+ SRCMODS.NONE,
+ (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
+ DSTCLAMP.NONE, DSTOMOD.NONE),
+ (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/))
+>;
+
+// Convert floor(x) to (x - fract(x))
+def : Pat <
+ (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))),
+ (V_ADD_F64
+ $mods,
+ $x,
+ SRCMODS.NEG,
+ (V_CNDMASK_B64_PSEUDO
+ $x,
+ (V_MIN_F64
+ SRCMODS.NONE,
+ (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
+ SRCMODS.NONE,
+ (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
+ DSTCLAMP.NONE, DSTOMOD.NONE),
+ (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/)),
+ DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+} // End Predicates = [isSI]
+
+let Predicates = [isCI] in {
+
+// Convert (x - floor(x)) to fract(x)
+def : Pat <
+ (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
+ (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
+ (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+// Convert (x + (-floor(x))) to fract(x)
+def : Pat <
+ (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
+ (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
+ (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+} // End Predicates = [isCI]
+
//============================================================================//
// Miscellaneous Optimization Patterns
//============================================================================//
diff --git a/lib/Target/R600/SILoadStoreOptimizer.cpp b/lib/Target/R600/SILoadStoreOptimizer.cpp
index 46630d0..a927ad8 100644
--- a/lib/Target/R600/SILoadStoreOptimizer.cpp
+++ b/lib/Target/R600/SILoadStoreOptimizer.cpp
@@ -45,6 +45,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -249,10 +250,10 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
DebugLoc DL = I->getDebugLoc();
MachineInstrBuilder Read2
= BuildMI(*MBB, I, DL, Read2Desc, DestReg)
- .addImm(0) // gds
.addOperand(*AddrReg) // addr
.addImm(NewOffset0) // offset0
.addImm(NewOffset1) // offset1
+ .addImm(0) // gds
.addOperand(*M0Reg) // M0
.addMemOperand(*I->memoperands_begin())
.addMemOperand(*Paired->memoperands_begin());
@@ -332,12 +333,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
MachineInstrBuilder Write2
= BuildMI(*MBB, I, DL, Write2Desc)
- .addImm(0) // gds
.addOperand(*Addr) // addr
.addOperand(*Data0) // data0
.addOperand(*Data1) // data1
.addImm(NewOffset0) // offset0
.addImm(NewOffset1) // offset1
+ .addImm(0) // gds
.addOperand(*M0Reg) // m0
.addMemOperand(*I->memoperands_begin())
.addMemOperand(*Paired->memoperands_begin());
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index 9224e14..13a8974 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -14,7 +14,6 @@
#include "SIRegisterInfo.h"
-#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -25,9 +24,7 @@
using namespace llvm;
-SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
-: AMDGPURegisterInfo(st)
- { }
+SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {}
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
@@ -47,14 +44,34 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AMDGPU::VGPR255);
Reserved.set(AMDGPU::VGPR254);
+ // Tonga and Iceland can only allocate a fixed number of SGPRs due
+ // to a hw bug.
+ if (MF.getSubtarget<AMDGPUSubtarget>().hasSGPRInitBug()) {
+ unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
+ // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
+ // Assume XNACK_MASK is unused.
+ unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
+
+ for (unsigned i = Limit; i < NumSGPRs; ++i) {
+ unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
+ MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true);
+
+ for (; R.isValid(); ++R)
+ Reserved.set(*R);
+ }
+ }
+
return Reserved;
}
-unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
+unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
+ unsigned Idx) const {
+ const AMDGPUSubtarget &STI = MF.getSubtarget<AMDGPUSubtarget>();
// FIXME: We should adjust the max number of waves based on LDS size.
- unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU());
- unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
+ unsigned SGPRLimit = getNumSGPRsAllowed(STI.getGeneration(),
+ STI.getMaxWavesPerCU());
+ unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU());
for (regclass_iterator I = regclass_begin(), E = regclass_end();
I != E; ++I) {
@@ -125,9 +142,10 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
int64_t Offset,
RegScavenger *RS) const {
- const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
MachineBasicBlock *MBB = MI->getParent();
const MachineFunction *MF = MI->getParent()->getParent();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
LLVMContext &Ctx = MF->getFunction()->getContext();
DebugLoc DL = MI->getDebugLoc();
bool IsLoad = TII->get(LoadStoreOp).mayLoad();
@@ -162,8 +180,8 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
.addReg(SubReg, getDefRegState(IsLoad))
.addReg(ScratchRsrcReg, getKillRegState(IsKill))
- .addImm(Offset)
.addReg(SOffset)
+ .addImm(Offset)
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // tfe
@@ -178,7 +196,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB = MI->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
- const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
DebugLoc DL = MI->getDebugLoc();
MachineOperand &FIOp = MI->getOperand(FIOperandNum);
@@ -249,7 +268,22 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
.addReg(SubReg);
}
}
- TII->insertNOPs(MI, 3);
+
+ // TODO: only do this when it is needed
+ switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
+ case AMDGPUSubtarget::SOUTHERN_ISLANDS:
+ // "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
+ TII->insertNOPs(MI, 3);
+ break;
+ case AMDGPUSubtarget::SEA_ISLANDS:
+ break;
+ default: // VOLCANIC_ISLANDS and later
+ // "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI
+ // and later. This also applies to VALUs which write VCC, but we're
+ // unlikely to see VMEM use VCC.
+ TII->insertNOPs(MI, 4);
+ }
+
MI->eraseFromParent();
break;
}
@@ -494,14 +528,24 @@ unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
}
}
-unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const {
- switch(WaveCount) {
- case 10: return 48;
- case 9: return 56;
- case 8: return 64;
- case 7: return 72;
- case 6: return 80;
- case 5: return 96;
- default: return 103;
+unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
+ unsigned WaveCount) const {
+ if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ switch (WaveCount) {
+ case 10: return 80;
+ case 9: return 80;
+ case 8: return 96;
+ default: return 102;
+ }
+ } else {
+ switch(WaveCount) {
+ case 10: return 48;
+ case 9: return 56;
+ case 8: return 64;
+ case 7: return 72;
+ case 6: return 80;
+ case 5: return 96;
+ default: return 103;
+ }
}
}
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index d908ffd..bfdb67c 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -17,17 +17,19 @@
#define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/Support/Debug.h"
namespace llvm {
struct SIRegisterInfo : public AMDGPURegisterInfo {
- SIRegisterInfo(const AMDGPUSubtarget &st);
+ SIRegisterInfo();
BitVector getReservedRegs(const MachineFunction &MF) const override;
- unsigned getRegPressureSetLimit(unsigned Idx) const override;
+ unsigned getRegPressureSetLimit(const MachineFunction &MF,
+ unsigned Idx) const override;
bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
@@ -111,7 +113,8 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
/// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
/// concurrent waves.
- unsigned getNumSGPRsAllowed(unsigned WaveCount) const;
+ unsigned getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
+ unsigned WaveCount) const;
unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const;
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 8b25e95..7bb5dc2 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -256,10 +256,3 @@ def VSrc_64 : RegImmOperand<VS_64>;
def VCSrc_32 : RegInlineOperand<VS_32>;
def VCSrc_64 : RegInlineOperand<VS_64>;
-
-//===----------------------------------------------------------------------===//
-// SGPR and VGPR register classes
-//===----------------------------------------------------------------------===//
-
-def VSrc_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128,
- (add VReg_128, SReg_128)>;
diff --git a/lib/Target/R600/SIShrinkInstructions.cpp b/lib/Target/R600/SIShrinkInstructions.cpp
index 97bbd78..51e72cd 100644
--- a/lib/Target/R600/SIShrinkInstructions.cpp
+++ b/lib/Target/R600/SIShrinkInstructions.cpp
@@ -18,9 +18,10 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "si-shrink-instructions"
@@ -88,6 +89,11 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
// Can't shrink instruction with three operands.
+  // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
+  // a special case for it. It can only be shrunk if the third operand
+  // is vcc. We should handle this the same way we handle vopc, by adding
+  // a register allocation hint pre-regalloc and then doing the shrinking
+  // post-regalloc.
if (Src2)
return false;
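
For context on why only the vcc case can shrink, an illustrative C++ sketch (not part of this patch): the per-lane operation is a plain select, but the 32-bit VOP2 encoding has no field for the selector, which is implicitly VCC, while the 64-bit VOP3 encoding can name an arbitrary SGPR pair.

#include <cstdint>

// Per-lane semantics of v_cndmask_b32. In the _e32 encoding the 'cc'
// input cannot be encoded and is always VCC, so the _e64 form is only
// shrinkable when its third operand happens to be vcc.
static uint32_t cndmask_lane(uint32_t src0, uint32_t src1, bool cc) {
  return cc ? src1 : src0;
}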