summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp16
-rw-r--r--src/gallium/drivers/radeon/R600Instructions.td21
2 files changed, 32 insertions, 5 deletions
diff --git a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp
index b62c7ab..5b5932a 100644
--- a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp
+++ b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp
@@ -691,6 +691,11 @@ AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst)
}
Type *aType = inst->getType();
bool isVector = aType->isVectorTy();
+
+ // XXX Support vector types
+ if (isVector) {
+ return false;
+ }
int numEle = 1;
// This only works on 32bit integers
if (aType->getScalarType()
@@ -792,23 +797,24 @@ AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst)
callTypes.push_back(aType);
callTypes.push_back(aType);
FunctionType *funcType = FunctionType::get(aType, callTypes, false);
- std::string name = "__amdil_ubit_extract";
+ std::string name = "llvm.AMDIL.bit.extract.u32";
if (isVector) {
- name += "_v" + itostr(numEle) + "i32";
+ name += ".v" + itostr(numEle) + "i32";
} else {
- name += "_i32";
+ name += ".";
}
// Lets create the function.
Function *Func =
dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
getOrInsertFunction(llvm::StringRef(name), funcType));
Value *Operands[3] = {
- newMaskConst,
+ ShiftInst->getOperand(0),
shiftValConst,
- ShiftInst->getOperand(0)
+ newMaskConst
};
// Lets create the Call with the operands
CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
+ CI->setDoesNotAccessMemory();
CI->insertBefore(inst);
inst->replaceAllUsesWith(CI);
return true;
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
index 409969b..6c74c6c 100644
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -822,6 +822,27 @@ def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
let Predicates = [isEGorCayman] in {
+ // BFE_UINT - bit_extract, an optimization for mask and shift
+ // Src0 = Input
+ // Src1 = Offset
+ // Src2 = Width
+ //
+ // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
+ //
+ // Example Usage:
+ // (Offset, Width)
+ //
+ // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0
+ // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8
+ // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16
+ // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24
+ def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
+ [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0,
+ R600_Reg32:$src1,
+ R600_Reg32:$src2))],
+ VecALU
+ >;
+
def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
[(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
R600_Reg32:$src2))],