diff options
-rw-r--r-- | src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp | 16 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/R600Instructions.td | 21 |
2 files changed, 32 insertions, 5 deletions
diff --git a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp index b62c7ab..5b5932a 100644 --- a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp +++ b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp @@ -691,6 +691,11 @@ AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst) } Type *aType = inst->getType(); bool isVector = aType->isVectorTy(); + + // XXX Support vector types + if (isVector) { + return false; + } int numEle = 1; // This only works on 32bit integers if (aType->getScalarType() @@ -792,23 +797,24 @@ AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst) callTypes.push_back(aType); callTypes.push_back(aType); FunctionType *funcType = FunctionType::get(aType, callTypes, false); - std::string name = "__amdil_ubit_extract"; + std::string name = "llvm.AMDIL.bit.extract.u32"; if (isVector) { - name += "_v" + itostr(numEle) + "i32"; + name += ".v" + itostr(numEle) + "i32"; } else { - name += "_i32"; + name += "."; } // Lets create the function. Function *Func = dyn_cast<Function>(inst->getParent()->getParent()->getParent()-> getOrInsertFunction(llvm::StringRef(name), funcType)); Value *Operands[3] = { - newMaskConst, + ShiftInst->getOperand(0), shiftValConst, - ShiftInst->getOperand(0) + newMaskConst }; // Lets create the Call with the operands CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt"); + CI->setDoesNotAccessMemory(); CI->insertBefore(inst); inst->replaceAllUsesWith(CI); return true; diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index 409969b..6c74c6c 100644 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -822,6 +822,27 @@ def RECIP_UINT_eg : RECIP_UINT_Common<0x94>; let Predicates = [isEGorCayman] in { + // BFE_UINT - bit_extract, an optimization for mask and shift + // Src0 = Input + // Src1 = Offset + // Src2 = Width + // + // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width) + // + // Example Usage: + // (Offset, Width) + // + // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0 + // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8 + // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16 + // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24 + def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT", + [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0, + R600_Reg32:$src1, + R600_Reg32:$src2))], + VecALU + >; + def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))], |