diff options
author | Tim Rowley <timothy.o.rowley@intel.com> | 2016-02-16 17:28:09 -0600 |
---|---|---|
committer | Tim Rowley <timothy.o.rowley@intel.com> | 2016-03-02 18:38:41 -0600 |
commit | c6e67f5a9373e916a8d2333585cb5787aa5f7bb7 (patch) | |
tree | 5b5c60bea784f16736c394c989fdd5df3ebae233 /src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp | |
parent | 2b2d3680bf164ec4f8b50436b96c3fc195318ea5 (diff) | |
download | external_mesa3d-c6e67f5a9373e916a8d2333585cb5787aa5f7bb7.zip external_mesa3d-c6e67f5a9373e916a8d2333585cb5787aa5f7bb7.tar.gz external_mesa3d-c6e67f5a9373e916a8d2333585cb5787aa5f7bb7.tar.bz2 |
gallium/swr: add OpenSWR rasterizer
Acked-by: Roland Scheidegger <sroland@vmware.com>
Acked-by: Jose Fonseca <jfonseca@vmware.com>
Diffstat (limited to 'src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp')
-rw-r--r-- | src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp | 772 |
1 files changed, 772 insertions, 0 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp new file mode 100644 index 0000000..954524a --- /dev/null +++ b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp @@ -0,0 +1,772 @@ +/**************************************************************************** +* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +* +* @file blend_jit.cpp +* +* @brief Implementation of the blend jitter +* +* Notes: +* +******************************************************************************/ +#include "jit_api.h" +#include "blend_jit.h" +#include "builder.h" +#include "state_llvm.h" +#include "common/containers.hpp" +#include "llvm/IR/DataLayout.h" + +#include <sstream> + +// components with bit-widths <= the QUANTIZE_THRESHOLD will be quantized +#define QUANTIZE_THRESHOLD 2 + +////////////////////////////////////////////////////////////////////////// +/// Interface to Jitting a blend shader +////////////////////////////////////////////////////////////////////////// +struct BlendJit : public Builder +{ + BlendJit(JitManager* pJitMgr) : Builder(pJitMgr){}; + + template<bool Color, bool Alpha> + void GenerateBlendFactor(SWR_BLEND_FACTOR factor, Value* constColor[4], Value* src[4], Value* src1[4], Value* dst[4], Value* result[4]) + { + Value* out[4]; + + switch (factor) + { + case BLENDFACTOR_ONE: + out[0] = out[1] = out[2] = out[3] = VIMMED1(1.0f); + break; + case BLENDFACTOR_SRC_COLOR: + out[0] = src[0]; + out[1] = src[1]; + out[2] = src[2]; + out[3] = src[3]; + break; + case BLENDFACTOR_SRC_ALPHA: + out[0] = out[1] = out[2] = out[3] = src[3]; + break; + case BLENDFACTOR_DST_ALPHA: + out[0] = out[1] = out[2] = out[3] = dst[3]; + break; + case BLENDFACTOR_DST_COLOR: + out[0] = dst[0]; + out[1] = dst[1]; + out[2] = dst[2]; + out[3] = dst[3]; + break; + case BLENDFACTOR_SRC_ALPHA_SATURATE: + out[0] = out[1] = out[2] = VMINPS(src[3], FSUB(VIMMED1(1.0f), dst[3])); + out[3] = VIMMED1(1.0f); + break; + case BLENDFACTOR_CONST_COLOR: + out[0] = constColor[0]; + out[1] = constColor[1]; + out[2] = constColor[2]; + out[3] = constColor[3]; + break; + case BLENDFACTOR_CONST_ALPHA: + out[0] = out[1] = out[2] = out[3] = constColor[3]; + break; + case BLENDFACTOR_SRC1_COLOR: + out[0] = src1[0]; + out[1] = src1[1]; + out[2] = src1[2]; + out[3] = src1[3]; + break; + case BLENDFACTOR_SRC1_ALPHA: + out[0] = out[1] = out[2] = out[3] = src1[3]; + break; + case BLENDFACTOR_ZERO: + out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f); + break; + case BLENDFACTOR_INV_SRC_COLOR: + out[0] = FSUB(VIMMED1(1.0f), src[0]); + out[1] = FSUB(VIMMED1(1.0f), src[1]); + out[2] = FSUB(VIMMED1(1.0f), src[2]); + out[3] = FSUB(VIMMED1(1.0f), src[3]); + break; + case BLENDFACTOR_INV_SRC_ALPHA: + out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src[3]); + break; + case BLENDFACTOR_INV_DST_ALPHA: + out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), dst[3]); + break; + case BLENDFACTOR_INV_DST_COLOR: + out[0] = FSUB(VIMMED1(1.0f), dst[0]); + out[1] = FSUB(VIMMED1(1.0f), dst[1]); + out[2] = FSUB(VIMMED1(1.0f), dst[2]); + out[3] = FSUB(VIMMED1(1.0f), dst[3]); + break; + case BLENDFACTOR_INV_CONST_COLOR: + out[0] = FSUB(VIMMED1(1.0f), constColor[0]); + out[1] = FSUB(VIMMED1(1.0f), constColor[1]); + out[2] = FSUB(VIMMED1(1.0f), constColor[2]); + out[3] = FSUB(VIMMED1(1.0f), constColor[3]); + break; + case BLENDFACTOR_INV_CONST_ALPHA: + out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), constColor[3]); + break; + case BLENDFACTOR_INV_SRC1_COLOR: + out[0] = FSUB(VIMMED1(1.0f), src1[0]); + out[1] = FSUB(VIMMED1(1.0f), src1[1]); + out[2] = FSUB(VIMMED1(1.0f), src1[2]); + out[3] = FSUB(VIMMED1(1.0f), src1[3]); + break; + case BLENDFACTOR_INV_SRC1_ALPHA: + out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src1[3]); + break; + default: + SWR_ASSERT(false, "Unsupported blend factor: %d", factor); + out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f); + break; + } + + if (Color) + { + result[0] = out[0]; + result[1] = out[1]; + result[2] = out[2]; + } + + if (Alpha) + { + result[3] = out[3]; + } + } + + void Clamp(SWR_FORMAT format, Value* src[4]) + { + const SWR_FORMAT_INFO& info = GetFormatInfo(format); + SWR_TYPE type = info.type[0]; + + switch (type) + { + case SWR_TYPE_FLOAT: + break; + + case SWR_TYPE_UNORM: + src[0] = VMINPS(VMAXPS(src[0], VIMMED1(0.0f)), VIMMED1(1.0f)); + src[1] = VMINPS(VMAXPS(src[1], VIMMED1(0.0f)), VIMMED1(1.0f)); + src[2] = VMINPS(VMAXPS(src[2], VIMMED1(0.0f)), VIMMED1(1.0f)); + src[3] = VMINPS(VMAXPS(src[3], VIMMED1(0.0f)), VIMMED1(1.0f)); + break; + + case SWR_TYPE_SNORM: + src[0] = VMINPS(VMAXPS(src[0], VIMMED1(-1.0f)), VIMMED1(1.0f)); + src[1] = VMINPS(VMAXPS(src[1], VIMMED1(-1.0f)), VIMMED1(1.0f)); + src[2] = VMINPS(VMAXPS(src[2], VIMMED1(-1.0f)), VIMMED1(1.0f)); + src[3] = VMINPS(VMAXPS(src[3], VIMMED1(-1.0f)), VIMMED1(1.0f)); + break; + + default: SWR_ASSERT(false, "Unsupport format type: %d", type); + } + } + + void ApplyDefaults(SWR_FORMAT format, Value* src[4]) + { + const SWR_FORMAT_INFO& info = GetFormatInfo(format); + + bool valid[] = { false, false, false, false }; + for (uint32_t c = 0; c < info.numComps; ++c) + { + valid[info.swizzle[c]] = true; + } + + for (uint32_t c = 0; c < 4; ++c) + { + if (!valid[c]) + { + src[c] = BITCAST(VIMMED1((int)info.defaults[c]), mSimdFP32Ty); + } + } + } + + void ApplyUnusedDefaults(SWR_FORMAT format, Value* src[4]) + { + const SWR_FORMAT_INFO& info = GetFormatInfo(format); + + for (uint32_t c = 0; c < info.numComps; ++c) + { + if (info.type[c] == SWR_TYPE_UNUSED) + { + src[info.swizzle[c]] = BITCAST(VIMMED1((int)info.defaults[info.swizzle[c]]), mSimdFP32Ty); + } + } + } + + void Quantize(SWR_FORMAT format, Value* src[4]) + { + const SWR_FORMAT_INFO& info = GetFormatInfo(format); + for (uint32_t c = 0; c < info.numComps; ++c) + { + if (info.bpc[c] <= QUANTIZE_THRESHOLD) + { + uint32_t swizComp = info.swizzle[c]; + float factor = (float)((1 << info.bpc[c]) - 1); + switch (info.type[c]) + { + case SWR_TYPE_UNORM: + src[swizComp] = FADD(FMUL(src[swizComp], VIMMED1(factor)), VIMMED1(0.5f)); + src[swizComp] = VROUND(src[swizComp], C(_MM_FROUND_TO_ZERO)); + src[swizComp] = FMUL(src[swizComp], VIMMED1(1.0f /factor)); + break; + default: SWR_ASSERT(false, "Unsupported format type: %d", info.type[c]); + } + } + } + } + + template<bool Color, bool Alpha> + void BlendFunc(SWR_BLEND_OP blendOp, Value* src[4], Value* srcFactor[4], Value* dst[4], Value* dstFactor[4], Value* result[4]) + { + Value* out[4]; + Value* srcBlend[4]; + Value* dstBlend[4]; + for (uint32_t i = 0; i < 4; ++i) + { + srcBlend[i] = FMUL(src[i], srcFactor[i]); + dstBlend[i] = FMUL(dst[i], dstFactor[i]); + } + + switch (blendOp) + { + case BLENDOP_ADD: + out[0] = FADD(srcBlend[0], dstBlend[0]); + out[1] = FADD(srcBlend[1], dstBlend[1]); + out[2] = FADD(srcBlend[2], dstBlend[2]); + out[3] = FADD(srcBlend[3], dstBlend[3]); + break; + + case BLENDOP_SUBTRACT: + out[0] = FSUB(srcBlend[0], dstBlend[0]); + out[1] = FSUB(srcBlend[1], dstBlend[1]); + out[2] = FSUB(srcBlend[2], dstBlend[2]); + out[3] = FSUB(srcBlend[3], dstBlend[3]); + break; + + case BLENDOP_REVSUBTRACT: + out[0] = FSUB(dstBlend[0], srcBlend[0]); + out[1] = FSUB(dstBlend[1], srcBlend[1]); + out[2] = FSUB(dstBlend[2], srcBlend[2]); + out[3] = FSUB(dstBlend[3], srcBlend[3]); + break; + + case BLENDOP_MIN: + out[0] = VMINPS(src[0], dst[0]); + out[1] = VMINPS(src[1], dst[1]); + out[2] = VMINPS(src[2], dst[2]); + out[3] = VMINPS(src[3], dst[3]); + break; + + case BLENDOP_MAX: + out[0] = VMAXPS(src[0], dst[0]); + out[1] = VMAXPS(src[1], dst[1]); + out[2] = VMAXPS(src[2], dst[2]); + out[3] = VMAXPS(src[3], dst[3]); + break; + + default: + SWR_ASSERT(false, "Unsupported blend operation: %d", blendOp); + out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f); + break; + } + + if (Color) + { + result[0] = out[0]; + result[1] = out[1]; + result[2] = out[2]; + } + + if (Alpha) + { + result[3] = out[3]; + } + } + + void LogicOpFunc(SWR_LOGIC_OP logicOp, Value* src[4], Value* dst[4], Value* result[4]) + { + // Op: (s == PS output, d = RT contents) + switch(logicOp) + { + case LOGICOP_CLEAR: + result[0] = VIMMED1(0); + result[1] = VIMMED1(0); + result[2] = VIMMED1(0); + result[3] = VIMMED1(0); + break; + + case LOGICOP_NOR: + // ~(s | d) + result[0] = XOR(OR(src[0], dst[0]), VIMMED1(0xFFFFFFFF)); + result[1] = XOR(OR(src[1], dst[1]), VIMMED1(0xFFFFFFFF)); + result[2] = XOR(OR(src[2], dst[2]), VIMMED1(0xFFFFFFFF)); + result[3] = XOR(OR(src[3], dst[3]), VIMMED1(0xFFFFFFFF)); + break; + + case LOGICOP_AND_INVERTED: + // ~s & d + // todo: use avx andnot instr when I can find the intrinsic to call + result[0] = AND(XOR(src[0], VIMMED1(0xFFFFFFFF)), dst[0]); + result[1] = AND(XOR(src[1], VIMMED1(0xFFFFFFFF)), dst[1]); + result[2] = AND(XOR(src[2], VIMMED1(0xFFFFFFFF)), dst[2]); + result[3] = AND(XOR(src[3], VIMMED1(0xFFFFFFFF)), dst[3]); + break; + + case LOGICOP_COPY_INVERTED: + // ~s + result[0] = XOR(src[0], VIMMED1(0xFFFFFFFF)); + result[1] = XOR(src[1], VIMMED1(0xFFFFFFFF)); + result[2] = XOR(src[2], VIMMED1(0xFFFFFFFF)); + result[3] = XOR(src[3], VIMMED1(0xFFFFFFFF)); + break; + + case LOGICOP_AND_REVERSE: + // s & ~d + // todo: use avx andnot instr when I can find the intrinsic to call + result[0] = AND(XOR(dst[0], VIMMED1(0xFFFFFFFF)), src[0]); + result[1] = AND(XOR(dst[1], VIMMED1(0xFFFFFFFF)), src[1]); + result[2] = AND(XOR(dst[2], VIMMED1(0xFFFFFFFF)), src[2]); + result[3] = AND(XOR(dst[3], VIMMED1(0xFFFFFFFF)), src[3]); + break; + + case LOGICOP_INVERT: + // ~d + result[0] = XOR(dst[0], VIMMED1(0xFFFFFFFF)); + result[1] = XOR(dst[1], VIMMED1(0xFFFFFFFF)); + result[2] = XOR(dst[2], VIMMED1(0xFFFFFFFF)); + result[3] = XOR(dst[3], VIMMED1(0xFFFFFFFF)); + break; + + case LOGICOP_XOR: + // s ^ d + result[0] = XOR(src[0], dst[0]); + result[1] = XOR(src[1], dst[1]); + result[2] = XOR(src[2], dst[2]); + result[3] = XOR(src[3], dst[3]); + break; + + case LOGICOP_NAND: + // ~(s & d) + result[0] = XOR(AND(src[0], dst[0]), VIMMED1(0xFFFFFFFF)); + result[1] = XOR(AND(src[1], dst[1]), VIMMED1(0xFFFFFFFF)); + result[2] = XOR(AND(src[2], dst[2]), VIMMED1(0xFFFFFFFF)); + result[3] = XOR(AND(src[3], dst[3]), VIMMED1(0xFFFFFFFF)); + break; + + case LOGICOP_AND: + // s & d + result[0] = AND(src[0], dst[0]); + result[1] = AND(src[1], dst[1]); + result[2] = AND(src[2], dst[2]); + result[3] = AND(src[3], dst[3]); + break; + + case LOGICOP_EQUIV: + // ~(s ^ d) + result[0] = XOR(XOR(src[0], dst[0]), VIMMED1(0xFFFFFFFF)); + result[1] = XOR(XOR(src[1], dst[1]), VIMMED1(0xFFFFFFFF)); + result[2] = XOR(XOR(src[2], dst[2]), VIMMED1(0xFFFFFFFF)); + result[3] = XOR(XOR(src[3], dst[3]), VIMMED1(0xFFFFFFFF)); + break; + + case LOGICOP_NOOP: + result[0] = dst[0]; + result[1] = dst[1]; + result[2] = dst[2]; + result[3] = dst[3]; + break; + + case LOGICOP_OR_INVERTED: + // ~s | d + result[0] = OR(XOR(src[0], VIMMED1(0xFFFFFFFF)), dst[0]); + result[1] = OR(XOR(src[1], VIMMED1(0xFFFFFFFF)), dst[1]); + result[2] = OR(XOR(src[2], VIMMED1(0xFFFFFFFF)), dst[2]); + result[3] = OR(XOR(src[3], VIMMED1(0xFFFFFFFF)), dst[3]); + break; + + case LOGICOP_COPY: + result[0] = src[0]; + result[1] = src[1]; + result[2] = src[2]; + result[3] = src[3]; + break; + + case LOGICOP_OR_REVERSE: + // s | ~d + result[0] = OR(XOR(dst[0], VIMMED1(0xFFFFFFFF)), src[0]); + result[1] = OR(XOR(dst[1], VIMMED1(0xFFFFFFFF)), src[1]); + result[2] = OR(XOR(dst[2], VIMMED1(0xFFFFFFFF)), src[2]); + result[3] = OR(XOR(dst[3], VIMMED1(0xFFFFFFFF)), src[3]); + break; + + case LOGICOP_OR: + // s | d + result[0] = OR(src[0], dst[0]); + result[1] = OR(src[1], dst[1]); + result[2] = OR(src[2], dst[2]); + result[3] = OR(src[3], dst[3]); + break; + + case LOGICOP_SET: + result[0] = VIMMED1(0xFFFFFFFF); + result[1] = VIMMED1(0xFFFFFFFF); + result[2] = VIMMED1(0xFFFFFFFF); + result[3] = VIMMED1(0xFFFFFFFF); + break; + + default: + SWR_ASSERT(false, "Unsupported logic operation: %d", logicOp); + result[0] = result[1] = result[2] = result[3] = VIMMED1(0.0f); + break; + } + } + + void AlphaTest(const BLEND_COMPILE_STATE& state, Value* pBlendState, Value* pAlpha, Value* ppMask) + { + // load uint32_t reference + Value* pRef = VBROADCAST(LOAD(pBlendState, { 0, SWR_BLEND_STATE_alphaTestReference })); + + Value* pTest = nullptr; + if (state.alphaTestFormat == ALPHA_TEST_UNORM8) + { + // convert float alpha to unorm8 + Value* pAlphaU8 = FMUL(pAlpha, VIMMED1(256.0f)); + pAlphaU8 = FP_TO_UI(pAlphaU8, mSimdInt32Ty); + + // compare + switch (state.alphaTestFunction) + { + case ZFUNC_ALWAYS: pTest = VIMMED1(true); break; + case ZFUNC_NEVER: pTest = VIMMED1(false); break; + case ZFUNC_LT: pTest = ICMP_ULT(pAlphaU8, pRef); break; + case ZFUNC_EQ: pTest = ICMP_EQ(pAlphaU8, pRef); break; + case ZFUNC_LE: pTest = ICMP_ULE(pAlphaU8, pRef); break; + case ZFUNC_GT: pTest = ICMP_UGT(pAlphaU8, pRef); break; + case ZFUNC_NE: pTest = ICMP_NE(pAlphaU8, pRef); break; + case ZFUNC_GE: pTest = ICMP_UGE(pAlphaU8, pRef); break; + default: + SWR_ASSERT(false, "Invalid alpha test function"); + break; + } + } + else + { + // cast ref to float + pRef = BITCAST(pRef, mSimdFP32Ty); + + // compare + switch (state.alphaTestFunction) + { + case ZFUNC_ALWAYS: pTest = VIMMED1(true); break; + case ZFUNC_NEVER: pTest = VIMMED1(false); break; + case ZFUNC_LT: pTest = FCMP_OLT(pAlpha, pRef); break; + case ZFUNC_EQ: pTest = FCMP_OEQ(pAlpha, pRef); break; + case ZFUNC_LE: pTest = FCMP_OLE(pAlpha, pRef); break; + case ZFUNC_GT: pTest = FCMP_OGT(pAlpha, pRef); break; + case ZFUNC_NE: pTest = FCMP_ONE(pAlpha, pRef); break; + case ZFUNC_GE: pTest = FCMP_OGE(pAlpha, pRef); break; + default: + SWR_ASSERT(false, "Invalid alpha test function"); + break; + } + } + + // load current mask + Value* pMask = LOAD(ppMask); + + // convert to int1 mask + pMask = MASK(pMask); + + // and with alpha test result + pMask = AND(pMask, pTest); + + // convert back to vector mask + pMask = VMASK(pMask); + + // store new mask + STORE(pMask, ppMask); + } + + Function* Create(const BLEND_COMPILE_STATE& state) + { + static std::size_t jitNum = 0; + + std::stringstream fnName("BlendShader", std::ios_base::in | std::ios_base::out | std::ios_base::ate); + fnName << jitNum++; + + // blend function signature + //typedef void(*PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, simdvector&, simdvector&, uint32_t, BYTE*, simdvector&, simdscalari*, simdscalari*); + + std::vector<Type*> args{ + PointerType::get(Gen_SWR_BLEND_STATE(JM()), 0), // SWR_BLEND_STATE* + PointerType::get(mSimdFP32Ty, 0), // simdvector& src + PointerType::get(mSimdFP32Ty, 0), // simdvector& src1 + Type::getInt32Ty(JM()->mContext), // sampleNum + PointerType::get(mSimdFP32Ty, 0), // uint8_t* pDst + PointerType::get(mSimdFP32Ty, 0), // simdvector& result + PointerType::get(mSimdInt32Ty, 0), // simdscalari* oMask + PointerType::get(mSimdInt32Ty, 0), // simdscalari* pMask + }; + + FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false); + Function* blendFunc = Function::Create(fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule); + + BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", blendFunc); + + IRB()->SetInsertPoint(entry); + + // arguments + auto argitr = blendFunc->getArgumentList().begin(); + Value* pBlendState = &*argitr++; + pBlendState->setName("pBlendState"); + Value* pSrc = &*argitr++; + pSrc->setName("src"); + Value* pSrc1 = &*argitr++; + pSrc1->setName("src1"); + Value* sampleNum = &*argitr++; + sampleNum->setName("sampleNum"); + Value* pDst = &*argitr++; + pDst->setName("pDst"); + Value* pResult = &*argitr++; + pResult->setName("result"); + Value* ppoMask = &*argitr++; + ppoMask->setName("ppoMask"); + Value* ppMask = &*argitr++; + ppMask->setName("pMask"); + + static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format"); + Value* dst[4]; + Value* constantColor[4]; + Value* src[4]; + Value* src1[4]; + Value* result[4]; + for (uint32_t i = 0; i < 4; ++i) + { + // load hot tile + dst[i] = LOAD(pDst, { i }); + + // load constant color + constantColor[i] = VBROADCAST(LOAD(pBlendState, { 0, SWR_BLEND_STATE_constantColor, i })); + + // load src + src[i] = LOAD(pSrc, { i }); + + // load src1 + src1[i] = LOAD(pSrc1, { i }); + } + Value* currentMask = VIMMED1(-1); + if(state.desc.alphaToCoverageEnable) + { + currentMask = FP_TO_SI(FMUL(src[3], VBROADCAST(C((float)state.desc.numSamples))), mSimdInt32Ty); + } + + // alpha test + if (state.desc.alphaTestEnable) + { + AlphaTest(state, pBlendState, src[3], ppMask); + } + + // color blend + if (state.blendState.blendEnable) + { + // clamp sources + Clamp(state.format, src); + Clamp(state.format, src1); + Clamp(state.format, dst); + Clamp(state.format, constantColor); + + // apply defaults to hottile contents to take into account missing components + ApplyDefaults(state.format, dst); + + // Force defaults for unused 'X' components + ApplyUnusedDefaults(state.format, dst); + + // Quantize low precision components + Quantize(state.format, dst); + + // special case clamping for R11G11B10_float which has no sign bit + if (state.format == R11G11B10_FLOAT) + { + dst[0] = VMAXPS(dst[0], VIMMED1(0.0f)); + dst[1] = VMAXPS(dst[1], VIMMED1(0.0f)); + dst[2] = VMAXPS(dst[2], VIMMED1(0.0f)); + dst[3] = VMAXPS(dst[3], VIMMED1(0.0f)); + } + + Value* srcFactor[4]; + Value* dstFactor[4]; + if (state.desc.independentAlphaBlendEnable) + { + GenerateBlendFactor<true, false>(state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor); + GenerateBlendFactor<false, true>(state.blendState.sourceAlphaBlendFactor, constantColor, src, src1, dst, srcFactor); + + GenerateBlendFactor<true, false>(state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor); + GenerateBlendFactor<false, true>(state.blendState.destAlphaBlendFactor, constantColor, src, src1, dst, dstFactor); + + BlendFunc<true, false>(state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result); + BlendFunc<false, true>(state.blendState.alphaBlendFunc, src, srcFactor, dst, dstFactor, result); + } + else + { + GenerateBlendFactor<true, true>(state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor); + GenerateBlendFactor<true, true>(state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor); + + BlendFunc<true, true>(state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result); + } + + // store results out + for (uint32_t i = 0; i < 4; ++i) + { + STORE(result[i], pResult, { i }); + } + } + + if(state.blendState.logicOpEnable) + { + const SWR_FORMAT_INFO& info = GetFormatInfo(state.format); + SWR_ASSERT(info.type[0] == SWR_TYPE_UINT); + Value* vMask[4]; + for(uint32_t i = 0; i < 4; i++) + { + switch(info.bpc[i]) + { + case 0: vMask[i] = VIMMED1(0x00000000); break; + case 2: vMask[i] = VIMMED1(0x00000003); break; + case 5: vMask[i] = VIMMED1(0x0000001F); break; + case 6: vMask[i] = VIMMED1(0x0000003F); break; + case 8: vMask[i] = VIMMED1(0x000000FF); break; + case 10: vMask[i] = VIMMED1(0x000003FF); break; + case 11: vMask[i] = VIMMED1(0x000007FF); break; + case 16: vMask[i] = VIMMED1(0x0000FFFF); break; + case 24: vMask[i] = VIMMED1(0x00FFFFFF); break; + case 32: vMask[i] = VIMMED1(0xFFFFFFFF); break; + default: + vMask[i] = VIMMED1(0x0); + SWR_ASSERT(0, "Unsupported bpc for logic op\n"); + break; + } + src[i] = BITCAST(src[i], mSimdInt32Ty);//, vMask[i]); + dst[i] = BITCAST(dst[i], mSimdInt32Ty); + } + + LogicOpFunc(state.blendState.logicOpFunc, src, dst, result); + + // store results out + for(uint32_t i = 0; i < 4; ++i) + { + // clear upper bits from PS output not in RT format after doing logic op + result[i] = AND(result[i], vMask[i]); + + STORE(BITCAST(result[i], mSimdFP32Ty), pResult, {i}); + } + } + + if(state.desc.oMaskEnable) + { + assert(!(state.desc.alphaToCoverageEnable)); + // load current mask + Value* oMask = LOAD(ppoMask); + Value* sampleMasked = VBROADCAST(SHL(C(1), sampleNum)); + oMask = AND(oMask, sampleMasked); + currentMask = AND(oMask, currentMask); + } + + if(state.desc.sampleMaskEnable) + { + Value* sampleMask = LOAD(pBlendState, { 0, SWR_BLEND_STATE_sampleMask}); + Value* sampleMasked = SHL(C(1), sampleNum); + sampleMask = AND(sampleMask, sampleMasked); + sampleMask = VBROADCAST(ICMP_SGT(sampleMask, C(0))); + sampleMask = S_EXT(sampleMask, mSimdInt32Ty); + currentMask = AND(sampleMask, currentMask); + } + + if(state.desc.sampleMaskEnable || state.desc.alphaToCoverageEnable || + state.desc.oMaskEnable) + { + // load current mask + Value* pMask = LOAD(ppMask); + currentMask = S_EXT(ICMP_SGT(currentMask, VBROADCAST(C(0))), mSimdInt32Ty); + Value* outputMask = AND(pMask, currentMask); + // store new mask + STORE(outputMask, GEP(ppMask, C(0))); + } + + RET_VOID(); + + JitManager::DumpToFile(blendFunc, ""); + + FunctionPassManager passes(JM()->mpCurrentModule); + passes.add(createBreakCriticalEdgesPass()); + passes.add(createCFGSimplificationPass()); + passes.add(createEarlyCSEPass()); + passes.add(createPromoteMemoryToRegisterPass()); + passes.add(createCFGSimplificationPass()); + passes.add(createEarlyCSEPass()); + passes.add(createInstructionCombiningPass()); + passes.add(createInstructionSimplifierPass()); + passes.add(createConstantPropagationPass()); + passes.add(createSCCPPass()); + passes.add(createAggressiveDCEPass()); + + passes.run(*blendFunc); + + JitManager::DumpToFile(blendFunc, "optimized"); + + return blendFunc; + } +}; + +////////////////////////////////////////////////////////////////////////// +/// @brief JITs from fetch shader IR +/// @param hJitMgr - JitManager handle +/// @param func - LLVM function IR +/// @return PFN_FETCH_FUNC - pointer to fetch code +PFN_BLEND_JIT_FUNC JitBlendFunc(HANDLE hJitMgr, const HANDLE hFunc) +{ + const llvm::Function *func = (const llvm::Function*)hFunc; + JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr); + PFN_BLEND_JIT_FUNC pfnBlend; + pfnBlend = (PFN_BLEND_JIT_FUNC)(pJitMgr->mpExec->getFunctionAddress(func->getName().str())); + // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot add new IR to the module + pJitMgr->mIsModuleFinalized = true; + + return pfnBlend; +} + +////////////////////////////////////////////////////////////////////////// +/// @brief JIT compiles blend shader +/// @param hJitMgr - JitManager handle +/// @param state - blend state to build function from +extern "C" PFN_BLEND_JIT_FUNC JITCALL JitCompileBlend(HANDLE hJitMgr, const BLEND_COMPILE_STATE& state) +{ + JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr); + + pJitMgr->SetupNewModule(); + + BlendJit theJit(pJitMgr); + HANDLE hFunc = theJit.Create(state); + + return JitBlendFunc(hJitMgr, hFunc); +} |