diff options
author | Dave Airlie <airlied@redhat.com> | 2016-09-05 09:54:07 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2016-09-06 10:06:33 +1000 |
commit | 69fca64259495d7a31135876e818e1ac2a36d190 (patch) | |
tree | 3462cfa208d32e2939a07681c0ab2ed56ff648da /src/amd/addrlib/core | |
parent | 1add3562e33f0234da50e54dda8cfa6dac613125 (diff) | |
download | external_mesa3d-69fca64259495d7a31135876e818e1ac2a36d190.zip external_mesa3d-69fca64259495d7a31135876e818e1ac2a36d190.tar.gz external_mesa3d-69fca64259495d7a31135876e818e1ac2a36d190.tar.bz2 |
amd/addrlib: move addrlib from amdgpu winsys to common code
Acked-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Diffstat (limited to 'src/amd/addrlib/core')
-rw-r--r-- | src/amd/addrlib/core/addrcommon.h | 558 | ||||
-rw-r--r-- | src/amd/addrlib/core/addrelemlib.cpp | 1674 | ||||
-rw-r--r-- | src/amd/addrlib/core/addrelemlib.h | 270 | ||||
-rw-r--r-- | src/amd/addrlib/core/addrlib.cpp | 4023 | ||||
-rw-r--r-- | src/amd/addrlib/core/addrlib.h | 695 | ||||
-rw-r--r-- | src/amd/addrlib/core/addrobject.cpp | 246 | ||||
-rw-r--r-- | src/amd/addrlib/core/addrobject.h | 89 |
7 files changed, 7555 insertions, 0 deletions
diff --git a/src/amd/addrlib/core/addrcommon.h b/src/amd/addrlib/core/addrcommon.h new file mode 100644 index 0000000..f996c9a --- /dev/null +++ b/src/amd/addrlib/core/addrcommon.h @@ -0,0 +1,558 @@ +/* + * Copyright © 2014 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +*************************************************************************************************** +* @file addrcommon.h +* @brief Contains the helper function and constants +*************************************************************************************************** +*/ + +#ifndef __ADDR_COMMON_H__ +#define __ADDR_COMMON_H__ + +#include "addrinterface.h" + + +// ADDR_LNX_KERNEL_BUILD is for internal build +// Moved from addrinterface.h so __KERNEL__ is not needed any more +#if ADDR_LNX_KERNEL_BUILD // || (defined(__GNUC__) && defined(__KERNEL__)) + #include "lnx_common_defs.h" // ported from cmmqs +#elif !defined(__APPLE__) + #include <stdlib.h> + #include <string.h> +#endif + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Common constants +/////////////////////////////////////////////////////////////////////////////////////////////////// +static const UINT_32 MicroTileWidth = 8; ///< Micro tile width, for 1D and 2D tiling +static const UINT_32 MicroTileHeight = 8; ///< Micro tile height, for 1D and 2D tiling +static const UINT_32 ThickTileThickness = 4; ///< Micro tile thickness, for THICK modes +static const UINT_32 XThickTileThickness = 8; ///< Extra thick tiling thickness +static const UINT_32 PowerSaveTileBytes = 64; ///< Nuber of bytes per tile for power save 64 +static const UINT_32 CmaskCacheBits = 1024; ///< Number of bits for CMASK cache +static const UINT_32 CmaskElemBits = 4; ///< Number of bits for CMASK element +static const UINT_32 HtileCacheBits = 16384; ///< Number of bits for HTILE cache 512*32 + +static const UINT_32 MicroTilePixels = MicroTileWidth * MicroTileHeight; + +static const INT_32 TileIndexInvalid = TILEINDEX_INVALID; +static const INT_32 TileIndexLinearGeneral = TILEINDEX_LINEAR_GENERAL; +static const INT_32 TileIndexNoMacroIndex = -3; + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Common macros +/////////////////////////////////////////////////////////////////////////////////////////////////// +#define BITS_PER_BYTE 8 +#define BITS_TO_BYTES(x) ( ((x) + (BITS_PER_BYTE-1)) / BITS_PER_BYTE ) +#define BYTES_TO_BITS(x) ( (x) * BITS_PER_BYTE ) + +/// Helper macros to select a single bit from an int (undefined later in section) +#define _BIT(v,b) (((v) >> (b) ) & 1) + +/** +*************************************************************************************************** +* @brief Enums to identify AddrLib type +*************************************************************************************************** +*/ +enum AddrLibClass +{ + BASE_ADDRLIB = 0x0, + R600_ADDRLIB = 0x6, + R800_ADDRLIB = 0x8, + SI_ADDRLIB = 0xa, + CI_ADDRLIB = 0xb, +}; + +/** +*************************************************************************************************** +* AddrChipFamily +* +* @brief +* Neutral enums that specifies chip family. +* +*************************************************************************************************** +*/ +enum AddrChipFamily +{ + ADDR_CHIP_FAMILY_IVLD, ///< Invalid family + ADDR_CHIP_FAMILY_R6XX, + ADDR_CHIP_FAMILY_R7XX, + ADDR_CHIP_FAMILY_R8XX, + ADDR_CHIP_FAMILY_NI, + ADDR_CHIP_FAMILY_SI, + ADDR_CHIP_FAMILY_CI, + ADDR_CHIP_FAMILY_VI, +}; + +/** +*************************************************************************************************** +* ADDR_CONFIG_FLAGS +* +* @brief +* This structure is used to set addr configuration flags. +*************************************************************************************************** +*/ +union ADDR_CONFIG_FLAGS +{ + struct + { + /// Clients do not need to set these flags except forceLinearAligned. + /// There flags are set up by AddrLib inside thru AddrInitGlobalParamsFromRegister + UINT_32 optimalBankSwap : 1; ///< New bank tiling for RV770 only + UINT_32 noCubeMipSlicesPad : 1; ///< Disables faces padding for cubemap mipmaps + UINT_32 fillSizeFields : 1; ///< If clients fill size fields in all input and + /// output structure + UINT_32 ignoreTileInfo : 1; ///< Don't use tile info structure + UINT_32 useTileIndex : 1; ///< Make tileIndex field in input valid + UINT_32 useCombinedSwizzle : 1; ///< Use combined swizzle + UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level + UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment + UINT_32 degradeBaseLevel : 1; ///< Degrade to 1D modes automatically for base level + UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize + UINT_32 reserved : 22; ///< Reserved bits for future use + }; + + UINT_32 value; +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Platform specific debug break defines +/////////////////////////////////////////////////////////////////////////////////////////////////// +#if DEBUG + #if defined(__GNUC__) + #define ADDR_DBG_BREAK() + #elif defined(__APPLE__) + #define ADDR_DBG_BREAK() { IOPanic("");} + #else + #define ADDR_DBG_BREAK() { __debugbreak(); } + #endif +#else + #define ADDR_DBG_BREAK() +#endif +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Debug assertions used in AddrLib +/////////////////////////////////////////////////////////////////////////////////////////////////// +#if DEBUG +#define ADDR_ASSERT(__e) if ( !((__e) ? TRUE : FALSE)) { ADDR_DBG_BREAK(); } +#define ADDR_ASSERT_ALWAYS() ADDR_DBG_BREAK() +#define ADDR_UNHANDLED_CASE() ADDR_ASSERT(!"Unhandled case") +#define ADDR_NOT_IMPLEMENTED() ADDR_ASSERT(!"Not implemented"); +#else //DEBUG +#define ADDR_ASSERT(__e) +#define ADDR_ASSERT_ALWAYS() +#define ADDR_UNHANDLED_CASE() +#define ADDR_NOT_IMPLEMENTED() +#endif //DEBUG +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Debug print macro from legacy address library +/////////////////////////////////////////////////////////////////////////////////////////////////// +#if DEBUG + +#define ADDR_PRNT(a) AddrObject::DebugPrint a + +/// @brief Macro for reporting informational messages +/// @ingroup util +/// +/// This macro optionally prints an informational message to stdout. +/// The first parameter is a condition -- if it is true, nothing is done. +/// The second pararmeter MUST be a parenthesis-enclosed list of arguments, +/// starting with a string. This is passed to printf() or an equivalent +/// in order to format the informational message. For example, +/// ADDR_INFO(0, ("test %d",3) ); prints out "test 3". +/// +#define ADDR_INFO(cond, a) \ +{ if (!(cond)) { ADDR_PRNT(a); } } + + +/// @brief Macro for reporting error warning messages +/// @ingroup util +/// +/// This macro optionally prints an error warning message to stdout, +/// followed by the file name and line number where the macro was called. +/// The first parameter is a condition -- if it is true, nothing is done. +/// The second pararmeter MUST be a parenthesis-enclosed list of arguments, +/// starting with a string. This is passed to printf() or an equivalent +/// in order to format the informational message. For example, +/// ADDR_WARN(0, ("test %d",3) ); prints out "test 3" followed by +/// a second line with the file name and line number. +/// +#define ADDR_WARN(cond, a) \ +{ if (!(cond)) \ + { ADDR_PRNT(a); \ + ADDR_PRNT((" WARNING in file %s, line %d\n", __FILE__, __LINE__)); \ +} } + + +/// @brief Macro for reporting fatal error conditions +/// @ingroup util +/// +/// This macro optionally stops execution of the current routine +/// after printing an error warning message to stdout, +/// followed by the file name and line number where the macro was called. +/// The first parameter is a condition -- if it is true, nothing is done. +/// The second pararmeter MUST be a parenthesis-enclosed list of arguments, +/// starting with a string. This is passed to printf() or an equivalent +/// in order to format the informational message. For example, +/// ADDR_EXIT(0, ("test %d",3) ); prints out "test 3" followed by +/// a second line with the file name and line number, then stops execution. +/// +#define ADDR_EXIT(cond, a) \ +{ if (!(cond)) \ + { ADDR_PRNT(a); ADDR_DBG_BREAK();\ +} } + +#else // DEBUG + +#define ADDRDPF 1 ? (void)0 : (void) + +#define ADDR_PRNT(a) + +#define ADDR_DBG_BREAK() + +#define ADDR_INFO(cond, a) + +#define ADDR_WARN(cond, a) + +#define ADDR_EXIT(cond, a) + +#endif // DEBUG +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Misc helper functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +*************************************************************************************************** +* AddrXorReduce +* +* @brief +* Xor the right-side numberOfBits bits of x. +*************************************************************************************************** +*/ +static inline UINT_32 XorReduce( + UINT_32 x, + UINT_32 numberOfBits) +{ + UINT_32 i; + UINT_32 result = x & 1; + + for (i=1; i<numberOfBits; i++) + { + result ^= ((x>>i) & 1); + } + + return result; +} + +/** +*************************************************************************************************** +* IsPow2 +* +* @brief +* Check if the size (UINT_32) is pow 2 +*************************************************************************************************** +*/ +static inline UINT_32 IsPow2( + UINT_32 dim) ///< [in] dimension of miplevel +{ + ADDR_ASSERT(dim > 0); + return !(dim & (dim - 1)); +} + +/** +*************************************************************************************************** +* IsPow2 +* +* @brief +* Check if the size (UINT_64) is pow 2 +*************************************************************************************************** +*/ +static inline UINT_64 IsPow2( + UINT_64 dim) ///< [in] dimension of miplevel +{ + ADDR_ASSERT(dim > 0); + return !(dim & (dim - 1)); +} + +/** +*************************************************************************************************** +* ByteAlign +* +* @brief +* Align UINT_32 "x" to "align" alignment, "align" should be power of 2 +*************************************************************************************************** +*/ +static inline UINT_32 PowTwoAlign( + UINT_32 x, + UINT_32 align) +{ + // + // Assert that x is a power of two. + // + ADDR_ASSERT(IsPow2(align)); + return (x + (align - 1)) & (~(align - 1)); +} + +/** +*************************************************************************************************** +* ByteAlign +* +* @brief +* Align UINT_64 "x" to "align" alignment, "align" should be power of 2 +*************************************************************************************************** +*/ +static inline UINT_64 PowTwoAlign( + UINT_64 x, + UINT_64 align) +{ + // + // Assert that x is a power of two. + // + ADDR_ASSERT(IsPow2(align)); + return (x + (align - 1)) & (~(align - 1)); +} + +/** +*************************************************************************************************** +* Min +* +* @brief +* Get the min value between two unsigned values +*************************************************************************************************** +*/ +static inline UINT_32 Min( + UINT_32 value1, + UINT_32 value2) +{ + return ((value1 < (value2)) ? (value1) : value2); +} + +/** +*************************************************************************************************** +* Min +* +* @brief +* Get the min value between two signed values +*************************************************************************************************** +*/ +static inline INT_32 Min( + INT_32 value1, + INT_32 value2) +{ + return ((value1 < (value2)) ? (value1) : value2); +} + +/** +*************************************************************************************************** +* Max +* +* @brief +* Get the max value between two unsigned values +*************************************************************************************************** +*/ +static inline UINT_32 Max( + UINT_32 value1, + UINT_32 value2) +{ + return ((value1 > (value2)) ? (value1) : value2); +} + +/** +*************************************************************************************************** +* Max +* +* @brief +* Get the max value between two signed values +*************************************************************************************************** +*/ +static inline INT_32 Max( + INT_32 value1, + INT_32 value2) +{ + return ((value1 > (value2)) ? (value1) : value2); +} + +/** +*************************************************************************************************** +* NextPow2 +* +* @brief +* Compute the mipmap's next level dim size +*************************************************************************************************** +*/ +static inline UINT_32 NextPow2( + UINT_32 dim) ///< [in] dimension of miplevel +{ + UINT_32 newDim; + + newDim = 1; + + if (dim > 0x7fffffff) + { + ADDR_ASSERT_ALWAYS(); + newDim = 0x80000000; + } + else + { + while (newDim < dim) + { + newDim <<= 1; + } + } + + return newDim; +} + +/** +*************************************************************************************************** +* Log2 +* +* @brief +* Compute log of base 2 +*************************************************************************************************** +*/ +static inline UINT_32 Log2( + UINT_32 x) ///< [in] the value should calculate log based 2 +{ + UINT_32 y; + + // + // Assert that x is a power of two. + // + ADDR_ASSERT(IsPow2(x)); + + y = 0; + while (x > 1) + { + x >>= 1; + y++; + } + + return y; +} + +/** +*************************************************************************************************** +* QLog2 +* +* @brief +* Compute log of base 2 quickly (<= 16) +*************************************************************************************************** +*/ +static inline UINT_32 QLog2( + UINT_32 x) ///< [in] the value should calculate log based 2 +{ + ADDR_ASSERT(x <= 16); + + UINT_32 y = 0; + + switch (x) + { + case 1: + y = 0; + break; + case 2: + y = 1; + break; + case 4: + y = 2; + break; + case 8: + y = 3; + break; + case 16: + y = 4; + break; + default: + ADDR_ASSERT_ALWAYS(); + } + + return y; +} + +/** +*************************************************************************************************** +* SafeAssign +* +* @brief +* NULL pointer safe assignment +*************************************************************************************************** +*/ +static inline VOID SafeAssign( + UINT_32* pLVal, ///< [in] Pointer to left val + UINT_32 rVal) ///< [in] Right value +{ + if (pLVal) + { + *pLVal = rVal; + } +} + +/** +*************************************************************************************************** +* SafeAssign +* +* @brief +* NULL pointer safe assignment for 64bit values +*************************************************************************************************** +*/ +static inline VOID SafeAssign( + UINT_64* pLVal, ///< [in] Pointer to left val + UINT_64 rVal) ///< [in] Right value +{ + if (pLVal) + { + *pLVal = rVal; + } +} + +/** +*************************************************************************************************** +* SafeAssign +* +* @brief +* NULL pointer safe assignment for AddrTileMode +*************************************************************************************************** +*/ +static inline VOID SafeAssign( + AddrTileMode* pLVal, ///< [in] Pointer to left val + AddrTileMode rVal) ///< [in] Right value +{ + if (pLVal) + { + *pLVal = rVal; + } +} + +#endif // __ADDR_COMMON_H__ + diff --git a/src/amd/addrlib/core/addrelemlib.cpp b/src/amd/addrlib/core/addrelemlib.cpp new file mode 100644 index 0000000..76b1bad --- /dev/null +++ b/src/amd/addrlib/core/addrelemlib.cpp @@ -0,0 +1,1674 @@ +/* + * Copyright © 2014 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +*************************************************************************************************** +* @file addrelemlib.cpp +* @brief Contains the class implementation for element/pixel related functions +*************************************************************************************************** +*/ + +#include "addrelemlib.h" +#include "addrlib.h" + + +/** +*************************************************************************************************** +* AddrElemLib::AddrElemLib +* +* @brief +* constructor +* +* @return +* N/A +*************************************************************************************************** +*/ +AddrElemLib::AddrElemLib( + AddrLib* const pAddrLib) : ///< [in] Parent addrlib instance pointer + AddrObject(pAddrLib->GetClient()), + m_pAddrLib(pAddrLib) +{ + switch (m_pAddrLib->GetAddrChipFamily()) + { + case ADDR_CHIP_FAMILY_R6XX: + m_depthPlanarType = ADDR_DEPTH_PLANAR_R600; + m_fp16ExportNorm = 0; + break; + case ADDR_CHIP_FAMILY_R7XX: + m_depthPlanarType = ADDR_DEPTH_PLANAR_R600; + m_fp16ExportNorm = 1; + break; + case ADDR_CHIP_FAMILY_R8XX: + case ADDR_CHIP_FAMILY_NI: // Same as 8xx + m_depthPlanarType = ADDR_DEPTH_PLANAR_R800; + m_fp16ExportNorm = 1; + break; + default: + m_fp16ExportNorm = 1; + m_depthPlanarType = ADDR_DEPTH_PLANAR_R800; + } + + m_configFlags.value = 0; +} + +/** +*************************************************************************************************** +* AddrElemLib::~AddrElemLib +* +* @brief +* destructor +* +* @return +* N/A +*************************************************************************************************** +*/ +AddrElemLib::~AddrElemLib() +{ +} + +/** +*************************************************************************************************** +* AddrElemLib::Create +* +* @brief +* Creates and initializes AddrLib object. +* +* @return +* Returns point to ADDR_CREATEINFO if successful. +*************************************************************************************************** +*/ +AddrElemLib* AddrElemLib::Create( + const AddrLib* const pAddrLib) ///< [in] Pointer of parent AddrLib instance +{ + AddrElemLib* pElemLib = NULL; + + if (pAddrLib) + { + pElemLib = new(pAddrLib->GetClient()) AddrElemLib(const_cast<AddrLib* const>(pAddrLib)); + } + + return pElemLib; +} + +/************************************************************************************************** +* AddrElemLib::Flt32sToInt32s +* +* @brief +* Convert a ADDR_FLT_32 value to Int32 value +* +* @return +* N/A +*************************************************************************************************** +*/ +VOID AddrElemLib::Flt32sToInt32s( + ADDR_FLT_32 value, ///< [in] ADDR_FLT_32 value + UINT_32 bits, ///< [in] nubmer of bits in value + AddrNumberType numberType, ///< [in] the type of number + UINT_32* pResult) ///< [out] Int32 value +{ + UINT_8 round = 128; //ADDR_ROUND_BY_HALF + UINT_32 uscale; + UINT_32 sign; + + //convert each component to an INT_32 + switch ( numberType ) + { + case ADDR_NO_NUMBER: //fall through + case ADDR_ZERO: //fall through + case ADDR_ONE: //fall through + case ADDR_EPSILON: //fall through + return; // these are zero-bit components, so don't set result + + case ADDR_UINT_BITS: // unsigned integer bit field, clamped to range + uscale = (1<<bits) - 1; + if (bits == 32) // special case unsigned 32-bit int + { + *pResult = value.i; + } + else + { + if ((value.i < 0) || (value.u > uscale)) + { + *pResult = uscale; + } + else + { + *pResult = value.i; + } + return; + } + + // The algorithm used in the DB and TX differs at one value for 24-bit unorms + case ADDR_UNORM_R6XXDB: // unsigned repeating fraction + if ((bits==24) && (value.i == 0x33000000)) + { + *pResult = 1; + return; + } // Else treat like ADDR_UNORM_R6XX + + case ADDR_UNORM_R6XX: // unsigned repeating fraction + if (value.f <= 0) + { + *pResult = 0; // first clamp to [0..1] + } + else + { + if (value.f >= 1) + { + *pResult = (1<<bits) - 1; + } + else + { + if ((value.i | 0x87FFFFFF) == 0xFFFFFFFF) + { + *pResult = 0; // NaN, so force to 0 + } + + #if 0 // floating point version for documentation + else + { + FLOAT f = value.f * ((1<<bits) - 1); + *pResult = static_cast<INT_32>(f + (round/256.0f)); + } + #endif + else + { + ADDR_FLT_32 scaled; + ADDR_FLT_32 shifted; + UINT_64 truncated, rounded; + UINT_32 altShift; + UINT_32 mask = (1 << bits) - 1; + UINT_32 half = 1 << (bits - 1); + UINT_32 mant24 = (value.i & 0x7FFFFF) + 0x800000; + UINT_64 temp = mant24 - (mant24>>bits) - + static_cast<INT_32>((mant24 & mask) > half); + UINT_32 exp8 = value.i >> 23; + UINT_32 shift = 126 - exp8 + 24 - bits; + UINT_64 final; + + if (shift >= 32) // This is zero, even with maximum dither add + { + final = 0; + } + else + { + final = ((temp<<8) + (static_cast<UINT_64>(round)<<shift)) >> (shift+8); + } + //ADDR_EXIT( *pResult == final, + // ("Float %x converted to %d-bit Unorm %x != bitwise %x", + // value.u, bits, (UINT_32)*pResult, (UINT_32)final) ); + if (final > mask) + { + final = mask; + } + + scaled.f = value.f * ((1<<bits) - 1); + shifted.f = (scaled.f * 256); + truncated = ((shifted.i&0x7FFFFF) + (INT_64)0x800000) << 8; + altShift = 126 + 24 + 8 - ((shifted.i>>23)&0xFF); + truncated = (altShift > 60) ? 0 : truncated >> altShift; + rounded = static_cast<INT_32>((round + truncated) >> 8); + //if (rounded > ((1<<bits) - 1)) + // rounded = ((1<<bits) - 1); + *pResult = static_cast<INT_32>(rounded); //(INT_32)final; + } + } + } + + return; + + case ADDR_S8FLOAT32: // 32-bit IEEE float, passes through NaN values + *pResult = value.i; + return; + + // @@ FIX ROUNDING in this code, fix the denorm case + case ADDR_U4FLOATC: // Unsigned float, 4-bit exponent. bias 15, clamped [0..1] + sign = (value.i >> 31) & 1; + if ((value.i&0x7F800000) == 0x7F800000) // If NaN or INF: + { + if ((value.i&0x007FFFFF) != 0) // then if NaN + { + *pResult = 0; // return 0 + } + else + { + *pResult = (sign)?0:0xF00000; // else +INF->+1, -INF->0 + } + return; + } + if (value.f <= 0) + { + *pResult = 0; + } + else + { + if (value.f>=1) + { + *pResult = 0xF << (bits-4); + } + else + { + if ((value.i>>23) > 112 ) + { + // 24-bit float: normalized + // value.i += 1 << (22-bits+4); + // round the IEEE mantissa to mantissa size + // @@ NOTE: add code to support rounding + value.u &= 0x7FFFFFF; // mask off high 4 exponent bits + *pResult = value.i >> (23-bits+4);// shift off unused mantissa bits + } + else + { + // 24-bit float: denormalized + value.f = value.f / (1<<28) / (1<<28); + value.f = value.f / (1<<28) / (1<<28); // convert to IEEE denorm + // value.i += 1 << (22-bits+4); + // round the IEEE mantissa to mantissa size + // @@ NOTE: add code to support rounding + *pResult = value.i >> (23-bits+4); // shift off unused mantissa bits + } + } + } + + return; + + default: // invalid number mode + //ADDR_EXIT(0, ("Invalid AddrNumber %d", numberType) ); + break; + + } +} + +/** +*************************************************************************************************** +* AddrElemLib::Int32sToPixel +* +* @brief +* Pack 32-bit integer values into an uncompressed pixel, +* in the proper order +* +* @return +* N/A +* +* @note +* This entry point packes four 32-bit integer values into +* an uncompressed pixel. The pixel values are specifies in +* standard order, e.g. depth/stencil. This routine asserts +* if called on compressed pixel. +*************************************************************************************************** +*/ +VOID AddrElemLib::Int32sToPixel( + UINT_32 numComps, ///< [in] number of components + UINT_32* pComps, ///< [in] compnents + UINT_32* pCompBits, ///< [in] total bits in each component + UINT_32* pCompStart, ///< [in] the first bit position of each component + ADDR_COMPONENT_FLAGS properties, ///< [in] properties about byteAligned, exportNorm + UINT_32 resultBits, ///< [in] result bits: total bpp after decompression + UINT_8* pPixel) ///< [out] a depth/stencil pixel value +{ + UINT_32 i; + UINT_32 j; + UINT_32 start; + UINT_32 size; + UINT_32 byte; + UINT_32 value = 0; + UINT_32 compMask; + UINT_32 elemMask=0; + UINT_32 elementXor = 0; // address xor when reading bytes from elements + + + // @@ NOTE: assert if called on a compressed format! + + if (properties.byteAligned) // Components are all byte-sized + { + for (i = 0; i < numComps; i++) // Then for each component + { + // Copy the bytes of the component into the element + start = pCompStart[i] / 8; + size = pCompBits[i] / 8; + for (j = 0; j < size; j++) + { + pPixel[(j+start)^elementXor] = static_cast<UINT_8>(pComps[i] >> (8*j)); + } + } + } + else // Element is 32-bits or less, components are bit fields + { + // First, extract each component in turn and combine it into a 32-bit value + for (i = 0; i < numComps; i++) + { + compMask = (1 << pCompBits[i]) - 1; + elemMask |= compMask << pCompStart[i]; + value |= (pComps[i] & compMask) << pCompStart[i]; + } + + // Mext, copy the masked value into the element + size = (resultBits + 7) / 8; + for (i = 0; i < size; i++) + { + byte = pPixel[i^elementXor] & ~(elemMask >> (8*i)); + pPixel[i^elementXor] = static_cast<UINT_8>(byte | ((elemMask & value) >> (8*i))); + } + } +} + +/** +*************************************************************************************************** +* Flt32ToDepthPixel +* +* @brief +* Convert a FLT_32 value to a depth/stencil pixel value +* +* @return +* N/A +*************************************************************************************************** +*/ +VOID AddrElemLib::Flt32ToDepthPixel( + AddrDepthFormat format, ///< [in] Depth format + const ADDR_FLT_32 comps[2], ///< [in] two components of depth + UINT_8* pPixel ///< [out] depth pixel value + ) const +{ + UINT_32 i; + UINT_32 values[2]; + ADDR_COMPONENT_FLAGS properties; // byteAligned, exportNorm + UINT_32 resultBits = 0; // result bits: total bits per pixel after decompression + + ADDR_PIXEL_FORMATINFO fmt; + + // get type for each component + PixGetDepthCompInfo(format, &fmt); + + //initialize properties + properties.byteAligned = TRUE; + properties.exportNorm = TRUE; + properties.floatComp = FALSE; + + //set properties and result bits + for (i = 0; i < 2; i++) + { + if ((fmt.compBit[i] & 7) || (fmt.compStart[i] & 7)) + { + properties.byteAligned = FALSE; + } + + if (resultBits < fmt.compStart[i] + fmt.compBit[i]) + { + resultBits = fmt.compStart[i] + fmt.compBit[i]; + } + + // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format + if (fmt.compBit[i] > 11 || fmt.numType[i] >= ADDR_USCALED) + { + properties.exportNorm = FALSE; + } + + // Mark if there are any floating point components + if ((fmt.numType[i] == ADDR_U4FLOATC) || (fmt.numType[i] >= ADDR_S8FLOAT) ) + { + properties.floatComp = TRUE; + } + } + + // Convert the two input floats to integer values + for (i = 0; i < 2; i++) + { + Flt32sToInt32s(comps[i], fmt.compBit[i], fmt.numType[i], &values[i]); + } + + // Then pack the two integer components, in the proper order + Int32sToPixel(2, values, fmt.compBit, fmt.compStart, properties, resultBits, pPixel ); + +} + +/** +*************************************************************************************************** +* Flt32ToColorPixel +* +* @brief +* Convert a FLT_32 value to a red/green/blue/alpha pixel value +* +* @return +* N/A +*************************************************************************************************** +*/ +VOID AddrElemLib::Flt32ToColorPixel( + AddrColorFormat format, ///< [in] Color format + AddrSurfaceNumber surfNum, ///< [in] Surface number + AddrSurfaceSwap surfSwap, ///< [in] Surface swap + const ADDR_FLT_32 comps[4], ///< [in] four components of color + UINT_8* pPixel ///< [out] a red/green/blue/alpha pixel value + ) const +{ + ADDR_PIXEL_FORMATINFO pixelInfo; + + UINT_32 i; + UINT_32 values[4]; + ADDR_COMPONENT_FLAGS properties; // byteAligned, exportNorm + UINT_32 resultBits = 0; // result bits: total bits per pixel after decompression + + memset(&pixelInfo, 0, sizeof(ADDR_PIXEL_FORMATINFO)); + + PixGetColorCompInfo(format, surfNum, surfSwap, &pixelInfo); + + //initialize properties + properties.byteAligned = TRUE; + properties.exportNorm = TRUE; + properties.floatComp = FALSE; + + //set properties and result bits + for (i = 0; i < 4; i++) + { + if ( (pixelInfo.compBit[i] & 7) || (pixelInfo.compStart[i] & 7) ) + { + properties.byteAligned = FALSE; + } + + if (resultBits < pixelInfo.compStart[i] + pixelInfo.compBit[i]) + { + resultBits = pixelInfo.compStart[i] + pixelInfo.compBit[i]; + } + + if (m_fp16ExportNorm) + { + // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format + // or if it's not FP and <=16 bits + if (((pixelInfo.compBit[i] > 11) || (pixelInfo.numType[i] >= ADDR_USCALED)) + && (pixelInfo.numType[i] !=ADDR_U4FLOATC)) + { + properties.exportNorm = FALSE; + } + } + else + { + // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format + if (pixelInfo.compBit[i] > 11 || pixelInfo.numType[i] >= ADDR_USCALED) + { + properties.exportNorm = FALSE; + } + } + + // Mark if there are any floating point components + if ( (pixelInfo.numType[i] == ADDR_U4FLOATC) || + (pixelInfo.numType[i] >= ADDR_S8FLOAT) ) + { + properties.floatComp = TRUE; + } + } + + // Convert the four input floats to integer values + for (i = 0; i < 4; i++) + { + Flt32sToInt32s(comps[i], pixelInfo.compBit[i], pixelInfo.numType[i], &values[i]); + } + + // Then pack the four integer components, in the proper order + Int32sToPixel(4, values, &pixelInfo.compBit[0], &pixelInfo.compStart[0], + properties, resultBits, pPixel); +} + +/** +*************************************************************************************************** +* AddrElemLib::GetCompType +* +* @brief +* Fill per component info +* +* @return +* N/A +* +*************************************************************************************************** +*/ +VOID AddrElemLib::GetCompType( + AddrColorFormat format, ///< [in] surface format + AddrSurfaceNumber numType, ///< [in] number type + ADDR_PIXEL_FORMATINFO* pInfo) ///< [in][out] per component info out +{ + BOOL_32 handled = FALSE; + + // Floating point formats override the number format + switch (format) + { + case ADDR_COLOR_16_FLOAT: // fall through for all pure floating point format + case ADDR_COLOR_16_16_FLOAT: + case ADDR_COLOR_16_16_16_16_FLOAT: + case ADDR_COLOR_32_FLOAT: + case ADDR_COLOR_32_32_FLOAT: + case ADDR_COLOR_32_32_32_32_FLOAT: + case ADDR_COLOR_10_11_11_FLOAT: + case ADDR_COLOR_11_11_10_FLOAT: + numType = ADDR_NUMBER_FLOAT; + break; + // Special handling for the depth formats + case ADDR_COLOR_8_24: // fall through for these 2 similar format + case ADDR_COLOR_24_8: + for (UINT_32 c = 0; c < 4; c++) + { + if (pInfo->compBit[c] == 8) + { + pInfo->numType[c] = ADDR_UINT_BITS; + } + else if (pInfo->compBit[c] == 24) + { + pInfo->numType[c] = ADDR_UNORM_R6XX; + } + else + { + pInfo->numType[c] = ADDR_NO_NUMBER; + } + } + handled = TRUE; + break; + case ADDR_COLOR_8_24_FLOAT: // fall through for these 3 similar format + case ADDR_COLOR_24_8_FLOAT: + case ADDR_COLOR_X24_8_32_FLOAT: + for (UINT_32 c = 0; c < 4; c++) + { + if (pInfo->compBit[c] == 8) + { + pInfo->numType[c] = ADDR_UINT_BITS; + } + else if (pInfo->compBit[c] == 24) + { + pInfo->numType[c] = ADDR_U4FLOATC; + } + else if (pInfo->compBit[c] == 32) + { + pInfo->numType[c] = ADDR_S8FLOAT32; + } + else + { + pInfo->numType[c] = ADDR_NO_NUMBER; + } + } + handled = TRUE; + break; + default: + break; + } + + if (!handled) + { + for (UINT_32 c = 0; c < 4; c++) + { + // Assign a number type for each component + AddrSurfaceNumber cnum; + + // First handle default component values + if (pInfo->compBit[c] == 0) + { + if (c < 3) + { + pInfo->numType[c] = ADDR_ZERO; // Default is zero for RGB + } + else if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT) + { + pInfo->numType[c] = ADDR_EPSILON; // Alpha INT_32 bits default is 0x01 + } + else + { + pInfo->numType[c] = ADDR_ONE; // Alpha normal default is float 1.0 + } + continue; + } + // Now handle small components + else if (pInfo->compBit[c] == 1) + { + if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT) + { + cnum = ADDR_NUMBER_UINT; + } + else + { + cnum = ADDR_NUMBER_UNORM; + } + } + else + { + cnum = numType; + } + + // If no default, set the number type fom num, compbits, and architecture + switch (cnum) + { + case ADDR_NUMBER_SRGB: + pInfo->numType[c] = (c < 3) ? ADDR_GAMMA8_R6XX : ADDR_UNORM_R6XX; + break; + case ADDR_NUMBER_UNORM: + pInfo->numType[c] = ADDR_UNORM_R6XX; + break; + case ADDR_NUMBER_SNORM: + pInfo->numType[c] = ADDR_SNORM_R6XX; + break; + case ADDR_NUMBER_USCALED: + pInfo->numType[c] = ADDR_USCALED; // @@ Do we need separate Pele routine? + break; + case ADDR_NUMBER_SSCALED: + pInfo->numType[c] = ADDR_SSCALED; // @@ Do we need separate Pele routine? + break; + case ADDR_NUMBER_FLOAT: + if (pInfo->compBit[c] == 32) + { + pInfo->numType[c] = ADDR_S8FLOAT32; + } + else if (pInfo->compBit[c] == 16) + { + pInfo->numType[c] = ADDR_S5FLOAT; + } + else if (pInfo->compBit[c] >= 10) + { + pInfo->numType[c] = ADDR_U5FLOAT; + } + else + { + ADDR_ASSERT_ALWAYS(); + } + break; + case ADDR_NUMBER_SINT: + pInfo->numType[c] = ADDR_SINT_BITS; + break; + case ADDR_NUMBER_UINT: + pInfo->numType[c] = ADDR_UINT_BITS; + break; + + default: + ADDR_ASSERT(!"Invalid number type"); + pInfo->numType[c] = ADDR_NO_NUMBER; + break; + } + } + } +} + +/** +*************************************************************************************************** +* AddrElemLib::GetCompSwap +* +* @brief +* Get components swapped for color surface +* +* @return +* N/A +* +*************************************************************************************************** +*/ +VOID AddrElemLib::GetCompSwap( + AddrSurfaceSwap swap, ///< [in] swap mode + ADDR_PIXEL_FORMATINFO* pInfo) ///< [in/out] output per component info +{ + switch (pInfo->comps) + { + case 4: + switch (swap) + { + case ADDR_SWAP_ALT: + SwapComps( 0, 2, pInfo ); + break; // BGRA + case ADDR_SWAP_STD_REV: + SwapComps( 0, 3, pInfo ); + SwapComps( 1, 2, pInfo ); + break; // ABGR + case ADDR_SWAP_ALT_REV: + SwapComps( 0, 3, pInfo ); + SwapComps( 0, 2, pInfo ); + SwapComps( 0, 1, pInfo ); + break; // ARGB + default: + break; + } + break; + case 3: + switch (swap) + { + case ADDR_SWAP_ALT_REV: + SwapComps( 0, 3, pInfo ); + SwapComps( 0, 2, pInfo ); + break; // AGR + case ADDR_SWAP_STD_REV: + SwapComps( 0, 2, pInfo ); + break; // BGR + case ADDR_SWAP_ALT: + SwapComps( 2, 3, pInfo ); + break; // RGA + default: + break; // RGB + } + break; + case 2: + switch (swap) + { + case ADDR_SWAP_ALT_REV: + SwapComps( 0, 1, pInfo ); + SwapComps( 1, 3, pInfo ); + break; // AR + case ADDR_SWAP_STD_REV: + SwapComps( 0, 1, pInfo ); + break; // GR + case ADDR_SWAP_ALT: + SwapComps( 1, 3, pInfo ); + break; // RA + default: + break; // RG + } + break; + case 1: + switch (swap) + { + case ADDR_SWAP_ALT_REV: + SwapComps( 0, 3, pInfo ); + break; // A + case ADDR_SWAP_STD_REV: + SwapComps( 0, 2, pInfo ); + break; // B + case ADDR_SWAP_ALT: + SwapComps( 0, 1, pInfo ); + break; // G + default: + break; // R + } + break; + } +} + +/** +*************************************************************************************************** +* AddrElemLib::GetCompSwap +* +* @brief +* Get components swapped for color surface +* +* @return +* N/A +* +*************************************************************************************************** +*/ +VOID AddrElemLib::SwapComps( + UINT_32 c0, ///< [in] component index 0 + UINT_32 c1, ///< [in] component index 1 + ADDR_PIXEL_FORMATINFO* pInfo) ///< [in/out] output per component info +{ + UINT_32 start; + UINT_32 bits; + + start = pInfo->compStart[c0]; + pInfo->compStart[c0] = pInfo->compStart[c1]; + pInfo->compStart[c1] = start; + + bits = pInfo->compBit[c0]; + pInfo->compBit[c0] = pInfo->compBit[c1]; + pInfo->compBit[c1] = bits; +} + +/** +*************************************************************************************************** +* AddrElemLib::PixGetColorCompInfo +* +* @brief +* Get per component info for color surface +* +* @return +* N/A +* +*************************************************************************************************** +*/ +VOID AddrElemLib::PixGetColorCompInfo( + AddrColorFormat format, ///< [in] surface format, read from register + AddrSurfaceNumber number, ///< [in] pixel number type + AddrSurfaceSwap swap, ///< [in] component swap mode + ADDR_PIXEL_FORMATINFO* pInfo ///< [out] output per component info + ) const +{ + // 1. Get componet bits + switch (format) + { + case ADDR_COLOR_8: + GetCompBits(8, 0, 0, 0, pInfo); + break; + case ADDR_COLOR_1_5_5_5: + GetCompBits(5, 5, 5, 1, pInfo); + break; + case ADDR_COLOR_5_6_5: + GetCompBits(8, 6, 5, 0, pInfo); + break; + case ADDR_COLOR_6_5_5: + GetCompBits(5, 5, 6, 0, pInfo); + break; + case ADDR_COLOR_8_8: + GetCompBits(8, 8, 0, 0, pInfo); + break; + case ADDR_COLOR_4_4_4_4: + GetCompBits(4, 4, 4, 4, pInfo); + break; + case ADDR_COLOR_16: + GetCompBits(16, 0, 0, 0, pInfo); + break; + case ADDR_COLOR_8_8_8_8: + GetCompBits(8, 8, 8, 8, pInfo); + break; + case ADDR_COLOR_2_10_10_10: + GetCompBits(10, 10, 10, 2, pInfo); + break; + case ADDR_COLOR_10_11_11: + GetCompBits(11, 11, 10, 0, pInfo); + break; + case ADDR_COLOR_11_11_10: + GetCompBits(10, 11, 11, 0, pInfo); + break; + case ADDR_COLOR_16_16: + GetCompBits(16, 16, 0, 0, pInfo); + break; + case ADDR_COLOR_16_16_16_16: + GetCompBits(16, 16, 16, 16, pInfo); + break; + case ADDR_COLOR_16_FLOAT: + GetCompBits(16, 0, 0, 0, pInfo); + break; + case ADDR_COLOR_16_16_FLOAT: + GetCompBits(16, 16, 0, 0, pInfo); + break; + case ADDR_COLOR_32_FLOAT: + GetCompBits(32, 0, 0, 0, pInfo); + break; + case ADDR_COLOR_32_32_FLOAT: + GetCompBits(32, 32, 0, 0, pInfo); + break; + case ADDR_COLOR_16_16_16_16_FLOAT: + GetCompBits(16, 16, 16, 16, pInfo); + break; + case ADDR_COLOR_32_32_32_32_FLOAT: + GetCompBits(32, 32, 32, 32, pInfo); + break; + + case ADDR_COLOR_32: + GetCompBits(32, 0, 0, 0, pInfo); + break; + case ADDR_COLOR_32_32: + GetCompBits(32, 32, 0, 0, pInfo); + break; + case ADDR_COLOR_32_32_32_32: + GetCompBits(32, 32, 32, 32, pInfo); + break; + case ADDR_COLOR_10_10_10_2: + GetCompBits(2, 10, 10, 10, pInfo); + break; + case ADDR_COLOR_10_11_11_FLOAT: + GetCompBits(11, 11, 10, 0, pInfo); + break; + case ADDR_COLOR_11_11_10_FLOAT: + GetCompBits(10, 11, 11, 0, pInfo); + break; + case ADDR_COLOR_5_5_5_1: + GetCompBits(1, 5, 5, 5, pInfo); + break; + case ADDR_COLOR_3_3_2: + GetCompBits(2, 3, 3, 0, pInfo); + break; + case ADDR_COLOR_4_4: + GetCompBits(4, 4, 0, 0, pInfo); + break; + case ADDR_COLOR_8_24: + case ADDR_COLOR_8_24_FLOAT: // same bit count, fall through + GetCompBits(24, 8, 0, 0, pInfo); + break; + case ADDR_COLOR_24_8: + case ADDR_COLOR_24_8_FLOAT: // same bit count, fall through + GetCompBits(8, 24, 0, 0, pInfo); + break; + case ADDR_COLOR_X24_8_32_FLOAT: + GetCompBits(32, 8, 0, 0, pInfo); + break; + + case ADDR_COLOR_INVALID: + GetCompBits(0, 0, 0, 0, pInfo); + break; + default: + ADDR_ASSERT(0); + GetCompBits(0, 0, 0, 0, pInfo); + break; + } + + // 2. Get component number type + + GetCompType(format, number, pInfo); + + // 3. Swap components if needed + + GetCompSwap(swap, pInfo); +} + +/** +*************************************************************************************************** +* AddrElemLib::PixGetDepthCompInfo +* +* @brief +* Get per component info for depth surface +* +* @return +* N/A +* +*************************************************************************************************** +*/ +VOID AddrElemLib::PixGetDepthCompInfo( + AddrDepthFormat format, ///< [in] surface format, read from register + ADDR_PIXEL_FORMATINFO* pInfo ///< [out] output per component bits and type + ) const +{ + if (m_depthPlanarType == ADDR_DEPTH_PLANAR_R800) + { + if (format == ADDR_DEPTH_8_24_FLOAT) + { + format = ADDR_DEPTH_X24_8_32_FLOAT; // Use this format to represent R800's D24FS8 + } + + if (format == ADDR_DEPTH_X8_24_FLOAT) + { + format = ADDR_DEPTH_32_FLOAT; + } + } + + switch (format) + { + case ADDR_DEPTH_16: + GetCompBits(16, 0, 0, 0, pInfo); + break; + case ADDR_DEPTH_8_24: + case ADDR_DEPTH_8_24_FLOAT: // similar format, fall through + GetCompBits(24, 8, 0, 0, pInfo); + break; + case ADDR_DEPTH_X8_24: + case ADDR_DEPTH_X8_24_FLOAT: // similar format, fall through + GetCompBits(24, 0, 0, 0, pInfo); + break; + case ADDR_DEPTH_32_FLOAT: + GetCompBits(32, 0, 0, 0, pInfo); + break; + case ADDR_DEPTH_X24_8_32_FLOAT: + GetCompBits(32, 8, 0, 0, pInfo); + break; + case ADDR_DEPTH_INVALID: + GetCompBits(0, 0, 0, 0, pInfo); + break; + default: + ADDR_ASSERT(0); + GetCompBits(0, 0, 0, 0, pInfo); + break; + } + + switch (format) + { + case ADDR_DEPTH_16: + pInfo->numType [0] = ADDR_UNORM_R6XX; + pInfo->numType [1] = ADDR_ZERO; + break; + case ADDR_DEPTH_8_24: + pInfo->numType [0] = ADDR_UNORM_R6XXDB; + pInfo->numType [1] = ADDR_UINT_BITS; + break; + case ADDR_DEPTH_8_24_FLOAT: + pInfo->numType [0] = ADDR_U4FLOATC; + pInfo->numType [1] = ADDR_UINT_BITS; + break; + case ADDR_DEPTH_X8_24: + pInfo->numType [0] = ADDR_UNORM_R6XXDB; + pInfo->numType [1] = ADDR_ZERO; + break; + case ADDR_DEPTH_X8_24_FLOAT: + pInfo->numType [0] = ADDR_U4FLOATC; + pInfo->numType [1] = ADDR_ZERO; + break; + case ADDR_DEPTH_32_FLOAT: + pInfo->numType [0] = ADDR_S8FLOAT32; + pInfo->numType [1] = ADDR_ZERO; + break; + case ADDR_DEPTH_X24_8_32_FLOAT: + pInfo->numType [0] = ADDR_S8FLOAT32; + pInfo->numType [1] = ADDR_UINT_BITS; + break; + default: + pInfo->numType [0] = ADDR_NO_NUMBER; + pInfo->numType [1] = ADDR_NO_NUMBER; + break; + } + + pInfo->numType [2] = ADDR_NO_NUMBER; + pInfo->numType [3] = ADDR_NO_NUMBER; +} + +/** +*************************************************************************************************** +* AddrElemLib::PixGetExportNorm +* +* @brief +* Check if fp16 export norm can be enabled. +* +* @return +* TRUE if this can be enabled. +* +*************************************************************************************************** +*/ +BOOL_32 AddrElemLib::PixGetExportNorm( + AddrColorFormat colorFmt, ///< [in] surface format, read from register + AddrSurfaceNumber numberFmt, ///< [in] pixel number type + AddrSurfaceSwap swap ///< [in] components swap type + ) const +{ + BOOL_32 enabled = TRUE; + + ADDR_PIXEL_FORMATINFO formatInfo; + + PixGetColorCompInfo(colorFmt, numberFmt, swap, &formatInfo); + + for (UINT_32 c = 0; c < 4; c++) + { + if (m_fp16ExportNorm) + { + if (((formatInfo.compBit[c] > 11) || (formatInfo.numType[c] > ADDR_USCALED)) && + (formatInfo.numType[c] != ADDR_U4FLOATC) && + (formatInfo.numType[c] != ADDR_S5FLOAT) && + (formatInfo.numType[c] != ADDR_S5FLOATM) && + (formatInfo.numType[c] != ADDR_U5FLOAT) && + (formatInfo.numType[c] != ADDR_U3FLOATM)) + { + enabled = FALSE; + break; + } + } + else + { + if ((formatInfo.compBit[c] > 11) || (formatInfo.numType[c] > ADDR_USCALED)) + { + enabled = FALSE; + break; + } + } + } + + return enabled; +} + +/** +*************************************************************************************************** +* AddrElemLib::AdjustSurfaceInfo +* +* @brief +* Adjust bpp/base pitch/width/height according to elemMode and expandX/Y +* +* @return +* N/A +*************************************************************************************************** +*/ +VOID AddrElemLib::AdjustSurfaceInfo( + AddrElemMode elemMode, ///< [in] element mode + UINT_32 expandX, ///< [in] decompression expansion factor in X + UINT_32 expandY, ///< [in] decompression expansion factor in Y + UINT_32* pBpp, ///< [in/out] bpp + UINT_32* pBasePitch, ///< [in/out] base pitch + UINT_32* pWidth, ///< [in/out] width + UINT_32* pHeight) ///< [in/out] height +{ + UINT_32 packedBits; + UINT_32 basePitch; + UINT_32 width; + UINT_32 height; + UINT_32 bpp; + BOOL_32 bBCnFormat = FALSE; + + ADDR_ASSERT(pBpp != NULL); + ADDR_ASSERT(pWidth != NULL && pHeight != NULL && pBasePitch != NULL); + + if (pBpp) + { + bpp = *pBpp; + + switch (elemMode) + { + case ADDR_EXPANDED: + packedBits = bpp / expandX / expandY; + break; + case ADDR_PACKED_STD: // Different bit order + case ADDR_PACKED_REV: + packedBits = bpp * expandX * expandY; + break; + case ADDR_PACKED_GBGR: + case ADDR_PACKED_BGRG: + packedBits = bpp; // 32-bit packed ==> 2 32-bit result + break; + case ADDR_PACKED_BC1: // Fall through + case ADDR_PACKED_BC4: + packedBits = 64; + bBCnFormat = TRUE; + break; + case ADDR_PACKED_BC2: // Fall through + case ADDR_PACKED_BC3: // Fall through + case ADDR_PACKED_BC5: // Fall through + bBCnFormat = TRUE; + packedBits = 128; + break; + case ADDR_ROUND_BY_HALF: // Fall through + case ADDR_ROUND_TRUNCATE: // Fall through + case ADDR_ROUND_DITHER: // Fall through + case ADDR_UNCOMPRESSED: + packedBits = bpp; + break; + default: + packedBits = bpp; + ADDR_ASSERT_ALWAYS(); + break; + } + + *pBpp = packedBits; + } + + if (pWidth && pHeight && pBasePitch) + { + basePitch = *pBasePitch; + width = *pWidth; + height = *pHeight; + + if ((expandX > 1) || (expandY > 1)) + { + if (elemMode == ADDR_EXPANDED) + { + basePitch *= expandX; + width *= expandX; + height *= expandY; + } + else + { + // Evergreen family workaround + if (bBCnFormat && (m_pAddrLib->GetAddrChipFamily() == ADDR_CHIP_FAMILY_R8XX)) + { + // For BCn we now pad it to POW2 at the beginning so it is safe to + // divide by 4 directly + basePitch = basePitch / expandX; + width = width / expandX; + height = height / expandY; +#if DEBUG + width = (width == 0) ? 1 : width; + height = (height == 0) ? 1 : height; + + if ((*pWidth > PowTwoAlign(width, 8) * expandX) || + (*pHeight > PowTwoAlign(height, 8) * expandY)) // 8 is 1D tiling alignment + { + // if this assertion is hit we may have issues if app samples + // rightmost/bottommost pixels + ADDR_ASSERT_ALWAYS(); + } +#endif + } + else // Not BCn format we still keep old way (FMT_1? No real test yet) + { + basePitch = (basePitch + expandX - 1) / expandX; + width = (width + expandX - 1) / expandX; + height = (height + expandY - 1) / expandY; + } + } + + *pBasePitch = basePitch; // 0 is legal value for base pitch. + *pWidth = (width == 0) ? 1 : width; + *pHeight = (height == 0) ? 1 : height; + } //if (pWidth && pHeight && pBasePitch) + } +} + +/** +*************************************************************************************************** +* AddrElemLib::RestoreSurfaceInfo +* +* @brief +* Reverse operation of AdjustSurfaceInfo +* +* @return +* N/A +*************************************************************************************************** +*/ +VOID AddrElemLib::RestoreSurfaceInfo( + AddrElemMode elemMode, ///< [in] element mode + UINT_32 expandX, ///< [in] decompression expansion factor in X + UINT_32 expandY, ///< [out] decompression expansion factor in Y + UINT_32* pBpp, ///< [in/out] bpp + UINT_32* pWidth, ///< [in/out] width + UINT_32* pHeight) ///< [in/out] height +{ + UINT_32 originalBits; + UINT_32 width; + UINT_32 height; + UINT_32 bpp; + + ADDR_ASSERT(pBpp != NULL); + ADDR_ASSERT(pWidth != NULL && pHeight != NULL); + + if (pBpp) + { + bpp = *pBpp; + + switch (elemMode) + { + case ADDR_EXPANDED: + originalBits = bpp * expandX * expandY; + break; + case ADDR_PACKED_STD: // Different bit order + case ADDR_PACKED_REV: + originalBits = bpp / expandX / expandY; + break; + case ADDR_PACKED_GBGR: + case ADDR_PACKED_BGRG: + originalBits = bpp; // 32-bit packed ==> 2 32-bit result + break; + case ADDR_PACKED_BC1: // Fall through + case ADDR_PACKED_BC4: + originalBits = 64; + break; + case ADDR_PACKED_BC2: // Fall through + case ADDR_PACKED_BC3: // Fall through + case ADDR_PACKED_BC5: + originalBits = 128; + break; + case ADDR_ROUND_BY_HALF: // Fall through + case ADDR_ROUND_TRUNCATE: // Fall through + case ADDR_ROUND_DITHER: // Fall through + case ADDR_UNCOMPRESSED: + originalBits = bpp; + break; + default: + originalBits = bpp; + ADDR_ASSERT_ALWAYS(); + break; + } + + *pBpp = originalBits; + } + + if (pWidth && pHeight) + { + width = *pWidth; + height = *pHeight; + + if ((expandX > 1) || (expandY > 1)) + { + if (elemMode == ADDR_EXPANDED) + { + width /= expandX; + height /= expandY; + } + else + { + width *= expandX; + height *= expandY; + } + } + + *pWidth = (width == 0) ? 1 : width; + *pHeight = (height == 0) ? 1 : height; + } +} + +/** +*************************************************************************************************** +* AddrElemLib::GetBitsPerPixel +* +* @brief +* Compute the total bits per element according to a format +* code. For compressed formats, this is not the same as +* the number of bits per decompressed element. +* +* @return +* Bits per pixel +*************************************************************************************************** +*/ +UINT_32 AddrElemLib::GetBitsPerPixel( + AddrFormat format, ///< [in] surface format code + AddrElemMode* pElemMode, ///< [out] element mode + UINT_32* pExpandX, ///< [out] decompression expansion factor in X + UINT_32* pExpandY, ///< [out] decompression expansion factor in Y + UINT_32* pUnusedBits) ///< [out] bits unused +{ + UINT_32 bpp; + UINT_32 expandX = 1; + UINT_32 expandY = 1; + UINT_32 bitUnused = 0; + AddrElemMode elemMode = ADDR_UNCOMPRESSED; // default value + + switch (format) + { + case ADDR_FMT_8: + bpp = 8; + break; + case ADDR_FMT_1_5_5_5: + case ADDR_FMT_5_6_5: + case ADDR_FMT_6_5_5: + case ADDR_FMT_8_8: + case ADDR_FMT_4_4_4_4: + case ADDR_FMT_16: + case ADDR_FMT_16_FLOAT: + bpp = 16; + break; + case ADDR_FMT_GB_GR: // treat as FMT_8_8 + elemMode = ADDR_PACKED_GBGR; + bpp = 16; + break; + case ADDR_FMT_BG_RG: // treat as FMT_8_8 + elemMode = ADDR_PACKED_BGRG; + bpp = 16; + break; + case ADDR_FMT_8_8_8_8: + case ADDR_FMT_2_10_10_10: + case ADDR_FMT_10_11_11: + case ADDR_FMT_11_11_10: + case ADDR_FMT_16_16: + case ADDR_FMT_16_16_FLOAT: + case ADDR_FMT_32: + case ADDR_FMT_32_FLOAT: + case ADDR_FMT_24_8: + case ADDR_FMT_24_8_FLOAT: + bpp = 32; + break; + case ADDR_FMT_16_16_16_16: + case ADDR_FMT_16_16_16_16_FLOAT: + case ADDR_FMT_32_32: + case ADDR_FMT_32_32_FLOAT: + case ADDR_FMT_CTX1: + bpp = 64; + break; + case ADDR_FMT_32_32_32_32: + case ADDR_FMT_32_32_32_32_FLOAT: + bpp = 128; + break; + case ADDR_FMT_INVALID: + bpp = 0; + break; + case ADDR_FMT_1_REVERSED: + elemMode = ADDR_PACKED_REV; + expandX = 8; + bpp = 1; + break; + case ADDR_FMT_1: + elemMode = ADDR_PACKED_STD; + expandX = 8; + bpp = 1; + break; + case ADDR_FMT_4_4: + case ADDR_FMT_3_3_2: + bpp = 8; + break; + case ADDR_FMT_5_5_5_1: + bpp = 16; + break; + case ADDR_FMT_32_AS_8: + case ADDR_FMT_32_AS_8_8: + case ADDR_FMT_8_24: + case ADDR_FMT_8_24_FLOAT: + case ADDR_FMT_10_10_10_2: + case ADDR_FMT_10_11_11_FLOAT: + case ADDR_FMT_11_11_10_FLOAT: + case ADDR_FMT_5_9_9_9_SHAREDEXP: + bpp = 32; + break; + case ADDR_FMT_X24_8_32_FLOAT: + bpp = 64; + bitUnused = 24; + break; + case ADDR_FMT_8_8_8: + elemMode = ADDR_EXPANDED; + bpp = 24;//@@ 8; // read 3 elements per pixel + expandX = 3; + break; + case ADDR_FMT_16_16_16: + case ADDR_FMT_16_16_16_FLOAT: + elemMode = ADDR_EXPANDED; + bpp = 48;//@@ 16; // read 3 elements per pixel + expandX = 3; + break; + case ADDR_FMT_32_32_32_FLOAT: + case ADDR_FMT_32_32_32: + elemMode = ADDR_EXPANDED; + expandX = 3; + bpp = 96;//@@ 32; // read 3 elements per pixel + break; + case ADDR_FMT_BC1: + elemMode = ADDR_PACKED_BC1; + expandX = 4; + expandY = 4; + bpp = 64; + break; + case ADDR_FMT_BC4: + elemMode = ADDR_PACKED_BC4; + expandX = 4; + expandY = 4; + bpp = 64; + break; + case ADDR_FMT_BC2: + elemMode = ADDR_PACKED_BC2; + expandX = 4; + expandY = 4; + bpp = 128; + break; + case ADDR_FMT_BC3: + elemMode = ADDR_PACKED_BC3; + expandX = 4; + expandY = 4; + bpp = 128; + break; + case ADDR_FMT_BC5: + case ADDR_FMT_BC6: // reuse ADDR_PACKED_BC5 + case ADDR_FMT_BC7: // reuse ADDR_PACKED_BC5 + elemMode = ADDR_PACKED_BC5; + expandX = 4; + expandY = 4; + bpp = 128; + break; + default: + bpp = 0; + ADDR_ASSERT_ALWAYS(); + break; + // @@ or should this be an error? + } + + SafeAssign(pExpandX, expandX); + SafeAssign(pExpandY, expandY); + SafeAssign(pUnusedBits, bitUnused); + SafeAssign(reinterpret_cast<UINT_32*>(pElemMode), elemMode); + + return bpp; +} + +/** +*************************************************************************************************** +* AddrElemLib::GetCompBits +* +* @brief +* Set each component's bit size and bit start. And set element mode and number type +* +* @return +* N/A +*************************************************************************************************** +*/ +VOID AddrElemLib::GetCompBits( + UINT_32 c0, ///< [in] bits of component 0 + UINT_32 c1, ///< [in] bits of component 1 + UINT_32 c2, ///< [in] bits of component 2 + UINT_32 c3, ///< [in] bits of component 3 + ADDR_PIXEL_FORMATINFO* pInfo, ///< [out] per component info out + AddrElemMode elemMode) ///< [in] element mode +{ + pInfo->comps = 0; + + pInfo->compBit[0] = c0; + pInfo->compBit[1] = c1; + pInfo->compBit[2] = c2; + pInfo->compBit[3] = c3; + + pInfo->compStart[0] = 0; + pInfo->compStart[1] = c0; + pInfo->compStart[2] = c0+c1; + pInfo->compStart[3] = c0+c1+c2; + + pInfo->elemMode = elemMode; + // still needed since component swap may depend on number of components + for (INT i=0; i<4; i++) + { + if (pInfo->compBit[i] == 0) + { + pInfo->compStart[i] = 0; // all null components start at bit 0 + pInfo->numType[i] = ADDR_NO_NUMBER; // and have no number type + } + else + { + pInfo->comps++; + } + } +} + +/** +*************************************************************************************************** +* AddrElemLib::GetCompBits +* +* @brief +* Set the clear color (or clear depth/stencil) for a surface +* +* @note +* If clearColor is zero, a default clear value is used in place of comps[4]. +* If float32 is set, full precision is used, else the mantissa is reduced to 12-bits +* +* @return +* N/A +*************************************************************************************************** +*/ +VOID AddrElemLib::SetClearComps( + ADDR_FLT_32 comps[4], ///< [in/out] components + BOOL_32 clearColor, ///< [in] TRUE if clear color is set (CLEAR_COLOR) + BOOL_32 float32) ///< [in] TRUE if float32 component (BLEND_FLOAT32) +{ + INT_32 i; + + // Use default clearvalues if clearColor is disabled + if (clearColor == FALSE) + { + for (i=0; i<3; i++) + { + comps[i].f = 0.0; + } + comps[3].f = 1.0; + } + + // Otherwise use the (modified) clear value + else + { + for (i=0; i<4; i++) + { // If full precision, use clear value unchanged + if (float32) + { + // Do nothing + //comps[i] = comps[i]; + } + // Else if it is a NaN, use the standard NaN value + else if ((comps[i].u & 0x7FFFFFFF) > 0x7F800000) + { + comps[i].u = 0xFFC00000; + } + // Else reduce the mantissa precision + else + { + comps[i].u = comps[i].u & 0xFFFFF000; + } + } + } +} + +/** +*************************************************************************************************** +* AddrElemLib::IsBlockCompressed +* +* @brief +* TRUE if this is block compressed format +* +* @note +* +* @return +* BOOL_32 +*************************************************************************************************** +*/ +BOOL_32 AddrElemLib::IsBlockCompressed( + AddrFormat format) ///< [in] Format +{ + return format >= ADDR_FMT_BC1 && format <= ADDR_FMT_BC7; +} + + +/** +*************************************************************************************************** +* AddrElemLib::IsCompressed +* +* @brief +* TRUE if this is block compressed format or 1 bit format +* +* @note +* +* @return +* BOOL_32 +*************************************************************************************************** +*/ +BOOL_32 AddrElemLib::IsCompressed( + AddrFormat format) ///< [in] Format +{ + return IsBlockCompressed(format) || format == ADDR_FMT_BC1 || format == ADDR_FMT_BC7; +} + +/** +*************************************************************************************************** +* AddrElemLib::IsExpand3x +* +* @brief +* TRUE if this is 3x expand format +* +* @note +* +* @return +* BOOL_32 +*************************************************************************************************** +*/ +BOOL_32 AddrElemLib::IsExpand3x( + AddrFormat format) ///< [in] Format +{ + BOOL_32 is3x = FALSE; + + switch (format) + { + case ADDR_FMT_8_8_8: + case ADDR_FMT_16_16_16: + case ADDR_FMT_16_16_16_FLOAT: + case ADDR_FMT_32_32_32: + case ADDR_FMT_32_32_32_FLOAT: + is3x = TRUE; + break; + default: + break; + } + + return is3x; +} + + diff --git a/src/amd/addrlib/core/addrelemlib.h b/src/amd/addrlib/core/addrelemlib.h new file mode 100644 index 0000000..c302b3b --- /dev/null +++ b/src/amd/addrlib/core/addrelemlib.h @@ -0,0 +1,270 @@ +/* + * Copyright © 2014 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +*************************************************************************************************** +* @file addrelemlib.h +* @brief Contains the class for element/pixel related functions +*************************************************************************************************** +*/ + +#ifndef __ELEM_LIB_H__ +#define __ELEM_LIB_H__ + +#include "addrinterface.h" +#include "addrobject.h" +#include "addrcommon.h" + +class AddrLib; + +// The masks for property bits within the Properties INT_32 +union ADDR_COMPONENT_FLAGS +{ + struct + { + UINT_32 byteAligned : 1; ///< all components are byte aligned + UINT_32 exportNorm : 1; ///< components support R6xx NORM compression + UINT_32 floatComp : 1; ///< there is at least one floating point component + }; + + UINT_32 value; +}; + +// Copy from legacy lib's AddrNumberType +enum AddrNumberType +{ + // The following number types have the range [-1..1] + ADDR_NO_NUMBER, // This component doesn't exist and has no default value + ADDR_EPSILON, // Force component value to integer 0x00000001 + ADDR_ZERO, // Force component value to integer 0x00000000 + ADDR_ONE, // Force component value to floating point 1.0 + // Above values don't have any bits per component (keep ADDR_ONE the last of these) + + ADDR_UNORM, // Unsigned normalized (repeating fraction) full precision + ADDR_SNORM, // Signed normalized (repeating fraction) full precision + ADDR_GAMMA, // Gamma-corrected, full precision + + ADDR_UNORM_R5XXRB, // Unsigned normalized (repeating fraction) for r5xx RB + ADDR_SNORM_R5XXRB, // Signed normalized (repeating fraction) for r5xx RB + ADDR_GAMMA_R5XXRB, // Gamma-corrected for r5xx RB (note: unnormalized value) + ADDR_UNORM_R5XXBC, // Unsigned normalized (repeating fraction) for r5xx BC + ADDR_SNORM_R5XXBC, // Signed normalized (repeating fraction) for r5xx BC + ADDR_GAMMA_R5XXBC, // Gamma-corrected for r5xx BC (note: unnormalized value) + + ADDR_UNORM_R6XX, // Unsigned normalized (repeating fraction) for R6xx + ADDR_UNORM_R6XXDB, // Unorms for 24-bit depth: one value differs from ADDR_UNORM_R6XX + ADDR_SNORM_R6XX, // Signed normalized (repeating fraction) for R6xx + ADDR_GAMMA8_R6XX, // Gamma-corrected for r6xx + ADDR_GAMMA8_R7XX_TP, // Gamma-corrected for r7xx TP 12bit unorm 8.4. + + ADDR_U4FLOATC, // Unsigned float: 4-bit exponent, bias=15, no NaN, clamp [0..1] + ADDR_GAMMA_4SEG, // Gamma-corrected, four segment approximation + ADDR_U0FIXED, // Unsigned 0.N-bit fixed point + + // The following number types have large ranges (LEAVE ADDR_USCALED first or fix Finish routine) + ADDR_USCALED, // Unsigned integer converted to/from floating point + ADDR_SSCALED, // Signed integer converted to/from floating point + ADDR_USCALED_R5XXRB, // Unsigned integer to/from floating point for r5xx RB + ADDR_SSCALED_R5XXRB, // Signed integer to/from floating point for r5xx RB + ADDR_UINT_BITS, // Keep in unsigned integer form, clamped to specified range + ADDR_SINT_BITS, // Keep in signed integer form, clamped to specified range + ADDR_UINTBITS, // @@ remove Keep in unsigned integer form, use modulus to reduce bits + ADDR_SINTBITS, // @@ remove Keep in signed integer form, use modulus to reduce bits + + // The following number types and ADDR_U4FLOATC have exponents + // (LEAVE ADDR_S8FLOAT first or fix Finish routine) + ADDR_S8FLOAT, // Signed floating point with 8-bit exponent, bias=127 + ADDR_S8FLOAT32, // 32-bit IEEE float, passes through NaN values + ADDR_S5FLOAT, // Signed floating point with 5-bit exponent, bias=15 + ADDR_S5FLOATM, // Signed floating point with 5-bit exponent, bias=15, no NaN/Inf + ADDR_U5FLOAT, // Signed floating point with 5-bit exponent, bias=15 + ADDR_U3FLOATM, // Unsigned floating point with 3-bit exponent, bias=3 + + ADDR_S5FIXED, // Signed 5.N-bit fixed point, with rounding + + ADDR_END_NUMBER // Used for range comparisons +}; + +// Copy from legacy lib's AddrElement +enum AddrElemMode +{ + // These formats allow both packing an unpacking + ADDR_ROUND_BY_HALF, // add 1/2 and truncate when packing this element + ADDR_ROUND_TRUNCATE, // truncate toward 0 for sign/mag, else toward neg + ADDR_ROUND_DITHER, // Pack by dithering -- requires (x,y) position + + // These formats only allow unpacking, no packing + ADDR_UNCOMPRESSED, // Elements are not compressed: one data element per pixel/texel + ADDR_EXPANDED, // Elements are split up and stored in multiple data elements + ADDR_PACKED_STD, // Elements are compressed into ExpandX by ExpandY data elements + ADDR_PACKED_REV, // Like ADDR_PACKED, but X order of pixels is reverved + ADDR_PACKED_GBGR, // Elements are compressed 4:2:2 in G1B_G0R order (high to low) + ADDR_PACKED_BGRG, // Elements are compressed 4:2:2 in BG1_RG0 order (high to low) + ADDR_PACKED_BC1, // Each data element is uncompressed to a 4x4 pixel/texel array + ADDR_PACKED_BC2, // Each data element is uncompressed to a 4x4 pixel/texel array + ADDR_PACKED_BC3, // Each data element is uncompressed to a 4x4 pixel/texel array + ADDR_PACKED_BC4, // Each data element is uncompressed to a 4x4 pixel/texel array + ADDR_PACKED_BC5, // Each data element is uncompressed to a 4x4 pixel/texel array + + // These formats provide various kinds of compression + ADDR_ZPLANE_R5XX, // Compressed Zplane using r5xx architecture format + ADDR_ZPLANE_R6XX, // Compressed Zplane using r6xx architecture format + //@@ Fill in the compression modes + + ADDR_END_ELEMENT // Used for range comparisons +}; + +enum AddrDepthPlanarType +{ + ADDR_DEPTH_PLANAR_NONE = 0, // No plane z/stencl + ADDR_DEPTH_PLANAR_R600 = 1, // R600 z and stencil planes are store within a tile + ADDR_DEPTH_PLANAR_R800 = 2, // R800 has separate z and stencil planes +}; + +/** +*************************************************************************************************** +* ADDR_PIXEL_FORMATINFO +* +* @brief +* Per component info +* +*************************************************************************************************** +*/ +struct ADDR_PIXEL_FORMATINFO +{ + UINT_32 compBit[4]; + AddrNumberType numType[4]; + UINT_32 compStart[4]; + AddrElemMode elemMode; + UINT_32 comps; ///< Number of components +}; + +/** +*************************************************************************************************** +* @brief This class contains asic indepentent element related attributes and operations +*************************************************************************************************** +*/ +class AddrElemLib : public AddrObject +{ +protected: + AddrElemLib(AddrLib* const pAddrLib); + +public: + + /// Makes this class virtual + virtual ~AddrElemLib(); + + static AddrElemLib *Create( + const AddrLib* const pAddrLib); + + /// The implementation is only for R6xx/R7xx, so make it virtual in case we need for R8xx + BOOL_32 PixGetExportNorm( + AddrColorFormat colorFmt, + AddrSurfaceNumber numberFmt, AddrSurfaceSwap swap) const; + + /// Below method are asic independent, so make them just static. + /// Remove static if we need different operation in hwl. + + VOID Flt32ToDepthPixel( + AddrDepthFormat format, const ADDR_FLT_32 comps[2], UINT_8 *pPixel) const; + + VOID Flt32ToColorPixel( + AddrColorFormat format, AddrSurfaceNumber surfNum, AddrSurfaceSwap surfSwap, + const ADDR_FLT_32 comps[4], UINT_8 *pPixel) const; + + static VOID Flt32sToInt32s( + ADDR_FLT_32 value, UINT_32 bits, AddrNumberType numberType, UINT_32* pResult); + + static VOID Int32sToPixel( + UINT_32 numComps, UINT_32* pComps, UINT_32* pCompBits, UINT_32* pCompStart, + ADDR_COMPONENT_FLAGS properties, UINT_32 resultBits, UINT_8* pPixel); + + VOID PixGetColorCompInfo( + AddrColorFormat format, AddrSurfaceNumber number, AddrSurfaceSwap swap, + ADDR_PIXEL_FORMATINFO* pInfo) const; + + VOID PixGetDepthCompInfo( + AddrDepthFormat format, ADDR_PIXEL_FORMATINFO* pInfo) const; + + UINT_32 GetBitsPerPixel( + AddrFormat format, AddrElemMode* pElemMode, + UINT_32* pExpandX = NULL, UINT_32* pExpandY = NULL, UINT_32* pBitsUnused = NULL); + + static VOID SetClearComps( + ADDR_FLT_32 comps[4], BOOL_32 clearColor, BOOL_32 float32); + + VOID AdjustSurfaceInfo( + AddrElemMode elemMode, UINT_32 expandX, UINT_32 expandY, + UINT_32* pBpp, UINT_32* pBasePitch, UINT_32* pWidth, UINT_32* pHeight); + + VOID RestoreSurfaceInfo( + AddrElemMode elemMode, UINT_32 expandX, UINT_32 expandY, + UINT_32* pBpp, UINT_32* pWidth, UINT_32* pHeight); + + /// Checks if depth and stencil are planar inside a tile + BOOL_32 IsDepthStencilTilePlanar() + { + return (m_depthPlanarType == ADDR_DEPTH_PLANAR_R600) ? TRUE : FALSE; + } + + /// Sets m_configFlags, copied from AddrLib + VOID SetConfigFlags(ADDR_CONFIG_FLAGS flags) + { + m_configFlags = flags; + } + + static BOOL_32 IsCompressed(AddrFormat format); + static BOOL_32 IsBlockCompressed(AddrFormat format); + static BOOL_32 IsExpand3x(AddrFormat format); + +protected: + + static VOID GetCompBits( + UINT_32 c0, UINT_32 c1, UINT_32 c2, UINT_32 c3, + ADDR_PIXEL_FORMATINFO* pInfo, + AddrElemMode elemMode = ADDR_ROUND_BY_HALF); + + static VOID GetCompType( + AddrColorFormat format, AddrSurfaceNumber numType, + ADDR_PIXEL_FORMATINFO* pInfo); + + static VOID GetCompSwap( + AddrSurfaceSwap swap, ADDR_PIXEL_FORMATINFO* pInfo); + + static VOID SwapComps( + UINT_32 c0, UINT_32 c1, ADDR_PIXEL_FORMATINFO* pInfo); + +private: + + UINT_32 m_fp16ExportNorm; ///< If allow FP16 to be reported as EXPORT_NORM + AddrDepthPlanarType m_depthPlanarType; + + ADDR_CONFIG_FLAGS m_configFlags; ///< Copy of AddrLib's configFlags + AddrLib* const m_pAddrLib; ///< Pointer to parent addrlib instance +}; + +#endif + diff --git a/src/amd/addrlib/core/addrlib.cpp b/src/amd/addrlib/core/addrlib.cpp new file mode 100644 index 0000000..1df693e --- /dev/null +++ b/src/amd/addrlib/core/addrlib.cpp @@ -0,0 +1,4023 @@ +/* + * Copyright © 2014 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +*************************************************************************************************** +* @file addrlib.cpp +* @brief Contains the implementation for the AddrLib base class.. +*************************************************************************************************** +*/ + +#include "addrinterface.h" +#include "addrlib.h" +#include "addrcommon.h" + +#if defined(__APPLE__) + +UINT_32 div64_32(UINT_64 n, UINT_32 base) +{ + UINT_64 rem = n; + UINT_64 b = base; + UINT_64 res, d = 1; + UINT_32 high = rem >> 32; + + res = 0; + if (high >= base) + { + high /= base; + res = (UINT_64) high << 32; + rem -= (UINT_64) (high*base) << 32; + } + + while ((INT_64)b > 0 && b < rem) + { + b = b+b; + d = d+d; + } + + do + { + if (rem >= b) + { + rem -= b; + res += d; + } + b >>= 1; + d >>= 1; + } while (d); + + n = res; + return rem; +} + +extern "C" +UINT_32 __umoddi3(UINT_64 n, UINT_32 base) +{ + return div64_32(n, base); +} + +#endif // __APPLE__ + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Static Const Member +/////////////////////////////////////////////////////////////////////////////////////////////////// + +const AddrTileModeFlags AddrLib::m_modeFlags[ADDR_TM_COUNT] = +{// T L 1 2 3 P Pr B + {1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_TM_LINEAR_GENERAL + {1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_TM_LINEAR_ALIGNED + {1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_TM_1D_TILED_THIN1 + {4, 0, 1, 0, 0, 0, 0, 0}, // ADDR_TM_1D_TILED_THICK + {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN1 + {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN2 + {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN4 + {4, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THICK + {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN1 + {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN2 + {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN4 + {4, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THICK + {1, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_THIN1 + {4, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_THICK + {1, 0, 0, 1, 1, 0, 0, 1}, // ADDR_TM_3B_TILED_THIN1 + {4, 0, 0, 1, 1, 0, 0, 1}, // ADDR_TM_3B_TILED_THICK + {8, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_XTHICK + {8, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_XTHICK + {1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_TM_POWER_SAVE + {1, 0, 0, 1, 0, 1, 1, 0}, // ADDR_TM_PRT_TILED_THIN1 + {1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_TM_PRT_2D_TILED_THIN1 + {1, 0, 0, 1, 1, 1, 0, 0}, // ADDR_TM_PRT_3D_TILED_THIN1 + {4, 0, 0, 1, 0, 1, 1, 0}, // ADDR_TM_PRT_TILED_THICK + {4, 0, 0, 1, 0, 1, 0, 0}, // ADDR_TM_PRT_2D_TILED_THICK + {4, 0, 0, 1, 1, 1, 0, 0}, // ADDR_TM_PRT_3D_TILED_THICK +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Constructor/Destructor +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +*************************************************************************************************** +* AddrLib::AddrLib +* +* @brief +* Constructor for the AddrLib class +* +*************************************************************************************************** +*/ +AddrLib::AddrLib() : + m_class(BASE_ADDRLIB), + m_chipFamily(ADDR_CHIP_FAMILY_IVLD), + m_chipRevision(0), + m_version(ADDRLIB_VERSION), + m_pipes(0), + m_banks(0), + m_pipeInterleaveBytes(0), + m_rowSize(0), + m_minPitchAlignPixels(1), + m_maxSamples(8), + m_pElemLib(NULL) +{ + m_configFlags.value = 0; +} + +/** +*************************************************************************************************** +* AddrLib::AddrLib +* +* @brief +* Constructor for the AddrLib class with hClient as parameter +* +*************************************************************************************************** +*/ +AddrLib::AddrLib(const AddrClient* pClient) : + AddrObject(pClient), + m_class(BASE_ADDRLIB), + m_chipFamily(ADDR_CHIP_FAMILY_IVLD), + m_chipRevision(0), + m_version(ADDRLIB_VERSION), + m_pipes(0), + m_banks(0), + m_pipeInterleaveBytes(0), + m_rowSize(0), + m_minPitchAlignPixels(1), + m_maxSamples(8), + m_pElemLib(NULL) +{ + m_configFlags.value = 0; +} + +/** +*************************************************************************************************** +* AddrLib::~AddrLib +* +* @brief +* Destructor for the AddrLib class +* +*************************************************************************************************** +*/ +AddrLib::~AddrLib() +{ + if (m_pElemLib) + { + delete m_pElemLib; + } +} + + + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Initialization/Helper +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +*************************************************************************************************** +* AddrLib::Create +* +* @brief +* Creates and initializes AddrLib object. +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::Create( + const ADDR_CREATE_INPUT* pCreateIn, ///< [in] pointer to ADDR_CREATE_INPUT + ADDR_CREATE_OUTPUT* pCreateOut) ///< [out] pointer to ADDR_CREATE_OUTPUT +{ + AddrLib* pLib = NULL; + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pCreateIn->createFlags.fillSizeFields == TRUE) + { + if ((pCreateIn->size != sizeof(ADDR_CREATE_INPUT)) || + (pCreateOut->size != sizeof(ADDR_CREATE_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if ((returnCode == ADDR_OK) && + (pCreateIn->callbacks.allocSysMem != NULL) && + (pCreateIn->callbacks.freeSysMem != NULL)) + { + AddrClient client = { + pCreateIn->hClient, + pCreateIn->callbacks + }; + + switch (pCreateIn->chipEngine) + { + case CIASICIDGFXENGINE_SOUTHERNISLAND: + switch (pCreateIn->chipFamily) + { + case FAMILY_SI: + pLib = AddrSIHwlInit(&client); + break; + case FAMILY_VI: + case FAMILY_CZ: // VI based fusion(carrizo) + case FAMILY_CI: + case FAMILY_KV: // CI based fusion + pLib = AddrCIHwlInit(&client); + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + + if ((pLib != NULL)) + { + BOOL_32 initValid; + + // Pass createFlags to configFlags first since these flags may be overwritten + pLib->m_configFlags.noCubeMipSlicesPad = pCreateIn->createFlags.noCubeMipSlicesPad; + pLib->m_configFlags.fillSizeFields = pCreateIn->createFlags.fillSizeFields; + pLib->m_configFlags.useTileIndex = pCreateIn->createFlags.useTileIndex; + pLib->m_configFlags.useCombinedSwizzle = pCreateIn->createFlags.useCombinedSwizzle; + pLib->m_configFlags.checkLast2DLevel = pCreateIn->createFlags.checkLast2DLevel; + pLib->m_configFlags.useHtileSliceAlign = pCreateIn->createFlags.useHtileSliceAlign; + pLib->m_configFlags.degradeBaseLevel = pCreateIn->createFlags.degradeBaseLevel; + pLib->m_configFlags.allowLargeThickTile = pCreateIn->createFlags.allowLargeThickTile; + + pLib->SetAddrChipFamily(pCreateIn->chipFamily, pCreateIn->chipRevision); + + pLib->SetMinPitchAlignPixels(pCreateIn->minPitchAlignPixels); + + // Global parameters initialized and remaining configFlags bits are set as well + initValid = pLib->HwlInitGlobalParams(pCreateIn); + + if (initValid) + { + pLib->m_pElemLib = AddrElemLib::Create(pLib); + } + else + { + pLib->m_pElemLib = NULL; // Don't go on allocating element lib + returnCode = ADDR_INVALIDGBREGVALUES; + } + + if (pLib->m_pElemLib == NULL) + { + delete pLib; + pLib = NULL; + ADDR_ASSERT_ALWAYS(); + } + else + { + pLib->m_pElemLib->SetConfigFlags(pLib->m_configFlags); + } + } + + pCreateOut->hLib = pLib; + + if ((pLib == NULL) && + (returnCode == ADDR_OK)) + { + // Unknown failures, we return the general error code + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::SetAddrChipFamily +* +* @brief +* Convert familyID defined in atiid.h to AddrChipFamily and set m_chipFamily/m_chipRevision +* @return +* N/A +*************************************************************************************************** +*/ +VOID AddrLib::SetAddrChipFamily( + UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h + UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h +{ + AddrChipFamily family = ADDR_CHIP_FAMILY_IVLD; + + family = HwlConvertChipFamily(uChipFamily, uChipRevision); + + ADDR_ASSERT(family != ADDR_CHIP_FAMILY_IVLD); + + m_chipFamily = family; + m_chipRevision = uChipRevision; +} + +/** +*************************************************************************************************** +* AddrLib::SetMinPitchAlignPixels +* +* @brief +* Set m_minPitchAlignPixels with input param +* +* @return +* N/A +*************************************************************************************************** +*/ +VOID AddrLib::SetMinPitchAlignPixels( + UINT_32 minPitchAlignPixels) ///< [in] minmum pitch alignment in pixels +{ + m_minPitchAlignPixels = (minPitchAlignPixels == 0)? 1 : minPitchAlignPixels; +} + +/** +*************************************************************************************************** +* AddrLib::GetAddrLib +* +* @brief +* Get AddrLib pointer +* +* @return +* An AddrLib class pointer +*************************************************************************************************** +*/ +AddrLib * AddrLib::GetAddrLib( + ADDR_HANDLE hLib) ///< [in] handle of ADDR_HANDLE +{ + return static_cast<AddrLib *>(hLib); +} + + + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Surface Methods +/////////////////////////////////////////////////////////////////////////////////////////////////// + + +/** +*************************************************************************************************** +* AddrLib::ComputeSurfaceInfo +* +* @brief +* Interface function stub of AddrComputeSurfaceInfo. +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + // We suggest client do sanity check but a check here is also good + if (pIn->bpp > 128) + { + returnCode = ADDR_INVALIDPARAMS; + } + + // Thick modes don't support multisample + if (ComputeSurfaceThickness(pIn->tileMode) > 1 && pIn->numSamples > 1) + { + returnCode = ADDR_INVALIDPARAMS; + } + + if (returnCode == ADDR_OK) + { + // Get a local copy of input structure and only reference pIn for unadjusted values + ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; + ADDR_TILEINFO tileInfoNull = {0}; + + if (UseTileInfo()) + { + // If the original input has a valid ADDR_TILEINFO pointer then copy its contents. + // Otherwise the default 0's in tileInfoNull are used. + if (pIn->pTileInfo) + { + tileInfoNull = *pIn->pTileInfo; + } + localIn.pTileInfo = &tileInfoNull; + } + + localIn.numSamples = pIn->numSamples == 0 ? 1 : pIn->numSamples; + + // Do mipmap check first + // If format is BCn, pre-pad dimension to power-of-two according to HWL + ComputeMipLevel(&localIn); + + if (m_configFlags.checkLast2DLevel) + { + // Save this level's original height in pixels + pOut->height = pIn->height; + } + + UINT_32 expandX = 1; + UINT_32 expandY = 1; + AddrElemMode elemMode; + + // Save outputs that may not go through HWL + pOut->pixelBits = localIn.bpp; + pOut->numSamples = localIn.numSamples; + pOut->last2DLevel = FALSE; + +#if !ALT_TEST + if (localIn.numSamples > 1) + { + ADDR_ASSERT(localIn.mipLevel == 0); + } +#endif + + if (localIn.format != ADDR_FMT_INVALID) // Set format to INVALID will skip this conversion + { + // Get compression/expansion factors and element mode + // (which indicates compression/expansion + localIn.bpp = GetElemLib()->GetBitsPerPixel(localIn.format, + &elemMode, + &expandX, + &expandY); + + // Special flag for 96 bit surface. 96 (or 48 if we support) bit surface's width is + // pre-multiplied by 3 and bpp is divided by 3. So pitch alignment for linear- + // aligned does not meet 64-pixel in real. We keep special handling in hwl since hw + // restrictions are different. + // Also Mip 1+ needs an element pitch of 32 bits so we do not need this workaround + // but we use this flag to skip RestoreSurfaceInfo below + + if ((elemMode == ADDR_EXPANDED) && + (expandX > 1)) + { + ADDR_ASSERT(localIn.tileMode == ADDR_TM_LINEAR_ALIGNED || localIn.height == 1); + } + + GetElemLib()->AdjustSurfaceInfo(elemMode, + expandX, + expandY, + &localIn.bpp, + &localIn.basePitch, + &localIn.width, + &localIn.height); + + // Overwrite these parameters if we have a valid format + } + else if (localIn.bpp != 0) + { + localIn.width = (localIn.width != 0) ? localIn.width : 1; + localIn.height = (localIn.height != 0) ? localIn.height : 1; + } + else // Rule out some invalid parameters + { + ADDR_ASSERT_ALWAYS(); + + returnCode = ADDR_INVALIDPARAMS; + } + + // Check mipmap after surface expansion + if (returnCode == ADDR_OK) + { + returnCode = PostComputeMipLevel(&localIn, pOut); + } + + if (returnCode == ADDR_OK) + { + if (UseTileIndex(localIn.tileIndex)) + { + // Make sure pTileInfo is not NULL + ADDR_ASSERT(localIn.pTileInfo); + + UINT_32 numSamples = GetNumFragments(localIn.numSamples, localIn.numFrags); + + INT_32 macroModeIndex = TileIndexNoMacroIndex; + + if (localIn.tileIndex != TileIndexLinearGeneral) + { + // Try finding a macroModeIndex + macroModeIndex = HwlComputeMacroModeIndex(localIn.tileIndex, + localIn.flags, + localIn.bpp, + numSamples, + localIn.pTileInfo, + &localIn.tileMode, + &localIn.tileType); + } + + // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info + if (macroModeIndex == TileIndexNoMacroIndex) + { + returnCode = HwlSetupTileCfg(localIn.tileIndex, macroModeIndex, + localIn.pTileInfo, + &localIn.tileMode, &localIn.tileType); + } + // If macroModeIndex is invalid, then assert this is not macro tiled + else if (macroModeIndex == TileIndexInvalid) + { + ADDR_ASSERT(!IsMacroTiled(localIn.tileMode)); + } + } + } + + if (returnCode == ADDR_OK) + { + AddrTileMode tileMode = localIn.tileMode; + AddrTileType tileType = localIn.tileType; + + // HWL layer may override tile mode if necessary + if (HwlOverrideTileMode(&localIn, &tileMode, &tileType)) + { + localIn.tileMode = tileMode; + localIn.tileType = tileType; + } + // Degrade base level if applicable + if (DegradeBaseLevel(&localIn, &tileMode)) + { + localIn.tileMode = tileMode; + } + } + + // Call main function to compute surface info + if (returnCode == ADDR_OK) + { + returnCode = HwlComputeSurfaceInfo(&localIn, pOut); + } + + if (returnCode == ADDR_OK) + { + // Since bpp might be changed we just pass it through + pOut->bpp = localIn.bpp; + + // Also original width/height/bpp + pOut->pixelPitch = pOut->pitch; + pOut->pixelHeight = pOut->height; + +#if DEBUG + if (localIn.flags.display) + { + ADDR_ASSERT((pOut->pitchAlign % 32) == 0); + } +#endif //DEBUG + + if (localIn.format != ADDR_FMT_INVALID) + { + // + // 96 bits surface of level 1+ requires element pitch of 32 bits instead + // In hwl function we skip multiplication of 3 then we should skip division of 3 + // We keep pitch that represents 32 bit element instead of 96 bits since we + // will get an odd number if divided by 3. + // + if (!((expandX == 3) && (localIn.mipLevel > 0))) + { + + GetElemLib()->RestoreSurfaceInfo(elemMode, + expandX, + expandY, + &localIn.bpp, + &pOut->pixelPitch, + &pOut->pixelHeight); + } + } + + if (localIn.flags.qbStereo) + { + if (pOut->pStereoInfo) + { + ComputeQbStereoInfo(pOut); + } + } + + if (localIn.flags.volume) // For volume sliceSize equals to all z-slices + { + pOut->sliceSize = pOut->surfSize; + } + else // For array: sliceSize is likely to have slice-padding (the last one) + { + pOut->sliceSize = pOut->surfSize / pOut->depth; + + // array or cubemap + if (pIn->numSlices > 1) + { + // If this is the last slice then add the padding size to this slice + if (pIn->slice == (pIn->numSlices - 1)) + { + pOut->sliceSize += pOut->sliceSize * (pOut->depth - pIn->numSlices); + } + else if (m_configFlags.checkLast2DLevel) + { + // Reset last2DLevel flag if this is not the last array slice + pOut->last2DLevel = FALSE; + } + } + } + + pOut->pitchTileMax = pOut->pitch / 8 - 1; + pOut->heightTileMax = pOut->height / 8 - 1; + pOut->sliceTileMax = pOut->pitch * pOut->height / 64 - 1; + } + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeSurfaceInfo +* +* @brief +* Interface function stub of AddrComputeSurfaceInfo. +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ComputeSurfaceAddrFromCoord( + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + const ADDR_SURFACE_FLAGS flags = {{0}}; + UINT_32 numSamples = GetNumFragments(pIn->numSamples, pIn->numFrags); + + // Try finding a macroModeIndex + INT_32 macroModeIndex = HwlComputeMacroModeIndex(input.tileIndex, + flags, + input.bpp, + numSamples, + input.pTileInfo, + &input.tileMode, + &input.tileType); + + // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info + if (macroModeIndex == TileIndexNoMacroIndex) + { + returnCode = HwlSetupTileCfg(input.tileIndex, macroModeIndex, + input.pTileInfo, &input.tileMode, &input.tileType); + } + // If macroModeIndex is invalid, then assert this is not macro tiled + else if (macroModeIndex == TileIndexInvalid) + { + ADDR_ASSERT(!IsMacroTiled(input.tileMode)); + } + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlComputeSurfaceAddrFromCoord(pIn, pOut); + + if (returnCode == ADDR_OK) + { + pOut->prtBlockIndex = static_cast<UINT_32>(pOut->addr / (64 * 1024)); + } + } + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeSurfaceCoordFromAddr +* +* @brief +* Interface function stub of ComputeSurfaceCoordFromAddr. +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ComputeSurfaceCoordFromAddr( + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + const ADDR_SURFACE_FLAGS flags = {{0}}; + UINT_32 numSamples = GetNumFragments(pIn->numSamples, pIn->numFrags); + + // Try finding a macroModeIndex + INT_32 macroModeIndex = HwlComputeMacroModeIndex(input.tileIndex, + flags, + input.bpp, + numSamples, + input.pTileInfo, + &input.tileMode, + &input.tileType); + + // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info + if (macroModeIndex == TileIndexNoMacroIndex) + { + returnCode = HwlSetupTileCfg(input.tileIndex, macroModeIndex, + input.pTileInfo, &input.tileMode, &input.tileType); + } + // If macroModeIndex is invalid, then assert this is not macro tiled + else if (macroModeIndex == TileIndexInvalid) + { + ADDR_ASSERT(!IsMacroTiled(input.tileMode)); + } + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlComputeSurfaceCoordFromAddr(pIn, pOut); + } + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeSliceTileSwizzle +* +* @brief +* Interface function stub of ComputeSliceTileSwizzle. +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ComputeSliceTileSwizzle( + const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_SLICESWIZZLE_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_SLICESWIZZLE_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_SLICESWIZZLE_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, + input.pTileInfo, &input.tileMode); + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlComputeSliceTileSwizzle(pIn, pOut); + } + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::ExtractBankPipeSwizzle +* +* @brief +* Interface function stub of AddrExtractBankPipeSwizzle. +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ExtractBankPipeSwizzle( + const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, ///< [in] input structure + ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT)) || + (pOut->size != sizeof(ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo); + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlExtractBankPipeSwizzle(pIn, pOut); + } + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::CombineBankPipeSwizzle +* +* @brief +* Interface function stub of AddrCombineBankPipeSwizzle. +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::CombineBankPipeSwizzle( + const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn, ///< [in] input structure + ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_INFO_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo); + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlCombineBankPipeSwizzle(pIn->bankSwizzle, + pIn->pipeSwizzle, + pIn->pTileInfo, + pIn->baseAddr, + &pOut->tileSwizzle); + } + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeBaseSwizzle +* +* @brief +* Interface function stub of AddrCompueBaseSwizzle. +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ComputeBaseSwizzle( + const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_BASE_SWIZZLE_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo); + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + if (IsMacroTiled(pIn->tileMode)) + { + returnCode = HwlComputeBaseSwizzle(pIn, pOut); + } + else + { + pOut->tileSwizzle = 0; + } + } + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeFmaskInfo +* +* @brief +* Interface function stub of ComputeFmaskInfo. +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut ///< [out] output structure + ) +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_INFO_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + // No thick MSAA + if (ComputeSurfaceThickness(pIn->tileMode) > 1) + { + returnCode = ADDR_INVALIDPARAMS; + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_FMASK_INFO_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + + if (pOut->pTileInfo) + { + // Use temp tile info for calcalation + input.pTileInfo = pOut->pTileInfo; + } + else + { + input.pTileInfo = &tileInfoNull; + } + + ADDR_SURFACE_FLAGS flags = {{0}}; + flags.fmask = 1; + + // Try finding a macroModeIndex + INT_32 macroModeIndex = HwlComputeMacroModeIndex(pIn->tileIndex, + flags, + HwlComputeFmaskBits(pIn, NULL), + pIn->numSamples, + input.pTileInfo, + &input.tileMode); + + // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info + if (macroModeIndex == TileIndexNoMacroIndex) + { + returnCode = HwlSetupTileCfg(input.tileIndex, macroModeIndex, + input.pTileInfo, &input.tileMode); + } + + ADDR_ASSERT(macroModeIndex != TileIndexInvalid); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + if (pIn->numSamples > 1) + { + returnCode = HwlComputeFmaskInfo(pIn, pOut); + } + else + { + memset(pOut, 0, sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT)); + + returnCode = ADDR_INVALIDPARAMS; + } + } + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeFmaskAddrFromCoord +* +* @brief +* Interface function stub of ComputeFmaskAddrFromCoord. +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ComputeFmaskAddrFromCoord( + const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_ASSERT(pIn->numSamples > 1); + + if (pIn->numSamples > 1) + { + returnCode = HwlComputeFmaskAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_INVALIDPARAMS; + } + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeFmaskCoordFromAddr +* +* @brief +* Interface function stub of ComputeFmaskAddrFromCoord. +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ComputeFmaskCoordFromAddr( + const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_ASSERT(pIn->numSamples > 1); + + if (pIn->numSamples > 1) + { + returnCode = HwlComputeFmaskCoordFromAddr(pIn, pOut); + } + else + { + returnCode = ADDR_INVALIDPARAMS; + } + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::ConvertTileInfoToHW +* +* @brief +* Convert tile info from real value to HW register value in HW layer +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ConvertTileInfoToHW( + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_CONVERT_TILEINFOTOHW_INPUT)) || + (pOut->size != sizeof(ADDR_CONVERT_TILEINFOTOHW_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_CONVERT_TILEINFOTOHW_INPUT input; + // if pIn->reverse is TRUE, indices are ignored + if (pIn->reverse == FALSE && UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo); + + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlConvertTileInfoToHW(pIn, pOut); + } + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::ConvertTileIndex +* +* @brief +* Convert tile index to tile mode/type/info +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ConvertTileIndex( + const ADDR_CONVERT_TILEINDEX_INPUT* pIn, ///< [in] input structure + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_CONVERT_TILEINDEX_INPUT)) || + (pOut->size != sizeof(ADDR_CONVERT_TILEINDEX_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + + returnCode = HwlSetupTileCfg(pIn->tileIndex, pIn->macroModeIndex, + pOut->pTileInfo, &pOut->tileMode, &pOut->tileType); + + if (returnCode == ADDR_OK && pIn->tileInfoHw) + { + ADDR_CONVERT_TILEINFOTOHW_INPUT hwInput = {0}; + ADDR_CONVERT_TILEINFOTOHW_OUTPUT hwOutput = {0}; + + hwInput.pTileInfo = pOut->pTileInfo; + hwInput.tileIndex = -1; + hwOutput.pTileInfo = pOut->pTileInfo; + + returnCode = HwlConvertTileInfoToHW(&hwInput, &hwOutput); + } + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::ConvertTileIndex1 +* +* @brief +* Convert tile index to tile mode/type/info +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ConvertTileIndex1( + const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, ///< [in] input structure + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_CONVERT_TILEINDEX1_INPUT)) || + (pOut->size != sizeof(ADDR_CONVERT_TILEINDEX_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_SURFACE_FLAGS flags = {{0}}; + + HwlComputeMacroModeIndex(pIn->tileIndex, flags, pIn->bpp, pIn->numSamples, + pOut->pTileInfo, &pOut->tileMode, &pOut->tileType); + + if (pIn->tileInfoHw) + { + ADDR_CONVERT_TILEINFOTOHW_INPUT hwInput = {0}; + ADDR_CONVERT_TILEINFOTOHW_OUTPUT hwOutput = {0}; + + hwInput.pTileInfo = pOut->pTileInfo; + hwInput.tileIndex = -1; + hwOutput.pTileInfo = pOut->pTileInfo; + + returnCode = HwlConvertTileInfoToHW(&hwInput, &hwOutput); + } + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::GetTileIndex +* +* @brief +* Get tile index from tile mode/type/info +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::GetTileIndex( + const ADDR_GET_TILEINDEX_INPUT* pIn, ///< [in] input structure + ADDR_GET_TILEINDEX_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_GET_TILEINDEX_INPUT)) || + (pOut->size != sizeof(ADDR_GET_TILEINDEX_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlGetTileIndex(pIn, pOut); + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeSurfaceThickness +* +* @brief +* Compute surface thickness +* +* @return +* Surface thickness +*************************************************************************************************** +*/ +UINT_32 AddrLib::ComputeSurfaceThickness( + AddrTileMode tileMode) ///< [in] tile mode +{ + return m_modeFlags[tileMode].thickness; +} + + + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// CMASK/HTILE +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +*************************************************************************************************** +* AddrLib::ComputeHtileInfo +* +* @brief +* Interface function stub of AddrComputeHtilenfo +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ComputeHtileInfo( + const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + BOOL_32 isWidth8 = (pIn->blockWidth == 8) ? TRUE : FALSE; + BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? TRUE : FALSE; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_HTILE_INFO_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + pOut->bpp = ComputeHtileInfo(pIn->flags, + pIn->pitch, + pIn->height, + pIn->numSlices, + pIn->isLinear, + isWidth8, + isHeight8, + pIn->pTileInfo, + &pOut->pitch, + &pOut->height, + &pOut->htileBytes, + &pOut->macroWidth, + &pOut->macroHeight, + &pOut->sliceSize, + &pOut->baseAlign); + } + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeCmaskInfo +* +* @brief +* Interface function stub of AddrComputeCmaskInfo +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ComputeCmaskInfo( + const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_INFO_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_CMASK_INFO_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_CMASK_INFO_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = ComputeCmaskInfo(pIn->flags, + pIn->pitch, + pIn->height, + pIn->numSlices, + pIn->isLinear, + pIn->pTileInfo, + &pOut->pitch, + &pOut->height, + &pOut->cmaskBytes, + &pOut->macroWidth, + &pOut->macroHeight, + &pOut->sliceSize, + &pOut->baseAlign, + &pOut->blockMax); + } + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeDccInfo +* +* @brief +* Interface function to compute DCC key info +* +* @return +* return code of HwlComputeDccInfo +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ComputeDccInfo( + const ADDR_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE ret = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_DCCINFO_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT))) + { + ret = ADDR_PARAMSIZEMISMATCH; + } + } + + if (ret == ADDR_OK) + { + ADDR_COMPUTE_DCCINFO_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + + ret = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, + &input.tileInfo, &input.tileMode); + + pIn = &input; + } + + if (ADDR_OK == ret) + { + ret = HwlComputeDccInfo(pIn, pOut); + } + } + + return ret; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeHtileAddrFromCoord +* +* @brief +* Interface function stub of AddrComputeHtileAddrFromCoord +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ComputeHtileAddrFromCoord( + const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + BOOL_32 isWidth8 = (pIn->blockWidth == 8) ? TRUE : FALSE; + BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? TRUE : FALSE; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + pOut->addr = HwlComputeXmaskAddrFromCoord(pIn->pitch, + pIn->height, + pIn->x, + pIn->y, + pIn->slice, + pIn->numSlices, + 1, + pIn->isLinear, + isWidth8, + isHeight8, + pIn->pTileInfo, + &pOut->bitPosition); + } + } + + return returnCode; + +} + +/** +*************************************************************************************************** +* AddrLib::ComputeHtileCoordFromAddr +* +* @brief +* Interface function stub of AddrComputeHtileCoordFromAddr +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ComputeHtileCoordFromAddr( + const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + BOOL_32 isWidth8 = (pIn->blockWidth == 8) ? TRUE : FALSE; + BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? TRUE : FALSE; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + HwlComputeXmaskCoordFromAddr(pIn->addr, + pIn->bitPosition, + pIn->pitch, + pIn->height, + pIn->numSlices, + 1, + pIn->isLinear, + isWidth8, + isHeight8, + pIn->pTileInfo, + &pOut->x, + &pOut->y, + &pOut->slice); + } + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeCmaskAddrFromCoord +* +* @brief +* Interface function stub of AddrComputeCmaskAddrFromCoord +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ComputeCmaskAddrFromCoord( + const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + if (pIn->flags.tcCompatible == TRUE) + { + returnCode = HwlComputeCmaskAddrFromCoord(pIn, pOut); + } + else + { + pOut->addr = HwlComputeXmaskAddrFromCoord(pIn->pitch, + pIn->height, + pIn->x, + pIn->y, + pIn->slice, + pIn->numSlices, + 2, + pIn->isLinear, + FALSE, //this is cmask, isWidth8 is not needed + FALSE, //this is cmask, isHeight8 is not needed + pIn->pTileInfo, + &pOut->bitPosition); + } + + } + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeCmaskCoordFromAddr +* +* @brief +* Interface function stub of AddrComputeCmaskCoordFromAddr +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ComputeCmaskCoordFromAddr( + const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + HwlComputeXmaskCoordFromAddr(pIn->addr, + pIn->bitPosition, + pIn->pitch, + pIn->height, + pIn->numSlices, + 2, + pIn->isLinear, + FALSE, + FALSE, + pIn->pTileInfo, + &pOut->x, + &pOut->y, + &pOut->slice); + } + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeTileDataWidthAndHeight +* +* @brief +* Compute the squared cache shape for per-tile data (CMASK and HTILE) +* +* @return +* N/A +* +* @note +* MacroWidth and macroHeight are measured in pixels +*************************************************************************************************** +*/ +VOID AddrLib::ComputeTileDataWidthAndHeight( + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 cacheBits, ///< [in] bits of cache + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + UINT_32* pMacroWidth, ///< [out] macro tile width + UINT_32* pMacroHeight ///< [out] macro tile height + ) const +{ + UINT_32 height = 1; + UINT_32 width = cacheBits / bpp; + UINT_32 pipes = HwlGetPipes(pTileInfo); + + // Double height until the macro-tile is close to square + // Height can only be doubled if width is even + + while ((width > height * 2 * pipes) && !(width & 1)) + { + width /= 2; + height *= 2; + } + + *pMacroWidth = 8 * width; + *pMacroHeight = 8 * height * pipes; + + // Note: The above iterative comptuation is equivalent to the following + // + //int log2_height = ((log2(cacheBits)-log2(bpp)-log2(pipes))/2); + //int macroHeight = pow2( 3+log2(pipes)+log2_height ); +} + +/** +*************************************************************************************************** +* AddrLib::HwlComputeTileDataWidthAndHeightLinear +* +* @brief +* Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout +* +* @return +* N/A +* +* @note +* MacroWidth and macroHeight are measured in pixels +*************************************************************************************************** +*/ +VOID AddrLib::HwlComputeTileDataWidthAndHeightLinear( + UINT_32* pMacroWidth, ///< [out] macro tile width + UINT_32* pMacroHeight, ///< [out] macro tile height + UINT_32 bpp, ///< [in] bits per pixel + ADDR_TILEINFO* pTileInfo ///< [in] tile info + ) const +{ + ADDR_ASSERT(bpp != 4); // Cmask does not support linear layout prior to SI + *pMacroWidth = 8 * 512 / bpp; // Align width to 512-bit memory accesses + *pMacroHeight = 8 * m_pipes; // Align height to number of pipes +} + +/** +*************************************************************************************************** +* AddrLib::ComputeHtileInfo +* +* @brief +* Compute htile pitch,width, bytes per 2D slice +* +* @return +* Htile bpp i.e. How many bits for an 8x8 tile +* Also returns by output parameters: +* *Htile pitch, height, total size in bytes, macro-tile dimensions and slice size* +*************************************************************************************************** +*/ +UINT_32 AddrLib::ComputeHtileInfo( + ADDR_HTILE_FLAGS flags, ///< [in] htile flags + UINT_32 pitchIn, ///< [in] pitch input + UINT_32 heightIn, ///< [in] height input + UINT_32 numSlices, ///< [in] number of slices + BOOL_32 isLinear, ///< [in] if it is linear mode + BOOL_32 isWidth8, ///< [in] if htile block width is 8 + BOOL_32 isHeight8, ///< [in] if htile block height is 8 + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + UINT_32* pPitchOut, ///< [out] pitch output + UINT_32* pHeightOut, ///< [out] height output + UINT_64* pHtileBytes, ///< [out] bytes per 2D slice + UINT_32* pMacroWidth, ///< [out] macro-tile width in pixels + UINT_32* pMacroHeight, ///< [out] macro-tile width in pixels + UINT_64* pSliceSize, ///< [out] slice size in bytes + UINT_32* pBaseAlign ///< [out] base alignment + ) const +{ + + UINT_32 macroWidth; + UINT_32 macroHeight; + UINT_32 baseAlign; + UINT_64 surfBytes; + UINT_64 sliceBytes; + + numSlices = Max(1u, numSlices); + + const UINT_32 bpp = HwlComputeHtileBpp(isWidth8, isHeight8); + const UINT_32 cacheBits = HtileCacheBits; + + if (isLinear) + { + HwlComputeTileDataWidthAndHeightLinear(¯oWidth, + ¯oHeight, + bpp, + pTileInfo); + } + else + { + ComputeTileDataWidthAndHeight(bpp, + cacheBits, + pTileInfo, + ¯oWidth, + ¯oHeight); + } + + *pPitchOut = PowTwoAlign(pitchIn, macroWidth); + *pHeightOut = PowTwoAlign(heightIn, macroHeight); + + baseAlign = HwlComputeHtileBaseAlign(flags.tcCompatible, isLinear, pTileInfo); + + surfBytes = HwlComputeHtileBytes(*pPitchOut, + *pHeightOut, + bpp, + isLinear, + numSlices, + &sliceBytes, + baseAlign); + + *pHtileBytes = surfBytes; + + // + // Use SafeAssign since they are optional + // + SafeAssign(pMacroWidth, macroWidth); + + SafeAssign(pMacroHeight, macroHeight); + + SafeAssign(pSliceSize, sliceBytes); + + SafeAssign(pBaseAlign, baseAlign); + + return bpp; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeCmaskBaseAlign +* +* @brief +* Compute cmask base alignment +* +* @return +* Cmask base alignment +*************************************************************************************************** +*/ +UINT_32 AddrLib::ComputeCmaskBaseAlign( + ADDR_CMASK_FLAGS flags, ///< [in] Cmask flags + ADDR_TILEINFO* pTileInfo ///< [in] Tile info + ) const +{ + UINT_32 baseAlign = m_pipeInterleaveBytes * HwlGetPipes(pTileInfo); + + if (flags.tcCompatible) + { + ADDR_ASSERT(pTileInfo != NULL); + if (pTileInfo) + { + baseAlign *= pTileInfo->banks; + } + } + + return baseAlign; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeCmaskBytes +* +* @brief +* Compute cmask size in bytes +* +* @return +* Cmask size in bytes +*************************************************************************************************** +*/ +UINT_64 AddrLib::ComputeCmaskBytes( + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 numSlices ///< [in] number of slices + ) const +{ + return BITS_TO_BYTES(static_cast<UINT_64>(pitch) * height * numSlices * CmaskElemBits) / + MicroTilePixels; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeCmaskInfo +* +* @brief +* Compute cmask pitch,width, bytes per 2D slice +* +* @return +* BlockMax. Also by output parameters: Cmask pitch,height, total size in bytes, +* macro-tile dimensions +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ComputeCmaskInfo( + ADDR_CMASK_FLAGS flags, ///< [in] cmask flags + UINT_32 pitchIn, ///< [in] pitch input + UINT_32 heightIn, ///< [in] height input + UINT_32 numSlices, ///< [in] number of slices + BOOL_32 isLinear, ///< [in] is linear mode + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + UINT_32* pPitchOut, ///< [out] pitch output + UINT_32* pHeightOut, ///< [out] height output + UINT_64* pCmaskBytes, ///< [out] bytes per 2D slice + UINT_32* pMacroWidth, ///< [out] macro-tile width in pixels + UINT_32* pMacroHeight, ///< [out] macro-tile width in pixels + UINT_64* pSliceSize, ///< [out] slice size in bytes + UINT_32* pBaseAlign, ///< [out] base alignment + UINT_32* pBlockMax ///< [out] block max == slice / 128 / 128 - 1 + ) const +{ + UINT_32 macroWidth; + UINT_32 macroHeight; + UINT_32 baseAlign; + UINT_64 surfBytes; + UINT_64 sliceBytes; + + numSlices = Max(1u, numSlices); + + const UINT_32 bpp = CmaskElemBits; + const UINT_32 cacheBits = CmaskCacheBits; + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (isLinear) + { + HwlComputeTileDataWidthAndHeightLinear(¯oWidth, + ¯oHeight, + bpp, + pTileInfo); + } + else + { + ComputeTileDataWidthAndHeight(bpp, + cacheBits, + pTileInfo, + ¯oWidth, + ¯oHeight); + } + + *pPitchOut = (pitchIn + macroWidth - 1) & ~(macroWidth - 1); + *pHeightOut = (heightIn + macroHeight - 1) & ~(macroHeight - 1); + + + sliceBytes = ComputeCmaskBytes(*pPitchOut, + *pHeightOut, + 1); + + baseAlign = ComputeCmaskBaseAlign(flags, pTileInfo); + + while (sliceBytes % baseAlign) + { + *pHeightOut += macroHeight; + + sliceBytes = ComputeCmaskBytes(*pPitchOut, + *pHeightOut, + 1); + } + + surfBytes = sliceBytes * numSlices; + + *pCmaskBytes = surfBytes; + + // + // Use SafeAssign since they are optional + // + SafeAssign(pMacroWidth, macroWidth); + + SafeAssign(pMacroHeight, macroHeight); + + SafeAssign(pBaseAlign, baseAlign); + + SafeAssign(pSliceSize, sliceBytes); + + UINT_32 slice = (*pPitchOut) * (*pHeightOut); + UINT_32 blockMax = slice / 128 / 128 - 1; + +#if DEBUG + if (slice % (64*256) != 0) + { + ADDR_ASSERT_ALWAYS(); + } +#endif //DEBUG + + UINT_32 maxBlockMax = HwlGetMaxCmaskBlockMax(); + + if (blockMax > maxBlockMax) + { + blockMax = maxBlockMax; + returnCode = ADDR_INVALIDPARAMS; + } + + SafeAssign(pBlockMax, blockMax); + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeXmaskCoordYFromPipe +* +* @brief +* Compute the Y coord from pipe number for cmask/htile +* +* @return +* Y coordinate +* +*************************************************************************************************** +*/ +UINT_32 AddrLib::ComputeXmaskCoordYFromPipe( + UINT_32 pipe, ///< [in] pipe number + UINT_32 x ///< [in] x coordinate + ) const +{ + UINT_32 pipeBit0; + UINT_32 pipeBit1; + UINT_32 xBit0; + UINT_32 xBit1; + UINT_32 yBit0; + UINT_32 yBit1; + + UINT_32 y = 0; + + UINT_32 numPipes = m_pipes; // SI has its implementation + // + // Convert pipe + x to y coordinate. + // + switch (numPipes) + { + case 1: + // + // 1 pipe + // + // p0 = 0 + // + y = 0; + break; + case 2: + // + // 2 pipes + // + // p0 = x0 ^ y0 + // + // y0 = p0 ^ x0 + // + pipeBit0 = pipe & 0x1; + + xBit0 = x & 0x1; + + yBit0 = pipeBit0 ^ xBit0; + + y = yBit0; + break; + case 4: + // + // 4 pipes + // + // p0 = x1 ^ y0 + // p1 = x0 ^ y1 + // + // y0 = p0 ^ x1 + // y1 = p1 ^ x0 + // + pipeBit0 = pipe & 0x1; + pipeBit1 = (pipe & 0x2) >> 1; + + xBit0 = x & 0x1; + xBit1 = (x & 0x2) >> 1; + + yBit0 = pipeBit0 ^ xBit1; + yBit1 = pipeBit1 ^ xBit0; + + y = (yBit0 | + (yBit1 << 1)); + break; + case 8: + // + // 8 pipes + // + // r600 and r800 have different method + // + y = HwlComputeXmaskCoordYFrom8Pipe(pipe, x); + break; + default: + break; + } + return y; +} + +/** +*************************************************************************************************** +* AddrLib::HwlComputeXmaskCoordFromAddr +* +* @brief +* Compute the coord from an address of a cmask/htile +* +* @return +* N/A +* +* @note +* This method is reused by htile, so rename to Xmask +*************************************************************************************************** +*/ +VOID AddrLib::HwlComputeXmaskCoordFromAddr( + UINT_64 addr, ///< [in] address + UINT_32 bitPosition, ///< [in] bitPosition in a byte + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 numSlices, ///< [in] number of slices + UINT_32 factor, ///< [in] factor that indicates cmask or htile + BOOL_32 isLinear, ///< [in] linear or tiled HTILE layout + BOOL_32 isWidth8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value + BOOL_32 isHeight8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + UINT_32* pX, ///< [out] x coord + UINT_32* pY, ///< [out] y coord + UINT_32* pSlice ///< [out] slice index + ) const +{ + UINT_32 pipe; + UINT_32 numPipes; + UINT_32 numPipeBits; + UINT_32 macroTilePitch; + UINT_32 macroTileHeight; + + UINT_64 bitAddr; + + UINT_32 microTileCoordY; + + UINT_32 elemBits; + + UINT_32 pitchAligned = pitch; + UINT_32 heightAligned = height; + UINT_64 totalBytes; + + UINT_64 elemOffset; + + UINT_64 macroIndex; + UINT_32 microIndex; + + UINT_64 macroNumber; + UINT_32 microNumber; + + UINT_32 macroX; + UINT_32 macroY; + UINT_32 macroZ; + + UINT_32 microX; + UINT_32 microY; + + UINT_32 tilesPerMacro; + UINT_32 macrosPerPitch; + UINT_32 macrosPerSlice; + + // + // Extract pipe. + // + numPipes = HwlGetPipes(pTileInfo); + pipe = ComputePipeFromAddr(addr, numPipes); + + // + // Compute the number of group and pipe bits. + // + numPipeBits = Log2(numPipes); + + UINT_32 groupBits = 8 * m_pipeInterleaveBytes; + UINT_32 pipes = numPipes; + + + // + // Compute the micro tile size, in bits. And macro tile pitch and height. + // + if (factor == 2) //CMASK + { + ADDR_CMASK_FLAGS flags = {{0}}; + + elemBits = CmaskElemBits; + + ComputeCmaskInfo(flags, + pitch, + height, + numSlices, + isLinear, + pTileInfo, + &pitchAligned, + &heightAligned, + &totalBytes, + ¯oTilePitch, + ¯oTileHeight); + } + else //HTILE + { + ADDR_HTILE_FLAGS flags = {{0}}; + + if (factor != 1) + { + factor = 1; + } + + elemBits = HwlComputeHtileBpp(isWidth8, isHeight8); + + ComputeHtileInfo(flags, + pitch, + height, + numSlices, + isLinear, + isWidth8, + isHeight8, + pTileInfo, + &pitchAligned, + &heightAligned, + &totalBytes, + ¯oTilePitch, + ¯oTileHeight); + } + + // Should use aligned dims + // + pitch = pitchAligned; + height = heightAligned; + + + // + // Convert byte address to bit address. + // + bitAddr = BYTES_TO_BITS(addr) + bitPosition; + + + // + // Remove pipe bits from address. + // + + bitAddr = (bitAddr % groupBits) + ((bitAddr/groupBits/pipes)*groupBits); + + + elemOffset = bitAddr / elemBits; + + tilesPerMacro = (macroTilePitch/factor) * macroTileHeight / MicroTilePixels >> numPipeBits; + + macrosPerPitch = pitch / (macroTilePitch/factor); + macrosPerSlice = macrosPerPitch * height / macroTileHeight; + + macroIndex = elemOffset / factor / tilesPerMacro; + microIndex = static_cast<UINT_32>(elemOffset % (tilesPerMacro * factor)); + + macroNumber = macroIndex * factor + microIndex % factor; + microNumber = microIndex / factor; + + macroX = static_cast<UINT_32>((macroNumber % macrosPerPitch)); + macroY = static_cast<UINT_32>((macroNumber % macrosPerSlice) / macrosPerPitch); + macroZ = static_cast<UINT_32>((macroNumber / macrosPerSlice)); + + + microX = microNumber % (macroTilePitch / factor / MicroTileWidth); + microY = (microNumber / (macroTilePitch / factor / MicroTileHeight)); + + *pX = macroX * (macroTilePitch/factor) + microX * MicroTileWidth; + *pY = macroY * macroTileHeight + (microY * MicroTileHeight << numPipeBits); + *pSlice = macroZ; + + microTileCoordY = ComputeXmaskCoordYFromPipe(pipe, + *pX/MicroTileWidth); + + + // + // Assemble final coordinates. + // + *pY += microTileCoordY * MicroTileHeight; + +} + +/** +*************************************************************************************************** +* AddrLib::HwlComputeXmaskAddrFromCoord +* +* @brief +* Compute the address from an address of cmask (prior to si) +* +* @return +* Address in bytes +* +*************************************************************************************************** +*/ +UINT_64 AddrLib::HwlComputeXmaskAddrFromCoord( + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 x, ///< [in] x coord + UINT_32 y, ///< [in] y coord + UINT_32 slice, ///< [in] slice/depth index + UINT_32 numSlices, ///< [in] number of slices + UINT_32 factor, ///< [in] factor that indicates cmask(2) or htile(1) + BOOL_32 isLinear, ///< [in] linear or tiled HTILE layout + BOOL_32 isWidth8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value + BOOL_32 isHeight8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + UINT_32* pBitPosition ///< [out] bit position inside a byte + ) const +{ + UINT_64 addr; + UINT_32 numGroupBits; + UINT_32 numPipeBits; + UINT_32 newPitch = 0; + UINT_32 newHeight = 0; + UINT_64 sliceBytes = 0; + UINT_64 totalBytes = 0; + UINT_64 sliceOffset; + UINT_32 pipe; + UINT_32 macroTileWidth; + UINT_32 macroTileHeight; + UINT_32 macroTilesPerRow; + UINT_32 macroTileBytes; + UINT_32 macroTileIndexX; + UINT_32 macroTileIndexY; + UINT_64 macroTileOffset; + UINT_32 pixelBytesPerRow; + UINT_32 pixelOffsetX; + UINT_32 pixelOffsetY; + UINT_32 pixelOffset; + UINT_64 totalOffset; + UINT_64 offsetLo; + UINT_64 offsetHi; + UINT_64 groupMask; + + + UINT_32 elemBits = 0; + + UINT_32 numPipes = m_pipes; // This function is accessed prior to si only + + if (factor == 2) //CMASK + { + elemBits = CmaskElemBits; + + // For asics before SI, cmask is always tiled + isLinear = FALSE; + } + else //HTILE + { + if (factor != 1) // Fix compile warning + { + factor = 1; + } + + elemBits = HwlComputeHtileBpp(isWidth8, isHeight8); + } + + // + // Compute the number of group bits and pipe bits. + // + numGroupBits = Log2(m_pipeInterleaveBytes); + numPipeBits = Log2(numPipes); + + // + // Compute macro tile dimensions. + // + if (factor == 2) // CMASK + { + ADDR_CMASK_FLAGS flags = {{0}}; + + ComputeCmaskInfo(flags, + pitch, + height, + numSlices, + isLinear, + pTileInfo, + &newPitch, + &newHeight, + &totalBytes, + ¯oTileWidth, + ¯oTileHeight); + + sliceBytes = totalBytes / numSlices; + } + else // HTILE + { + ADDR_HTILE_FLAGS flags = {{0}}; + + ComputeHtileInfo(flags, + pitch, + height, + numSlices, + isLinear, + isWidth8, + isHeight8, + pTileInfo, + &newPitch, + &newHeight, + &totalBytes, + ¯oTileWidth, + ¯oTileHeight, + &sliceBytes); + } + + sliceOffset = slice * sliceBytes; + + // + // Get the pipe. Note that neither slice rotation nor pipe swizzling apply for CMASK. + // + pipe = ComputePipeFromCoord(x, + y, + 0, + ADDR_TM_2D_TILED_THIN1, + 0, + FALSE, + pTileInfo); + + // + // Compute the number of macro tiles per row. + // + macroTilesPerRow = newPitch / macroTileWidth; + + // + // Compute the number of bytes per macro tile. + // + macroTileBytes = BITS_TO_BYTES((macroTileWidth * macroTileHeight * elemBits) / MicroTilePixels); + + // + // Compute the offset to the macro tile containing the specified coordinate. + // + macroTileIndexX = x / macroTileWidth; + macroTileIndexY = y / macroTileHeight; + macroTileOffset = ((macroTileIndexY * macroTilesPerRow) + macroTileIndexX) * macroTileBytes; + + // + // Compute the pixel offset within the macro tile. + // + pixelBytesPerRow = BITS_TO_BYTES(macroTileWidth * elemBits) / MicroTileWidth; + + // + // The nibbles are interleaved (see below), so the part of the offset relative to the x + // coordinate repeats halfway across the row. (Not for HTILE) + // + if (factor == 2) + { + pixelOffsetX = (x % (macroTileWidth / 2)) / MicroTileWidth; + } + else + { + pixelOffsetX = (x % (macroTileWidth)) / MicroTileWidth * BITS_TO_BYTES(elemBits); + } + + // + // Compute the y offset within the macro tile. + // + pixelOffsetY = (((y % macroTileHeight) / MicroTileHeight) / numPipes) * pixelBytesPerRow; + + pixelOffset = pixelOffsetX + pixelOffsetY; + + // + // Combine the slice offset and macro tile offset with the pixel offset, accounting for the + // pipe bits in the middle of the address. + // + totalOffset = ((sliceOffset + macroTileOffset) >> numPipeBits) + pixelOffset; + + // + // Split the offset to put some bits below the pipe bits and some above. + // + groupMask = (1 << numGroupBits) - 1; + offsetLo = totalOffset & groupMask; + offsetHi = (totalOffset & ~groupMask) << numPipeBits; + + // + // Assemble the address from its components. + // + addr = offsetLo; + addr |= offsetHi; + // This is to remove warning with /analyze option + UINT_32 pipeBits = pipe << numGroupBits; + addr |= pipeBits; + + // + // Compute the bit position. The lower nibble is used when the x coordinate within the macro + // tile is less than half of the macro tile width, and the upper nibble is used when the x + // coordinate within the macro tile is greater than or equal to half the macro tile width. + // + *pBitPosition = ((x % macroTileWidth) < (macroTileWidth / factor)) ? 0 : 4; + + return addr; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Surface Addressing Shared +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +*************************************************************************************************** +* AddrLib::ComputeSurfaceAddrFromCoordLinear +* +* @brief +* Compute address from coord for linear surface +* +* @return +* Address in bytes +* +*************************************************************************************************** +*/ +UINT_64 AddrLib::ComputeSurfaceAddrFromCoordLinear( + UINT_32 x, ///< [in] x coord + UINT_32 y, ///< [in] y coord + UINT_32 slice, ///< [in] slice/depth index + UINT_32 sample, ///< [in] sample index + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 numSlices, ///< [in] number of slices + UINT_32* pBitPosition ///< [out] bit position inside a byte + ) const +{ + const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height; + + UINT_64 sliceOffset = (slice + sample * numSlices)* sliceSize; + UINT_64 rowOffset = static_cast<UINT_64>(y) * pitch; + UINT_64 pixOffset = x; + + UINT_64 addr = (sliceOffset + rowOffset + pixOffset) * bpp; + + *pBitPosition = static_cast<UINT_32>(addr % 8); + addr /= 8; + + return addr; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeSurfaceCoordFromAddrLinear +* +* @brief +* Compute the coord from an address of a linear surface +* +* @return +* N/A +*************************************************************************************************** +*/ +VOID AddrLib::ComputeSurfaceCoordFromAddrLinear( + UINT_64 addr, ///< [in] address + UINT_32 bitPosition, ///< [in] bitPosition in a byte + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 numSlices, ///< [in] number of slices + UINT_32* pX, ///< [out] x coord + UINT_32* pY, ///< [out] y coord + UINT_32* pSlice, ///< [out] slice/depth index + UINT_32* pSample ///< [out] sample index + ) const +{ + const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height; + const UINT_64 linearOffset = (BYTES_TO_BITS(addr) + bitPosition) / bpp; + + *pX = static_cast<UINT_32>((linearOffset % sliceSize) % pitch); + *pY = static_cast<UINT_32>((linearOffset % sliceSize) / pitch % height); + *pSlice = static_cast<UINT_32>((linearOffset / sliceSize) % numSlices); + *pSample = static_cast<UINT_32>((linearOffset / sliceSize) / numSlices); +} + +/** +*************************************************************************************************** +* AddrLib::ComputeSurfaceCoordFromAddrMicroTiled +* +* @brief +* Compute the coord from an address of a micro tiled surface +* +* @return +* N/A +*************************************************************************************************** +*/ +VOID AddrLib::ComputeSurfaceCoordFromAddrMicroTiled( + UINT_64 addr, ///< [in] address + UINT_32 bitPosition, ///< [in] bitPosition in a byte + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 numSamples, ///< [in] number of samples + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 tileBase, ///< [in] base offset within a tile + UINT_32 compBits, ///< [in] component bits actually needed(for planar surface) + UINT_32* pX, ///< [out] x coord + UINT_32* pY, ///< [out] y coord + UINT_32* pSlice, ///< [out] slice/depth index + UINT_32* pSample, ///< [out] sample index, + AddrTileType microTileType, ///< [in] micro tiling order + BOOL_32 isDepthSampleOrder ///< [in] TRUE if in depth sample order + ) const +{ + UINT_64 bitAddr; + UINT_32 microTileThickness; + UINT_32 microTileBits; + UINT_64 sliceBits; + UINT_64 rowBits; + UINT_32 sliceIndex; + UINT_32 microTileCoordX; + UINT_32 microTileCoordY; + UINT_32 pixelOffset; + UINT_32 pixelCoordX = 0; + UINT_32 pixelCoordY = 0; + UINT_32 pixelCoordZ = 0; + UINT_32 pixelCoordS = 0; + + // + // Convert byte address to bit address. + // + bitAddr = BYTES_TO_BITS(addr) + bitPosition; + + // + // Compute the micro tile size, in bits. + // + switch (tileMode) + { + case ADDR_TM_1D_TILED_THICK: + microTileThickness = ThickTileThickness; + break; + default: + microTileThickness = 1; + break; + } + + microTileBits = MicroTilePixels * microTileThickness * bpp * numSamples; + + // + // Compute number of bits per slice and number of bits per row of micro tiles. + // + sliceBits = static_cast<UINT_64>(pitch) * height * microTileThickness * bpp * numSamples; + + rowBits = (pitch / MicroTileWidth) * microTileBits; + + // + // Extract the slice index. + // + sliceIndex = static_cast<UINT_32>(bitAddr / sliceBits); + bitAddr -= sliceIndex * sliceBits; + + // + // Extract the y coordinate of the micro tile. + // + microTileCoordY = static_cast<UINT_32>(bitAddr / rowBits) * MicroTileHeight; + bitAddr -= (microTileCoordY / MicroTileHeight) * rowBits; + + // + // Extract the x coordinate of the micro tile. + // + microTileCoordX = static_cast<UINT_32>(bitAddr / microTileBits) * MicroTileWidth; + + // + // Compute the pixel offset within the micro tile. + // + pixelOffset = static_cast<UINT_32>(bitAddr % microTileBits); + + // + // Extract pixel coordinates from the offset. + // + HwlComputePixelCoordFromOffset(pixelOffset, + bpp, + numSamples, + tileMode, + tileBase, + compBits, + &pixelCoordX, + &pixelCoordY, + &pixelCoordZ, + &pixelCoordS, + microTileType, + isDepthSampleOrder); + + // + // Assemble final coordinates. + // + *pX = microTileCoordX + pixelCoordX; + *pY = microTileCoordY + pixelCoordY; + *pSlice = (sliceIndex * microTileThickness) + pixelCoordZ; + *pSample = pixelCoordS; + + if (microTileThickness > 1) + { + *pSample = 0; + } +} + +/** +*************************************************************************************************** +* AddrLib::ComputePipeFromAddr +* +* @brief +* Compute the pipe number from an address +* +* @return +* Pipe number +* +*************************************************************************************************** +*/ +UINT_32 AddrLib::ComputePipeFromAddr( + UINT_64 addr, ///< [in] address + UINT_32 numPipes ///< [in] number of banks + ) const +{ + UINT_32 pipe; + + UINT_32 groupBytes = m_pipeInterleaveBytes; //just different terms + + // R600 + // The LSBs of the address are arranged as follows: + // bank | pipe | group + // + // To get the pipe number, shift off the group bits and mask the pipe bits. + // + + // R800 + // The LSBs of the address are arranged as follows: + // bank | bankInterleave | pipe | pipeInterleave + // + // To get the pipe number, shift off the pipe interleave bits and mask the pipe bits. + // + + pipe = static_cast<UINT_32>(addr >> Log2(groupBytes)) & (numPipes - 1); + + return pipe; +} + +/** +*************************************************************************************************** +* AddrLib::ComputePixelIndexWithinMicroTile +* +* @brief +* Compute the pixel index inside a micro tile of surface +* +* @return +* Pixel index +* +*************************************************************************************************** +*/ +UINT_32 AddrLib::ComputePixelIndexWithinMicroTile( + UINT_32 x, ///< [in] x coord + UINT_32 y, ///< [in] y coord + UINT_32 z, ///< [in] slice/depth index + UINT_32 bpp, ///< [in] bits per pixel + AddrTileMode tileMode, ///< [in] tile mode + AddrTileType microTileType ///< [in] pixel order in display/non-display mode + ) const +{ + UINT_32 pixelBit0 = 0; + UINT_32 pixelBit1 = 0; + UINT_32 pixelBit2 = 0; + UINT_32 pixelBit3 = 0; + UINT_32 pixelBit4 = 0; + UINT_32 pixelBit5 = 0; + UINT_32 pixelBit6 = 0; + UINT_32 pixelBit7 = 0; + UINT_32 pixelBit8 = 0; + UINT_32 pixelNumber; + + UINT_32 x0 = _BIT(x, 0); + UINT_32 x1 = _BIT(x, 1); + UINT_32 x2 = _BIT(x, 2); + UINT_32 y0 = _BIT(y, 0); + UINT_32 y1 = _BIT(y, 1); + UINT_32 y2 = _BIT(y, 2); + UINT_32 z0 = _BIT(z, 0); + UINT_32 z1 = _BIT(z, 1); + UINT_32 z2 = _BIT(z, 2); + + UINT_32 thickness = ComputeSurfaceThickness(tileMode); + + // Compute the pixel number within the micro tile. + + if (microTileType != ADDR_THICK) + { + if (microTileType == ADDR_DISPLAYABLE) + { + switch (bpp) + { + case 8: + pixelBit0 = x0; + pixelBit1 = x1; + pixelBit2 = x2; + pixelBit3 = y1; + pixelBit4 = y0; + pixelBit5 = y2; + break; + case 16: + pixelBit0 = x0; + pixelBit1 = x1; + pixelBit2 = x2; + pixelBit3 = y0; + pixelBit4 = y1; + pixelBit5 = y2; + break; + case 32: + pixelBit0 = x0; + pixelBit1 = x1; + pixelBit2 = y0; + pixelBit3 = x2; + pixelBit4 = y1; + pixelBit5 = y2; + break; + case 64: + pixelBit0 = x0; + pixelBit1 = y0; + pixelBit2 = x1; + pixelBit3 = x2; + pixelBit4 = y1; + pixelBit5 = y2; + break; + case 128: + pixelBit0 = y0; + pixelBit1 = x0; + pixelBit2 = x1; + pixelBit3 = x2; + pixelBit4 = y1; + pixelBit5 = y2; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER) + { + pixelBit0 = x0; + pixelBit1 = y0; + pixelBit2 = x1; + pixelBit3 = y1; + pixelBit4 = x2; + pixelBit5 = y2; + } + else if (microTileType == ADDR_ROTATED) + { + ADDR_ASSERT(thickness == 1); + + switch (bpp) + { + case 8: + pixelBit0 = y0; + pixelBit1 = y1; + pixelBit2 = y2; + pixelBit3 = x1; + pixelBit4 = x0; + pixelBit5 = x2; + break; + case 16: + pixelBit0 = y0; + pixelBit1 = y1; + pixelBit2 = y2; + pixelBit3 = x0; + pixelBit4 = x1; + pixelBit5 = x2; + break; + case 32: + pixelBit0 = y0; + pixelBit1 = y1; + pixelBit2 = x0; + pixelBit3 = y2; + pixelBit4 = x1; + pixelBit5 = x2; + break; + case 64: + pixelBit0 = y0; + pixelBit1 = x0; + pixelBit2 = y1; + pixelBit3 = x1; + pixelBit4 = x2; + pixelBit5 = y2; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + + if (thickness > 1) + { + pixelBit6 = z0; + pixelBit7 = z1; + } + } + else // ADDR_THICK + { + ADDR_ASSERT(thickness > 1); + + switch (bpp) + { + case 8: + case 16: + pixelBit0 = x0; + pixelBit1 = y0; + pixelBit2 = x1; + pixelBit3 = y1; + pixelBit4 = z0; + pixelBit5 = z1; + break; + case 32: + pixelBit0 = x0; + pixelBit1 = y0; + pixelBit2 = x1; + pixelBit3 = z0; + pixelBit4 = y1; + pixelBit5 = z1; + break; + case 64: + case 128: + pixelBit0 = y0; + pixelBit1 = x0; + pixelBit2 = z0; + pixelBit3 = x1; + pixelBit4 = y1; + pixelBit5 = z1; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + pixelBit6 = x2; + pixelBit7 = y2; + } + + if (thickness == 8) + { + pixelBit8 = z2; + } + + pixelNumber = ((pixelBit0 ) | + (pixelBit1 << 1) | + (pixelBit2 << 2) | + (pixelBit3 << 3) | + (pixelBit4 << 4) | + (pixelBit5 << 5) | + (pixelBit6 << 6) | + (pixelBit7 << 7) | + (pixelBit8 << 8)); + + return pixelNumber; +} + +/** +*************************************************************************************************** +* AddrLib::AdjustPitchAlignment +* +* @brief +* Adjusts pitch alignment for flipping surface +* +* @return +* N/A +* +*************************************************************************************************** +*/ +VOID AddrLib::AdjustPitchAlignment( + ADDR_SURFACE_FLAGS flags, ///< [in] Surface flags + UINT_32* pPitchAlign ///< [out] Pointer to pitch alignment + ) const +{ + // Display engine hardwires lower 5 bit of GRPH_PITCH to ZERO which means 32 pixel alignment + // Maybe it will be fixed in future but let's make it general for now. + if (flags.display || flags.overlay) + { + *pPitchAlign = PowTwoAlign(*pPitchAlign, 32); + + if(flags.display) + { + *pPitchAlign = Max(m_minPitchAlignPixels, *pPitchAlign); + } + } +} + +/** +*************************************************************************************************** +* AddrLib::PadDimensions +* +* @brief +* Helper function to pad dimensions +* +* @return +* N/A +* +*************************************************************************************************** +*/ +VOID AddrLib::PadDimensions( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 numSamples, ///< [in] number of samples + ADDR_TILEINFO* pTileInfo, ///< [in/out] bank structure. + UINT_32 padDims, ///< [in] Dimensions to pad valid value 1,2,3 + UINT_32 mipLevel, ///< [in] MipLevel + UINT_32* pPitch, ///< [in/out] pitch in pixels + UINT_32 pitchAlign, ///< [in] pitch alignment + UINT_32* pHeight, ///< [in/out] height in pixels + UINT_32 heightAlign, ///< [in] height alignment + UINT_32* pSlices, ///< [in/out] number of slices + UINT_32 sliceAlign ///< [in] number of slice alignment + ) const +{ + UINT_32 thickness = ComputeSurfaceThickness(tileMode); + + ADDR_ASSERT(padDims <= 3); + + // + // Override padding for mip levels + // + if (mipLevel > 0) + { + if (flags.cube) + { + // for cubemap, we only pad when client call with 6 faces as an identity + if (*pSlices > 1) + { + padDims = 3; // we should pad cubemap sub levels when we treat it as 3d texture + } + else + { + padDims = 2; + } + } + } + + // Any possibilities that padDims is 0? + if (padDims == 0) + { + padDims = 3; + } + + if (IsPow2(pitchAlign)) + { + *pPitch = PowTwoAlign((*pPitch), pitchAlign); + } + else // add this code to pass unit test, r600 linear mode is not align bpp to pow2 for linear + { + *pPitch += pitchAlign - 1; + *pPitch /= pitchAlign; + *pPitch *= pitchAlign; + } + + if (padDims > 1) + { + *pHeight = PowTwoAlign((*pHeight), heightAlign); + } + + if (padDims > 2 || thickness > 1) + { + // for cubemap single face, we do not pad slices. + // if we pad it, the slice number should be set to 6 and current mip level > 1 + if (flags.cube && (!m_configFlags.noCubeMipSlicesPad || flags.cubeAsArray)) + { + *pSlices = NextPow2(*pSlices); + } + + // normal 3D texture or arrays or cubemap has a thick mode? (Just pass unit test) + if (thickness > 1) + { + *pSlices = PowTwoAlign((*pSlices), sliceAlign); + } + + } + + HwlPadDimensions(tileMode, + bpp, + flags, + numSamples, + pTileInfo, + padDims, + mipLevel, + pPitch, + pitchAlign, + pHeight, + heightAlign, + pSlices, + sliceAlign); +} + + +/** +*************************************************************************************************** +* AddrLib::HwlPreHandleBaseLvl3xPitch +* +* @brief +* Pre-handler of 3x pitch (96 bit) adjustment +* +* @return +* Expected pitch +*************************************************************************************************** +*/ +UINT_32 AddrLib::HwlPreHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input + UINT_32 expPitch ///< [in] pitch + ) const +{ + ADDR_ASSERT(pIn->width == expPitch); + // + // If pitch is pre-multiplied by 3, we retrieve original one here to get correct miplevel size + // + if (AddrElemLib::IsExpand3x(pIn->format) && + pIn->mipLevel == 0 && + pIn->tileMode == ADDR_TM_LINEAR_ALIGNED) + { + expPitch /= 3; + expPitch = NextPow2(expPitch); + } + + return expPitch; +} + +/** +*************************************************************************************************** +* AddrLib::HwlPostHandleBaseLvl3xPitch +* +* @brief +* Post-handler of 3x pitch adjustment +* +* @return +* Expected pitch +*************************************************************************************************** +*/ +UINT_32 AddrLib::HwlPostHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input + UINT_32 expPitch ///< [in] pitch + ) const +{ + // + // 96 bits surface of sub levels require element pitch of 32 bits instead + // So we just return pitch in 32 bit pixels without timing 3 + // + if (AddrElemLib::IsExpand3x(pIn->format) && + pIn->mipLevel == 0 && + pIn->tileMode == ADDR_TM_LINEAR_ALIGNED) + { + expPitch *= 3; + } + + return expPitch; +} + + +/** +*************************************************************************************************** +* AddrLib::IsMacroTiled +* +* @brief +* Check if the tile mode is macro tiled +* +* @return +* TRUE if it is macro tiled (2D/2B/3D/3B) +*************************************************************************************************** +*/ +BOOL_32 AddrLib::IsMacroTiled( + AddrTileMode tileMode) ///< [in] tile mode +{ + return m_modeFlags[tileMode].isMacro; +} + +/** +*************************************************************************************************** +* AddrLib::IsMacro3dTiled +* +* @brief +* Check if the tile mode is 3D macro tiled +* +* @return +* TRUE if it is 3D macro tiled +*************************************************************************************************** +*/ +BOOL_32 AddrLib::IsMacro3dTiled( + AddrTileMode tileMode) ///< [in] tile mode +{ + return m_modeFlags[tileMode].isMacro3d; +} + +/** +*************************************************************************************************** +* AddrLib::IsMicroTiled +* +* @brief +* Check if the tile mode is micro tiled +* +* @return +* TRUE if micro tiled +*************************************************************************************************** +*/ +BOOL_32 AddrLib::IsMicroTiled( + AddrTileMode tileMode) ///< [in] tile mode +{ + return m_modeFlags[tileMode].isMicro; +} + +/** +*************************************************************************************************** +* AddrLib::IsLinear +* +* @brief +* Check if the tile mode is linear +* +* @return +* TRUE if linear +*************************************************************************************************** +*/ +BOOL_32 AddrLib::IsLinear( + AddrTileMode tileMode) ///< [in] tile mode +{ + return m_modeFlags[tileMode].isLinear; +} + +/** +*************************************************************************************************** +* AddrLib::IsPrtNoRotationTileMode +* +* @brief +* Return TRUE if it is prt tile without rotation +* @note +* This function just used by CI +*************************************************************************************************** +*/ +BOOL_32 AddrLib::IsPrtNoRotationTileMode( + AddrTileMode tileMode) +{ + return m_modeFlags[tileMode].isPrtNoRotation; +} + +/** +*************************************************************************************************** +* AddrLib::IsPrtTileMode +* +* @brief +* Return TRUE if it is prt tile +* @note +* This function just used by CI +*************************************************************************************************** +*/ +BOOL_32 AddrLib::IsPrtTileMode( + AddrTileMode tileMode) +{ + return m_modeFlags[tileMode].isPrt; +} + +/** +*************************************************************************************************** +* AddrLib::Bits2Number +* +* @brief +* Cat a array of binary bit to a number +* +* @return +* The number combined with the array of bits +*************************************************************************************************** +*/ +UINT_32 AddrLib::Bits2Number( + UINT_32 bitNum, ///< [in] how many bits + ...) ///< [in] varaible bits value starting from MSB +{ + UINT_32 number = 0; + UINT_32 i; + va_list bits_ptr; + + va_start(bits_ptr, bitNum); + + for(i = 0; i < bitNum; i++) + { + number |= va_arg(bits_ptr, UINT_32); + number <<= 1; + } + + number>>=1; + + va_end(bits_ptr); + + return number; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeMipLevel +* +* @brief +* Compute mipmap level width/height/slices +* @return +* N/A +*************************************************************************************************** +*/ +VOID AddrLib::ComputeMipLevel( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in/out] Input structure + ) const +{ + if (AddrElemLib::IsBlockCompressed(pIn->format)) + { + if (pIn->mipLevel == 0) + { + // DXTn's level 0 must be multiple of 4 + // But there are exceptions: + // 1. Internal surface creation in hostblt/vsblt/etc... + // 2. Runtime doesn't reject ATI1/ATI2 whose width/height are not multiple of 4 + pIn->width = PowTwoAlign(pIn->width, 4); + pIn->height = PowTwoAlign(pIn->height, 4); + } + } + + HwlComputeMipLevel(pIn); +} + +/** +*************************************************************************************************** +* AddrLib::DegradeBaseLevel +* +* @brief +* Check if base level's tile mode can be degraded +* @return +* TRUE if degraded, also returns degraded tile mode (unchanged if not degraded) +*************************************************************************************************** +*/ +BOOL_32 AddrLib::DegradeBaseLevel( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure for surface info + AddrTileMode* pTileMode ///< [out] Degraded tile mode + ) const +{ + BOOL_32 degraded = FALSE; + AddrTileMode tileMode = pIn->tileMode; + UINT_32 thickness = ComputeSurfaceThickness(tileMode); + + if (m_configFlags.degradeBaseLevel) // This is a global setting + { + if (pIn->flags.degrade4Space && // Degradation per surface + pIn->mipLevel == 0 && + pIn->numSamples == 1 && + IsMacroTiled(tileMode)) + { + if (HwlDegradeBaseLevel(pIn)) + { + *pTileMode = thickness == 1 ? ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; + degraded = TRUE; + } + else if (thickness > 1) + { + // As in the following HwlComputeSurfaceInfo, thick modes may be degraded to + // thinner modes, we should re-evaluate whether the corresponding thinner modes + // need to be degraded. If so, we choose 1D thick mode instead. + tileMode = DegradeLargeThickTile(pIn->tileMode, pIn->bpp); + if (tileMode != pIn->tileMode) + { + ADDR_COMPUTE_SURFACE_INFO_INPUT input = *pIn; + input.tileMode = tileMode; + if (HwlDegradeBaseLevel(&input)) + { + *pTileMode = ADDR_TM_1D_TILED_THICK; + degraded = TRUE; + } + } + } + } + } + + return degraded; +} + +/** +*************************************************************************************************** +* AddrLib::DegradeLargeThickTile +* +* @brief +* Check if the thickness needs to be reduced if a tile is too large +* @return +* The degraded tile mode (unchanged if not degraded) +*************************************************************************************************** +*/ +AddrTileMode AddrLib::DegradeLargeThickTile( + AddrTileMode tileMode, + UINT_32 bpp) const +{ + // Override tilemode + // When tile_width (8) * tile_height (8) * thickness * element_bytes is > row_size, + // it is better to just use THIN mode in this case + UINT_32 thickness = ComputeSurfaceThickness(tileMode); + + if (thickness > 1 && m_configFlags.allowLargeThickTile == 0) + { + UINT_32 tileSize = MicroTilePixels * thickness * (bpp >> 3); + + if (tileSize > m_rowSize) + { + switch (tileMode) + { + case ADDR_TM_2D_TILED_XTHICK: + if ((tileSize >> 1) <= m_rowSize) + { + tileMode = ADDR_TM_2D_TILED_THICK; + break; + } + // else fall through + case ADDR_TM_2D_TILED_THICK: + tileMode = ADDR_TM_2D_TILED_THIN1; + break; + + case ADDR_TM_3D_TILED_XTHICK: + if ((tileSize >> 1) <= m_rowSize) + { + tileMode = ADDR_TM_3D_TILED_THICK; + break; + } + // else fall through + case ADDR_TM_3D_TILED_THICK: + tileMode = ADDR_TM_3D_TILED_THIN1; + break; + + case ADDR_TM_PRT_TILED_THICK: + tileMode = ADDR_TM_PRT_TILED_THIN1; + break; + + case ADDR_TM_PRT_2D_TILED_THICK: + tileMode = ADDR_TM_PRT_2D_TILED_THIN1; + break; + + case ADDR_TM_PRT_3D_TILED_THICK: + tileMode = ADDR_TM_PRT_3D_TILED_THIN1; + break; + + default: + break; + } + } + } + + return tileMode; +} + +/** +*************************************************************************************************** +* AddrLib::PostComputeMipLevel +* @brief +* Compute MipLevel info (including level 0) after surface adjustment +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::PostComputeMipLevel( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in/out] Input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] Output structure + ) const +{ + // Mipmap including level 0 must be pow2 padded since either SI hw expects so or it is + // required by CFX for Hw Compatibility between NI and SI. Otherwise it is only needed for + // mipLevel > 0. Any h/w has different requirement should implement its own virtual function + + if (pIn->flags.pow2Pad) + { + pIn->width = NextPow2(pIn->width); + pIn->height = NextPow2(pIn->height); + pIn->numSlices = NextPow2(pIn->numSlices); + } + else if (pIn->mipLevel > 0) + { + pIn->width = NextPow2(pIn->width); + pIn->height = NextPow2(pIn->height); + + if (!pIn->flags.cube) + { + pIn->numSlices = NextPow2(pIn->numSlices); + } + + // for cubemap, we keep its value at first + } + + return ADDR_OK; +} + +/** +*************************************************************************************************** +* AddrLib::HwlSetupTileCfg +* +* @brief +* Map tile index to tile setting. +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::HwlSetupTileCfg( + INT_32 index, ///< [in] Tile index + INT_32 macroModeIndex, ///< [in] Index in macro tile mode table(CI) + ADDR_TILEINFO* pInfo, ///< [out] Tile Info + AddrTileMode* pMode, ///< [out] Tile mode + AddrTileType* pType ///< [out] Tile type + ) const +{ + return ADDR_NOTSUPPORTED; +} + +/** +*************************************************************************************************** +* AddrLib::HwlGetPipes +* +* @brief +* Get number pipes +* @return +* num pipes +*************************************************************************************************** +*/ +UINT_32 AddrLib::HwlGetPipes( + const ADDR_TILEINFO* pTileInfo ///< [in] Tile info + ) const +{ + //pTileInfo can be NULL when asic is 6xx and 8xx. + return m_pipes; +} + +/** +*************************************************************************************************** +* AddrLib::ComputeQbStereoInfo +* +* @brief +* Get quad buffer stereo information +* @return +* TRUE if no error +*************************************************************************************************** +*/ +BOOL_32 AddrLib::ComputeQbStereoInfo( + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in/out] updated pOut+pStereoInfo + ) const +{ + BOOL_32 success = FALSE; + + if (pOut->pStereoInfo) + { + ADDR_ASSERT(pOut->bpp >= 8); + ADDR_ASSERT((pOut->surfSize % pOut->baseAlign) == 0); + + // Save original height + pOut->pStereoInfo->eyeHeight = pOut->height; + + // Right offset + pOut->pStereoInfo->rightOffset = static_cast<UINT_32>(pOut->surfSize); + + pOut->pStereoInfo->rightSwizzle = HwlComputeQbStereoRightSwizzle(pOut); + // Double height + pOut->height <<= 1; + pOut->pixelHeight <<= 1; + + // Double size + pOut->surfSize <<= 1; + + // Right start address meets the base align since it is guaranteed by AddrLib + + // 1D surface on SI may break this rule, but we can force it to meet by checking .qbStereo. + success = TRUE; + } + + return success; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Element lib +/////////////////////////////////////////////////////////////////////////////////////////////////// + + +/** +*************************************************************************************************** +* AddrLib::Flt32ToColorPixel +* +* @brief +* Convert a FLT_32 value to a depth/stencil pixel value +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::Flt32ToDepthPixel( + const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, + ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ELEM_FLT32TODEPTHPIXEL_INPUT)) || + (pOut->size != sizeof(ELEM_FLT32TODEPTHPIXEL_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + GetElemLib()->Flt32ToDepthPixel(pIn->format, + pIn->comps, + pOut->pPixel); + UINT_32 depthBase = 0; + UINT_32 stencilBase = 0; + UINT_32 depthBits = 0; + UINT_32 stencilBits = 0; + + switch (pIn->format) + { + case ADDR_DEPTH_16: + depthBits = 16; + break; + case ADDR_DEPTH_X8_24: + case ADDR_DEPTH_8_24: + case ADDR_DEPTH_X8_24_FLOAT: + case ADDR_DEPTH_8_24_FLOAT: + depthBase = 8; + depthBits = 24; + stencilBits = 8; + break; + case ADDR_DEPTH_32_FLOAT: + depthBits = 32; + break; + case ADDR_DEPTH_X24_8_32_FLOAT: + depthBase = 8; + depthBits = 32; + stencilBits = 8; + break; + default: + break; + } + + // Overwrite base since R800 has no "tileBase" + if (GetElemLib()->IsDepthStencilTilePlanar() == FALSE) + { + depthBase = 0; + stencilBase = 0; + } + + depthBase *= 64; + stencilBase *= 64; + + pOut->stencilBase = stencilBase; + pOut->depthBase = depthBase; + pOut->depthBits = depthBits; + pOut->stencilBits = stencilBits; + } + + return returnCode; +} + +/** +*************************************************************************************************** +* AddrLib::Flt32ToColorPixel +* +* @brief +* Convert a FLT_32 value to a red/green/blue/alpha pixel value +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::Flt32ToColorPixel( + const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, + ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ELEM_FLT32TOCOLORPIXEL_INPUT)) || + (pOut->size != sizeof(ELEM_FLT32TOCOLORPIXEL_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + GetElemLib()->Flt32ToColorPixel(pIn->format, + pIn->surfNum, + pIn->surfSwap, + pIn->comps, + pOut->pPixel); + } + + return returnCode; +} + + +/** +*************************************************************************************************** +* AddrLib::GetExportNorm +* +* @brief +* Check one format can be EXPORT_NUM +* @return +* TRUE if EXPORT_NORM can be used +*************************************************************************************************** +*/ +BOOL_32 AddrLib::GetExportNorm( + const ELEM_GETEXPORTNORM_INPUT* pIn) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + BOOL_32 enabled = FALSE; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if (pIn->size != sizeof(ELEM_GETEXPORTNORM_INPUT)) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + enabled = GetElemLib()->PixGetExportNorm(pIn->format, + pIn->num, + pIn->swap); + } + + return enabled; +} + +/** +*************************************************************************************************** +* AddrLib::ComputePrtInfo +* +* @brief +* Compute prt surface related info +* +* @return +* ADDR_E_RETURNCODE +*************************************************************************************************** +*/ +ADDR_E_RETURNCODE AddrLib::ComputePrtInfo( + const ADDR_PRT_INFO_INPUT* pIn, + ADDR_PRT_INFO_OUTPUT* pOut) const +{ + ADDR_ASSERT(pOut != NULL); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + UINT_32 expandX = 1; + UINT_32 expandY = 1; + AddrElemMode elemMode; + + UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, + &elemMode, + &expandX, + &expandY); + + if (bpp <8 || bpp == 24 || bpp == 48 || bpp == 96 ) + { + returnCode = ADDR_INVALIDPARAMS; + } + + UINT_32 numFrags = pIn->numFrags; + ADDR_ASSERT(numFrags <= 8); + + UINT_32 tileWidth = 0; + UINT_32 tileHeight = 0; + if (returnCode == ADDR_OK) + { + // 3D texture without depth or 2d texture + if (pIn->baseMipDepth > 1 || pIn->baseMipHeight > 1) + { + if (bpp == 8) + { + tileWidth = 256; + tileHeight = 256; + } + else if (bpp == 16) + { + tileWidth = 256; + tileHeight = 128; + } + else if (bpp == 32) + { + tileWidth = 128; + tileHeight = 128; + } + else if (bpp == 64) + { + // assume it is BC1/4 + tileWidth = 512; + tileHeight = 256; + + if (elemMode == ADDR_UNCOMPRESSED) + { + tileWidth = 128; + tileHeight = 64; + } + } + else if (bpp == 128) + { + // assume it is BC2/3/5/6H/7 + tileWidth = 256; + tileHeight = 256; + + if (elemMode == ADDR_UNCOMPRESSED) + { + tileWidth = 64; + tileHeight = 64; + } + } + + if (numFrags == 2) + { + tileWidth = tileWidth / 2; + } + else if (numFrags == 4) + { + tileWidth = tileWidth / 2; + tileHeight = tileHeight / 2; + } + else if (numFrags == 8) + { + tileWidth = tileWidth / 4; + tileHeight = tileHeight / 2; + } + } + else // 1d + { + tileHeight = 1; + if (bpp == 8) + { + tileWidth = 65536; + } + else if (bpp == 16) + { + tileWidth = 32768; + } + else if (bpp == 32) + { + tileWidth = 16384; + } + else if (bpp == 64) + { + tileWidth = 8192; + } + else if (bpp == 128) + { + tileWidth = 4096; + } + } + } + + pOut->prtTileWidth = tileWidth; + pOut->prtTileHeight = tileHeight; + + return returnCode; +} diff --git a/src/amd/addrlib/core/addrlib.h b/src/amd/addrlib/core/addrlib.h new file mode 100644 index 0000000..43c55ff --- /dev/null +++ b/src/amd/addrlib/core/addrlib.h @@ -0,0 +1,695 @@ +/* + * Copyright © 2014 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +*************************************************************************************************** +* @file addrlib.h +* @brief Contains the AddrLib base class definition. +*************************************************************************************************** +*/ + +#ifndef __ADDR_LIB_H__ +#define __ADDR_LIB_H__ + + +#include "addrinterface.h" +#include "addrobject.h" +#include "addrelemlib.h" + +#if BRAHMA_BUILD +#include "amdgpu_id.h" +#else +#include "atiid.h" +#endif + +#ifndef CIASICIDGFXENGINE_R600 +#define CIASICIDGFXENGINE_R600 0x00000006 +#endif + +#ifndef CIASICIDGFXENGINE_R800 +#define CIASICIDGFXENGINE_R800 0x00000008 +#endif + +#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND +#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A +#endif + +#ifndef CIASICIDGFXENGINE_SEAISLAND +#define CIASICIDGFXENGINE_SEAISLAND 0x0000000B +#endif +/** +*************************************************************************************************** +* @brief Neutral enums that define pipeinterleave +*************************************************************************************************** +*/ +enum AddrPipeInterleave +{ + ADDR_PIPEINTERLEAVE_256B = 256, + ADDR_PIPEINTERLEAVE_512B = 512, +}; + +/** +*************************************************************************************************** +* @brief Neutral enums that define DRAM row size +*************************************************************************************************** +*/ +enum AddrRowSize +{ + ADDR_ROWSIZE_1KB = 1024, + ADDR_ROWSIZE_2KB = 2048, + ADDR_ROWSIZE_4KB = 4096, + ADDR_ROWSIZE_8KB = 8192, +}; + +/** +*************************************************************************************************** +* @brief Neutral enums that define bank interleave +*************************************************************************************************** +*/ +enum AddrBankInterleave +{ + ADDR_BANKINTERLEAVE_1 = 1, + ADDR_BANKINTERLEAVE_2 = 2, + ADDR_BANKINTERLEAVE_4 = 4, + ADDR_BANKINTERLEAVE_8 = 8, +}; + +/** +*************************************************************************************************** +* @brief Neutral enums that define MGPU chip tile size +*************************************************************************************************** +*/ +enum AddrChipTileSize +{ + ADDR_CHIPTILESIZE_16 = 16, + ADDR_CHIPTILESIZE_32 = 32, + ADDR_CHIPTILESIZE_64 = 64, + ADDR_CHIPTILESIZE_128 = 128, +}; + +/** +*************************************************************************************************** +* @brief Neutral enums that define shader engine tile size +*************************************************************************************************** +*/ +enum AddrEngTileSize +{ + ADDR_SE_TILESIZE_16 = 16, + ADDR_SE_TILESIZE_32 = 32, +}; + +/** +*************************************************************************************************** +* @brief Neutral enums that define bank swap size +*************************************************************************************************** +*/ +enum AddrBankSwapSize +{ + ADDR_BANKSWAP_128B = 128, + ADDR_BANKSWAP_256B = 256, + ADDR_BANKSWAP_512B = 512, + ADDR_BANKSWAP_1KB = 1024, +}; + +/** +*************************************************************************************************** +* @brief Neutral enums that define bank swap size +*************************************************************************************************** +*/ +enum AddrSampleSplitSize +{ + ADDR_SAMPLESPLIT_1KB = 1024, + ADDR_SAMPLESPLIT_2KB = 2048, + ADDR_SAMPLESPLIT_4KB = 4096, + ADDR_SAMPLESPLIT_8KB = 8192, +}; + +/** +*************************************************************************************************** +* @brief Flags for AddrTileMode +*************************************************************************************************** +*/ +struct AddrTileModeFlags +{ + UINT_32 thickness : 4; + UINT_32 isLinear : 1; + UINT_32 isMicro : 1; + UINT_32 isMacro : 1; + UINT_32 isMacro3d : 1; + UINT_32 isPrt : 1; + UINT_32 isPrtNoRotation : 1; + UINT_32 isBankSwapped : 1; +}; + +/** +*************************************************************************************************** +* @brief This class contains asic independent address lib functionalities +*************************************************************************************************** +*/ +class AddrLib : public AddrObject +{ +public: + virtual ~AddrLib(); + + static ADDR_E_RETURNCODE Create( + const ADDR_CREATE_INPUT* pCreateInfo, ADDR_CREATE_OUTPUT* pCreateOut); + + /// Pair of Create + VOID Destroy() + { + delete this; + } + + static AddrLib* GetAddrLib( + ADDR_HANDLE hLib); + + /// Returns AddrLib version (from compiled binary instead include file) + UINT_32 GetVersion() + { + return m_version; + } + + /// Returns asic chip family name defined by AddrLib + AddrChipFamily GetAddrChipFamily() + { + return m_chipFamily; + } + + /// Returns tileIndex support + BOOL_32 UseTileIndex(INT_32 index) const + { + return m_configFlags.useTileIndex && (index != TileIndexInvalid); + } + + /// Returns combined swizzle support + BOOL_32 UseCombinedSwizzle() const + { + return m_configFlags.useCombinedSwizzle; + } + + // + // Interface stubs + // + ADDR_E_RETURNCODE ComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord( + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr( + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSliceTileSwizzle( + const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, + ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ExtractBankPipeSwizzle( + const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, + ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE CombineBankPipeSwizzle( + const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn, + ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeBaseSwizzle( + const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); + + ADDR_E_RETURNCODE ComputeFmaskAddrFromCoord( + const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeFmaskCoordFromAddr( + const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ConvertTileInfoToHW( + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ConvertTileIndex( + const ADDR_CONVERT_TILEINDEX_INPUT* pIn, + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ConvertTileIndex1( + const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE GetTileIndex( + const ADDR_GET_TILEINDEX_INPUT* pIn, + ADDR_GET_TILEINDEX_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeHtileInfo( + const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, + ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeCmaskInfo( + const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, + ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeDccInfo( + const ADDR_COMPUTE_DCCINFO_INPUT* pIn, + ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeHtileAddrFromCoord( + const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeCmaskAddrFromCoord( + const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeHtileCoordFromAddr( + const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeCmaskCoordFromAddr( + const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputePrtInfo( + const ADDR_PRT_INFO_INPUT* pIn, + ADDR_PRT_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE Flt32ToDepthPixel( + const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, + ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE Flt32ToColorPixel( + const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, + ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const; + + BOOL_32 GetExportNorm( + const ELEM_GETEXPORTNORM_INPUT* pIn) const; + +protected: + AddrLib(); // Constructor is protected + AddrLib(const AddrClient* pClient); + + /// Pure Virtual function for Hwl computing surface info + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl computing surface address from coord + virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoord( + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl computing surface coord from address + virtual ADDR_E_RETURNCODE HwlComputeSurfaceCoordFromAddr( + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl computing surface tile swizzle + virtual ADDR_E_RETURNCODE HwlComputeSliceTileSwizzle( + const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, + ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl extracting bank/pipe swizzle from base256b + virtual ADDR_E_RETURNCODE HwlExtractBankPipeSwizzle( + const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, + ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl combining bank/pipe swizzle + virtual ADDR_E_RETURNCODE HwlCombineBankPipeSwizzle( + UINT_32 bankSwizzle, UINT_32 pipeSwizzle, ADDR_TILEINFO* pTileInfo, + UINT_64 baseAddr, UINT_32* pTileSwizzle) const = 0; + + /// Pure Virtual function for Hwl computing base swizzle + virtual ADDR_E_RETURNCODE HwlComputeBaseSwizzle( + const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl computing HTILE base align + virtual UINT_32 HwlComputeHtileBaseAlign( + BOOL_32 isTcCompatible, BOOL_32 isLinear, ADDR_TILEINFO* pTileInfo) const = 0; + + /// Pure Virtual function for Hwl computing HTILE bpp + virtual UINT_32 HwlComputeHtileBpp( + BOOL_32 isWidth8, BOOL_32 isHeight8) const = 0; + + /// Pure Virtual function for Hwl computing HTILE bytes + virtual UINT_64 HwlComputeHtileBytes( + UINT_32 pitch, UINT_32 height, UINT_32 bpp, + BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const = 0; + + /// Pure Virtual function for Hwl computing FMASK info + virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut) = 0; + + /// Pure Virtual function for Hwl FMASK address from coord + virtual ADDR_E_RETURNCODE HwlComputeFmaskAddrFromCoord( + const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl FMASK coord from address + virtual ADDR_E_RETURNCODE HwlComputeFmaskCoordFromAddr( + const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl convert tile info from real value to HW value + virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW( + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl compute mipmap info + virtual BOOL_32 HwlComputeMipLevel( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const = 0; + + /// Pure Virtual function for Hwl compute max cmask blockMax value + virtual BOOL_32 HwlGetMaxCmaskBlockMax() const = 0; + + /// Pure Virtual function for Hwl compute fmask bits + virtual UINT_32 HwlComputeFmaskBits( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + UINT_32* pNumSamples) const = 0; + + /// Virtual function to get index (not pure then no need to implement this in all hwls + virtual ADDR_E_RETURNCODE HwlGetTileIndex( + const ADDR_GET_TILEINDEX_INPUT* pIn, + ADDR_GET_TILEINDEX_OUTPUT* pOut) const + { + return ADDR_NOTSUPPORTED; + } + + /// Virtual function for Hwl to compute Dcc info + virtual ADDR_E_RETURNCODE HwlComputeDccInfo( + const ADDR_COMPUTE_DCCINFO_INPUT* pIn, + ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const + { + return ADDR_NOTSUPPORTED; + } + + /// Virtual function to get cmask address for tc compatible cmask + virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( + const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const + { + return ADDR_NOTSUPPORTED; + } + // Compute attributes + + // HTILE + UINT_32 ComputeHtileInfo( + ADDR_HTILE_FLAGS flags, + UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices, + BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8, + ADDR_TILEINFO* pTileInfo, + UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pHtileBytes, + UINT_32* pMacroWidth = NULL, UINT_32* pMacroHeight = NULL, + UINT_64* pSliceSize = NULL, UINT_32* pBaseAlign = NULL) const; + + // CMASK + ADDR_E_RETURNCODE ComputeCmaskInfo( + ADDR_CMASK_FLAGS flags, + UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices, BOOL_32 isLinear, + ADDR_TILEINFO* pTileInfo, UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pCmaskBytes, + UINT_32* pMacroWidth, UINT_32* pMacroHeight, UINT_64* pSliceSize = NULL, + UINT_32* pBaseAlign = NULL, UINT_32* pBlockMax = NULL) const; + + virtual VOID HwlComputeTileDataWidthAndHeightLinear( + UINT_32* pMacroWidth, UINT_32* pMacroHeight, + UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const; + + // CMASK & HTILE addressing + virtual UINT_64 HwlComputeXmaskAddrFromCoord( + UINT_32 pitch, UINT_32 height, UINT_32 x, UINT_32 y, UINT_32 slice, + UINT_32 numSlices, UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, + BOOL_32 isHeight8, ADDR_TILEINFO* pTileInfo, + UINT_32* bitPosition) const; + + virtual VOID HwlComputeXmaskCoordFromAddr( + UINT_64 addr, UINT_32 bitPosition, UINT_32 pitch, UINT_32 height, UINT_32 numSlices, + UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8, + ADDR_TILEINFO* pTileInfo, UINT_32* pX, UINT_32* pY, UINT_32* pSlice) const; + + // Surface mipmap + VOID ComputeMipLevel( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const; + + /// Pure Virtual function for Hwl checking degrade for base level + virtual BOOL_32 HwlDegradeBaseLevel( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const = 0; + + virtual BOOL_32 HwlOverrideTileMode( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + AddrTileMode* pTileMode, + AddrTileType* pTileType) const + { + // not supported in hwl layer, FALSE for not-overrided + return FALSE; + } + + AddrTileMode DegradeLargeThickTile(AddrTileMode tileMode, UINT_32 bpp) const; + + VOID PadDimensions( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 padDims, UINT_32 mipLevel, + UINT_32* pPitch, UINT_32 pitchAlign, UINT_32* pHeight, UINT_32 heightAlign, + UINT_32* pSlices, UINT_32 sliceAlign) const; + + virtual VOID HwlPadDimensions( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 padDims, UINT_32 mipLevel, + UINT_32* pPitch, UINT_32 pitchAlign, UINT_32* pHeight, UINT_32 heightAlign, + UINT_32* pSlices, UINT_32 sliceAlign) const + { + } + + // + // Addressing shared for linear/1D tiling + // + UINT_64 ComputeSurfaceAddrFromCoordLinear( + UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, + UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSlices, + UINT_32* pBitPosition) const; + + VOID ComputeSurfaceCoordFromAddrLinear( + UINT_64 addr, UINT_32 bitPosition, UINT_32 bpp, + UINT_32 pitch, UINT_32 height, UINT_32 numSlices, + UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample) const; + + VOID ComputeSurfaceCoordFromAddrMicroTiled( + UINT_64 addr, UINT_32 bitPosition, + UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, + AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, + UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, + AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const; + + UINT_32 ComputePixelIndexWithinMicroTile( + UINT_32 x, UINT_32 y, UINT_32 z, + UINT_32 bpp, AddrTileMode tileMode, AddrTileType microTileType) const; + + /// Pure Virtual function for Hwl computing coord from offset inside micro tile + virtual VOID HwlComputePixelCoordFromOffset( + UINT_32 offset, UINT_32 bpp, UINT_32 numSamples, + AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, + UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, + AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const = 0; + + // + // Addressing shared by all + // + virtual UINT_32 HwlGetPipes( + const ADDR_TILEINFO* pTileInfo) const; + + UINT_32 ComputePipeFromAddr( + UINT_64 addr, UINT_32 numPipes) const; + + /// Pure Virtual function for Hwl computing pipe from coord + virtual UINT_32 ComputePipeFromCoord( + UINT_32 x, UINT_32 y, UINT_32 slice, AddrTileMode tileMode, + UINT_32 pipeSwizzle, BOOL_32 flags, ADDR_TILEINFO* pTileInfo) const = 0; + + /// Pure Virtual function for Hwl computing coord Y for 8 pipe cmask/htile + virtual UINT_32 HwlComputeXmaskCoordYFrom8Pipe( + UINT_32 pipe, UINT_32 x) const = 0; + + // + // Initialization + // + /// Pure Virtual function for Hwl computing internal global parameters from h/w registers + virtual BOOL_32 HwlInitGlobalParams( + const ADDR_CREATE_INPUT* pCreateIn) = 0; + + /// Pure Virtual function for Hwl converting chip family + virtual AddrChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision) = 0; + + // + // Misc helper + // + static const AddrTileModeFlags m_modeFlags[ADDR_TM_COUNT]; + + static UINT_32 ComputeSurfaceThickness( + AddrTileMode tileMode); + + // Checking tile mode + static BOOL_32 IsMacroTiled(AddrTileMode tileMode); + static BOOL_32 IsMacro3dTiled(AddrTileMode tileMode); + static BOOL_32 IsLinear(AddrTileMode tileMode); + static BOOL_32 IsMicroTiled(AddrTileMode tileMode); + static BOOL_32 IsPrtTileMode(AddrTileMode tileMode); + static BOOL_32 IsPrtNoRotationTileMode(AddrTileMode tileMode); + + static UINT_32 Bits2Number(UINT_32 bitNum,...); + + static UINT_32 GetNumFragments(UINT_32 numSamples, UINT_32 numFrags) + { + return numFrags != 0 ? numFrags : Max(1u, numSamples); + } + + /// Returns pointer of AddrElemLib + AddrElemLib* GetElemLib() const + { + return m_pElemLib; + } + + /// Return TRUE if tile info is needed + BOOL_32 UseTileInfo() const + { + return !m_configFlags.ignoreTileInfo; + } + + /// Returns fillSizeFields flag + UINT_32 GetFillSizeFieldsFlags() const + { + return m_configFlags.fillSizeFields; + } + + /// Adjusts pitch alignment for flipping surface + VOID AdjustPitchAlignment( + ADDR_SURFACE_FLAGS flags, UINT_32* pPitchAlign) const; + + /// Overwrite tile config according to tile index + virtual ADDR_E_RETURNCODE HwlSetupTileCfg( + INT_32 index, INT_32 macroModeIndex, + ADDR_TILEINFO* pInfo, AddrTileMode* mode = NULL, AddrTileType* type = NULL) const; + + /// Overwrite macro tile config according to tile index + virtual INT_32 HwlComputeMacroModeIndex( + INT_32 index, ADDR_SURFACE_FLAGS flags, UINT_32 bpp, UINT_32 numSamples, + ADDR_TILEINFO* pTileInfo, AddrTileMode *pTileMode = NULL, AddrTileType *pTileType = NULL + ) const + { + return TileIndexNoMacroIndex; + } + + /// Pre-handler of 3x pitch (96 bit) adjustment + virtual UINT_32 HwlPreHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const; + /// Post-handler of 3x pitch adjustment + virtual UINT_32 HwlPostHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const; + /// Check miplevel after surface adjustment + ADDR_E_RETURNCODE PostComputeMipLevel( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + /// Quad buffer stereo support, has its implementation in ind. layer + virtual BOOL_32 ComputeQbStereoInfo( + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + /// Pure virutual function to compute stereo bank swizzle for right eye + virtual UINT_32 HwlComputeQbStereoRightSwizzle( + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0; + +private: + // Disallow the copy constructor + AddrLib(const AddrLib& a); + + // Disallow the assignment operator + AddrLib& operator=(const AddrLib& a); + + VOID SetAddrChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision); + + UINT_32 ComputeCmaskBaseAlign( + ADDR_CMASK_FLAGS flags, ADDR_TILEINFO* pTileInfo) const; + + UINT_64 ComputeCmaskBytes( + UINT_32 pitch, UINT_32 height, UINT_32 numSlices) const; + + // + // CMASK/HTILE shared methods + // + VOID ComputeTileDataWidthAndHeight( + UINT_32 bpp, UINT_32 cacheBits, ADDR_TILEINFO* pTileInfo, + UINT_32* pMacroWidth, UINT_32* pMacroHeight) const; + + UINT_32 ComputeXmaskCoordYFromPipe( + UINT_32 pipe, UINT_32 x) const; + + VOID SetMinPitchAlignPixels(UINT_32 minPitchAlignPixels); + + BOOL_32 DegradeBaseLevel( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, AddrTileMode* pTileMode) const; + +protected: + AddrLibClass m_class; ///< Store class type (HWL type) + + AddrChipFamily m_chipFamily; ///< Chip family translated from the one in atiid.h + + UINT_32 m_chipRevision; ///< Revision id from xxx_id.h + + UINT_32 m_version; ///< Current version + + // + // Global parameters + // + ADDR_CONFIG_FLAGS m_configFlags; ///< Global configuration flags. Note this is setup by + /// AddrLib instead of Client except forceLinearAligned + + UINT_32 m_pipes; ///< Number of pipes + UINT_32 m_banks; ///< Number of banks + /// For r800 this is MC_ARB_RAMCFG.NOOFBANK + /// Keep it here to do default parameter calculation + + UINT_32 m_pipeInterleaveBytes; + ///< Specifies the size of contiguous address space + /// within each tiling pipe when making linear + /// accesses. (Formerly Group Size) + + UINT_32 m_rowSize; ///< DRAM row size, in bytes + + UINT_32 m_minPitchAlignPixels; ///< Minimum pitch alignment in pixels + UINT_32 m_maxSamples; ///< Max numSamples +private: + AddrElemLib* m_pElemLib; ///< Element Lib pointer +}; + +AddrLib* AddrSIHwlInit (const AddrClient* pClient); +AddrLib* AddrCIHwlInit (const AddrClient* pClient); + +#endif + diff --git a/src/amd/addrlib/core/addrobject.cpp b/src/amd/addrlib/core/addrobject.cpp new file mode 100644 index 0000000..369be8c --- /dev/null +++ b/src/amd/addrlib/core/addrobject.cpp @@ -0,0 +1,246 @@ +/* + * Copyright © 2014 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +*************************************************************************************************** +* @file addrobject.cpp +* @brief Contains the AddrObject base class implementation. +*************************************************************************************************** +*/ + +#include "addrinterface.h" +#include "addrobject.h" + +/** +*************************************************************************************************** +* AddrObject::AddrObject +* +* @brief +* Constructor for the AddrObject class. +*************************************************************************************************** +*/ +AddrObject::AddrObject() +{ + m_client.handle = NULL; + m_client.callbacks.allocSysMem = NULL; + m_client.callbacks.freeSysMem = NULL; + m_client.callbacks.debugPrint = NULL; +} + +/** +*************************************************************************************************** +* AddrObject::AddrObject +* +* @brief +* Constructor for the AddrObject class. +*************************************************************************************************** +*/ +AddrObject::AddrObject(const AddrClient* pClient) +{ + m_client = *pClient; +} + +/** +*************************************************************************************************** +* AddrObject::~AddrObject +* +* @brief +* Destructor for the AddrObject class. +*************************************************************************************************** +*/ +AddrObject::~AddrObject() +{ +} + +/** +*************************************************************************************************** +* AddrObject::ClientAlloc +* +* @brief +* Calls instanced allocSysMem inside AddrClient +*************************************************************************************************** +*/ +VOID* AddrObject::ClientAlloc( + size_t objSize, ///< [in] Size to allocate + const AddrClient* pClient) ///< [in] Client pointer +{ + VOID* pObjMem = NULL; + + if (pClient->callbacks.allocSysMem != NULL) + { + ADDR_ALLOCSYSMEM_INPUT allocInput = {0}; + + allocInput.size = sizeof(ADDR_ALLOCSYSMEM_INPUT); + allocInput.flags.value = 0; + allocInput.sizeInBytes = static_cast<UINT_32>(objSize); + allocInput.hClient = pClient->handle; + + pObjMem = pClient->callbacks.allocSysMem(&allocInput); + } + + return pObjMem; +} + +/** +*************************************************************************************************** +* AddrObject::AddrMalloc +* +* @brief +* A wrapper of ClientAlloc +*************************************************************************************************** +*/ +VOID* AddrObject::AddrMalloc( + size_t objSize) const ///< [in] Size to allocate +{ + return ClientAlloc(objSize, &m_client); +} + +/** +*************************************************************************************************** +* AddrObject::ClientFree +* +* @brief +* Calls freeSysMem inside AddrClient +*************************************************************************************************** +*/ +VOID AddrObject::ClientFree( + VOID* pObjMem, ///< [in] User virtual address to free. + const AddrClient* pClient) ///< [in] Client pointer +{ + if (pClient->callbacks.freeSysMem != NULL) + { + if (pObjMem != NULL) + { + ADDR_FREESYSMEM_INPUT freeInput = {0}; + + freeInput.size = sizeof(ADDR_FREESYSMEM_INPUT); + freeInput.hClient = pClient->handle; + freeInput.pVirtAddr = pObjMem; + + pClient->callbacks.freeSysMem(&freeInput); + } + } +} + +/** +*************************************************************************************************** +* AddrObject::AddrFree +* +* @brief +* A wrapper of ClientFree +*************************************************************************************************** +*/ +VOID AddrObject::AddrFree( + VOID* pObjMem) const ///< [in] User virtual address to free. +{ + ClientFree(pObjMem, &m_client); +} + +/** +*************************************************************************************************** +* AddrObject::operator new +* +* @brief +* Allocates memory needed for AddrObject object. (with ADDR_CLIENT_HANDLE) +* +* @return +* Returns NULL if unsuccessful. +*************************************************************************************************** +*/ +VOID* AddrObject::operator new( + size_t objSize, ///< [in] Size to allocate + const AddrClient* pClient) ///< [in] Client pointer +{ + return ClientAlloc(objSize, pClient); +} + + +/** +*************************************************************************************************** +* AddrObject::operator delete +* +* @brief +* Frees AddrObject object memory. +*************************************************************************************************** +*/ +VOID AddrObject::operator delete( + VOID* pObjMem, ///< [in] User virtual address to free. + const AddrClient* pClient) ///< [in] Client handle +{ + ClientFree(pObjMem, pClient); +} + +/** +*************************************************************************************************** +* AddrObject::operator delete +* +* @brief +* Frees AddrObject object memory. +*************************************************************************************************** +*/ +VOID AddrObject::operator delete( + VOID* pObjMem) ///< [in] User virtual address to free. +{ + AddrObject* pObj = static_cast<AddrObject*>(pObjMem); + ClientFree(pObjMem, &pObj->m_client); +} + +/** +*************************************************************************************************** +* AddrObject::DebugPrint +* +* @brief +* Print debug message +* +* @return +* N/A +*************************************************************************************************** +*/ +VOID AddrObject::DebugPrint( + const CHAR* pDebugString, ///< [in] Debug string + ...) const +{ +#if DEBUG + if (m_client.callbacks.debugPrint != NULL) + { + va_list ap; + + va_start(ap, pDebugString); + + ADDR_DEBUGPRINT_INPUT debugPrintInput = {0}; + + debugPrintInput.size = sizeof(ADDR_DEBUGPRINT_INPUT); + debugPrintInput.pDebugString = const_cast<CHAR*>(pDebugString); + debugPrintInput.hClient = m_client.handle; + va_copy(debugPrintInput.ap, ap); + + m_client.callbacks.debugPrint(&debugPrintInput); + + va_end(ap); + } +#endif +} + diff --git a/src/amd/addrlib/core/addrobject.h b/src/amd/addrlib/core/addrobject.h new file mode 100644 index 0000000..3540088 --- /dev/null +++ b/src/amd/addrlib/core/addrobject.h @@ -0,0 +1,89 @@ +/* + * Copyright © 2014 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +*************************************************************************************************** +* @file addrobject.h +* @brief Contains the AddrObject base class definition. +*************************************************************************************************** +*/ + +#ifndef __ADDR_OBJECT_H__ +#define __ADDR_OBJECT_H__ + +#include "addrtypes.h" +#include "addrcommon.h" + +/** +*************************************************************************************************** +* @brief This structure contains client specific data +*************************************************************************************************** +*/ +struct AddrClient +{ + ADDR_CLIENT_HANDLE handle; + ADDR_CALLBACKS callbacks; +}; +/** +*************************************************************************************************** +* @brief This class is the base class for all ADDR class objects. +*************************************************************************************************** +*/ +class AddrObject +{ +public: + AddrObject(); + AddrObject(const AddrClient* pClient); + virtual ~AddrObject(); + + VOID* operator new(size_t size, const AddrClient* pClient); + VOID operator delete(VOID* pObj, const AddrClient* pClient); + VOID operator delete(VOID* pObj); + VOID* AddrMalloc(size_t size) const; + VOID AddrFree(VOID* pObj) const; + + VOID DebugPrint( + const CHAR* pDebugString, + ...) const; + + const AddrClient* GetClient() const {return &m_client;} + +protected: + AddrClient m_client; + +private: + static VOID* ClientAlloc(size_t size, const AddrClient* pClient); + static VOID ClientFree(VOID* pObj, const AddrClient* pClient); + + // disallow the copy constructor + AddrObject(const AddrObject& a); + + // disallow the assignment operator + AddrObject& operator=(const AddrObject& a); +}; + +#endif + |