summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/swr/rasterizer/jitter
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/swr/rasterizer/jitter')
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp 4
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/JitManager.h 8
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp 21
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/builder.cpp 16
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/builder.h 6
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp 172
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h 8
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp 72
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py 21
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_types.py 2
-rw-r--r-- src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp 8
11 files changed, 246 insertions, 92 deletions
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
index 734c897..de856c4 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
@@ -47,6 +47,10 @@
#include "llvm/Analysis/CFGPrinter.h"
#include "llvm/IRReader/IRReader.h"
+#if LLVM_USE_INTEL_JITEVENTS
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#endif
+
#include "core/state.h"
#include "common/containers.hpp"
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h
index c974a61..4ffb0fb 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h
@@ -53,6 +53,10 @@
#include "llvm/Config/config.h"
#endif
+#ifndef HAVE_LLVM
+#define HAVE_LLVM ((LLVM_VERSION_MAJOR << 8) | LLVM_VERSION_MINOR) // packed major/minor, e.g. 0x306 for LLVM 3.6
+#endif
+
#include "llvm/IR/Verifier.h"
#include "llvm/ExecutionEngine/MCJIT.h"
#include "llvm/Support/FileSystem.h"
@@ -60,11 +64,10 @@
#include "llvm/Analysis/Passes.h"
-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 6
+#if HAVE_LLVM == 0x306
#include "llvm/PassManager.h"
#else
#include "llvm/IR/LegacyPassManager.h"
-using namespace llvm::legacy;
#endif
#include "llvm/CodeGen/Passes.h"
@@ -166,7 +169,6 @@ struct JitManager
FunctionType* mTrinaryFPTy;
FunctionType* mUnaryIntTy;
FunctionType* mBinaryIntTy;
- FunctionType* mTrinaryIntTy;
Type* mSimtFP32Ty;
Type* mSimtInt32Ty;
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
index 954524a..a64f860 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
@@ -576,9 +576,12 @@ struct BlendJit : public Builder
src1[i] = LOAD(pSrc1, { i });
}
Value* currentMask = VIMMED1(-1);
- if(state.desc.alphaToCoverageEnable)
+ if (state.desc.alphaToCoverageEnable)
{
- currentMask = FP_TO_SI(FMUL(src[3], VBROADCAST(C((float)state.desc.numSamples))), mSimdInt32Ty);
+ Value* pClampedSrc = FCLAMP(src[3], 0.0f, 1.0f);
+ uint32_t bits = (1 << state.desc.numSamples) - 1;
+ currentMask = FMUL(pClampedSrc, VBROADCAST(C((float)bits)));
+ currentMask = FP_TO_SI(FADD(currentMask, VIMMED1(0.5f)), mSimdInt32Ty);
}
// alpha test
@@ -702,6 +705,12 @@ struct BlendJit : public Builder
currentMask = AND(sampleMask, currentMask);
}
+ if (state.desc.alphaToCoverageEnable)
+ {
+ Value* sampleMasked = SHL(C(1), sampleNum);
+ currentMask = AND(currentMask, VBROADCAST(sampleMasked));
+ }
+
if(state.desc.sampleMaskEnable || state.desc.alphaToCoverageEnable ||
state.desc.oMaskEnable)
{
@@ -717,7 +726,13 @@ struct BlendJit : public Builder
JitManager::DumpToFile(blendFunc, "");
- FunctionPassManager passes(JM()->mpCurrentModule);
+#if HAVE_LLVM == 0x306
+ FunctionPassManager
+#else
+ llvm::legacy::FunctionPassManager
+#endif
+ passes(JM()->mpCurrentModule);
+
passes.add(createBreakCriticalEdgesPass());
passes.add(createCFGSimplificationPass());
passes.add(createEarlyCSEPass());
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
index c15bdf1..757ea3f 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
@@ -38,6 +38,8 @@ using namespace llvm;
Builder::Builder(JitManager *pJitMgr)
: mpJitMgr(pJitMgr)
{
+ mVWidth = pJitMgr->mVWidth;
+
mpIRBuilder = &pJitMgr->mBuilder;
mVoidTy = Type::getVoidTy(pJitMgr->mContext);
@@ -48,14 +50,18 @@ Builder::Builder(JitManager *pJitMgr)
mInt8Ty = Type::getInt8Ty(pJitMgr->mContext);
mInt16Ty = Type::getInt16Ty(pJitMgr->mContext);
mInt32Ty = Type::getInt32Ty(pJitMgr->mContext);
+ mInt8PtrTy = PointerType::get(mInt8Ty, 0);
+ mInt16PtrTy = PointerType::get(mInt16Ty, 0);
+ mInt32PtrTy = PointerType::get(mInt32Ty, 0);
mInt64Ty = Type::getInt64Ty(pJitMgr->mContext);
mV4FP32Ty = StructType::get(pJitMgr->mContext, std::vector<Type*>(4, mFP32Ty), false); // vector4 float type (represented as structure)
mV4Int32Ty = StructType::get(pJitMgr->mContext, std::vector<Type*>(4, mInt32Ty), false); // vector4 int type
- mSimdInt16Ty = VectorType::get(mInt16Ty, mpJitMgr->mVWidth);
- mSimdInt32Ty = VectorType::get(mInt32Ty, mpJitMgr->mVWidth);
- mSimdInt64Ty = VectorType::get(mInt64Ty, mpJitMgr->mVWidth);
- mSimdFP16Ty = VectorType::get(mFP16Ty, mpJitMgr->mVWidth);
- mSimdFP32Ty = VectorType::get(mFP32Ty, mpJitMgr->mVWidth);
+ mSimdInt16Ty = VectorType::get(mInt16Ty, mVWidth);
+ mSimdInt32Ty = VectorType::get(mInt32Ty, mVWidth);
+ mSimdInt64Ty = VectorType::get(mInt64Ty, mVWidth);
+ mSimdFP16Ty = VectorType::get(mFP16Ty, mVWidth);
+ mSimdFP32Ty = VectorType::get(mFP32Ty, mVWidth);
+ mSimdVectorTy = StructType::get(pJitMgr->mContext, std::vector<Type*>(4, mSimdFP32Ty), false);
if (sizeof(uint32_t*) == 4)
{
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.h b/src/gallium/drivers/swr/rasterizer/jitter/builder.h
index 4921661..239ef2a 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.h
@@ -43,6 +43,8 @@ struct Builder
JitManager* mpJitMgr;
IRBuilder<>* mpIRBuilder;
+ uint32_t mVWidth;
+
// Built in types.
Type* mVoidTy;
Type* mInt1Ty;
@@ -54,12 +56,16 @@ struct Builder
Type* mFP16Ty;
Type* mFP32Ty;
Type* mDoubleTy;
+ Type* mInt8PtrTy;
+ Type* mInt16PtrTy;
+ Type* mInt32PtrTy;
Type* mSimdFP16Ty;
Type* mSimdFP32Ty;
Type* mSimdInt16Ty;
Type* mSimdInt32Ty;
Type* mSimdInt64Ty;
Type* mSimdIntPtrTy;
+ Type* mSimdVectorTy;
StructType* mV4FP32Ty;
StructType* mV4Int32Ty;
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
index 5394fc7..486dad8 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
@@ -28,6 +28,8 @@
*
******************************************************************************/
#include "builder.h"
+#include "common/rdtsc_buckets.h"
+
#include "llvm/Support/DynamicLibrary.h"
void __cdecl CallPrint(const char* fmt, ...);
@@ -189,32 +191,32 @@ Constant *Builder::PRED(bool pred)
Value *Builder::VIMMED1(int i)
{
- return ConstantVector::getSplat(JM()->mVWidth, cast<ConstantInt>(C(i)));
+ return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
}
Value *Builder::VIMMED1(uint32_t i)
{
- return ConstantVector::getSplat(JM()->mVWidth, cast<ConstantInt>(C(i)));
+ return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
}
Value *Builder::VIMMED1(float i)
{
- return ConstantVector::getSplat(JM()->mVWidth, cast<ConstantFP>(C(i)));
+ return ConstantVector::getSplat(mVWidth, cast<ConstantFP>(C(i)));
}
Value *Builder::VIMMED1(bool i)
{
- return ConstantVector::getSplat(JM()->mVWidth, cast<ConstantInt>(C(i)));
+ return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
}
Value *Builder::VUNDEF_IPTR()
{
- return UndefValue::get(VectorType::get(PointerType::get(mInt32Ty, 0),JM()->mVWidth));
+ return UndefValue::get(VectorType::get(mInt32PtrTy,mVWidth));
}
Value *Builder::VUNDEF_I()
{
- return UndefValue::get(VectorType::get(mInt32Ty, JM()->mVWidth));
+ return UndefValue::get(VectorType::get(mInt32Ty, mVWidth));
}
Value *Builder::VUNDEF(Type *ty, uint32_t size)
@@ -224,15 +226,15 @@ Value *Builder::VUNDEF(Type *ty, uint32_t size)
Value *Builder::VUNDEF_F()
{
- return UndefValue::get(VectorType::get(mFP32Ty, JM()->mVWidth));
+ return UndefValue::get(VectorType::get(mFP32Ty, mVWidth));
}
Value *Builder::VUNDEF(Type* t)
{
- return UndefValue::get(VectorType::get(t, JM()->mVWidth));
+ return UndefValue::get(VectorType::get(t, mVWidth));
}
-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 6
+#if HAVE_LLVM == 0x306
Value *Builder::VINSERT(Value *vec, Value *val, uint64_t index)
{
return VINSERT(vec, val, C((int64_t)index));
@@ -247,7 +249,7 @@ Value *Builder::VBROADCAST(Value *src)
return src;
}
- return VECTOR_SPLAT(JM()->mVWidth, src);
+ return VECTOR_SPLAT(mVWidth, src);
}
uint32_t Builder::IMMED(Value* v)
@@ -257,6 +259,13 @@ uint32_t Builder::IMMED(Value* v)
return pValConst->getZExtValue();
}
+int32_t Builder::S_IMMED(Value* v)
+{
+    // Sign-extended immediate of v; v is asserted to be a ConstantInt.
+    SWR_ASSERT(isa<ConstantInt>(v));
+    return cast<ConstantInt>(v)->getSExtValue();
+}
+
Value *Builder::GEP(Value* ptr, const std::initializer_list<Value*> &indexList)
{
std::vector<Value*> indices;
@@ -342,8 +351,8 @@ Value *Builder::MASKLOADD(Value* src,Value* mask)
else
{
Function *func = Intrinsic::getDeclaration(JM()->mpCurrentModule,Intrinsic::x86_avx_maskload_ps_256);
- Value* fMask = BITCAST(mask,VectorType::get(mFP32Ty,JM()->mVWidth));
- vResult = BITCAST(CALL(func,{src,fMask}), VectorType::get(mInt32Ty,JM()->mVWidth));
+ Value* fMask = BITCAST(mask,VectorType::get(mFP32Ty,mVWidth)); // the ps maskload is handed its mask as a float vector
+ vResult = BITCAST(CALL(func,{src,fMask}), VectorType::get(mInt32Ty,mVWidth));
}
return vResult;
}
@@ -512,7 +521,7 @@ CallInst *Builder::PRINT(const std::string &printStr,const std::initializer_list
// get a pointer to the first character in the constant string array
std::vector<Constant*> geplist{C(0),C(0)};
-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 6
+#if HAVE_LLVM == 0x306
Constant *strGEP = ConstantExpr::getGetElementPtr(gvPtr,geplist,false);
#else
Constant *strGEP = ConstantExpr::getGetElementPtr(nullptr, gvPtr,geplist,false);
@@ -575,7 +584,7 @@ Value *Builder::GATHERPS(Value* vSrc, Value* pBase, Value* vIndices, Value* vMas
Value *vScaleVec = VBROADCAST(Z_EXT(scale,mInt32Ty));
Value *vOffsets = MUL(vIndices,vScaleVec);
Value *mask = MASK(vMask);
- for(uint32_t i = 0; i < JM()->mVWidth; ++i)
+ for(uint32_t i = 0; i < mVWidth; ++i)
{
// single component byte index
Value *offset = VEXTRACT(vOffsets,C(i));
@@ -625,7 +634,7 @@ Value *Builder::GATHERDD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMas
Value *vScaleVec = VBROADCAST(Z_EXT(scale, mInt32Ty));
Value *vOffsets = MUL(vIndices, vScaleVec);
Value *mask = MASK(vMask);
- for(uint32_t i = 0; i < JM()->mVWidth; ++i)
+ for(uint32_t i = 0; i < mVWidth; ++i)
{
// single component byte index
Value *offset = VEXTRACT(vOffsets, C(i));
@@ -774,12 +783,61 @@ Value *Builder::PERMD(Value* a, Value* idx)
}
else
{
- res = VSHUFFLE(a, a, idx);
+ if (isa<Constant>(idx))
+ {
+ res = VSHUFFLE(a, a, idx);
+ }
+ else
+ {
+ res = VUNDEF_I();
+ for (uint32_t l = 0; l < mVWidth; ++l) // one extract/insert pair per SIMD lane
+ {
+ Value* pIndex = VEXTRACT(idx, C(l));
+ Value* pVal = VEXTRACT(a, pIndex);
+ res = VINSERT(res, pVal, C(l));
+ }
+ }
}
return res;
}
//////////////////////////////////////////////////////////////////////////
+/// @brief Generate a VPERMPS operation (shuffle 32 bit float values
+/// across 128 bit lanes) in LLVM IR; emulated when AVX2 is unavailable.
+/// @param a - 256bit SIMD lane(8x32bit) of float values.
+/// @param idx - 256bit SIMD lane(8x32bit) of 3 bit lane index values
+/// @return permuted float SIMD value; in the emulated path lane l receives a[idx[l]]
+Value *Builder::PERMPS(Value* a, Value* idx)
+{
+    Value* res;
+    // use avx2 permute instruction if available
+    if (JM()->mArch.AVX2())
+    {
+        // llvm 3.6.0 swapped the order of the args to vpermd
+        res = VPERMPS(idx, a);
+    }
+    else
+    {
+        if (isa<Constant>(idx))
+        {
+            res = VSHUFFLE(a, a, idx);
+        }
+        else
+        {
+            res = VUNDEF_F();
+            for (uint32_t l = 0; l < mVWidth; ++l)
+            {
+                Value* pIndex = VEXTRACT(idx, C(l));
+                Value* pVal = VEXTRACT(a, pIndex);
+                res = VINSERT(res, pVal, C(l));
+            }
+        }
+    }
+
+    return res;
+}
+
+//////////////////////////////////////////////////////////////////////////
/// @brief Generate a VCVTPH2PS operation (float16->float32 conversion)
/// in LLVM IR. If not supported on the underlying platform, emulate it
/// @param a - 128bit SIMD lane(8x16bit) of float16 in int16 format.
@@ -800,7 +858,7 @@ Value *Builder::CVTPH2PS(Value* a)
}
Value* pResult = UndefValue::get(mSimdFP32Ty);
- for (uint32_t i = 0; i < JM()->mVWidth; ++i)
+ for (uint32_t i = 0; i < mVWidth; ++i)
{
Value* pSrc = VEXTRACT(a, C(i));
Value* pConv = CALL(pCvtPh2Ps, std::initializer_list<Value*>{pSrc});
@@ -833,7 +891,7 @@ Value *Builder::CVTPS2PH(Value* a, Value* rounding)
}
Value* pResult = UndefValue::get(mSimdInt16Ty);
- for (uint32_t i = 0; i < JM()->mVWidth; ++i)
+ for (uint32_t i = 0; i < mVWidth; ++i)
{
Value* pSrc = VEXTRACT(a, C(i));
Value* pConv = CALL(pCvtPs2Ph, std::initializer_list<Value*>{pSrc});
@@ -1085,8 +1143,8 @@ void Builder::GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byt
void Builder::Shuffle16bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput[2], Value* vGatherOutput[4], bool bPackedOutput)
{
// cast types
- Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), JM()->mVWidth);
- Type* v32x8Ty = VectorType::get(mInt8Ty, JM()->mVWidth * 4); // vwidth is units of 32 bits
+ Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
+ Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
// input could either be float or int vector; do shuffle work in int
vGatherInput[0] = BITCAST(vGatherInput[0], mSimdInt32Ty);
@@ -1094,7 +1152,7 @@ void Builder::Shuffle16bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInp
if(bPackedOutput)
{
- Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), JM()->mVWidth / 4); // vwidth is units of 32 bits
+ Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits
// shuffle mask
Value* vConstMask = C<char>({0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
@@ -1179,12 +1237,12 @@ void Builder::Shuffle16bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInp
void Builder::Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput)
{
// cast types
- Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), JM()->mVWidth);
- Type* v32x8Ty = VectorType::get(mInt8Ty, JM()->mVWidth * 4 ); // vwidth is units of 32 bits
+ Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
+ Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4 ); // vwidth is units of 32 bits
if(bPackedOutput)
{
- Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), JM()->mVWidth / 4); // vwidth is units of 32 bits
+ Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits
// shuffle mask
Value* vConstMask = C<char>({0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15});
@@ -1286,16 +1344,18 @@ void Builder::SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask)
{
Value* pStack = STACKSAVE();
+ Type* pSrcTy = vSrc->getType()->getVectorElementType();
+
// allocate tmp stack for masked off lanes
- Value* vTmpPtr = ALLOCA(vSrc->getType()->getVectorElementType());
+ Value* vTmpPtr = ALLOCA(pSrcTy);
Value *mask = MASK(vMask);
- for (uint32_t i = 0; i < JM()->mVWidth; ++i)
+ for (uint32_t i = 0; i < mVWidth; ++i)
{
Value *offset = VEXTRACT(vOffsets, C(i));
// byte pointer to component
Value *storeAddress = GEP(pDst, offset);
- storeAddress = BITCAST(storeAddress, PointerType::get(mFP32Ty, 0));
+ storeAddress = BITCAST(storeAddress, PointerType::get(pSrcTy, 0));
Value *selMask = VEXTRACT(mask, C(i));
Value *srcElem = VEXTRACT(vSrc, C(i));
// switch in a safe address to load if we're trying to access a vertex
@@ -1349,7 +1409,7 @@ Value *Builder::FCLAMP(Value* src, float low, float high)
Value* Builder::STACKSAVE()
{
Function* pfnStackSave = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::stacksave);
-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 6
+#if HAVE_LLVM == 0x306
return CALL(pfnStackSave);
#else
return CALLA(pfnStackSave);
@@ -1401,11 +1461,13 @@ void __cdecl CallPrint(const char* fmt, ...)
vsnprintf_s(strBuf, _TRUNCATE, fmt, args);
OutputDebugString(strBuf);
#endif
+
+ va_end(args);
}
Value *Builder::VEXTRACTI128(Value* a, Constant* imm8)
{
-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 6
+#if HAVE_LLVM == 0x306
Function *func =
Intrinsic::getDeclaration(JM()->mpCurrentModule,
Intrinsic::x86_avx_vextractf128_si_256);
@@ -1413,8 +1475,8 @@ Value *Builder::VEXTRACTI128(Value* a, Constant* imm8)
#else
bool flag = !imm8->isZeroValue();
SmallVector<Constant*,8> idx;
- for (unsigned i = 0; i < JM()->mVWidth / 2; i++) {
- idx.push_back(C(flag ? i + JM()->mVWidth / 2 : i));
+ for (unsigned i = 0; i < mVWidth / 2; i++) {
+ idx.push_back(C(flag ? i + mVWidth / 2 : i));
}
return VSHUFFLE(a, VUNDEF_I(), ConstantVector::get(idx));
#endif
@@ -1422,7 +1484,7 @@ Value *Builder::VEXTRACTI128(Value* a, Constant* imm8)
Value *Builder::VINSERTI128(Value* a, Value* b, Constant* imm8)
{
-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 6
+#if HAVE_LLVM == 0x306
Function *func =
Intrinsic::getDeclaration(JM()->mpCurrentModule,
Intrinsic::x86_avx_vinsertf128_si_256);
@@ -1430,18 +1492,54 @@ Value *Builder::VINSERTI128(Value* a, Value* b, Constant* imm8)
#else
bool flag = !imm8->isZeroValue();
SmallVector<Constant*,8> idx;
- for (unsigned i = 0; i < JM()->mVWidth; i++) {
+ for (unsigned i = 0; i < mVWidth; i++) {
idx.push_back(C(i));
}
Value *inter = VSHUFFLE(b, VUNDEF_I(), ConstantVector::get(idx));
SmallVector<Constant*,8> idx2;
- for (unsigned i = 0; i < JM()->mVWidth / 2; i++) {
- idx2.push_back(C(flag ? i : i + JM()->mVWidth));
+ for (unsigned i = 0; i < mVWidth / 2; i++) {
+ idx2.push_back(C(flag ? i : i + mVWidth));
}
- for (unsigned i = JM()->mVWidth / 2; i < JM()->mVWidth; i++) {
- idx2.push_back(C(flag ? i + JM()->mVWidth / 2 : i));
+ for (unsigned i = mVWidth / 2; i < mVWidth; i++) {
+ idx2.push_back(C(flag ? i + mVWidth / 2 : i));
}
return VSHUFFLE(a, inter, ConstantVector::get(idx2));
#endif
}
+
+// rdtsc buckets macros
+void Builder::RDTSC_START(Value* pBucketMgr, Value* pId)
+{
+    // Callee signature: void(i32* pBucketMgr, i32 id).
+    std::vector<Type*> paramTys{ mInt32PtrTy, mInt32Ty };
+    FunctionType* pCalleeTy = FunctionType::get(mVoidTy, paramTys, false);
+
+    const char* const symName = "BucketManager_StartBucket";
+    Function* pCallee = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction(symName, pCalleeTy));
+    // Register the host function's address under that name if it is not already known.
+    if (sys::DynamicLibrary::SearchForAddressOfSymbol(symName) == nullptr)
+    {
+        sys::DynamicLibrary::AddSymbol(symName, (void*)&BucketManager_StartBucket);
+    }
+
+    CALL(pCallee, { pBucketMgr, pId });
+}
+
+void Builder::RDTSC_STOP(Value* pBucketMgr, Value* pId)
+{
+    // Callee signature: void(i32* pBucketMgr, i32 id).
+    std::vector<Type*> paramTys{ mInt32PtrTy, mInt32Ty };
+    FunctionType* pCalleeTy = FunctionType::get(mVoidTy, paramTys, false);
+
+    const char* const symName = "BucketManager_StopBucket";
+    Function* pCallee = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction(symName, pCalleeTy));
+    // Register the host function's address under that name if it is not already known.
+    if (sys::DynamicLibrary::SearchForAddressOfSymbol(symName) == nullptr)
+    {
+        sys::DynamicLibrary::AddSymbol(symName, (void*)&BucketManager_StopBucket);
+    }
+
+    CALL(pCallee, { pBucketMgr, pId });
+}
+
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
index 48e0558..f43ef69 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
@@ -59,7 +59,7 @@ Value *VUNDEF_F();
Value *VUNDEF_I();
Value *VUNDEF(Type* ty, uint32_t size);
Value *VUNDEF_IPTR();
-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 6
+#if HAVE_LLVM == 0x306
Value *VINSERT(Value *vec, Value *val, uint64_t index);
#endif
Value *VBROADCAST(Value *src);
@@ -67,6 +67,7 @@ Value *VRCP(Value *va);
Value *VPLANEPS(Value* vA, Value* vB, Value* vC, Value* &vX, Value* &vY);
uint32_t IMMED(Value* i);
+int32_t S_IMMED(Value* i);
Value *GEP(Value* ptr, const std::initializer_list<Value*> &indexList);
Value *GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList);
@@ -115,6 +116,7 @@ Value *PSHUFB(Value* a, Value* b);
Value *PMOVSXBD(Value* a);
Value *PMOVSXWD(Value* a);
Value *PERMD(Value* a, Value* idx);
+Value *PERMPS(Value* a, Value* idx);
Value *CVTPH2PS(Value* a);
Value *CVTPS2PH(Value* a, Value* rounding);
Value *PMAXSD(Value* a, Value* b);
@@ -147,3 +149,7 @@ Value* INT3() { return INTERRUPT(C((uint8_t)3)); }
Value *VEXTRACTI128(Value* a, Constant* imm8);
Value *VINSERTI128(Value* a, Value* b, Constant* imm8);
+
+// rdtsc buckets macros
+void RDTSC_START(Value* pBucketMgr, Value* pId);
+void RDTSC_STOP(Value* pBucketMgr, Value* pId);
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index c5a180e..2c2c56b 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -105,7 +105,7 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
std::vector<Value*> vtxInputIndices(2, C(0));
// GEP
pVtxOut = GEP(pVtxOut, C(0));
- pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, JM()->mVWidth), 0));
+ pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth), 0));
// SWR_FETCH_CONTEXT::pStreams
Value* streams = LOAD(fetchInfo,{0, SWR_FETCH_CONTEXT_pStreams});
@@ -174,7 +174,12 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
verifyFunction(*fetch);
- FunctionPassManager setupPasses(JM()->mpCurrentModule);
+#if HAVE_LLVM == 0x306
+ FunctionPassManager
+#else
+ llvm::legacy::FunctionPassManager
+#endif
+ setupPasses(JM()->mpCurrentModule);
///@todo We don't need the CFG passes for fetch. (e.g. BreakCriticalEdges and CFGSimplification)
setupPasses.add(createBreakCriticalEdgesPass());
@@ -186,7 +191,12 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
JitManager::DumpToFile(fetch, "se");
- FunctionPassManager optPasses(JM()->mpCurrentModule);
+#if HAVE_LLVM == 0x306
+ FunctionPassManager
+#else
+ llvm::legacy::FunctionPassManager
+#endif
+ optPasses(JM()->mpCurrentModule);
///@todo Haven't touched these either. Need to remove some of these and add others.
optPasses.add(createCFGSimplificationPass());
@@ -220,8 +230,8 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* fet
SWRL::UncheckedFixedVector<Value*, 16> vectors;
- std::vector<Constant*> pMask(JM()->mVWidth);
- for(uint32_t i = 0; i < JM()->mVWidth; ++i)
+ std::vector<Constant*> pMask(mVWidth);
+ for(uint32_t i = 0; i < mVWidth; ++i)
{
pMask[i] = (C(i < 4 ? i : 4));
}
@@ -254,7 +264,7 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* fet
Value* startVertexOffset = MUL(Z_EXT(startVertex, mInt64Ty), stride);
// Load from the stream.
- for(uint32_t lane = 0; lane < JM()->mVWidth; ++lane)
+ for(uint32_t lane = 0; lane < mVWidth; ++lane)
{
// Get index
Value* index = VEXTRACT(vIndices, C(lane));
@@ -380,44 +390,44 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* fet
vectors.push_back(wvec);
}
- std::vector<Constant*> v01Mask(JM()->mVWidth);
- std::vector<Constant*> v23Mask(JM()->mVWidth);
- std::vector<Constant*> v02Mask(JM()->mVWidth);
- std::vector<Constant*> v13Mask(JM()->mVWidth);
+ std::vector<Constant*> v01Mask(mVWidth);
+ std::vector<Constant*> v23Mask(mVWidth);
+ std::vector<Constant*> v02Mask(mVWidth);
+ std::vector<Constant*> v13Mask(mVWidth);
// Concatenate the vectors together.
elements[0] = VUNDEF_F();
elements[1] = VUNDEF_F();
elements[2] = VUNDEF_F();
elements[3] = VUNDEF_F();
- for(uint32_t b = 0, num4Wide = JM()->mVWidth / 4; b < num4Wide; ++b)
+ for(uint32_t b = 0, num4Wide = mVWidth / 4; b < num4Wide; ++b)
{
v01Mask[4 * b + 0] = C(0 + 4 * b);
v01Mask[4 * b + 1] = C(1 + 4 * b);
- v01Mask[4 * b + 2] = C(0 + 4 * b + JM()->mVWidth);
- v01Mask[4 * b + 3] = C(1 + 4 * b + JM()->mVWidth);
+ v01Mask[4 * b + 2] = C(0 + 4 * b + mVWidth);
+ v01Mask[4 * b + 3] = C(1 + 4 * b + mVWidth);
v23Mask[4 * b + 0] = C(2 + 4 * b);
v23Mask[4 * b + 1] = C(3 + 4 * b);
- v23Mask[4 * b + 2] = C(2 + 4 * b + JM()->mVWidth);
- v23Mask[4 * b + 3] = C(3 + 4 * b + JM()->mVWidth);
+ v23Mask[4 * b + 2] = C(2 + 4 * b + mVWidth);
+ v23Mask[4 * b + 3] = C(3 + 4 * b + mVWidth);
v02Mask[4 * b + 0] = C(0 + 4 * b);
v02Mask[4 * b + 1] = C(2 + 4 * b);
- v02Mask[4 * b + 2] = C(0 + 4 * b + JM()->mVWidth);
- v02Mask[4 * b + 3] = C(2 + 4 * b + JM()->mVWidth);
+ v02Mask[4 * b + 2] = C(0 + 4 * b + mVWidth);
+ v02Mask[4 * b + 3] = C(2 + 4 * b + mVWidth);
v13Mask[4 * b + 0] = C(1 + 4 * b);
v13Mask[4 * b + 1] = C(3 + 4 * b);
- v13Mask[4 * b + 2] = C(1 + 4 * b + JM()->mVWidth);
- v13Mask[4 * b + 3] = C(3 + 4 * b + JM()->mVWidth);
+ v13Mask[4 * b + 2] = C(1 + 4 * b + mVWidth);
+ v13Mask[4 * b + 3] = C(3 + 4 * b + mVWidth);
- std::vector<Constant*> iMask(JM()->mVWidth);
- for(uint32_t i = 0; i < JM()->mVWidth; ++i)
+ std::vector<Constant*> iMask(mVWidth);
+ for(uint32_t i = 0; i < mVWidth; ++i)
{
if(((4 * b) <= i) && (i < (4 * (b + 1))))
{
- iMask[i] = C(i % 4 + JM()->mVWidth);
+ iMask[i] = C(i % 4 + mVWidth);
}
else
{
@@ -805,7 +815,7 @@ Value* FetchJit::GetSimdValid8bitIndices(Value* pIndices, Value* pLastIndex)
STORE(C((uint8_t)0), pZeroIndex);
// Load a SIMD of index pointers
- for(int64_t lane = 0; lane < JM()->mVWidth; lane++)
+ for(int64_t lane = 0; lane < mVWidth; lane++)
{
// Calculate the address of the requested index
Value *pIndex = GEP(pIndices, C(lane));
@@ -840,7 +850,7 @@ Value* FetchJit::GetSimdValid16bitIndices(Value* pIndices, Value* pLastIndex)
STORE(C((uint16_t)0), pZeroIndex);
// Load a SIMD of index pointers
- for(int64_t lane = 0; lane < JM()->mVWidth; lane++)
+ for(int64_t lane = 0; lane < mVWidth; lane++)
{
// Calculate the address of the requested index
Value *pIndex = GEP(pIndices, C(lane));
@@ -925,13 +935,13 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
const uint32_t (&swizzle)[4] = std::get<9>(args);
// cast types
- Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), JM()->mVWidth);
- Type* v32x8Ty = VectorType::get(mInt8Ty, JM()->mVWidth * 4 ); // vwidth is units of 32 bits
+ Type* vGatherTy = mSimdInt32Ty;
+ Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4 ); // vwidth is units of 32 bits
// have to do extra work for sign extending
if ((extendType == Instruction::CastOps::SExt) || (extendType == Instruction::CastOps::SIToFP)){
- Type* v16x8Ty = VectorType::get(mInt8Ty, JM()->mVWidth * 2); // 8x16bit ints in a 128bit lane
- Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), JM()->mVWidth / 4); // vwidth is units of 32 bits
+ Type* v16x8Ty = VectorType::get(mInt8Ty, mVWidth * 2); // 8x16bit ints in a 128bit lane
+ Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits
// shuffle mask, including any swizzling
const char x = (char)swizzle[0]; const char y = (char)swizzle[1];
@@ -1138,8 +1148,8 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
Value* (&vVertexElements)[4] = std::get<8>(args);
// cast types
- Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), JM()->mVWidth);
- Type* v32x8Ty = VectorType::get(mInt8Ty, JM()->mVWidth * 4); // vwidth is units of 32 bits
+ Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
+ Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
// have to do extra work for sign extending
if ((extendType == Instruction::CastOps::SExt) || (extendType == Instruction::CastOps::SIToFP)||
@@ -1149,7 +1159,7 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
bool bFP = (extendType == Instruction::CastOps::FPExt) ? true : false;
Type* v8x16Ty = VectorType::get(mInt16Ty, 8); // 8x16bit in a 128bit lane
- Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), JM()->mVWidth / 4); // vwidth is units of 32 bits
+ Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits
// shuffle mask
Value* vConstMask = C<char>({0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py
index 1814b7c..e73b232 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py
@@ -27,7 +27,7 @@ import json as JSON
import operator
header = r"""/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -84,16 +84,16 @@ inst_aliases = {
}
intrinsics = [
- ["VGATHERPS", "x86_avx2_gather_d_ps_256", ["src", "pBase", "indices", "mask", "scale"]],
+ ["VGATHERPS", "x86_avx2_gather_d_ps_256", ["src", "pBase", "indices", "mask", "scale"]],
["VGATHERDD", "x86_avx2_gather_d_d_256", ["src", "pBase", "indices", "mask", "scale"]],
- ["VSQRTPS", "x86_avx_sqrt_ps_256", ["a"]],
- ["VRSQRTPS", "x86_avx_rsqrt_ps_256", ["a"]],
- ["VRCPPS", "x86_avx_rcp_ps_256", ["a"]],
- ["VMINPS", "x86_avx_min_ps_256", ["a", "b"]],
- ["VMAXPS", "x86_avx_max_ps_256", ["a", "b"]],
- ["VPMINSD", "x86_avx2_pmins_d", ["a", "b"]],
- ["VPMAXSD", "x86_avx2_pmaxs_d", ["a", "b"]],
- ["VROUND", "x86_avx_round_ps_256", ["a", "rounding"]],
+ ["VSQRTPS", "x86_avx_sqrt_ps_256", ["a"]],
+ ["VRSQRTPS", "x86_avx_rsqrt_ps_256", ["a"]],
+ ["VRCPPS", "x86_avx_rcp_ps_256", ["a"]],
+ ["VMINPS", "x86_avx_min_ps_256", ["a", "b"]],
+ ["VMAXPS", "x86_avx_max_ps_256", ["a", "b"]],
+ ["VPMINSD", "x86_avx2_pmins_d", ["a", "b"]],
+ ["VPMAXSD", "x86_avx2_pmaxs_d", ["a", "b"]],
+ ["VROUND", "x86_avx_round_ps_256", ["a", "rounding"]],
["VCMPPS", "x86_avx_cmp_ps_256", ["a", "b", "cmpop"]],
["VBLENDVPS", "x86_avx_blendv_ps_256", ["a", "b", "mask"]],
["BEXTR_32", "x86_bmi_bextr_32", ["src", "control"]],
@@ -103,6 +103,7 @@ intrinsics = [
["VPMOVSXBD", "x86_avx2_pmovsxbd", ["a"]], # sign extend packed 8bit components
["VPMOVSXWD", "x86_avx2_pmovsxwd", ["a"]], # sign extend packed 16bit components
["VPERMD", "x86_avx2_permd", ["idx", "a"]],
+ ["VPERMPS", "x86_avx2_permps", ["idx", "a"]],
["VCVTPH2PS", "x86_vcvtph2ps_256", ["a"]],
["VCVTPS2PH", "x86_vcvtps2ph_256", ["a", "round"]],
["VHSUBPS", "x86_avx_hsub_ps_256", ["a", "b"]],
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_types.py b/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_types.py
index 7bba435..0b53a92 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_types.py
+++ b/src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_types.py
@@ -28,7 +28,7 @@ import operator
header = r"""
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
index 6c5f22b..36baa8d 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
@@ -293,7 +293,13 @@ struct StreamOutJit : public Builder
JitManager::DumpToFile(soFunc, "SoFunc");
- FunctionPassManager passes(JM()->mpCurrentModule);
+#if HAVE_LLVM == 0x306
+ FunctionPassManager
+#else
+ llvm::legacy::FunctionPassManager
+#endif
+ passes(JM()->mpCurrentModule);
+
passes.add(createBreakCriticalEdgesPass());
passes.add(createCFGSimplificationPass());
passes.add(createEarlyCSEPass());