diff options
Diffstat (limited to 'libs/rs')
| -rw-r--r-- | libs/rs/java/tests/src/com/android/rs/test/math.rs | 140 | ||||
| -rw-r--r-- | libs/rs/rsContext.cpp | 27 | ||||
| -rw-r--r-- | libs/rs/rsLocklessFifo.cpp | 7 | ||||
| -rw-r--r-- | libs/rs/rsScriptC.cpp | 32 | ||||
| -rw-r--r-- | libs/rs/rsScriptC.h | 7 | ||||
| -rw-r--r-- | libs/rs/rsScriptC_Lib.cpp | 532 | ||||
| -rw-r--r-- | libs/rs/rsScriptC_LibCL.cpp | 4 | ||||
| -rw-r--r-- | libs/rs/scriptc/rs_cl.rsh | 759 | ||||
| -rw-r--r-- | libs/rs/scriptc/rs_core.rsh | 984 |
9 files changed, 975 insertions, 1517 deletions
diff --git a/libs/rs/java/tests/src/com/android/rs/test/math.rs b/libs/rs/java/tests/src/com/android/rs/test/math.rs index 02993fe..8cad82b 100644 --- a/libs/rs/java/tests/src/com/android/rs/test/math.rs +++ b/libs/rs/java/tests/src/com/android/rs/test/math.rs @@ -12,6 +12,31 @@ volatile int2 i2; volatile int3 i3; volatile int4 i4; +volatile uint ui1; +volatile uint2 ui2; +volatile uint3 ui3; +volatile uint4 ui4; + +volatile short s1; +volatile short2 s2; +volatile short3 s3; +volatile short4 s4; + +volatile ushort us1; +volatile ushort2 us2; +volatile ushort3 us3; +volatile ushort4 us4; + +volatile char c1; +volatile char2 c2; +volatile char3 c3; +volatile char4 c4; + +volatile uchar uc1; +volatile uchar2 uc2; +volatile uchar3 uc3; +volatile uchar4 uc4; + #define TEST_FN_FUNC_FN(fnc) \ rsDebug("Testing " #fnc, 0); \ f1 = fnc(f1); \ @@ -168,9 +193,124 @@ static bool test_fp_math(uint32_t index) { return failed; } +#define DECL_INT(prefix) \ +volatile char prefix##_c_1 = 1; \ +volatile char2 prefix##_c_2 = 1; \ +volatile char3 prefix##_c_3 = 1; \ +volatile char4 prefix##_c_4 = 1; \ +volatile uchar prefix##_uc_1 = 1; \ +volatile uchar2 prefix##_uc_2 = 1; \ +volatile uchar3 prefix##_uc_3 = 1; \ +volatile uchar4 prefix##_uc_4 = 1; \ +volatile short prefix##_s_1 = 1; \ +volatile short2 prefix##_s_2 = 1; \ +volatile short3 prefix##_s_3 = 1; \ +volatile short4 prefix##_s_4 = 1; \ +volatile ushort prefix##_us_1 = 1; \ +volatile ushort2 prefix##_us_2 = 1; \ +volatile ushort3 prefix##_us_3 = 1; \ +volatile ushort4 prefix##_us_4 = 1; \ +volatile int prefix##_i_1 = 1; \ +volatile int2 prefix##_i_2 = 1; \ +volatile int3 prefix##_i_3 = 1; \ +volatile int4 prefix##_i_4 = 1; \ +volatile uint prefix##_ui_1 = 1; \ +volatile uint2 prefix##_ui_2 = 1; \ +volatile uint3 prefix##_ui_3 = 1; \ +volatile uint4 prefix##_ui_4 = 1; \ +volatile long prefix##_l_1 = 1; \ +volatile ulong prefix##_ul_1 = 1; + +#define TEST_INT_OP_TYPE(op, type) \ +rsDebug("Testing " #op " for " #type "1", i++); \ +res_##type##_1 = src1_##type##_1 op src2_##type##_1; \ +rsDebug("Testing " #op " for " #type "2", i++); \ +res_##type##_2 = src1_##type##_2 op src2_##type##_2; \ +rsDebug("Testing " #op " for " #type "3", i++); \ +res_##type##_3 = src1_##type##_3 op src2_##type##_3; \ +rsDebug("Testing " #op " for " #type "4", i++); \ +res_##type##_4 = src1_##type##_4 op src2_##type##_4; + +#define TEST_INT_OP(op) \ +TEST_INT_OP_TYPE(op, c) \ +TEST_INT_OP_TYPE(op, uc) \ +TEST_INT_OP_TYPE(op, s) \ +TEST_INT_OP_TYPE(op, us) \ +TEST_INT_OP_TYPE(op, i) \ +TEST_INT_OP_TYPE(op, ui) \ +rsDebug("Testing " #op " for l1", i++); \ +res_l_1 = src1_l_1 op src2_l_1; \ +rsDebug("Testing " #op " for ul1", i++); \ +res_ul_1 = src1_ul_1 op src2_ul_1; + +DECL_INT(res) +DECL_INT(src1) +DECL_INT(src2) + +static bool test_basic_operators() { + bool failed = false; + int i = 0; + + TEST_INT_OP(+); + TEST_INT_OP(-); + TEST_INT_OP(*); + TEST_INT_OP(/); + TEST_INT_OP(%); + TEST_INT_OP(<<); + TEST_INT_OP(>>); + + if (failed) { + rsDebug("test_basic_operators FAILED", 0); + } + else { + rsDebug("test_basic_operators PASSED", 0); + } + + return failed; +} + +#define TEST_CVT(to, from, type) \ +rsDebug("Testing convert from " #from " to " #to, 0); \ +to##1 = from##1; \ +to##2 = convert_##type##2(from##2); \ +to##3 = convert_##type##3(from##3); \ +to##4 = convert_##type##4(from##4); + +#define TEST_CVT_MATRIX(to, type) \ +TEST_CVT(to, c, type); \ +TEST_CVT(to, uc, type); \ +TEST_CVT(to, s, type); \ +TEST_CVT(to, us, type); \ +TEST_CVT(to, i, type); \ +TEST_CVT(to, ui, type); \ +TEST_CVT(to, f, type); \ + +static bool test_convert() { + bool failed = false; + + TEST_CVT_MATRIX(c, char); + TEST_CVT_MATRIX(uc, uchar); + TEST_CVT_MATRIX(s, short); + TEST_CVT_MATRIX(us, ushort); + TEST_CVT_MATRIX(i, int); + TEST_CVT_MATRIX(ui, uint); + TEST_CVT_MATRIX(f, float); + + if (failed) { + rsDebug("test_convert FAILED", 0); + } + else { + rsDebug("test_convert PASSED", 0); + } + + return failed; +} + void math_test(uint32_t index, int test_num) { bool failed = false; + failed |= test_convert(); failed |= test_fp_math(index); + failed |= test_basic_operators(); if (failed) { rsSendToClientBlocking(RS_MSG_TEST_FAILED); diff --git a/libs/rs/rsContext.cpp b/libs/rs/rsContext.cpp index 3acb624..40cb5c7 100644 --- a/libs/rs/rsContext.cpp +++ b/libs/rs/rsContext.cpp @@ -201,9 +201,9 @@ bool Context::initGLThread() { mGL.mExtensions = glGetString(GL_EXTENSIONS); //LOGV("EGL Version %i %i", mEGL.mMajorVersion, mEGL.mMinorVersion); - LOGV("GL Version %s", mGL.mVersion); + //LOGV("GL Version %s", mGL.mVersion); //LOGV("GL Vendor %s", mGL.mVendor); - LOGV("GL Renderer %s", mGL.mRenderer); + //LOGV("GL Renderer %s", mGL.mRenderer); //LOGV("GL Extensions %s", mGL.mExtensions); const char *verptr = NULL; @@ -468,7 +468,6 @@ void * Context::threadProc(void *vrsc) { return NULL; } - rsc->mScriptC.init(rsc); if (rsc->mIsGraphicsContext) { rsc->mStateRaster.init(rsc); rsc->setProgramRaster(NULL); @@ -528,7 +527,7 @@ void * Context::threadProc(void *vrsc) { } void Context::destroyWorkerThreadResources() { - LOGV("destroyWorkerThreadResources 1"); + //LOGV("destroyWorkerThreadResources 1"); if (mIsGraphicsContext) { mRaster.clear(); mFragment.clear(); @@ -544,7 +543,7 @@ void Context::destroyWorkerThreadResources() { mShaderCache.cleanupAll(); } ObjectBase::zeroAllUserRef(this); - LOGV("destroyWorkerThreadResources 2"); + //LOGV("destroyWorkerThreadResources 2"); mExit = true; } @@ -552,7 +551,7 @@ void * Context::helperThreadProc(void *vrsc) { Context *rsc = static_cast<Context *>(vrsc); uint32_t idx = (uint32_t)android_atomic_inc(&rsc->mWorkers.mLaunchCount); - LOGV("RS helperThread starting %p idx=%i", rsc, idx); + //LOGV("RS helperThread starting %p idx=%i", rsc, idx); rsc->mWorkers.mLaunchSignals[idx].init(); rsc->mWorkers.mNativeThreadId[idx] = gettid(); @@ -573,7 +572,7 @@ void * Context::helperThreadProc(void *vrsc) { LOGE("pthread_setspecific %i", status); } - while (rsc->mRunning) { + while (!rsc->mExit) { rsc->mWorkers.mLaunchSignals[idx].wait(); if (rsc->mWorkers.mLaunchCallback) { rsc->mWorkers.mLaunchCallback(rsc->mWorkers.mLaunchData, idx); @@ -582,7 +581,7 @@ void * Context::helperThreadProc(void *vrsc) { rsc->mWorkers.mCompleteSignal.set(); } - LOGV("RS helperThread exiting %p idx=%i", rsc, idx); + //LOGV("RS helperThread exited %p idx=%i", rsc, idx); return NULL; } @@ -730,6 +729,18 @@ Context::~Context() { mIO.shutdown(); int status = pthread_join(mThreadId, &res); + // Cleanup compute threads. + mWorkers.mLaunchData = NULL; + mWorkers.mLaunchCallback = NULL; + mWorkers.mRunningCount = (int)mWorkers.mCount; + for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { + mWorkers.mLaunchSignals[ct].set(); + } + for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { + int status = pthread_join(mWorkers.mThreadId[ct], &res); + } + rsAssert(!mWorkers.mRunningCount); + // Global structure cleanup. pthread_mutex_lock(&gInitMutex); if (mDev) { diff --git a/libs/rs/rsLocklessFifo.cpp b/libs/rs/rsLocklessFifo.cpp index eb2af1c..3f88543 100644 --- a/libs/rs/rsLocklessFifo.cpp +++ b/libs/rs/rsLocklessFifo.cpp @@ -76,7 +76,8 @@ uint32_t LocklessCommandFifo::getFreeSpace() const { } bool LocklessCommandFifo::isEmpty() const { - return mPut == mGet; + uint32_t p = android_atomic_acquire_load((int32_t *)&mPut); + return ((uint8_t *)p) == mGet; } @@ -155,7 +156,9 @@ const void * LocklessCommandFifo::get(uint32_t *command, uint32_t *bytesData) { void LocklessCommandFifo::next() { uint32_t bytes = reinterpret_cast<const uint16_t *>(mGet)[1]; - mGet += ((bytes + 3) & ~3) + 4; + + android_atomic_add(((bytes + 3) & ~3) + 4, (int32_t *)&mGet); + //mGet += ((bytes + 3) & ~3) + 4; if (isEmpty()) { mSignalToControl.set(); } diff --git a/libs/rs/rsScriptC.cpp b/libs/rs/rsScriptC.cpp index eecfa16..3858e1c 100644 --- a/libs/rs/rsScriptC.cpp +++ b/libs/rs/rsScriptC.cpp @@ -421,21 +421,9 @@ void ScriptC::Invoke(Context *rsc, uint32_t slot, const void *data, uint32_t len } ScriptCState::ScriptCState() { - mScript.clear(); } ScriptCState::~ScriptCState() { - mScript.clear(); -} - -void ScriptCState::init(Context *rsc) { - clear(rsc); -} - -void ScriptCState::clear(Context *rsc) { - rsAssert(rsc); - mScript.clear(); - mScript.set(new ScriptC(rsc)); } static void* symbolLookup(void* pContext, char const* name) { @@ -608,8 +596,6 @@ namespace android { namespace renderscript { void rsi_ScriptCBegin(Context * rsc) { - ScriptCState *ss = &rsc->mScriptC; - ss->clear(rsc); } void rsi_ScriptCSetText(Context *rsc, const char *text, uint32_t len) { @@ -618,8 +604,8 @@ void rsi_ScriptCSetText(Context *rsc, const char *text, uint32_t len) { char *t = (char *)malloc(len + 1); memcpy(t, text, len); t[len] = 0; - ss->mScript->mEnviroment.mScriptText = t; - ss->mScript->mEnviroment.mScriptTextLength = len; + ss->mScriptText = t; + ss->mScriptLen = len; } @@ -630,17 +616,19 @@ RsScript rsi_ScriptCCreate(Context *rsc, { ScriptCState *ss = &rsc->mScriptC; - ObjectBaseRef<ScriptC> s(ss->mScript); - ss->mScript.clear(); + ScriptC *s = new ScriptC(rsc); + s->mEnviroment.mScriptText = ss->mScriptText; + s->mEnviroment.mScriptTextLength = ss->mScriptLen; + ss->mScriptText = NULL; + ss->mScriptLen = 0; s->incUserRef(); - if (!ss->runCompiler(rsc, s.get(), resName, cacheDir)) { + if (!ss->runCompiler(rsc, s, resName, cacheDir)) { // Error during compile, destroy s and return null. - s->zeroUserRef(); + delete s; return NULL; } - ss->clear(rsc); - return s.get(); + return s; } } diff --git a/libs/rs/rsScriptC.h b/libs/rs/rsScriptC.h index 612e38a..7143c67 100644 --- a/libs/rs/rsScriptC.h +++ b/libs/rs/rsScriptC.h @@ -76,11 +76,9 @@ public: ScriptCState(); ~ScriptCState(); - ObjectBaseRef<ScriptC> mScript; + char * mScriptText; + size_t mScriptLen; - void init(Context *rsc); - - void clear(Context *rsc); bool runCompiler(Context *rsc, ScriptC *s, const char *resName, const char *cacheDir); struct SymbolTable_t { @@ -88,7 +86,6 @@ public: void * mPtr; bool threadable; }; - //static SymbolTable_t gSyms[]; static const SymbolTable_t * lookupSymbol(const char *); static const SymbolTable_t * lookupSymbolCL(const char *); static const SymbolTable_t * lookupSymbolGL(const char *); diff --git a/libs/rs/rsScriptC_Lib.cpp b/libs/rs/rsScriptC_Lib.cpp index f550d98..8a85f6e 100644 --- a/libs/rs/rsScriptC_Lib.cpp +++ b/libs/rs/rsScriptC_Lib.cpp @@ -305,6 +305,14 @@ int SC_modsi3(int a, int b) { return a % b; } +unsigned int SC_udivsi3(unsigned int a, unsigned int b) { + return a / b; +} + +unsigned int SC_umodsi3(unsigned int a, unsigned int b) { + return a % b; +} + int SC_getAllocation(const void *ptr) { GET_TLS(); const Allocation *alloc = sc->ptrToAllocation(ptr); @@ -339,6 +347,489 @@ void SC_ForEach2(RsScript vs, s->runForEach(rsc, ain, aout, usr, call); } + +////////////////////////////////////////////////////////////////////////////// +// Heavy math functions +////////////////////////////////////////////////////////////////////////////// + +typedef struct { + float m[16]; +} rs_matrix4x4; + +typedef struct { + float m[9]; +} rs_matrix3x3; + +typedef struct { + float m[4]; +} rs_matrix2x2; + +static inline void +rsMatrixSet(rs_matrix4x4 *m, uint32_t row, uint32_t col, float v) { + m->m[row * 4 + col] = v; +} + +static inline float +rsMatrixGet(const rs_matrix4x4 *m, uint32_t row, uint32_t col) { + return m->m[row * 4 + col]; +} + +static inline void +rsMatrixSet(rs_matrix3x3 *m, uint32_t row, uint32_t col, float v) { + m->m[row * 3 + col] = v; +} + +static inline float +rsMatrixGet(const rs_matrix3x3 *m, uint32_t row, uint32_t col) { + return m->m[row * 3 + col]; +} + +static inline void +rsMatrixSet(rs_matrix2x2 *m, uint32_t row, uint32_t col, float v) { + m->m[row * 2 + col] = v; +} + +static inline float +rsMatrixGet(const rs_matrix2x2 *m, uint32_t row, uint32_t col) { + return m->m[row * 2 + col]; +} + + +static void SC_MatrixLoadIdentity_4x4(rs_matrix4x4 *m) { + m->m[0] = 1.f; + m->m[1] = 0.f; + m->m[2] = 0.f; + m->m[3] = 0.f; + m->m[4] = 0.f; + m->m[5] = 1.f; + m->m[6] = 0.f; + m->m[7] = 0.f; + m->m[8] = 0.f; + m->m[9] = 0.f; + m->m[10] = 1.f; + m->m[11] = 0.f; + m->m[12] = 0.f; + m->m[13] = 0.f; + m->m[14] = 0.f; + m->m[15] = 1.f; +} + +static void SC_MatrixLoadIdentity_3x3(rs_matrix3x3 *m) { + m->m[0] = 1.f; + m->m[1] = 0.f; + m->m[2] = 0.f; + m->m[3] = 0.f; + m->m[4] = 1.f; + m->m[5] = 0.f; + m->m[6] = 0.f; + m->m[7] = 0.f; + m->m[8] = 1.f; +} + +static void SC_MatrixLoadIdentity_2x2(rs_matrix2x2 *m) { + m->m[0] = 1.f; + m->m[1] = 0.f; + m->m[2] = 0.f; + m->m[3] = 1.f; +} + +static void SC_MatrixLoad_4x4_f(rs_matrix4x4 *m, const float *v) { + m->m[0] = v[0]; + m->m[1] = v[1]; + m->m[2] = v[2]; + m->m[3] = v[3]; + m->m[4] = v[4]; + m->m[5] = v[5]; + m->m[6] = v[6]; + m->m[7] = v[7]; + m->m[8] = v[8]; + m->m[9] = v[9]; + m->m[10] = v[10]; + m->m[11] = v[11]; + m->m[12] = v[12]; + m->m[13] = v[13]; + m->m[14] = v[14]; + m->m[15] = v[15]; +} + +static void SC_MatrixLoad_3x3_f(rs_matrix3x3 *m, const float *v) { + m->m[0] = v[0]; + m->m[1] = v[1]; + m->m[2] = v[2]; + m->m[3] = v[3]; + m->m[4] = v[4]; + m->m[5] = v[5]; + m->m[6] = v[6]; + m->m[7] = v[7]; + m->m[8] = v[8]; +} + +static void SC_MatrixLoad_2x2_f(rs_matrix2x2 *m, const float *v) { + m->m[0] = v[0]; + m->m[1] = v[1]; + m->m[2] = v[2]; + m->m[3] = v[3]; +} + +static void SC_MatrixLoad_4x4_4x4(rs_matrix4x4 *m, const rs_matrix4x4 *v) { + m->m[0] = v->m[0]; + m->m[1] = v->m[1]; + m->m[2] = v->m[2]; + m->m[3] = v->m[3]; + m->m[4] = v->m[4]; + m->m[5] = v->m[5]; + m->m[6] = v->m[6]; + m->m[7] = v->m[7]; + m->m[8] = v->m[8]; + m->m[9] = v->m[9]; + m->m[10] = v->m[10]; + m->m[11] = v->m[11]; + m->m[12] = v->m[12]; + m->m[13] = v->m[13]; + m->m[14] = v->m[14]; + m->m[15] = v->m[15]; +} + +static void SC_MatrixLoad_4x4_3x3(rs_matrix4x4 *m, const rs_matrix3x3 *v) { + m->m[0] = v->m[0]; + m->m[1] = v->m[1]; + m->m[2] = v->m[2]; + m->m[3] = 0.f; + m->m[4] = v->m[3]; + m->m[5] = v->m[4]; + m->m[6] = v->m[5]; + m->m[7] = 0.f; + m->m[8] = v->m[6]; + m->m[9] = v->m[7]; + m->m[10] = v->m[8]; + m->m[11] = 0.f; + m->m[12] = 0.f; + m->m[13] = 0.f; + m->m[14] = 0.f; + m->m[15] = 1.f; +} + +static void SC_MatrixLoad_4x4_2x2(rs_matrix4x4 *m, const rs_matrix2x2 *v) { + m->m[0] = v->m[0]; + m->m[1] = v->m[1]; + m->m[2] = 0.f; + m->m[3] = 0.f; + m->m[4] = v->m[2]; + m->m[5] = v->m[3]; + m->m[6] = 0.f; + m->m[7] = 0.f; + m->m[8] = 0.f; + m->m[9] = 0.f; + m->m[10] = 1.f; + m->m[11] = 0.f; + m->m[12] = 0.f; + m->m[13] = 0.f; + m->m[14] = 0.f; + m->m[15] = 1.f; +} + +static void SC_MatrixLoad_3x3_3x3(rs_matrix3x3 *m, const rs_matrix3x3 *v) { + m->m[0] = v->m[0]; + m->m[1] = v->m[1]; + m->m[2] = v->m[2]; + m->m[3] = v->m[3]; + m->m[4] = v->m[4]; + m->m[5] = v->m[5]; + m->m[6] = v->m[6]; + m->m[7] = v->m[7]; + m->m[8] = v->m[8]; +} + +static void SC_MatrixLoad_2x2_2x2(rs_matrix2x2 *m, const rs_matrix2x2 *v) { + m->m[0] = v->m[0]; + m->m[1] = v->m[1]; + m->m[2] = v->m[2]; + m->m[3] = v->m[3]; +} + +static void SC_MatrixLoadRotate(rs_matrix4x4 *m, float rot, float x, float y, float z) { + float c, s; + m->m[3] = 0; + m->m[7] = 0; + m->m[11]= 0; + m->m[12]= 0; + m->m[13]= 0; + m->m[14]= 0; + m->m[15]= 1; + rot *= (float)(M_PI / 180.0f); + c = cos(rot); + s = sin(rot); + + const float len = x*x + y*y + z*z; + if (len != 1) { + const float recipLen = 1.f / sqrt(len); + x *= recipLen; + y *= recipLen; + z *= recipLen; + } + const float nc = 1.0f - c; + const float xy = x * y; + const float yz = y * z; + const float zx = z * x; + const float xs = x * s; + const float ys = y * s; + const float zs = z * s; + m->m[ 0] = x*x*nc + c; + m->m[ 4] = xy*nc - zs; + m->m[ 8] = zx*nc + ys; + m->m[ 1] = xy*nc + zs; + m->m[ 5] = y*y*nc + c; + m->m[ 9] = yz*nc - xs; + m->m[ 2] = zx*nc - ys; + m->m[ 6] = yz*nc + xs; + m->m[10] = z*z*nc + c; +} + +static void SC_MatrixLoadScale(rs_matrix4x4 *m, float x, float y, float z) { + SC_MatrixLoadIdentity_4x4(m); + m->m[0] = x; + m->m[5] = y; + m->m[10] = z; +} + +static void SC_MatrixLoadTranslate(rs_matrix4x4 *m, float x, float y, float z) { + SC_MatrixLoadIdentity_4x4(m); + m->m[12] = x; + m->m[13] = y; + m->m[14] = z; +} + +static void SC_MatrixLoadMultiply_4x4_4x4_4x4(rs_matrix4x4 *m, const rs_matrix4x4 *lhs, const rs_matrix4x4 *rhs) { + for (int i=0 ; i<4 ; i++) { + float ri0 = 0; + float ri1 = 0; + float ri2 = 0; + float ri3 = 0; + for (int j=0 ; j<4 ; j++) { + const float rhs_ij = rsMatrixGet(rhs, i,j); + ri0 += rsMatrixGet(lhs, j, 0) * rhs_ij; + ri1 += rsMatrixGet(lhs, j, 1) * rhs_ij; + ri2 += rsMatrixGet(lhs, j, 2) * rhs_ij; + ri3 += rsMatrixGet(lhs, j, 3) * rhs_ij; + } + rsMatrixSet(m, i, 0, ri0); + rsMatrixSet(m, i, 1, ri1); + rsMatrixSet(m, i, 2, ri2); + rsMatrixSet(m, i, 3, ri3); + } +} + +static void SC_MatrixMultiply_4x4_4x4(rs_matrix4x4 *m, const rs_matrix4x4 *rhs) { + rs_matrix4x4 mt; + SC_MatrixLoadMultiply_4x4_4x4_4x4(&mt, m, rhs); + SC_MatrixLoad_4x4_4x4(m, &mt); +} + +static void SC_MatrixLoadMultiply_3x3_3x3_3x3(rs_matrix3x3 *m, const rs_matrix3x3 *lhs, const rs_matrix3x3 *rhs) { + for (int i=0 ; i<3 ; i++) { + float ri0 = 0; + float ri1 = 0; + float ri2 = 0; + for (int j=0 ; j<3 ; j++) { + const float rhs_ij = rsMatrixGet(rhs, i,j); + ri0 += rsMatrixGet(lhs, j, 0) * rhs_ij; + ri1 += rsMatrixGet(lhs, j, 1) * rhs_ij; + ri2 += rsMatrixGet(lhs, j, 2) * rhs_ij; + } + rsMatrixSet(m, i, 0, ri0); + rsMatrixSet(m, i, 1, ri1); + rsMatrixSet(m, i, 2, ri2); + } +} + +static void SC_MatrixMultiply_3x3_3x3(rs_matrix3x3 *m, const rs_matrix3x3 *rhs) { + rs_matrix3x3 mt; + SC_MatrixLoadMultiply_3x3_3x3_3x3(&mt, m, rhs); + SC_MatrixLoad_3x3_3x3(m, &mt); +} + +static void SC_MatrixLoadMultiply_2x2_2x2_2x2(rs_matrix2x2 *m, const rs_matrix2x2 *lhs, const rs_matrix2x2 *rhs) { + for (int i=0 ; i<2 ; i++) { + float ri0 = 0; + float ri1 = 0; + for (int j=0 ; j<2 ; j++) { + const float rhs_ij = rsMatrixGet(rhs, i,j); + ri0 += rsMatrixGet(lhs, j, 0) * rhs_ij; + ri1 += rsMatrixGet(lhs, j, 1) * rhs_ij; + } + rsMatrixSet(m, i, 0, ri0); + rsMatrixSet(m, i, 1, ri1); + } +} + +static void SC_MatrixMultiply_2x2_2x2(rs_matrix2x2 *m, const rs_matrix2x2 *rhs) { + rs_matrix2x2 mt; + SC_MatrixLoadMultiply_2x2_2x2_2x2(&mt, m, rhs); + SC_MatrixLoad_2x2_2x2(m, &mt); +} + +static void SC_MatrixRotate(rs_matrix4x4 *m, float rot, float x, float y, float z) { + rs_matrix4x4 m1; + SC_MatrixLoadRotate(&m1, rot, x, y, z); + SC_MatrixMultiply_4x4_4x4(m, &m1); +} + +static void SC_MatrixScale(rs_matrix4x4 *m, float x, float y, float z) { + rs_matrix4x4 m1; + SC_MatrixLoadScale(&m1, x, y, z); + SC_MatrixMultiply_4x4_4x4(m, &m1); +} + +static void SC_MatrixTranslate(rs_matrix4x4 *m, float x, float y, float z) { + rs_matrix4x4 m1; + SC_MatrixLoadTranslate(&m1, x, y, z); + SC_MatrixMultiply_4x4_4x4(m, &m1); +} + +static void SC_MatrixLoadOrtho(rs_matrix4x4 *m, float left, float right, float bottom, float top, float near, float far) { + SC_MatrixLoadIdentity_4x4(m); + m->m[0] = 2.f / (right - left); + m->m[5] = 2.f / (top - bottom); + m->m[10]= -2.f / (far - near); + m->m[12]= -(right + left) / (right - left); + m->m[13]= -(top + bottom) / (top - bottom); + m->m[14]= -(far + near) / (far - near); +} + +static void SC_MatrixLoadFrustum(rs_matrix4x4 *m, float left, float right, float bottom, float top, float near, float far) { + SC_MatrixLoadIdentity_4x4(m); + m->m[0] = 2.f * near / (right - left); + m->m[5] = 2.f * near / (top - bottom); + m->m[8] = (right + left) / (right - left); + m->m[9] = (top + bottom) / (top - bottom); + m->m[10]= -(far + near) / (far - near); + m->m[11]= -1.f; + m->m[14]= -2.f * far * near / (far - near); + m->m[15]= 0.f; +} + +static void SC_MatrixLoadPerspective(rs_matrix4x4* m, float fovy, float aspect, float near, float far) { + float top = near * tan((float) (fovy * M_PI / 360.0f)); + float bottom = -top; + float left = bottom * aspect; + float right = top * aspect; + SC_MatrixLoadFrustum(m, left, right, bottom, top, near, far); +} + + +// Returns true if the matrix was successfully inversed +static bool SC_MatrixInverse_4x4(rs_matrix4x4 *m) { + rs_matrix4x4 result; + + int i, j; + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) { + // computeCofactor for int i, int j + int c0 = (i+1) % 4; + int c1 = (i+2) % 4; + int c2 = (i+3) % 4; + int r0 = (j+1) % 4; + int r1 = (j+2) % 4; + int r2 = (j+3) % 4; + + float minor = (m->m[c0 + 4*r0] * (m->m[c1 + 4*r1] * m->m[c2 + 4*r2] - m->m[c1 + 4*r2] * m->m[c2 + 4*r1])) + - (m->m[c0 + 4*r1] * (m->m[c1 + 4*r0] * m->m[c2 + 4*r2] - m->m[c1 + 4*r2] * m->m[c2 + 4*r0])) + + (m->m[c0 + 4*r2] * (m->m[c1 + 4*r0] * m->m[c2 + 4*r1] - m->m[c1 + 4*r1] * m->m[c2 + 4*r0])); + + float cofactor = (i+j) & 1 ? -minor : minor; + + result.m[4*i + j] = cofactor; + } + } + + // Dot product of 0th column of source and 0th row of result + float det = m->m[0]*result.m[0] + m->m[4]*result.m[1] + + m->m[8]*result.m[2] + m->m[12]*result.m[3]; + + if (fabs(det) < 1e-6) { + return false; + } + + det = 1.0f / det; + for (i = 0; i < 16; ++i) { + m->m[i] = result.m[i] * det; + } + + return true; +} + +// Returns true if the matrix was successfully inversed +static bool SC_MatrixInverseTranspose_4x4(rs_matrix4x4 *m) { + rs_matrix4x4 result; + + int i, j; + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) { + // computeCofactor for int i, int j + int c0 = (i+1) % 4; + int c1 = (i+2) % 4; + int c2 = (i+3) % 4; + int r0 = (j+1) % 4; + int r1 = (j+2) % 4; + int r2 = (j+3) % 4; + + float minor = (m->m[c0 + 4*r0] * (m->m[c1 + 4*r1] * m->m[c2 + 4*r2] - m->m[c1 + 4*r2] * m->m[c2 + 4*r1])) + - (m->m[c0 + 4*r1] * (m->m[c1 + 4*r0] * m->m[c2 + 4*r2] - m->m[c1 + 4*r2] * m->m[c2 + 4*r0])) + + (m->m[c0 + 4*r2] * (m->m[c1 + 4*r0] * m->m[c2 + 4*r1] - m->m[c1 + 4*r1] * m->m[c2 + 4*r0])); + + float cofactor = (i+j) & 1 ? -minor : minor; + + result.m[4*j + i] = cofactor; + } + } + + // Dot product of 0th column of source and 0th column of result + float det = m->m[0]*result.m[0] + m->m[4]*result.m[4] + + m->m[8]*result.m[8] + m->m[12]*result.m[12]; + + if (fabs(det) < 1e-6) { + return false; + } + + det = 1.0f / det; + for (i = 0; i < 16; ++i) { + m->m[i] = result.m[i] * det; + } + + return true; +} + +static void SC_MatrixTranspose_4x4(rs_matrix4x4 *m) { + int i, j; + float temp; + for (i = 0; i < 3; ++i) { + for (j = i + 1; j < 4; ++j) { + temp = m->m[i*4 + j]; + m->m[i*4 + j] = m->m[j*4 + i]; + m->m[j*4 + i] = temp; + } + } +} + +static void SC_MatrixTranspose_3x3(rs_matrix3x3 *m) { + int i, j; + float temp; + for (i = 0; i < 2; ++i) { + for (j = i + 1; j < 3; ++j) { + temp = m->m[i*3 + j]; + m->m[i*3 + j] = m->m[j*4 + i]; + m->m[j*3 + i] = temp; + } + } +} + +static void SC_MatrixTranspose_2x2(rs_matrix2x2 *m) { + float temp = m->m[1]; + m->m[1] = m->m[2]; + m->m[2] = temp; +} + + ////////////////////////////////////////////////////////////////////////////// // Class implementation ////////////////////////////////////////////////////////////////////////////// @@ -363,6 +854,8 @@ void SC_ForEach2(RsScript vs, static ScriptCState::SymbolTable_t gSyms[] = { { "__divsi3", (void *)&SC_divsi3, true }, { "__modsi3", (void *)&SC_modsi3, true }, + { "__udivsi3", (void *)&SC_udivsi3, true }, + { "__umodsi3", (void *)&SC_umodsi3, true }, // allocation { "_Z19rsAllocationGetDimX13rs_allocation", (void *)&SC_allocGetDimX, true }, @@ -463,6 +956,45 @@ static ScriptCState::SymbolTable_t gSyms[] = { { "_Z22rsSendToClientBlockingi", (void *)&SC_toClientBlocking, false }, { "_Z22rsSendToClientBlockingiPKvj", (void *)&SC_toClientBlocking2, false }, + // matrix + { "_Z20rsMatrixLoadIdentityP12rs_matrix4x4", (void *)&SC_MatrixLoadIdentity_4x4, false }, + { "_Z20rsMatrixLoadIdentityP12rs_matrix3x3", (void *)&SC_MatrixLoadIdentity_3x3, false }, + { "_Z20rsMatrixLoadIdentityP12rs_matrix2x2", (void *)&SC_MatrixLoadIdentity_2x2, false }, + + { "_Z12rsMatrixLoadP12rs_matrix4x4PKf", (void *)&SC_MatrixLoad_4x4_f, false }, + { "_Z12rsMatrixLoadP12rs_matrix3x3PKf", (void *)&SC_MatrixLoad_3x3_f, false }, + { "_Z12rsMatrixLoadP12rs_matrix2x2PKf", (void *)&SC_MatrixLoad_2x2_f, false }, + + { "_Z12rsMatrixLoadP12rs_matrix4x4PKS_", (void *)&SC_MatrixLoad_4x4_4x4, false }, + { "_Z12rsMatrixLoadP12rs_matrix4x4PK12rs_matrix3x3", (void *)&SC_MatrixLoad_4x4_3x3, false }, + { "_Z12rsMatrixLoadP12rs_matrix4x4PK12rs_matrix2x2", (void *)&SC_MatrixLoad_4x4_2x2, false }, + { "_Z12rsMatrixLoadP12rs_matrix3x3PKS_", (void *)&SC_MatrixLoad_3x3_3x3, false }, + { "_Z12rsMatrixLoadP12rs_matrix2x2PKS_", (void *)&SC_MatrixLoad_2x2_2x2, false }, + + { "_Z18rsMatrixLoadRotateP12rs_matrix4x4ffff", (void *)&SC_MatrixLoadRotate, false }, + { "_Z17rsMatrixLoadScaleP12rs_matrix4x4fff", (void *)&SC_MatrixLoadScale, false }, + { "_Z21rsMatrixLoadTranslateP12rs_matrix4x4fff", (void *)&SC_MatrixLoadTranslate, false }, + { "_Z14rsMatrixRotateP12rs_matrix4x4ffff", (void *)&SC_MatrixRotate, false }, + { "_Z13rsMatrixScaleP12rs_matrix4x4fff", (void *)&SC_MatrixScale, false }, + { "_Z17rsMatrixTranslateP12rs_matrix4x4fff", (void *)&SC_MatrixTranslate, false }, + + { "_Z20rsMatrixLoadMultiplyP12rs_matrix4x4PKS_S2_", (void *)&SC_MatrixLoadMultiply_4x4_4x4_4x4, false }, + { "_Z16rsMatrixMultiplyP12rs_matrix4x4PKS_", (void *)&SC_MatrixMultiply_4x4_4x4, false }, + { "_Z20rsMatrixLoadMultiplyP12rs_matrix3x3PKS_S2_", (void *)&SC_MatrixLoadMultiply_3x3_3x3_3x3, false }, + { "_Z16rsMatrixMultiplyP12rs_matrix3x3PKS_", (void *)&SC_MatrixMultiply_3x3_3x3, false }, + { "_Z20rsMatrixLoadMultiplyP12rs_matrix2x2PKS_S2_", (void *)&SC_MatrixLoadMultiply_2x2_2x2_2x2, false }, + { "_Z16rsMatrixMultiplyP12rs_matrix2x2PKS_", (void *)&SC_MatrixMultiply_2x2_2x2, false }, + + { "_Z17rsMatrixLoadOrthoP12rs_matrix4x4ffffff", (void *)&SC_MatrixLoadOrtho, false }, + { "_Z19rsMatrixLoadFrustumP12rs_matrix4x4ffffff", (void *)&SC_MatrixLoadFrustum, false }, + { "_Z23rsMatrixLoadPerspectiveP12rs_matrix4x4ffff", (void *)&SC_MatrixLoadPerspective, false }, + + { "_Z15rsMatrixInverseP12rs_matrix4x4", (void *)&SC_MatrixInverse_4x4, false }, + { "_Z24rsMatrixInverseTransposeP12rs_matrix4x4", (void *)&SC_MatrixInverseTranspose_4x4, false }, + { "_Z17rsMatrixTransposeP12rs_matrix4x4", (void *)&SC_MatrixTranspose_4x4, false }, + { "_Z17rsMatrixTransposeP12rs_matrix4x4", (void *)&SC_MatrixTranspose_3x3, false }, + { "_Z17rsMatrixTransposeP12rs_matrix4x4", (void *)&SC_MatrixTranspose_2x2, false }, + { "_Z9rsForEach9rs_script13rs_allocationS0_PKv", (void *)&SC_ForEach, false }, //{ "_Z9rsForEach9rs_script13rs_allocationS0_PKv", (void *)&SC_ForEach2, true }, diff --git a/libs/rs/rsScriptC_LibCL.cpp b/libs/rs/rsScriptC_LibCL.cpp index 02d33b7..57855db 100644 --- a/libs/rs/rsScriptC_LibCL.cpp +++ b/libs/rs/rsScriptC_LibCL.cpp @@ -195,7 +195,7 @@ static ScriptCState::SymbolTable_t gSyms[] = { { "_Z4logbf", (void *)&logbf, true }, { "_Z3madfff", (void *)&SC_mad, true }, { "_Z4modffPf", (void *)&modff, true }, - //{ "nan", (void *)&, true }, + //{ "_Z3nanj", (void *)&SC_nan, true }, { "_Z9nextafterff", (void *)&nextafterf, true }, { "_Z3powff", (void *)&powf, true }, { "_Z9remainderff", (void *)&remainderf, true }, @@ -210,7 +210,7 @@ static ScriptCState::SymbolTable_t gSyms[] = { { "_Z4sqrtf", (void *)&sqrtf, true }, { "_Z3tanf", (void *)&tanf, true }, { "_Z4tanhf", (void *)&tanhf, true }, - { "_Z6tgammaf", (void *)&lgammaf, true }, // FIXME!!! NEEDS TO USE tgammaf + { "_Z6tgammaf", (void *)&tgammaf, true }, { "_Z5truncf", (void *)&truncf, true }, // OpenCL Int diff --git a/libs/rs/scriptc/rs_cl.rsh b/libs/rs/scriptc/rs_cl.rsh index 3c0496d..d78e62e 100644 --- a/libs/rs/scriptc/rs_cl.rsh +++ b/libs/rs/scriptc/rs_cl.rsh @@ -1,30 +1,17 @@ #ifndef __RS_CL_RSH__ #define __RS_CL_RSH__ -#ifdef BCC_PREPARE_BC -#define _RS_STATIC extern -#else -#define _RS_STATIC static -#endif +#define _RS_RUNTIME extern // Conversions #define CVT_FUNC_2(typeout, typein) \ -_RS_STATIC typeout##2 __attribute__((overloadable)) \ - convert_##typeout##2(typein##2 v) { \ - typeout##2 r = {(typeout)v.x, (typeout)v.y}; \ - return r; \ -} \ -_RS_STATIC typeout##3 __attribute__((overloadable)) \ - convert_##typeout##3(typein##3 v) { \ - typeout##3 r = {(typeout)v.x, (typeout)v.y, (typeout)v.z}; \ - return r; \ -} \ -_RS_STATIC typeout##4 __attribute__((overloadable)) \ - convert_##typeout##4(typein##4 v) { \ - typeout##4 r = {(typeout)v.x, (typeout)v.y, (typeout)v.z, \ - (typeout)v.w}; \ - return r; \ -} +_RS_RUNTIME typeout##2 __attribute__((overloadable)) \ + convert_##typeout##2(typein##2 v); \ +_RS_RUNTIME typeout##3 __attribute__((overloadable)) \ + convert_##typeout##3(typein##3 v); \ +_RS_RUNTIME typeout##4 __attribute__((overloadable)) \ + convert_##typeout##4(typein##4 v); + #define CVT_FUNC(type) CVT_FUNC_2(type, uchar) \ CVT_FUNC_2(type, char) \ @@ -45,279 +32,63 @@ CVT_FUNC(float) // Float ops, 6.11.2 #define FN_FUNC_FN(fnc) \ -_RS_STATIC float2 __attribute__((overloadable)) fnc(float2 v) { \ - float2 r; \ - r.x = fnc(v.x); \ - r.y = fnc(v.y); \ - return r; \ -} \ -_RS_STATIC float3 __attribute__((overloadable)) fnc(float3 v) { \ - float3 r; \ - r.x = fnc(v.x); \ - r.y = fnc(v.y); \ - r.z = fnc(v.z); \ - return r; \ -} \ -_RS_STATIC float4 __attribute__((overloadable)) fnc(float4 v) { \ - float4 r; \ - r.x = fnc(v.x); \ - r.y = fnc(v.y); \ - r.z = fnc(v.z); \ - r.w = fnc(v.w); \ - return r; \ -} +_RS_RUNTIME float2 __attribute__((overloadable)) fnc(float2 v); \ +_RS_RUNTIME float3 __attribute__((overloadable)) fnc(float3 v); \ +_RS_RUNTIME float4 __attribute__((overloadable)) fnc(float4 v); #define IN_FUNC_FN(fnc) \ -_RS_STATIC int2 __attribute__((overloadable)) fnc(float2 v) { \ - int2 r; \ - r.x = fnc(v.x); \ - r.y = fnc(v.y); \ - return r; \ -} \ -_RS_STATIC int3 __attribute__((overloadable)) fnc(float3 v) { \ - int3 r; \ - r.x = fnc(v.x); \ - r.y = fnc(v.y); \ - r.z = fnc(v.z); \ - return r; \ -} \ -_RS_STATIC int4 __attribute__((overloadable)) fnc(float4 v) { \ - int4 r; \ - r.x = fnc(v.x); \ - r.y = fnc(v.y); \ - r.z = fnc(v.z); \ - r.w = fnc(v.w); \ - return r; \ -} +_RS_RUNTIME int2 __attribute__((overloadable)) fnc(float2 v); \ +_RS_RUNTIME int3 __attribute__((overloadable)) fnc(float3 v); \ +_RS_RUNTIME int4 __attribute__((overloadable)) fnc(float4 v); #define FN_FUNC_FN_FN(fnc) \ -_RS_STATIC float2 __attribute__((overloadable)) fnc(float2 v1, float2 v2) { \ - float2 r; \ - r.x = fnc(v1.x, v2.x); \ - r.y = fnc(v1.y, v2.y); \ - return r; \ -} \ -_RS_STATIC float3 __attribute__((overloadable)) fnc(float3 v1, float3 v2) { \ - float3 r; \ - r.x = fnc(v1.x, v2.x); \ - r.y = fnc(v1.y, v2.y); \ - r.z = fnc(v1.z, v2.z); \ - return r; \ -} \ -_RS_STATIC float4 __attribute__((overloadable)) fnc(float4 v1, float4 v2) { \ - float4 r; \ - r.x = fnc(v1.x, v2.x); \ - r.y = fnc(v1.y, v2.y); \ - r.z = fnc(v1.z, v2.z); \ - r.w = fnc(v1.w, v2.w); \ - return r; \ -} +_RS_RUNTIME float2 __attribute__((overloadable)) fnc(float2 v1, float2 v2); \ +_RS_RUNTIME float3 __attribute__((overloadable)) fnc(float3 v1, float3 v2); \ +_RS_RUNTIME float4 __attribute__((overloadable)) fnc(float4 v1, float4 v2); #define FN_FUNC_FN_F(fnc) \ -_RS_STATIC float2 __attribute__((overloadable)) fnc(float2 v1, float v2) { \ - float2 r; \ - r.x = fnc(v1.x, v2); \ - r.y = fnc(v1.y, v2); \ - return r; \ -} \ -_RS_STATIC float3 __attribute__((overloadable)) fnc(float3 v1, float v2) { \ - float3 r; \ - r.x = fnc(v1.x, v2); \ - r.y = fnc(v1.y, v2); \ - r.z = fnc(v1.z, v2); \ - return r; \ -} \ -_RS_STATIC float4 __attribute__((overloadable)) fnc(float4 v1, float v2) { \ - float4 r; \ - r.x = fnc(v1.x, v2); \ - r.y = fnc(v1.y, v2); \ - r.z = fnc(v1.z, v2); \ - r.w = fnc(v1.w, v2); \ - return r; \ -} +_RS_RUNTIME float2 __attribute__((overloadable)) fnc(float2 v1, float v2); \ +_RS_RUNTIME float3 __attribute__((overloadable)) fnc(float3 v1, float v2); \ +_RS_RUNTIME float4 __attribute__((overloadable)) fnc(float4 v1, float v2); #define FN_FUNC_FN_IN(fnc) \ -_RS_STATIC float2 __attribute__((overloadable)) fnc(float2 v1, int2 v2) { \ - float2 r; \ - r.x = fnc(v1.x, v2.x); \ - r.y = fnc(v1.y, v2.y); \ - return r; \ -} \ -_RS_STATIC float3 __attribute__((overloadable)) fnc(float3 v1, int3 v2) { \ - float3 r; \ - r.x = fnc(v1.x, v2.x); \ - r.y = fnc(v1.y, v2.y); \ - r.z = fnc(v1.z, v2.z); \ - return r; \ -} \ -_RS_STATIC float4 __attribute__((overloadable)) fnc(float4 v1, int4 v2) { \ - float4 r; \ - r.x = fnc(v1.x, v2.x); \ - r.y = fnc(v1.y, v2.y); \ - r.z = fnc(v1.z, v2.z); \ - r.w = fnc(v1.w, v2.w); \ - return r; \ -} +_RS_RUNTIME float2 __attribute__((overloadable)) fnc(float2 v1, int2 v2); \ +_RS_RUNTIME float3 __attribute__((overloadable)) fnc(float3 v1, int3 v2); \ +_RS_RUNTIME float4 __attribute__((overloadable)) fnc(float4 v1, int4 v2); \ #define FN_FUNC_FN_I(fnc) \ -_RS_STATIC float2 __attribute__((overloadable)) fnc(float2 v1, int v2) { \ - float2 r; \ - r.x = fnc(v1.x, v2); \ - r.y = fnc(v1.y, v2); \ - return r; \ -} \ -_RS_STATIC float3 __attribute__((overloadable)) fnc(float3 v1, int v2) { \ - float3 r; \ - r.x = fnc(v1.x, v2); \ - r.y = fnc(v1.y, v2); \ - r.z = fnc(v1.z, v2); \ - return r; \ -} \ -_RS_STATIC float4 __attribute__((overloadable)) fnc(float4 v1, int v2) { \ - float4 r; \ - r.x = fnc(v1.x, v2); \ - r.y = fnc(v1.y, v2); \ - r.z = fnc(v1.z, v2); \ - r.w = fnc(v1.w, v2); \ - return r; \ -} +_RS_RUNTIME float2 __attribute__((overloadable)) fnc(float2 v1, int v2); \ +_RS_RUNTIME float3 __attribute__((overloadable)) fnc(float3 v1, int v2); \ +_RS_RUNTIME float4 __attribute__((overloadable)) fnc(float4 v1, int v2); #define FN_FUNC_FN_PFN(fnc) \ -_RS_STATIC float2 __attribute__((overloadable)) \ - fnc(float2 v1, float2 *v2) { \ - float2 r; \ - float t[2]; \ - r.x = fnc(v1.x, &t[0]); \ - r.y = fnc(v1.y, &t[1]); \ - v2->x = t[0]; \ - v2->y = t[1]; \ - return r; \ -} \ -_RS_STATIC float3 __attribute__((overloadable)) \ - fnc(float3 v1, float3 *v2) { \ - float3 r; \ - float t[3]; \ - r.x = fnc(v1.x, &t[0]); \ - r.y = fnc(v1.y, &t[1]); \ - r.z = fnc(v1.z, &t[2]); \ - v2->x = t[0]; \ - v2->y = t[1]; \ - v2->z = t[2]; \ - return r; \ -} \ -_RS_STATIC float4 __attribute__((overloadable)) \ - fnc(float4 v1, float4 *v2) { \ - float4 r; \ - float t[4]; \ - r.x = fnc(v1.x, &t[0]); \ - r.y = fnc(v1.y, &t[1]); \ - r.z = fnc(v1.z, &t[2]); \ - r.w = fnc(v1.w, &t[3]); \ - v2->x = t[0]; \ - v2->y = t[1]; \ - v2->z = t[2]; \ - v2->w = t[3]; \ - return r; \ -} +_RS_RUNTIME float2 __attribute__((overloadable)) \ + fnc(float2 v1, float2 *v2); \ +_RS_RUNTIME float3 __attribute__((overloadable)) \ + fnc(float3 v1, float3 *v2); \ +_RS_RUNTIME float4 __attribute__((overloadable)) \ + fnc(float4 v1, float4 *v2); #define FN_FUNC_FN_PIN(fnc) \ -_RS_STATIC float2 __attribute__((overloadable)) fnc(float2 v1, int2 *v2) { \ - float2 r; \ - int t[2]; \ - r.x = fnc(v1.x, &t[0]); \ - r.y = fnc(v1.y, &t[1]); \ - v2->x = t[0]; \ - v2->y = t[1]; \ - return r; \ -} \ -_RS_STATIC float3 __attribute__((overloadable)) fnc(float3 v1, int3 *v2) { \ - float3 r; \ - int t[3]; \ - r.x = fnc(v1.x, &t[0]); \ - r.y = fnc(v1.y, &t[1]); \ - r.z = fnc(v1.z, &t[2]); \ - v2->x = t[0]; \ - v2->y = t[1]; \ - v2->z = t[2]; \ - return r; \ -} \ -_RS_STATIC float4 __attribute__((overloadable)) fnc(float4 v1, int4 *v2) { \ - float4 r; \ - int t[4]; \ - r.x = fnc(v1.x, &t[0]); \ - r.y = fnc(v1.y, &t[1]); \ - r.z = fnc(v1.z, &t[2]); \ - r.w = fnc(v1.w, &t[3]); \ - v2->x = t[0]; \ - v2->y = t[1]; \ - v2->z = t[2]; \ - v2->w = t[3]; \ - return r; \ -} +_RS_RUNTIME float2 __attribute__((overloadable)) fnc(float2 v1, int2 *v2); \ +_RS_RUNTIME float3 __attribute__((overloadable)) fnc(float3 v1, int3 *v2); \ +_RS_RUNTIME float4 __attribute__((overloadable)) fnc(float4 v1, int4 *v2); #define FN_FUNC_FN_FN_FN(fnc) \ -_RS_STATIC float2 __attribute__((overloadable)) \ - fnc(float2 v1, float2 v2, float2 v3) { \ - float2 r; \ - r.x = fnc(v1.x, v2.x, v3.x); \ - r.y = fnc(v1.y, v2.y, v3.y); \ - return r; \ -} \ -_RS_STATIC float3 __attribute__((overloadable)) \ - fnc(float3 v1, float3 v2, float3 v3) { \ - float3 r; \ - r.x = fnc(v1.x, v2.x, v3.x); \ - r.y = fnc(v1.y, v2.y, v3.y); \ - r.z = fnc(v1.z, v2.z, v3.z); \ - return r; \ -} \ -_RS_STATIC float4 __attribute__((overloadable)) \ - fnc(float4 v1, float4 v2, float4 v3) { \ - float4 r; \ - r.x = fnc(v1.x, v2.x, v3.x); \ - r.y = fnc(v1.y, v2.y, v3.y); \ - r.z = fnc(v1.z, v2.z, v3.z); \ - r.w = fnc(v1.w, v2.w, v3.w); \ - return r; \ -} +_RS_RUNTIME float2 __attribute__((overloadable)) \ + fnc(float2 v1, float2 v2, float2 v3); \ +_RS_RUNTIME float3 __attribute__((overloadable)) \ + fnc(float3 v1, float3 v2, float3 v3); \ +_RS_RUNTIME float4 __attribute__((overloadable)) \ + fnc(float4 v1, float4 v2, float4 v3); #define FN_FUNC_FN_FN_PIN(fnc) \ -_RS_STATIC float2 __attribute__((overloadable)) \ - fnc(float2 v1, float2 v2, int2 *v3) { \ - float2 r; \ - int t[2]; \ - r.x = fnc(v1.x, v2.x, &t[0]); \ - r.y = fnc(v1.y, v2.y, &t[1]); \ - v3->x = t[0]; \ - v3->y = t[1]; \ - return r; \ -} \ -_RS_STATIC float3 __attribute__((overloadable)) \ - fnc(float3 v1, float3 v2, int3 *v3) { \ - float3 r; \ - int t[3]; \ - r.x = fnc(v1.x, v2.x, &t[0]); \ - r.y = fnc(v1.y, v2.y, &t[1]); \ - r.z = fnc(v1.z, v2.z, &t[2]); \ - v3->x = t[0]; \ - v3->y = t[1]; \ - v3->z = t[2]; \ - return r; \ -} \ -_RS_STATIC float4 __attribute__((overloadable)) \ - fnc(float4 v1, float4 v2, int4 *v3) { \ - float4 r; \ - int t[4]; \ - r.x = fnc(v1.x, v2.x, &t[0]); \ - r.y = fnc(v1.y, v2.y, &t[1]); \ - r.z = fnc(v1.z, v2.z, &t[2]); \ - r.w = fnc(v1.w, v2.w, &t[3]); \ - v3->x = t[0]; \ - v3->y = t[1]; \ - v3->z = t[2]; \ - v3->w = t[3]; \ - return r; \ -} +_RS_RUNTIME float2 __attribute__((overloadable)) \ + fnc(float2 v1, float2 v2, int2 *v3); \ +_RS_RUNTIME float3 __attribute__((overloadable)) \ + fnc(float3 v1, float3 v2, int3 *v3); \ +_RS_RUNTIME float4 __attribute__((overloadable)) \ + fnc(float4 v1, float4 v2, int4 *v3); extern float __attribute__((overloadable)) acos(float); @@ -326,9 +97,9 @@ FN_FUNC_FN(acos) extern float __attribute__((overloadable)) acosh(float); FN_FUNC_FN(acosh) -_RS_STATIC float __attribute__((overloadable)) acospi(float v) { - return acos(v) / M_PI; -} +_RS_RUNTIME float __attribute__((overloadable)) acospi(float v); + + FN_FUNC_FN(acospi) extern float __attribute__((overloadable)) asin(float); @@ -337,9 +108,8 @@ FN_FUNC_FN(asin) extern float __attribute__((overloadable)) asinh(float); FN_FUNC_FN(asinh) -_RS_STATIC float __attribute__((overloadable)) asinpi(float v) { - return asin(v) / M_PI; -} + +_RS_RUNTIME float __attribute__((overloadable)) asinpi(float v); FN_FUNC_FN(asinpi) extern float __attribute__((overloadable)) atan(float); @@ -351,14 +121,12 @@ FN_FUNC_FN_FN(atan2) extern float __attribute__((overloadable)) atanh(float); FN_FUNC_FN(atanh) -_RS_STATIC float __attribute__((overloadable)) atanpi(float v) { - return atan(v) / M_PI; -} + +_RS_RUNTIME float __attribute__((overloadable)) atanpi(float v); FN_FUNC_FN(atanpi) -_RS_STATIC float __attribute__((overloadable)) atan2pi(float y, float x) { - return atan2(y, x) / M_PI; -} + +_RS_RUNTIME float __attribute__((overloadable)) atan2pi(float y, float x); FN_FUNC_FN_FN(atan2pi) extern float __attribute__((overloadable)) cbrt(float); @@ -376,9 +144,8 @@ FN_FUNC_FN(cos) extern float __attribute__((overloadable)) cosh(float); FN_FUNC_FN(cosh) -_RS_STATIC float __attribute__((overloadable)) cospi(float v) { - return cos(v * M_PI); -} + +_RS_RUNTIME float __attribute__((overloadable)) cospi(float v); FN_FUNC_FN(cospi) extern float __attribute__((overloadable)) erfc(float); @@ -394,9 +161,8 @@ extern float __attribute__((overloadable)) exp2(float); FN_FUNC_FN(exp2) extern float __attribute__((overloadable)) pow(float, float); -_RS_STATIC float __attribute__((overloadable)) exp10(float v) { - return pow(10.f, v); -} + +_RS_RUNTIME float __attribute__((overloadable)) exp10(float v); FN_FUNC_FN(exp10) extern float __attribute__((overloadable)) expm1(float); @@ -425,11 +191,8 @@ FN_FUNC_FN_F(fmin); extern float __attribute__((overloadable)) fmod(float, float); FN_FUNC_FN_FN(fmod) -_RS_STATIC float __attribute__((overloadable)) fract(float v, float *iptr) { - int i = (int)floor(v); - iptr[0] = i; - return fmin(v - i, 0x1.fffffep-1f); -} + +_RS_RUNTIME float __attribute__((overloadable)) fract(float v, float *iptr); FN_FUNC_FN_PFN(fract) extern float __attribute__((overloadable)) frexp(float, int *); @@ -457,9 +220,8 @@ FN_FUNC_FN(log) extern float __attribute__((overloadable)) log10(float); FN_FUNC_FN(log10) -_RS_STATIC float __attribute__((overloadable)) log2(float v) { - return log10(v) / log10(2.f); -} + +_RS_RUNTIME float __attribute__((overloadable)) log2(float v); FN_FUNC_FN(log2) extern float __attribute__((overloadable)) log1p(float); @@ -481,31 +243,15 @@ FN_FUNC_FN_FN(nextafter) FN_FUNC_FN_FN(pow) -_RS_STATIC float __attribute__((overloadable)) pown(float v, int p) { - return pow(v, (float)p); -} -_RS_STATIC float2 __attribute__((overloadable)) pown(float2 v, int2 p) { - return pow(v, (float2)p); -} -_RS_STATIC float3 __attribute__((overloadable)) pown(float3 v, int3 p) { - return pow(v, (float3)p); -} -_RS_STATIC float4 __attribute__((overloadable)) pown(float4 v, int4 p) { - return pow(v, (float4)p); -} - -_RS_STATIC float __attribute__((overloadable)) powr(float v, float p) { - return pow(v, p); -} -_RS_STATIC float2 __attribute__((overloadable)) powr(float2 v, float2 p) { - return pow(v, p); -} -_RS_STATIC float3 __attribute__((overloadable)) powr(float3 v, float3 p) { - return pow(v, p); -} -_RS_STATIC float4 __attribute__((overloadable)) powr(float4 v, float4 p) { - return pow(v, p); -} +_RS_RUNTIME float __attribute__((overloadable)) pown(float v, int p); +_RS_RUNTIME float2 __attribute__((overloadable)) pown(float2 v, int2 p); +_RS_RUNTIME float3 __attribute__((overloadable)) pown(float3 v, int3 p); +_RS_RUNTIME float4 __attribute__((overloadable)) pown(float4 v, int4 p); + +_RS_RUNTIME float __attribute__((overloadable)) powr(float v, float p); +_RS_RUNTIME float2 __attribute__((overloadable)) powr(float2 v, float2 p); +_RS_RUNTIME float3 __attribute__((overloadable)) powr(float3 v, float3 p); +_RS_RUNTIME float4 __attribute__((overloadable)) powr(float4 v, float4 p); extern float __attribute__((overloadable)) remainder(float, float); FN_FUNC_FN_FN(remainder) @@ -516,57 +262,33 @@ FN_FUNC_FN_FN_PIN(remquo) extern float __attribute__((overloadable)) rint(float); FN_FUNC_FN(rint) -_RS_STATIC float __attribute__((overloadable)) rootn(float v, int r) { - return pow(v, 1.f / r); -} -_RS_STATIC float2 __attribute__((overloadable)) rootn(float2 v, int2 r) { - float2 t = {1.f / r.x, 1.f / r.y}; - return pow(v, t); -} -_RS_STATIC float3 __attribute__((overloadable)) rootn(float3 v, int3 r) { - float3 t = {1.f / r.x, 1.f / r.y, 1.f / r.z}; - return pow(v, t); -} -_RS_STATIC float4 __attribute__((overloadable)) rootn(float4 v, int4 r) { - float4 t = {1.f / r.x, 1.f / r.y, 1.f / r.z, 1.f / r.w}; - return pow(v, t); -} + +_RS_RUNTIME float __attribute__((overloadable)) rootn(float v, int r); +_RS_RUNTIME float2 __attribute__((overloadable)) rootn(float2 v, int2 r); +_RS_RUNTIME float3 __attribute__((overloadable)) rootn(float3 v, int3 r); +_RS_RUNTIME float4 __attribute__((overloadable)) rootn(float4 v, int4 r); + extern float __attribute__((overloadable)) round(float); FN_FUNC_FN(round) + extern float __attribute__((overloadable)) sqrt(float); -_RS_STATIC float __attribute__((overloadable)) rsqrt(float v) { - return 1.f / sqrt(v); -} +_RS_RUNTIME float __attribute__((overloadable)) rsqrt(float v); FN_FUNC_FN(rsqrt) extern float __attribute__((overloadable)) sin(float); FN_FUNC_FN(sin) -_RS_STATIC float __attribute__((overloadable)) sincos(float v, float *cosptr) { - *cosptr = cos(v); - return sin(v); -} -_RS_STATIC float2 __attribute__((overloadable)) sincos(float2 v, float2 *cosptr) { - *cosptr = cos(v); - return sin(v); -} -_RS_STATIC float3 __attribute__((overloadable)) sincos(float3 v, float3 *cosptr) { - *cosptr = cos(v); - return sin(v); -} -_RS_STATIC float4 __attribute__((overloadable)) sincos(float4 v, float4 *cosptr) { - *cosptr = cos(v); - return sin(v); -} +_RS_RUNTIME float __attribute__((overloadable)) sincos(float v, float *cosptr); +_RS_RUNTIME float2 __attribute__((overloadable)) sincos(float2 v, float2 *cosptr); +_RS_RUNTIME float3 __attribute__((overloadable)) sincos(float3 v, float3 *cosptr); +_RS_RUNTIME float4 __attribute__((overloadable)) sincos(float4 v, float4 *cosptr); extern float __attribute__((overloadable)) sinh(float); FN_FUNC_FN(sinh) -_RS_STATIC float __attribute__((overloadable)) sinpi(float v) { - return sin(v * M_PI); -} +_RS_RUNTIME float __attribute__((overloadable)) sinpi(float v); FN_FUNC_FN(sinpi) FN_FUNC_FN(sqrt) @@ -577,11 +299,10 @@ FN_FUNC_FN(tan) extern float __attribute__((overloadable)) tanh(float); FN_FUNC_FN(tanh) -_RS_STATIC float __attribute__((overloadable)) tanpi(float v) { - return tan(v * M_PI); -} +_RS_RUNTIME float __attribute__((overloadable)) tanpi(float v); FN_FUNC_FN(tanpi) + extern float __attribute__((overloadable)) tgamma(float); FN_FUNC_FN(tgamma) @@ -592,27 +313,9 @@ FN_FUNC_FN(trunc) #define XN_FUNC_YN(typeout, fnc, typein) \ extern typeout __attribute__((overloadable)) fnc(typein); \ -_RS_STATIC typeout##2 __attribute__((overloadable)) fnc(typein##2 v) { \ - typeout##2 r; \ - r.x = fnc(v.x); \ - r.y = fnc(v.y); \ - return r; \ -} \ -_RS_STATIC typeout##3 __attribute__((overloadable)) fnc(typein##3 v) { \ - typeout##3 r; \ - r.x = fnc(v.x); \ - r.y = fnc(v.y); \ - r.z = fnc(v.z); \ - return r; \ -} \ -_RS_STATIC typeout##4 __attribute__((overloadable)) fnc(typein##4 v) { \ - typeout##4 r; \ - r.x = fnc(v.x); \ - r.y = fnc(v.y); \ - r.z = fnc(v.z); \ - r.w = fnc(v.w); \ - return r; \ -} +_RS_RUNTIME typeout##2 __attribute__((overloadable)) fnc(typein##2 v); \ +_RS_RUNTIME typeout##3 __attribute__((overloadable)) fnc(typein##3 v); \ +_RS_RUNTIME typeout##4 __attribute__((overloadable)) fnc(typein##4 v); #define UIN_FUNC_IN(fnc) \ XN_FUNC_YN(uchar, fnc, char) \ @@ -627,35 +330,16 @@ XN_FUNC_YN(short, fnc, short) \ XN_FUNC_YN(uint, fnc, uint) \ XN_FUNC_YN(int, fnc, int) + #define XN_FUNC_XN_XN_BODY(type, fnc, body) \ -_RS_STATIC type __attribute__((overloadable)) \ - fnc(type v1, type v2) { \ - return body; \ -} \ -_RS_STATIC type##2 __attribute__((overloadable)) \ - fnc(type##2 v1, type##2 v2) { \ - type##2 r; \ - r.x = fnc(v1.x, v2.x); \ - r.y = fnc(v1.y, v2.y); \ - return r; \ -} \ -_RS_STATIC type##3 __attribute__((overloadable)) \ - fnc(type##3 v1, type##3 v2) { \ - type##3 r; \ - r.x = fnc(v1.x, v2.x); \ - r.y = fnc(v1.y, v2.y); \ - r.z = fnc(v1.z, v2.z); \ - return r; \ -} \ -_RS_STATIC type##4 __attribute__((overloadable)) \ - fnc(type##4 v1, type##4 v2) { \ - type##4 r; \ - r.x = fnc(v1.x, v2.x); \ - r.y = fnc(v1.y, v2.y); \ - r.z = fnc(v1.z, v2.z); \ - r.w = fnc(v1.w, v2.w); \ - return r; \ -} +_RS_RUNTIME type __attribute__((overloadable)) \ + fnc(type v1, type v2); \ +_RS_RUNTIME type##2 __attribute__((overloadable)) \ + fnc(type##2 v1, type##2 v2); \ +_RS_RUNTIME type##3 __attribute__((overloadable)) \ + fnc(type##3 v1, type##3 v2); \ +_RS_RUNTIME type##4 __attribute__((overloadable)) \ + fnc(type##4 v1, type##4 v2); #define IN_FUNC_IN_IN_BODY(fnc, body) \ XN_FUNC_XN_XN_BODY(uchar, fnc, body) \ @@ -677,129 +361,35 @@ FN_FUNC_FN_F(max) // 6.11.4 -_RS_STATIC float __attribute__((overloadable)) clamp(float amount, float low, float high) { - return amount < low ? low : (amount > high ? high : amount); -} -_RS_STATIC float2 __attribute__((overloadable)) clamp(float2 amount, float2 low, float2 high) { - float2 r; - r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x); - r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y); - return r; -} -_RS_STATIC float3 __attribute__((overloadable)) clamp(float3 amount, float3 low, float3 high) { - float3 r; - r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x); - r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y); - r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z); - return r; -} -_RS_STATIC float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high) { - float4 r; - r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x); - r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y); - r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z); - r.w = amount.w < low.w ? low.w : (amount.w > high.w ? high.w : amount.w); - return r; -} -_RS_STATIC float2 __attribute__((overloadable)) clamp(float2 amount, float low, float high) { - float2 r; - r.x = amount.x < low ? low : (amount.x > high ? high : amount.x); - r.y = amount.y < low ? low : (amount.y > high ? high : amount.y); - return r; -} -_RS_STATIC float3 __attribute__((overloadable)) clamp(float3 amount, float low, float high) { - float3 r; - r.x = amount.x < low ? low : (amount.x > high ? high : amount.x); - r.y = amount.y < low ? low : (amount.y > high ? high : amount.y); - r.z = amount.z < low ? low : (amount.z > high ? high : amount.z); - return r; -} -_RS_STATIC float4 __attribute__((overloadable)) clamp(float4 amount, float low, float high) { - float4 r; - r.x = amount.x < low ? low : (amount.x > high ? high : amount.x); - r.y = amount.y < low ? low : (amount.y > high ? high : amount.y); - r.z = amount.z < low ? low : (amount.z > high ? high : amount.z); - r.w = amount.w < low ? low : (amount.w > high ? high : amount.w); - return r; -} - -_RS_STATIC float __attribute__((overloadable)) degrees(float radians) { - return radians * (180.f / M_PI); -} +_RS_RUNTIME float __attribute__((overloadable)) clamp(float amount, float low, float high); +_RS_RUNTIME float2 __attribute__((overloadable)) clamp(float2 amount, float2 low, float2 high); +_RS_RUNTIME float3 __attribute__((overloadable)) clamp(float3 amount, float3 low, float3 high); +_RS_RUNTIME float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high); +_RS_RUNTIME float2 __attribute__((overloadable)) clamp(float2 amount, float low, float high); +_RS_RUNTIME float3 __attribute__((overloadable)) clamp(float3 amount, float low, float high); +_RS_RUNTIME float4 __attribute__((overloadable)) clamp(float4 amount, float low, float high); + +_RS_RUNTIME float __attribute__((overloadable)) degrees(float radians); FN_FUNC_FN(degrees) -_RS_STATIC float __attribute__((overloadable)) mix(float start, float stop, float amount) { - return start + (stop - start) * amount; -} -_RS_STATIC float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float2 amount) { - return start + (stop - start) * amount; -} -_RS_STATIC float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float3 amount) { - return start + (stop - start) * amount; -} -_RS_STATIC float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float4 amount) { - return start + (stop - start) * amount; -} -_RS_STATIC float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float amount) { - return start + (stop - start) * amount; -} -_RS_STATIC float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float amount) { - return start + (stop - start) * amount; -} -_RS_STATIC float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float amount) { - return start + (stop - start) * amount; -} - -_RS_STATIC float __attribute__((overloadable)) radians(float degrees) { - return degrees * (M_PI / 180.f); -} +_RS_RUNTIME float __attribute__((overloadable)) mix(float start, float stop, float amount); +_RS_RUNTIME float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float2 amount); +_RS_RUNTIME float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float3 amount); +_RS_RUNTIME float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float4 amount); +_RS_RUNTIME float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float amount); +_RS_RUNTIME float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float amount); +_RS_RUNTIME float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float amount); + +_RS_RUNTIME float __attribute__((overloadable)) radians(float degrees); FN_FUNC_FN(radians) -_RS_STATIC float __attribute__((overloadable)) step(float edge, float v) { - return (v < edge) ? 0.f : 1.f; -} -_RS_STATIC float2 __attribute__((overloadable)) step(float2 edge, float2 v) { - float2 r; - r.x = (v.x < edge.x) ? 0.f : 1.f; - r.y = (v.y < edge.y) ? 0.f : 1.f; - return r; -} -_RS_STATIC float3 __attribute__((overloadable)) step(float3 edge, float3 v) { - float3 r; - r.x = (v.x < edge.x) ? 0.f : 1.f; - r.y = (v.y < edge.y) ? 0.f : 1.f; - r.z = (v.z < edge.z) ? 0.f : 1.f; - return r; -} -_RS_STATIC float4 __attribute__((overloadable)) step(float4 edge, float4 v) { - float4 r; - r.x = (v.x < edge.x) ? 0.f : 1.f; - r.y = (v.y < edge.y) ? 0.f : 1.f; - r.z = (v.z < edge.z) ? 0.f : 1.f; - r.w = (v.w < edge.w) ? 0.f : 1.f; - return r; -} -_RS_STATIC float2 __attribute__((overloadable)) step(float2 edge, float v) { - float2 r; - r.x = (v < edge.x) ? 0.f : 1.f; - r.y = (v < edge.y) ? 0.f : 1.f; - return r; -} -_RS_STATIC float3 __attribute__((overloadable)) step(float3 edge, float v) { - float3 r; - r.x = (v < edge.x) ? 0.f : 1.f; - r.y = (v < edge.y) ? 0.f : 1.f; - r.z = (v < edge.z) ? 0.f : 1.f; - return r; -} -_RS_STATIC float4 __attribute__((overloadable)) step(float4 edge, float v) { - float4 r; - r.x = (v < edge.x) ? 0.f : 1.f; - r.y = (v < edge.y) ? 0.f : 1.f; - r.z = (v < edge.z) ? 0.f : 1.f; - r.w = (v < edge.w) ? 0.f : 1.f; - return r; -} +_RS_RUNTIME float __attribute__((overloadable)) step(float edge, float v); +_RS_RUNTIME float2 __attribute__((overloadable)) step(float2 edge, float2 v); +_RS_RUNTIME float3 __attribute__((overloadable)) step(float3 edge, float3 v); +_RS_RUNTIME float4 __attribute__((overloadable)) step(float4 edge, float4 v); +_RS_RUNTIME float2 __attribute__((overloadable)) step(float2 edge, float v); +_RS_RUNTIME float3 __attribute__((overloadable)) step(float3 edge, float v); +_RS_RUNTIME float4 __attribute__((overloadable)) step(float4 edge, float v); extern float __attribute__((overloadable)) smoothstep(float, float, float); extern float2 __attribute__((overloadable)) smoothstep(float2, float2, float2); @@ -809,82 +399,33 @@ extern float2 __attribute__((overloadable)) smoothstep(float, float, float2); extern float3 __attribute__((overloadable)) smoothstep(float, float, float3); extern float4 __attribute__((overloadable)) smoothstep(float, float, float4); -_RS_STATIC float __attribute__((overloadable)) sign(float v) { - if (v > 0) return 1.f; - if (v < 0) return -1.f; - return v; -} +_RS_RUNTIME float __attribute__((overloadable)) sign(float v); FN_FUNC_FN(sign) // 6.11.5 -_RS_STATIC float3 __attribute__((overloadable)) cross(float3 lhs, float3 rhs) { - float3 r; - r.x = lhs.y * rhs.z - lhs.z * rhs.y; - r.y = lhs.z * rhs.x - lhs.x * rhs.z; - r.z = lhs.x * rhs.y - lhs.y * rhs.x; - return r; -} - -_RS_STATIC float4 __attribute__((overloadable)) cross(float4 lhs, float4 rhs) { - float4 r; - r.x = lhs.y * rhs.z - lhs.z * rhs.y; - r.y = lhs.z * rhs.x - lhs.x * rhs.z; - r.z = lhs.x * rhs.y - lhs.y * rhs.x; - r.w = 0.f; - return r; -} - -_RS_STATIC float __attribute__((overloadable)) dot(float lhs, float rhs) { - return lhs * rhs; -} -_RS_STATIC float __attribute__((overloadable)) dot(float2 lhs, float2 rhs) { - return lhs.x*rhs.x + lhs.y*rhs.y; -} -_RS_STATIC float __attribute__((overloadable)) dot(float3 lhs, float3 rhs) { - return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z; -} -_RS_STATIC float __attribute__((overloadable)) dot(float4 lhs, float4 rhs) { - return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z + lhs.w*rhs.w; -} - -_RS_STATIC float __attribute__((overloadable)) length(float v) { - return v; -} -_RS_STATIC float __attribute__((overloadable)) length(float2 v) { - return sqrt(v.x*v.x + v.y*v.y); -} -_RS_STATIC float __attribute__((overloadable)) length(float3 v) { - return sqrt(v.x*v.x + v.y*v.y + v.z*v.z); -} -_RS_STATIC float __attribute__((overloadable)) length(float4 v) { - return sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w); -} - -_RS_STATIC float __attribute__((overloadable)) distance(float lhs, float rhs) { - return length(lhs - rhs); -} -_RS_STATIC float __attribute__((overloadable)) distance(float2 lhs, float2 rhs) { - return length(lhs - rhs); -} -_RS_STATIC float __attribute__((overloadable)) distance(float3 lhs, float3 rhs) { - return length(lhs - rhs); -} -_RS_STATIC float __attribute__((overloadable)) distance(float4 lhs, float4 rhs) { - return length(lhs - rhs); -} - -_RS_STATIC float __attribute__((overloadable)) normalize(float v) { - return 1.f; -} -_RS_STATIC float2 __attribute__((overloadable)) normalize(float2 v) { - return v / length(v); -} -_RS_STATIC float3 __attribute__((overloadable)) normalize(float3 v) { - return v / length(v); -} -_RS_STATIC float4 __attribute__((overloadable)) normalize(float4 v) { - return v / length(v); -} +_RS_RUNTIME float3 __attribute__((overloadable)) cross(float3 lhs, float3 rhs); + +_RS_RUNTIME float4 __attribute__((overloadable)) cross(float4 lhs, float4 rhs); + +_RS_RUNTIME float __attribute__((overloadable)) dot(float lhs, float rhs); +_RS_RUNTIME float __attribute__((overloadable)) dot(float2 lhs, float2 rhs); +_RS_RUNTIME float __attribute__((overloadable)) dot(float3 lhs, float3 rhs); +_RS_RUNTIME float __attribute__((overloadable)) dot(float4 lhs, float4 rhs); + +_RS_RUNTIME float __attribute__((overloadable)) length(float v); +_RS_RUNTIME float __attribute__((overloadable)) length(float2 v); +_RS_RUNTIME float __attribute__((overloadable)) length(float3 v); +_RS_RUNTIME float __attribute__((overloadable)) length(float4 v); + +_RS_RUNTIME float __attribute__((overloadable)) distance(float lhs, float rhs); +_RS_RUNTIME float __attribute__((overloadable)) distance(float2 lhs, float2 rhs); +_RS_RUNTIME float __attribute__((overloadable)) distance(float3 lhs, float3 rhs); +_RS_RUNTIME float __attribute__((overloadable)) distance(float4 lhs, float4 rhs); + +_RS_RUNTIME float __attribute__((overloadable)) normalize(float v); +_RS_RUNTIME float2 __attribute__((overloadable)) normalize(float2 v); +_RS_RUNTIME float3 __attribute__((overloadable)) normalize(float3 v); +_RS_RUNTIME float4 __attribute__((overloadable)) normalize(float4 v); #undef CVT_FUNC #undef CVT_FUNC_2 @@ -903,6 +444,6 @@ _RS_STATIC float4 __attribute__((overloadable)) normalize(float4 v) { #undef IN_FUNC_IN #undef XN_FUNC_XN_XN_BODY #undef IN_FUNC_IN_IN_BODY -#undef _RS_STATIC +#undef _RS_RUNTIME #endif diff --git a/libs/rs/scriptc/rs_core.rsh b/libs/rs/scriptc/rs_core.rsh index f3e0ab0..e32d435 100644 --- a/libs/rs/scriptc/rs_core.rsh +++ b/libs/rs/scriptc/rs_core.rsh @@ -1,11 +1,7 @@ #ifndef __RS_CORE_RSH__ #define __RS_CORE_RSH__ -#ifdef BCC_PREPARE_BC -#define _RS_STATIC extern -#else -#define _RS_STATIC static -#endif +#define _RS_RUNTIME extern // Debugging, print to the LOG a description string and a value. extern void __attribute__((overloadable)) @@ -41,56 +37,19 @@ extern void __attribute__((overloadable)) #define RS_DEBUG(a) rsDebug(#a, a) #define RS_DEBUG_MARKER rsDebug(__FILE__, __LINE__) -_RS_STATIC void __attribute__((overloadable)) rsDebug(const char *s, float2 v) { - rsDebug(s, v.x, v.y); -} -_RS_STATIC void __attribute__((overloadable)) rsDebug(const char *s, float3 v) { - rsDebug(s, v.x, v.y, v.z); -} -_RS_STATIC void __attribute__((overloadable)) rsDebug(const char *s, float4 v) { - rsDebug(s, v.x, v.y, v.z, v.w); -} - -_RS_STATIC uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b) -{ - uchar4 c; - c.x = (uchar)(r * 255.f); - c.y = (uchar)(g * 255.f); - c.z = (uchar)(b * 255.f); - c.w = 255; - return c; -} - -_RS_STATIC uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a) -{ - uchar4 c; - c.x = (uchar)(r * 255.f); - c.y = (uchar)(g * 255.f); - c.z = (uchar)(b * 255.f); - c.w = (uchar)(a * 255.f); - return c; -} - -_RS_STATIC uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color) -{ - color *= 255.f; - uchar4 c = {color.x, color.y, color.z, 255}; - return c; -} - -_RS_STATIC uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color) -{ - color *= 255.f; - uchar4 c = {color.x, color.y, color.z, color.w}; - return c; -} - -_RS_STATIC float4 rsUnpackColor8888(uchar4 c) -{ - float4 ret = (float4)0.0039156862745f; - ret *= convert_float4(c); - return ret; -} +_RS_RUNTIME void __attribute__((overloadable)) rsDebug(const char *s, float2 v); +_RS_RUNTIME void __attribute__((overloadable)) rsDebug(const char *s, float3 v); +_RS_RUNTIME void __attribute__((overloadable)) rsDebug(const char *s, float4 v); + +_RS_RUNTIME uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b); + +_RS_RUNTIME uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a); + +_RS_RUNTIME uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color); + +_RS_RUNTIME uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color); + +_RS_RUNTIME float4 rsUnpackColor8888(uchar4 c); //extern uchar4 __attribute__((overloadable)) rsPackColorTo565(float r, float g, float b); //extern uchar4 __attribute__((overloadable)) rsPackColorTo565(float3); @@ -101,830 +60,117 @@ _RS_STATIC float4 rsUnpackColor8888(uchar4 c) // Matrix ops ///////////////////////////////////////////////////// -_RS_STATIC void __attribute__((overloadable)) -rsMatrixSet(rs_matrix4x4 *m, uint32_t row, uint32_t col, float v) { - m->m[row * 4 + col] = v; -} - -_RS_STATIC float __attribute__((overloadable)) -rsMatrixGet(const rs_matrix4x4 *m, uint32_t row, uint32_t col) { - return m->m[row * 4 + col]; -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixSet(rs_matrix3x3 *m, uint32_t row, uint32_t col, float v) { - m->m[row * 3 + col] = v; -} - -_RS_STATIC float __attribute__((overloadable)) -rsMatrixGet(const rs_matrix3x3 *m, uint32_t row, uint32_t col) { - return m->m[row * 3 + col]; -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixSet(rs_matrix2x2 *m, uint32_t row, uint32_t col, float v) { - m->m[row * 2 + col] = v; -} - -_RS_STATIC float __attribute__((overloadable)) -rsMatrixGet(const rs_matrix2x2 *m, uint32_t row, uint32_t col) { - return m->m[row * 2 + col]; -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoadIdentity(rs_matrix4x4 *m) { - m->m[0] = 1.f; - m->m[1] = 0.f; - m->m[2] = 0.f; - m->m[3] = 0.f; - m->m[4] = 0.f; - m->m[5] = 1.f; - m->m[6] = 0.f; - m->m[7] = 0.f; - m->m[8] = 0.f; - m->m[9] = 0.f; - m->m[10] = 1.f; - m->m[11] = 0.f; - m->m[12] = 0.f; - m->m[13] = 0.f; - m->m[14] = 0.f; - m->m[15] = 1.f; -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoadIdentity(rs_matrix3x3 *m) { - m->m[0] = 1.f; - m->m[1] = 0.f; - m->m[2] = 0.f; - m->m[3] = 0.f; - m->m[4] = 1.f; - m->m[5] = 0.f; - m->m[6] = 0.f; - m->m[7] = 0.f; - m->m[8] = 1.f; -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoadIdentity(rs_matrix2x2 *m) { - m->m[0] = 1.f; - m->m[1] = 0.f; - m->m[2] = 0.f; - m->m[3] = 1.f; -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoad(rs_matrix4x4 *m, const float *v) { - m->m[0] = v[0]; - m->m[1] = v[1]; - m->m[2] = v[2]; - m->m[3] = v[3]; - m->m[4] = v[4]; - m->m[5] = v[5]; - m->m[6] = v[6]; - m->m[7] = v[7]; - m->m[8] = v[8]; - m->m[9] = v[9]; - m->m[10] = v[10]; - m->m[11] = v[11]; - m->m[12] = v[12]; - m->m[13] = v[13]; - m->m[14] = v[14]; - m->m[15] = v[15]; -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoad(rs_matrix3x3 *m, const float *v) { - m->m[0] = v[0]; - m->m[1] = v[1]; - m->m[2] = v[2]; - m->m[3] = v[3]; - m->m[4] = v[4]; - m->m[5] = v[5]; - m->m[6] = v[6]; - m->m[7] = v[7]; - m->m[8] = v[8]; -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoad(rs_matrix2x2 *m, const float *v) { - m->m[0] = v[0]; - m->m[1] = v[1]; - m->m[2] = v[2]; - m->m[3] = v[3]; -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix4x4 *v) { - m->m[0] = v->m[0]; - m->m[1] = v->m[1]; - m->m[2] = v->m[2]; - m->m[3] = v->m[3]; - m->m[4] = v->m[4]; - m->m[5] = v->m[5]; - m->m[6] = v->m[6]; - m->m[7] = v->m[7]; - m->m[8] = v->m[8]; - m->m[9] = v->m[9]; - m->m[10] = v->m[10]; - m->m[11] = v->m[11]; - m->m[12] = v->m[12]; - m->m[13] = v->m[13]; - m->m[14] = v->m[14]; - m->m[15] = v->m[15]; -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix3x3 *v) { - m->m[0] = v->m[0]; - m->m[1] = v->m[1]; - m->m[2] = v->m[2]; - m->m[3] = 0.f; - m->m[4] = v->m[3]; - m->m[5] = v->m[4]; - m->m[6] = v->m[5]; - m->m[7] = 0.f; - m->m[8] = v->m[6]; - m->m[9] = v->m[7]; - m->m[10] = v->m[8]; - m->m[11] = 0.f; - m->m[12] = 0.f; - m->m[13] = 0.f; - m->m[14] = 0.f; - m->m[15] = 1.f; -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix2x2 *v) { - m->m[0] = v->m[0]; - m->m[1] = v->m[1]; - m->m[2] = 0.f; - m->m[3] = 0.f; - m->m[4] = v->m[3]; - m->m[5] = v->m[4]; - m->m[6] = 0.f; - m->m[7] = 0.f; - m->m[8] = v->m[6]; - m->m[9] = v->m[7]; - m->m[10] = 1.f; - m->m[11] = 0.f; - m->m[12] = 0.f; - m->m[13] = 0.f; - m->m[14] = 0.f; - m->m[15] = 1.f; -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoad(rs_matrix3x3 *m, const rs_matrix3x3 *v) { - m->m[0] = v->m[0]; - m->m[1] = v->m[1]; - m->m[2] = v->m[2]; - m->m[3] = v->m[3]; - m->m[4] = v->m[4]; - m->m[5] = v->m[5]; - m->m[6] = v->m[6]; - m->m[7] = v->m[7]; - m->m[8] = v->m[8]; -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoad(rs_matrix2x2 *m, const rs_matrix2x2 *v) { - m->m[0] = v->m[0]; - m->m[1] = v->m[1]; - m->m[2] = v->m[2]; - m->m[3] = v->m[3]; -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoadRotate(rs_matrix4x4 *m, float rot, float x, float y, float z) { - float c, s; - m->m[3] = 0; - m->m[7] = 0; - m->m[11]= 0; - m->m[12]= 0; - m->m[13]= 0; - m->m[14]= 0; - m->m[15]= 1; - rot *= (float)(M_PI / 180.0f); - c = cos(rot); - s = sin(rot); - - const float len = x*x + y*y + z*z; - if (len != 1) { - const float recipLen = 1.f / sqrt(len); - x *= recipLen; - y *= recipLen; - z *= recipLen; - } - const float nc = 1.0f - c; - const float xy = x * y; - const float yz = y * z; - const float zx = z * x; - const float xs = x * s; - const float ys = y * s; - const float zs = z * s; - m->m[ 0] = x*x*nc + c; - m->m[ 4] = xy*nc - zs; - m->m[ 8] = zx*nc + ys; - m->m[ 1] = xy*nc + zs; - m->m[ 5] = y*y*nc + c; - m->m[ 9] = yz*nc - xs; - m->m[ 2] = zx*nc - ys; - m->m[ 6] = yz*nc + xs; - m->m[10] = z*z*nc + c; -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoadScale(rs_matrix4x4 *m, float x, float y, float z) { - rsMatrixLoadIdentity(m); - m->m[0] = x; - m->m[5] = y; - m->m[10] = z; -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoadTranslate(rs_matrix4x4 *m, float x, float y, float z) { - rsMatrixLoadIdentity(m); - m->m[12] = x; - m->m[13] = y; - m->m[14] = z; -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoadMultiply(rs_matrix4x4 *m, const rs_matrix4x4 *lhs, const rs_matrix4x4 *rhs) { - for (int i=0 ; i<4 ; i++) { - float ri0 = 0; - float ri1 = 0; - float ri2 = 0; - float ri3 = 0; - for (int j=0 ; j<4 ; j++) { - const float rhs_ij = rsMatrixGet(rhs, i,j); - ri0 += rsMatrixGet(lhs, j, 0) * rhs_ij; - ri1 += rsMatrixGet(lhs, j, 1) * rhs_ij; - ri2 += rsMatrixGet(lhs, j, 2) * rhs_ij; - ri3 += rsMatrixGet(lhs, j, 3) * rhs_ij; - } - rsMatrixSet(m, i, 0, ri0); - rsMatrixSet(m, i, 1, ri1); - rsMatrixSet(m, i, 2, ri2); - rsMatrixSet(m, i, 3, ri3); - } -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixMultiply(rs_matrix4x4 *m, const rs_matrix4x4 *rhs) { - rs_matrix4x4 mt; - rsMatrixLoadMultiply(&mt, m, rhs); - rsMatrixLoad(m, &mt); -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoadMultiply(rs_matrix3x3 *m, const rs_matrix3x3 *lhs, const rs_matrix3x3 *rhs) { - for (int i=0 ; i<3 ; i++) { - float ri0 = 0; - float ri1 = 0; - float ri2 = 0; - for (int j=0 ; j<3 ; j++) { - const float rhs_ij = rsMatrixGet(rhs, i,j); - ri0 += rsMatrixGet(lhs, j, 0) * rhs_ij; - ri1 += rsMatrixGet(lhs, j, 1) * rhs_ij; - ri2 += rsMatrixGet(lhs, j, 2) * rhs_ij; - } - rsMatrixSet(m, i, 0, ri0); - rsMatrixSet(m, i, 1, ri1); - rsMatrixSet(m, i, 2, ri2); - } -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixMultiply(rs_matrix3x3 *m, const rs_matrix3x3 *rhs) { - rs_matrix3x3 mt; - rsMatrixLoadMultiply(&mt, m, rhs); - rsMatrixLoad(m, &mt); -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoadMultiply(rs_matrix2x2 *m, const rs_matrix2x2 *lhs, const rs_matrix2x2 *rhs) { - for (int i=0 ; i<2 ; i++) { - float ri0 = 0; - float ri1 = 0; - for (int j=0 ; j<2 ; j++) { - const float rhs_ij = rsMatrixGet(rhs, i,j); - ri0 += rsMatrixGet(lhs, j, 0) * rhs_ij; - ri1 += rsMatrixGet(lhs, j, 1) * rhs_ij; - } - rsMatrixSet(m, i, 0, ri0); - rsMatrixSet(m, i, 1, ri1); - } -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixMultiply(rs_matrix2x2 *m, const rs_matrix2x2 *rhs) { - rs_matrix2x2 mt; - rsMatrixLoadMultiply(&mt, m, rhs); - rsMatrixLoad(m, &mt); -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixRotate(rs_matrix4x4 *m, float rot, float x, float y, float z) { - rs_matrix4x4 m1; - rsMatrixLoadRotate(&m1, rot, x, y, z); - rsMatrixMultiply(m, &m1); -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixScale(rs_matrix4x4 *m, float x, float y, float z) { - rs_matrix4x4 m1; - rsMatrixLoadScale(&m1, x, y, z); - rsMatrixMultiply(m, &m1); -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixTranslate(rs_matrix4x4 *m, float x, float y, float z) { - rs_matrix4x4 m1; - rsMatrixLoadTranslate(&m1, x, y, z); - rsMatrixMultiply(m, &m1); -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoadOrtho(rs_matrix4x4 *m, float left, float right, float bottom, float top, float near, float far) { - rsMatrixLoadIdentity(m); - m->m[0] = 2.f / (right - left); - m->m[5] = 2.f / (top - bottom); - m->m[10]= -2.f / (far - near); - m->m[12]= -(right + left) / (right - left); - m->m[13]= -(top + bottom) / (top - bottom); - m->m[14]= -(far + near) / (far - near); -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoadFrustum(rs_matrix4x4 *m, float left, float right, float bottom, float top, float near, float far) { - rsMatrixLoadIdentity(m); - m->m[0] = 2.f * near / (right - left); - m->m[5] = 2.f * near / (top - bottom); - m->m[8] = (right + left) / (right - left); - m->m[9] = (top + bottom) / (top - bottom); - m->m[10]= -(far + near) / (far - near); - m->m[11]= -1.f; - m->m[14]= -2.f * far * near / (far - near); - m->m[15]= 0.f; -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixLoadPerspective(rs_matrix4x4* m, float fovy, float aspect, float near, float far) { - float top = near * tan((float) (fovy * M_PI / 360.0f)); - float bottom = -top; - float left = bottom * aspect; - float right = top * aspect; - rsMatrixLoadFrustum(m, left, right, bottom, top, near, far); -} - -_RS_STATIC float4 __attribute__((overloadable)) -rsMatrixMultiply(rs_matrix4x4 *m, float4 in) { - float4 ret; - ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + (m->m[12] * in.w); - ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + (m->m[13] * in.w); - ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + (m->m[14] * in.w); - ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + (m->m[15] * in.w); - return ret; -} - -_RS_STATIC float4 __attribute__((overloadable)) -rsMatrixMultiply(rs_matrix4x4 *m, float3 in) { - float4 ret; - ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + m->m[12]; - ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + m->m[13]; - ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + m->m[14]; - ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + m->m[15]; - return ret; -} - -_RS_STATIC float4 __attribute__((overloadable)) -rsMatrixMultiply(rs_matrix4x4 *m, float2 in) { - float4 ret; - ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + m->m[12]; - ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + m->m[13]; - ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + m->m[14]; - ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + m->m[15]; - return ret; -} - -_RS_STATIC float3 __attribute__((overloadable)) -rsMatrixMultiply(rs_matrix3x3 *m, float3 in) { - float3 ret; - ret.x = (m->m[0] * in.x) + (m->m[3] * in.y) + (m->m[6] * in.z); - ret.y = (m->m[1] * in.x) + (m->m[4] * in.y) + (m->m[7] * in.z); - ret.z = (m->m[2] * in.x) + (m->m[5] * in.y) + (m->m[8] * in.z); - return ret; -} - -_RS_STATIC float3 __attribute__((overloadable)) -rsMatrixMultiply(rs_matrix3x3 *m, float2 in) { - float3 ret; - ret.x = (m->m[0] * in.x) + (m->m[3] * in.y); - ret.y = (m->m[1] * in.x) + (m->m[4] * in.y); - ret.z = (m->m[2] * in.x) + (m->m[5] * in.y); - return ret; -} - -_RS_STATIC float2 __attribute__((overloadable)) -rsMatrixMultiply(rs_matrix2x2 *m, float2 in) { - float2 ret; - ret.x = (m->m[0] * in.x) + (m->m[2] * in.y); - ret.y = (m->m[1] * in.x) + (m->m[3] * in.y); - return ret; -} +_RS_RUNTIME void __attribute__((overloadable)) +rsMatrixSet(rs_matrix4x4 *m, uint32_t row, uint32_t col, float v); -// Returns true if the matrix was successfully inversed -_RS_STATIC bool __attribute__((overloadable)) -rsMatrixInverse(rs_matrix4x4 *m) { - rs_matrix4x4 result; - - int i, j; - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) { - // computeCofactor for int i, int j - int c0 = (i+1) % 4; - int c1 = (i+2) % 4; - int c2 = (i+3) % 4; - int r0 = (j+1) % 4; - int r1 = (j+2) % 4; - int r2 = (j+3) % 4; - - float minor = (m->m[c0 + 4*r0] * (m->m[c1 + 4*r1] * m->m[c2 + 4*r2] - m->m[c1 + 4*r2] * m->m[c2 + 4*r1])) - - (m->m[c0 + 4*r1] * (m->m[c1 + 4*r0] * m->m[c2 + 4*r2] - m->m[c1 + 4*r2] * m->m[c2 + 4*r0])) - + (m->m[c0 + 4*r2] * (m->m[c1 + 4*r0] * m->m[c2 + 4*r1] - m->m[c1 + 4*r1] * m->m[c2 + 4*r0])); - - float cofactor = (i+j) & 1 ? -minor : minor; - - result.m[4*i + j] = cofactor; - } - } - - // Dot product of 0th column of source and 0th row of result - float det = m->m[0]*result.m[0] + m->m[4]*result.m[1] + - m->m[8]*result.m[2] + m->m[12]*result.m[3]; - - if (fabs(det) < 1e-6) { - return false; - } - - det = 1.0f / det; - for (i = 0; i < 16; ++i) { - m->m[i] = result.m[i] * det; - } - - return true; -} +_RS_RUNTIME float __attribute__((overloadable)) +rsMatrixGet(const rs_matrix4x4 *m, uint32_t row, uint32_t col); -// Returns true if the matrix was successfully inversed -_RS_STATIC bool __attribute__((overloadable)) -rsMatrixInverseTranspose(rs_matrix4x4 *m) { - rs_matrix4x4 result; - - int i, j; - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) { - // computeCofactor for int i, int j - int c0 = (i+1) % 4; - int c1 = (i+2) % 4; - int c2 = (i+3) % 4; - int r0 = (j+1) % 4; - int r1 = (j+2) % 4; - int r2 = (j+3) % 4; - - float minor = (m->m[c0 + 4*r0] * (m->m[c1 + 4*r1] * m->m[c2 + 4*r2] - m->m[c1 + 4*r2] * m->m[c2 + 4*r1])) - - (m->m[c0 + 4*r1] * (m->m[c1 + 4*r0] * m->m[c2 + 4*r2] - m->m[c1 + 4*r2] * m->m[c2 + 4*r0])) - + (m->m[c0 + 4*r2] * (m->m[c1 + 4*r0] * m->m[c2 + 4*r1] - m->m[c1 + 4*r1] * m->m[c2 + 4*r0])); - - float cofactor = (i+j) & 1 ? -minor : minor; - - result.m[4*j + i] = cofactor; - } - } - - // Dot product of 0th column of source and 0th column of result - float det = m->m[0]*result.m[0] + m->m[4]*result.m[4] + - m->m[8]*result.m[8] + m->m[12]*result.m[12]; - - if (fabs(det) < 1e-6) { - return false; - } - - det = 1.0f / det; - for (i = 0; i < 16; ++i) { - m->m[i] = result.m[i] * det; - } - - return true; -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixTranspose(rs_matrix4x4 *m) { - int i, j; - float temp; - for (i = 0; i < 3; ++i) { - for (j = i + 1; j < 4; ++j) { - temp = m->m[i*4 + j]; - m->m[i*4 + j] = m->m[j*4 + i]; - m->m[j*4 + i] = temp; - } - } -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixTranspose(rs_matrix3x3 *m) { - int i, j; - float temp; - for (i = 0; i < 2; ++i) { - for (j = i + 1; j < 3; ++j) { - temp = m->m[i*3 + j]; - m->m[i*3 + j] = m->m[j*4 + i]; - m->m[j*3 + i] = temp; - } - } -} - -_RS_STATIC void __attribute__((overloadable)) -rsMatrixTranspose(rs_matrix2x2 *m) { - float temp = m->m[1]; - m->m[1] = m->m[2]; - m->m[2] = temp; -} +_RS_RUNTIME void __attribute__((overloadable)) +rsMatrixSet(rs_matrix3x3 *m, uint32_t row, uint32_t col, float v); -///////////////////////////////////////////////////// -// quaternion ops -///////////////////////////////////////////////////// +_RS_RUNTIME float __attribute__((overloadable)) +rsMatrixGet(const rs_matrix3x3 *m, uint32_t row, uint32_t col); -_RS_STATIC void __attribute__((overloadable)) -rsQuaternionSet(rs_quaternion *q, float w, float x, float y, float z) { - q->w = w; - q->x = x; - q->y = y; - q->z = z; -} - -_RS_STATIC void __attribute__((overloadable)) -rsQuaternionSet(rs_quaternion *q, const rs_quaternion *rhs) { - q->w = rhs->w; - q->x = rhs->x; - q->y = rhs->y; - q->z = rhs->z; -} - -_RS_STATIC void __attribute__((overloadable)) -rsQuaternionMultiply(rs_quaternion *q, float s) { - q->w *= s; - q->x *= s; - q->y *= s; - q->z *= s; -} - -_RS_STATIC void __attribute__((overloadable)) -rsQuaternionMultiply(rs_quaternion *q, const rs_quaternion *rhs) { - q->w = -q->x*rhs->x - q->y*rhs->y - q->z*rhs->z + q->w*rhs->w; - q->x = q->x*rhs->w + q->y*rhs->z - q->z*rhs->y + q->w*rhs->x; - q->y = -q->x*rhs->z + q->y*rhs->w + q->z*rhs->z + q->w*rhs->y; - q->z = q->x*rhs->y - q->y*rhs->x + q->z*rhs->w + q->w*rhs->z; -} - -_RS_STATIC void -rsQuaternionAdd(rs_quaternion *q, const rs_quaternion *rhs) { - q->w *= rhs->w; - q->x *= rhs->x; - q->y *= rhs->y; - q->z *= rhs->z; -} - -_RS_STATIC void -rsQuaternionLoadRotateUnit(rs_quaternion *q, float rot, float x, float y, float z) { - rot *= (float)(M_PI / 180.0f) * 0.5f; - float c = cos(rot); - float s = sin(rot); - - q->w = c; - q->x = x * s; - q->y = y * s; - q->z = z * s; -} - -_RS_STATIC void -rsQuaternionLoadRotate(rs_quaternion *q, float rot, float x, float y, float z) { - const float len = x*x + y*y + z*z; - if (len != 1) { - const float recipLen = 1.f / sqrt(len); - x *= recipLen; - y *= recipLen; - z *= recipLen; - } - rsQuaternionLoadRotateUnit(q, rot, x, y, z); -} - -_RS_STATIC void -rsQuaternionConjugate(rs_quaternion *q) { - q->x = -q->x; - q->y = -q->y; - q->z = -q->z; -} - -_RS_STATIC float -rsQuaternionDot(const rs_quaternion *q0, const rs_quaternion *q1) { - return q0->w*q1->w + q0->x*q1->x + q0->y*q1->y + q0->z*q1->z; -} - -_RS_STATIC void -rsQuaternionNormalize(rs_quaternion *q) { - const float len = rsQuaternionDot(q, q); - if (len != 1) { - const float recipLen = 1.f / sqrt(len); - rsQuaternionMultiply(q, recipLen); - } -} - -_RS_STATIC void -rsQuaternionSlerp(rs_quaternion *q, const rs_quaternion *q0, const rs_quaternion *q1, float t) { - if (t <= 0.0f) { - rsQuaternionSet(q, q0); - return; - } - if (t >= 1.0f) { - rsQuaternionSet(q, q1); - return; - } - - rs_quaternion tempq0, tempq1; - rsQuaternionSet(&tempq0, q0); - rsQuaternionSet(&tempq1, q1); - - float angle = rsQuaternionDot(q0, q1); - if (angle < 0) { - rsQuaternionMultiply(&tempq0, -1.0f); - angle *= -1.0f; - } - - float scale, invScale; - if (angle + 1.0f > 0.05f) { - if (1.0f - angle >= 0.05f) { - float theta = acos(angle); - float invSinTheta = 1.0f / sin(theta); - scale = sin(theta * (1.0f - t)) * invSinTheta; - invScale = sin(theta * t) * invSinTheta; - } else { - scale = 1.0f - t; - invScale = t; - } - } else { - rsQuaternionSet(&tempq1, tempq0.z, -tempq0.y, tempq0.x, -tempq0.w); - scale = sin(M_PI * (0.5f - t)); - invScale = sin(M_PI * t); - } - - rsQuaternionSet(q, tempq0.w*scale + tempq1.w*invScale, tempq0.x*scale + tempq1.x*invScale, - tempq0.y*scale + tempq1.y*invScale, tempq0.z*scale + tempq1.z*invScale); -} - -_RS_STATIC void rsQuaternionGetMatrixUnit(rs_matrix4x4 *m, const rs_quaternion *q) { - float x2 = 2.0f * q->x * q->x; - float y2 = 2.0f * q->y * q->y; - float z2 = 2.0f * q->z * q->z; - float xy = 2.0f * q->x * q->y; - float wz = 2.0f * q->w * q->z; - float xz = 2.0f * q->x * q->z; - float wy = 2.0f * q->w * q->y; - float wx = 2.0f * q->w * q->x; - float yz = 2.0f * q->y * q->z; - - m->m[0] = 1.0f - y2 - z2; - m->m[1] = xy - wz; - m->m[2] = xz + wy; - m->m[3] = 0.0f; - - m->m[4] = xy + wz; - m->m[5] = 1.0f - x2 - z2; - m->m[6] = yz - wx; - m->m[7] = 0.0f; - - m->m[8] = xz - wy; - m->m[9] = yz - wx; - m->m[10] = 1.0f - x2 - y2; - m->m[11] = 0.0f; - - m->m[12] = 0.0f; - m->m[13] = 0.0f; - m->m[14] = 0.0f; - m->m[15] = 1.0f; -} +_RS_RUNTIME void __attribute__((overloadable)) +rsMatrixSet(rs_matrix2x2 *m, uint32_t row, uint32_t col, float v); -///////////////////////////////////////////////////// -// utility funcs -///////////////////////////////////////////////////// -__inline__ _RS_STATIC void __attribute__((overloadable, always_inline)) -rsExtractFrustumPlanes(const rs_matrix4x4 *modelViewProj, - float4 *left, float4 *right, - float4 *top, float4 *bottom, - float4 *near, float4 *far) { - // x y z w = a b c d in the plane equation - left->x = modelViewProj->m[3] + modelViewProj->m[0]; - left->y = modelViewProj->m[7] + modelViewProj->m[4]; - left->z = modelViewProj->m[11] + modelViewProj->m[8]; - left->w = modelViewProj->m[15] + modelViewProj->m[12]; - - right->x = modelViewProj->m[3] - modelViewProj->m[0]; - right->y = modelViewProj->m[7] - modelViewProj->m[4]; - right->z = modelViewProj->m[11] - modelViewProj->m[8]; - right->w = modelViewProj->m[15] - modelViewProj->m[12]; - - top->x = modelViewProj->m[3] - modelViewProj->m[1]; - top->y = modelViewProj->m[7] - modelViewProj->m[5]; - top->z = modelViewProj->m[11] - modelViewProj->m[9]; - top->w = modelViewProj->m[15] - modelViewProj->m[13]; - - bottom->x = modelViewProj->m[3] + modelViewProj->m[1]; - bottom->y = modelViewProj->m[7] + modelViewProj->m[5]; - bottom->z = modelViewProj->m[11] + modelViewProj->m[9]; - bottom->w = modelViewProj->m[15] + modelViewProj->m[13]; - - near->x = modelViewProj->m[3] + modelViewProj->m[2]; - near->y = modelViewProj->m[7] + modelViewProj->m[6]; - near->z = modelViewProj->m[11] + modelViewProj->m[10]; - near->w = modelViewProj->m[15] + modelViewProj->m[14]; - - far->x = modelViewProj->m[3] - modelViewProj->m[2]; - far->y = modelViewProj->m[7] - modelViewProj->m[6]; - far->z = modelViewProj->m[11] - modelViewProj->m[10]; - far->w = modelViewProj->m[15] - modelViewProj->m[14]; - - float len = length(left->xyz); - *left /= len; - len = length(right->xyz); - *right /= len; - len = length(top->xyz); - *top /= len; - len = length(bottom->xyz); - *bottom /= len; - len = length(near->xyz); - *near /= len; - len = length(far->xyz); - *far /= len; -} - -__inline__ _RS_STATIC bool __attribute__((overloadable, always_inline)) -rsIsSphereInFrustum(float4 *sphere, - float4 *left, float4 *right, - float4 *top, float4 *bottom, - float4 *near, float4 *far) { - - float distToCenter = dot(left->xyz, sphere->xyz) + left->w; - if (distToCenter < -sphere->w) { - return false; - } - distToCenter = dot(right->xyz, sphere->xyz) + right->w; - if (distToCenter < -sphere->w) { - return false; - } - distToCenter = dot(top->xyz, sphere->xyz) + top->w; - if (distToCenter < -sphere->w) { - return false; - } - distToCenter = dot(bottom->xyz, sphere->xyz) + bottom->w; - if (distToCenter < -sphere->w) { - return false; - } - distToCenter = dot(near->xyz, sphere->xyz) + near->w; - if (distToCenter < -sphere->w) { - return false; - } - distToCenter = dot(far->xyz, sphere->xyz) + far->w; - if (distToCenter < -sphere->w) { - return false; - } - return true; -} +_RS_RUNTIME float __attribute__((overloadable)) +rsMatrixGet(const rs_matrix2x2 *m, uint32_t row, uint32_t col); + +extern void __attribute__((overloadable)) rsMatrixLoadIdentity(rs_matrix4x4 *m); +extern void __attribute__((overloadable)) rsMatrixLoadIdentity(rs_matrix3x3 *m); +extern void __attribute__((overloadable)) rsMatrixLoadIdentity(rs_matrix2x2 *m); +extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix4x4 *m, const float *v); +extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix3x3 *m, const float *v); +extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix2x2 *m, const float *v); +extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix4x4 *v); +extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix3x3 *v); +extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix2x2 *v); +extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix3x3 *m, const rs_matrix3x3 *v); +extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix2x2 *m, const rs_matrix2x2 *v); + +extern void __attribute__((overloadable)) +rsMatrixLoadRotate(rs_matrix4x4 *m, float rot, float x, float y, float z); + +extern void __attribute__((overloadable)) +rsMatrixLoadScale(rs_matrix4x4 *m, float x, float y, float z); + +extern void __attribute__((overloadable)) +rsMatrixLoadTranslate(rs_matrix4x4 *m, float x, float y, float z); + +extern void __attribute__((overloadable)) +rsMatrixLoadMultiply(rs_matrix4x4 *m, const rs_matrix4x4 *lhs, const rs_matrix4x4 *rhs); + +extern void __attribute__((overloadable)) +rsMatrixMultiply(rs_matrix4x4 *m, const rs_matrix4x4 *rhs); + +extern void __attribute__((overloadable)) +rsMatrixLoadMultiply(rs_matrix3x3 *m, const rs_matrix3x3 *lhs, const rs_matrix3x3 *rhs); + +extern void __attribute__((overloadable)) +rsMatrixMultiply(rs_matrix3x3 *m, const rs_matrix3x3 *rhs); + +extern void __attribute__((overloadable)) +rsMatrixLoadMultiply(rs_matrix2x2 *m, const rs_matrix2x2 *lhs, const rs_matrix2x2 *rhs); + +extern void __attribute__((overloadable)) +rsMatrixMultiply(rs_matrix2x2 *m, const rs_matrix2x2 *rhs); + +extern void __attribute__((overloadable)) +rsMatrixRotate(rs_matrix4x4 *m, float rot, float x, float y, float z); + +extern void __attribute__((overloadable)) +rsMatrixScale(rs_matrix4x4 *m, float x, float y, float z); + +extern void __attribute__((overloadable)) +rsMatrixTranslate(rs_matrix4x4 *m, float x, float y, float z); + +extern void __attribute__((overloadable)) +rsMatrixLoadOrtho(rs_matrix4x4 *m, float left, float right, float bottom, float top, float near, float far); +extern void __attribute__((overloadable)) +rsMatrixLoadFrustum(rs_matrix4x4 *m, float left, float right, float bottom, float top, float near, float far); + +extern void __attribute__((overloadable)) +rsMatrixLoadPerspective(rs_matrix4x4* m, float fovy, float aspect, float near, float far); + +_RS_RUNTIME float4 __attribute__((overloadable)) +rsMatrixMultiply(rs_matrix4x4 *m, float4 in); + +_RS_RUNTIME float4 __attribute__((overloadable)) +rsMatrixMultiply(rs_matrix4x4 *m, float3 in); + +_RS_RUNTIME float4 __attribute__((overloadable)) +rsMatrixMultiply(rs_matrix4x4 *m, float2 in); + +_RS_RUNTIME float3 __attribute__((overloadable)) +rsMatrixMultiply(rs_matrix3x3 *m, float3 in); + +_RS_RUNTIME float3 __attribute__((overloadable)) +rsMatrixMultiply(rs_matrix3x3 *m, float2 in); + +_RS_RUNTIME float2 __attribute__((overloadable)) +rsMatrixMultiply(rs_matrix2x2 *m, float2 in); + +// Returns true if the matrix was successfully inversed +extern bool __attribute__((overloadable)) rsMatrixInverse(rs_matrix4x4 *m); +extern bool __attribute__((overloadable)) rsMatrixInverseTranspose(rs_matrix4x4 *m); +extern void __attribute__((overloadable)) rsMatrixTranspose(rs_matrix4x4 *m); +extern void __attribute__((overloadable)) rsMatrixTranspose(rs_matrix3x3 *m); +extern void __attribute__((overloadable)) rsMatrixTranspose(rs_matrix2x2 *m); ///////////////////////////////////////////////////// // int ops ///////////////////////////////////////////////////// -__inline__ _RS_STATIC uint __attribute__((overloadable, always_inline)) rsClamp(uint amount, uint low, uint high) { - return amount < low ? low : (amount > high ? high : amount); -} -__inline__ _RS_STATIC int __attribute__((overloadable, always_inline)) rsClamp(int amount, int low, int high) { - return amount < low ? low : (amount > high ? high : amount); -} -__inline__ _RS_STATIC ushort __attribute__((overloadable, always_inline)) rsClamp(ushort amount, ushort low, ushort high) { - return amount < low ? low : (amount > high ? high : amount); -} -__inline__ _RS_STATIC short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high) { - return amount < low ? low : (amount > high ? high : amount); -} -__inline__ _RS_STATIC uchar __attribute__((overloadable, always_inline)) rsClamp(uchar amount, uchar low, uchar high) { - return amount < low ? low : (amount > high ? high : amount); -} -__inline__ _RS_STATIC char __attribute__((overloadable, always_inline)) rsClamp(char amount, char low, char high) { - return amount < low ? low : (amount > high ? high : amount); -} - -#undef _RS_STATIC +_RS_RUNTIME uint __attribute__((overloadable, always_inline)) rsClamp(uint amount, uint low, uint high); +_RS_RUNTIME int __attribute__((overloadable, always_inline)) rsClamp(int amount, int low, int high); +_RS_RUNTIME ushort __attribute__((overloadable, always_inline)) rsClamp(ushort amount, ushort low, ushort high); +_RS_RUNTIME short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high); +_RS_RUNTIME uchar __attribute__((overloadable, always_inline)) rsClamp(uchar amount, uchar low, uchar high); +_RS_RUNTIME char __attribute__((overloadable, always_inline)) rsClamp(char amount, char low, char high); -#endif +#undef _RS_RUNTIME +#endif |
