summary refs log tree commit diff stats
path: root/libs
diff options
context:
space:
mode:
author Jason Sams <rjsams@android.com> 2010-07-19 15:38:19 -0700
committer Jason Sams <rjsams@android.com> 2010-07-19 15:38:19 -0700
commit 8e6c17fcc40bfb2391dcadecb397431239ab8a18 (patch)
tree 78253130ab783fb7bf0885a8ef121edd0bba2f04 /libs
parent 6ef3593c3666dda3718e7e06547498ec33451690 (diff)
download frameworks_base-8e6c17fcc40bfb2391dcadecb397431239ab8a18.zip
frameworks_base-8e6c17fcc40bfb2391dcadecb397431239ab8a18.tar.gz
frameworks_base-8e6c17fcc40bfb2391dcadecb397431239ab8a18.tar.bz2
Threading RS ForEach.
Change-Id: I5d6fe4db2b6ac0613394bc5a066ff90ec146d60e
Diffstat (limited to 'libs')
-rw-r--r-- libs/rs/java/ImageProcessing/res/raw/horizontal_blur.rs 7
-rw-r--r-- libs/rs/java/ImageProcessing/res/raw/horizontal_blur_bc.bc bin 1324 -> 1348 bytes
-rw-r--r-- libs/rs/java/ImageProcessing/res/raw/ip.rsh 2
-rw-r--r-- libs/rs/java/ImageProcessing/res/raw/threshold.rs 123
-rw-r--r-- libs/rs/java/ImageProcessing/res/raw/threshold_bc.bc bin 7208 -> 3304 bytes
-rw-r--r-- libs/rs/java/ImageProcessing/res/raw/vertical_blur.rs 20
-rw-r--r-- libs/rs/java/ImageProcessing/res/raw/vertical_blur_bc.bc bin 1464 -> 1468 bytes
-rw-r--r-- libs/rs/java/ImageProcessing/src/com/android/rs/image/ImageProcessingActivity.java 5
-rw-r--r-- libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Horizontal_blur.java 12
-rw-r--r-- libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Vertical_blur.java 12
-rw-r--r-- libs/rs/rsContext.cpp 67
-rw-r--r-- libs/rs/rsContext.h 18
-rw-r--r-- libs/rs/rsScriptC.cpp 161
-rw-r--r-- libs/rs/rsScriptC_Lib.cpp 33
-rw-r--r-- libs/rs/rsUtils.h 1
-rw-r--r-- libs/rs/scriptc/rs_math.rsh 6
16 files changed, 286 insertions, 181 deletions
diff --git a/libs/rs/java/ImageProcessing/res/raw/horizontal_blur.rs b/libs/rs/java/ImageProcessing/res/raw/horizontal_blur.rs
index 7b0e6bc..10815fb 100644
--- a/libs/rs/java/ImageProcessing/res/raw/horizontal_blur.rs
+++ b/libs/rs/java/ImageProcessing/res/raw/horizontal_blur.rs
@@ -5,17 +5,14 @@
#include "ip.rsh"
-uchar4 * ScratchPixel;
-
-#pragma rs export_var(ScratchPixel)
-
void root(const void *v_in, void *v_out, const void *usrData, uint32_t x, uint32_t y) {
uchar4 *output = (uchar4 *)v_out;
- const uchar4 *input = (uchar4 *)v_in;
const FilterStruct *fs = (const FilterStruct *)usrData;
+ const uchar4 *input = (const uchar4 *)rsGetElementAt(fs->ain, 0, y);
float4 blurredPixel = 0;
float4 currentPixel = 0;
+
for(int r = -fs->radius; r <= fs->radius; r ++) {
// Stepping left and right away from the pixel
int validW = x + r;
diff --git a/libs/rs/java/ImageProcessing/res/raw/horizontal_blur_bc.bc b/libs/rs/java/ImageProcessing/res/raw/horizontal_blur_bc.bc
index c9ba5d9..5920f3a 100644
--- a/libs/rs/java/ImageProcessing/res/raw/horizontal_blur_bc.bc
+++ b/libs/rs/java/ImageProcessing/res/raw/horizontal_blur_bc.bc
Binary files differ
diff --git a/libs/rs/java/ImageProcessing/res/raw/ip.rsh b/libs/rs/java/ImageProcessing/res/raw/ip.rsh
index 4073304..dea92c3 100644
--- a/libs/rs/java/ImageProcessing/res/raw/ip.rsh
+++ b/libs/rs/java/ImageProcessing/res/raw/ip.rsh
@@ -3,6 +3,8 @@
#define MAX_RADIUS 25
typedef struct {
+ rs_allocation ain;
+
float *gaussian; //[MAX_RADIUS * 2 + 1];
rs_matrix3x3 colorMat;
diff --git a/libs/rs/java/ImageProcessing/res/raw/threshold.rs b/libs/rs/java/ImageProcessing/res/raw/threshold.rs
index ecbfac4..aa6b6fa 100644
--- a/libs/rs/java/ImageProcessing/res/raw/threshold.rs
+++ b/libs/rs/java/ImageProcessing/res/raw/threshold.rs
@@ -24,7 +24,6 @@ float saturation;
static float inWMinInB;
static float outWMinOutB;
static float overInWMinInB;
-static FilterStruct filterStruct;
#pragma rs export_var(height, width, radius, InPixel, OutPixel, ScratchPixel, inBlack, outBlack, inWhite, outWhite, gamma, saturation, InPixel, OutPixel, ScratchPixel, vBlurScript, hBlurScript)
#pragma rs export_func(filter, filterBenchmark);
@@ -106,138 +105,70 @@ static void computeGaussianWeights() {
}
}
-// This needs to be inline
-static float4 levelsSaturation(float4 currentPixel) {
- float3 temp = rsMatrixMultiply(&colorMat, currentPixel.xyz);
- temp = (clamp(temp, 0.1f, 255.f) - inBlack) * overInWMinInB;
- temp = pow(temp, (float3)gamma);
- currentPixel.xyz = clamp(temp * outWMinOutB + outBlack, 0.1f, 255.f);
- return currentPixel;
-}
-
static void processNoBlur() {
- int w, h, r;
- int count = 0;
-
float inWMinInB = inWhite - inBlack;
float outWMinOutB = outWhite - outBlack;
float4 currentPixel = 0;
- for(h = 0; h < height; h ++) {
- for(w = 0; w < width; w ++) {
- uchar4 *input = InPixel + h*width + w;
+ for(int h = 0; h < height; h ++) {
+ uchar4 *input = InPixel + h*width;
+ uchar4 *output = OutPixel + h*width;
+ for(int w = 0; w < width; w ++) {
//currentPixel.xyz = convert_float3(input.xyz);
currentPixel.x = (float)(input->x);
currentPixel.y = (float)(input->y);
currentPixel.z = (float)(input->z);
- currentPixel = levelsSaturation(currentPixel);
+ float3 temp = rsMatrixMultiply(&colorMat, currentPixel.xyz);
+ temp = (clamp(temp, 0.f, 255.f) - inBlack) * overInWMinInB;
+ temp = pow(temp, (float3)gamma);
+ currentPixel.xyz = clamp(temp * outWMinOutB + outBlack, 0.f, 255.f);
- uchar4 *output = OutPixel + h*width + w;
//output.xyz = convert_uchar3(currentPixel.xyz);
output->x = (uint8_t)currentPixel.x;
output->y = (uint8_t)currentPixel.y;
output->z = (uint8_t)currentPixel.z;
output->w = input->w;
- }
- }
- rsSendToClient(&count, 1, 4, 0);
-}
-
-static void horizontalBlurLevels() {
- float4 blurredPixel = 0;
- float4 currentPixel = 0;
- // Horizontal blur
- int w, h, r;
- for(h = 0; h < height; h ++) {
- uchar4 *output = OutPixel + h*width;
- for(w = 0; w < width; w ++) {
- blurredPixel = 0;
-
- for(r = -radius; r <= radius; r ++) {
- // Stepping left and right away from the pixel
- int validW = w + r;
- // Clamp to zero and width max() isn't exposed for ints yet
- if(validW < 0) {
- validW = 0;
- }
- if(validW > width - 1) {
- validW = width - 1;
- }
- //int validW = rsClamp(w + r, 0, width - 1);
-
- uchar4 *input = InPixel + h*width + validW;
-
- float weight = gaussian[r + radius];
- currentPixel.x = (float)(input->x);
- currentPixel.y = (float)(input->y);
- currentPixel.z = (float)(input->z);
- //currentPixel.w = (float)(input->a);
-
- blurredPixel.xyz += currentPixel.xyz * weight;
- }
-
- blurredPixel = levelsSaturation(blurredPixel);
-
- output->x = (uint8_t)blurredPixel.x;
- output->y = (uint8_t)blurredPixel.y;
- output->z = (uint8_t)blurredPixel.z;
- //output->a = (uint8_t)blurredPixel.w;
+ input++;
output++;
}
}
}
-static void initStructs() {
- filterStruct.gaussian = gaussian;
- filterStruct.width = width;
- filterStruct.height = height;
- filterStruct.radius = radius;
+static void blur() {
+ computeGaussianWeights();
+
+ FilterStruct fs;
+ fs.gaussian = gaussian;
+ fs.width = width;
+ fs.height = height;
+ fs.radius = radius;
+
+ fs.ain = rsGetAllocation(InPixel);
+ rsForEach(hBlurScript, fs.ain, rsGetAllocation(ScratchPixel), &fs);
+
+ fs.ain = rsGetAllocation(ScratchPixel);
+ rsForEach(vBlurScript, fs.ain, rsGetAllocation(OutPixel), &fs);
}
+// Exported entry point: rebuilds the colour matrix, optionally runs the
+// two-pass blur, applies the levels pass, then notifies the client.
void filter() {
-RS_DEBUG(height);
-RS_DEBUG(width);
RS_DEBUG(radius);
-initStructs();
-
computeColorMatrix();
-if(radius == 0) {
-processNoBlur();
-return;
+if(radius > 0) {
+blur();
}
-
-computeGaussianWeights();
-
-horizontalBlurLevels();
-
-rsForEach(vBlurScript,
-rsGetAllocation(InPixel),
-rsGetAllocation(OutPixel),
-&filterStruct);
+// NOTE(review): processNoBlur() reads InPixel and writes OutPixel, and
+// blur() also finishes by writing OutPixel. Run back to back like this,
+// the levels pass appears to overwrite the blurred image with a result
+// computed from the unblurred input -- confirm this is intended.
+processNoBlur();
int count = 0;
rsSendToClient(&count, 1, 4, 0);
}
void filterBenchmark() {
- initStructs();
-
- computeGaussianWeights();
-
- rsForEach(hBlurScript,
- rsGetAllocation(InPixel),
- rsGetAllocation(OutPixel),
- &filterStruct);
-
- rsForEach(vBlurScript,
- rsGetAllocation(InPixel),
- rsGetAllocation(OutPixel),
- &filterStruct);
+ blur();
int count = 0;
rsSendToClient(&count, 1, 4, 0);
diff --git a/libs/rs/java/ImageProcessing/res/raw/threshold_bc.bc b/libs/rs/java/ImageProcessing/res/raw/threshold_bc.bc
index 8f37fdc..2b5d254 100644
--- a/libs/rs/java/ImageProcessing/res/raw/threshold_bc.bc
+++ b/libs/rs/java/ImageProcessing/res/raw/threshold_bc.bc
Binary files differ
diff --git a/libs/rs/java/ImageProcessing/res/raw/vertical_blur.rs b/libs/rs/java/ImageProcessing/res/raw/vertical_blur.rs
index 846f515..f5f2d69 100644
--- a/libs/rs/java/ImageProcessing/res/raw/vertical_blur.rs
+++ b/libs/rs/java/ImageProcessing/res/raw/vertical_blur.rs
@@ -5,14 +5,10 @@
#include "ip.rsh"
-uchar4 * ScratchPixel;
-
-#pragma rs export_var(ScratchPixel)
-
void root(const void *v_in, void *v_out, const void *usrData, uint32_t x, uint32_t y) {
uchar4 *output = (uchar4 *)v_out;
- const uchar4 *input = (uchar4 *)v_in;
const FilterStruct *fs = (const FilterStruct *)usrData;
+ const uchar4 *input = (const uchar4 *)rsGetElementAt(fs->ain, x, 0);
float4 blurredPixel = 0;
float4 currentPixel = 0;
@@ -27,19 +23,21 @@ void root(const void *v_in, void *v_out, const void *usrData, uint32_t x, uint32
validH = fs->height - 1;
}
- uchar4 *input = ScratchPixel + validH * fs->width + x;
+ const uchar4 *i = input + validH * fs->width;
+ //const uchar4 *i = (const uchar4 *)rsGetElementAt(fs->ain, x, validH);
float weight = fs->gaussian[r + fs->radius];
- currentPixel.x = (float)(input->x);
- currentPixel.y = (float)(input->y);
- currentPixel.z = (float)(input->z);
+ currentPixel.x = (float)(i->x);
+ currentPixel.y = (float)(i->y);
+ currentPixel.z = (float)(i->z);
blurredPixel.xyz += currentPixel.xyz * weight;
#else
int validH = rsClamp(y + r, 0, height - 1);
- uchar4 *input = ScratchPixel + validH * width + x;
- blurredPixel.xyz += convert_float3(input->xyz) * gaussian[r + fs->radius];
+ validH -= y;
+ uchar4 *i = input + validH * width + x;
+ blurredPixel.xyz += convert_float3(i->xyz) * gaussian[r + fs->radius];
#endif
}
diff --git a/libs/rs/java/ImageProcessing/res/raw/vertical_blur_bc.bc b/libs/rs/java/ImageProcessing/res/raw/vertical_blur_bc.bc
index af1cd8e..be5d0e4 100644
--- a/libs/rs/java/ImageProcessing/res/raw/vertical_blur_bc.bc
+++ b/libs/rs/java/ImageProcessing/res/raw/vertical_blur_bc.bc
Binary files differ
diff --git a/libs/rs/java/ImageProcessing/src/com/android/rs/image/ImageProcessingActivity.java b/libs/rs/java/ImageProcessing/src/com/android/rs/image/ImageProcessingActivity.java
index 21c3d74..0ed1185 100644
--- a/libs/rs/java/ImageProcessing/src/com/android/rs/image/ImageProcessingActivity.java
+++ b/libs/rs/java/ImageProcessing/src/com/android/rs/image/ImageProcessingActivity.java
@@ -376,10 +376,7 @@ public class ImageProcessingActivity extends Activity
mScratchPixelsAllocation = Allocation.createBitmapRef(mRS, mBitmapScratch);
mScriptVBlur = new ScriptC_Vertical_blur(mRS, getResources(), R.raw.vertical_blur_bc, false);
- mScriptVBlur.bind_ScratchPixel(mScratchPixelsAllocation);
-
mScriptHBlur = new ScriptC_Horizontal_blur(mRS, getResources(), R.raw.horizontal_blur_bc, false);
- mScriptHBlur.bind_ScratchPixel(mScratchPixelsAllocation);
mScript = new ScriptC_Threshold(mRS, getResources(), R.raw.threshold_bc, false);
mScript.set_width(mBitmapIn.getWidth());
@@ -431,8 +428,8 @@ public class ImageProcessingActivity extends Activity
android.util.Log.v("Img", "Renderscript frame time core ms " + t);
long javaTime = javaFilter();
-
mBenchmarkResult.setText("RS: " + t + " ms Java: " + javaTime + " ms");
+ //mBenchmarkResult.setText("RS: " + t + " ms");
mRadius = oldRadius;
mScript.set_radius(mRadius);
diff --git a/libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Horizontal_blur.java b/libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Horizontal_blur.java
index 8ee50a8..c447b9b 100644
--- a/libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Horizontal_blur.java
+++ b/libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Horizontal_blur.java
@@ -26,17 +26,5 @@ public class ScriptC_Horizontal_blur extends ScriptC {
super(rs, resources, id, isRoot);
}
- private final static int mExportVarIdx_ScratchPixel = 0;
- private Allocation mExportVar_ScratchPixel;
- public void bind_ScratchPixel(Allocation v) {
- mExportVar_ScratchPixel = v;
- if(v == null) bindAllocation(null, mExportVarIdx_ScratchPixel);
- else bindAllocation(v, mExportVarIdx_ScratchPixel);
- }
-
- public Allocation get_ScratchPixel() {
- return mExportVar_ScratchPixel;
- }
-
}
diff --git a/libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Vertical_blur.java b/libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Vertical_blur.java
index 0215f60..cee74d9 100644
--- a/libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Vertical_blur.java
+++ b/libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Vertical_blur.java
@@ -26,17 +26,5 @@ public class ScriptC_Vertical_blur extends ScriptC {
super(rs, resources, id, isRoot);
}
- private final static int mExportVarIdx_ScratchPixel = 0;
- private Allocation mExportVar_ScratchPixel;
- public void bind_ScratchPixel(Allocation v) {
- mExportVar_ScratchPixel = v;
- if(v == null) bindAllocation(null, mExportVarIdx_ScratchPixel);
- else bindAllocation(v, mExportVarIdx_ScratchPixel);
- }
-
- public Allocation get_ScratchPixel() {
- return mExportVar_ScratchPixel;
- }
-
}
diff --git a/libs/rs/rsContext.cpp b/libs/rs/rsContext.cpp
index 68eca44..629b481 100644
--- a/libs/rs/rsContext.cpp
+++ b/libs/rs/rsContext.cpp
@@ -23,6 +23,7 @@
#include <sys/types.h>
#include <sys/resource.h>
+#include <sched.h>
#include <cutils/properties.h>
@@ -355,6 +356,49 @@ void * Context::threadProc(void *vrsc)
return NULL;
}
+// Body of each helper (worker) thread.
+//
+// vrsc is the owning Context. The thread claims a slot index, records its
+// native thread id, adopts the context's priority, and then loops: wait for
+// the per-slot launch signal, run the currently published callback, and
+// report completion. Always returns NULL.
+void * Context::helperThreadProc(void *vrsc)
+{
+    Context *rsc = static_cast<Context *>(vrsc);
+    // Claim a unique slot. This relies on android_atomic_inc() returning the
+    // value held before the increment, giving 0-based indices.
+    uint32_t idx = (uint32_t)android_atomic_inc(&rsc->mWorkers.mLaunchCount);
+
+    // Routine tracing belongs at verbose level, not error level.
+    LOGV("RS helper thread starting %p idx=%u", rsc, idx);
+
+    // NOTE(review): the launch signal is initialised here, on the helper
+    // itself. Nothing visible in this patch prevents launchThreads() from
+    // calling set() on it before this line has run -- confirm the ordering.
+    rsc->mWorkers.mLaunchSignals[idx].init();
+    rsc->mWorkers.mNativeThreadId[idx] = gettid();
+
+    setpriority(PRIO_PROCESS, rsc->mWorkers.mNativeThreadId[idx], rsc->mThreadPriority);
+
+    // NOTE(review): the constructor spins on !mRunning *after* spawning the
+    // helpers, so mRunning may still be false when a helper first gets here,
+    // in which case this loop is never entered -- confirm against threadProc.
+    while (rsc->mRunning) {
+        rsc->mWorkers.mLaunchSignals[idx].wait();
+        if (rsc->mWorkers.mLaunchCallback) {
+            rsc->mWorkers.mLaunchCallback(rsc->mWorkers.mLaunchData, idx);
+        }
+        // Tell launchThreads() that this helper is done with the launch.
+        android_atomic_dec(&rsc->mWorkers.mRunningCount);
+        rsc->mWorkers.mCompleteSignal.set();
+    }
+    return NULL;
+}
+
+// Runs cbk(data, idx) once on every helper thread and blocks until all of
+// them have reported completion.
+//
+// cbk:  work function; receives data and the helper's slot index.
+// data: opaque pointer handed to cbk unchanged.
+void Context::launchThreads(WorkerCallback_t cbk, void *data)
+{
+    mWorkers.mLaunchData = data;
+    mWorkers.mLaunchCallback = cbk;
+
+    // The constructor lowers mCount when helper creation fails, possibly to
+    // zero. With no helpers nobody would ever invoke cbk and the launch would
+    // silently do no work, so run it on the calling thread instead.
+    if (mWorkers.mCount == 0) {
+        if (cbk) {
+            cbk(data, 0);
+        }
+        return;
+    }
+
+    // Publish the outstanding count before waking anyone, so that a helper
+    // that finishes early decrements a value that has already been set.
+    mWorkers.mRunningCount = (int)mWorkers.mCount;
+    for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
+        mWorkers.mLaunchSignals[ct].set();
+    }
+    // Each helper decrements mRunningCount and then sets mCompleteSignal.
+    while (mWorkers.mRunningCount) {
+        mWorkers.mCompleteSignal.wait();
+    }
+}
+
void Context::setPriority(int32_t p)
{
// Note: If we put this in the proper "background" policy
@@ -371,7 +415,10 @@ void Context::setPriority(int32_t p)
// success; reset the priority as well
}
#else
- setpriority(PRIO_PROCESS, mNativeThreadId, p);
+ setpriority(PRIO_PROCESS, mNativeThreadId, p);
+ for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
+ setpriority(PRIO_PROCESS, mWorkers.mNativeThreadId[ct], p);
+ }
#endif
}
@@ -421,10 +468,26 @@ Context::Context(Device *dev, bool isGraphics, bool useDepth)
timerInit();
timerSet(RS_TIMER_INTERNAL);
- LOGV("RS Launching thread");
+ LOGV("RS Launching thread(s)");
+ mWorkers.mCount = 2;
+ mWorkers.mThreadId = (pthread_t *) calloc(mWorkers.mCount, sizeof(pthread_t));
+ mWorkers.mNativeThreadId = (pid_t *) calloc(mWorkers.mCount, sizeof(pid_t));
+ mWorkers.mLaunchSignals = new Signal[mWorkers.mCount];
+ mWorkers.mLaunchCallback = NULL;
status = pthread_create(&mThreadId, &threadAttr, threadProc, this);
if (status) {
LOGE("Failed to start rs context thread.");
+ return;
+ }
+ mWorkers.mRunningCount = 0;
+ mWorkers.mLaunchCount = 0;
+ for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
+ status = pthread_create(&mWorkers.mThreadId[ct], &threadAttr, helperThreadProc, this);
+ if (status) {
+ mWorkers.mCount = ct;
+ LOGE("Created fewer than expected number of RS threads.");
+ break;
+ }
}
while(!mRunning) {
diff --git a/libs/rs/rsContext.h b/libs/rs/rsContext.h
index 06433a1..98ad3a4 100644
--- a/libs/rs/rsContext.h
+++ b/libs/rs/rsContext.h
@@ -65,6 +65,7 @@ public:
Script * mScript;
};
+ typedef void (*WorkerCallback_t)(void *usr, uint32_t idx);
//StructuredAllocationContext mStateAllocation;
ElementState mStateElement;
@@ -172,6 +173,8 @@ public:
bool ext_OES_texture_npot() const {return mGL.OES_texture_npot;}
+ void launchThreads(WorkerCallback_t cbk, void *data);
+
protected:
Device *mDev;
@@ -222,6 +225,20 @@ protected:
pthread_t mThreadId;
pid_t mNativeThreadId;
+// Shared bookkeeping for the helper threads driven by launchThreads().
+struct Workers {
+// Helpers that have not yet finished the current launch: set by
+// launchThreads(), decremented by each helper when its callback returns.
+volatile int mRunningCount;
+// Bumped atomically by each helper at startup to claim its slot index.
+volatile int mLaunchCount;
+// Number of helper threads; also the length of the per-helper arrays below.
+uint32_t mCount;
+// Per-helper pthread handles and native thread ids (the latter are used
+// with setpriority()).
+pthread_t *mThreadId;
+pid_t *mNativeThreadId;
+// Set by a helper after it finishes; launchThreads() waits on it.
+Signal mCompleteSignal;
+
+// One launch signal per helper, set by launchThreads() to start work.
+Signal *mLaunchSignals;
+// Callback and opaque argument for the launch currently in flight.
+WorkerCallback_t mLaunchCallback;
+void *mLaunchData;
+};
+Workers mWorkers;
+
ObjectBaseRef<Script> mRootScript;
ObjectBaseRef<ProgramFragment> mFragment;
ObjectBaseRef<ProgramVertex> mVertex;
@@ -248,6 +265,7 @@ private:
uint32_t runRootScript();
static void * threadProc(void *);
+ static void * helperThreadProc(void *);
ANativeWindow *mWndSurface;
diff --git a/libs/rs/rsScriptC.cpp b/libs/rs/rsScriptC.cpp
index b87ac28..9693b16 100644
--- a/libs/rs/rsScriptC.cpp
+++ b/libs/rs/rsScriptC.cpp
@@ -137,72 +137,155 @@ uint32_t ScriptC::run(Context *rsc)
}
+// Parameters for one multi-threaded rsForEach launch. Filled in by
+// ScriptC::runForEach() and shared, via launchThreads(), by every worker
+// running wc_xy().
+typedef struct {
+Context *rsc;
+ScriptC *script;
+const Allocation * ain;
+Allocation * aout;
+const void * usr;
+
+// Rows per work slice, and the next slice index to hand out. Workers
+// claim slices by atomically incrementing mSliceNum.
+uint32_t mSliceSize;
+volatile int mSliceNum;
+
+// Base pointers and per-element byte strides of the input and output
+// allocations; left NULL / 0 when the matching allocation is absent.
+const uint8_t *ptrIn;
+uint32_t eStrideIn;
+uint8_t *ptrOut;
+uint32_t eStrideOut;
+
+// Iteration bounds. Each loop that uses them runs from start up to, but
+// not including, end.
+uint32_t xStart;
+uint32_t xEnd;
+uint32_t yStart;
+uint32_t yEnd;
+uint32_t zStart;
+uint32_t zEnd;
+uint32_t arrayStart;
+uint32_t arrayEnd;
+
+// Dimensions taken from the input allocation's type, or from the output's
+// when there is no input.
+uint32_t dimX;
+uint32_t dimY;
+uint32_t dimZ;
+uint32_t dimArray;
+} MTLaunchStruct;
+// Signature the script's root entry point is cast to before being called:
+// (in, out, usr, x, y, z, array).
+// NOTE(review): the .rs root() kernels in this patch declare only five
+// parameters -- confirm the two extra arguments are harmless for the ABI.
+typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
+
+// Worker callback for a 2D launch.
+//
+// usr: the MTLaunchStruct describing the launch.
+// idx: slot index of the calling worker (used for tracing only).
+//
+// Repeatedly claims the next slice of mSliceSize rows and calls the script's
+// root function for every (x, y) cell in that slice, until no rows remain.
+static void wc_xy(void *usr, uint32_t idx)
+{
+    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
+    // Routine tracing belongs at verbose level, not error level.
+    LOGV("RS wc_xy usr %p, idx %u", usr, idx);
+
+    while (1) {
+        // Slices are handed out by an atomic counter shared by all workers.
+        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
+        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
+        uint32_t yEnd = yStart + mtls->mSliceSize;
+        yEnd = rsMin(yEnd, mtls->yEnd);
+        if (yEnd <= yStart) {
+            // Past the last row: nothing left for this worker.
+            return;
+        }
+
+        for (uint32_t y = yStart; y < yEnd; y++) {
+            // Begin at column xStart, not column 0, so the element pointers
+            // match the x value passed to the kernel when the launch is
+            // clipped. With xStart == 0 this is the same as before.
+            uint32_t offset = mtls->dimX * y + mtls->xStart;
+            uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset);
+            const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset);
+
+            for (uint32_t x = mtls->xStart; x < mtls->xEnd; x++) {
+                ((rs_t)mtls->script->mProgram.mRoot) (xPtrIn, xPtrOut, mtls->usr, x, y, 0, 0);
+                xPtrIn += mtls->eStrideIn;
+                xPtrOut += mtls->eStrideOut;
+            }
+        }
+    }
+
+}
+
void ScriptC::runForEach(Context *rsc,
const Allocation * ain,
Allocation * aout,
const void * usr,
const RsScriptCall *sc)
{
- uint32_t dimX = ain->getType()->getDimX();
- uint32_t dimY = ain->getType()->getDimY();
- uint32_t dimZ = ain->getType()->getDimZ();
- uint32_t dimA = 0;//ain->getType()->getDimArray();
-
- uint32_t xStart = 0;
- uint32_t xEnd = 0;
- uint32_t yStart = 0;
- uint32_t yEnd = 0;
- uint32_t zStart = 0;
- uint32_t zEnd = 0;
- uint32_t arrayStart = 0;
- uint32_t arrayEnd = 0;
+ MTLaunchStruct mtls;
+ memset(&mtls, 0, sizeof(mtls));
+
+ if (ain) {
+ mtls.dimX = ain->getType()->getDimX();
+ mtls.dimY = ain->getType()->getDimY();
+ mtls.dimZ = ain->getType()->getDimZ();
+ //mtls.dimArray = ain->getType()->getDimArray();
+ } else if (aout) {
+ mtls.dimX = aout->getType()->getDimX();
+ mtls.dimY = aout->getType()->getDimY();
+ mtls.dimZ = aout->getType()->getDimZ();
+ //mtls.dimArray = aout->getType()->getDimArray();
+ } else {
+ rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
+ return;
+ }
if (!sc || (sc->xEnd == 0)) {
- xStart = 0;
- xEnd = ain->getType()->getDimX();
+ mtls.xEnd = mtls.dimX;
} else {
- rsAssert(xStart < dimX);
- rsAssert(xEnd <= dimX);
+ rsAssert(sc->xStart < mtls.dimX);
+ rsAssert(sc->xEnd <= mtls.dimX);
rsAssert(sc->xStart < sc->xEnd);
- xStart = rsMin(dimX, sc->xStart);
- xEnd = rsMin(dimX, sc->xEnd);
- if (xStart >= xEnd) return;
+ mtls.xStart = rsMin(mtls.dimX, sc->xStart);
+ mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
+ if (mtls.xStart >= mtls.xEnd) return;
}
if (!sc || (sc->yEnd == 0)) {
- yStart = 0;
- yEnd = ain->getType()->getDimY();
+ mtls.yEnd = mtls.dimY;
} else {
- rsAssert(yStart < dimY);
- rsAssert(yEnd <= dimY);
+ rsAssert(sc->yStart < mtls.dimY);
+ rsAssert(sc->yEnd <= mtls.dimY);
rsAssert(sc->yStart < sc->yEnd);
- yStart = rsMin(dimY, sc->yStart);
- yEnd = rsMin(dimY, sc->yEnd);
- if (yStart >= yEnd) return;
+ mtls.yStart = rsMin(mtls.dimY, sc->yStart);
+ mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
+ if (mtls.yStart >= mtls.yEnd) return;
}
- xEnd = rsMax((uint32_t)1, xEnd);
- yEnd = rsMax((uint32_t)1, yEnd);
- zEnd = rsMax((uint32_t)1, zEnd);
- arrayEnd = rsMax((uint32_t)1, arrayEnd);
+ mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
+ mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
+ mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
+ mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
rsAssert(ain->getType()->getDimZ() == 0);
setupScript(rsc);
Script * oldTLS = setTLS(this);
- typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
- const uint8_t *ptrIn = (const uint8_t *)ain->getPtr();
- uint32_t eStrideIn = ain->getType()->getElementSizeBytes();
+ mtls.rsc = rsc;
+ mtls.ain = ain;
+ mtls.aout = aout;
+ mtls.script = this;
+ mtls.usr = usr;
+ mtls.mSliceSize = 10;
+ mtls.mSliceNum = 0;
+
+ mtls.ptrIn = NULL;
+ mtls.eStrideIn = 0;
+ if (ain) {
+ mtls.ptrIn = (const uint8_t *)ain->getPtr();
+ mtls.eStrideIn = ain->getType()->getElementSizeBytes();
+ }
- uint8_t *ptrOut = NULL;
- uint32_t eStrideOut = 0;
+ mtls.ptrOut = NULL;
+ mtls.eStrideOut = 0;
if (aout) {
- ptrOut = (uint8_t *)aout->getPtr();
- eStrideOut = aout->getType()->getElementSizeBytes();
+ mtls.ptrOut = (uint8_t *)aout->getPtr();
+ mtls.eStrideOut = aout->getType()->getElementSizeBytes();
}
+
+ {
+ LOGE("launch 1");
+ rsc->launchThreads(wc_xy, &mtls);
+ LOGE("launch 2");
+ }
+
+/*
for (uint32_t ar = arrayStart; ar < arrayEnd; ar++) {
for (uint32_t z = zStart; z < zEnd; z++) {
for (uint32_t y = yStart; y < yEnd; y++) {
@@ -221,7 +304,7 @@ void ScriptC::runForEach(Context *rsc,
}
}
-
+*/
setTLS(oldTLS);
}
diff --git a/libs/rs/rsScriptC_Lib.cpp b/libs/rs/rsScriptC_Lib.cpp
index 8d9ca9f..9c29ca6 100644
--- a/libs/rs/rsScriptC_Lib.cpp
+++ b/libs/rs/rsScriptC_Lib.cpp
@@ -329,6 +329,29 @@ static uint32_t SC_allocGetDimFaces(RsAllocation va)
return a->getType()->getDimFaces();
}
+// Returns the address of element x in the allocation's backing store.
+const void * SC_getElementAtX(RsAllocation va, uint32_t x)
+{
+    const Allocation *alloc = static_cast<const Allocation *>(va);
+    const Type *type = alloc->getType();
+    const uint8_t *base = (const uint8_t *)alloc->getPtr();
+    return base + (type->getElementSizeBytes() * x);
+}
+
+// Returns the address of element (x, y); a row is getDimX() elements long.
+const void * SC_getElementAtXY(RsAllocation va, uint32_t x, uint32_t y)
+{
+    const Allocation *alloc = static_cast<const Allocation *>(va);
+    const Type *type = alloc->getType();
+    const uint8_t *base = (const uint8_t *)alloc->getPtr();
+    return base + (type->getElementSizeBytes() * (x + y*type->getDimX()));
+}
+
+// Returns the address of element (x, y, z) in the allocation's backing store.
+//
+// The original body ignored z and returned the (x, y) element of plane 0.
+// NOTE(review): the index assumes planes are stored densely, each plane
+// being getDimX() * getDimY() elements -- confirm against the Allocation
+// layout. For z == 0 the result is identical to the previous behaviour.
+const void * SC_getElementAtXYZ(RsAllocation va, uint32_t x, uint32_t y, uint32_t z)
+{
+    const Allocation *a = static_cast<const Allocation *>(va);
+    const Type *t = a->getType();
+    const uint8_t *p = (const uint8_t *)a->getPtr();
+    return &p[t->getElementSizeBytes() * (x + t->getDimX() * (y + z * t->getDimY()))];
+}
static void SC_debugF(const char *s, float f) {
@@ -350,6 +373,10 @@ static void SC_debugI32(const char *s, int32_t i) {
LOGE("%s %i 0x%x", s, i, i);
}
+// Host-side rsDebug overload for pointers: logs the label s followed by p.
+static void SC_debugP(const char *s, const void *p) {
+LOGE("%s %p", s, p);
+}
+
static uint32_t SC_toClient(void *data, int cmdID, int len, int waitForSpace)
{
GET_TLS();
@@ -433,12 +460,18 @@ static ScriptCState::SymbolTable_t gSyms[] = {
{ "rsAllocationGetDimFaces", (void *)&SC_allocGetDimFaces },
{ "rsGetAllocation", (void *)&SC_getAllocation },
+ { "_Z14rsGetElementAt13rs_allocationj", (void *)&SC_getElementAtX },
+ { "_Z14rsGetElementAt13rs_allocationjj", (void *)&SC_getElementAtXY },
+ { "_Z14rsGetElementAt13rs_allocationjjj", (void *)&SC_getElementAtXYZ },
+
+
// Debug
{ "_Z7rsDebugPKcf", (void *)&SC_debugF },
{ "_Z7rsDebugPKcDv2_f", (void *)&SC_debugFv2 },
{ "_Z7rsDebugPKcDv3_f", (void *)&SC_debugFv3 },
{ "_Z7rsDebugPKcDv4_f", (void *)&SC_debugFv4 },
{ "_Z7rsDebugPKci", (void *)&SC_debugI32 },
+ { "_Z7rsDebugPKcPKv", (void *)&SC_debugP },
//extern void __attribute__((overloadable))rsDebug(const char *, const void *);
diff --git a/libs/rs/rsUtils.h b/libs/rs/rsUtils.h
index 0a37a5b..17feb22 100644
--- a/libs/rs/rsUtils.h
+++ b/libs/rs/rsUtils.h
@@ -30,6 +30,7 @@
#include <stdlib.h>
#include <pthread.h>
#include <time.h>
+#include <cutils/atomic.h>
#ifndef ANDROID_RS_BUILD_FOR_HOST
#include <EGL/egl.h>
diff --git a/libs/rs/scriptc/rs_math.rsh b/libs/rs/scriptc/rs_math.rsh
index e11c832..bd6e5a9 100644
--- a/libs/rs/scriptc/rs_math.rsh
+++ b/libs/rs/scriptc/rs_math.rsh
@@ -14,6 +14,12 @@ extern uint32_t rsAllocationGetDimZ(rs_allocation);
extern uint32_t rsAllocationGetDimLOD(rs_allocation);
extern uint32_t rsAllocationGetDimFaces(rs_allocation);
+// Returns a read-only pointer to the element at the given coordinates of an
+// allocation. Overloads take one, two or three coordinates; the runtime
+// resolves them by mangled name to the host's SC_getElementAtX / XY / XYZ.
+extern const void * __attribute__((overloadable))
+rsGetElementAt(rs_allocation, uint32_t x);
+extern const void * __attribute__((overloadable))
+rsGetElementAt(rs_allocation, uint32_t x, uint32_t y);
+extern const void * __attribute__((overloadable))
+rsGetElementAt(rs_allocation, uint32_t x, uint32_t y, uint32_t z);
// Debugging