diff options
Diffstat (limited to 'rs')
-rw-r--r-- | rs/java/android/renderscript/Allocation.java | 694 | ||||
-rw-r--r-- | rs/java/android/renderscript/AllocationAdapter.java | 4 | ||||
-rw-r--r-- | rs/java/android/renderscript/Element.java | 20 | ||||
-rw-r--r-- | rs/java/android/renderscript/RenderScript.java | 49 | ||||
-rw-r--r-- | rs/java/android/renderscript/Script.java | 29 | ||||
-rw-r--r-- | rs/java/android/renderscript/ScriptGroup.java | 94 | ||||
-rw-r--r-- | rs/java/android/renderscript/ScriptGroup2.java | 449 | ||||
-rw-r--r-- | rs/java/android/renderscript/ScriptIntrinsicBLAS.java | 2182 | ||||
-rw-r--r-- | rs/java/android/renderscript/Type.java | 20 | ||||
-rw-r--r-- | rs/jni/Android.mk | 5 | ||||
-rw-r--r-- | rs/jni/android_renderscript_RenderScript.cpp | 249 |
11 files changed, 2679 insertions, 1116 deletions
diff --git a/rs/java/android/renderscript/Allocation.java b/rs/java/android/renderscript/Allocation.java index 4fa2c81..0a50593 100644 --- a/rs/java/android/renderscript/Allocation.java +++ b/rs/java/android/renderscript/Allocation.java @@ -51,6 +51,7 @@ import android.os.Trace; * <a href="{@docRoot}guide/topics/renderscript/index.html">RenderScript</a> developer guide.</p> * </div> **/ + public class Allocation extends BaseObj { Type mType; Bitmap mBitmap; @@ -273,8 +274,8 @@ public class Allocation extends BaseObj { } /** - * @hide * Enable/Disable AutoPadding for Vec3 elements. + * By default: Disabled. * * @param useAutoPadding True: enable AutoPadding; False: disable AutoPadding * @@ -455,28 +456,31 @@ public class Allocation extends BaseObj { * */ public void syncAll(int srcLocation) { - Trace.traceBegin(RenderScript.TRACE_TAG, "syncAll"); - switch (srcLocation) { - case USAGE_GRAPHICS_TEXTURE: - case USAGE_SCRIPT: - if ((mUsage & USAGE_SHARED) != 0) { - copyFrom(mBitmap); - } - break; - case USAGE_GRAPHICS_CONSTANTS: - case USAGE_GRAPHICS_VERTEX: - break; - case USAGE_SHARED: - if ((mUsage & USAGE_SHARED) != 0) { - copyTo(mBitmap); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "syncAll"); + switch (srcLocation) { + case USAGE_GRAPHICS_TEXTURE: + case USAGE_SCRIPT: + if ((mUsage & USAGE_SHARED) != 0) { + copyFrom(mBitmap); + } + break; + case USAGE_GRAPHICS_CONSTANTS: + case USAGE_GRAPHICS_VERTEX: + break; + case USAGE_SHARED: + if ((mUsage & USAGE_SHARED) != 0) { + copyTo(mBitmap); + } + break; + default: + throw new RSIllegalArgumentException("Source must be exactly one usage type."); } - break; - default: - throw new RSIllegalArgumentException("Source must be exactly one usage type."); + mRS.validate(); + mRS.nAllocationSyncAll(getIDSafe(), srcLocation); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); } - mRS.validate(); - mRS.nAllocationSyncAll(getIDSafe(), srcLocation); - Trace.traceEnd(RenderScript.TRACE_TAG); } /** @@ -487,14 +491,17 @@ 
public class Allocation extends BaseObj { * */ public void ioSend() { - Trace.traceBegin(RenderScript.TRACE_TAG, "ioSend"); - if ((mUsage & USAGE_IO_OUTPUT) == 0) { - throw new RSIllegalArgumentException( - "Can only send buffer if IO_OUTPUT usage specified."); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "ioSend"); + if ((mUsage & USAGE_IO_OUTPUT) == 0) { + throw new RSIllegalArgumentException( + "Can only send buffer if IO_OUTPUT usage specified."); + } + mRS.validate(); + mRS.nAllocationIoSend(getID(mRS)); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); } - mRS.validate(); - mRS.nAllocationIoSend(getID(mRS)); - Trace.traceEnd(RenderScript.TRACE_TAG); } /** @@ -503,14 +510,17 @@ public class Allocation extends BaseObj { * */ public void ioReceive() { - Trace.traceBegin(RenderScript.TRACE_TAG, "ioReceive"); - if ((mUsage & USAGE_IO_INPUT) == 0) { - throw new RSIllegalArgumentException( - "Can only receive if IO_INPUT usage specified."); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "ioReceive"); + if ((mUsage & USAGE_IO_INPUT) == 0) { + throw new RSIllegalArgumentException( + "Can only receive if IO_INPUT usage specified."); + } + mRS.validate(); + mRS.nAllocationIoReceive(getID(mRS)); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); } - mRS.validate(); - mRS.nAllocationIoReceive(getID(mRS)); - Trace.traceEnd(RenderScript.TRACE_TAG); } /** @@ -519,28 +529,31 @@ public class Allocation extends BaseObj { * @param d Source array. 
*/ public void copyFrom(BaseObj[] d) { - Trace.traceBegin(RenderScript.TRACE_TAG, "copyFrom"); - mRS.validate(); - validateIsObject(); - if (d.length != mCurrentCount) { - throw new RSIllegalArgumentException("Array size mismatch, allocation sizeX = " + - mCurrentCount + ", array length = " + d.length); - } - - if (RenderScript.sPointerSize == 8) { - long i[] = new long[d.length * 4]; - for (int ct=0; ct < d.length; ct++) { - i[ct * 4] = d[ct].getID(mRS); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "copyFrom"); + mRS.validate(); + validateIsObject(); + if (d.length != mCurrentCount) { + throw new RSIllegalArgumentException("Array size mismatch, allocation sizeX = " + + mCurrentCount + ", array length = " + d.length); } - copy1DRangeFromUnchecked(0, mCurrentCount, i); - } else { - int i[] = new int[d.length]; - for (int ct=0; ct < d.length; ct++) { - i[ct] = (int)d[ct].getID(mRS); + + if (RenderScript.sPointerSize == 8) { + long i[] = new long[d.length * 4]; + for (int ct=0; ct < d.length; ct++) { + i[ct * 4] = d[ct].getID(mRS); + } + copy1DRangeFromUnchecked(0, mCurrentCount, i); + } else { + int i[] = new int[d.length]; + for (int ct=0; ct < d.length; ct++) { + i[ct] = (int) d[ct].getID(mRS); + } + copy1DRangeFromUnchecked(0, mCurrentCount, i); } - copy1DRangeFromUnchecked(0, mCurrentCount, i); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); } - Trace.traceEnd(RenderScript.TRACE_TAG); } private void validateBitmapFormat(Bitmap b) { @@ -599,16 +612,19 @@ public class Allocation extends BaseObj { } private void copyFromUnchecked(Object array, Element.DataType dt, int arrayLen) { - Trace.traceBegin(RenderScript.TRACE_TAG, "copyFromUnchecked"); - mRS.validate(); - if (mCurrentDimZ > 0) { - copy3DRangeFromUnchecked(0, 0, 0, mCurrentDimX, mCurrentDimY, mCurrentDimZ, array, dt, arrayLen); - } else if (mCurrentDimY > 0) { - copy2DRangeFromUnchecked(0, 0, mCurrentDimX, mCurrentDimY, array, dt, arrayLen); - } else { - copy1DRangeFromUnchecked(0, 
mCurrentCount, array, dt, arrayLen); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "copyFromUnchecked"); + mRS.validate(); + if (mCurrentDimZ > 0) { + copy3DRangeFromUnchecked(0, 0, 0, mCurrentDimX, mCurrentDimY, mCurrentDimZ, array, dt, arrayLen); + } else if (mCurrentDimY > 0) { + copy2DRangeFromUnchecked(0, 0, mCurrentDimX, mCurrentDimY, array, dt, arrayLen); + } else { + copy1DRangeFromUnchecked(0, mCurrentCount, array, dt, arrayLen); + } + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); } - Trace.traceEnd(RenderScript.TRACE_TAG); } /** @@ -619,10 +635,13 @@ public class Allocation extends BaseObj { * @param array The source data array */ public void copyFromUnchecked(Object array) { - Trace.traceBegin(RenderScript.TRACE_TAG, "copyFromUnchecked"); - copyFromUnchecked(array, validateObjectIsPrimitiveArray(array, false), - java.lang.reflect.Array.getLength(array)); - Trace.traceEnd(RenderScript.TRACE_TAG); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "copyFromUnchecked"); + copyFromUnchecked(array, validateObjectIsPrimitiveArray(array, false), + java.lang.reflect.Array.getLength(array)); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); + } } /** @@ -679,10 +698,13 @@ public class Allocation extends BaseObj { * @param array The source data array */ public void copyFrom(Object array) { - Trace.traceBegin(RenderScript.TRACE_TAG, "copyFrom"); - copyFromUnchecked(array, validateObjectIsPrimitiveArray(array, true), - java.lang.reflect.Array.getLength(array)); - Trace.traceEnd(RenderScript.TRACE_TAG); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "copyFrom"); + copyFromUnchecked(array, validateObjectIsPrimitiveArray(array, true), + java.lang.reflect.Array.getLength(array)); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); + } } /** @@ -747,19 +769,22 @@ public class Allocation extends BaseObj { * @param b the source bitmap */ public void copyFrom(Bitmap b) { - Trace.traceBegin(RenderScript.TRACE_TAG, "copyFrom"); - mRS.validate(); - 
if (b.getConfig() == null) { - Bitmap newBitmap = Bitmap.createBitmap(b.getWidth(), b.getHeight(), Bitmap.Config.ARGB_8888); - Canvas c = new Canvas(newBitmap); - c.drawBitmap(b, 0, 0, null); - copyFrom(newBitmap); - return; + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "copyFrom"); + mRS.validate(); + if (b.getConfig() == null) { + Bitmap newBitmap = Bitmap.createBitmap(b.getWidth(), b.getHeight(), Bitmap.Config.ARGB_8888); + Canvas c = new Canvas(newBitmap); + c.drawBitmap(b, 0, 0, null); + copyFrom(newBitmap); + return; + } + validateBitmapSize(b); + validateBitmapFormat(b); + mRS.nAllocationCopyFromBitmap(getID(mRS), b); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); } - validateBitmapSize(b); - validateBitmapFormat(b); - mRS.nAllocationCopyFromBitmap(getID(mRS), b); - Trace.traceEnd(RenderScript.TRACE_TAG); } /** @@ -769,13 +794,16 @@ public class Allocation extends BaseObj { * @param a the source allocation */ public void copyFrom(Allocation a) { - Trace.traceBegin(RenderScript.TRACE_TAG, "copyFrom"); - mRS.validate(); - if (!mType.equals(a.getType())) { - throw new RSIllegalArgumentException("Types of allocations must match."); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "copyFrom"); + mRS.validate(); + if (!mType.equals(a.getType())) { + throw new RSIllegalArgumentException("Types of allocations must match."); + } + copy2DRangeFrom(0, 0, mCurrentDimX, mCurrentDimY, a, 0, 0); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); } - copy2DRangeFrom(0, 0, mCurrentDimX, mCurrentDimY, a, 0, 0); - Trace.traceEnd(RenderScript.TRACE_TAG); } /** @@ -802,7 +830,7 @@ public class Allocation extends BaseObj { /** * This is only intended to be used by auto-generated code reflected from - * the RenderScript script files. + * the RenderScript script files and should not be used by developers. 
* * @param xoff * @param component_number @@ -813,9 +841,8 @@ public class Allocation extends BaseObj { } /** - * @hide * This is only intended to be used by auto-generated code reflected from - * the RenderScript script files. + * the RenderScript script files and should not be used by developers. * * @param xoff * @param yoff @@ -891,17 +918,20 @@ public class Allocation extends BaseObj { private void copy1DRangeFromUnchecked(int off, int count, Object array, Element.DataType dt, int arrayLen) { - Trace.traceBegin(RenderScript.TRACE_TAG, "copy1DRangeFromUnchecked"); - final int dataSize = mType.mElement.getBytesSize() * count; - // AutoPadding for Vec3 Element - boolean usePadding = false; - if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) { - usePadding = true; - } - data1DChecks(off, count, arrayLen * dt.mSize, dataSize, usePadding); - mRS.nAllocationData1D(getIDSafe(), off, mSelectedLOD, count, array, dataSize, dt, - mType.mElement.mType.mSize, usePadding); - Trace.traceEnd(RenderScript.TRACE_TAG); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "copy1DRangeFromUnchecked"); + final int dataSize = mType.mElement.getBytesSize() * count; + // AutoPadding for Vec3 Element + boolean usePadding = false; + if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) { + usePadding = true; + } + data1DChecks(off, count, arrayLen * dt.mSize, dataSize, usePadding); + mRS.nAllocationData1D(getIDSafe(), off, mSelectedLOD, count, array, dataSize, dt, + mType.mElement.mType.mSize, usePadding); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); + } } /** @@ -1053,6 +1083,7 @@ public class Allocation extends BaseObj { mSelectedLOD, mSelectedFace.mID, count, 1, data.getID(mRS), dataOff, 0, data.mSelectedLOD, data.mSelectedFace.mID); + Trace.traceEnd(RenderScript.TRACE_TAG); } private void validate2DRange(int xoff, int yoff, int w, int h) { @@ -1074,28 +1105,31 @@ public class Allocation extends BaseObj { void copy2DRangeFromUnchecked(int xoff, int yoff, 
int w, int h, Object array, Element.DataType dt, int arrayLen) { - Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeFromUnchecked"); - mRS.validate(); - validate2DRange(xoff, yoff, w, h); - final int dataSize = mType.mElement.getBytesSize() * w * h; - // AutoPadding for Vec3 Element - boolean usePadding = false; - int sizeBytes = arrayLen * dt.mSize; - if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) { - if (dataSize / 4 * 3 > sizeBytes) { - throw new RSIllegalArgumentException("Array too small for allocation type."); - } - usePadding = true; - sizeBytes = dataSize; - } else { - if (dataSize > sizeBytes) { - throw new RSIllegalArgumentException("Array too small for allocation type."); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeFromUnchecked"); + mRS.validate(); + validate2DRange(xoff, yoff, w, h); + final int dataSize = mType.mElement.getBytesSize() * w * h; + // AutoPadding for Vec3 Element + boolean usePadding = false; + int sizeBytes = arrayLen * dt.mSize; + if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) { + if (dataSize / 4 * 3 > sizeBytes) { + throw new RSIllegalArgumentException("Array too small for allocation type."); + } + usePadding = true; + sizeBytes = dataSize; + } else { + if (dataSize > sizeBytes) { + throw new RSIllegalArgumentException("Array too small for allocation type."); + } } + mRS.nAllocationData2D(getIDSafe(), xoff, yoff, mSelectedLOD, mSelectedFace.mID, w, h, + array, sizeBytes, dt, + mType.mElement.mType.mSize, usePadding); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); } - mRS.nAllocationData2D(getIDSafe(), xoff, yoff, mSelectedLOD, mSelectedFace.mID, w, h, - array, sizeBytes, dt, - mType.mElement.mType.mSize, usePadding); - Trace.traceEnd(RenderScript.TRACE_TAG); } /** @@ -1109,11 +1143,14 @@ public class Allocation extends BaseObj { * @param array Data to be placed into the Allocation */ public void copy2DRangeFrom(int xoff, int yoff, int w, int h, Object array) { - 
Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeFrom"); - copy2DRangeFromUnchecked(xoff, yoff, w, h, array, - validateObjectIsPrimitiveArray(array, true), - java.lang.reflect.Array.getLength(array)); - Trace.traceEnd(RenderScript.TRACE_TAG); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeFrom"); + copy2DRangeFromUnchecked(xoff, yoff, w, h, array, + validateObjectIsPrimitiveArray(array, true), + java.lang.reflect.Array.getLength(array)); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); + } } /** @@ -1194,14 +1231,17 @@ public class Allocation extends BaseObj { */ public void copy2DRangeFrom(int xoff, int yoff, int w, int h, Allocation data, int dataXoff, int dataYoff) { - Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeFrom"); - mRS.validate(); - validate2DRange(xoff, yoff, w, h); - mRS.nAllocationData2D(getIDSafe(), xoff, yoff, - mSelectedLOD, mSelectedFace.mID, - w, h, data.getID(mRS), dataXoff, dataYoff, - data.mSelectedLOD, data.mSelectedFace.mID); - Trace.traceEnd(RenderScript.TRACE_TAG); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeFrom"); + mRS.validate(); + validate2DRange(xoff, yoff, w, h); + mRS.nAllocationData2D(getIDSafe(), xoff, yoff, + mSelectedLOD, mSelectedFace.mID, + w, h, data.getID(mRS), dataXoff, dataYoff, + data.mSelectedLOD, data.mSelectedFace.mID); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); + } } /** @@ -1214,19 +1254,22 @@ public class Allocation extends BaseObj { * @param data the Bitmap to be copied */ public void copy2DRangeFrom(int xoff, int yoff, Bitmap data) { - Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeFrom"); - mRS.validate(); - if (data.getConfig() == null) { - Bitmap newBitmap = Bitmap.createBitmap(data.getWidth(), data.getHeight(), Bitmap.Config.ARGB_8888); - Canvas c = new Canvas(newBitmap); - c.drawBitmap(data, 0, 0, null); - copy2DRangeFrom(xoff, yoff, newBitmap); - return; + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeFrom"); + 
mRS.validate(); + if (data.getConfig() == null) { + Bitmap newBitmap = Bitmap.createBitmap(data.getWidth(), data.getHeight(), Bitmap.Config.ARGB_8888); + Canvas c = new Canvas(newBitmap); + c.drawBitmap(data, 0, 0, null); + copy2DRangeFrom(xoff, yoff, newBitmap); + return; + } + validateBitmapFormat(data); + validate2DRange(xoff, yoff, data.getWidth(), data.getHeight()); + mRS.nAllocationData2D(getIDSafe(), xoff, yoff, mSelectedLOD, mSelectedFace.mID, data); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); } - validateBitmapFormat(data); - validate2DRange(xoff, yoff, data.getWidth(), data.getHeight()); - mRS.nAllocationData2D(getIDSafe(), xoff, yoff, mSelectedLOD, mSelectedFace.mID, data); - Trace.traceEnd(RenderScript.TRACE_TAG); } private void validate3DRange(int xoff, int yoff, int zoff, int w, int h, int d) { @@ -1247,37 +1290,42 @@ public class Allocation extends BaseObj { } /** - * @hide + * Copy a rectangular region from the array into the allocation. + * The array is assumed to be tightly packed. * + * The data type of the array is not required to be the same as + * the element data type. 
*/ private void copy3DRangeFromUnchecked(int xoff, int yoff, int zoff, int w, int h, int d, Object array, Element.DataType dt, int arrayLen) { - Trace.traceBegin(RenderScript.TRACE_TAG, "copy3DRangeFromUnchecked"); - mRS.validate(); - validate3DRange(xoff, yoff, zoff, w, h, d); - final int dataSize = mType.mElement.getBytesSize() * w * h * d; - // AutoPadding for Vec3 Element - boolean usePadding = false; - int sizeBytes = arrayLen * dt.mSize; - if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) { - if (dataSize / 4 * 3 > sizeBytes) { - throw new RSIllegalArgumentException("Array too small for allocation type."); - } - usePadding = true; - sizeBytes = dataSize; - } else { - if (dataSize > sizeBytes) { - throw new RSIllegalArgumentException("Array too small for allocation type."); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "copy3DRangeFromUnchecked"); + mRS.validate(); + validate3DRange(xoff, yoff, zoff, w, h, d); + final int dataSize = mType.mElement.getBytesSize() * w * h * d; + // AutoPadding for Vec3 Element + boolean usePadding = false; + int sizeBytes = arrayLen * dt.mSize; + if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) { + if (dataSize / 4 * 3 > sizeBytes) { + throw new RSIllegalArgumentException("Array too small for allocation type."); + } + usePadding = true; + sizeBytes = dataSize; + } else { + if (dataSize > sizeBytes) { + throw new RSIllegalArgumentException("Array too small for allocation type."); + } } + mRS.nAllocationData3D(getIDSafe(), xoff, yoff, zoff, mSelectedLOD, w, h, d, + array, sizeBytes, dt, + mType.mElement.mType.mSize, usePadding); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); } - mRS.nAllocationData3D(getIDSafe(), xoff, yoff, zoff, mSelectedLOD, w, h, d, - array, sizeBytes, dt, - mType.mElement.mType.mSize, usePadding); - Trace.traceEnd(RenderScript.TRACE_TAG); } /** - * @hide * Copy a rectangular region from the array into the allocation. * The array is assumed to be tightly packed. 
* @@ -1290,15 +1338,17 @@ public class Allocation extends BaseObj { * @param array to be placed into the allocation */ public void copy3DRangeFrom(int xoff, int yoff, int zoff, int w, int h, int d, Object array) { - Trace.traceBegin(RenderScript.TRACE_TAG, "copy3DRangeFrom"); - copy3DRangeFromUnchecked(xoff, yoff, zoff, w, h, d, array, - validateObjectIsPrimitiveArray(array, true), - java.lang.reflect.Array.getLength(array)); - Trace.traceEnd(RenderScript.TRACE_TAG); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "copy3DRangeFrom"); + copy3DRangeFromUnchecked(xoff, yoff, zoff, w, h, d, array, + validateObjectIsPrimitiveArray(array, true), + java.lang.reflect.Array.getLength(array)); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); + } } /** - * @hide * Copy a rectangular region into the allocation from another * allocation. * @@ -1330,34 +1380,40 @@ public class Allocation extends BaseObj { * @param b The bitmap to be set from the Allocation. */ public void copyTo(Bitmap b) { - Trace.traceBegin(RenderScript.TRACE_TAG, "copyTo"); - mRS.validate(); - validateBitmapFormat(b); - validateBitmapSize(b); - mRS.nAllocationCopyToBitmap(getID(mRS), b); - Trace.traceEnd(RenderScript.TRACE_TAG); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "copyTo"); + mRS.validate(); + validateBitmapFormat(b); + validateBitmapSize(b); + mRS.nAllocationCopyToBitmap(getID(mRS), b); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); + } } private void copyTo(Object array, Element.DataType dt, int arrayLen) { - Trace.traceBegin(RenderScript.TRACE_TAG, "copyTo"); - mRS.validate(); - boolean usePadding = false; - if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) { - usePadding = true; - } - if (usePadding) { - if (dt.mSize * arrayLen < mSize / 4 * 3) { - throw new RSIllegalArgumentException( - "Size of output array cannot be smaller than size of allocation."); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "copyTo"); + mRS.validate(); + boolean usePadding = 
false; + if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) { + usePadding = true; } - } else { - if (dt.mSize * arrayLen < mSize) { - throw new RSIllegalArgumentException( - "Size of output array cannot be smaller than size of allocation."); + if (usePadding) { + if (dt.mSize * arrayLen < mSize / 4 * 3) { + throw new RSIllegalArgumentException( + "Size of output array cannot be smaller than size of allocation."); + } + } else { + if (dt.mSize * arrayLen < mSize) { + throw new RSIllegalArgumentException( + "Size of output array cannot be smaller than size of allocation."); + } } + mRS.nAllocationRead(getID(mRS), array, dt, mType.mElement.mType.mSize, usePadding); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); } - mRS.nAllocationRead(getID(mRS), array, dt, mType.mElement.mType.mSize, usePadding); - Trace.traceEnd(RenderScript.TRACE_TAG); } /** @@ -1423,6 +1479,7 @@ public class Allocation extends BaseObj { /** * @hide + * * This is only intended to be used by auto-generated code reflected from * the RenderScript script files and should not be used by developers. 
* @@ -1430,7 +1487,7 @@ public class Allocation extends BaseObj { * @param yoff * @param zoff * @param component_number - * @param array + * @param fp */ public void copyToFieldPacker(int xoff, int yoff, int zoff, int component_number, FieldPacker fp) { mRS.validate(); @@ -1494,21 +1551,23 @@ public class Allocation extends BaseObj { private void copy1DRangeToUnchecked(int off, int count, Object array, Element.DataType dt, int arrayLen) { - Trace.traceBegin(RenderScript.TRACE_TAG, "copy1DRangeToUnchecked"); - final int dataSize = mType.mElement.getBytesSize() * count; - // AutoPadding for Vec3 Element - boolean usePadding = false; - if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) { - usePadding = true; - } - data1DChecks(off, count, arrayLen * dt.mSize, dataSize, usePadding); - mRS.nAllocationRead1D(getIDSafe(), off, mSelectedLOD, count, array, dataSize, dt, - mType.mElement.mType.mSize, usePadding); - Trace.traceEnd(RenderScript.TRACE_TAG); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "copy1DRangeToUnchecked"); + final int dataSize = mType.mElement.getBytesSize() * count; + // AutoPadding for Vec3 Element + boolean usePadding = false; + if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) { + usePadding = true; + } + data1DChecks(off, count, arrayLen * dt.mSize, dataSize, usePadding); + mRS.nAllocationRead1D(getIDSafe(), off, mSelectedLOD, count, array, dataSize, dt, + mType.mElement.mType.mSize, usePadding); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); + } } /** - * @hide * Copy part of this Allocation into an array. This method does not * guarantee that the Allocation is compatible with the input buffer. * @@ -1523,7 +1582,6 @@ public class Allocation extends BaseObj { } /** - * @hide * Copy part of this Allocation into an array. This method does not * guarantee that the Allocation is compatible with the input buffer. 
* @@ -1536,7 +1594,6 @@ public class Allocation extends BaseObj { } /** - * @hide * Copy part of this Allocation into an array. This method does not * guarantee that the Allocation is compatible with the input buffer. * @@ -1549,7 +1606,6 @@ public class Allocation extends BaseObj { } /** - * @hide * Copy part of this Allocation into an array. This method does not * guarantee that the Allocation is compatible with the input buffer. * @@ -1562,7 +1618,6 @@ public class Allocation extends BaseObj { } /** - * @hide * Copy part of this Allocation into an array. This method does not * guarantee that the Allocation is compatible with the input buffer. * @@ -1576,7 +1631,6 @@ public class Allocation extends BaseObj { /** - * @hide * Copy part of this Allocation into an array. This method does not * and will generate exceptions if the Allocation type does not * match the component type of the array passed in. @@ -1592,7 +1646,6 @@ public class Allocation extends BaseObj { } /** - * @hide * Copy part of this Allocation into an array. This method does not * and will generate exceptions if the Allocation type is not a 32 bit * integer type. @@ -1607,7 +1660,6 @@ public class Allocation extends BaseObj { } /** - * @hide * Copy part of this Allocation into an array. This method does not * and will generate exceptions if the Allocation type is not a 16 bit * integer type. @@ -1622,7 +1674,6 @@ public class Allocation extends BaseObj { } /** - * @hide * Copy part of this Allocation into an array. This method does not * and will generate exceptions if the Allocation type is not an 8 bit * integer type. @@ -1637,7 +1688,6 @@ public class Allocation extends BaseObj { } /** - * @hide * Copy part of this Allocation into an array. This method does not * and will generate exceptions if the Allocation type is not a 32 bit float * type. 
@@ -1654,31 +1704,33 @@ public class Allocation extends BaseObj { void copy2DRangeToUnchecked(int xoff, int yoff, int w, int h, Object array, Element.DataType dt, int arrayLen) { - Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeToUnchecked"); - mRS.validate(); - validate2DRange(xoff, yoff, w, h); - final int dataSize = mType.mElement.getBytesSize() * w * h; - // AutoPadding for Vec3 Element - boolean usePadding = false; - int sizeBytes = arrayLen * dt.mSize; - if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) { - if (dataSize / 4 * 3 > sizeBytes) { - throw new RSIllegalArgumentException("Array too small for allocation type."); - } - usePadding = true; - sizeBytes = dataSize; - } else { - if (dataSize > sizeBytes) { - throw new RSIllegalArgumentException("Array too small for allocation type."); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeToUnchecked"); + mRS.validate(); + validate2DRange(xoff, yoff, w, h); + final int dataSize = mType.mElement.getBytesSize() * w * h; + // AutoPadding for Vec3 Element + boolean usePadding = false; + int sizeBytes = arrayLen * dt.mSize; + if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) { + if (dataSize / 4 * 3 > sizeBytes) { + throw new RSIllegalArgumentException("Array too small for allocation type."); + } + usePadding = true; + sizeBytes = dataSize; + } else { + if (dataSize > sizeBytes) { + throw new RSIllegalArgumentException("Array too small for allocation type."); + } } + mRS.nAllocationRead2D(getIDSafe(), xoff, yoff, mSelectedLOD, mSelectedFace.mID, w, h, + array, sizeBytes, dt, mType.mElement.mType.mSize, usePadding); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); } - mRS.nAllocationRead2D(getIDSafe(), xoff, yoff, mSelectedLOD, mSelectedFace.mID, w, h, - array, sizeBytes, dt, mType.mElement.mType.mSize, usePadding); - Trace.traceEnd(RenderScript.TRACE_TAG); } /** - * @hide * Copy from a rectangular region in this Allocation into an array. 
* * @param xoff X offset of the region to copy in this Allocation @@ -1694,7 +1746,6 @@ public class Allocation extends BaseObj { } /** - * @hide * Copy from a rectangular region in this Allocation into an array. * * @param xoff X offset of the region to copy in this Allocation @@ -1710,7 +1761,6 @@ public class Allocation extends BaseObj { } /** - * @hide * Copy from a rectangular region in this Allocation into an array. * * @param xoff X offset of the region to copy in this Allocation @@ -1726,7 +1776,6 @@ public class Allocation extends BaseObj { } /** - * @hide * Copy from a rectangular region in this Allocation into an array. * * @param xoff X offset of the region to copy in this Allocation @@ -1742,7 +1791,6 @@ public class Allocation extends BaseObj { } /** - * @hide * Copy from a rectangular region in this Allocation into an array. * * @param xoff X offset of the region to copy in this Allocation @@ -1759,36 +1807,41 @@ public class Allocation extends BaseObj { /** - * @hide + * Copy from a rectangular region in this Allocation into an array. + * The array is assumed to be tightly packed. * + * The data type of the array is not required to be the same as + * the element data type. 
*/ private void copy3DRangeToUnchecked(int xoff, int yoff, int zoff, int w, int h, int d, Object array, Element.DataType dt, int arrayLen) { - Trace.traceBegin(RenderScript.TRACE_TAG, "copy3DRangeToUnchecked"); - mRS.validate(); - validate3DRange(xoff, yoff, zoff, w, h, d); - final int dataSize = mType.mElement.getBytesSize() * w * h * d; - // AutoPadding for Vec3 Element - boolean usePadding = false; - int sizeBytes = arrayLen * dt.mSize; - if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) { - if (dataSize / 4 * 3 > sizeBytes) { - throw new RSIllegalArgumentException("Array too small for allocation type."); - } - usePadding = true; - sizeBytes = dataSize; - } else { - if (dataSize > sizeBytes) { - throw new RSIllegalArgumentException("Array too small for allocation type."); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "copy3DRangeToUnchecked"); + mRS.validate(); + validate3DRange(xoff, yoff, zoff, w, h, d); + final int dataSize = mType.mElement.getBytesSize() * w * h * d; + // AutoPadding for Vec3 Element + boolean usePadding = false; + int sizeBytes = arrayLen * dt.mSize; + if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) { + if (dataSize / 4 * 3 > sizeBytes) { + throw new RSIllegalArgumentException("Array too small for allocation type."); + } + usePadding = true; + sizeBytes = dataSize; + } else { + if (dataSize > sizeBytes) { + throw new RSIllegalArgumentException("Array too small for allocation type."); + } } + mRS.nAllocationRead3D(getIDSafe(), xoff, yoff, zoff, mSelectedLOD, w, h, d, + array, sizeBytes, dt, mType.mElement.mType.mSize, usePadding); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); } - mRS.nAllocationRead3D(getIDSafe(), xoff, yoff, zoff, mSelectedLOD, w, h, d, - array, sizeBytes, dt, mType.mElement.mType.mSize, usePadding); - Trace.traceEnd(RenderScript.TRACE_TAG); } - /** - * @hide + /* * Copy from a rectangular region in this Allocation into an array. 
* * @param xoff X offset of the region to copy in this Allocation @@ -1823,17 +1876,20 @@ public class Allocation extends BaseObj { * utilized */ static public Allocation createTyped(RenderScript rs, Type type, MipmapControl mips, int usage) { - Trace.traceBegin(RenderScript.TRACE_TAG, "createTyped"); - rs.validate(); - if (type.getID(rs) == 0) { - throw new RSInvalidStateException("Bad Type"); - } - long id = rs.nAllocationCreateTyped(type.getID(rs), mips.mID, usage, 0); - if (id == 0) { - throw new RSRuntimeException("Allocation creation failed."); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "createTyped"); + rs.validate(); + if (type.getID(rs) == 0) { + throw new RSInvalidStateException("Bad Type"); + } + long id = rs.nAllocationCreateTyped(type.getID(rs), mips.mID, usage, 0); + if (id == 0) { + throw new RSRuntimeException("Allocation creation failed."); + } + return new Allocation(id, rs, type, usage); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); } - Trace.traceEnd(RenderScript.TRACE_TAG); - return new Allocation(id, rs, type, usage); } /** @@ -1877,18 +1933,21 @@ public class Allocation extends BaseObj { */ static public Allocation createSized(RenderScript rs, Element e, int count, int usage) { - Trace.traceBegin(RenderScript.TRACE_TAG, "createSized"); - rs.validate(); - Type.Builder b = new Type.Builder(rs, e); - b.setX(count); - Type t = b.create(); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "createSized"); + rs.validate(); + Type.Builder b = new Type.Builder(rs, e); + b.setX(count); + Type t = b.create(); - long id = rs.nAllocationCreateTyped(t.getID(rs), MipmapControl.MIPMAP_NONE.mID, usage, 0); - if (id == 0) { - throw new RSRuntimeException("Allocation creation failed."); + long id = rs.nAllocationCreateTyped(t.getID(rs), MipmapControl.MIPMAP_NONE.mID, usage, 0); + if (id == 0) { + throw new RSRuntimeException("Allocation creation failed."); + } + return new Allocation(id, rs, t, usage); + } finally { + 
Trace.traceEnd(RenderScript.TRACE_TAG); } - Trace.traceEnd(RenderScript.TRACE_TAG); - return new Allocation(id, rs, t, usage); } /** @@ -1947,44 +2006,47 @@ public class Allocation extends BaseObj { static public Allocation createFromBitmap(RenderScript rs, Bitmap b, MipmapControl mips, int usage) { - Trace.traceBegin(RenderScript.TRACE_TAG, "createFromBitmap"); - rs.validate(); + try { + Trace.traceBegin(RenderScript.TRACE_TAG, "createFromBitmap"); + rs.validate(); + + // WAR undocumented color formats + if (b.getConfig() == null) { + if ((usage & USAGE_SHARED) != 0) { + throw new RSIllegalArgumentException("USAGE_SHARED cannot be used with a Bitmap that has a null config."); + } + Bitmap newBitmap = Bitmap.createBitmap(b.getWidth(), b.getHeight(), Bitmap.Config.ARGB_8888); + Canvas c = new Canvas(newBitmap); + c.drawBitmap(b, 0, 0, null); + return createFromBitmap(rs, newBitmap, mips, usage); + } - // WAR undocumented color formats - if (b.getConfig() == null) { - if ((usage & USAGE_SHARED) != 0) { - throw new RSIllegalArgumentException("USAGE_SHARED cannot be used with a Bitmap that has a null config."); + Type t = typeFromBitmap(rs, b, mips); + + // enable optimized bitmap path only with no mipmap and script-only usage + if (mips == MipmapControl.MIPMAP_NONE && + t.getElement().isCompatible(Element.RGBA_8888(rs)) && + usage == (USAGE_SHARED | USAGE_SCRIPT | USAGE_GRAPHICS_TEXTURE)) { + long id = rs.nAllocationCreateBitmapBackedAllocation(t.getID(rs), mips.mID, b, usage); + if (id == 0) { + throw new RSRuntimeException("Load failed."); + } + + // keep a reference to the Bitmap around to prevent GC + Allocation alloc = new Allocation(id, rs, t, usage); + alloc.setBitmap(b); + return alloc; } - Bitmap newBitmap = Bitmap.createBitmap(b.getWidth(), b.getHeight(), Bitmap.Config.ARGB_8888); - Canvas c = new Canvas(newBitmap); - c.drawBitmap(b, 0, 0, null); - return createFromBitmap(rs, newBitmap, mips, usage); - } - Type t = typeFromBitmap(rs, b, mips); - // enable 
optimized bitmap path only with no mipmap and script-only usage - if (mips == MipmapControl.MIPMAP_NONE && - t.getElement().isCompatible(Element.RGBA_8888(rs)) && - usage == (USAGE_SHARED | USAGE_SCRIPT | USAGE_GRAPHICS_TEXTURE)) { - long id = rs.nAllocationCreateBitmapBackedAllocation(t.getID(rs), mips.mID, b, usage); + long id = rs.nAllocationCreateFromBitmap(t.getID(rs), mips.mID, b, usage); if (id == 0) { throw new RSRuntimeException("Load failed."); } - - // keep a reference to the Bitmap around to prevent GC - Allocation alloc = new Allocation(id, rs, t, usage); - alloc.setBitmap(b); - return alloc; + return new Allocation(id, rs, t, usage); + } finally { + Trace.traceEnd(RenderScript.TRACE_TAG); } - - - long id = rs.nAllocationCreateFromBitmap(t.getID(rs), mips.mID, b, usage); - if (id == 0) { - throw new RSRuntimeException("Load failed."); - } - Trace.traceEnd(RenderScript.TRACE_TAG); - return new Allocation(id, rs, t, usage); } /** diff --git a/rs/java/android/renderscript/AllocationAdapter.java b/rs/java/android/renderscript/AllocationAdapter.java index 183726f..9bfd6ec 100644 --- a/rs/java/android/renderscript/AllocationAdapter.java +++ b/rs/java/android/renderscript/AllocationAdapter.java @@ -136,7 +136,7 @@ public class AllocationAdapter extends Allocation { /** - * @hide + * * Set the active X. The x value must be within the range for * the allocation being adapted. * @@ -242,7 +242,7 @@ public class AllocationAdapter extends Allocation { } /** - * @hide + * * * Create an arbitrary window into the base allocation * The type describes the shape of the window. 
diff --git a/rs/java/android/renderscript/Element.java b/rs/java/android/renderscript/Element.java index 60ff996..6efb6d6 100644 --- a/rs/java/android/renderscript/Element.java +++ b/rs/java/android/renderscript/Element.java @@ -119,9 +119,6 @@ public class Element extends BaseObj { */ public enum DataType { NONE (0, 0), - /** - * @hide - */ FLOAT_16 (1, 2), FLOAT_32 (2, 4), FLOAT_64 (3, 8), @@ -390,9 +387,6 @@ public class Element extends BaseObj { return rs.mElement_I64; } - /** - * @hide - */ public static Element F16(RenderScript rs) { if(rs.mElement_F16 == null) { rs.mElement_F16 = createUser(rs, DataType.FLOAT_16); @@ -534,9 +528,6 @@ public class Element extends BaseObj { return rs.mElement_RGBA_8888; } - /** - * @hide - */ public static Element F16_2(RenderScript rs) { if(rs.mElement_HALF_2 == null) { rs.mElement_HALF_2 = createVector(rs, DataType.FLOAT_16, 2); @@ -544,19 +535,13 @@ public class Element extends BaseObj { return rs.mElement_HALF_2; } - /** - * @hide - */ public static Element F16_3(RenderScript rs) { - if(rs.mElement_FLOAT_3 == null) { - rs.mElement_FLOAT_3 = createVector(rs, DataType.FLOAT_16, 3); + if(rs.mElement_HALF_3 == null) { + rs.mElement_HALF_3 = createVector(rs, DataType.FLOAT_16, 3); } return rs.mElement_HALF_3; } - /** - * @hide - */ public static Element F16_4(RenderScript rs) { if(rs.mElement_HALF_4 == null) { rs.mElement_HALF_4 = createVector(rs, DataType.FLOAT_16, 4); @@ -926,6 +911,7 @@ public class Element extends BaseObj { switch (dt) { // Support only primitive integer/float/boolean types as vectors. 
+ case FLOAT_16: case FLOAT_32: case FLOAT_64: case SIGNED_8: diff --git a/rs/java/android/renderscript/RenderScript.java b/rs/java/android/renderscript/RenderScript.java index 6b1939c..8b1a032 100644 --- a/rs/java/android/renderscript/RenderScript.java +++ b/rs/java/android/renderscript/RenderScript.java @@ -24,7 +24,6 @@ import android.content.Context; import android.content.res.AssetManager; import android.graphics.Bitmap; import android.graphics.SurfaceTexture; -import android.os.Process; import android.util.Log; import android.view.Surface; import android.os.SystemProperties; @@ -132,16 +131,31 @@ public class RenderScript { // this should be a monotonically increasing ID // used in conjunction with the API version of a device - static final long sMinorID = 1; + static final long sMinorVersion = 1; + + /** + * @hide + * + * Only exist to be compatible with old version RenderScript Support lib. + * Will eventually be removed. + * + * @return Always return 1 + * + */ + public static long getMinorID() { + return 1; + } + /** * Returns an identifier that can be used to identify a particular * minor version of RS. 
* - * @hide + * @return The minor RenderScript version number + * */ - public static long getMinorID() { - return sMinorID; + public static long getMinorVersion() { + return sMinorVersion; } /** @@ -302,8 +316,12 @@ public class RenderScript { long[] fieldIDs, long[] values, int[] sizes, long[] depClosures, long[] depFieldIDs) { validate(); - return rsnClosureCreate(mContext, kernelID, returnValue, fieldIDs, values, + long c = rsnClosureCreate(mContext, kernelID, returnValue, fieldIDs, values, sizes, depClosures, depFieldIDs); + if (c == 0) { + throw new RSRuntimeException("Failed creating closure."); + } + return c; } native long rsnInvokeClosureCreate(long con, long invokeID, byte[] params, @@ -311,8 +329,12 @@ public class RenderScript { synchronized long nInvokeClosureCreate(long invokeID, byte[] params, long[] fieldIDs, long[] values, int[] sizes) { validate(); - return rsnInvokeClosureCreate(mContext, invokeID, params, fieldIDs, + long c = rsnInvokeClosureCreate(mContext, invokeID, params, fieldIDs, values, sizes); + if (c == 0) { + throw new RSRuntimeException("Failed creating closure."); + } + return c; } native void rsnClosureSetArg(long con, long closureID, int index, @@ -337,7 +359,11 @@ public class RenderScript { synchronized long nScriptGroup2Create(String name, String cachePath, long[] closures) { validate(); - return rsnScriptGroup2Create(mContext, name, cachePath, closures); + long g = rsnScriptGroup2Create(mContext, name, cachePath, closures); + if (g == 0) { + throw new RSRuntimeException("Failed creating script group."); + } + return g; } native void rsnScriptGroup2Execute(long con, long groupID); @@ -1321,7 +1347,6 @@ public class RenderScript { /** * Create a RenderScript context. * - * @hide * @param ctx The context. * @return RenderScript */ @@ -1426,14 +1451,13 @@ public class RenderScript { /** * Gets or creates a RenderScript context of the specified type. * - * @hide * @param ctx The context. 
* @param ct The type of context to be created. * @param sdkVersion The target SDK Version. * @param flags The OR of the CREATE_FLAG_* options desired * @return RenderScript */ - public static RenderScript create(Context ctx, int sdkVersion, ContextType ct, int flags) { + private static RenderScript create(Context ctx, int sdkVersion, ContextType ct, int flags) { if (sdkVersion < 23) { return internalCreate(ctx, sdkVersion, ct, flags); } @@ -1456,8 +1480,6 @@ public class RenderScript { } /** - * @hide - * * Releases all the process contexts. This is the same as * calling .destroy() on each unique context retreived with * create(...). If no contexts have been created this @@ -1494,7 +1516,6 @@ public class RenderScript { * * If you need a single context please use create() * - * @hide * @param ctx The context. * @return RenderScript */ diff --git a/rs/java/android/renderscript/Script.java b/rs/java/android/renderscript/Script.java index dda468a..7cd6d09 100644 --- a/rs/java/android/renderscript/Script.java +++ b/rs/java/android/renderscript/Script.java @@ -182,9 +182,9 @@ public class Script extends BaseObj { mRS.validateObject(ain); mRS.validateObject(aout); - if (ain == null && aout == null) { + if (ain == null && aout == null && sc == null) { throw new RSIllegalArgumentException( - "At least one of ain or aout is required to be non-null."); + "At least one of input allocation, output allocation, or LaunchOptions is required to be non-null."); } long[] in_ids = null; @@ -220,22 +220,21 @@ public class Script extends BaseObj { /** * Only intended for use by generated reflected code. - * - * @hide */ protected void forEach(int slot, Allocation[] ains, Allocation aout, FieldPacker v) { + + // FieldPacker is kept here to support regular params in the future. forEach(slot, ains, aout, v, null); } /** * Only intended for use by generated reflected code. 
- * - * @hide */ protected void forEach(int slot, Allocation[] ains, Allocation aout, FieldPacker v, LaunchOptions sc) { // TODO: Is this necessary if nScriptForEach calls validate as well? + // FieldPacker is kept here to support regular params in the future. mRS.validate(); if (ains != null) { for (Allocation ain : ains) { @@ -474,7 +473,23 @@ public class Script extends BaseObj { /** - * Class used to specify clipping for a kernel launch. + * Class for specifying the specifics about how a kernel will be + * launched + * + * This class can specify a potential range of cells on which to + * run a kernel. If no set is called for a dimension then this + * class will have no impact on that dimension when the kernel + * is executed. + * + * The forEach launch will operate over the intersection of the + * dimensions. + * + * Example: + * LaunchOptions with setX(5, 15) + * Allocation with dimension X=10, Y=10 + * The resulting forEach run would execute over x = 5 to 10 and + * y = 0 to 10. + * * */ public static final class LaunchOptions { diff --git a/rs/java/android/renderscript/ScriptGroup.java b/rs/java/android/renderscript/ScriptGroup.java index be8b0fd..54180f4 100644 --- a/rs/java/android/renderscript/ScriptGroup.java +++ b/rs/java/android/renderscript/ScriptGroup.java @@ -131,28 +131,16 @@ public final class ScriptGroup extends BaseObj { int i; for (i = 0; i < args.length; i++) { - Object obj = args[i]; fieldIDs[i] = 0; - if (obj instanceof Input) { - Input unbound = (Input)obj; - unbound.addReference(this, i); - } else { - retrieveValueAndDependenceInfo(rs, i, args[i], values, sizes, - depClosures, depFieldIDs); - } + retrieveValueAndDependenceInfo(rs, i, null, args[i], + values, sizes, depClosures, depFieldIDs); } - for (Map.Entry<Script.FieldID, Object> entry : globals.entrySet()) { Object obj = entry.getValue(); Script.FieldID fieldID = entry.getKey(); fieldIDs[i] = fieldID.getID(rs); - if (obj instanceof Input) { - Input unbound = (Input)obj; - 
unbound.addReference(this, fieldID); - } else { - retrieveValueAndDependenceInfo(rs, i, obj, values, - sizes, depClosures, depFieldIDs); - } + retrieveValueAndDependenceInfo(rs, i, fieldID, obj, + values, sizes, depClosures, depFieldIDs); i++; } @@ -184,13 +172,8 @@ public final class ScriptGroup extends BaseObj { Object obj = entry.getValue(); Script.FieldID fieldID = entry.getKey(); fieldIDs[i] = fieldID.getID(rs); - if (obj instanceof Input) { - Input unbound = (Input)obj; - unbound.addReference(this, fieldID); - } else { - retrieveValueAndDependenceInfo(rs, i, obj, values, - sizes, depClosures, depFieldIDs); - } + retrieveValueAndDependenceInfo(rs, i, fieldID, obj, values, + sizes, depClosures, depFieldIDs); i++; } @@ -200,9 +183,8 @@ public final class ScriptGroup extends BaseObj { setID(id); } - private static - void retrieveValueAndDependenceInfo(RenderScript rs, - int index, Object obj, + private void retrieveValueAndDependenceInfo(RenderScript rs, + int index, Script.FieldID fid, Object obj, long[] values, int[] sizes, long[] depClosures, long[] depFieldIDs) { @@ -213,20 +195,25 @@ public final class ScriptGroup extends BaseObj { depClosures[index] = f.getClosure().getID(rs); Script.FieldID fieldID = f.getFieldID(); depFieldIDs[index] = fieldID != null ? 
fieldID.getID(rs) : 0; - if (obj == null) { - // Value is originally created by the owner closure - values[index] = 0; - sizes[index] = 0; - return; - } } else { depClosures[index] = 0; depFieldIDs[index] = 0; } - ValueAndSize vs = new ValueAndSize(rs, obj); - values[index] = vs.value; - sizes[index] = vs.size; + if (obj instanceof Input) { + Input unbound = (Input)obj; + if (index < mArgs.length) { + unbound.addReference(this, index); + } else { + unbound.addReference(this, fid); + } + values[index] = 0; + sizes[index] = 0; + } else { + ValueAndSize vs = new ValueAndSize(rs, obj); + values[index] = vs.value; + sizes[index] = vs.size; + } } /** @@ -258,7 +245,11 @@ public final class ScriptGroup extends BaseObj { // without an associated value (reference). So this is not working for // cross-module (cross-script) linking in this case where a field not // explicitly bound. - f = new Future(this, field, mBindings.get(field)); + Object obj = mBindings.get(field); + if (obj instanceof Future) { + obj = ((Future)obj).getValue(); + } + f = new Future(this, field, obj); mGlobalFuture.put(field, f); } @@ -266,12 +257,18 @@ public final class ScriptGroup extends BaseObj { } void setArg(int index, Object obj) { + if (obj instanceof Future) { + obj = ((Future)obj).getValue(); + } mArgs[index] = obj; ValueAndSize vs = new ValueAndSize(mRS, obj); mRS.nClosureSetArg(getID(mRS), index, vs.value, vs.size); } void setGlobal(Script.FieldID fieldID, Object obj) { + if (obj instanceof Future) { + obj = ((Future)obj).getValue(); + } mBindings.put(fieldID, obj); ValueAndSize vs = new ValueAndSize(mRS, obj); mRS.nClosureSetGlobal(getID(mRS), fieldID.getID(mRS), vs.value, vs.size); @@ -344,6 +341,7 @@ public final class ScriptGroup extends BaseObj { // -1 means unset. Legal values are 0 .. n-1, where n is the number of // arguments for the referencing closure. 
List<Pair<Closure, Integer>> mArgIndex; + Object mValue; Input() { mFieldID = new ArrayList<Pair<Closure, Script.FieldID>>(); @@ -359,6 +357,7 @@ public final class ScriptGroup extends BaseObj { } void set(Object value) { + mValue = value; for (Pair<Closure, Integer> p : mArgIndex) { Closure closure = p.first; int index = p.second.intValue(); @@ -370,6 +369,8 @@ public final class ScriptGroup extends BaseObj { closure.setGlobal(fieldID, value); } } + + Object get() { return mValue; } } private String mName; @@ -400,8 +401,10 @@ public final class ScriptGroup extends BaseObj { /** * Executes a script group * - * @param inputs inputs to the script group - * @return outputs of the script group as an array of objects + * @param inputs Values for inputs to the script group, in the order as the + * inputs are added via {@link Builder2#addInput}. + * @return Outputs of the script group as an array of objects, in the order + * as futures are passed to {@link Builder2#create}. */ public Object[] execute(Object... 
inputs) { @@ -432,7 +435,11 @@ public final class ScriptGroup extends BaseObj { Object[] outputObjs = new Object[mOutputs2.length]; int i = 0; for (Future f : mOutputs2) { - outputObjs[i++] = f.getValue(); + Object output = f.getValue(); + if (output instanceof Input) { + output = ((Input)output).get(); + } + outputObjs[i++] = output; } return outputObjs; } @@ -590,7 +597,8 @@ public final class ScriptGroup extends BaseObj { Node n = mNodes.get(ct); if (n.mInputs.size() == 0) { if (n.mOutputs.size() == 0 && mNodes.size() > 1) { - throw new RSInvalidStateException("Groups cannot contain unconnected scripts"); + String msg = "Groups cannot contain unconnected scripts"; + throw new RSInvalidStateException(msg); } validateDAGRecurse(n, ct+1); } @@ -843,13 +851,13 @@ public final class ScriptGroup extends BaseObj { * Returns the field ID */ - public Script.FieldID getField() { return mField; } + Script.FieldID getField() { return mField; } /** * Returns the value */ - public Object getValue() { return mValue; } + Object getValue() { return mValue; } } /** @@ -987,6 +995,8 @@ public final class ScriptGroup extends BaseObj { * * @param name name for the script group. Legal names can only contain letters, digits, * '-', or '_'. The name can be no longer than 100 characters. + * Try to use unique names, to avoid name conflicts and reduce + * the cost of group creation. * @param outputs futures intended as outputs of the script group * @return a script group */ diff --git a/rs/java/android/renderscript/ScriptGroup2.java b/rs/java/android/renderscript/ScriptGroup2.java deleted file mode 100644 index 417bbee..0000000 --- a/rs/java/android/renderscript/ScriptGroup2.java +++ /dev/null @@ -1,449 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package android.renderscript; - -import android.util.Log; -import android.util.Pair; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - -****************************** -You have tried to change the API from what has been previously approved. - -To make these errors go away, you have two choices: -1) You can add "@hide" javadoc comments to the methods, etc. listed in the -errors above. - -2) You can update current.txt by executing the following command: -make update-api - -To submit the revised current.txt to the main Android repository, -you will need approval. -****************************** - -@hide Pending Android public API approval. 
-*/ -public class ScriptGroup2 extends BaseObj { - - public static class Closure extends BaseObj { - private Object[] mArgs; - private Allocation mReturnValue; - private Map<Script.FieldID, Object> mBindings; - - private Future mReturnFuture; - private Map<Script.FieldID, Future> mGlobalFuture; - - private FieldPacker mFP; - - private static final String TAG = "Closure"; - - public Closure(long id, RenderScript rs) { - super(id, rs); - } - - public Closure(RenderScript rs, Script.KernelID kernelID, Type returnType, - Object[] args, Map<Script.FieldID, Object> globals) { - super(0, rs); - - mArgs = args; - mReturnValue = Allocation.createTyped(rs, returnType); - mBindings = globals; - mGlobalFuture = new HashMap<Script.FieldID, Future>(); - - int numValues = args.length + globals.size(); - - long[] fieldIDs = new long[numValues]; - long[] values = new long[numValues]; - int[] sizes = new int[numValues]; - long[] depClosures = new long[numValues]; - long[] depFieldIDs = new long[numValues]; - - int i; - for (i = 0; i < args.length; i++) { - Object obj = args[i]; - fieldIDs[i] = 0; - if (obj instanceof UnboundValue) { - UnboundValue unbound = (UnboundValue)obj; - unbound.addReference(this, i); - } else { - retrieveValueAndDependenceInfo(rs, i, args[i], values, sizes, - depClosures, depFieldIDs); - } - } - - for (Map.Entry<Script.FieldID, Object> entry : globals.entrySet()) { - Object obj = entry.getValue(); - Script.FieldID fieldID = entry.getKey(); - fieldIDs[i] = fieldID.getID(rs); - if (obj instanceof UnboundValue) { - UnboundValue unbound = (UnboundValue)obj; - unbound.addReference(this, fieldID); - } else { - retrieveValueAndDependenceInfo(rs, i, obj, values, - sizes, depClosures, depFieldIDs); - } - i++; - } - - long id = rs.nClosureCreate(kernelID.getID(rs), mReturnValue.getID(rs), - fieldIDs, values, sizes, depClosures, depFieldIDs); - - setID(id); - } - - public Closure(RenderScript rs, Script.InvokeID invokeID, - Object[] args, Map<Script.FieldID, Object> 
globals) { - super(0, rs); - mFP = FieldPacker.createFromArray(args); - - mArgs = args; - mBindings = globals; - mGlobalFuture = new HashMap<Script.FieldID, Future>(); - - int numValues = globals.size(); - - long[] fieldIDs = new long[numValues]; - long[] values = new long[numValues]; - int[] sizes = new int[numValues]; - long[] depClosures = new long[numValues]; - long[] depFieldIDs = new long[numValues]; - - int i = 0; - for (Map.Entry<Script.FieldID, Object> entry : globals.entrySet()) { - Object obj = entry.getValue(); - Script.FieldID fieldID = entry.getKey(); - fieldIDs[i] = fieldID.getID(rs); - if (obj instanceof UnboundValue) { - UnboundValue unbound = (UnboundValue)obj; - unbound.addReference(this, fieldID); - } else { - // TODO(yangni): Verify obj not a future. - retrieveValueAndDependenceInfo(rs, i, obj, values, - sizes, depClosures, depFieldIDs); - } - i++; - } - - long id = rs.nInvokeClosureCreate(invokeID.getID(rs), mFP.getData(), fieldIDs, - values, sizes); - - setID(id); - } - - private static - void retrieveValueAndDependenceInfo(RenderScript rs, - int index, Object obj, - long[] values, int[] sizes, - long[] depClosures, - long[] depFieldIDs) { - - if (obj instanceof Future) { - Future f = (Future)obj; - obj = f.getValue(); - depClosures[index] = f.getClosure().getID(rs); - Script.FieldID fieldID = f.getFieldID(); - depFieldIDs[index] = fieldID != null ? 
fieldID.getID(rs) : 0; - if (obj == null) { - // Value is originally created by the owner closure - values[index] = 0; - sizes[index] = 0; - return; - } - } else { - depClosures[index] = 0; - depFieldIDs[index] = 0; - } - - ValueAndSize vs = new ValueAndSize(rs, obj); - values[index] = vs.value; - sizes[index] = vs.size; - } - - public Future getReturn() { - if (mReturnFuture == null) { - mReturnFuture = new Future(this, null, mReturnValue); - } - - return mReturnFuture; - } - - public Future getGlobal(Script.FieldID field) { - Future f = mGlobalFuture.get(field); - - if (f == null) { - // If the field is not bound to this closure, this will return a future - // without an associated value (reference). So this is not working for - // cross-module (cross-script) linking in this case where a field not - // explicitly bound. - f = new Future(this, field, mBindings.get(field)); - mGlobalFuture.put(field, f); - } - - return f; - } - - void setArg(int index, Object obj) { - mArgs[index] = obj; - ValueAndSize vs = new ValueAndSize(mRS, obj); - mRS.nClosureSetArg(getID(mRS), index, vs.value, vs.size); - } - - void setGlobal(Script.FieldID fieldID, Object obj) { - mBindings.put(fieldID, obj); - ValueAndSize vs = new ValueAndSize(mRS, obj); - mRS.nClosureSetGlobal(getID(mRS), fieldID.getID(mRS), vs.value, vs.size); - } - - private static final class ValueAndSize { - public ValueAndSize(RenderScript rs, Object obj) { - if (obj instanceof Allocation) { - value = ((Allocation)obj).getID(rs); - size = -1; - } else if (obj instanceof Boolean) { - value = ((Boolean)obj).booleanValue() ? 
1 : 0; - size = 4; - } else if (obj instanceof Integer) { - value = ((Integer)obj).longValue(); - size = 4; - } else if (obj instanceof Long) { - value = ((Long)obj).longValue(); - size = 8; - } else if (obj instanceof Float) { - value = ((Float)obj).longValue(); - size = 4; - } else if (obj instanceof Double) { - value = ((Double)obj).longValue(); - size = 8; - } - } - public long value; - public int size; - } - } - - public static class Future { - Closure mClosure; - Script.FieldID mFieldID; - Object mValue; - - Future(Closure closure, Script.FieldID fieldID, Object value) { - mClosure = closure; - mFieldID = fieldID; - mValue = value; - } - - Closure getClosure() { return mClosure; } - Script.FieldID getFieldID() { return mFieldID; } - Object getValue() { return mValue; } - } - - public static class UnboundValue { - // Either mFieldID or mArgIndex should be set but not both. - List<Pair<Closure, Script.FieldID>> mFieldID; - // -1 means unset. Legal values are 0 .. n-1, where n is the number of - // arguments for the referencing closure. 
- List<Pair<Closure, Integer>> mArgIndex; - - UnboundValue() { - mFieldID = new ArrayList<Pair<Closure, Script.FieldID>>(); - mArgIndex = new ArrayList<Pair<Closure, Integer>>(); - } - - void addReference(Closure closure, int index) { - mArgIndex.add(Pair.create(closure, Integer.valueOf(index))); - } - - void addReference(Closure closure, Script.FieldID fieldID) { - mFieldID.add(Pair.create(closure, fieldID)); - } - - void set(Object value) { - for (Pair<Closure, Integer> p : mArgIndex) { - Closure closure = p.first; - int index = p.second.intValue(); - closure.setArg(index, value); - } - for (Pair<Closure, Script.FieldID> p : mFieldID) { - Closure closure = p.first; - Script.FieldID fieldID = p.second; - closure.setGlobal(fieldID, value); - } - } - } - - String mName; - List<Closure> mClosures; - List<UnboundValue> mInputs; - Future[] mOutputs; - - private static final String TAG = "ScriptGroup2"; - - public ScriptGroup2(long id, RenderScript rs) { - super(id, rs); - } - - ScriptGroup2(RenderScript rs, String name, List<Closure> closures, - List<UnboundValue> inputs, Future[] outputs) { - super(0, rs); - mName = name; - mClosures = closures; - mInputs = inputs; - mOutputs = outputs; - - long[] closureIDs = new long[closures.size()]; - for (int i = 0; i < closureIDs.length; i++) { - closureIDs[i] = closures.get(i).getID(rs); - } - long id = rs.nScriptGroup2Create(name, ScriptC.mCachePath, closureIDs); - setID(id); - } - - public Object[] execute(Object... 
inputs) { - if (inputs.length < mInputs.size()) { - Log.e(TAG, this.toString() + " receives " + inputs.length + " inputs, " + - "less than expected " + mInputs.size()); - return null; - } - - if (inputs.length > mInputs.size()) { - Log.i(TAG, this.toString() + " receives " + inputs.length + " inputs, " + - "more than expected " + mInputs.size()); - } - - for (int i = 0; i < mInputs.size(); i++) { - Object obj = inputs[i]; - if (obj instanceof Future || obj instanceof UnboundValue) { - Log.e(TAG, this.toString() + ": input " + i + - " is a future or unbound value"); - return null; - } - UnboundValue unbound = mInputs.get(i); - unbound.set(obj); - } - - mRS.nScriptGroup2Execute(getID(mRS)); - - Object[] outputObjs = new Object[mOutputs.length]; - int i = 0; - for (Future f : mOutputs) { - outputObjs[i++] = f.getValue(); - } - return outputObjs; - } - - /** - @hide Pending Android public API approval. - */ - public static final class Binding { - public Script.FieldID mField; - public Object mValue; - public Binding(Script.FieldID field, Object value) { - mField = field; - mValue = value; - } - } - - /** - @hide Pending Android public API approval. 
- */ - public static final class Builder { - RenderScript mRS; - List<Closure> mClosures; - List<UnboundValue> mInputs; - private static final String TAG = "ScriptGroup2.Builder"; - - public Builder(RenderScript rs) { - mRS = rs; - mClosures = new ArrayList<Closure>(); - mInputs = new ArrayList<UnboundValue>(); - } - - public Closure addKernel(Script.KernelID k, Type returnType, Object[] args, - Map<Script.FieldID, Object> globalBindings) { - Closure c = new Closure(mRS, k, returnType, args, globalBindings); - mClosures.add(c); - return c; - } - - public Closure addInvoke(Script.InvokeID invoke, Object[] args, - Map<Script.FieldID, Object> globalBindings) { - Closure c = new Closure(mRS, invoke, args, globalBindings); - mClosures.add(c); - return c; - } - - public UnboundValue addInput() { - UnboundValue unbound = new UnboundValue(); - mInputs.add(unbound); - return unbound; - } - - public Closure addKernel(Script.KernelID k, Type returnType, Object... argsAndBindings) { - ArrayList<Object> args = new ArrayList<Object>(); - Map<Script.FieldID, Object> bindingMap = new HashMap<Script.FieldID, Object>(); - if (!seperateArgsAndBindings(argsAndBindings, args, bindingMap)) { - return null; - } - return addKernel(k, returnType, args.toArray(), bindingMap); - } - - public Closure addInvoke(Script.InvokeID invoke, Object... argsAndBindings) { - ArrayList<Object> args = new ArrayList<Object>(); - Map<Script.FieldID, Object> bindingMap = new HashMap<Script.FieldID, Object>(); - if (!seperateArgsAndBindings(argsAndBindings, args, bindingMap)) { - return null; - } - return addInvoke(invoke, args.toArray(), bindingMap); - } - - public ScriptGroup2 create(String name, Future... 
outputs) { - if (name == null || name.isEmpty() || name.length() > 100 || - !name.equals(name.replaceAll("[^a-zA-Z0-9-]", "_"))) { - throw new RSIllegalArgumentException("invalid script group name"); - } - ScriptGroup2 ret = new ScriptGroup2(mRS, name, mClosures, mInputs, outputs); - return ret; - } - - private boolean seperateArgsAndBindings(Object[] argsAndBindings, - ArrayList<Object> args, - Map<Script.FieldID, Object> bindingMap) { - int i; - for (i = 0; i < argsAndBindings.length; i++) { - if (argsAndBindings[i] instanceof Binding) { - break; - } - args.add(argsAndBindings[i]); - } - - for (; i < argsAndBindings.length; i++) { - if (!(argsAndBindings[i] instanceof Binding)) { - return false; - } - Binding b = (Binding)argsAndBindings[i]; - bindingMap.put(b.mField, b.mValue); - } - - return true; - } - - } -} diff --git a/rs/java/android/renderscript/ScriptIntrinsicBLAS.java b/rs/java/android/renderscript/ScriptIntrinsicBLAS.java index 16b7033..06134e5 100644 --- a/rs/java/android/renderscript/ScriptIntrinsicBLAS.java +++ b/rs/java/android/renderscript/ScriptIntrinsicBLAS.java @@ -22,9 +22,13 @@ import java.lang.annotation.RetentionPolicy; /** * - * BLAS + * ScriptIntrinsicBLAS class provides high performance RenderScript APIs to BLAS. + * + * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard + * building blocks for performing basic vector and matrix operations. + * + * For detailed description of BLAS, please refer to http://www.netlib.org/blas/ * - * @hide **/ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { private Allocation mLUT; @@ -180,24 +184,40 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { private static final int RsBlas_bnnm = 1000; /** + * Create an intrinsic to access BLAS subroutines. 
+ * + * @param rs The RenderScript context + * @return ScriptIntrinsicBLAS */ public static ScriptIntrinsicBLAS create(RenderScript rs) { long id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs)); return new ScriptIntrinsicBLAS(id, rs); } + /** + * @hide + */ @IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE}) @Retention(RetentionPolicy.SOURCE) public @interface Transpose {} + /** + * @hide + */ @IntDef({UPPER, LOWER}) @Retention(RetentionPolicy.SOURCE) public @interface Uplo {} + /** + * @hide + */ @IntDef({NON_UNIT, UNIT}) @Retention(RetentionPolicy.SOURCE) public @interface Diag {} + /** + * @hide + */ @IntDef({LEFT, RIGHT}) @Retention(RetentionPolicy.SOURCE) public @interface Side {} @@ -242,7 +262,7 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { } static void validateUplo(@Uplo int Uplo) { - if (Uplo != LEFT && Uplo != RIGHT) { + if (Uplo != UPPER && Uplo != LOWER) { throw new RSRuntimeException("Invalid uplo passed to BLAS"); } } @@ -277,36 +297,124 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { expectedYDim = 1 + (N - 1) * incY; } if (X.getType().getX() != expectedXDim || - Y.getType().getY() != expectedXDim) { + Y.getType().getX() != expectedYDim) { throw new RSRuntimeException("Incorrect vector dimensions for GEMV"); } } - void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { + + /** + * SGEMV performs one of the matrix-vector operations + * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html + * + * @param TransA The type of transpose applied to matrix A. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 
+ * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + public void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); int M = A.getType().getY(); int N = A.getType().getX(); mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); } - void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { + + /** + * DGEMV performs one of the matrix-vector operations + * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html + * + * @param TransA The type of transpose applied to matrix A. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. + * @param incY The increment for the elements of vector y, must be larger than zero. 
+ */ + public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); int M = A.getType().getY(); int N = A.getType().getX(); mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); } - void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { + + /** + * CGEMV performs one of the matrix-vector operations + * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html + * + * @param TransA The type of transpose applied to matrix A. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. 
+ */ + public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY); int M = A.getType().getY(); int N = A.getType().getX(); mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); } - void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { + + /** + * ZGEMV performs one of the matrix-vector operations + * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html + * + * @param TransA The type of transpose applied to matrix A. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. 
+ */ + public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); int M = A.getType().getY(); int N = A.getType().getX(); mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); } - void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { + /** + * SGBMV performs one of the matrix-vector operations + * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html + * + * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), + * but only the region M*(KL+KU+1) will be referenced. The following subroutine is an + * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. + * for i in range(0, m): + * for j in range(max(0, i-kl), min(i+ku+1, n)): + * b[i, j-i+kl] = a[i, j] + * + * @param TransA The type of transpose applied to matrix A. + * @param KL The number of sub-diagonals of the matrix A. + * @param KU The number of super-diagonals of the matrix A. + * @param alpha The scalar alpha. + * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. + * @param incY The increment for the elements of vector y, must be larger than zero. 
+ */ + public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { // GBMV has the same validation requirements as GEMV + KL and KU >= 0 validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); if (KL < 0 || KU < 0) { @@ -316,7 +424,32 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { int N = A.getType().getX(); mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU); } - void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { + + /** + * DGBMV performs one of the matrix-vector operations + * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html + * + * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), + * but only the region M*(KL+KU+1) will be referenced. The following subroutine is an + * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. + * for i in range(0, m): + * for j in range(max(0, i-kl), min(i+ku+1, n)): + * b[i, j-i+kl] = a[i, j] + * + * @param TransA The type of transpose applied to matrix A. + * @param KL The number of sub-diagonals of the matrix A. + * @param KU The number of super-diagonals of the matrix A. + * @param alpha The scalar alpha. + * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 
+ * @param incY The increment for the elements of vector y, must be larger than zero. + */ + public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { // GBMV has the same validation requirements as GEMV + KL and KU >= 0 validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); if (KL < 0 || KU < 0) { @@ -326,7 +459,32 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { int N = A.getType().getX(); mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU); } - void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { + + /** + * CGBMV performs one of the matrix-vector operations + * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html + * + * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), + * but only the region M*(KL+KU+1) will be referenced. The following subroutine is an + * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. + * for i in range(0, m): + * for j in range(max(0, i-kl), min(i+ku+1, n)): + * b[i, j-i+kl] = a[i, j] + * + * @param TransA The type of transpose applied to matrix A. + * @param KL The number of sub-diagonals of the matrix A. + * @param KU The number of super-diagonals of the matrix A. + * @param alpha The scalar alpha. + * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. 
+ * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { // GBMV has the same validation requirements as GEMV + KL and KU >= 0 validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY); if (KL < 0 || KU < 0) { @@ -336,7 +494,32 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { int N = A.getType().getX(); mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU); } - void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { + + /** + * ZGBMV performs one of the matrix-vector operations + * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html + * + * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), + * but only the region M*(KL+KU+1) will be referenced. The following subroutine is an + * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. + * for i in range(0, m): + * for j in range(max(0, i-kl), min(i+ku+1, n)): + * b[i, j-i+kl] = a[i, j] + * + * @param TransA The type of transpose applied to matrix A. + * @param KL The number of sub-diagonals of the matrix A. + * @param KU The number of super-diagonals of the matrix A. + * @param alpha The scalar alpha. + * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 
+ * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { // GBMV has the same validation requirements as GEMV + KL and KU >= 0 validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); if (KL < 0 || KU < 0) { @@ -347,8 +530,10 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU); } - static void validateTRMV(Element e, @Transpose int TransA, Allocation A, Allocation X, int incX) { + static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { validateTranspose(TransA); + validateUplo(Uplo); + validateDiag(Diag); int N = A.getType().getY(); if (A.getType().getX() != N) { throw new RSRuntimeException("A must be a square matrix for TRMV"); @@ -387,158 +572,636 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { } int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); + //is it really doing anything? 
if (Ap.getType().getX() != ((N * (N+1)) / 2)) { throw new RSRuntimeException("Invalid dimension for Ap"); } - + if (incX <= 0) { + throw new RSRuntimeException("Vector increments must be greater than 0"); + } int expectedXDim = 1 + (N - 1) * incX; if (X.getType().getX() != expectedXDim) { - throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); + throw new RSRuntimeException("Incorrect vector dimensions for TPMV"); } return N; } - void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { - validateTRMV(Element.F32(mRS), TransA, A, X, incX); + /** + * STRMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x + * + * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { + validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); int N = A.getType().getY(); mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); } - void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { - validateTRMV(Element.F64(mRS), TransA, A, X, incX); + + /** + * DTRMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x + * + * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { + validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); int N = A.getType().getY(); mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); } - void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { - validateTRMV(Element.F32_2(mRS), TransA, A, X, incX); + + /** + * CTRMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x or x := A**H*x + * + * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 
+ * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { + validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); int N = A.getType().getY(); mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); } - void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { - validateTRMV(Element.F64_2(mRS), TransA, A, X, incX); + + /** + * ZTRMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x or x := A**H*x + * + * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { + validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); int N = A.getType().getY(); mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); } - void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { - // TBMV has the same requirements as TRMV - validateTRMV(Element.F32(mRS), TransA, A, X, incX); + + /** + * STBMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x + * + * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html + * + * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine is an + * example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param K The number of off-diagonals of the matrix A. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { + // TBMV has the same requirements as TRMV + K >= 0 + if (K < 0) { + throw new RSRuntimeException("K must be greater than or equal to 0"); + } + validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); int N = A.getType().getY(); mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); } - void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { - // TBMV has the same requirements as TRMV - validateTRMV(Element.F64(mRS), TransA, A, X, incX); + + /** + * DTBMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x + * + * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html + * + * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine is an + * example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param K The number of off-diagonals of the matrix A. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { + // TBMV has the same requirements as TRMV + K >= 0 + if (K < 0) { + throw new RSRuntimeException("K must be greater than or equal to 0"); + } + validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); int N = A.getType().getY(); mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); } - void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { - // TBMV has the same requirements as TRMV - validateTRMV(Element.F32_2(mRS), TransA, A, X, incX); + + /** + * CTBMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x or x := A**H*x + * + * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html + * + * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine is an + * example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param K The number of off-diagonals of the matrix A. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { + // TBMV has the same requirements as TRMV + K >= 0 + if (K < 0) { + throw new RSRuntimeException("K must be greater than or equal to 0"); + } + validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); int N = A.getType().getY(); mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); } - void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { - // TBMV has the same requirements as TRMV - validateTRMV(Element.F64_2(mRS), TransA, A, X, incX); + + /** + * ZTBMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x or x := A**H*x + * + * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param K The number of off-diagonals of the matrix A + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { + // TBMV has the same requirements as TRMV + K >= 0 + if (K < 0) { + throw new RSRuntimeException("K must be greater than or equal to 0"); + } + validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); int N = A.getType().getY(); mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); } - void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { + + /** + * STPMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x + * + * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); } - void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { + + /** + * DTPMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x + * + * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); } - void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { + + /** + * CTPMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x or x := A**H*x + * + * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); } - void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { + + /** + * ZTPMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x or x := A**H*x + * + * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); } - void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { + + /** + * STRSV solves one of the systems of equations + * A*x = b or A**T*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { // TRSV is the same as TRMV - validateTRMV(Element.F32(mRS), TransA, A, X, incX); + validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); int N = A.getType().getY(); mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); } - void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { + + /** + * DTRSV solves one of the systems of equations + * A*x = b or A**T*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. 
+ * @param Diag Specifies whether or not A is unit triangular. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { // TRSV is the same as TRMV - validateTRMV(Element.F64(mRS), TransA, A, X, incX); + validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); int N = A.getType().getY(); mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); } - void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { + + /** + * CTRSV solves one of the systems of equations + * A*x = b or A**T*x = b or A**H*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { // TRSV is the same as TRMV - validateTRMV(Element.F32_2(mRS), TransA, A, X, incX); + validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); int N = A.getType().getY(); mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); } - void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { + + /** + * ZTRSV solves one of the systems of equations + * A*x = b or A**T*x = b or A**H*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { // TRSV is the same as TRMV - validateTRMV(Element.F64_2(mRS), TransA, A, X, incX); + validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); int N = A.getType().getY(); mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); } - void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { - // TBSV is the same as TRMV - validateTRMV(Element.F32(mRS), TransA, A, X, incX); + + /** + * STBSV solves one of the systems of equations + * A*x = b or A**T*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param K The number of off-diagonals of the matrix A + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { + // TBSV is the same as TRMV + K >= 0 + validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); int N = A.getType().getY(); if (K < 0) { throw new RSRuntimeException("Number of diagonals must be positive"); } mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); } - void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { - // TBSV is the same as TRMV - validateTRMV(Element.F64(mRS), TransA, A, X, incX); + + /** + * DTBSV solves one of the systems of equations + * A*x = b or A**T*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param K The number of off-diagonals of the matrix A + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { + // TBSV is the same as TRMV + K >= 0 + validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); int N = A.getType().getY(); if (K < 0) { throw new RSRuntimeException("Number of diagonals must be positive"); } mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); } - void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { - // TBSV is the same as TRMV - validateTRMV(Element.F32_2(mRS), TransA, A, X, incX); + + /** + * CTBSV solves one of the systems of equations + * A*x = b or A**T*x = b or A**H*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param K The number of off-diagonals of the matrix A + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { + // TBSV is the same as TRMV + K >= 0 + validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); int N = A.getType().getY(); if (K < 0) { throw new RSRuntimeException("Number of diagonals must be positive"); } mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); } - void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { - // TBSV is the same as TRMV - validateTRMV(Element.F64_2(mRS), TransA, A, X, incX); + + /** + * ZTBSV solves one of the systems of equations + * A*x = b or A**T*x = b or A**H*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param K The number of off-diagonals of the matrix A + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { + // TBSV is the same as TRMV + K >= 0 + validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); int N = A.getType().getY(); if (K < 0) { throw new RSRuntimeException("Number of diagonals must be positive"); } mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); } - void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { + + /** + * STPSV solves one of the systems of equations + * A*x = b or A**T*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { // TPSV is same as TPMV int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); } - void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { + + /** + * DTPSV solves one of the systems of equations + * A*x = b or A**T*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { // TPSV is same as TPMV int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); } - void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { + + /** + * CTPSV solves one of the systems of equations + * A*x = b or A**T*x = b or A**H*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { // TPSV is same as TPMV int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); } - void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { + + /** + * ZTPSV solves one of the systems of equations + * A*x = b or A**T*x = b or A**H*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. 
+ */ + public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { // TPSV is same as TPMV int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); @@ -594,7 +1257,9 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { if (Ap.getType().getX() != ((N * (N+1)) / 2)) { throw new RSRuntimeException("Invalid dimension for Ap"); } - + if (incX <= 0 || incY <= 0) { + throw new RSRuntimeException("Vector increments must be greater than 0"); + } int expectedXDim = 1 + (N - 1) * incX; if (X.getType().getX() != expectedXDim) { throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); @@ -623,8 +1288,10 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { if (N < 1 || M < 1) { throw new RSRuntimeException("M and N must be 1 or greater for GER"); } - - int expectedXDim = 1 + (N - 1) * incX; + if (incX <= 0 || incY <= 0) { + throw new RSRuntimeException("Vector increments must be greater than 0"); + } + int expectedXDim = 1 + (M - 1) * incX; if (X.getType().getX() != expectedXDim) { throw new RSRuntimeException("Incorrect vector dimensions for GER"); } @@ -650,7 +1317,9 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { if (N != A.getType().getY()) { throw new RSRuntimeException("A must be a symmetric matrix"); } - + if (incX <= 0) { + throw new RSRuntimeException("Vector increments must be greater than 0"); + } int expectedXDim = 1 + (N - 1) * incX; if (X.getType().getX() != expectedXDim) { throw new RSRuntimeException("Incorrect vector dimensions for SYR"); @@ -675,10 +1344,12 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { if (Ap.getType().getX() != ((N * (N+1)) / 2)) { throw new RSRuntimeException("Invalid dimension for Ap"); } - + if (incX <= 0) { + throw new 
RSRuntimeException("Vector increments must be greater than 0"); + } int expectedXDim = 1 + (N - 1) * incX; if (X.getType().getX() != expectedXDim) { - throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); + throw new RSRuntimeException("Incorrect vector dimensions for SPR"); } return N; @@ -701,7 +1372,9 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { if (N != A.getType().getY()) { throw new RSRuntimeException("A must be a symmetric matrix"); } - + if (incX <= 0 || incY <= 0) { + throw new RSRuntimeException("Vector increments must be greater than 0"); + } int expectedXDim = 1 + (N - 1) * incX; int expectedYDim = 1 + (N - 1) * incY; if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { @@ -729,81 +1402,390 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { if (Ap.getType().getX() != ((N * (N+1)) / 2)) { throw new RSRuntimeException("Invalid dimension for Ap"); } - + if (incX <= 0 || incY <= 0) { + throw new RSRuntimeException("Vector increments must be greater than 0"); + } int expectedXDim = 1 + (N - 1) * incX; int expectedYDim = 1 + (N - 1) * incY; if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { - throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); + throw new RSRuntimeException("Incorrect vector dimensions for SPR2"); } return N; } - void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { + /** + * SSYMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 
+ * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); } - void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { - // SBMV is the same as SYMV + + /** + * SSBMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine is an + * example showing how to convert a UPPER triangular matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. + * @param K The number of off-diagonals of the matrix A + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
+ * @param incY The increment for the elements of vector y, must be larger than zero. + */ + public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { + // SBMV is the same as SYMV + K >= 0 + if (K < 0) { + throw new RSRuntimeException("K must be greater than or equal to 0"); + } int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); } - void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) { + + /** + * SSPMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2. + * The following subroutine is an example showing how to convert a UPPER triangular matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. + * @param alpha The scalar alpha. + * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. + * @param incY The increment for the elements of vector y, must be larger than zero.
+ */ + public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) { int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY); mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); } - void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + + /** + * SGER performs the rank 1 operation + * A := alpha*x*y**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html + * + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. + */ + public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { int M = A.getType().getY(); int N = A.getType().getX(); + validateGER(Element.F32(mRS), X, incX, Y, incY, A); mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0); } - void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { + + /** + * SSYR performs the rank 1 operation + * A := alpha*x*x**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 
+ * @param incX The increment for the elements of vector x, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. + */ + public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A); mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0); } - void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { + + /** + * SSPR performs the rank 1 operation + * A := alpha*x*x**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 
+ */ + public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap); mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0); } - void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + + /** + * SSYR2 performs the symmetric rank 2 operation + * A := alpha*x*y**T + alpha*y*x**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 
+ */ + public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A); mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0); } - void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { + + /** + * SSPR2 performs the symmetric rank 2 operation + * A := alpha*x*y**T + alpha*y*x**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 
+ */ + public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap); mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0); } - void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { + + /** + * DSYMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. + * @param incY The increment for the elements of vector y, must be larger than zero. 
+ */ + public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); } - void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { - // SBMV is the same as SYMV + + /** + * DSBMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. + * @param K The number of off-diagonals of the matrix A + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. + * @param incY The increment for the elements of vector y, must be larger than zero. 
+ */ + public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { + // SBMV is the same as SYMV + K >= 0 + if (K < 0) { + throw new RSRuntimeException("K must be greater than or equal to 0"); + } int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); } - void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) { + + /** + * DSPMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. + * @param alpha The scalar alpha. + * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. + * @param incY The increment for the elements of vector y, must be larger than zero. 
+ */ + public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) { int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY); mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); } - void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + + /** + * DGER performs the rank 1 operation + * A := alpha*x*y**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html + * + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. + */ + public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { int M = A.getType().getY(); int N = A.getType().getX(); + validateGER(Element.F64(mRS), X, incX, Y, incY, A); mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0); } - void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { + + /** + * DSYR performs the rank 1 operation + * A := alpha*x*x**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 
+ * @param incX The increment for the elements of vector x, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. + */ + public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A); mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0); } - void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { + + /** + * DSPR performs the rank 1 operation + * A := alpha*x*x**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 
+ */ + public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap); mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0); } - void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + + /** + * DSYR2 performs the symmetric rank 2 operation + * A := alpha*x*y**T + alpha*y*x**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 
+ */ + public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A); mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0); } - void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { + + /** + * DSPR2 performs the symmetric rank 2 operation + * A := alpha*x*y**T + alpha*y*x**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 
+ */ + public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap); mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0); } @@ -825,8 +1807,10 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { int M = A.getType().getY(); int N = A.getType().getX(); - - int expectedXDim = 1 + (N - 1) * incX; + if (incX <= 0 || incY <= 0) { + throw new RSRuntimeException("Vector increments must be greater than 0"); + } + int expectedXDim = 1 + (M - 1) * incX; if (X.getType().getX() != expectedXDim) { throw new RSRuntimeException("Incorrect vector dimensions for GERU"); } @@ -837,12 +1821,51 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { } - void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { + /** + * CHEMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. 
+ */ + public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { // HEMV is the same as SYR2 validation-wise int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); } - void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { + + /** + * CHBMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. + * @param K The number of off-diagonals of the matrix A + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. 
+ */ + public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { // HBMV is the same as SYR2 validation-wise int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); if (K < 0) { @@ -850,50 +1873,214 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { } mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); } - void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { + + /** + * CHPMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. + * @param alpha The scalar alpha. + * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. 
+ */ + public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { // HPMV is the same as SPR2 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); } - void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + + /** + * CGERU performs the rank 1 operation + * A := alpha*x*y**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html + * + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. + */ + public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); int M = A.getType().getY(); int N = A.getType().getX(); mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); } - void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + + /** + * CGERC performs the rank 1 operation + * A := alpha*x*y**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html + * + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 
+ * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. + */ + public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { // same as GERU validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); int M = A.getType().getY(); int N = A.getType().getX(); mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); } - void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { + + /** + * CHER performs the rank 1 operation + * A := alpha*x*x**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 
+ */ + public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { // same as SYR - int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A); + int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A); mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0); } - void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { + + /** + * CHPR performs the rank 1 operation + * A := alpha*x*x**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 
+ */ + public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { // equivalent to SPR for validation int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap); mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0); } - void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + + /** + * CHER2 performs the symmetric rank 2 operation + * A := alpha*x*y**H + alpha*y*x**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 
+ */ + public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { // same as SYR2 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); } - void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { + + /** + * CHPR2 performs the symmetric rank 2 operation + * A := alpha*x*y**H + alpha*y*x**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 
+ */ + public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { // same as SPR2 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0); } - void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { + + /** + * ZHEMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. 
+ */ + public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { // HEMV is the same as SYR2 validation-wise int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); } - void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { + + /** + * ZHBMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. + * @param K The number of off-diagonals of the matrix A + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. 
+ */ + public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { // HBMV is the same as SYR2 validation-wise int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); if (K < 0) { @@ -901,40 +2088,164 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { } mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); } - void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { + + /** + * ZHPMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2. + * The following subroutine is an example showing how to convert an UPPER triangular matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. + * @param alpha The scalar alpha. + * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. 
+ */ + public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { // HPMV is the same as SPR2 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); } - void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + + /** + * ZGERU performs the rank 1 operation + * A := alpha*x*y**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html + * + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. + */ + public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); int M = A.getType().getY(); int N = A.getType().getX(); mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); } - void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + + /** + * ZGERC performs the rank 1 operation + * A := alpha*x*y**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html + * + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 
+ * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. + */ + public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { // same as GERU validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); int M = A.getType().getY(); int N = A.getType().getX(); mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); } - void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { + + /** + * ZHER performs the rank 1 operation + * A := alpha*x*x**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 
+ */ + public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { // same as SYR - int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A); + int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A); mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0); } - void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { + + /** + * ZHPR performs the rank 1 operation + * A := alpha*x*x**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2. + * The following subroutine is an example showing how to convert an UPPER triangular matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 
+ */ + public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { // equivalent to SPR for validation int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap); mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0); } - void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + + /** + * ZHER2 performs the symmetric rank 2 operation + * A := alpha*x*y**H + alpha*y*x**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 
+ */ + public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { // same as SYR2 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); } - void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { + + /** + * ZHPR2 performs the symmetric rank 2 operation + * A := alpha*x*y**H + alpha*y*x**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2. + * The following subroutine is an example showing how to convert an UPPER triangular matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 
+ */ + public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { // same as SPR2 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0); @@ -946,60 +2257,86 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { */ static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) { - int aX = -1, aY = -1, bX = -1, bY = -1, cX = -1, cY = -1; + int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1; if ((A != null && !A.getType().getElement().isCompatible(e)) || (B != null && !B.getType().getElement().isCompatible(e)) || (C != null && !C.getType().getElement().isCompatible(e))) { throw new RSRuntimeException("Called BLAS with wrong Element type"); } - if (C != null) { - cX = C.getType().getY(); - cY = C.getType().getX(); + if (C == null) { + //since matrix C is used to store the result, it cannot be null. 
+ throw new RSRuntimeException("Allocation C cannot be null"); } + cM = C.getType().getY(); + cN = C.getType().getX(); + if (Side == RIGHT) { + if ((A == null && B != null) || (A != null && B == null)) { + throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa"); + } if (B != null) { - bX = A.getType().getY(); - bY = A.getType().getX(); + bM = A.getType().getY(); + bN = A.getType().getX(); } if (A != null) { - aX = B.getType().getY(); - aY = B.getType().getX(); + aM = B.getType().getY(); + aN = B.getType().getX(); } } else { if (A != null) { - if (TransA == TRANSPOSE) { - aY = A.getType().getY(); - aX = A.getType().getX(); + if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) { + aN = A.getType().getY(); + aM = A.getType().getX(); } else { - aX = A.getType().getY(); - aY = A.getType().getX(); + aM = A.getType().getY(); + aN = A.getType().getX(); } } if (B != null) { - if (TransB == TRANSPOSE) { - bY = B.getType().getY(); - bX = B.getType().getX(); + if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) { + bN = B.getType().getY(); + bM = B.getType().getX(); } else { - bX = B.getType().getY(); - bY = B.getType().getX(); + bM = B.getType().getY(); + bN = B.getType().getX(); } } } if (A != null && B != null && C != null) { - if (aY != bX || aX != cX || bY != cY) { + if (aN != bM || aM != cM || bN != cN) { throw new RSRuntimeException("Called BLAS with invalid dimensions"); } } else if (A != null && C != null) { - // A and C only - if (aX != cY || aY != cX) { + // A and C only, for SYRK + if (cM != cN) { + throw new RSRuntimeException("Matrix C is not symmetric"); + } + if (aM != cM) { throw new RSRuntimeException("Called BLAS with invalid dimensions"); } } else if (A != null && B != null) { // A and B only + if (aN != bM) { + throw new RSRuntimeException("Called BLAS with invalid dimensions"); + } } } + /** + * SGEMM performs one of the matrix-matrix operations + * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T 
+ * + * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html + * + * @param TransA The type of transpose applied to matrix A. + * @param TransB The type of transpose applied to matrix B. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. + */ public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A, Allocation B, float beta, Allocation C) { validateTranspose(TransA); @@ -1007,14 +2344,14 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C); int M = -1, N = -1, K = -1; - if (TransA == TRANSPOSE) { + if (TransA != NO_TRANSPOSE) { M = A.getType().getX(); K = A.getType().getY(); } else { M = A.getType().getY(); K = A.getType().getX(); } - if (TransB == TRANSPOSE) { + if (TransB != NO_TRANSPOSE) { N = B.getType().getY(); } else { N = B.getType().getX(); @@ -1022,20 +2359,35 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0); } + + /** + * DGEMM performs one of the matrix-matrix operations + * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T + * + * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html + * + * @param TransA The type of transpose applied to matrix A. + * @param TransB The type of transpose applied to matrix B. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 
+ * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. + */ public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A, Allocation B, double beta, Allocation C) { validateTranspose(TransA); validateTranspose(TransB); validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C); int M = -1, N = -1, K = -1; - if (TransA == TRANSPOSE) { + if (TransA != NO_TRANSPOSE) { M = A.getType().getX(); K = A.getType().getY(); } else { M = A.getType().getY(); K = A.getType().getX(); } - if (TransB == TRANSPOSE) { + if (TransB != NO_TRANSPOSE) { N = B.getType().getY(); } else { N = B.getType().getX(); @@ -1043,20 +2395,35 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0); } + + /** + * CGEMM performs one of the matrix-matrix operations + * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H + * + * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html + * + * @param TransA The type of transpose applied to matrix A. + * @param TransB The type of transpose applied to matrix B. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 
+ */ public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { validateTranspose(TransA); validateTranspose(TransB); validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C); int M = -1, N = -1, K = -1; - if (TransA == TRANSPOSE) { + if (TransA != NO_TRANSPOSE) { M = A.getType().getX(); K = A.getType().getY(); } else { M = A.getType().getY(); K = A.getType().getX(); } - if (TransB == TRANSPOSE) { + if (TransB != NO_TRANSPOSE) { N = B.getType().getY(); } else { N = B.getType().getX(); @@ -1065,20 +2432,34 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); } + /** + * ZGEMM performs one of the matrix-matrix operations + * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H + * + * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html + * + * @param TransA The type of transpose applied to matrix A. + * @param TransB The type of transpose applied to matrix B. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. + * @param beta The scalar beta. 
+ * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. + */ public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { validateTranspose(TransA); validateTranspose(TransB); validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C); int M = -1, N = -1, K = -1; - if (TransA == TRANSPOSE) { + if (TransA != NO_TRANSPOSE) { M = A.getType().getX(); K = A.getType().getY(); } else { M = A.getType().getY(); K = A.getType().getX(); } - if (TransB == TRANSPOSE) { + if (TransB != NO_TRANSPOSE) { N = B.getType().getY(); } else { N = B.getType().getX(); @@ -1087,45 +2468,130 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); } + /** + * SSYMM performs one of the matrix-matrix operations + * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 
+ */ public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A, Allocation B, float beta, Allocation C) { validateSide(Side); validateUplo(Uplo); + //For SYMM, Matrix A should be symmetric + if (A.getType().getX() != A.getType().getY()) { + throw new RSRuntimeException("Matrix A is not symmetric"); + } validateL3(Element.F32(mRS), 0, 0, Side, A, B, C); mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0); } + + /** + * DSYMM performs one of the matrix-matrix operations + * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 
+ */ public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A, Allocation B, double beta, Allocation C) { validateSide(Side); validateUplo(Uplo); + if (A.getType().getX() != A.getType().getY()) { + throw new RSRuntimeException("Matrix A is not symmetric"); + } validateL3(Element.F64(mRS), 0, 0, Side, A, B, C); mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0); } + + /** + * CSYMM performs one of the matrix-matrix operations + * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 
+ */ public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { validateSide(Side); validateUplo(Uplo); + if (A.getType().getX() != A.getType().getY()) { + throw new RSRuntimeException("Matrix A is not symmetric"); + } validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C); mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); } + + /** + * ZSYMM performs one of the matrix-matrix operations + * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 
+ */ public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { validateSide(Side); validateUplo(Uplo); + if (A.getType().getX() != A.getType().getY()) { + throw new RSRuntimeException("Matrix A is not symmetric"); + } validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C); mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); } + /** + * SSYRK performs one of the symmetric rank k operations + * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 
+ */ public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { validateTranspose(Trans); validateUplo(Uplo); validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C); int K = -1; - if (Trans == TRANSPOSE) { + if (Trans != NO_TRANSPOSE) { K = A.getType().getY(); } else { K = A.getType().getX(); @@ -1134,42 +2600,83 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0); } + /** + * DSYRK performs one of the symmetric rank k operations + * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 
+ */ public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { validateTranspose(Trans); validateUplo(Uplo); validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C); int K = -1; - if (Trans == TRANSPOSE) { + if (Trans != NO_TRANSPOSE) { K = A.getType().getY(); } else { K = A.getType().getX(); } mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0); } - public void CSYRK(@Uplo int Uplo, @Transpose int Trans, float alphaX, float alphaY, Allocation A, float betaX, float betaY, Allocation C) { + + /** + * CSYRK performs one of the symmetric rank k operations + * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 
+ */ + public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) { validateTranspose(Trans); validateUplo(Uplo); validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C); int K = -1; - if (Trans == TRANSPOSE) { + if (Trans != NO_TRANSPOSE) { K = A.getType().getY(); } else { K = A.getType().getX(); } - mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alphaX, alphaY, A.getID(mRS), 0, betaX, betaY, + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); } - public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, double alphaX, double alphaY, Allocation A, double betaX, double betaY, Allocation C) { + + /** + * ZSYRK performs one of the symmetric rank k operations + * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 
+ */ + public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) { validateTranspose(Trans); validateUplo(Uplo); validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C); int K = -1; - if (Trans == TRANSPOSE) { + if (Trans != NO_TRANSPOSE) { K = A.getType().getY(); } else { K = A.getType().getX(); } - mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alphaX, alphaY, A.getID(mRS), 0, betaX, betaY, + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); } @@ -1190,7 +2697,7 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { // check rows versus C Cdim = A.getType().getY(); } - if (C.getType().getX() != Cdim && C.getType().getY() != Cdim) { + if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) { throw new RSRuntimeException("Invalid symmetric matrix in SYR2K"); } // A dims == B dims @@ -1198,78 +2705,154 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { throw new RSRuntimeException("Invalid A and B in SYR2K"); } } + + /** + * SSYR2K performs one of the symmetric rank 2k operations + * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 
+ */ public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) { validateUplo(Uplo); validateSYR2K(Element.F32(mRS), Trans, A, B, C); int K = -1; - if (Trans == TRANSPOSE) { + if (Trans != NO_TRANSPOSE) { K = A.getType().getY(); } else { K = A.getType().getX(); } mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0); } + + /** + * DSYR2K performs one of the symmetric rank 2k operations + * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 
+ */ public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) { validateUplo(Uplo); validateSYR2K(Element.F64(mRS), Trans, A, B, C); int K = -1; - if (Trans == TRANSPOSE) { + if (Trans != NO_TRANSPOSE) { K = A.getType().getY(); } else { K = A.getType().getX(); } - mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0); } + + /** + * CSYR2K performs one of the symmetric rank 2k operations + * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 
+ */ public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { validateUplo(Uplo); validateSYR2K(Element.F32_2(mRS), Trans, A, B, C); int K = -1; - if (Trans == TRANSPOSE) { + if (Trans != NO_TRANSPOSE) { K = A.getType().getY(); } else { K = A.getType().getX(); } - mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); } + + /** + * ZSYR2K performs one of the symmetric rank 2k operations + * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 
+ */ public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { validateUplo(Uplo); validateSYR2K(Element.F64_2(mRS), Trans, A, B, C); int K = -1; - if (Trans == TRANSPOSE) { + if (Trans != NO_TRANSPOSE) { K = A.getType().getY(); } else { K = A.getType().getX(); } - mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); } static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { validateSide(Side); validateTranspose(TransA); - int aX = -1, aY = -1, bX = -1, bY = -1; + int aM = -1, aN = -1, bM = -1, bN = -1; if (!A.getType().getElement().isCompatible(e) || !B.getType().getElement().isCompatible(e)) { throw new RSRuntimeException("Called BLAS with wrong Element type"); } - if (TransA == TRANSPOSE) { - aY = A.getType().getY(); - aX = A.getType().getX(); - } else { - aY = A.getType().getX(); - aX = A.getType().getY(); + + aM = A.getType().getY(); + aN = A.getType().getX(); + if (aM != aN) { + throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A"); } - bX = B.getType().getY(); - bY = B.getType().getX(); + + bM = B.getType().getY(); + bN = B.getType().getX(); if (Side == LEFT) { - if (aX == 0 || aY != bX) { + if (aN != bM) { throw new RSRuntimeException("Called TRMM with invalid matrices"); } } else { - if (bY != aX || aY == 0) { + if (bN != aM) { throw new RSRuntimeException("Called TRMM with invalid matrices"); } } } + + /** + * STRMM performs one of the matrix-matrix operations + * B := alpha*op(A)*B or B := alpha*B*op(A) + * op(A) is one of op(A) = A or op(A) = A**T + * + * Details: 
http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether matrix A is upper or lower triangular. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. + */ public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { validateUplo(Uplo); validateDiag(Diag); @@ -1277,30 +2860,78 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS), 0.f, 0, 0, 0, 0, 0); } + + /** + * DTRMM performs one of the matrix-matrix operations + * B := alpha*op(A)*B or B := alpha*B*op(A) + * op(A) is one of op(A) = A or op(A) = A**T + * + * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether matrix A is upper or lower triangular. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 
+ */ public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { validateUplo(Uplo); validateDiag(Diag); validateTRMM(Element.F64(mRS), Side, TransA, A, B); - mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, - alpha, A.getID(mRS), B.getID(mRS), 0.f, 0, 0, 0, 0, 0); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, + alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0); } + + /** + * CTRMM performs one of the matrix-matrix operations + * B := alpha*op(A)*B or B := alpha*B*op(A) + * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H + * + * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether matrix A is upper or lower triangular. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 
+ */ public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { validateUplo(Uplo); validateDiag(Diag); validateTRMM(Element.F32_2(mRS), Side, TransA, A, B); - mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); } + + /** + * ZTRMM performs one of the matrix-matrix operations + * B := alpha*op(A)*B or B := alpha*B*op(A) + * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H + * + * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether matrix A is upper or lower triangular. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 
+ */ public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { validateUplo(Uplo); validateDiag(Diag); validateTRMM(Element.F64_2(mRS), Side, TransA, A, B); - mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); } static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { - int adim = -1, bX = -1, bY = -1; + int adim = -1, bM = -1, bN = -1; validateSide(Side); validateTranspose(TransA); if (!A.getType().getElement().isCompatible(e) || @@ -1314,20 +2945,36 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { // for now we assume adapters are sufficient, will reevaluate in the future throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A"); } - bX = B.getType().getY(); - bY = B.getType().getX(); + bM = B.getType().getY(); + bN = B.getType().getX(); if (Side == LEFT) { // A is M*M - if (adim != bY) { + if (adim != bM) { throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); } } else { // A is N*N - if (adim != bX) { + if (adim != bN) { throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); } } } + + /** + * STRSM solves one of the matrix equations + * op(A)*X := alpha*B or X*op(A) := alpha*B + * op(A) is one of op(A) = A or op(A) = A**T + * + * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether matrix A is upper or lower triangular. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. 
+ * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. + */ public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { validateUplo(Uplo); validateDiag(Diag); @@ -1335,25 +2982,73 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0); } + + /** + * DTRSM solves one of the matrix equations + * op(A)*X := alpha*B or X*op(A) := alpha*B + * op(A) is one of op(A) = A or op(A) = A**T + * + * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether matrix A is upper or lower triangular. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 
+ */ public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { validateUplo(Uplo); validateDiag(Diag); validateTRSM(Element.F64(mRS), Side, TransA, A, B); - mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0); } + + /** + * CTRSM solves one of the matrix equations + * op(A)*X := alpha*B or X*op(A) := alpha*B + * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H + * + * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether matrix A is upper or lower triangular. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 
+ */ public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { validateUplo(Uplo); validateDiag(Diag); validateTRSM(Element.F32_2(mRS), Side, TransA, A, B); - mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); } + + /** + * ZTRSM solves one of the matrix equations + * op(A)*X := alpha*B or X*op(A) := alpha*B + * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H + * + * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether matrix A is upper or lower triangular. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 
+ */ public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { validateUplo(Uplo); validateDiag(Diag); validateTRSM(Element.F64_2(mRS), Side, TransA, A, B); - mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); } @@ -1380,17 +3075,47 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { throw new RSRuntimeException("Called HEMM with mismatched B and C"); } } - public void CHEMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A, Allocation B, float beta, Allocation C) { + + /** + * CHEMM performs one of the matrix-matrix operations + * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 
+ */ + public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { validateUplo(Uplo); validateHEMM(Element.F32_2(mRS), Side, A, B, C); mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, - alpha, 0, A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0); + alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); } - public void ZHEMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A, Allocation B, double beta, Allocation C) { + + /** + * ZHEMM performs one of the matrix-matrix operations + * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 
+ */ + public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { validateUplo(Uplo); - validateHEMM(Element.F32_2(mRS), Side, A, B, C); + validateHEMM(Element.F64_2(mRS), Side, A, B, C); mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, - alpha, 0, A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0); + alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); } static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) { @@ -1404,20 +3129,34 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { throw new RSRuntimeException("Called HERK with non-square C"); } if (Trans == NO_TRANSPOSE) { - if (cdim != A.getType().getX()) { + if (cdim != A.getType().getY()) { throw new RSRuntimeException("Called HERK with invalid A"); } } else { - if (cdim != A.getType().getY()) { + if (cdim != A.getType().getX()) { throw new RSRuntimeException("Called HERK with invalid A"); } } } + + /** + * CHERK performs one of the hermitian rank k operations + * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 
+ */ public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { validateUplo(Uplo); validateHERK(Element.F32_2(mRS), Trans, A, C); int k = 0; - if (Trans == TRANSPOSE) { + if (Trans == CONJ_TRANSPOSE) { k = A.getType().getY(); } else { k = A.getType().getX(); @@ -1425,11 +3164,25 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0); } + + /** + * ZHERK performs one of the hermitian rank k operations + * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 
+ */ public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { validateUplo(Uplo); validateHERK(Element.F64_2(mRS), Trans, A, C); int k = 0; - if (Trans == TRANSPOSE) { + if (Trans == CONJ_TRANSPOSE) { k = A.getType().getY(); } else { k = A.getType().getX(); @@ -1462,6 +3215,21 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { throw new RSRuntimeException("Called HER2K with invalid A and B matrices"); } } + + /** + * CHER2K performs one of the hermitian rank 2k operations + * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 
+ */ public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) { validateUplo(Uplo); validateHER2K(Element.F32_2(mRS), Trans, A, B, C); @@ -1474,6 +3242,21 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0); } + + /** + * ZHER2K performs one of the hermitian rank 2k operations + * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. + * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. + */ public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) { validateUplo(Uplo); validateHER2K(Element.F64_2(mRS), Trans, A, B, C); @@ -1489,14 +3272,29 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { /** + * 8-bit GEMM-like operation for neural networks: C = B.transposed() * A + * Calculations are done in 1.10.21 fixed-point format for the final output, + * just before there's a shift down to drop the fractional parts. The output + * values are gated to 0 to 255 to fit in a byte, but the 10-bit format + * gives some headroom to avoid wrapping around on small overflows. 
* - * 8-bit GEMM-like operation for neural networks - * - * @hide + * @param A The input allocation contains matrix A, supported elements type {@link Element#U8}. + * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255. + * @param B The input allocation contains matrix B, supported elements type {@link Element#U8}. + * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255. + * @param C The input allocation contains matrix C, supported elements type {@link Element#U8}. + * @param c_offset The offset for all values in matrix C. + * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult. **/ public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) { validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C); + if (a_offset < 0 || a_offset > 255) { + throw new RSRuntimeException("Invalid a_offset passed to BNNM"); + } + if (b_offset < 0 || b_offset > 255) { + throw new RSRuntimeException("Invalid b_offset passed to BNNM"); + } int M = -1, N = -1, K = -1; M = A.getType().getY(); N = B.getType().getY(); diff --git a/rs/java/android/renderscript/Type.java b/rs/java/android/renderscript/Type.java index a58e42c..dc23785 100644 --- a/rs/java/android/renderscript/Type.java +++ b/rs/java/android/renderscript/Type.java @@ -151,23 +151,30 @@ public class Type extends BaseObj { /** * @hide - */ - public int getArray(int dim) { - if ((dim < 0) || (dim >= mMaxArrays)) { + * Return the dimension of the specified array. 
+ * + * @param arrayNum The array dimension to query + * @return int + */ + public int getArray(int arrayNum) { + if ((arrayNum < 0) || (arrayNum >= mMaxArrays)) { throw new RSIllegalArgumentException("Array dimension out of range."); } - if (mArrays == null || dim >= mArrays.length) { + if (mArrays == null || arrayNum >= mArrays.length) { // Dimension in range but no array for that dimension allocated return 0; } - return mArrays[dim]; + return mArrays[arrayNum]; } /** * @hide - */ + * Return the number of array dimensions. + * + * @return int + */ public int getArrayCount() { if (mArrays != null) return mArrays.length; return 0; @@ -378,6 +385,7 @@ public class Type extends BaseObj { /** * @hide + * Adds an array dimension to the builder * * @param dim * @param value diff --git a/rs/jni/Android.mk b/rs/jni/Android.mk index f1f0bfc..0658620 100644 --- a/rs/jni/Android.mk +++ b/rs/jni/Android.mk @@ -14,7 +14,8 @@ LOCAL_SHARED_LIBRARIES := \ libskia \ libutils \ libui \ - libgui + libgui \ + libjnigraphics LOCAL_STATIC_LIBRARIES := @@ -23,6 +24,8 @@ rs_generated_include_dir := $(call intermediates-dir-for,SHARED_LIBRARIES,libRS, LOCAL_C_INCLUDES += \ $(JNI_H_INCLUDE) \ frameworks/rs \ + frameworks/base/core/jni \ + frameworks/base/libs/hwui \ $(rs_generated_include_dir) LOCAL_CFLAGS += -Wno-unused-parameter -std=c++11 diff --git a/rs/jni/android_renderscript_RenderScript.cpp b/rs/jni/android_renderscript_RenderScript.cpp index 6f6729b..ffc4fd8 100644 --- a/rs/jni/android_renderscript_RenderScript.cpp +++ b/rs/jni/android_renderscript_RenderScript.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#define LOG_TAG "libRS_jni" +#define LOG_TAG "RenderScript_jni" #include <stdlib.h> #include <stdio.h> @@ -24,8 +24,6 @@ #include <utils/misc.h> #include <inttypes.h> -#include <SkBitmap.h> - #include <androidfw/Asset.h> #include <androidfw/AssetManager.h> #include <androidfw/ResourceTypes.h> @@ -35,6 +33,7 @@ #include "android_runtime/AndroidRuntime.h" #include "android_runtime/android_view_Surface.h" #include "android_runtime/android_util_AssetManager.h" +#include "android/graphics/GraphicsJNI.h" #include <rs.h> #include <rsEnv.h> @@ -281,14 +280,10 @@ private: // --------------------------------------------------------------------------- static jfieldID gContextId = 0; -static jfieldID gNativeBitmapID = 0; static void _nInit(JNIEnv *_env, jclass _this) { gContextId = _env->GetFieldID(_this, "mContext", "J"); - - jclass bitmapClass = _env->FindClass("android/graphics/Bitmap"); - gNativeBitmapID = _env->GetFieldID(bitmapClass, "mNativeBitmap", "J"); } // --------------------------------------------------------------------------- @@ -333,79 +328,167 @@ nClosureCreate(JNIEnv *_env, jobject _this, jlong con, jlong kernelID, jlong returnValue, jlongArray fieldIDArray, jlongArray valueArray, jintArray sizeArray, jlongArray depClosureArray, jlongArray depFieldIDArray) { + jlong ret = 0; + jlong* jFieldIDs = _env->GetLongArrayElements(fieldIDArray, nullptr); jsize fieldIDs_length = _env->GetArrayLength(fieldIDArray); - RsScriptFieldID* fieldIDs = - (RsScriptFieldID*)alloca(sizeof(RsScriptFieldID) * fieldIDs_length); - for (int i = 0; i< fieldIDs_length; i++) { + jlong* jValues = _env->GetLongArrayElements(valueArray, nullptr); + jsize values_length = _env->GetArrayLength(valueArray); + jint* jSizes = _env->GetIntArrayElements(sizeArray, nullptr); + jsize sizes_length = _env->GetArrayLength(sizeArray); + jlong* jDepClosures = + _env->GetLongArrayElements(depClosureArray, nullptr); + jsize depClosures_length = _env->GetArrayLength(depClosureArray); + jlong* 
jDepFieldIDs = + _env->GetLongArrayElements(depFieldIDArray, nullptr); + jsize depFieldIDs_length = _env->GetArrayLength(depFieldIDArray); + + size_t numValues, numDependencies; + RsScriptFieldID* fieldIDs; + uintptr_t* values; + RsClosure* depClosures; + RsScriptFieldID* depFieldIDs; + + if (fieldIDs_length != values_length || values_length != sizes_length) { + ALOGE("Unmatched field IDs, values, and sizes in closure creation."); + goto exit; + } + + numValues = (size_t)fieldIDs_length; + + if (depClosures_length != depFieldIDs_length) { + ALOGE("Unmatched closures and field IDs for dependencies in closure creation."); + goto exit; + } + + numDependencies = (size_t)depClosures_length; + + if (numDependencies > numValues) { + ALOGE("Unexpected number of dependencies in closure creation"); + goto exit; + } + + if (numValues > RS_CLOSURE_MAX_NUMBER_ARGS_AND_BINDINGS) { + ALOGE("Too many arguments or globals in closure creation"); + goto exit; + } + + fieldIDs = (RsScriptFieldID*)alloca(sizeof(RsScriptFieldID) * numValues); + if (fieldIDs == nullptr) { + goto exit; + } + + for (size_t i = 0; i < numValues; i++) { fieldIDs[i] = (RsScriptFieldID)jFieldIDs[i]; } - jlong* jValues = _env->GetLongArrayElements(valueArray, nullptr); - jsize values_length = _env->GetArrayLength(valueArray); - uintptr_t* values = (uintptr_t*)alloca(sizeof(uintptr_t) * values_length); - for (int i = 0; i < values_length; i++) { + values = (uintptr_t*)alloca(sizeof(uintptr_t) * numValues); + if (values == nullptr) { + goto exit; + } + + for (size_t i = 0; i < numValues; i++) { values[i] = (uintptr_t)jValues[i]; } - jint* sizes = _env->GetIntArrayElements(sizeArray, nullptr); - jsize sizes_length = _env->GetArrayLength(sizeArray); + depClosures = (RsClosure*)alloca(sizeof(RsClosure) * numDependencies); + if (depClosures == nullptr) { + goto exit; + } - jlong* jDepClosures = - _env->GetLongArrayElements(depClosureArray, nullptr); - jsize depClosures_length = _env->GetArrayLength(depClosureArray); 
- RsClosure* depClosures = - (RsClosure*)alloca(sizeof(RsClosure) * depClosures_length); - for (int i = 0; i < depClosures_length; i++) { + for (size_t i = 0; i < numDependencies; i++) { depClosures[i] = (RsClosure)jDepClosures[i]; } - jlong* jDepFieldIDs = - _env->GetLongArrayElements(depFieldIDArray, nullptr); - jsize depFieldIDs_length = _env->GetArrayLength(depFieldIDArray); - RsScriptFieldID* depFieldIDs = - (RsScriptFieldID*)alloca(sizeof(RsScriptFieldID) * depFieldIDs_length); - for (int i = 0; i < depClosures_length; i++) { + depFieldIDs = (RsScriptFieldID*)alloca(sizeof(RsScriptFieldID) * numDependencies); + if (depFieldIDs == nullptr) { + goto exit; + } + + for (size_t i = 0; i < numDependencies; i++) { depFieldIDs[i] = (RsClosure)jDepFieldIDs[i]; } - return (jlong)(uintptr_t)rsClosureCreate( + ret = (jlong)(uintptr_t)rsClosureCreate( (RsContext)con, (RsScriptKernelID)kernelID, (RsAllocation)returnValue, - fieldIDs, (size_t)fieldIDs_length, values, (size_t)values_length, - (int*)sizes, (size_t)sizes_length, - depClosures, (size_t)depClosures_length, - depFieldIDs, (size_t)depFieldIDs_length); + fieldIDs, numValues, values, numValues, + (int*)jSizes, numValues, + depClosures, numDependencies, + depFieldIDs, numDependencies); + +exit: + + _env->ReleaseLongArrayElements(depFieldIDArray, jDepFieldIDs, JNI_ABORT); + _env->ReleaseLongArrayElements(depClosureArray, jDepClosures, JNI_ABORT); + _env->ReleaseIntArrayElements (sizeArray, jSizes, JNI_ABORT); + _env->ReleaseLongArrayElements(valueArray, jValues, JNI_ABORT); + _env->ReleaseLongArrayElements(fieldIDArray, jFieldIDs, JNI_ABORT); + + return ret; } static jlong nInvokeClosureCreate(JNIEnv *_env, jobject _this, jlong con, jlong invokeID, jbyteArray paramArray, jlongArray fieldIDArray, jlongArray valueArray, jintArray sizeArray) { + jlong ret = 0; + jbyte* jParams = _env->GetByteArrayElements(paramArray, nullptr); jsize jParamLength = _env->GetArrayLength(paramArray); - jlong* jFieldIDs = 
_env->GetLongArrayElements(fieldIDArray, nullptr); jsize fieldIDs_length = _env->GetArrayLength(fieldIDArray); - RsScriptFieldID* fieldIDs = - (RsScriptFieldID*)alloca(sizeof(RsScriptFieldID) * fieldIDs_length); - for (int i = 0; i< fieldIDs_length; i++) { + jlong* jValues = _env->GetLongArrayElements(valueArray, nullptr); + jsize values_length = _env->GetArrayLength(valueArray); + jint* jSizes = _env->GetIntArrayElements(sizeArray, nullptr); + jsize sizes_length = _env->GetArrayLength(sizeArray); + + size_t numValues; + RsScriptFieldID* fieldIDs; + uintptr_t* values; + + if (fieldIDs_length != values_length || values_length != sizes_length) { + ALOGE("Unmatched field IDs, values, and sizes in closure creation."); + goto exit; + } + + numValues = (size_t) fieldIDs_length; + + if (numValues > RS_CLOSURE_MAX_NUMBER_ARGS_AND_BINDINGS) { + ALOGE("Too many arguments or globals in closure creation"); + goto exit; + } + + fieldIDs = (RsScriptFieldID*)alloca(sizeof(RsScriptFieldID) * numValues); + if (fieldIDs == nullptr) { + goto exit; + } + + for (size_t i = 0; i< numValues; i++) { fieldIDs[i] = (RsScriptFieldID)jFieldIDs[i]; } - jlong* jValues = _env->GetLongArrayElements(valueArray, nullptr); - jsize values_length = _env->GetArrayLength(valueArray); - uintptr_t* values = (uintptr_t*)alloca(sizeof(uintptr_t) * values_length); - for (int i = 0; i < values_length; i++) { - values[i] = (uintptr_t)jValues[i]; + values = (uintptr_t*)alloca(sizeof(uintptr_t) * numValues); + if (values == nullptr) { + goto exit; } - jint* sizes = _env->GetIntArrayElements(sizeArray, nullptr); - jsize sizes_length = _env->GetArrayLength(sizeArray); + for (size_t i = 0; i < numValues; i++) { + values[i] = (uintptr_t)jValues[i]; + } - return (jlong)(uintptr_t)rsInvokeClosureCreate( + ret = (jlong)(uintptr_t)rsInvokeClosureCreate( (RsContext)con, (RsScriptInvokeID)invokeID, jParams, jParamLength, - fieldIDs, (size_t)fieldIDs_length, values, (size_t)values_length, - (int*)sizes, 
(size_t)sizes_length); + fieldIDs, numValues, values, numValues, + (int*)jSizes, numValues); + +exit: + + _env->ReleaseIntArrayElements (sizeArray, jSizes, JNI_ABORT); + _env->ReleaseLongArrayElements(valueArray, jValues, JNI_ABORT); + _env->ReleaseLongArrayElements(fieldIDArray, jFieldIDs, JNI_ABORT); + _env->ReleaseByteArrayElements(paramArray, jParams, JNI_ABORT); + + return ret; } static void @@ -425,20 +508,40 @@ nClosureSetGlobal(JNIEnv *_env, jobject _this, jlong con, jlong closureID, static long nScriptGroup2Create(JNIEnv *_env, jobject _this, jlong con, jstring name, jstring cacheDir, jlongArray closureArray) { + jlong ret = 0; + AutoJavaStringToUTF8 nameUTF(_env, name); AutoJavaStringToUTF8 cacheDirUTF(_env, cacheDir); jlong* jClosures = _env->GetLongArrayElements(closureArray, nullptr); jsize numClosures = _env->GetArrayLength(closureArray); - RsClosure* closures = (RsClosure*)alloca(sizeof(RsClosure) * numClosures); + + RsClosure* closures; + + if (numClosures > (jsize) RS_SCRIPT_GROUP_MAX_NUMBER_CLOSURES) { + ALOGE("Too many closures in script group"); + goto exit; + } + + closures = (RsClosure*)alloca(sizeof(RsClosure) * numClosures); + if (closures == nullptr) { + goto exit; + } + for (int i = 0; i < numClosures; i++) { closures[i] = (RsClosure)jClosures[i]; } - return (jlong)(uintptr_t)rsScriptGroup2Create( + ret = (jlong)(uintptr_t)rsScriptGroup2Create( (RsContext)con, nameUTF.c_str(), nameUTF.length(), cacheDirUTF.c_str(), cacheDirUTF.length(), closures, numClosures); + +exit: + + _env->ReleaseLongArrayElements(closureArray, jClosures, JNI_ABORT); + + return ret; } static void @@ -531,7 +634,7 @@ nScriptIntrinsicBLAS_Complex(JNIEnv *_env, jobject _this, jlong con, jlong id, j call.alpha.c.r = alphaX; call.alpha.c.i = alphaY; call.beta.c.r = betaX; - call.beta.c.r = betaY; + call.beta.c.i = betaY; call.incX = incX; call.incY = incY; call.KL = KL; @@ -566,7 +669,7 @@ nScriptIntrinsicBLAS_Z(JNIEnv *_env, jobject _this, jlong con, jlong id, jint fu 
call.alpha.z.r = alphaX; call.alpha.z.i = alphaY; call.beta.z.r = betaX; - call.beta.z.r = betaY; + call.beta.z.i = betaY; call.incX = incX; call.incY = incY; call.KL = KL; @@ -593,8 +696,8 @@ nScriptIntrinsicBLAS_BNNM(JNIEnv *_env, jobject _this, jlong con, jlong id, jint call.M = M; call.N = N; call.K = K; - call.a_offset = a_offset; - call.b_offset = b_offset; + call.a_offset = a_offset & 0xFF; + call.b_offset = b_offset & 0xFF; call.c_offset = c_offset; call.c_mult_int = c_mult_int; @@ -1107,9 +1210,8 @@ static jlong nAllocationCreateFromBitmap(JNIEnv *_env, jobject _this, jlong con, jlong type, jint mip, jobject jbitmap, jint usage) { - SkBitmap const * nativeBitmap = - (SkBitmap const *)_env->GetLongField(jbitmap, gNativeBitmapID); - const SkBitmap& bitmap(*nativeBitmap); + SkBitmap bitmap; + GraphicsJNI::getSkBitmap(_env, jbitmap, &bitmap); bitmap.lockPixels(); const void* ptr = bitmap.getPixels(); @@ -1124,9 +1226,8 @@ static jlong nAllocationCreateBitmapBackedAllocation(JNIEnv *_env, jobject _this, jlong con, jlong type, jint mip, jobject jbitmap, jint usage) { - SkBitmap const * nativeBitmap = - (SkBitmap const *)_env->GetLongField(jbitmap, gNativeBitmapID); - const SkBitmap& bitmap(*nativeBitmap); + SkBitmap bitmap; + GraphicsJNI::getSkBitmap(_env, jbitmap, &bitmap); bitmap.lockPixels(); const void* ptr = bitmap.getPixels(); @@ -1141,9 +1242,8 @@ static jlong nAllocationCubeCreateFromBitmap(JNIEnv *_env, jobject _this, jlong con, jlong type, jint mip, jobject jbitmap, jint usage) { - SkBitmap const * nativeBitmap = - (SkBitmap const *)_env->GetLongField(jbitmap, gNativeBitmapID); - const SkBitmap& bitmap(*nativeBitmap); + SkBitmap bitmap; + GraphicsJNI::getSkBitmap(_env, jbitmap, &bitmap); bitmap.lockPixels(); const void* ptr = bitmap.getPixels(); @@ -1157,9 +1257,8 @@ nAllocationCubeCreateFromBitmap(JNIEnv *_env, jobject _this, jlong con, jlong ty static void nAllocationCopyFromBitmap(JNIEnv *_env, jobject _this, jlong con, jlong alloc, jobject jbitmap) 
{ - SkBitmap const * nativeBitmap = - (SkBitmap const *)_env->GetLongField(jbitmap, gNativeBitmapID); - const SkBitmap& bitmap(*nativeBitmap); + SkBitmap bitmap; + GraphicsJNI::getSkBitmap(_env, jbitmap, &bitmap); int w = bitmap.width(); int h = bitmap.height(); @@ -1174,9 +1273,8 @@ nAllocationCopyFromBitmap(JNIEnv *_env, jobject _this, jlong con, jlong alloc, j static void nAllocationCopyToBitmap(JNIEnv *_env, jobject _this, jlong con, jlong alloc, jobject jbitmap) { - SkBitmap const * nativeBitmap = - (SkBitmap const *)_env->GetLongField(jbitmap, gNativeBitmapID); - const SkBitmap& bitmap(*nativeBitmap); + SkBitmap bitmap; + GraphicsJNI::getSkBitmap(_env, jbitmap, &bitmap); bitmap.lockPixels(); void* ptr = bitmap.getPixels(); @@ -1756,7 +1854,7 @@ nScriptForEach(JNIEnv *_env, jobject _this, jlong con, jlong script, jint slot, jintArray limits) { if (kLogApi) { - ALOGD("nScriptForEach, con(%p), s(%p), slot(%i)", (RsContext)con, (void *)script, slot); + ALOGD("nScriptForEach, con(%p), s(%p), slot(%i) ains(%p) aout(%" PRId64 ")", (RsContext)con, (void *)script, slot, ains, aout); } jint in_len = 0; @@ -1766,8 +1864,14 @@ nScriptForEach(JNIEnv *_env, jobject _this, jlong con, jlong script, jint slot, if (ains != nullptr) { in_len = _env->GetArrayLength(ains); - in_ptr = _env->GetLongArrayElements(ains, nullptr); + if (in_len > (jint)RS_KERNEL_MAX_ARGUMENTS) { + ALOGE("Too many arguments in kernel launch."); + // TODO (b/20758983): Report back to Java and throw an exception + return; + } + // TODO (b/20760800): Check in_ptr is not null + in_ptr = _env->GetLongArrayElements(ains, nullptr); if (sizeof(RsAllocation) == sizeof(jlong)) { in_allocs = (RsAllocation*)in_ptr; @@ -1775,6 +1879,11 @@ nScriptForEach(JNIEnv *_env, jobject _this, jlong con, jlong script, jint slot, // Convert from 64-bit jlong types to the native pointer type. 
in_allocs = (RsAllocation*)alloca(in_len * sizeof(RsAllocation)); + if (in_allocs == nullptr) { + ALOGE("Failed launching kernel for lack of memory."); + _env->ReleaseLongArrayElements(ains, in_ptr, JNI_ABORT); + return; + } for (int index = in_len; --index >= 0;) { in_allocs[index] = (RsAllocation)in_ptr[index]; |