11 files changed, 2679 insertions, 1116 deletions
diff --git a/rs/java/android/renderscript/Allocation.java b/rs/java/android/renderscript/Allocation.java
index 4fa2c81..0a50593 100644
--- a/rs/java/android/renderscript/Allocation.java
+++ b/rs/java/android/renderscript/Allocation.java
@@ -51,6 +51,7 @@ import android.os.Trace;
  * <a href="{@docRoot}guide/topics/renderscript/index.html">RenderScript</a> developer guide.</p>
  * </div>
  **/
+
 public class Allocation extends BaseObj {
     Type mType;
     Bitmap mBitmap;
@@ -273,8 +274,8 @@ public class Allocation extends BaseObj {
     }
 
     /**
-     * @hide
      * Enable/Disable AutoPadding for Vec3 elements.
+     * By default: Diabled.
      *
      * @param useAutoPadding True: enable AutoPadding; False: disable AutoPadding
      *
@@ -455,28 +456,31 @@ public class Allocation extends BaseObj {
      *
      */
     public void syncAll(int srcLocation) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "syncAll");
-        switch (srcLocation) {
-        case USAGE_GRAPHICS_TEXTURE:
-        case USAGE_SCRIPT:
-            if ((mUsage & USAGE_SHARED) != 0) {
-                copyFrom(mBitmap);
-            }
-            break;
-        case USAGE_GRAPHICS_CONSTANTS:
-        case USAGE_GRAPHICS_VERTEX:
-            break;
-        case USAGE_SHARED:
-            if ((mUsage & USAGE_SHARED) != 0) {
-                copyTo(mBitmap);
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "syncAll");
+            switch (srcLocation) {
+                case USAGE_GRAPHICS_TEXTURE:
+                case USAGE_SCRIPT:
+                    if ((mUsage & USAGE_SHARED) != 0) {
+                        copyFrom(mBitmap);
+                    }
+                    break;
+                case USAGE_GRAPHICS_CONSTANTS:
+                case USAGE_GRAPHICS_VERTEX:
+                    break;
+                case USAGE_SHARED:
+                    if ((mUsage & USAGE_SHARED) != 0) {
+                        copyTo(mBitmap);
+                    }
+                    break;
+                default:
+                    throw new RSIllegalArgumentException("Source must be exactly one usage type.");
             }
-            break;
-        default:
-            throw new RSIllegalArgumentException("Source must be exactly one usage type.");
+            mRS.validate();
+            mRS.nAllocationSyncAll(getIDSafe(), srcLocation);
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
         }
-        mRS.validate();
-        mRS.nAllocationSyncAll(getIDSafe(), srcLocation);
-        Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
     /**
@@ -487,14 +491,17 @@ public class Allocation extends BaseObj {
      *
      */
     public void ioSend() {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "ioSend");
-        if ((mUsage & USAGE_IO_OUTPUT) == 0) {
-            throw new RSIllegalArgumentException(
-                "Can only send buffer if IO_OUTPUT usage specified.");
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "ioSend");
+            if ((mUsage & USAGE_IO_OUTPUT) == 0) {
+                throw new RSIllegalArgumentException(
+                    "Can only send buffer if IO_OUTPUT usage specified.");
+            }
+            mRS.validate();
+            mRS.nAllocationIoSend(getID(mRS));
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
         }
-        mRS.validate();
-        mRS.nAllocationIoSend(getID(mRS));
-        Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
     /**
@@ -503,14 +510,17 @@ public class Allocation extends BaseObj {
      *
      */
     public void ioReceive() {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "ioReceive");
-        if ((mUsage & USAGE_IO_INPUT) == 0) {
-            throw new RSIllegalArgumentException(
-                "Can only receive if IO_INPUT usage specified.");
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "ioReceive");
+            if ((mUsage & USAGE_IO_INPUT) == 0) {
+                throw new RSIllegalArgumentException(
+                    "Can only receive if IO_INPUT usage specified.");
+            }
+            mRS.validate();
+            mRS.nAllocationIoReceive(getID(mRS));
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
         }
-        mRS.validate();
-        mRS.nAllocationIoReceive(getID(mRS));
-        Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
     /**
@@ -519,28 +529,31 @@ public class Allocation extends BaseObj {
      * @param d Source array.
      */
     public void copyFrom(BaseObj[] d) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "copyFrom");
-        mRS.validate();
-        validateIsObject();
-        if (d.length != mCurrentCount) {
-            throw new RSIllegalArgumentException("Array size mismatch, allocation sizeX = " +
-                                                 mCurrentCount + ", array length = " + d.length);
-        }
-
-        if (RenderScript.sPointerSize == 8) {
-            long i[] = new long[d.length * 4];
-            for (int ct=0; ct < d.length; ct++) {
-                i[ct * 4] = d[ct].getID(mRS);
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "copyFrom");
+            mRS.validate();
+            validateIsObject();
+            if (d.length != mCurrentCount) {
+                throw new RSIllegalArgumentException("Array size mismatch, allocation sizeX = " +
+                                                      mCurrentCount + ", array length = " + d.length);
             }
-            copy1DRangeFromUnchecked(0, mCurrentCount, i);
-        } else {
-            int i[] = new int[d.length];
-            for (int ct=0; ct < d.length; ct++) {
-                i[ct] = (int)d[ct].getID(mRS);
+
+            if (RenderScript.sPointerSize == 8) {
+                long i[] = new long[d.length * 4];
+                for (int ct=0; ct < d.length; ct++) {
+                    i[ct * 4] = d[ct].getID(mRS);
+                }
+                copy1DRangeFromUnchecked(0, mCurrentCount, i);
+            } else {
+                int i[] = new int[d.length];
+                for (int ct=0; ct < d.length; ct++) {
+                    i[ct] = (int) d[ct].getID(mRS);
+                }
+                copy1DRangeFromUnchecked(0, mCurrentCount, i);
             }
-            copy1DRangeFromUnchecked(0, mCurrentCount, i);
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
         }
-        Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
     private void validateBitmapFormat(Bitmap b) {
@@ -599,16 +612,19 @@ public class Allocation extends BaseObj {
     }
 
     private void copyFromUnchecked(Object array, Element.DataType dt, int arrayLen) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "copyFromUnchecked");
-        mRS.validate();
-        if (mCurrentDimZ > 0) {
-            copy3DRangeFromUnchecked(0, 0, 0, mCurrentDimX, mCurrentDimY, mCurrentDimZ, array, dt, arrayLen);
-        } else if (mCurrentDimY > 0) {
-            copy2DRangeFromUnchecked(0, 0, mCurrentDimX, mCurrentDimY, array, dt, arrayLen);
-        } else {
-            copy1DRangeFromUnchecked(0, mCurrentCount, array, dt, arrayLen);
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "copyFromUnchecked");
+            mRS.validate();
+            if (mCurrentDimZ > 0) {
+                copy3DRangeFromUnchecked(0, 0, 0, mCurrentDimX, mCurrentDimY, mCurrentDimZ, array, dt, arrayLen);
+            } else if (mCurrentDimY > 0) {
+                copy2DRangeFromUnchecked(0, 0, mCurrentDimX, mCurrentDimY, array, dt, arrayLen);
+            } else {
+                copy1DRangeFromUnchecked(0, mCurrentCount, array, dt, arrayLen);
+            }
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
         }
-        Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
     /**
@@ -619,10 +635,13 @@ public class Allocation extends BaseObj {
      * @param array The source data array
      */
     public void copyFromUnchecked(Object array) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "copyFromUnchecked");
-        copyFromUnchecked(array, validateObjectIsPrimitiveArray(array, false),
-                          java.lang.reflect.Array.getLength(array));
-        Trace.traceEnd(RenderScript.TRACE_TAG);
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "copyFromUnchecked");
+            copyFromUnchecked(array, validateObjectIsPrimitiveArray(array, false),
+                              java.lang.reflect.Array.getLength(array));
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
+        }
     }
 
     /**
@@ -679,10 +698,13 @@ public class Allocation extends BaseObj {
      * @param array The source data array
      */
     public void copyFrom(Object array) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "copyFrom");
-        copyFromUnchecked(array, validateObjectIsPrimitiveArray(array, true),
-                          java.lang.reflect.Array.getLength(array));
-        Trace.traceEnd(RenderScript.TRACE_TAG);
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "copyFrom");
+            copyFromUnchecked(array, validateObjectIsPrimitiveArray(array, true),
+                              java.lang.reflect.Array.getLength(array));
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
+        }
     }
 
     /**
@@ -747,19 +769,22 @@ public class Allocation extends BaseObj {
      * @param b the source bitmap
      */
     public void copyFrom(Bitmap b) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "copyFrom");
-        mRS.validate();
-        if (b.getConfig() == null) {
-            Bitmap newBitmap = Bitmap.createBitmap(b.getWidth(), b.getHeight(), Bitmap.Config.ARGB_8888);
-            Canvas c = new Canvas(newBitmap);
-            c.drawBitmap(b, 0, 0, null);
-            copyFrom(newBitmap);
-            return;
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "copyFrom");
+            mRS.validate();
+            if (b.getConfig() == null) {
+                Bitmap newBitmap = Bitmap.createBitmap(b.getWidth(), b.getHeight(), Bitmap.Config.ARGB_8888);
+                Canvas c = new Canvas(newBitmap);
+                c.drawBitmap(b, 0, 0, null);
+                copyFrom(newBitmap);
+                return;
+            }
+            validateBitmapSize(b);
+            validateBitmapFormat(b);
+            mRS.nAllocationCopyFromBitmap(getID(mRS), b);
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
         }
-        validateBitmapSize(b);
-        validateBitmapFormat(b);
-        mRS.nAllocationCopyFromBitmap(getID(mRS), b);
-        Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
     /**
@@ -769,13 +794,16 @@ public class Allocation extends BaseObj {
      * @param a the source allocation
      */
     public void copyFrom(Allocation a) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "copyFrom");
-        mRS.validate();
-        if (!mType.equals(a.getType())) {
-            throw new RSIllegalArgumentException("Types of allocations must match.");
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "copyFrom");
+            mRS.validate();
+            if (!mType.equals(a.getType())) {
+                throw new RSIllegalArgumentException("Types of allocations must match.");
+            }
+            copy2DRangeFrom(0, 0, mCurrentDimX, mCurrentDimY, a, 0, 0);
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
         }
-        copy2DRangeFrom(0, 0, mCurrentDimX, mCurrentDimY, a, 0, 0);
-        Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
     /**
@@ -802,7 +830,7 @@ public class Allocation extends BaseObj {
 
     /**
      * This is only intended to be used by auto-generated code reflected from
-     * the RenderScript script files.
+     * the RenderScript script files and should not be used by developers.
      *
      * @param xoff
      * @param component_number
@@ -813,9 +841,8 @@ public class Allocation extends BaseObj {
     }
 
     /**
-     * @hide
      * This is only intended to be used by auto-generated code reflected from
-     * the RenderScript script files.
+     * the RenderScript script files and should not be used by developers.
      *
      * @param xoff
      * @param yoff
@@ -891,17 +918,20 @@ public class Allocation extends BaseObj {
 
     private void copy1DRangeFromUnchecked(int off, int count, Object array,
                                           Element.DataType dt, int arrayLen) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "copy1DRangeFromUnchecked");
-        final int dataSize = mType.mElement.getBytesSize() * count;
-        // AutoPadding for Vec3 Element
-        boolean usePadding = false;
-        if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
-            usePadding = true;
-        }
-        data1DChecks(off, count, arrayLen * dt.mSize, dataSize, usePadding);
-        mRS.nAllocationData1D(getIDSafe(), off, mSelectedLOD, count, array, dataSize, dt,
-                              mType.mElement.mType.mSize, usePadding);
-        Trace.traceEnd(RenderScript.TRACE_TAG);
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "copy1DRangeFromUnchecked");
+            final int dataSize = mType.mElement.getBytesSize() * count;
+            // AutoPadding for Vec3 Element
+            boolean usePadding = false;
+            if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
+                usePadding = true;
+            }
+            data1DChecks(off, count, arrayLen * dt.mSize, dataSize, usePadding);
+            mRS.nAllocationData1D(getIDSafe(), off, mSelectedLOD, count, array, dataSize, dt,
+                                  mType.mElement.mType.mSize, usePadding);
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
+        }
     }
 
     /**
@@ -1053,6 +1083,7 @@ public class Allocation extends BaseObj {
                               mSelectedLOD, mSelectedFace.mID,
                               count, 1, data.getID(mRS), dataOff, 0,
                               data.mSelectedLOD, data.mSelectedFace.mID);
+        Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
     private void validate2DRange(int xoff, int yoff, int w, int h) {
@@ -1074,28 +1105,31 @@ public class Allocation extends BaseObj {
 
     void copy2DRangeFromUnchecked(int xoff, int yoff, int w, int h, Object array,
                                   Element.DataType dt, int arrayLen) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeFromUnchecked");
-        mRS.validate();
-        validate2DRange(xoff, yoff, w, h);
-        final int dataSize = mType.mElement.getBytesSize() * w * h;
-        // AutoPadding for Vec3 Element
-        boolean usePadding = false;
-        int sizeBytes = arrayLen * dt.mSize;
-        if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
-            if (dataSize / 4 * 3 > sizeBytes) {
-                throw new RSIllegalArgumentException("Array too small for allocation type.");
-            }
-            usePadding = true;
-            sizeBytes = dataSize;
-        } else {
-            if (dataSize > sizeBytes) {
-                throw new RSIllegalArgumentException("Array too small for allocation type.");
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeFromUnchecked");
+            mRS.validate();
+            validate2DRange(xoff, yoff, w, h);
+            final int dataSize = mType.mElement.getBytesSize() * w * h;
+            // AutoPadding for Vec3 Element
+            boolean usePadding = false;
+            int sizeBytes = arrayLen * dt.mSize;
+            if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
+                if (dataSize / 4 * 3 > sizeBytes) {
+                    throw new RSIllegalArgumentException("Array too small for allocation type.");
+                }
+                usePadding = true;
+                sizeBytes = dataSize;
+            } else {
+                if (dataSize > sizeBytes) {
+                    throw new RSIllegalArgumentException("Array too small for allocation type.");
+                }
             }
+            mRS.nAllocationData2D(getIDSafe(), xoff, yoff, mSelectedLOD, mSelectedFace.mID, w, h,
+                                  array, sizeBytes, dt,
+                                  mType.mElement.mType.mSize, usePadding);
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
         }
-        mRS.nAllocationData2D(getIDSafe(), xoff, yoff, mSelectedLOD, mSelectedFace.mID, w, h,
-                              array, sizeBytes, dt,
-                              mType.mElement.mType.mSize, usePadding);
-        Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
     /**
@@ -1109,11 +1143,14 @@ public class Allocation extends BaseObj {
      * @param array Data to be placed into the Allocation
      */
     public void copy2DRangeFrom(int xoff, int yoff, int w, int h, Object array) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeFrom");
-        copy2DRangeFromUnchecked(xoff, yoff, w, h, array,
-                                 validateObjectIsPrimitiveArray(array, true),
-                                 java.lang.reflect.Array.getLength(array));
-        Trace.traceEnd(RenderScript.TRACE_TAG);
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeFrom");
+            copy2DRangeFromUnchecked(xoff, yoff, w, h, array,
+                                     validateObjectIsPrimitiveArray(array, true),
+                                     java.lang.reflect.Array.getLength(array));
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
+        }
     }
 
     /**
@@ -1194,14 +1231,17 @@ public class Allocation extends BaseObj {
      */
     public void copy2DRangeFrom(int xoff, int yoff, int w, int h,
                                 Allocation data, int dataXoff, int dataYoff) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeFrom");
-        mRS.validate();
-        validate2DRange(xoff, yoff, w, h);
-        mRS.nAllocationData2D(getIDSafe(), xoff, yoff,
-                              mSelectedLOD, mSelectedFace.mID,
-                              w, h, data.getID(mRS), dataXoff, dataYoff,
-                              data.mSelectedLOD, data.mSelectedFace.mID);
-        Trace.traceEnd(RenderScript.TRACE_TAG);
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeFrom");
+            mRS.validate();
+            validate2DRange(xoff, yoff, w, h);
+            mRS.nAllocationData2D(getIDSafe(), xoff, yoff,
+                                  mSelectedLOD, mSelectedFace.mID,
+                                  w, h, data.getID(mRS), dataXoff, dataYoff,
+                                  data.mSelectedLOD, data.mSelectedFace.mID);
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
+        }
     }
 
     /**
@@ -1214,19 +1254,22 @@ public class Allocation extends BaseObj {
      * @param data the Bitmap to be copied
      */
     public void copy2DRangeFrom(int xoff, int yoff, Bitmap data) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeFrom");
-        mRS.validate();
-        if (data.getConfig() == null) {
-            Bitmap newBitmap = Bitmap.createBitmap(data.getWidth(), data.getHeight(), Bitmap.Config.ARGB_8888);
-            Canvas c = new Canvas(newBitmap);
-            c.drawBitmap(data, 0, 0, null);
-            copy2DRangeFrom(xoff, yoff, newBitmap);
-            return;
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeFrom");
+            mRS.validate();
+            if (data.getConfig() == null) {
+                Bitmap newBitmap = Bitmap.createBitmap(data.getWidth(), data.getHeight(), Bitmap.Config.ARGB_8888);
+                Canvas c = new Canvas(newBitmap);
+                c.drawBitmap(data, 0, 0, null);
+                copy2DRangeFrom(xoff, yoff, newBitmap);
+                return;
+            }
+            validateBitmapFormat(data);
+            validate2DRange(xoff, yoff, data.getWidth(), data.getHeight());
+            mRS.nAllocationData2D(getIDSafe(), xoff, yoff, mSelectedLOD, mSelectedFace.mID, data);
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
         }
-        validateBitmapFormat(data);
-        validate2DRange(xoff, yoff, data.getWidth(), data.getHeight());
-        mRS.nAllocationData2D(getIDSafe(), xoff, yoff, mSelectedLOD, mSelectedFace.mID, data);
-        Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
     private void validate3DRange(int xoff, int yoff, int zoff, int w, int h, int d) {
@@ -1247,37 +1290,42 @@ public class Allocation extends BaseObj {
     }
 
     /**
-     * @hide
+     * Copy a rectangular region from the array into the allocation.
+     * The array is assumed to be tightly packed.
      *
+     * The data type of the array is not required to be the same as
+     * the element data type.
      */
     private void copy3DRangeFromUnchecked(int xoff, int yoff, int zoff, int w, int h, int d,
                                           Object array, Element.DataType dt, int arrayLen) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "copy3DRangeFromUnchecked");
-        mRS.validate();
-        validate3DRange(xoff, yoff, zoff, w, h, d);
-        final int dataSize = mType.mElement.getBytesSize() * w * h * d;
-        // AutoPadding for Vec3 Element
-        boolean usePadding = false;
-        int sizeBytes = arrayLen * dt.mSize;
-        if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
-            if (dataSize / 4 * 3 > sizeBytes) {
-                throw new RSIllegalArgumentException("Array too small for allocation type.");
-            }
-            usePadding = true;
-            sizeBytes = dataSize;
-        } else {
-            if (dataSize > sizeBytes) {
-                throw new RSIllegalArgumentException("Array too small for allocation type.");
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "copy3DRangeFromUnchecked");
+            mRS.validate();
+            validate3DRange(xoff, yoff, zoff, w, h, d);
+            final int dataSize = mType.mElement.getBytesSize() * w * h * d;
+            // AutoPadding for Vec3 Element
+            boolean usePadding = false;
+            int sizeBytes = arrayLen * dt.mSize;
+            if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
+                if (dataSize / 4 * 3 > sizeBytes) {
+                    throw new RSIllegalArgumentException("Array too small for allocation type.");
+                }
+                usePadding = true;
+                sizeBytes = dataSize;
+            } else {
+                if (dataSize > sizeBytes) {
+                    throw new RSIllegalArgumentException("Array too small for allocation type.");
+                }
             }
+            mRS.nAllocationData3D(getIDSafe(), xoff, yoff, zoff, mSelectedLOD, w, h, d,
+                                  array, sizeBytes, dt,
+                                  mType.mElement.mType.mSize, usePadding);
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
         }
-        mRS.nAllocationData3D(getIDSafe(), xoff, yoff, zoff, mSelectedLOD, w, h, d,
-                              array, sizeBytes, dt,
-                              mType.mElement.mType.mSize, usePadding);
-        Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
     /**
-     * @hide
      * Copy a rectangular region from the array into the allocation.
      * The array is assumed to be tightly packed.
      *
@@ -1290,15 +1338,17 @@ public class Allocation extends BaseObj {
      * @param array to be placed into the allocation
      */
     public void copy3DRangeFrom(int xoff, int yoff, int zoff, int w, int h, int d, Object array) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "copy3DRangeFrom");
-        copy3DRangeFromUnchecked(xoff, yoff, zoff, w, h, d, array,
-                                 validateObjectIsPrimitiveArray(array, true),
-                                 java.lang.reflect.Array.getLength(array));
-        Trace.traceEnd(RenderScript.TRACE_TAG);
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "copy3DRangeFrom");
+            copy3DRangeFromUnchecked(xoff, yoff, zoff, w, h, d, array,
+                                     validateObjectIsPrimitiveArray(array, true),
+                                     java.lang.reflect.Array.getLength(array));
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
+        }
     }
 
     /**
-     * @hide
      * Copy a rectangular region into the allocation from another
      * allocation.
      *
@@ -1330,34 +1380,40 @@ public class Allocation extends BaseObj {
      * @param b The bitmap to be set from the Allocation.
      */
     public void copyTo(Bitmap b) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "copyTo");
-        mRS.validate();
-        validateBitmapFormat(b);
-        validateBitmapSize(b);
-        mRS.nAllocationCopyToBitmap(getID(mRS), b);
-        Trace.traceEnd(RenderScript.TRACE_TAG);
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "copyTo");
+            mRS.validate();
+            validateBitmapFormat(b);
+            validateBitmapSize(b);
+            mRS.nAllocationCopyToBitmap(getID(mRS), b);
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
+        }
     }
 
     private void copyTo(Object array, Element.DataType dt, int arrayLen) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "copyTo");
-        mRS.validate();
-        boolean usePadding = false;
-        if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
-            usePadding = true;
-        }
-        if (usePadding) {
-            if (dt.mSize * arrayLen < mSize / 4 * 3) {
-                throw new RSIllegalArgumentException(
-                    "Size of output array cannot be smaller than size of allocation.");
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "copyTo");
+            mRS.validate();
+            boolean usePadding = false;
+            if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
+                usePadding = true;
             }
-        } else {
-            if (dt.mSize * arrayLen < mSize) {
-                throw new RSIllegalArgumentException(
-                    "Size of output array cannot be smaller than size of allocation.");
+            if (usePadding) {
+                if (dt.mSize * arrayLen < mSize / 4 * 3) {
+                    throw new RSIllegalArgumentException(
+                        "Size of output array cannot be smaller than size of allocation.");
+                }
+            } else {
+                if (dt.mSize * arrayLen < mSize) {
+                    throw new RSIllegalArgumentException(
+                        "Size of output array cannot be smaller than size of allocation.");
+                }
             }
+            mRS.nAllocationRead(getID(mRS), array, dt, mType.mElement.mType.mSize, usePadding);
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
         }
-        mRS.nAllocationRead(getID(mRS), array, dt, mType.mElement.mType.mSize, usePadding);
-        Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
     /**
@@ -1423,6 +1479,7 @@ public class Allocation extends BaseObj {
 
     /**
      * @hide
+     *
      * This is only intended to be used by auto-generated code reflected from
      * the RenderScript script files and should not be used by developers.
      *
@@ -1430,7 +1487,7 @@ public class Allocation extends BaseObj {
      * @param yoff
      * @param zoff
      * @param component_number
-     * @param array
+     * @param fp
      */
     public void copyToFieldPacker(int xoff, int yoff, int zoff, int component_number, FieldPacker fp) {
         mRS.validate();
@@ -1494,21 +1551,23 @@ public class Allocation extends BaseObj {
 
     private void copy1DRangeToUnchecked(int off, int count, Object array,
                                         Element.DataType dt, int arrayLen) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "copy1DRangeToUnchecked");
-        final int dataSize = mType.mElement.getBytesSize() * count;
-        // AutoPadding for Vec3 Element
-        boolean usePadding = false;
-        if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
-            usePadding = true;
-        }
-        data1DChecks(off, count, arrayLen * dt.mSize, dataSize, usePadding);
-        mRS.nAllocationRead1D(getIDSafe(), off, mSelectedLOD, count, array, dataSize, dt,
-                              mType.mElement.mType.mSize, usePadding);
-        Trace.traceEnd(RenderScript.TRACE_TAG);
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "copy1DRangeToUnchecked");
+            final int dataSize = mType.mElement.getBytesSize() * count;
+            // AutoPadding for Vec3 Element
+            boolean usePadding = false;
+            if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
+                usePadding = true;
+            }
+            data1DChecks(off, count, arrayLen * dt.mSize, dataSize, usePadding);
+            mRS.nAllocationRead1D(getIDSafe(), off, mSelectedLOD, count, array, dataSize, dt,
+                                  mType.mElement.mType.mSize, usePadding);
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
+        }
     }
 
     /**
-     * @hide
      * Copy part of this Allocation into an array.  This method does not
      * guarantee that the Allocation is compatible with the input buffer.
      *
@@ -1523,7 +1582,6 @@ public class Allocation extends BaseObj {
     }
 
     /**
-     * @hide
      * Copy part of this Allocation into an array.  This method does not
      * guarantee that the Allocation is compatible with the input buffer.
      *
@@ -1536,7 +1594,6 @@ public class Allocation extends BaseObj {
     }
 
     /**
-     * @hide
      * Copy part of this Allocation into an array.  This method does not
      * guarantee that the Allocation is compatible with the input buffer.
      *
@@ -1549,7 +1606,6 @@ public class Allocation extends BaseObj {
     }
 
     /**
-     * @hide
      * Copy part of this Allocation into an array.  This method does not
      * guarantee that the Allocation is compatible with the input buffer.
      *
@@ -1562,7 +1618,6 @@ public class Allocation extends BaseObj {
     }
 
     /**
-     * @hide
      * Copy part of this Allocation into an array.  This method does not
      * guarantee that the Allocation is compatible with the input buffer.
      *
@@ -1576,7 +1631,6 @@ public class Allocation extends BaseObj {
 
 
     /**
-     * @hide
      * Copy part of this Allocation into an array.  This method does not
      * and will generate exceptions if the Allocation type does not
      * match the component type of the array passed in.
@@ -1592,7 +1646,6 @@ public class Allocation extends BaseObj {
     }
 
     /**
-     * @hide
      * Copy part of this Allocation into an array.  This method does not
      * and will generate exceptions if the Allocation type is not a 32 bit
      * integer type.
@@ -1607,7 +1660,6 @@ public class Allocation extends BaseObj {
     }
 
     /**
-     * @hide
      * Copy part of this Allocation into an array.  This method does not
      * and will generate exceptions if the Allocation type is not a 16 bit
      * integer type.
@@ -1622,7 +1674,6 @@ public class Allocation extends BaseObj {
     }
 
     /**
-     * @hide
      * Copy part of this Allocation into an array.  This method does not
      * and will generate exceptions if the Allocation type is not an 8 bit
      * integer type.
@@ -1637,7 +1688,6 @@ public class Allocation extends BaseObj {
     }
 
     /**
-     * @hide
      * Copy part of this Allocation into an array.  This method does not
      * and will generate exceptions if the Allocation type is not a 32 bit float
      * type.
@@ -1654,31 +1704,33 @@ public class Allocation extends BaseObj {
 
     void copy2DRangeToUnchecked(int xoff, int yoff, int w, int h, Object array,
                                 Element.DataType dt, int arrayLen) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeToUnchecked");
-        mRS.validate();
-        validate2DRange(xoff, yoff, w, h);
-        final int dataSize = mType.mElement.getBytesSize() * w * h;
-        // AutoPadding for Vec3 Element
-        boolean usePadding = false;
-        int sizeBytes = arrayLen * dt.mSize;
-        if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
-            if (dataSize / 4 * 3 > sizeBytes) {
-                throw new RSIllegalArgumentException("Array too small for allocation type.");
-            }
-            usePadding = true;
-            sizeBytes = dataSize;
-        } else {
-            if (dataSize > sizeBytes) {
-                throw new RSIllegalArgumentException("Array too small for allocation type.");
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeToUnchecked");
+            mRS.validate();
+            validate2DRange(xoff, yoff, w, h);
+            final int dataSize = mType.mElement.getBytesSize() * w * h;
+            // AutoPadding for Vec3 Element
+            boolean usePadding = false;
+            int sizeBytes = arrayLen * dt.mSize;
+            if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
+                if (dataSize / 4 * 3 > sizeBytes) {
+                    throw new RSIllegalArgumentException("Array too small for allocation type.");
+                }
+                usePadding = true;
+                sizeBytes = dataSize;
+            } else {
+                if (dataSize > sizeBytes) {
+                    throw new RSIllegalArgumentException("Array too small for allocation type.");
+                }
             }
+            mRS.nAllocationRead2D(getIDSafe(), xoff, yoff, mSelectedLOD, mSelectedFace.mID, w, h,
+                                  array, sizeBytes, dt, mType.mElement.mType.mSize, usePadding);
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
         }
-        mRS.nAllocationRead2D(getIDSafe(), xoff, yoff, mSelectedLOD, mSelectedFace.mID, w, h,
-                              array, sizeBytes, dt, mType.mElement.mType.mSize, usePadding);
-        Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
     /**
-     * @hide
      * Copy from a rectangular region in this Allocation into an array.
      *
      * @param xoff X offset of the region to copy in this Allocation
@@ -1694,7 +1746,6 @@ public class Allocation extends BaseObj {
     }
 
     /**
-     * @hide
      * Copy from a rectangular region in this Allocation into an array.
      *
      * @param xoff X offset of the region to copy in this Allocation
@@ -1710,7 +1761,6 @@ public class Allocation extends BaseObj {
     }
 
     /**
-     * @hide
      * Copy from a rectangular region in this Allocation into an array.
      *
      * @param xoff X offset of the region to copy in this Allocation
@@ -1726,7 +1776,6 @@ public class Allocation extends BaseObj {
     }
 
     /**
-     * @hide
      * Copy from a rectangular region in this Allocation into an array.
      *
      * @param xoff X offset of the region to copy in this Allocation
@@ -1742,7 +1791,6 @@ public class Allocation extends BaseObj {
     }
 
     /**
-     * @hide
      * Copy from a rectangular region in this Allocation into an array.
      *
      * @param xoff X offset of the region to copy in this Allocation
@@ -1759,36 +1807,41 @@ public class Allocation extends BaseObj {
 
 
     /**
-     * @hide
+     * Copy from a rectangular region in this Allocation into an array.
+     * The array is assumed to be tightly packed.
      *
+     * The data type of the array is not required to be the same as
+     * the element data type.
      */
     private void copy3DRangeToUnchecked(int xoff, int yoff, int zoff, int w, int h, int d,
                                         Object array, Element.DataType dt, int arrayLen) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "copy3DRangeToUnchecked");
-        mRS.validate();
-        validate3DRange(xoff, yoff, zoff, w, h, d);
-        final int dataSize = mType.mElement.getBytesSize() * w * h * d;
-        // AutoPadding for Vec3 Element
-        boolean usePadding = false;
-        int sizeBytes = arrayLen * dt.mSize;
-        if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
-            if (dataSize / 4 * 3 > sizeBytes) {
-                throw new RSIllegalArgumentException("Array too small for allocation type.");
-            }
-            usePadding = true;
-            sizeBytes = dataSize;
-        } else {
-            if (dataSize > sizeBytes) {
-                throw new RSIllegalArgumentException("Array too small for allocation type.");
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "copy3DRangeToUnchecked");
+            mRS.validate();
+            validate3DRange(xoff, yoff, zoff, w, h, d);
+            final int dataSize = mType.mElement.getBytesSize() * w * h * d;
+            // AutoPadding for Vec3 Element
+            boolean usePadding = false;
+            int sizeBytes = arrayLen * dt.mSize;
+            if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
+                if (dataSize / 4 * 3 > sizeBytes) {
+                    throw new RSIllegalArgumentException("Array too small for allocation type.");
+                }
+                usePadding = true;
+                sizeBytes = dataSize;
+            } else {
+                if (dataSize > sizeBytes) {
+                    throw new RSIllegalArgumentException("Array too small for allocation type.");
+                }
             }
+            mRS.nAllocationRead3D(getIDSafe(), xoff, yoff, zoff, mSelectedLOD, w, h, d,
+                                  array, sizeBytes, dt, mType.mElement.mType.mSize, usePadding);
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
         }
-        mRS.nAllocationRead3D(getIDSafe(), xoff, yoff, zoff, mSelectedLOD, w, h, d,
-                              array, sizeBytes, dt, mType.mElement.mType.mSize, usePadding);
-        Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
-    /**
-     * @hide
+    /*
      * Copy from a rectangular region in this Allocation into an array.
      *
      * @param xoff X offset of the region to copy in this Allocation
@@ -1823,17 +1876,20 @@ public class Allocation extends BaseObj {
      *              utilized
      */
     static public Allocation createTyped(RenderScript rs, Type type, MipmapControl mips, int usage) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "createTyped");
-        rs.validate();
-        if (type.getID(rs) == 0) {
-            throw new RSInvalidStateException("Bad Type");
-        }
-        long id = rs.nAllocationCreateTyped(type.getID(rs), mips.mID, usage, 0);
-        if (id == 0) {
-            throw new RSRuntimeException("Allocation creation failed.");
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "createTyped");
+            rs.validate();
+            if (type.getID(rs) == 0) {
+                throw new RSInvalidStateException("Bad Type");
+            }
+            long id = rs.nAllocationCreateTyped(type.getID(rs), mips.mID, usage, 0);
+            if (id == 0) {
+                throw new RSRuntimeException("Allocation creation failed.");
+            }
+            return new Allocation(id, rs, type, usage);
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
         }
-        Trace.traceEnd(RenderScript.TRACE_TAG);
-        return new Allocation(id, rs, type, usage);
     }
 
     /**
@@ -1877,18 +1933,21 @@ public class Allocation extends BaseObj {
      */
     static public Allocation createSized(RenderScript rs, Element e,
                                          int count, int usage) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "createSized");
-        rs.validate();
-        Type.Builder b = new Type.Builder(rs, e);
-        b.setX(count);
-        Type t = b.create();
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "createSized");
+            rs.validate();
+            Type.Builder b = new Type.Builder(rs, e);
+            b.setX(count);
+            Type t = b.create();
 
-        long id = rs.nAllocationCreateTyped(t.getID(rs), MipmapControl.MIPMAP_NONE.mID, usage, 0);
-        if (id == 0) {
-            throw new RSRuntimeException("Allocation creation failed.");
+            long id = rs.nAllocationCreateTyped(t.getID(rs), MipmapControl.MIPMAP_NONE.mID, usage, 0);
+            if (id == 0) {
+                throw new RSRuntimeException("Allocation creation failed.");
+            }
+            return new Allocation(id, rs, t, usage);
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
         }
-        Trace.traceEnd(RenderScript.TRACE_TAG);
-        return new Allocation(id, rs, t, usage);
     }
 
     /**
@@ -1947,44 +2006,47 @@ public class Allocation extends BaseObj {
     static public Allocation createFromBitmap(RenderScript rs, Bitmap b,
                                               MipmapControl mips,
                                               int usage) {
-        Trace.traceBegin(RenderScript.TRACE_TAG, "createFromBitmap");
-        rs.validate();
+        try {
+            Trace.traceBegin(RenderScript.TRACE_TAG, "createFromBitmap");
+            rs.validate();
+
+            // WAR undocumented color formats
+            if (b.getConfig() == null) {
+                if ((usage & USAGE_SHARED) != 0) {
+                    throw new RSIllegalArgumentException("USAGE_SHARED cannot be used with a Bitmap that has a null config.");
+                }
+                Bitmap newBitmap = Bitmap.createBitmap(b.getWidth(), b.getHeight(), Bitmap.Config.ARGB_8888);
+                Canvas c = new Canvas(newBitmap);
+                c.drawBitmap(b, 0, 0, null);
+                return createFromBitmap(rs, newBitmap, mips, usage);
+            }
 
-        // WAR undocumented color formats
-        if (b.getConfig() == null) {
-            if ((usage & USAGE_SHARED) != 0) {
-                throw new RSIllegalArgumentException("USAGE_SHARED cannot be used with a Bitmap that has a null config.");
+            Type t = typeFromBitmap(rs, b, mips);
+
+            // enable optimized bitmap path only with no mipmap and script-only usage
+            if (mips == MipmapControl.MIPMAP_NONE &&
+                 t.getElement().isCompatible(Element.RGBA_8888(rs)) &&
+                 usage == (USAGE_SHARED | USAGE_SCRIPT | USAGE_GRAPHICS_TEXTURE)) {
+                long id = rs.nAllocationCreateBitmapBackedAllocation(t.getID(rs), mips.mID, b, usage);
+                if (id == 0) {
+                    throw new RSRuntimeException("Load failed.");
+                }
+
+                // keep a reference to the Bitmap around to prevent GC
+                Allocation alloc = new Allocation(id, rs, t, usage);
+                alloc.setBitmap(b);
+                return alloc;
             }
-            Bitmap newBitmap = Bitmap.createBitmap(b.getWidth(), b.getHeight(), Bitmap.Config.ARGB_8888);
-            Canvas c = new Canvas(newBitmap);
-            c.drawBitmap(b, 0, 0, null);
-            return createFromBitmap(rs, newBitmap, mips, usage);
-        }
 
-        Type t = typeFromBitmap(rs, b, mips);
 
-        // enable optimized bitmap path only with no mipmap and script-only usage
-        if (mips == MipmapControl.MIPMAP_NONE &&
-            t.getElement().isCompatible(Element.RGBA_8888(rs)) &&
-            usage == (USAGE_SHARED | USAGE_SCRIPT | USAGE_GRAPHICS_TEXTURE)) {
-            long id = rs.nAllocationCreateBitmapBackedAllocation(t.getID(rs), mips.mID, b, usage);
+            long id = rs.nAllocationCreateFromBitmap(t.getID(rs), mips.mID, b, usage);
             if (id == 0) {
                 throw new RSRuntimeException("Load failed.");
             }
-
-            // keep a reference to the Bitmap around to prevent GC
-            Allocation alloc = new Allocation(id, rs, t, usage);
-            alloc.setBitmap(b);
-            return alloc;
+            return new Allocation(id, rs, t, usage);
+        } finally {
+            Trace.traceEnd(RenderScript.TRACE_TAG);
         }
-
-
-        long id = rs.nAllocationCreateFromBitmap(t.getID(rs), mips.mID, b, usage);
-        if (id == 0) {
-            throw new RSRuntimeException("Load failed.");
-        }
-        Trace.traceEnd(RenderScript.TRACE_TAG);
-        return new Allocation(id, rs, t, usage);
     }
 
     /**
diff --git a/rs/java/android/renderscript/AllocationAdapter.java b/rs/java/android/renderscript/AllocationAdapter.java
index 183726f..9bfd6ec 100644
--- a/rs/java/android/renderscript/AllocationAdapter.java
+++ b/rs/java/android/renderscript/AllocationAdapter.java
@@ -136,7 +136,7 @@ public class AllocationAdapter extends Allocation {
 
 
     /**
-     * @hide
+     *
      * Set the active X.  The x value must be within the range for
      * the allocation being adapted.
      *
@@ -242,7 +242,7 @@ public class AllocationAdapter extends Allocation {
     }
 
     /**
-     * @hide
+     *
      *
      * Create an arbitrary window into the base allocation
      * The type describes the shape of the window.
diff --git a/rs/java/android/renderscript/Element.java b/rs/java/android/renderscript/Element.java
index 60ff996..6efb6d6 100644
--- a/rs/java/android/renderscript/Element.java
+++ b/rs/java/android/renderscript/Element.java
@@ -119,9 +119,6 @@ public class Element extends BaseObj {
      */
     public enum DataType {
         NONE (0, 0),
-        /**
-         *     @hide
-         */
         FLOAT_16 (1, 2),
         FLOAT_32 (2, 4),
         FLOAT_64 (3, 8),
@@ -390,9 +387,6 @@ public class Element extends BaseObj {
         return rs.mElement_I64;
     }
 
-    /**
-     *     @hide
-     */
     public static Element F16(RenderScript rs) {
         if(rs.mElement_F16 == null) {
             rs.mElement_F16 = createUser(rs, DataType.FLOAT_16);
@@ -534,9 +528,6 @@ public class Element extends BaseObj {
         return rs.mElement_RGBA_8888;
     }
 
-    /**
-     *     @hide
-     */
     public static Element F16_2(RenderScript rs) {
         if(rs.mElement_HALF_2 == null) {
             rs.mElement_HALF_2 = createVector(rs, DataType.FLOAT_16, 2);
@@ -544,19 +535,13 @@ public class Element extends BaseObj {
         return rs.mElement_HALF_2;
     }
 
-    /**
-     *     @hide
-     */
     public static Element F16_3(RenderScript rs) {
-        if(rs.mElement_FLOAT_3 == null) {
-            rs.mElement_FLOAT_3 = createVector(rs, DataType.FLOAT_16, 3);
+        if(rs.mElement_HALF_3 == null) {
+            rs.mElement_HALF_3 = createVector(rs, DataType.FLOAT_16, 3);
         }
         return rs.mElement_HALF_3;
     }
 
-    /**
-     *     @hide
-     */
     public static Element F16_4(RenderScript rs) {
         if(rs.mElement_HALF_4 == null) {
             rs.mElement_HALF_4 = createVector(rs, DataType.FLOAT_16, 4);
@@ -926,6 +911,7 @@ public class Element extends BaseObj {
 
         switch (dt) {
         // Support only primitive integer/float/boolean types as vectors.
+        case FLOAT_16:
         case FLOAT_32:
         case FLOAT_64:
         case SIGNED_8:
diff --git a/rs/java/android/renderscript/RenderScript.java b/rs/java/android/renderscript/RenderScript.java
index 6b1939c..8b1a032 100644
--- a/rs/java/android/renderscript/RenderScript.java
+++ b/rs/java/android/renderscript/RenderScript.java
@@ -24,7 +24,6 @@ import android.content.Context;
 import android.content.res.AssetManager;
 import android.graphics.Bitmap;
 import android.graphics.SurfaceTexture;
-import android.os.Process;
 import android.util.Log;
 import android.view.Surface;
 import android.os.SystemProperties;
@@ -132,16 +131,31 @@ public class RenderScript {
 
     // this should be a monotonically increasing ID
     // used in conjunction with the API version of a device
-    static final long sMinorID = 1;
+    static final long sMinorVersion = 1;
+
+    /**
+     * @hide
+     *
+     * Only exist to be compatible with old version RenderScript Support lib.
+     * Will eventually be removed.
+     *
+     * @return Always return 1
+     *
+     */
+    public static long getMinorID() {
+        return 1;
+    }
+
 
     /**
      * Returns an identifier that can be used to identify a particular
      * minor version of RS.
      *
-     * @hide
+     * @return The minor RenderScript version number
+     *
      */
-    public static long getMinorID() {
-        return sMinorID;
+    public static long getMinorVersion() {
+        return sMinorVersion;
     }
 
     /**
@@ -302,8 +316,12 @@ public class RenderScript {
         long[] fieldIDs, long[] values, int[] sizes, long[] depClosures,
         long[] depFieldIDs) {
       validate();
-      return rsnClosureCreate(mContext, kernelID, returnValue, fieldIDs, values,
+      long c = rsnClosureCreate(mContext, kernelID, returnValue, fieldIDs, values,
           sizes, depClosures, depFieldIDs);
+      if (c == 0) {
+          throw new RSRuntimeException("Failed creating closure.");
+      }
+      return c;
     }
 
     native long rsnInvokeClosureCreate(long con, long invokeID, byte[] params,
@@ -311,8 +329,12 @@ public class RenderScript {
     synchronized long nInvokeClosureCreate(long invokeID, byte[] params,
         long[] fieldIDs, long[] values, int[] sizes) {
       validate();
-      return rsnInvokeClosureCreate(mContext, invokeID, params, fieldIDs,
+      long c = rsnInvokeClosureCreate(mContext, invokeID, params, fieldIDs,
           values, sizes);
+      if (c == 0) {
+          throw new RSRuntimeException("Failed creating closure.");
+      }
+      return c;
     }
 
     native void rsnClosureSetArg(long con, long closureID, int index,
@@ -337,7 +359,11 @@ public class RenderScript {
     synchronized long nScriptGroup2Create(String name, String cachePath,
                                           long[] closures) {
       validate();
-      return rsnScriptGroup2Create(mContext, name, cachePath, closures);
+      long g = rsnScriptGroup2Create(mContext, name, cachePath, closures);
+      if (g == 0) {
+          throw new RSRuntimeException("Failed creating script group.");
+      }
+      return g;
     }
 
     native void rsnScriptGroup2Execute(long con, long groupID);
@@ -1321,7 +1347,6 @@ public class RenderScript {
     /**
      * Create a RenderScript context.
      *
-     * @hide
      * @param ctx The context.
      * @return RenderScript
      */
@@ -1426,14 +1451,13 @@ public class RenderScript {
      /**
      * Gets or creates a RenderScript context of the specified type.
      *
-     * @hide
      * @param ctx The context.
      * @param ct The type of context to be created.
      * @param sdkVersion The target SDK Version.
      * @param flags The OR of the CREATE_FLAG_* options desired
      * @return RenderScript
      */
-    public static RenderScript create(Context ctx, int sdkVersion, ContextType ct, int flags) {
+    private static RenderScript create(Context ctx, int sdkVersion, ContextType ct, int flags) {
         if (sdkVersion < 23) {
             return internalCreate(ctx, sdkVersion, ct, flags);
         }
@@ -1456,8 +1480,6 @@ public class RenderScript {
     }
 
     /**
-     * @hide
-     *
      * Releases all the process contexts.  This is the same as
      * calling .destroy() on each unique context retreived with
      * create(...). If no contexts have been created this
@@ -1494,7 +1516,6 @@ public class RenderScript {
      *
      * If you need a single context please use create()
      *
-     * @hide
      * @param ctx The context.
      * @return RenderScript
      */
diff --git a/rs/java/android/renderscript/Script.java b/rs/java/android/renderscript/Script.java
index dda468a..7cd6d09 100644
--- a/rs/java/android/renderscript/Script.java
+++ b/rs/java/android/renderscript/Script.java
@@ -182,9 +182,9 @@ public class Script extends BaseObj {
         mRS.validateObject(ain);
         mRS.validateObject(aout);
 
-        if (ain == null && aout == null) {
+        if (ain == null && aout == null && sc == null) {
             throw new RSIllegalArgumentException(
-                "At least one of ain or aout is required to be non-null.");
+                "At least one of input allocation, output allocation, or LaunchOptions is required to be non-null.");
         }
 
         long[] in_ids = null;
@@ -220,22 +220,21 @@ public class Script extends BaseObj {
 
     /**
      * Only intended for use by generated reflected code.
-     *
-     * @hide
      */
     protected void forEach(int slot, Allocation[] ains, Allocation aout,
                            FieldPacker v) {
+
+        // FieldPacker is kept here to support regular params in the future.
         forEach(slot, ains, aout, v, null);
     }
 
     /**
      * Only intended for use by generated reflected code.
-     *
-     * @hide
      */
     protected void forEach(int slot, Allocation[] ains, Allocation aout,
                            FieldPacker v, LaunchOptions sc) {
         // TODO: Is this necessary if nScriptForEach calls validate as well?
+        // FieldPacker is kept here to support regular params in the future.
         mRS.validate();
         if (ains != null) {
             for (Allocation ain : ains) {
@@ -474,7 +473,23 @@ public class Script extends BaseObj {
 
 
     /**
-     * Class used to specify clipping for a kernel launch.
+     * Class for specifying the specifics about how a kernel will be
+     * launched
+     *
+     * This class can specify a potential range of cells on which to
+     * run a kernel.  If no set is called for a dimension then this
+     * class will have no impact on that dimension when the kernel
+     * is executed.
+     *
+     * The forEach launch will operate over the intersection of the
+     * dimensions.
+     *
+     * Example:
+     * LaunchOptions with setX(5, 15)
+     * Allocation with dimension X=10, Y=10
+     * The resulting forEach run would execute over x = 5 to 10 and
+     * y = 0 to 10.
+     *
      *
      */
     public static final class LaunchOptions {
diff --git a/rs/java/android/renderscript/ScriptGroup.java b/rs/java/android/renderscript/ScriptGroup.java
index be8b0fd..54180f4 100644
--- a/rs/java/android/renderscript/ScriptGroup.java
+++ b/rs/java/android/renderscript/ScriptGroup.java
@@ -131,28 +131,16 @@ public final class ScriptGroup extends BaseObj {
 
             int i;
             for (i = 0; i < args.length; i++) {
-                Object obj = args[i];
                 fieldIDs[i] = 0;
-                if (obj instanceof Input) {
-                    Input unbound = (Input)obj;
-                    unbound.addReference(this, i);
-                } else {
-                    retrieveValueAndDependenceInfo(rs, i, args[i], values, sizes,
-                                                   depClosures, depFieldIDs);
-                }
+                retrieveValueAndDependenceInfo(rs, i, null, args[i],
+                                               values, sizes, depClosures, depFieldIDs);
             }
-
             for (Map.Entry<Script.FieldID, Object> entry : globals.entrySet()) {
                 Object obj = entry.getValue();
                 Script.FieldID fieldID = entry.getKey();
                 fieldIDs[i] = fieldID.getID(rs);
-                if (obj instanceof Input) {
-                    Input unbound = (Input)obj;
-                    unbound.addReference(this, fieldID);
-                } else {
-                    retrieveValueAndDependenceInfo(rs, i, obj, values,
-                                                   sizes, depClosures, depFieldIDs);
-                }
+                retrieveValueAndDependenceInfo(rs, i, fieldID, obj,
+                                               values, sizes, depClosures, depFieldIDs);
                 i++;
             }
 
@@ -184,13 +172,8 @@ public final class ScriptGroup extends BaseObj {
                 Object obj = entry.getValue();
                 Script.FieldID fieldID = entry.getKey();
                 fieldIDs[i] = fieldID.getID(rs);
-                if (obj instanceof Input) {
-                    Input unbound = (Input)obj;
-                    unbound.addReference(this, fieldID);
-                } else {
-                    retrieveValueAndDependenceInfo(rs, i, obj, values,
-                                                   sizes, depClosures, depFieldIDs);
-                }
+                retrieveValueAndDependenceInfo(rs, i, fieldID, obj, values,
+                                               sizes, depClosures, depFieldIDs);
                 i++;
             }
 
@@ -200,9 +183,8 @@ public final class ScriptGroup extends BaseObj {
             setID(id);
         }
 
-        private static
-                void retrieveValueAndDependenceInfo(RenderScript rs,
-                                                    int index, Object obj,
+        private void retrieveValueAndDependenceInfo(RenderScript rs,
+                                                    int index, Script.FieldID fid, Object obj,
                                                     long[] values, int[] sizes,
                                                     long[] depClosures,
                                                     long[] depFieldIDs) {
@@ -213,20 +195,25 @@ public final class ScriptGroup extends BaseObj {
                 depClosures[index] = f.getClosure().getID(rs);
                 Script.FieldID fieldID = f.getFieldID();
                 depFieldIDs[index] = fieldID != null ? fieldID.getID(rs) : 0;
-                if (obj == null) {
-                    // Value is originally created by the owner closure
-                    values[index] = 0;
-                    sizes[index] = 0;
-                    return;
-                }
             } else {
                 depClosures[index] = 0;
                 depFieldIDs[index] = 0;
             }
 
-            ValueAndSize vs = new ValueAndSize(rs, obj);
-            values[index] = vs.value;
-            sizes[index] = vs.size;
+            if (obj instanceof Input) {
+                Input unbound = (Input)obj;
+                if (index < mArgs.length) {
+                    unbound.addReference(this, index);
+                } else {
+                    unbound.addReference(this, fid);
+                }
+                values[index] = 0;
+                sizes[index] = 0;
+            } else {
+                ValueAndSize vs = new ValueAndSize(rs, obj);
+                values[index] = vs.value;
+                sizes[index] = vs.size;
+            }
         }
 
         /**
@@ -258,7 +245,11 @@ public final class ScriptGroup extends BaseObj {
                 // without an associated value (reference). So this is not working for
                 // cross-module (cross-script) linking in this case where a field not
                 // explicitly bound.
-                f = new Future(this, field, mBindings.get(field));
+                Object obj = mBindings.get(field);
+                if (obj instanceof Future) {
+                    obj = ((Future)obj).getValue();
+                }
+                f = new Future(this, field, obj);
                 mGlobalFuture.put(field, f);
             }
 
@@ -266,12 +257,18 @@ public final class ScriptGroup extends BaseObj {
         }
 
         void setArg(int index, Object obj) {
+            if (obj instanceof Future) {
+                obj = ((Future)obj).getValue();
+            }
             mArgs[index] = obj;
             ValueAndSize vs = new ValueAndSize(mRS, obj);
             mRS.nClosureSetArg(getID(mRS), index, vs.value, vs.size);
         }
 
         void setGlobal(Script.FieldID fieldID, Object obj) {
+            if (obj instanceof Future) {
+                obj = ((Future)obj).getValue();
+            }
             mBindings.put(fieldID, obj);
             ValueAndSize vs = new ValueAndSize(mRS, obj);
             mRS.nClosureSetGlobal(getID(mRS), fieldID.getID(mRS), vs.value, vs.size);
@@ -344,6 +341,7 @@ public final class ScriptGroup extends BaseObj {
         // -1 means unset. Legal values are 0 .. n-1, where n is the number of
         // arguments for the referencing closure.
         List<Pair<Closure, Integer>> mArgIndex;
+        Object mValue;
 
         Input() {
             mFieldID = new ArrayList<Pair<Closure, Script.FieldID>>();
@@ -359,6 +357,7 @@ public final class ScriptGroup extends BaseObj {
         }
 
         void set(Object value) {
+            mValue = value;
             for (Pair<Closure, Integer> p : mArgIndex) {
                 Closure closure = p.first;
                 int index = p.second.intValue();
@@ -370,6 +369,8 @@ public final class ScriptGroup extends BaseObj {
                 closure.setGlobal(fieldID, value);
             }
         }
+
+        Object get() { return mValue; }
     }
 
     private String mName;
@@ -400,8 +401,10 @@ public final class ScriptGroup extends BaseObj {
     /**
      * Executes a script group
      *
-     * @param inputs inputs to the script group
-     * @return outputs of the script group as an array of objects
+     * @param inputs Values for inputs to the script group, in the order as the
+     *        inputs are added via {@link Builder2#addInput}.
+     * @return Outputs of the script group as an array of objects, in the order
+     *         as futures are passed to {@link Builder2#create}.
      */
 
     public Object[] execute(Object... inputs) {
@@ -432,7 +435,11 @@ public final class ScriptGroup extends BaseObj {
         Object[] outputObjs = new Object[mOutputs2.length];
         int i = 0;
         for (Future f : mOutputs2) {
-            outputObjs[i++] = f.getValue();
+            Object output = f.getValue();
+            if (output instanceof Input) {
+                output = ((Input)output).get();
+            }
+            outputObjs[i++] = output;
         }
         return outputObjs;
     }
@@ -590,7 +597,8 @@ public final class ScriptGroup extends BaseObj {
                 Node n = mNodes.get(ct);
                 if (n.mInputs.size() == 0) {
                     if (n.mOutputs.size() == 0 && mNodes.size() > 1) {
-                        throw new RSInvalidStateException("Groups cannot contain unconnected scripts");
+                        String msg = "Groups cannot contain unconnected scripts";
+                        throw new RSInvalidStateException(msg);
                     }
                     validateDAGRecurse(n, ct+1);
                 }
@@ -843,13 +851,13 @@ public final class ScriptGroup extends BaseObj {
          * Returns the field ID
          */
 
-        public Script.FieldID getField() { return mField; }
+        Script.FieldID getField() { return mField; }
 
         /**
          * Returns the value
          */
 
-        public Object getValue() { return mValue; }
+        Object getValue() { return mValue; }
     }
 
     /**
@@ -987,6 +995,8 @@ public final class ScriptGroup extends BaseObj {
          *
          * @param name name for the script group. Legal names can only contain letters, digits,
          *        '-', or '_'. The name can be no longer than 100 characters.
+         *        Try to use unique names, to avoid name conflicts and reduce
+         *        the cost of group creation.
          * @param outputs futures intended as outputs of the script group
          * @return a script group
          */
diff --git a/rs/java/android/renderscript/ScriptGroup2.java b/rs/java/android/renderscript/ScriptGroup2.java
deleted file mode 100644
index 417bbee..0000000
--- a/rs/java/android/renderscript/ScriptGroup2.java
+++ /dev/null
@@ -1,449 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package android.renderscript;
-
-import android.util.Log;
-import android.util.Pair;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
-
-******************************
-You have tried to change the API from what has been previously approved.
-
-To make these errors go away, you have two choices:
-1) You can add "@hide" javadoc comments to the methods, etc. listed in the
-errors above.
-
-2) You can update current.txt by executing the following command:
-make update-api
-
-To submit the revised current.txt to the main Android repository,
-you will need approval.
-******************************
-
-@hide Pending Android public API approval.
-*/
-public class ScriptGroup2 extends BaseObj {
-
-    public static class Closure extends BaseObj {
-        private Object[] mArgs;
-        private Allocation mReturnValue;
-        private Map<Script.FieldID, Object> mBindings;
-
-        private Future mReturnFuture;
-        private Map<Script.FieldID, Future> mGlobalFuture;
-
-        private FieldPacker mFP;
-
-        private static final String TAG = "Closure";
-
-        public Closure(long id, RenderScript rs) {
-            super(id, rs);
-        }
-
-        public Closure(RenderScript rs, Script.KernelID kernelID, Type returnType,
-                       Object[] args, Map<Script.FieldID, Object> globals) {
-            super(0, rs);
-
-            mArgs = args;
-            mReturnValue = Allocation.createTyped(rs, returnType);
-            mBindings = globals;
-            mGlobalFuture = new HashMap<Script.FieldID, Future>();
-
-            int numValues = args.length + globals.size();
-
-            long[] fieldIDs = new long[numValues];
-            long[] values = new long[numValues];
-            int[] sizes = new int[numValues];
-            long[] depClosures = new long[numValues];
-            long[] depFieldIDs = new long[numValues];
-
-            int i;
-            for (i = 0; i < args.length; i++) {
-                Object obj = args[i];
-                fieldIDs[i] = 0;
-                if (obj instanceof UnboundValue) {
-                    UnboundValue unbound = (UnboundValue)obj;
-                    unbound.addReference(this, i);
-                } else {
-                    retrieveValueAndDependenceInfo(rs, i, args[i], values, sizes,
-                                                   depClosures, depFieldIDs);
-                }
-            }
-
-            for (Map.Entry<Script.FieldID, Object> entry : globals.entrySet()) {
-                Object obj = entry.getValue();
-                Script.FieldID fieldID = entry.getKey();
-                fieldIDs[i] = fieldID.getID(rs);
-                if (obj instanceof UnboundValue) {
-                    UnboundValue unbound = (UnboundValue)obj;
-                    unbound.addReference(this, fieldID);
-                } else {
-                    retrieveValueAndDependenceInfo(rs, i, obj, values,
-                                                   sizes, depClosures, depFieldIDs);
-                }
-                i++;
-            }
-
-            long id = rs.nClosureCreate(kernelID.getID(rs), mReturnValue.getID(rs),
-                                        fieldIDs, values, sizes, depClosures, depFieldIDs);
-
-            setID(id);
-        }
-
-        public Closure(RenderScript rs, Script.InvokeID invokeID,
-                       Object[] args, Map<Script.FieldID, Object> globals) {
-            super(0, rs);
-            mFP = FieldPacker.createFromArray(args);
-
-            mArgs = args;
-            mBindings = globals;
-            mGlobalFuture = new HashMap<Script.FieldID, Future>();
-
-            int numValues = globals.size();
-
-            long[] fieldIDs = new long[numValues];
-            long[] values = new long[numValues];
-            int[] sizes = new int[numValues];
-            long[] depClosures = new long[numValues];
-            long[] depFieldIDs = new long[numValues];
-
-            int i = 0;
-            for (Map.Entry<Script.FieldID, Object> entry : globals.entrySet()) {
-                Object obj = entry.getValue();
-                Script.FieldID fieldID = entry.getKey();
-                fieldIDs[i] = fieldID.getID(rs);
-                if (obj instanceof UnboundValue) {
-                    UnboundValue unbound = (UnboundValue)obj;
-                    unbound.addReference(this, fieldID);
-                } else {
-                    // TODO(yangni): Verify obj not a future.
-                    retrieveValueAndDependenceInfo(rs, i, obj, values,
-                                                   sizes, depClosures, depFieldIDs);
-                }
-                i++;
-            }
-
-            long id = rs.nInvokeClosureCreate(invokeID.getID(rs), mFP.getData(), fieldIDs,
-                                              values, sizes);
-
-            setID(id);
-        }
-
-        private static
-                void retrieveValueAndDependenceInfo(RenderScript rs,
-                                                    int index, Object obj,
-                                                    long[] values, int[] sizes,
-                                                    long[] depClosures,
-                                                    long[] depFieldIDs) {
-
-            if (obj instanceof Future) {
-                Future f = (Future)obj;
-                obj = f.getValue();
-                depClosures[index] = f.getClosure().getID(rs);
-                Script.FieldID fieldID = f.getFieldID();
-                depFieldIDs[index] = fieldID != null ? fieldID.getID(rs) : 0;
-                if (obj == null) {
-                    // Value is originally created by the owner closure
-                    values[index] = 0;
-                    sizes[index] = 0;
-                    return;
-                }
-            } else {
-                depClosures[index] = 0;
-                depFieldIDs[index] = 0;
-            }
-
-            ValueAndSize vs = new ValueAndSize(rs, obj);
-            values[index] = vs.value;
-            sizes[index] = vs.size;
-        }
-
-        public Future getReturn() {
-            if (mReturnFuture == null) {
-                mReturnFuture = new Future(this, null, mReturnValue);
-            }
-
-            return mReturnFuture;
-        }
-
-        public Future getGlobal(Script.FieldID field) {
-            Future f = mGlobalFuture.get(field);
-
-            if (f == null) {
-                // If the field is not bound to this closure, this will return a future
-                // without an associated value (reference). So this is not working for
-                // cross-module (cross-script) linking in this case where a field not
-                // explicitly bound.
-                f = new Future(this, field, mBindings.get(field));
-                mGlobalFuture.put(field, f);
-            }
-
-            return f;
-        }
-
-        void setArg(int index, Object obj) {
-            mArgs[index] = obj;
-            ValueAndSize vs = new ValueAndSize(mRS, obj);
-            mRS.nClosureSetArg(getID(mRS), index, vs.value, vs.size);
-        }
-
-        void setGlobal(Script.FieldID fieldID, Object obj) {
-            mBindings.put(fieldID, obj);
-            ValueAndSize vs = new ValueAndSize(mRS, obj);
-            mRS.nClosureSetGlobal(getID(mRS), fieldID.getID(mRS), vs.value, vs.size);
-        }
-
-        private static final class ValueAndSize {
-            public ValueAndSize(RenderScript rs, Object obj) {
-                if (obj instanceof Allocation) {
-                    value = ((Allocation)obj).getID(rs);
-                    size = -1;
-                } else if (obj instanceof Boolean) {
-                    value = ((Boolean)obj).booleanValue() ? 1 : 0;
-                    size = 4;
-                } else if (obj instanceof Integer) {
-                    value = ((Integer)obj).longValue();
-                    size = 4;
-                } else if (obj instanceof Long) {
-                    value = ((Long)obj).longValue();
-                    size = 8;
-                } else if (obj instanceof Float) {
-                    value = ((Float)obj).longValue();
-                    size = 4;
-                } else if (obj instanceof Double) {
-                    value = ((Double)obj).longValue();
-                    size = 8;
-                }
-            }
-            public long value;
-            public int size;
-        }
-    }
-
-    public static class Future {
-        Closure mClosure;
-        Script.FieldID mFieldID;
-        Object mValue;
-
-        Future(Closure closure, Script.FieldID fieldID, Object value) {
-            mClosure = closure;
-            mFieldID = fieldID;
-            mValue = value;
-        }
-
-        Closure getClosure() { return mClosure; }
-        Script.FieldID getFieldID() { return mFieldID; }
-        Object getValue() { return mValue; }
-    }
-
-    public static class UnboundValue {
-        // Either mFieldID or mArgIndex should be set but not both.
-        List<Pair<Closure, Script.FieldID>> mFieldID;
-        // -1 means unset. Legal values are 0 .. n-1, where n is the number of
-        // arguments for the referencing closure.
-        List<Pair<Closure, Integer>> mArgIndex;
-
-        UnboundValue() {
-            mFieldID = new ArrayList<Pair<Closure, Script.FieldID>>();
-            mArgIndex = new ArrayList<Pair<Closure, Integer>>();
-        }
-
-        void addReference(Closure closure, int index) {
-            mArgIndex.add(Pair.create(closure, Integer.valueOf(index)));
-        }
-
-        void addReference(Closure closure, Script.FieldID fieldID) {
-            mFieldID.add(Pair.create(closure, fieldID));
-        }
-
-        void set(Object value) {
-            for (Pair<Closure, Integer> p : mArgIndex) {
-                Closure closure = p.first;
-                int index = p.second.intValue();
-                closure.setArg(index, value);
-            }
-            for (Pair<Closure, Script.FieldID> p : mFieldID) {
-                Closure closure = p.first;
-                Script.FieldID fieldID = p.second;
-                closure.setGlobal(fieldID, value);
-            }
-        }
-    }
-
-    String mName;
-    List<Closure> mClosures;
-    List<UnboundValue> mInputs;
-    Future[] mOutputs;
-
-    private static final String TAG = "ScriptGroup2";
-
-    public ScriptGroup2(long id, RenderScript rs) {
-        super(id, rs);
-    }
-
-    ScriptGroup2(RenderScript rs, String name, List<Closure> closures,
-                 List<UnboundValue> inputs, Future[] outputs) {
-        super(0, rs);
-        mName = name;
-        mClosures = closures;
-        mInputs = inputs;
-        mOutputs = outputs;
-
-        long[] closureIDs = new long[closures.size()];
-        for (int i = 0; i < closureIDs.length; i++) {
-            closureIDs[i] = closures.get(i).getID(rs);
-        }
-        long id = rs.nScriptGroup2Create(name, ScriptC.mCachePath, closureIDs);
-        setID(id);
-    }
-
-    public Object[] execute(Object... inputs) {
-        if (inputs.length < mInputs.size()) {
-            Log.e(TAG, this.toString() + " receives " + inputs.length + " inputs, " +
-                  "less than expected " + mInputs.size());
-            return null;
-        }
-
-        if (inputs.length > mInputs.size()) {
-            Log.i(TAG, this.toString() + " receives " + inputs.length + " inputs, " +
-                  "more than expected " + mInputs.size());
-        }
-
-        for (int i = 0; i < mInputs.size(); i++) {
-            Object obj = inputs[i];
-            if (obj instanceof Future || obj instanceof UnboundValue) {
-                Log.e(TAG, this.toString() + ": input " + i +
-                      " is a future or unbound value");
-                return null;
-            }
-            UnboundValue unbound = mInputs.get(i);
-            unbound.set(obj);
-        }
-
-        mRS.nScriptGroup2Execute(getID(mRS));
-
-        Object[] outputObjs = new Object[mOutputs.length];
-        int i = 0;
-        for (Future f : mOutputs) {
-            outputObjs[i++] = f.getValue();
-        }
-        return outputObjs;
-    }
-
-    /**
-       @hide Pending Android public API approval.
-    */
-    public static final class Binding {
-        public Script.FieldID mField;
-        public Object mValue;
-        public Binding(Script.FieldID field, Object value) {
-            mField = field;
-            mValue = value;
-        }
-    }
-
-    /**
-       @hide Pending Android public API approval.
-    */
-    public static final class Builder {
-        RenderScript mRS;
-        List<Closure> mClosures;
-        List<UnboundValue> mInputs;
-        private static final String TAG = "ScriptGroup2.Builder";
-
-        public Builder(RenderScript rs) {
-            mRS = rs;
-            mClosures = new ArrayList<Closure>();
-            mInputs = new ArrayList<UnboundValue>();
-        }
-
-        public Closure addKernel(Script.KernelID k, Type returnType, Object[] args,
-                                 Map<Script.FieldID, Object> globalBindings) {
-            Closure c = new Closure(mRS, k, returnType, args, globalBindings);
-            mClosures.add(c);
-            return c;
-        }
-
-        public Closure addInvoke(Script.InvokeID invoke, Object[] args,
-                                 Map<Script.FieldID, Object> globalBindings) {
-            Closure c = new Closure(mRS, invoke, args, globalBindings);
-            mClosures.add(c);
-            return c;
-        }
-
-        public UnboundValue addInput() {
-            UnboundValue unbound = new UnboundValue();
-            mInputs.add(unbound);
-            return unbound;
-        }
-
-        public Closure addKernel(Script.KernelID k, Type returnType, Object... argsAndBindings) {
-            ArrayList<Object> args = new ArrayList<Object>();
-            Map<Script.FieldID, Object> bindingMap = new HashMap<Script.FieldID, Object>();
-            if (!seperateArgsAndBindings(argsAndBindings, args, bindingMap)) {
-                return null;
-            }
-            return addKernel(k, returnType, args.toArray(), bindingMap);
-        }
-
-        public Closure addInvoke(Script.InvokeID invoke, Object... argsAndBindings) {
-            ArrayList<Object> args = new ArrayList<Object>();
-            Map<Script.FieldID, Object> bindingMap = new HashMap<Script.FieldID, Object>();
-            if (!seperateArgsAndBindings(argsAndBindings, args, bindingMap)) {
-                return null;
-            }
-            return addInvoke(invoke, args.toArray(), bindingMap);
-        }
-
-        public ScriptGroup2 create(String name, Future... outputs) {
-            if (name == null || name.isEmpty() || name.length() > 100 ||
-                !name.equals(name.replaceAll("[^a-zA-Z0-9-]", "_"))) {
-                throw new RSIllegalArgumentException("invalid script group name");
-            }
-            ScriptGroup2 ret = new ScriptGroup2(mRS, name, mClosures, mInputs, outputs);
-            return ret;
-        }
-
-        private boolean seperateArgsAndBindings(Object[] argsAndBindings,
-                                                ArrayList<Object> args,
-                                                Map<Script.FieldID, Object> bindingMap) {
-            int i;
-            for (i = 0; i < argsAndBindings.length; i++) {
-                if (argsAndBindings[i] instanceof Binding) {
-                    break;
-                }
-                args.add(argsAndBindings[i]);
-            }
-
-            for (; i < argsAndBindings.length; i++) {
-                if (!(argsAndBindings[i] instanceof Binding)) {
-                    return false;
-                }
-                Binding b = (Binding)argsAndBindings[i];
-                bindingMap.put(b.mField, b.mValue);
-            }
-
-            return true;
-        }
-
-    }
-}
diff --git a/rs/java/android/renderscript/ScriptIntrinsicBLAS.java b/rs/java/android/renderscript/ScriptIntrinsicBLAS.java
index 16b7033..06134e5 100644
--- a/rs/java/android/renderscript/ScriptIntrinsicBLAS.java
+++ b/rs/java/android/renderscript/ScriptIntrinsicBLAS.java
@@ -22,9 +22,13 @@ import java.lang.annotation.RetentionPolicy;
 
 /**
  *
- * BLAS
+ * ScriptIntrinsicBLAS class provides high performance RenderScript APIs to BLAS.
+ *
+ * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard
+ * building blocks for performing basic vector and matrix operations.
+ *
+ * For detailed description of BLAS, please refer to http://www.netlib.org/blas/
  *
- * @hide
  **/
 public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
     private Allocation mLUT;
@@ -180,24 +184,40 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
     private static final int RsBlas_bnnm = 1000;
 
     /**
+     * Create an intrinsic to access BLAS subroutines.
+     *
+     * @param rs The RenderScript context
+     * @return ScriptIntrinsicBLAS
      */
     public static ScriptIntrinsicBLAS create(RenderScript rs) {
         long id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs));
         return new ScriptIntrinsicBLAS(id, rs);
     }
 
+    /**
+     * @hide
+     */
     @IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE})
     @Retention(RetentionPolicy.SOURCE)
     public @interface Transpose {}
 
+    /**
+     * @hide
+     */
     @IntDef({UPPER, LOWER})
     @Retention(RetentionPolicy.SOURCE)
     public @interface Uplo {}
 
+    /**
+     * @hide
+     */
     @IntDef({NON_UNIT, UNIT})
     @Retention(RetentionPolicy.SOURCE)
     public @interface Diag {}
 
+    /**
+     * @hide
+     */
     @IntDef({LEFT, RIGHT})
     @Retention(RetentionPolicy.SOURCE)
     public @interface Side {}
@@ -242,7 +262,7 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
     }
 
     static void validateUplo(@Uplo int Uplo) {
-        if (Uplo != LEFT && Uplo != RIGHT) {
+        if (Uplo != UPPER && Uplo != LOWER) {
             throw new RSRuntimeException("Invalid uplo passed to BLAS");
         }
     }
@@ -277,36 +297,124 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
             expectedYDim = 1 + (N - 1) * incY;
         }
         if (X.getType().getX() != expectedXDim ||
-            Y.getType().getY() != expectedXDim) {
+            Y.getType().getX() != expectedYDim) {
             throw new RSRuntimeException("Incorrect vector dimensions for GEMV");
         }
     }
-    void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
+
+    /**
+     * SGEMV performs one of the matrix-vector operations
+     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html
+     *
+     * @param TransA The type of transpose applied to matrix A.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
         validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
         int M = A.getType().getY();
         int N = A.getType().getX();
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
     }
-    void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
+
+    /**
+     * DGEMV performs one of the matrix-vector operations
+     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html
+     *
+     * @param TransA The type of transpose applied to matrix A.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
         validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
         int M = A.getType().getY();
         int N = A.getType().getX();
         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
     }
-    void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
+
+    /**
+     * CGEMV performs one of the matrix-vector operations
+     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html
+     *
+     * @param TransA The type of transpose applied to matrix A.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
         validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
         int M = A.getType().getY();
         int N = A.getType().getX();
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
     }
-    void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
+
+    /**
+     * ZGEMV performs one of the matrix-vector operations
+     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html
+     *
+     * @param TransA The type of transpose applied to matrix A.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
         validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
         int M = A.getType().getY();
         int N = A.getType().getX();
         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
     }
 
-    void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
+    /**
+     * SGBMV performs one of the matrix-vector operations
+     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html
+     *
+     * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
+     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
+     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
+     *           for i in range(0, m):
+     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
+     *                  b[i, j-i+kl] = a[i, j]
+     *
+     * @param TransA The type of transpose applied to matrix A.
+     * @param KL The number of sub-diagonals of the matrix A.
+     * @param KU The number of super-diagonals of the matrix A.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
         validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
         if (KL < 0 || KU < 0) {
@@ -316,7 +424,32 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         int N = A.getType().getX();
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU);
     }
-    void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
+
+    /**
+     * DGBMV performs one of the matrix-vector operations
+     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html
+     *
+     * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
+     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
+     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
+     *           for i in range(0, m):
+     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
+     *                  b[i, j-i+kl] = a[i, j]
+     *
+     * @param TransA The type of transpose applied to matrix A.
+     * @param KL The number of sub-diagonals of the matrix A.
+     * @param KU The number of super-diagonals of the matrix A.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
         validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
         if (KL < 0 || KU < 0) {
@@ -326,7 +459,32 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         int N = A.getType().getX();
         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU);
     }
-    void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
+
+    /**
+     * CGBMV performs one of the matrix-vector operations
+     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html
+     *
+     * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
+     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
+     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
+     *           for i in range(0, m):
+     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
+     *                  b[i, j-i+kl] = a[i, j]
+     *
+     * @param TransA The type of transpose applied to matrix A.
+     * @param KL The number of sub-diagonals of the matrix A.
+     * @param KU The number of super-diagonals of the matrix A.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
         validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
         if (KL < 0 || KU < 0) {
@@ -336,7 +494,32 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         int N = A.getType().getX();
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU);
     }
-    void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
+
+    /**
+     * ZGBMV performs one of the matrix-vector operations
+     * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html
+     *
+     * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
+     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
+     *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
+     *           for i in range(0, m):
+     *              for j in range(max(0, i-kl), min(i+ku+1, n)):
+     *                  b[i, j-i+kl] = a[i, j]
+     *
+     * @param TransA The type of transpose applied to matrix A.
+     * @param KL The number of sub-diagonals of the matrix A.
+     * @param KU The number of super-diagonals of the matrix A.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
         validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
         if (KL < 0 || KU < 0) {
@@ -347,8 +530,10 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU);
     }
 
-    static void validateTRMV(Element e, @Transpose int TransA, Allocation A, Allocation X, int incX) {
+    static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
         validateTranspose(TransA);
+        validateUplo(Uplo);
+        validateDiag(Diag);
         int N = A.getType().getY();
         if (A.getType().getX() != N) {
             throw new RSRuntimeException("A must be a square matrix for TRMV");
@@ -387,158 +572,636 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         }
 
         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
+        //is it really doing anything?
         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
             throw new RSRuntimeException("Invalid dimension for Ap");
         }
-
+        if (incX <= 0) {
+            throw new RSRuntimeException("Vector increments must be greater than 0");
+        }
         int expectedXDim = 1 + (N - 1) * incX;
         if (X.getType().getX() != expectedXDim) {
-            throw new RSRuntimeException("Incorrect vector dimensions for SYMV");
+            throw new RSRuntimeException("Incorrect vector dimensions for TPMV");
         }
 
         return N;
     }
 
-    void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
-        validateTRMV(Element.F32(mRS), TransA, A, X, incX);
+    /**
+     * STRMV performs one of the matrix-vector operations
+     * x := A*x   or   x := A**T*x
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
+        validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
         int N = A.getType().getY();
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
     }
-    void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
-        validateTRMV(Element.F64(mRS), TransA, A, X, incX);
+
+    /**
+     * DTRMV performs one of the matrix-vector operations
+     * x := A*x   or   x := A**T*x
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
+        validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
         int N = A.getType().getY();
         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
     }
-    void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
-        validateTRMV(Element.F32_2(mRS), TransA, A, X, incX);
+
+    /**
+     * CTRMV performs one of the matrix-vector operations
+     * x := A*x   or   x := A**T*x   or   x := A**H*x
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
+        validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
         int N = A.getType().getY();
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
     }
-    void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
-        validateTRMV(Element.F64_2(mRS), TransA, A, X, incX);
+
+    /**
+     * ZTRMV performs one of the matrix-vector operations
+     * x := A*x   or   x := A**T*x   or   x := A**H*x
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
+        validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
         int N = A.getType().getY();
         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
     }
-    void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
-        // TBMV has the same requirements as TRMV
-        validateTRMV(Element.F32(mRS), TransA, A, X, incX);
+
+    /**
+     * STBMV performs one of the matrix-vector operations
+     * x := A*x   or   x := A**T*x
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
+     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
+     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
+     *           for i in range(0, n):
+     *              for j in range(i, min(i+k+1, n)):
+     *                  b[i, j-i] = a[i, j]
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param K The number of off-diagonals of the matrix A
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
+        // TBMV has the same requirements as TRMV + K >= 0
+        if (K < 0) {
+            throw new RSRuntimeException("K must be greater than or equal to 0");
+        }
+        validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
         int N = A.getType().getY();
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
     }
-    void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
-        // TBMV has the same requirements as TRMV
-        validateTRMV(Element.F64(mRS), TransA, A, X, incX);
+
+    /**
+     * DTBMV performs one of the matrix-vector operations
+     * x := A*x   or   x := A**T*x
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
+     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
+     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
+     *           for i in range(0, n):
+     *              for j in range(i, min(i+k+1, n)):
+     *                  b[i, j-i] = a[i, j]
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param K The number of off-diagonals of the matrix A
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
+        // TBMV has the same requirements as TRMV + K >= 0
+        if (K < 0) {
+            throw new RSRuntimeException("K must be greater than or equal to 0");
+        }
+        validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
         int N = A.getType().getY();
         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
     }
-    void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
-        // TBMV has the same requirements as TRMV
-        validateTRMV(Element.F32_2(mRS), TransA, A, X, incX);
+
+    /**
+     * CTBMV performs one of the matrix-vector operations
+     * x := A*x   or   x := A**T*x   or   x := A**H*x
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
+     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
+     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
+     *           for i in range(0, n):
+     *              for j in range(i, min(i+k+1, n)):
+     *                  b[i, j-i] = a[i, j]
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param K The number of off-diagonals of the matrix A
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
+        // TBMV has the same requirements as TRMV + K >= 0
+        if (K < 0) {
+            throw new RSRuntimeException("K must be greater than or equal to 0");
+        }
+        validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
         int N = A.getType().getY();
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
     }
-    void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
-        // TBMV has the same requirements as TRMV
-        validateTRMV(Element.F64_2(mRS), TransA, A, X, incX);
+
+    /**
+     * ZTBMV performs one of the matrix-vector operations
+     * x := A*x   or   x := A**T*x   or   x := A**H*x
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
+     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
+     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
+     *           for i in range(0, n):
+     *              for j in range(i, min(i+k+1, n)):
+     *                  b[i, j-i] = a[i, j]
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param K The number of off-diagonals of the matrix A
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
+        // TBMV has the same requirements as TRMV + K >= 0
+        if (K < 0) {
+            throw new RSRuntimeException("K must be greater than or equal to 0");
+        }
+        validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
         int N = A.getType().getY();
         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
     }
-    void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
+
+    /**
+     * STPMV performs one of the matrix-vector operations
+     * x := A*x   or   x := A**T*x
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
         int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
     }
-    void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
+
+    /**
+     * DTPMV performs one of the matrix-vector operations
+     * x := A*x   or   x := A**T*x
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
         int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
     }
-    void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
+
+    /**
+     * CTPMV performs one of the matrix-vector operations
+     * x := A*x   or   x := A**T*x   or   x := A**H*x
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
         int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
     }
-    void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
+
+    /**
+     * ZTPMV performs one of the matrix-vector operations
+     * x := A*x   or   x := A**T*x   or   x := A**H*x
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
         int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
     }
-    void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
+
+    /**
+     * STRSV solves one of the systems of equations
+     * A*x = b   or   A**T*x = b
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
         // TRSV is the same as TRMV
-        validateTRMV(Element.F32(mRS), TransA, A, X, incX);
+        validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
         int N = A.getType().getY();
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
 
     }
-    void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
+
+    /**
+     * DTRSV solves one of the systems of equations
+     * A*x = b   or   A**T*x = b
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
         // TRSV is the same as TRMV
-        validateTRMV(Element.F64(mRS), TransA, A, X, incX);
+        validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
         int N = A.getType().getY();
         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
 
     }
-    void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
+
+    /**
+     * CTRSV solves one of the systems of equations
+     * A*x = b   or   A**T*x = b   or   A**H*x = b
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
         // TRSV is the same as TRMV
-        validateTRMV(Element.F32_2(mRS), TransA, A, X, incX);
+        validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
         int N = A.getType().getY();
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
 
     }
-    void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
+
+    /**
+     * ZTRSV solves one of the systems of equations
+     * A*x = b   or   A**T*x = b   or   A**H*x = b
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
         // TRSV is the same as TRMV
-        validateTRMV(Element.F64_2(mRS), TransA, A, X, incX);
+        validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
         int N = A.getType().getY();
         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
 
     }
-    void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
-        // TBSV is the same as TRMV
-        validateTRMV(Element.F32(mRS), TransA, A, X, incX);
+
+    /**
+     * STBSV solves one of the systems of equations
+     * A*x = b   or   A**T*x = b
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
+     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
+     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
+     *           for i in range(0, n):
+     *              for j in range(i, min(i+k+1, n)):
+     *                  b[i, j-i] = a[i, j]
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param K The number of off-diagonals of the matrix A
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
+        // TBSV is the same as TRMV + K >= 0
+        validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
         int N = A.getType().getY();
         if (K < 0) {
             throw new RSRuntimeException("Number of diagonals must be positive");
         }
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
     }
-    void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
-        // TBSV is the same as TRMV
-        validateTRMV(Element.F64(mRS), TransA, A, X, incX);
+
+    /**
+     * DTBSV solves one of the systems of equations
+     * A*x = b   or   A**T*x = b
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
+     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
+     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
+     *           for i in range(0, n):
+     *              for j in range(i, min(i+k+1, n)):
+     *                  b[i, j-i] = a[i, j]
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param K The number of off-diagonals of the matrix A
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
+        // TBSV is the same as TRMV + K >= 0
+        validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
         int N = A.getType().getY();
         if (K < 0) {
             throw new RSRuntimeException("Number of diagonals must be positive");
         }
         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
     }
-    void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
-        // TBSV is the same as TRMV
-        validateTRMV(Element.F32_2(mRS), TransA, A, X, incX);
+
+    /**
+     * CTBSV solves one of the systems of equations
+     * A*x = b   or   A**T*x = b   or   A**H*x = b
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
+     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
+     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
+     *           for i in range(0, n):
+     *              for j in range(i, min(i+k+1, n)):
+     *                  b[i, j-i] = a[i, j]
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param K The number of off-diagonals of the matrix A
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
+        // TBSV is the same as TRMV + K >= 0
+        validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
         int N = A.getType().getY();
         if (K < 0) {
             throw new RSRuntimeException("Number of diagonals must be positive");
         }
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
     }
-    void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
-        // TBSV is the same as TRMV
-        validateTRMV(Element.F64_2(mRS), TransA, A, X, incX);
+
+    /**
+     * ZTBSV solves one of the systems of equations
+     * A*x = b   or   A**T*x = b   or   A**H*x = b
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
+     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
+     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
+     *           for i in range(0, n):
+     *              for j in range(i, min(i+k+1, n)):
+     *                  b[i, j-i] = a[i, j]
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param K The number of off-diagonals of the matrix A
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
+        // TBSV is the same as TRMV + K >= 0
+        validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
         int N = A.getType().getY();
         if (K < 0) {
             throw new RSRuntimeException("Number of diagonals must be positive");
         }
         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
     }
-    void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
+
+    /**
+     * STPSV solves one of the systems of equations
+     * A*x = b   or   A**T*x = b
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
         // TPSV is same as TPMV
         int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
     }
-    void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
+
+    /**
+     * DTPSV solves one of the systems of equations
+     * A*x = b   or   A**T*x = b
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
         // TPSV is same as TPMV
         int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
     }
-    void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
+
+    /**
+     * CTPSV solves one of the systems of equations
+     * A*x = b   or   A**T*x = b   or   A**H*x = b
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
         // TPSV is same as TPMV
         int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
     }
-    void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
+
+    /**
+     * ZTPSV solves one of the systems of equations
+     * A*x = b   or   A**T*x = b   or   A**H*x = b
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     */
+    public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
         // TPSV is same as TPMV
         int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
@@ -594,7 +1257,9 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
             throw new RSRuntimeException("Invalid dimension for Ap");
         }
-
+        if (incX <= 0 || incY <= 0) {
+            throw new RSRuntimeException("Vector increments must be greater than 0");
+        }
         int expectedXDim = 1 + (N - 1) * incX;
         if (X.getType().getX() != expectedXDim) {
             throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
@@ -623,8 +1288,10 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         if (N < 1 || M < 1) {
             throw new RSRuntimeException("M and N must be 1 or greater for GER");
         }
-
-        int expectedXDim = 1 + (N - 1) * incX;
+        if (incX <= 0 || incY <= 0) {
+            throw new RSRuntimeException("Vector increments must be greater than 0");
+        }
+        int expectedXDim = 1 + (M - 1) * incX;
         if (X.getType().getX() != expectedXDim) {
             throw new RSRuntimeException("Incorrect vector dimensions for GER");
         }
@@ -650,7 +1317,9 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         if (N != A.getType().getY()) {
             throw new RSRuntimeException("A must be a symmetric matrix");
         }
-
+        if (incX <= 0) {
+            throw new RSRuntimeException("Vector increments must be greater than 0");
+        }
         int expectedXDim = 1 + (N - 1) * incX;
         if (X.getType().getX() != expectedXDim) {
             throw new RSRuntimeException("Incorrect vector dimensions for SYR");
@@ -675,10 +1344,12 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
             throw new RSRuntimeException("Invalid dimension for Ap");
         }
-
+        if (incX <= 0) {
+            throw new RSRuntimeException("Vector increments must be greater than 0");
+        }
         int expectedXDim = 1 + (N - 1) * incX;
         if (X.getType().getX() != expectedXDim) {
-            throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
+            throw new RSRuntimeException("Incorrect vector dimensions for SPR");
         }
 
         return N;
@@ -701,7 +1372,9 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         if (N != A.getType().getY()) {
             throw new RSRuntimeException("A must be a symmetric matrix");
         }
-
+        if (incX <= 0 || incY <= 0) {
+            throw new RSRuntimeException("Vector increments must be greater than 0");
+        }
         int expectedXDim = 1 + (N - 1) * incX;
         int expectedYDim = 1 + (N - 1) * incY;
         if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
@@ -729,81 +1402,390 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
             throw new RSRuntimeException("Invalid dimension for Ap");
         }
-
+        if (incX <= 0 || incY <= 0) {
+            throw new RSRuntimeException("Vector increments must be greater than 0");
+        }
         int expectedXDim = 1 + (N - 1) * incX;
         int expectedYDim = 1 + (N - 1) * incY;
         if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
-            throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
+            throw new RSRuntimeException("Incorrect vector dimensions for SPR2");
         }
 
         return N;
     }
 
-    void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
+    /**
+     * SSYMV performs the matrix-vector operation
+     * y := alpha*A*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
         int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
     }
-    void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
-        // SBMV is the same as SYMV
+
+    /**
+     * SSBMV performs the matrix-vector operation
+     * y := alpha*A*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
+     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
+     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
+     *           for i in range(0, n):
+     *              for j in range(i, min(i+k+1, n)):
+     *                  b[i, j-i] = a[i, j]
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
+     * @param K The number of off-diagonals of the matrix A
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
+        // SBMV is the same as SYMV + K >= 0
+        if (K < 0) {
+            throw new RSRuntimeException("K must be greater than or equal to 0");
+        }
         int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
     }
-    void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) {
+
+    /**
+     * SSPMV performs the matrix-vector operation
+     * y := alpha*A*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
+     * @param alpha The scalar alpha.
+     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) {
         int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY);
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
     }
-    void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
+
+    /**
+     * SGER performs the rank 1 operation
+     * A := alpha*x*y**T + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html
+     *
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
+     */
+    public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
         int M = A.getType().getY();
         int N = A.getType().getX();
+        validateGER(Element.F32(mRS), X, incX, Y, incY, A);
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0);
     }
-    void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
+
+    /**
+     * SSYR performs the rank 1 operation
+     * A := alpha*x*x**T + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
+     */
+    public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
         int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A);
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0);
     }
-    void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
+
+    /**
+     * SSPR performs the rank 1 operation
+     * A := alpha*x*x**T + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
+     */
+    public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
         int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap);
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0);
     }
-    void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
+
+    /**
+     * SSYR2 performs the symmetric rank 2 operation
+     * A := alpha*x*y**T + alpha*y*x**T + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
+     */
+    public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
         int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A);
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0);
     }
-    void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
+
+    /**
+     * SSPR2 performs the symmetric rank 2 operation
+     * A := alpha*x*y**T + alpha*y*x**T + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
+     */
+    public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
         int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap);
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0);
     }
-    void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
+
+    /**
+     * DSYMV performs the matrix-vector operation
+     * y := alpha*A*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
         int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
     }
-    void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
-        // SBMV is the same as SYMV
+
+    /**
+     * DSBMV performs the matrix-vector operation
+     * y := alpha*A*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
+     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
+     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
+     *           for i in range(0, n):
+     *              for j in range(i, min(i+k+1, n)):
+     *                  b[i, j-i] = a[i, j]
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
+     * @param K The number of off-diagonals of the matrix A
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
+        // SBMV is the same as SYMV + K >= 0
+        if (K < 0) {
+            throw new RSRuntimeException("K must be greater than or equal to 0");
+        }
         int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
     }
-    void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) {
+
+    /**
+     * DSPMV performs the matrix-vector operation
+     * y := alpha*A*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
+     * @param alpha The scalar alpha.
+     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) {
         int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY);
         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
     }
-    void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
+
+    /**
+     * DGER performs the rank 1 operation
+     * A := alpha*x*y**T + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html
+     *
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
+     */
+    public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
         int M = A.getType().getY();
         int N = A.getType().getX();
+        validateGER(Element.F64(mRS), X, incX, Y, incY, A);
         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0);
     }
-    void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
+
+    /**
+     * DSYR performs the rank 1 operation
+     * A := alpha*x*x**T + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
+     */
+    public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
         int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A);
         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0);
     }
-    void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
+
+    /**
+     * DSPR performs the rank 1 operation
+     * A := alpha*x*x**T + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
+     */
+    public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
         int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap);
         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0);
     }
-    void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
+
+    /**
+     * DSYR2 performs the symmetric rank 2 operation
+     * A := alpha*x*y**T + alpha*y*x**T + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
+     */
+    public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
         int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A);
         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0);
     }
-    void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
+
+    /**
+     * DSPR2 performs the symmetric rank 2 operation
+     * A := alpha*x*y**T + alpha*y*x**T + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
+     */
+    public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
         int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap);
         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0);
     }
@@ -825,8 +1807,10 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
 
         int M = A.getType().getY();
         int N = A.getType().getX();
-
-        int expectedXDim = 1 + (N - 1) * incX;
+        if (incX <= 0 || incY <= 0) {
+            throw new RSRuntimeException("Vector increments must be greater than 0");
+        }
+        int expectedXDim = 1 + (M - 1) * incX;
         if (X.getType().getX() != expectedXDim) {
             throw new RSRuntimeException("Incorrect vector dimensions for GERU");
         }
@@ -837,12 +1821,51 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
 
     }
 
-    void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
+    /**
+     * CHEMV performs the matrix-vector operation
+     * y := alpha*A*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
         // HEMV is the same as SYR2 validation-wise
         int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
     }
-    void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
+
+    /**
+     * CHBMV performs the matrix-vector operation
+     * y := alpha*A*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
+     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
+     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
+     *           for i in range(0, n):
+     *              for j in range(i, min(i+k+1, n)):
+     *                  b[i, j-i] = a[i, j]
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
+     * @param K The number of off-diagonals of the matrix A
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
         // HBMV is the same as SYR2 validation-wise
         int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
         if (K < 0) {
@@ -850,50 +1873,214 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         }
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
     }
-    void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
+
+    /**
+     * CHPMV performs the matrix-vector operation
+     * y := alpha*A*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
+     * @param alpha The scalar alpha.
+     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
         // HPMV is the same as SPR2
         int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
     }
-    void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
+
+    /**
+     * CGERU performs the rank 1 operation
+     * A := alpha*x*y**T + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html
+     *
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     */
+    public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
         validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
         int M = A.getType().getY();
         int N = A.getType().getX();
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
     }
-    void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
+
+    /**
+     * CGERC performs the rank 1 operation
+     * A := alpha*x*y**H + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html
+     *
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     */
+    public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
         // same as GERU
         validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
         int M = A.getType().getY();
         int N = A.getType().getX();
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
     }
-    void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
+
+    /**
+     * CHER performs the rank 1 operation
+     * A := alpha*x*x**H + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     */
+    public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
         // same as SYR
-        int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A);
+        int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A);
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0);
     }
-    void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
+
+    /**
+     * CHPR performs the rank 1 operation
+     * A := alpha*x*x**H + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     */
+    public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
         // equivalent to SPR for validation
         int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap);
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0);
     }
-    void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
+
+    /**
+     * CHER2 performs the symmetric rank 2 operation
+     * A := alpha*x*y**H + alpha*y*x**H + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     */
+    public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
         // same as SYR2
         int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
     }
-    void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
+
+    /**
+     * CHPR2 performs the symmetric rank 2 operation
+     * A := alpha*x*y**H + alpha*y*x**H + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     */
+    public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
         // same as SPR2
         int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0);
     }
-    void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
+
+    /**
+     * ZHEMV performs the matrix-vector operation
+     * y := alpha*A*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
         // HEMV is the same as SYR2 validation-wise
         int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
     }
-    void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
+
+    /**
+     * ZHBMV performs the matrix-vector operation
+     * y := alpha*A*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
+     *       but only the region N*(K+1) will be referenced. The following subroutine can is an
+     *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
+     *           for i in range(0, n):
+     *              for j in range(i, min(i+k+1, n)):
+     *                  b[i, j-i] = a[i, j]
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
+     * @param K The number of off-diagonals of the matrix A
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
         // HBMV is the same as SYR2 validation-wise
         int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
         if (K < 0) {
@@ -901,40 +2088,164 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         }
         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
     }
-    void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
+
+    /**
+     * ZHPMV performs the matrix-vector operation
+     * y := alpha*A*x + beta*y
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
+     * @param alpha The scalar alpha.
+     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param beta The scalar beta.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     */
+    public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
         // HPMV is the same as SPR2
         int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
     }
-    void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
+
+    /**
+     * ZGERU performs the rank 1 operation
+     * A := alpha*x*y**T + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html
+     *
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     */
+    public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
         validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
         int M = A.getType().getY();
         int N = A.getType().getX();
         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
     }
-    void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
+
+    /**
+     * ZGERC performs the rank 1 operation
+     * A := alpha*x*y**H + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html
+     *
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     */
+    public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
         // same as GERU
         validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
         int M = A.getType().getY();
         int N = A.getType().getX();
         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
     }
-    void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
+
+    /**
+     * ZHER performs the rank 1 operation
+     * A := alpha*x*x**H + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     */
+    public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
         // same as SYR
-        int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A);
+        int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A);
         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0);
     }
-    void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
+
+    /**
+     * ZHPR performs the rank 1 operation
+     * A := alpha*x*x**H + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     */
+    public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
         // equivalent to SPR for validation
         int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap);
         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0);
     }
-    void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
+
+    /**
+     * ZHER2 performs the symmetric rank 2 operation
+     * A := alpha*x*y**H + alpha*y*x**H + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     */
+    public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
         // same as SYR2
         int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
     }
-    void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
+
+    /**
+     * ZHPR2 performs the symmetric rank 2 operation
+     * A := alpha*x*y**H + alpha*y*x**H + A
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html
+     *
+     * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
+     *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
+     *       'a' to packed matrix 'b'.
+     *           k = 0
+     *           for i in range(0, n):
+     *              for j in range(i, n):
+     *                  b[k++] = a[i, j]
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
+     * @param alpha The scalar alpha.
+     * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
+     * @param incX The increment for the elements of vector x, must be larger than zero.
+     * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
+     * @param incY The increment for the elements of vector y, must be larger than zero.
+     * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     */
+    public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
         // same as SPR2
         int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0);
@@ -946,60 +2257,86 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
      */
 
     static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) {
-        int aX = -1, aY = -1, bX = -1, bY = -1, cX = -1, cY = -1;
+        int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1;
         if ((A != null && !A.getType().getElement().isCompatible(e)) ||
             (B != null && !B.getType().getElement().isCompatible(e)) ||
             (C != null && !C.getType().getElement().isCompatible(e))) {
             throw new RSRuntimeException("Called BLAS with wrong Element type");
         }
-        if (C != null) {
-            cX = C.getType().getY();
-            cY = C.getType().getX();
+        if (C == null) {
+            //since matrix C is used to store the result, it cannot be null.
+            throw new RSRuntimeException("Allocation C cannot be null");
         }
+        cM = C.getType().getY();
+        cN = C.getType().getX();
+
         if (Side == RIGHT) {
+            if ((A == null && B != null) || (A != null && B == null)) {
+                throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa");
+            }
             if (B != null) {
-                bX = A.getType().getY();
-                bY = A.getType().getX();
+                bM = A.getType().getY();
+                bN = A.getType().getX();
             }
             if (A != null) {
-                aX = B.getType().getY();
-                aY = B.getType().getX();
+                aM = B.getType().getY();
+                aN = B.getType().getX();
             }
         } else {
             if (A != null) {
-                if (TransA == TRANSPOSE) {
-                    aY = A.getType().getY();
-                    aX = A.getType().getX();
+                if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) {
+                    aN = A.getType().getY();
+                    aM = A.getType().getX();
                 } else {
-                    aX = A.getType().getY();
-                    aY = A.getType().getX();
+                    aM = A.getType().getY();
+                    aN = A.getType().getX();
                 }
             }
             if (B != null) {
-                if (TransB == TRANSPOSE) {
-                    bY = B.getType().getY();
-                    bX = B.getType().getX();
+                if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) {
+                    bN = B.getType().getY();
+                    bM = B.getType().getX();
                 } else {
-                    bX = B.getType().getY();
-                    bY = B.getType().getX();
+                    bM = B.getType().getY();
+                    bN = B.getType().getX();
                 }
             }
         }
         if (A != null && B != null && C != null) {
-            if (aY != bX || aX != cX || bY != cY) {
+            if (aN != bM || aM != cM || bN != cN) {
                 throw new RSRuntimeException("Called BLAS with invalid dimensions");
             }
         } else if (A != null && C != null) {
-            // A and C only
-            if (aX != cY || aY != cX) {
+            // A and C only, for SYRK
+            if (cM != cN) {
+                throw new RSRuntimeException("Matrix C is not symmetric");
+            }
+            if (aM != cM) {
                 throw new RSRuntimeException("Called BLAS with invalid dimensions");
             }
         } else if (A != null && B != null) {
             // A and B only
+            if (aN != bM) {
+                throw new RSRuntimeException("Called BLAS with invalid dimensions");
+            }
         }
 
     }
 
+    /**
+     * SGEMM performs one of the matrix-matrix operations
+     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html
+     *
+     * @param TransA The type of transpose applied to matrix A.
+     * @param TransB The type of transpose applied to matrix B.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
+     */
     public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A,
                       Allocation B, float beta, Allocation C) {
         validateTranspose(TransA);
@@ -1007,14 +2344,14 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C);
 
         int M = -1, N = -1, K = -1;
-        if (TransA == TRANSPOSE) {
+        if (TransA != NO_TRANSPOSE) {
             M = A.getType().getX();
             K = A.getType().getY();
         } else {
             M = A.getType().getY();
             K = A.getType().getX();
         }
-        if (TransB == TRANSPOSE) {
+        if (TransB != NO_TRANSPOSE) {
             N = B.getType().getY();
         } else {
             N = B.getType().getX();
@@ -1022,20 +2359,35 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha, A.getID(mRS), B.getID(mRS),
                                         beta, C.getID(mRS), 0, 0, 0, 0);
     }
+
+    /**
+     * DGEMM performs one of the matrix-matrix operations
+     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html
+     *
+     * @param TransA The type of transpose applied to matrix A.
+     * @param TransB The type of transpose applied to matrix B.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
+     */
     public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A,
                       Allocation B, double beta, Allocation C) {
         validateTranspose(TransA);
         validateTranspose(TransB);
         validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C);
         int M = -1, N = -1, K = -1;
-        if (TransA == TRANSPOSE) {
+        if (TransA != NO_TRANSPOSE) {
             M = A.getType().getX();
             K = A.getType().getY();
         } else {
             M = A.getType().getY();
             K = A.getType().getX();
         }
-        if (TransB == TRANSPOSE) {
+        if (TransB != NO_TRANSPOSE) {
             N = B.getType().getY();
         } else {
             N = B.getType().getX();
@@ -1043,20 +2395,35 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha, A.getID(mRS), B.getID(mRS),
                                         beta, C.getID(mRS), 0, 0, 0, 0);
     }
+
+    /**
+     * CGEMM performs one of the matrix-matrix operations
+     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html
+     *
+     * @param TransA The type of transpose applied to matrix A.
+     * @param TransB The type of transpose applied to matrix B.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
+     */
     public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A,
                       Allocation B, Float2 beta, Allocation C) {
         validateTranspose(TransA);
         validateTranspose(TransB);
         validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C);
         int M = -1, N = -1, K = -1;
-        if (TransA == TRANSPOSE) {
+        if (TransA != NO_TRANSPOSE) {
             M = A.getType().getX();
             K = A.getType().getY();
         } else {
             M = A.getType().getY();
             K = A.getType().getX();
         }
-        if (TransB == TRANSPOSE) {
+        if (TransB != NO_TRANSPOSE) {
             N = B.getType().getY();
         } else {
             N = B.getType().getX();
@@ -1065,20 +2432,34 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
                                          beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
     }
 
+    /**
+     * ZGEMM performs one of the matrix-matrix operations
+     * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html
+     *
+     * @param TransA The type of transpose applied to matrix A.
+     * @param TransB The type of transpose applied to matrix B.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2
+     */
     public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A,
                       Allocation B, Double2 beta, Allocation C) {
         validateTranspose(TransA);
         validateTranspose(TransB);
         validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C);
         int M = -1, N = -1, K = -1;
-        if (TransA == TRANSPOSE) {
+        if (TransA != NO_TRANSPOSE) {
             M = A.getType().getX();
             K = A.getType().getY();
         } else {
             M = A.getType().getY();
             K = A.getType().getX();
         }
-        if (TransB == TRANSPOSE) {
+        if (TransB != NO_TRANSPOSE) {
             N = B.getType().getY();
         } else {
             N = B.getType().getX();
@@ -1087,45 +2468,130 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
                                    beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
     }
 
+    /**
+     * SSYMM performs one of the matrix-matrix operations
+     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html
+     *
+     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
+     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
+     */
     public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A,
                       Allocation B, float beta, Allocation C) {
         validateSide(Side);
         validateUplo(Uplo);
+        //For SYMM, Matrix A should be symmetric
+        if (A.getType().getX() != A.getType().getY()) {
+            throw new RSRuntimeException("Matrix A is not symmetric");
+        }
         validateL3(Element.F32(mRS), 0, 0, Side, A, B, C);
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS),
                                         beta, C.getID(mRS), 0, 0, 0, 0);
     }
+
+    /**
+     * DSYMM performs one of the matrix-matrix operations
+     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html
+     *
+     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
+     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
+     */
     public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A,
                       Allocation B, double beta, Allocation C) {
         validateSide(Side);
         validateUplo(Uplo);
+        if (A.getType().getX() != A.getType().getY()) {
+            throw new RSRuntimeException("Matrix A is not symmetric");
+        }
         validateL3(Element.F64(mRS), 0, 0, Side, A, B, C);
         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS),
                                         beta, C.getID(mRS), 0, 0, 0, 0);
     }
+
+    /**
+     * CSYMM performs one of the matrix-matrix operations
+     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html
+     *
+     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
+     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
+     */
     public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A,
                       Allocation B, Float2 beta, Allocation C) {
         validateSide(Side);
         validateUplo(Uplo);
+        if (A.getType().getX() != A.getType().getY()) {
+            throw new RSRuntimeException("Matrix A is not symmetric");
+        }
         validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C);
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),
                                          beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
     }
+
+    /**
+     * ZSYMM performs one of the matrix-matrix operations
+     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html
+     *
+     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
+     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
+     */
     public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A,
                       Allocation B, Double2 beta, Allocation C) {
         validateSide(Side);
         validateUplo(Uplo);
+        if (A.getType().getX() != A.getType().getY()) {
+            throw new RSRuntimeException("Matrix A is not symmetric");
+        }
         validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C);
         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),
                                    beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
     }
 
+    /**
+     * SSYRK performs one of the symmetric rank k operations
+     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
+     * @param Trans The type of transpose applied to the operation.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
+     */
     public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
         validateTranspose(Trans);
         validateUplo(Uplo);
         validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C);
         int K = -1;
-        if (Trans == TRANSPOSE) {
+        if (Trans != NO_TRANSPOSE) {
             K = A.getType().getY();
         } else {
             K = A.getType().getX();
@@ -1134,42 +2600,83 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0);
     }
 
+    /**
+     * DSYRK performs one of the symmetric rank k operations
+     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
+     * @param Trans The type of transpose applied to the operation.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
+     */
     public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
         validateTranspose(Trans);
         validateUplo(Uplo);
         validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C);
         int K = -1;
-        if (Trans == TRANSPOSE) {
+        if (Trans != NO_TRANSPOSE) {
             K = A.getType().getY();
         } else {
             K = A.getType().getX();
         }
         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0);
     }
-    public void CSYRK(@Uplo int Uplo, @Transpose int Trans, float alphaX, float alphaY, Allocation A, float betaX, float betaY, Allocation C) {
+
+    /**
+     * CSYRK performs one of the symmetric rank k operations
+     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
+     * @param Trans The type of transpose applied to the operation.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
+     */
+    public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) {
         validateTranspose(Trans);
         validateUplo(Uplo);
         validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C);
         int K = -1;
-        if (Trans == TRANSPOSE) {
+        if (Trans != NO_TRANSPOSE) {
             K = A.getType().getY();
         } else {
             K = A.getType().getX();
         }
-        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alphaX, alphaY, A.getID(mRS), 0, betaX, betaY,
+        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y,
                                          C.getID(mRS), 0, 0, 0, 0);
     }
-    public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, double alphaX, double alphaY, Allocation A, double betaX, double betaY, Allocation C) {
+
+    /**
+     * ZSYRK performs one of the symmetric rank k operations
+     * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
+     * @param Trans The type of transpose applied to the operation.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
+     */
+    public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) {
         validateTranspose(Trans);
         validateUplo(Uplo);
         validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C);
         int K = -1;
-        if (Trans == TRANSPOSE) {
+        if (Trans != NO_TRANSPOSE) {
             K = A.getType().getY();
         } else {
             K = A.getType().getX();
         }
-        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alphaX, alphaY, A.getID(mRS), 0, betaX, betaY,
+        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y,
                                    C.getID(mRS), 0, 0, 0, 0);
     }
 
@@ -1190,7 +2697,7 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
             // check rows versus C
             Cdim = A.getType().getY();
         }
-        if (C.getType().getX() != Cdim && C.getType().getY() != Cdim) {
+        if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) {
             throw new RSRuntimeException("Invalid symmetric matrix in SYR2K");
         }
         // A dims == B dims
@@ -1198,78 +2705,154 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
             throw new RSRuntimeException("Invalid A and B in SYR2K");
         }
     }
+
+    /**
+     * SSYR2K performs one of the symmetric rank 2k operations
+     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
+     * @param Trans The type of transpose applied to the operation.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
+     */
     public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) {
         validateUplo(Uplo);
         validateSYR2K(Element.F32(mRS), Trans, A, B, C);
         int K = -1;
-        if (Trans == TRANSPOSE) {
+        if (Trans != NO_TRANSPOSE) {
             K = A.getType().getY();
         } else {
             K = A.getType().getX();
         }
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0);
     }
+
+    /**
+     * DSYR2K performs one of the symmetric rank 2k operations
+     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
+     * @param Trans The type of transpose applied to the operation.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
+     */
     public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) {
         validateUplo(Uplo);
         validateSYR2K(Element.F64(mRS), Trans, A, B, C);
         int K = -1;
-        if (Trans == TRANSPOSE) {
+        if (Trans != NO_TRANSPOSE) {
             K = A.getType().getY();
         } else {
             K = A.getType().getX();
         }
-        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0);
+        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0);
     }
+
+    /**
+     * CSYR2K performs one of the symmetric rank 2k operations
+     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
+     * @param Trans The type of transpose applied to the operation.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
+     */
     public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
         validateUplo(Uplo);
         validateSYR2K(Element.F32_2(mRS), Trans, A, B, C);
         int K = -1;
-        if (Trans == TRANSPOSE) {
+        if (Trans != NO_TRANSPOSE) {
             K = A.getType().getY();
         } else {
             K = A.getType().getX();
         }
-        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
+        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
     }
+
+    /**
+     * ZSYR2K performs one of the symmetric rank 2k operations
+     * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
+     * @param Trans The type of transpose applied to the operation.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
+     */
     public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
         validateUplo(Uplo);
         validateSYR2K(Element.F64_2(mRS), Trans, A, B, C);
         int K = -1;
-        if (Trans == TRANSPOSE) {
+        if (Trans != NO_TRANSPOSE) {
             K = A.getType().getY();
         } else {
             K = A.getType().getX();
         }
-        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
+        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
     }
 
     static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
         validateSide(Side);
         validateTranspose(TransA);
-        int aX = -1, aY = -1, bX = -1, bY = -1;
+        int aM = -1, aN = -1, bM = -1, bN = -1;
         if (!A.getType().getElement().isCompatible(e) ||
             !B.getType().getElement().isCompatible(e)) {
             throw new RSRuntimeException("Called BLAS with wrong Element type");
         }
-        if (TransA == TRANSPOSE) {
-            aY = A.getType().getY();
-            aX = A.getType().getX();
-        } else {
-            aY = A.getType().getX();
-            aX = A.getType().getY();
+
+        aM = A.getType().getY();
+        aN = A.getType().getX();
+        if (aM != aN) {
+            throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A");
         }
-        bX = B.getType().getY();
-        bY = B.getType().getX();
+
+        bM = B.getType().getY();
+        bN = B.getType().getX();
         if (Side == LEFT) {
-            if (aX == 0 || aY != bX) {
+            if (aN != bM) {
                 throw new RSRuntimeException("Called TRMM with invalid matrices");
             }
         } else {
-            if (bY != aX || aY == 0) {
+            if (bN != aM) {
                 throw new RSRuntimeException("Called TRMM with invalid matrices");
             }
         }
     }
+
+    /**
+     * STRMM performs one of the matrix-matrix operations
+     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
+     * op(A) is one of  op(A) = A  or  op(A) = A**T
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html
+     *
+     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
+     * @param Uplo Specifies whether matrix A is upper or lower triangular.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
+     */
     public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
         validateUplo(Uplo);
         validateDiag(Diag);
@@ -1277,30 +2860,78 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
                                         alpha, A.getID(mRS), B.getID(mRS), 0.f, 0, 0, 0, 0, 0);
     }
+
+    /**
+     * DTRMM performs one of the matrix-matrix operations
+     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
+     * op(A) is one of  op(A) = A  or  op(A) = A**T
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html
+     *
+     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
+     * @param Uplo Specifies whether matrix A is upper or lower triangular.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
+     */
     public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
         validateUplo(Uplo);
         validateDiag(Diag);
         validateTRMM(Element.F64(mRS), Side, TransA, A, B);
-        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
-                                        alpha, A.getID(mRS), B.getID(mRS), 0.f, 0, 0, 0, 0, 0);
+        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
+                                        alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);
     }
+
+    /**
+     * CTRMM performs one of the matrix-matrix operations
+     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
+     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html
+     *
+     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
+     * @param Uplo Specifies whether matrix A is upper or lower triangular.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
+     */
     public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
         validateUplo(Uplo);
         validateDiag(Diag);
         validateTRMM(Element.F32_2(mRS), Side, TransA, A, B);
-        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
+        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
                                          alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
     }
+
+    /**
+     * ZTRMM performs one of the matrix-matrix operations
+     * B := alpha*op(A)*B   or   B := alpha*B*op(A)
+     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html
+     *
+     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
+     * @param Uplo Specifies whether matrix A is upper or lower triangular.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
+     */
     public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
         validateUplo(Uplo);
         validateDiag(Diag);
         validateTRMM(Element.F64_2(mRS), Side, TransA, A, B);
-        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
+        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
                                    alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
     }
 
     static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
-        int adim = -1, bX = -1, bY = -1;
+        int adim = -1, bM = -1, bN = -1;
         validateSide(Side);
         validateTranspose(TransA);
         if (!A.getType().getElement().isCompatible(e) ||
@@ -1314,20 +2945,36 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
             // for now we assume adapters are sufficient, will reevaluate in the future
             throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A");
         }
-        bX = B.getType().getY();
-        bY = B.getType().getX();
+        bM = B.getType().getY();
+        bN = B.getType().getX();
         if (Side == LEFT) {
             // A is M*M
-            if (adim != bY) {
+            if (adim != bM) {
                 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
             }
         } else {
             // A is N*N
-            if (adim != bX) {
+            if (adim != bN) {
                 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
             }
         }
     }
+
+    /**
+     * STRSM solves one of the matrix equations
+     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
+     * op(A) is one of  op(A) = A  or  op(A) = A**T
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html
+     *
+     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
+     * @param Uplo Specifies whether matrix A is upper or lower triangular.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
+     */
     public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
         validateUplo(Uplo);
         validateDiag(Diag);
@@ -1335,25 +2982,73 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
                                         alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);
     }
+
+    /**
+     * DTRSM solves one of the matrix equations
+     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
+     * op(A) is one of  op(A) = A  or  op(A) = A**T
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html
+     *
+     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
+     * @param Uplo Specifies whether matrix A is upper or lower triangular.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
+     */
     public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
         validateUplo(Uplo);
         validateDiag(Diag);
         validateTRSM(Element.F64(mRS), Side, TransA, A, B);
-        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
+        mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
                                         alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);
     }
+
+    /**
+     * CTRSM solves one of the matrix equations
+     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
+     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html
+     *
+     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
+     * @param Uplo Specifies whether matrix A is upper or lower triangular.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
+     */
     public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
         validateUplo(Uplo);
         validateDiag(Diag);
         validateTRSM(Element.F32_2(mRS), Side, TransA, A, B);
-        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
+        mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
                                          alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
     }
+
+    /**
+     * ZTRSM solves one of the matrix equations
+     * op(A)*X := alpha*B   or   X*op(A) := alpha*B
+     * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html
+     *
+     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
+     * @param Uplo Specifies whether matrix A is upper or lower triangular.
+     * @param TransA The type of transpose applied to matrix A.
+     * @param Diag Specifies whether or not A is unit triangular.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
+     */
     public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
         validateUplo(Uplo);
         validateDiag(Diag);
         validateTRSM(Element.F64_2(mRS), Side, TransA, A, B);
-        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
+        mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
                                    alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
     }
 
@@ -1380,17 +3075,47 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
             throw new RSRuntimeException("Called HEMM with mismatched B and C");
         }
     }
-    public void CHEMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A, Allocation B, float beta, Allocation C) {
+
+    /**
+     * CHEMM performs one of the matrix-matrix operations
+     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html
+     *
+     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
+     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
+     */
+    public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
         validateUplo(Uplo);
         validateHEMM(Element.F32_2(mRS), Side, A, B, C);
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
-                                         alpha, 0, A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0);
+                                         alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
     }
-    public void ZHEMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A, Allocation B, double beta, Allocation C) {
+
+    /**
+     * ZHEMM performs one of the matrix-matrix operations
+     * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html
+     *
+     * @param Side Specifies whether the symmetric matrix A appears on the left or right.
+     * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
+     */
+    public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
         validateUplo(Uplo);
-        validateHEMM(Element.F32_2(mRS), Side, A, B, C);
+        validateHEMM(Element.F64_2(mRS), Side, A, B, C);
         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
-                                   alpha, 0, A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0);
+                                   alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
     }
 
     static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) {
@@ -1404,20 +3129,34 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
             throw new RSRuntimeException("Called HERK with non-square C");
         }
         if (Trans == NO_TRANSPOSE) {
-            if (cdim != A.getType().getX()) {
+            if (cdim != A.getType().getY()) {
                 throw new RSRuntimeException("Called HERK with invalid A");
             }
         } else {
-            if (cdim != A.getType().getY()) {
+            if (cdim != A.getType().getX()) {
                 throw new RSRuntimeException("Called HERK with invalid A");
             }
         }
     }
+
+    /**
+     * CHERK performs one of the hermitian rank k operations
+     * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
+     * @param Trans The type of transpose applied to the operation.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
+     */
     public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
         validateUplo(Uplo);
         validateHERK(Element.F32_2(mRS), Trans, A, C);
         int k = 0;
-        if (Trans == TRANSPOSE) {
+        if (Trans == CONJ_TRANSPOSE) {
             k = A.getType().getY();
         } else {
             k = A.getType().getX();
@@ -1425,11 +3164,25 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,
                                          alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0);
     }
+
+    /**
+     * ZHERK performs one of the hermitian rank k operations
+     * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
+     * @param Trans The type of transpose applied to the operation.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
+     */
     public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
         validateUplo(Uplo);
         validateHERK(Element.F64_2(mRS), Trans, A, C);
         int k = 0;
-        if (Trans == TRANSPOSE) {
+        if (Trans == CONJ_TRANSPOSE) {
             k = A.getType().getY();
         } else {
             k = A.getType().getX();
@@ -1462,6 +3215,21 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
             throw new RSRuntimeException("Called HER2K with invalid A and B matrices");
         }
     }
+
+    /**
+     * CHER2K performs one of the hermitian rank 2k operations
+     * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
+     * @param Trans The type of transpose applied to the operation.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
+     */
     public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) {
         validateUplo(Uplo);
         validateHER2K(Element.F32_2(mRS), Trans, A, B, C);
@@ -1474,6 +3242,21 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,
                                          A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0);
     }
+
+    /**
+     * ZHER2K performs one of the hermitian rank 2k operations
+     * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
+     *
+     * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html
+     *
+     * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
+     * @param Trans The type of transpose applied to the operation.
+     * @param alpha The scalar alpha.
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
+     * @param beta The scalar beta.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
+     */
     public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) {
         validateUplo(Uplo);
         validateHER2K(Element.F64_2(mRS), Trans, A, B, C);
@@ -1489,14 +3272,29 @@ public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
 
 
     /**
+     * 8-bit GEMM-like operation for neural networks: C = B.transposed() * A
+     * Calculations are done in 1.10.21 fixed-point format for the final output,
+     * just before there's a shift down to drop the fractional parts. The output
+     * values are gated to 0 to 255 to fit in a byte, but the 10-bit format
+     * gives some headroom to avoid wrapping around on small overflows.
      *
-     * 8-bit GEMM-like operation for neural networks
-     *
-     * @hide
+     * @param A The input allocation contains matrix A, supported elements type {@link Element#U8}.
+     * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255.
+     * @param B The input allocation contains matrix B, supported elements type {@link Element#U8}.
+     * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255.
+     * @param C The input allocation contains matrix C, supported elements type {@link Element#U8}.
+     * @param c_offset The offset for all values in matrix C.
+     * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult.
      **/
     public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) {
         validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C);
 
+        if (a_offset < 0 || a_offset > 255) {
+            throw new RSRuntimeException("Invalid a_offset passed to BNNM");
+        }
+        if (b_offset < 0 || b_offset > 255) {
+            throw new RSRuntimeException("Invalid b_offset passed to BNNM");
+        }
         int M = -1, N = -1, K = -1;
         M = A.getType().getY();
         N = B.getType().getY();
diff --git a/rs/java/android/renderscript/Type.java b/rs/java/android/renderscript/Type.java
index a58e42c..dc23785 100644
--- a/rs/java/android/renderscript/Type.java
+++ b/rs/java/android/renderscript/Type.java
@@ -151,23 +151,30 @@ public class Type extends BaseObj {
 
     /**
      * @hide
-     */
-    public int getArray(int dim) {
-        if ((dim < 0) || (dim >= mMaxArrays)) {
+      * Return the dimension of the specified array.
+      *
+      * @param arrayNum  The array dimension to query
+      * @return int
+      */
+    public int getArray(int arrayNum) {
+        if ((arrayNum < 0) || (arrayNum >= mMaxArrays)) {
             throw new RSIllegalArgumentException("Array dimension out of range.");
         }
 
-        if (mArrays == null || dim >= mArrays.length) {
+        if (mArrays == null || arrayNum >= mArrays.length) {
             // Dimension in range but no array for that dimension allocated
             return 0;
         }
 
-        return mArrays[dim];
+        return mArrays[arrayNum];
     }
 
     /**
      * @hide
-     */
+      * Return the number of array dimensions.
+      *
+      * @return int
+      */
     public int getArrayCount() {
         if (mArrays != null) return mArrays.length;
         return 0;
@@ -378,6 +385,7 @@ public class Type extends BaseObj {
 
         /**
          * @hide
+         * Adds an array dimension to the builder
          *
          * @param dim
          * @param value
diff --git a/rs/jni/Android.mk b/rs/jni/Android.mk
index f1f0bfc..0658620 100644
--- a/rs/jni/Android.mk
+++ b/rs/jni/Android.mk
@@ -14,7 +14,8 @@ LOCAL_SHARED_LIBRARIES := \
     libskia \
     libutils \
     libui \
-    libgui
+    libgui \
+    libjnigraphics
 
 LOCAL_STATIC_LIBRARIES :=
 
@@ -23,6 +24,8 @@ rs_generated_include_dir := $(call intermediates-dir-for,SHARED_LIBRARIES,libRS,
 LOCAL_C_INCLUDES += \
     $(JNI_H_INCLUDE) \
     frameworks/rs \
+    frameworks/base/core/jni \
+    frameworks/base/libs/hwui \
     $(rs_generated_include_dir)
 
 LOCAL_CFLAGS += -Wno-unused-parameter -std=c++11
diff --git a/rs/jni/android_renderscript_RenderScript.cpp b/rs/jni/android_renderscript_RenderScript.cpp
index 6f6729b..ffc4fd8 100644
--- a/rs/jni/android_renderscript_RenderScript.cpp
+++ b/rs/jni/android_renderscript_RenderScript.cpp
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#define LOG_TAG "libRS_jni"
+#define LOG_TAG "RenderScript_jni"
 
 #include <stdlib.h>
 #include <stdio.h>
@@ -24,8 +24,6 @@
 #include <utils/misc.h>
 #include <inttypes.h>
 
-#include <SkBitmap.h>
-
 #include <androidfw/Asset.h>
 #include <androidfw/AssetManager.h>
 #include <androidfw/ResourceTypes.h>
@@ -35,6 +33,7 @@
 #include "android_runtime/AndroidRuntime.h"
 #include "android_runtime/android_view_Surface.h"
 #include "android_runtime/android_util_AssetManager.h"
+#include "android/graphics/GraphicsJNI.h"
 
 #include <rs.h>
 #include <rsEnv.h>
@@ -281,14 +280,10 @@ private:
 // ---------------------------------------------------------------------------
 
 static jfieldID gContextId = 0;
-static jfieldID gNativeBitmapID = 0;
 
 static void _nInit(JNIEnv *_env, jclass _this)
 {
     gContextId             = _env->GetFieldID(_this, "mContext", "J");
-
-    jclass bitmapClass = _env->FindClass("android/graphics/Bitmap");
-    gNativeBitmapID = _env->GetFieldID(bitmapClass, "mNativeBitmap", "J");
 }
 
 // ---------------------------------------------------------------------------
@@ -333,79 +328,167 @@ nClosureCreate(JNIEnv *_env, jobject _this, jlong con, jlong kernelID,
                jlong returnValue, jlongArray fieldIDArray,
                jlongArray valueArray, jintArray sizeArray,
                jlongArray depClosureArray, jlongArray depFieldIDArray) {
+  jlong ret = 0;
+
   jlong* jFieldIDs = _env->GetLongArrayElements(fieldIDArray, nullptr);
   jsize fieldIDs_length = _env->GetArrayLength(fieldIDArray);
-  RsScriptFieldID* fieldIDs =
-      (RsScriptFieldID*)alloca(sizeof(RsScriptFieldID) * fieldIDs_length);
-  for (int i = 0; i< fieldIDs_length; i++) {
+  jlong* jValues = _env->GetLongArrayElements(valueArray, nullptr);
+  jsize values_length = _env->GetArrayLength(valueArray);
+  jint* jSizes = _env->GetIntArrayElements(sizeArray, nullptr);
+  jsize sizes_length = _env->GetArrayLength(sizeArray);
+  jlong* jDepClosures =
+      _env->GetLongArrayElements(depClosureArray, nullptr);
+  jsize depClosures_length = _env->GetArrayLength(depClosureArray);
+  jlong* jDepFieldIDs =
+      _env->GetLongArrayElements(depFieldIDArray, nullptr);
+  jsize depFieldIDs_length = _env->GetArrayLength(depFieldIDArray);
+
+  size_t numValues, numDependencies;
+  RsScriptFieldID* fieldIDs;
+  uintptr_t* values;
+  RsClosure* depClosures;
+  RsScriptFieldID* depFieldIDs;
+
+  if (fieldIDs_length != values_length || values_length != sizes_length) {
+      ALOGE("Unmatched field IDs, values, and sizes in closure creation.");
+      goto exit;
+  }
+
+  numValues = (size_t)fieldIDs_length;
+
+  if (depClosures_length != depFieldIDs_length) {
+      ALOGE("Unmatched closures and field IDs for dependencies in closure creation.");
+      goto exit;
+  }
+
+  numDependencies = (size_t)depClosures_length;
+
+  if (numDependencies > numValues) {
+      ALOGE("Unexpected number of dependencies in closure creation");
+      goto exit;
+  }
+
+  if (numValues > RS_CLOSURE_MAX_NUMBER_ARGS_AND_BINDINGS) {
+      ALOGE("Too many arguments or globals in closure creation");
+      goto exit;
+  }
+
+  fieldIDs = (RsScriptFieldID*)alloca(sizeof(RsScriptFieldID) * numValues);
+  if (fieldIDs == nullptr) {
+      goto exit;
+  }
+
+  for (size_t i = 0; i < numValues; i++) {
     fieldIDs[i] = (RsScriptFieldID)jFieldIDs[i];
   }
 
-  jlong* jValues = _env->GetLongArrayElements(valueArray, nullptr);
-  jsize values_length = _env->GetArrayLength(valueArray);
-  uintptr_t* values = (uintptr_t*)alloca(sizeof(uintptr_t) * values_length);
-  for (int i = 0; i < values_length; i++) {
+  values = (uintptr_t*)alloca(sizeof(uintptr_t) * numValues);
+  if (values == nullptr) {
+      goto exit;
+  }
+
+  for (size_t i = 0; i < numValues; i++) {
     values[i] = (uintptr_t)jValues[i];
   }
 
-  jint* sizes = _env->GetIntArrayElements(sizeArray, nullptr);
-  jsize sizes_length = _env->GetArrayLength(sizeArray);
+  depClosures = (RsClosure*)alloca(sizeof(RsClosure) * numDependencies);
+  if (depClosures == nullptr) {
+      goto exit;
+  }
 
-  jlong* jDepClosures =
-      _env->GetLongArrayElements(depClosureArray, nullptr);
-  jsize depClosures_length = _env->GetArrayLength(depClosureArray);
-  RsClosure* depClosures =
-      (RsClosure*)alloca(sizeof(RsClosure) * depClosures_length);
-  for (int i = 0; i < depClosures_length; i++) {
+  for (size_t i = 0; i < numDependencies; i++) {
     depClosures[i] = (RsClosure)jDepClosures[i];
   }
 
-  jlong* jDepFieldIDs =
-      _env->GetLongArrayElements(depFieldIDArray, nullptr);
-  jsize depFieldIDs_length = _env->GetArrayLength(depFieldIDArray);
-  RsScriptFieldID* depFieldIDs =
-      (RsScriptFieldID*)alloca(sizeof(RsScriptFieldID) * depFieldIDs_length);
-  for (int i = 0; i < depClosures_length; i++) {
+  depFieldIDs = (RsScriptFieldID*)alloca(sizeof(RsScriptFieldID) * numDependencies);
+  if (depFieldIDs == nullptr) {
+      goto exit;
+  }
+
+  for (size_t i = 0; i < numDependencies; i++) {
     depFieldIDs[i] = (RsClosure)jDepFieldIDs[i];
   }
 
-  return (jlong)(uintptr_t)rsClosureCreate(
+  ret = (jlong)(uintptr_t)rsClosureCreate(
       (RsContext)con, (RsScriptKernelID)kernelID, (RsAllocation)returnValue,
-      fieldIDs, (size_t)fieldIDs_length, values, (size_t)values_length,
-      (int*)sizes, (size_t)sizes_length,
-      depClosures, (size_t)depClosures_length,
-      depFieldIDs, (size_t)depFieldIDs_length);
+      fieldIDs, numValues, values, numValues,
+      (int*)jSizes, numValues,
+      depClosures, numDependencies,
+      depFieldIDs, numDependencies);
+
+exit:
+
+  _env->ReleaseLongArrayElements(depFieldIDArray, jDepFieldIDs, JNI_ABORT);
+  _env->ReleaseLongArrayElements(depClosureArray, jDepClosures, JNI_ABORT);
+  _env->ReleaseIntArrayElements (sizeArray,       jSizes,       JNI_ABORT);
+  _env->ReleaseLongArrayElements(valueArray,      jValues,      JNI_ABORT);
+  _env->ReleaseLongArrayElements(fieldIDArray,    jFieldIDs,    JNI_ABORT);
+
+  return ret;
 }
 
 static jlong
 nInvokeClosureCreate(JNIEnv *_env, jobject _this, jlong con, jlong invokeID,
                      jbyteArray paramArray, jlongArray fieldIDArray, jlongArray valueArray,
                      jintArray sizeArray) {
+  jlong ret = 0;
+
   jbyte* jParams = _env->GetByteArrayElements(paramArray, nullptr);
   jsize jParamLength = _env->GetArrayLength(paramArray);
-
   jlong* jFieldIDs = _env->GetLongArrayElements(fieldIDArray, nullptr);
   jsize fieldIDs_length = _env->GetArrayLength(fieldIDArray);
-  RsScriptFieldID* fieldIDs =
-      (RsScriptFieldID*)alloca(sizeof(RsScriptFieldID) * fieldIDs_length);
-  for (int i = 0; i< fieldIDs_length; i++) {
+  jlong* jValues = _env->GetLongArrayElements(valueArray, nullptr);
+  jsize values_length = _env->GetArrayLength(valueArray);
+  jint* jSizes = _env->GetIntArrayElements(sizeArray, nullptr);
+  jsize sizes_length = _env->GetArrayLength(sizeArray);
+
+  size_t numValues;
+  RsScriptFieldID* fieldIDs;
+  uintptr_t* values;
+
+  if (fieldIDs_length != values_length || values_length != sizes_length) {
+      ALOGE("Unmatched field IDs, values, and sizes in closure creation.");
+      goto exit;
+  }
+
+  numValues = (size_t) fieldIDs_length;
+
+  if (numValues > RS_CLOSURE_MAX_NUMBER_ARGS_AND_BINDINGS) {
+      ALOGE("Too many arguments or globals in closure creation");
+      goto exit;
+  }
+
+  fieldIDs = (RsScriptFieldID*)alloca(sizeof(RsScriptFieldID) * numValues);
+  if (fieldIDs == nullptr) {
+      goto exit;
+  }
+
+  for (size_t i = 0; i< numValues; i++) {
     fieldIDs[i] = (RsScriptFieldID)jFieldIDs[i];
   }
 
-  jlong* jValues = _env->GetLongArrayElements(valueArray, nullptr);
-  jsize values_length = _env->GetArrayLength(valueArray);
-  uintptr_t* values = (uintptr_t*)alloca(sizeof(uintptr_t) * values_length);
-  for (int i = 0; i < values_length; i++) {
-    values[i] = (uintptr_t)jValues[i];
+  values = (uintptr_t*)alloca(sizeof(uintptr_t) * numValues);
+  if (values == nullptr) {
+      goto exit;
   }
 
-  jint* sizes = _env->GetIntArrayElements(sizeArray, nullptr);
-  jsize sizes_length = _env->GetArrayLength(sizeArray);
+  for (size_t i = 0; i < numValues; i++) {
+    values[i] = (uintptr_t)jValues[i];
+  }
 
-  return (jlong)(uintptr_t)rsInvokeClosureCreate(
+  ret = (jlong)(uintptr_t)rsInvokeClosureCreate(
       (RsContext)con, (RsScriptInvokeID)invokeID, jParams, jParamLength,
-      fieldIDs, (size_t)fieldIDs_length, values, (size_t)values_length,
-      (int*)sizes, (size_t)sizes_length);
+      fieldIDs, numValues, values, numValues,
+      (int*)jSizes, numValues);
+
+exit:
+
+  _env->ReleaseIntArrayElements (sizeArray,       jSizes,       JNI_ABORT);
+  _env->ReleaseLongArrayElements(valueArray,      jValues,      JNI_ABORT);
+  _env->ReleaseLongArrayElements(fieldIDArray,    jFieldIDs,    JNI_ABORT);
+  _env->ReleaseByteArrayElements(paramArray,      jParams,      JNI_ABORT);
+
+  return ret;
 }
 
 static void
@@ -425,20 +508,40 @@ nClosureSetGlobal(JNIEnv *_env, jobject _this, jlong con, jlong closureID,
 static long
 nScriptGroup2Create(JNIEnv *_env, jobject _this, jlong con, jstring name,
                     jstring cacheDir, jlongArray closureArray) {
+  jlong ret = 0;
+
   AutoJavaStringToUTF8 nameUTF(_env, name);
   AutoJavaStringToUTF8 cacheDirUTF(_env, cacheDir);
 
   jlong* jClosures = _env->GetLongArrayElements(closureArray, nullptr);
   jsize numClosures = _env->GetArrayLength(closureArray);
-  RsClosure* closures = (RsClosure*)alloca(sizeof(RsClosure) * numClosures);
+
+  RsClosure* closures;
+
+  if (numClosures > (jsize) RS_SCRIPT_GROUP_MAX_NUMBER_CLOSURES) {
+    ALOGE("Too many closures in script group");
+    goto exit;
+  }
+
+  closures = (RsClosure*)alloca(sizeof(RsClosure) * numClosures);
+  if (closures == nullptr) {
+      goto exit;
+  }
+
   for (int i = 0; i < numClosures; i++) {
     closures[i] = (RsClosure)jClosures[i];
   }
 
-  return (jlong)(uintptr_t)rsScriptGroup2Create(
+  ret = (jlong)(uintptr_t)rsScriptGroup2Create(
       (RsContext)con, nameUTF.c_str(), nameUTF.length(),
       cacheDirUTF.c_str(), cacheDirUTF.length(),
       closures, numClosures);
+
+exit:
+
+  _env->ReleaseLongArrayElements(closureArray, jClosures, JNI_ABORT);
+
+  return ret;
 }
 
 static void
@@ -531,7 +634,7 @@ nScriptIntrinsicBLAS_Complex(JNIEnv *_env, jobject _this, jlong con, jlong id, j
     call.alpha.c.r = alphaX;
     call.alpha.c.i = alphaY;
     call.beta.c.r = betaX;
-    call.beta.c.r = betaY;
+    call.beta.c.i = betaY;
     call.incX = incX;
     call.incY = incY;
     call.KL = KL;
@@ -566,7 +669,7 @@ nScriptIntrinsicBLAS_Z(JNIEnv *_env, jobject _this, jlong con, jlong id, jint fu
     call.alpha.z.r = alphaX;
     call.alpha.z.i = alphaY;
     call.beta.z.r = betaX;
-    call.beta.z.r = betaY;
+    call.beta.z.i = betaY;
     call.incX = incX;
     call.incY = incY;
     call.KL = KL;
@@ -593,8 +696,8 @@ nScriptIntrinsicBLAS_BNNM(JNIEnv *_env, jobject _this, jlong con, jlong id, jint
     call.M = M;
     call.N = N;
     call.K = K;
-    call.a_offset = a_offset;
-    call.b_offset = b_offset;
+    call.a_offset = a_offset & 0xFF;
+    call.b_offset = b_offset & 0xFF;
     call.c_offset = c_offset;
     call.c_mult_int = c_mult_int;
 
@@ -1107,9 +1210,8 @@ static jlong
 nAllocationCreateFromBitmap(JNIEnv *_env, jobject _this, jlong con, jlong type, jint mip,
                             jobject jbitmap, jint usage)
 {
-    SkBitmap const * nativeBitmap =
-            (SkBitmap const *)_env->GetLongField(jbitmap, gNativeBitmapID);
-    const SkBitmap& bitmap(*nativeBitmap);
+    SkBitmap bitmap;
+    GraphicsJNI::getSkBitmap(_env, jbitmap, &bitmap);
 
     bitmap.lockPixels();
     const void* ptr = bitmap.getPixels();
@@ -1124,9 +1226,8 @@ static jlong
 nAllocationCreateBitmapBackedAllocation(JNIEnv *_env, jobject _this, jlong con, jlong type,
                                         jint mip, jobject jbitmap, jint usage)
 {
-    SkBitmap const * nativeBitmap =
-            (SkBitmap const *)_env->GetLongField(jbitmap, gNativeBitmapID);
-    const SkBitmap& bitmap(*nativeBitmap);
+    SkBitmap bitmap;
+    GraphicsJNI::getSkBitmap(_env, jbitmap, &bitmap);
 
     bitmap.lockPixels();
     const void* ptr = bitmap.getPixels();
@@ -1141,9 +1242,8 @@ static jlong
 nAllocationCubeCreateFromBitmap(JNIEnv *_env, jobject _this, jlong con, jlong type, jint mip,
                                 jobject jbitmap, jint usage)
 {
-    SkBitmap const * nativeBitmap =
-            (SkBitmap const *)_env->GetLongField(jbitmap, gNativeBitmapID);
-    const SkBitmap& bitmap(*nativeBitmap);
+    SkBitmap bitmap;
+    GraphicsJNI::getSkBitmap(_env, jbitmap, &bitmap);
 
     bitmap.lockPixels();
     const void* ptr = bitmap.getPixels();
@@ -1157,9 +1257,8 @@ nAllocationCubeCreateFromBitmap(JNIEnv *_env, jobject _this, jlong con, jlong ty
 static void
 nAllocationCopyFromBitmap(JNIEnv *_env, jobject _this, jlong con, jlong alloc, jobject jbitmap)
 {
-    SkBitmap const * nativeBitmap =
-            (SkBitmap const *)_env->GetLongField(jbitmap, gNativeBitmapID);
-    const SkBitmap& bitmap(*nativeBitmap);
+    SkBitmap bitmap;
+    GraphicsJNI::getSkBitmap(_env, jbitmap, &bitmap);
     int w = bitmap.width();
     int h = bitmap.height();
 
@@ -1174,9 +1273,8 @@ nAllocationCopyFromBitmap(JNIEnv *_env, jobject _this, jlong con, jlong alloc, j
 static void
 nAllocationCopyToBitmap(JNIEnv *_env, jobject _this, jlong con, jlong alloc, jobject jbitmap)
 {
-    SkBitmap const * nativeBitmap =
-            (SkBitmap const *)_env->GetLongField(jbitmap, gNativeBitmapID);
-    const SkBitmap& bitmap(*nativeBitmap);
+    SkBitmap bitmap;
+    GraphicsJNI::getSkBitmap(_env, jbitmap, &bitmap);
 
     bitmap.lockPixels();
     void* ptr = bitmap.getPixels();
@@ -1756,7 +1854,7 @@ nScriptForEach(JNIEnv *_env, jobject _this, jlong con, jlong script, jint slot,
                jintArray limits)
 {
     if (kLogApi) {
-        ALOGD("nScriptForEach, con(%p), s(%p), slot(%i)", (RsContext)con, (void *)script, slot);
+        ALOGD("nScriptForEach, con(%p), s(%p), slot(%i) ains(%p) aout(%" PRId64 ")", (RsContext)con, (void *)script, slot, ains, aout);
     }
 
     jint   in_len = 0;
@@ -1766,8 +1864,14 @@ nScriptForEach(JNIEnv *_env, jobject _this, jlong con, jlong script, jint slot,
 
     if (ains != nullptr) {
         in_len = _env->GetArrayLength(ains);
-        in_ptr = _env->GetLongArrayElements(ains, nullptr);
+        if (in_len > (jint)RS_KERNEL_MAX_ARGUMENTS) {
+            ALOGE("Too many arguments in kernel launch.");
+            // TODO (b/20758983): Report back to Java and throw an exception
+            return;
+        }
 
+        // TODO (b/20760800): Check in_ptr is not null
+        in_ptr = _env->GetLongArrayElements(ains, nullptr);
         if (sizeof(RsAllocation) == sizeof(jlong)) {
             in_allocs = (RsAllocation*)in_ptr;
 
@@ -1775,6 +1879,11 @@ nScriptForEach(JNIEnv *_env, jobject _this, jlong con, jlong script, jint slot,
             // Convert from 64-bit jlong types to the native pointer type.
 
             in_allocs = (RsAllocation*)alloca(in_len * sizeof(RsAllocation));
+            if (in_allocs == nullptr) {
+                ALOGE("Failed launching kernel for lack of memory.");
+                _env->ReleaseLongArrayElements(ains, in_ptr, JNI_ABORT);
+                return;
+            }
 
             for (int index = in_len; --index >= 0;) {
                 in_allocs[index] = (RsAllocation)in_ptr[index];