diff options
-rw-r--r-- | rs/java/android/renderscript/RenderScript.java | 49 | ||||
-rw-r--r-- | rs/java/android/renderscript/ScriptIntrinsicBLAS.java | 1489 | ||||
-rw-r--r-- | rs/jni/android_renderscript_RenderScript.cpp | 142 |
3 files changed, 1680 insertions, 0 deletions
diff --git a/rs/java/android/renderscript/RenderScript.java b/rs/java/android/renderscript/RenderScript.java index 5e150e9..e7487aa 100644 --- a/rs/java/android/renderscript/RenderScript.java +++ b/rs/java/android/renderscript/RenderScript.java @@ -887,6 +887,55 @@ public class RenderScript { return rsnPathCreate(mContext, prim, isStatic, vtx, loop, q); } + native void rsnScriptIntrinsicBLAS_Single(long con, long id, int func, int TransA, + int TransB, int Side, int Uplo, int Diag, int M, int N, int K, + float alpha, long A, long B, float beta, long C, int incX, int incY, + int KL, int KU); + synchronized void nScriptIntrinsicBLAS_Single(long id, int func, int TransA, + int TransB, int Side, int Uplo, int Diag, int M, int N, int K, + float alpha, long A, long B, float beta, long C, int incX, int incY, + int KL, int KU) { + validate(); + rsnScriptIntrinsicBLAS_Single(mContext, id, func, TransA, TransB, Side, Uplo, Diag, M, N, K, alpha, A, B, beta, C, incX, incY, KL, KU); + } + + native void rsnScriptIntrinsicBLAS_Double(long con, long id, int func, int TransA, + int TransB, int Side, int Uplo, int Diag, int M, int N, int K, + double alpha, long A, long B, double beta, long C, int incX, int incY, + int KL, int KU); + synchronized void nScriptIntrinsicBLAS_Double(long id, int func, int TransA, + int TransB, int Side, int Uplo, int Diag, int M, int N, int K, + double alpha, long A, long B, double beta, long C, int incX, int incY, + int KL, int KU) { + validate(); + rsnScriptIntrinsicBLAS_Double(mContext, id, func, TransA, TransB, Side, Uplo, Diag, M, N, K, alpha, A, B, beta, C, incX, incY, KL, KU); + } + + native void rsnScriptIntrinsicBLAS_Complex(long con, long id, int func, int TransA, + int TransB, int Side, int Uplo, int Diag, int M, int N, int K, + float alphaX, float alphaY, long A, long B, float betaX, float betaY, long C, int incX, int incY, + int KL, int KU); + synchronized void nScriptIntrinsicBLAS_Complex(long id, int func, int TransA, + int TransB, int Side, int Uplo, int Diag, int M, int N, int K, + float alphaX, float alphaY, long A, long B, float betaX, float betaY, long C, int incX, int incY, + int KL, int KU) { + validate(); + rsnScriptIntrinsicBLAS_Complex(mContext, id, func, TransA, TransB, Side, Uplo, Diag, M, N, K, alphaX, alphaY, A, B, betaX, betaY, C, incX, incY, KL, KU); + } + + native void rsnScriptIntrinsicBLAS_Z(long con, long id, int func, int TransA, + int TransB, int Side, int Uplo, int Diag, int M, int N, int K, + double alphaX, double alphaY, long A, long B, double betaX, double betaY, long C, int incX, int incY, + int KL, int KU); + synchronized void nScriptIntrinsicBLAS_Z(long id, int func, int TransA, + int TransB, int Side, int Uplo, int Diag, int M, int N, int K, + double alphaX, double alphaY, long A, long B, double betaX, double betaY, long C, int incX, int incY, + int KL, int KU) { + validate(); + rsnScriptIntrinsicBLAS_Z(mContext, id, func, TransA, TransB, Side, Uplo, Diag, M, N, K, alphaX, alphaY, A, B, betaX, betaY, C, incX, incY, KL, KU); + } + + long mDev; long mContext; @SuppressWarnings({"FieldCanBeLocal"}) diff --git a/rs/java/android/renderscript/ScriptIntrinsicBLAS.java b/rs/java/android/renderscript/ScriptIntrinsicBLAS.java new file mode 100644 index 0000000..90d2300 --- /dev/null +++ b/rs/java/android/renderscript/ScriptIntrinsicBLAS.java @@ -0,0 +1,1489 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package android.renderscript; + +import android.annotation.IntDef; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; + +/** + * + * BLAS + * + * @hide + **/ +public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { + private Allocation mLUT; + + private ScriptIntrinsicBLAS(long id, RenderScript rs) { + super(id, rs); + } + + private static final int RsBlas_sdsdot = 1; + private static final int RsBlas_dsdot = 2; + private static final int RsBlas_sdot = 3; + private static final int RsBlas_ddot = 4; + private static final int RsBlas_cdotu_sub = 5; + private static final int RsBlas_cdotc_sub = 6; + private static final int RsBlas_zdotu_sub = 7; + private static final int RsBlas_zdotc_sub = 8; + private static final int RsBlas_snrm2 = 9; + private static final int RsBlas_sasum = 10; + private static final int RsBlas_dnrm2 = 11; + private static final int RsBlas_dasum = 12; + private static final int RsBlas_scnrm2 = 13; + private static final int RsBlas_scasum = 14; + private static final int RsBlas_dznrm2 = 15; + private static final int RsBlas_dzasum = 16; + private static final int RsBlas_isamax = 17; + private static final int RsBlas_idamax = 18; + private static final int RsBlas_icamax = 19; + private static final int RsBlas_izamax = 20; + private static final int RsBlas_sswap = 21; + private static final int RsBlas_scopy = 22; + private static final int RsBlas_saxpy = 23; + private static final int RsBlas_dswap = 24; + private static final int RsBlas_dcopy = 25; + private static final int RsBlas_daxpy = 26; + private static final int RsBlas_cswap = 27; + private static final int RsBlas_ccopy = 28; + private static final int RsBlas_caxpy = 29; + private static final int RsBlas_zswap = 30; + private static final int RsBlas_zcopy = 31; + private static final int RsBlas_zaxpy = 32; + private static final int RsBlas_srotg = 33; + private static final int RsBlas_srotmg = 34; + private static final int RsBlas_srot = 35; + private static final int RsBlas_srotm = 36; + private static final int RsBlas_drotg = 37; + private static final int RsBlas_drotmg = 38; + private static final int RsBlas_drot = 39; + private static final int RsBlas_drotm = 40; + private static final int RsBlas_sscal = 41; + private static final int RsBlas_dscal = 42; + private static final int RsBlas_cscal = 43; + private static final int RsBlas_zscal = 44; + private static final int RsBlas_csscal = 45; + private static final int RsBlas_zdscal = 46; + private static final int RsBlas_sgemv = 47; + private static final int RsBlas_sgbmv = 48; + private static final int RsBlas_strmv = 49; + private static final int RsBlas_stbmv = 50; + private static final int RsBlas_stpmv = 51; + private static final int RsBlas_strsv = 52; + private static final int RsBlas_stbsv = 53; + private static final int RsBlas_stpsv = 54; + private static final int RsBlas_dgemv = 55; + private static final int RsBlas_dgbmv = 56; + private static final int RsBlas_dtrmv = 57; + private static final int RsBlas_dtbmv = 58; + private static final int RsBlas_dtpmv = 59; + private static final int RsBlas_dtrsv = 60; + private static final int RsBlas_dtbsv = 61; + private static final int RsBlas_dtpsv = 62; + private static final int RsBlas_cgemv = 63; + private static final int RsBlas_cgbmv = 64; + private static final int RsBlas_ctrmv = 65; + private static final int RsBlas_ctbmv = 66; + private static final int RsBlas_ctpmv = 67; + private static final int RsBlas_ctrsv = 68; + private static final int RsBlas_ctbsv = 69; + private static final int RsBlas_ctpsv = 70; + private static final int RsBlas_zgemv = 71; + private static final int RsBlas_zgbmv = 72; + private static final int RsBlas_ztrmv = 73; + private static final int RsBlas_ztbmv = 74; + private static final int RsBlas_ztpmv = 75; + private static final int RsBlas_ztrsv = 76; + private static final int RsBlas_ztbsv = 77; + private static final int RsBlas_ztpsv = 78; + private static final int RsBlas_ssymv = 79; + private static final int RsBlas_ssbmv = 80; + private static final int RsBlas_sspmv = 81; + private static final int RsBlas_sger = 82; + private static final int RsBlas_ssyr = 83; + private static final int RsBlas_sspr = 84; + private static final int RsBlas_ssyr2 = 85; + private static final int RsBlas_sspr2 = 86; + private static final int RsBlas_dsymv = 87; + private static final int RsBlas_dsbmv = 88; + private static final int RsBlas_dspmv = 89; + private static final int RsBlas_dger = 90; + private static final int RsBlas_dsyr = 91; + private static final int RsBlas_dspr = 92; + private static final int RsBlas_dsyr2 = 93; + private static final int RsBlas_dspr2 = 94; + private static final int RsBlas_chemv = 95; + private static final int RsBlas_chbmv = 96; + private static final int RsBlas_chpmv = 97; + private static final int RsBlas_cgeru = 98; + private static final int RsBlas_cgerc = 99; + private static final int RsBlas_cher = 100; + private static final int RsBlas_chpr = 101; + private static final int RsBlas_cher2 = 102; + private static final int RsBlas_chpr2 = 103; + private static final int RsBlas_zhemv = 104; + private static final int RsBlas_zhbmv = 105; + private static final int RsBlas_zhpmv = 106; + private static final int RsBlas_zgeru = 107; + private static final int RsBlas_zgerc = 108; + private static final int RsBlas_zher = 109; + private static final int RsBlas_zhpr = 110; + private static final int RsBlas_zher2 = 111; + private static final int RsBlas_zhpr2 = 112; + private static final int RsBlas_sgemm = 113; + private static final int RsBlas_ssymm = 114; + private static final int RsBlas_ssyrk = 115; + private static final int RsBlas_ssyr2k = 116; + private static final int RsBlas_strmm = 117; + private static final int RsBlas_strsm = 118; + private static final int RsBlas_dgemm = 119; + private static final int RsBlas_dsymm = 120; + private static final int RsBlas_dsyrk = 121; + private static final int RsBlas_dsyr2k = 122; + private static final int RsBlas_dtrmm = 123; + private static final int RsBlas_dtrsm = 124; + private static final int RsBlas_cgemm = 125; + private static final int RsBlas_csymm = 126; + private static final int RsBlas_csyrk = 127; + private static final int RsBlas_csyr2k = 128; + private static final int RsBlas_ctrmm = 129; + private static final int RsBlas_ctrsm = 130; + private static final int RsBlas_zgemm = 131; + private static final int RsBlas_zsymm = 132; + private static final int RsBlas_zsyrk = 133; + private static final int RsBlas_zsyr2k = 134; + private static final int RsBlas_ztrmm = 135; + private static final int RsBlas_ztrsm = 136; + private static final int RsBlas_chemm = 137; + private static final int RsBlas_cherk = 138; + private static final int RsBlas_cher2k = 139; + private static final int RsBlas_zhemm = 140; + private static final int RsBlas_zherk = 141; + private static final int RsBlas_zher2k = 142; + + /** + */ + public static ScriptIntrinsicBLAS create(RenderScript rs) { + long id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs)); + return new ScriptIntrinsicBLAS(id, rs); + } + + @IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE}) + @Retention(RetentionPolicy.SOURCE) + public @interface Transpose {} + + @IntDef({UPPER, LOWER}) + @Retention(RetentionPolicy.SOURCE) + public @interface Uplo {} + + @IntDef({NON_UNIT, UNIT}) + @Retention(RetentionPolicy.SOURCE) + public @interface Diag {} + + @IntDef({LEFT, RIGHT}) + @Retention(RetentionPolicy.SOURCE) + public @interface Side {} + + public static final int NO_TRANSPOSE = 111; + public static final int TRANSPOSE = 112; + public static final int CONJ_TRANSPOSE = 113; + + public static final int UPPER = 121; + public static final int LOWER = 122; + + public static final int NON_UNIT = 131; + public static final int UNIT = 132; + + public static final int LEFT = 141; + public static final int RIGHT = 142; + + static void validateSide(@Side int Side) { + if (Side != LEFT && Side != RIGHT) { + throw new RSRuntimeException("Invalid side passed to BLAS"); + } + } + + static void validateTranspose(@Transpose int Trans) { + if (Trans != NO_TRANSPOSE && Trans != TRANSPOSE && + Trans != CONJ_TRANSPOSE) { + throw new RSRuntimeException("Invalid transpose passed to BLAS"); + } + } + + static void validateConjTranspose(@Transpose int Trans) { + if (Trans != NO_TRANSPOSE && + Trans != CONJ_TRANSPOSE) { + throw new RSRuntimeException("Invalid transpose passed to BLAS"); + } + } + + static void validateDiag(@Diag int Diag) { + if (Diag != NON_UNIT && Diag != UNIT) { + throw new RSRuntimeException("Invalid diag passed to BLAS"); + } + } + + static void validateUplo(@Uplo int Uplo) { + if (Uplo != LEFT && Uplo != RIGHT) { + throw new RSRuntimeException("Invalid uplo passed to BLAS"); + } + } + + + /** + * Level 2 BLAS + */ + + static void validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) { + validateTranspose(TransA); + int M = A.getType().getY(); + int N = A.getType().getX(); + if (!A.getType().getElement().isCompatible(e) || + !X.getType().getElement().isCompatible(e) || + !Y.getType().getElement().isCompatible(e)) { + throw new RSRuntimeException("Called BLAS with wrong Element type"); + } + if (X.getType().getY() > 1 || Y.getType().getY() > 1) { + throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); + } + + if (incX <= 0 || incY <= 0) { + throw new RSRuntimeException("Vector increments must be greater than 0"); + } + int expectedXDim = -1, expectedYDim = -1; + if (TransA == NO_TRANSPOSE) { + expectedXDim = 1 + (N - 1) * incX; + expectedYDim = 1 + (M - 1) * incY; + } else { + expectedXDim = 1 + (M - 1) * incX; + expectedYDim = 1 + (N - 1) * incY; + } + if (X.getType().getX() != expectedXDim || + Y.getType().getY() != expectedXDim) { + throw new RSRuntimeException("Incorrect vector dimensions for GEMV"); + } + } + void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { + validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); + int M = A.getType().getY(); + int N = A.getType().getX(); + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); + } + void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { + validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); + int M = A.getType().getY(); + int N = A.getType().getX(); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); + } + void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { + validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY); + int M = A.getType().getY(); + int N = A.getType().getX(); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); + } + void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { + validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); + int M = A.getType().getY(); + int N = A.getType().getX(); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); + } + + void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { + // GBMV has the same validation requirements as GEMV + KL and KU >= 0 + validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); + if (KL < 0 || KU < 0) { + throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); + } + int M = A.getType().getY(); + int N = A.getType().getX(); + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU); + } + void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { + // GBMV has the same validation requirements as GEMV + KL and KU >= 0 + validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); + if (KL < 0 || KU < 0) { + throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); + } + int M = A.getType().getY(); + int N = A.getType().getX(); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU); + } + void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { + // GBMV has the same validation requirements as GEMV + KL and KU >= 0 + validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY); + if (KL < 0 || KU < 0) { + throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); + } + int M = A.getType().getY(); + int N = A.getType().getX(); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU); + } + void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { + // GBMV has the same validation requirements as GEMV + KL and KU >= 0 + validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); + if (KL < 0 || KU < 0) { + throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); + } + int M = A.getType().getY(); + int N = A.getType().getX(); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU); + } + + static void validateTRMV(Element e, @Transpose int TransA, Allocation A, Allocation X, int incX) { + validateTranspose(TransA); + int N = A.getType().getY(); + if (A.getType().getX() != N) { + throw new RSRuntimeException("A must be a square matrix for TRMV"); + } + if (!A.getType().getElement().isCompatible(e) || + !X.getType().getElement().isCompatible(e)) { + throw new RSRuntimeException("Called BLAS with wrong Element type"); + } + if (X.getType().getY() > 1) { + throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); + } + + if (incX <= 0) { + throw new RSRuntimeException("Vector increments must be greater than 0"); + } + int expectedXDim = 1 + (N - 1) * incX; + if (X.getType().getX() != expectedXDim) { + throw new RSRuntimeException("Incorrect vector dimensions for TRMV"); + } + } + + static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { + validateTranspose(TransA); + validateUplo(Uplo); + validateDiag(Diag); + if (!Ap.getType().getElement().isCompatible(e) || + !X.getType().getElement().isCompatible(e)) { + throw new RSRuntimeException("Called BLAS with wrong Element type"); + } + if (X.getType().getY() > 1) { + throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); + } + + if (Ap.getType().getY() > 1) { + throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); + } + + int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); + if (Ap.getType().getX() != ((N * (N+1)) / 2)) { + throw new RSRuntimeException("Invalid dimension for Ap"); + } + + int expectedXDim = 1 + (N - 1) * incX; + if (X.getType().getX() != expectedXDim) { + throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); + } + + return N; + } + + void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { + validateTRMV(Element.F32(mRS), TransA, A, X, incX); + int N = A.getType().getY(); + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); + } + void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { + validateTRMV(Element.F64(mRS), TransA, A, X, incX); + int N = A.getType().getY(); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); + } + void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { + validateTRMV(Element.F32_2(mRS), TransA, A, X, incX); + int N = A.getType().getY(); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); + } + void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { + validateTRMV(Element.F64_2(mRS), TransA, A, X, incX); + int N = A.getType().getY(); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); + } + void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { + // TBMV has the same requirements as TRMV + validateTRMV(Element.F32(mRS), TransA, A, X, incX); + int N = A.getType().getY(); + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); + } + void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { + // TBMV has the same requirements as TRMV + validateTRMV(Element.F64(mRS), TransA, A, X, incX); + int N = A.getType().getY(); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); + } + void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { + // TBMV has the same requirements as TRMV + validateTRMV(Element.F32_2(mRS), TransA, A, X, incX); + int N = A.getType().getY(); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); + } + void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { + // TBMV has the same requirements as TRMV + validateTRMV(Element.F64_2(mRS), TransA, A, X, incX); + int N = A.getType().getY(); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); + } + void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { + int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); + } + void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { + int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); + } + void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { + int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); + } + void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { + int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); + } + void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { + // TRSV is the same as TRMV + validateTRMV(Element.F32(mRS), TransA, A, X, incX); + int N = A.getType().getY(); + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); + + } + void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { + // TRSV is the same as TRMV + validateTRMV(Element.F64(mRS), TransA, A, X, incX); + int N = A.getType().getY(); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); + + } + void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { + // TRSV is the same as TRMV + validateTRMV(Element.F32_2(mRS), TransA, A, X, incX); + int N = A.getType().getY(); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); + + } + void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { + // TRSV is the same as TRMV + validateTRMV(Element.F64_2(mRS), TransA, A, X, incX); + int N = A.getType().getY(); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); + + } + void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { + // TBSV is the same as TRMV + validateTRMV(Element.F32(mRS), TransA, A, X, incX); + int N = A.getType().getY(); + if (K < 0) { + throw new RSRuntimeException("Number of diagonals must be positive"); + } + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); + } + void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { + // TBSV is the same as TRMV + validateTRMV(Element.F64(mRS), TransA, A, X, incX); + int N = A.getType().getY(); + if (K < 0) { + throw new RSRuntimeException("Number of diagonals must be positive"); + } + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); + } + void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { + // TBSV is the same as TRMV + validateTRMV(Element.F32_2(mRS), TransA, A, X, incX); + int N = A.getType().getY(); + if (K < 0) { + throw new RSRuntimeException("Number of diagonals must be positive"); + } + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); + } + void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { + // TBSV is the same as TRMV + validateTRMV(Element.F64_2(mRS), TransA, A, X, incX); + int N = A.getType().getY(); + if (K < 0) { + throw new RSRuntimeException("Number of diagonals must be positive"); + } + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); + } + void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { + // TPSV is same as TPMV + int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); + } + void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { + // TPSV is same as TPMV + int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); + } + void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { + // TPSV is same as TPMV + int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); + } + void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { + // TPSV is same as TPMV + int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); + } + + /** + * Level 2, S and D only + */ + static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) { + validateUplo(Uplo); + int N = A.getType().getY(); + if (A.getType().getX() != N) { + throw new RSRuntimeException("A must be a square matrix for SYMV"); + } + if (!A.getType().getElement().isCompatible(e) || + !X.getType().getElement().isCompatible(e) || + !Y.getType().getElement().isCompatible(e) ) { + throw new RSRuntimeException("Called BLAS with wrong Element type"); + } + if (X.getType().getY() > 1 || Y.getType().getY() > 1) { + throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); + } + + if (incX <= 0 || incY <= 0) { + throw new RSRuntimeException("Vector increments must be greater than 0"); + } + int expectedXDim = 1 + (N - 1) * incX; + if (X.getType().getX() != expectedXDim) { + throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); + } + int expectedYDim = 1 + (N - 1) * incY; + if (Y.getType().getX() != expectedYDim) { + throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); + } + return N; + } + static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) { + validateUplo(Uplo); + if (!Ap.getType().getElement().isCompatible(e) || + !X.getType().getElement().isCompatible(e) || + !Y.getType().getElement().isCompatible(e)) { + throw new RSRuntimeException("Called BLAS with wrong Element type"); + } + if (X.getType().getY() > 1 || Y.getType().getY() > 1) { + throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); + } + + if (Ap.getType().getY() > 1) { + throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); + } + + int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); + if (Ap.getType().getX() != ((N * (N+1)) / 2)) { + throw new RSRuntimeException("Invalid dimension for Ap"); + } + + int expectedXDim = 1 + (N - 1) * incX; + if (X.getType().getX() != expectedXDim) { + throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); + } + int expectedYDim = 1 + (N - 1) * incY; + if (Y.getType().getX() != expectedYDim) { + throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); + } + + return N; + } + static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + if (!A.getType().getElement().isCompatible(e) || + !X.getType().getElement().isCompatible(e) || + !Y.getType().getElement().isCompatible(e) ) { + throw new RSRuntimeException("Called BLAS with wrong Element type"); + } + + if (X.getType().getY() > 1 || Y.getType().getY() > 1) { + throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); + } + + int M = A.getType().getY(); + int N = A.getType().getX(); + + if (N < 1 || M < 1) { + throw new RSRuntimeException("M and N must be 1 or greater for GER"); + } + + int expectedXDim = 1 + (N - 1) * incX; + if (X.getType().getX() != expectedXDim) { + throw new RSRuntimeException("Incorrect vector dimensions for GER"); + } + int expectedYDim = 1 + (N - 1) * incY; + if (Y.getType().getX() != expectedYDim) { + throw new RSRuntimeException("Incorrect vector dimensions for GER"); + } + + + } + static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) { + validateUplo(Uplo); + if (!A.getType().getElement().isCompatible(e) || + !X.getType().getElement().isCompatible(e)) { + throw new RSRuntimeException("Called BLAS with wrong Element type"); + } + + int N = A.getType().getX(); + + if (X.getType().getY() > 1) { + throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); + } + if (N != A.getType().getY()) { + throw new RSRuntimeException("A must be a symmetric matrix"); + } + + int expectedXDim = 1 + (N - 1) * incX; + if (X.getType().getX() != expectedXDim) { + throw new RSRuntimeException("Incorrect vector dimensions for SYR"); + } + return N; + } + static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) { + validateUplo(Uplo); + if (!Ap.getType().getElement().isCompatible(e) || + !X.getType().getElement().isCompatible(e)) { + throw new RSRuntimeException("Called BLAS with wrong Element type"); + } + if (X.getType().getY() > 1) { + throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); + } + + if (Ap.getType().getY() > 1) { + throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); + } + + int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); + if (Ap.getType().getX() != ((N * (N+1)) / 2)) { + throw new RSRuntimeException("Invalid dimension for Ap"); + } + + int expectedXDim = 1 + (N - 1) * incX; + if (X.getType().getX() != expectedXDim) { + throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); + } + + return N; + } + + static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + validateUplo(Uplo); + if (!A.getType().getElement().isCompatible(e) || + !X.getType().getElement().isCompatible(e) || + !Y.getType().getElement().isCompatible(e)) { + throw new RSRuntimeException("Called BLAS with wrong Element type"); + } + + if (X.getType().getY() > 1 || Y.getType().getY() > 1) { + throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); + } + + int N = A.getType().getX(); + + if (N != A.getType().getY()) { + throw new RSRuntimeException("A must be a symmetric matrix"); + } + + int expectedXDim = 1 + (N - 1) * incX; + int expectedYDim = 1 + (N - 1) * incY; + if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { + throw new RSRuntimeException("Incorrect vector dimensions for SYR"); + } + return N; + + } + static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { + validateUplo(Uplo); + if (!Ap.getType().getElement().isCompatible(e) || + !X.getType().getElement().isCompatible(e) || + !Y.getType().getElement().isCompatible(e)) { + throw new RSRuntimeException("Called BLAS with wrong Element type"); + } + if (X.getType().getY() > 1 || Y.getType().getY() > 1) { + throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); + } + + if (Ap.getType().getY() > 1) { + throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); + } + + int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); + if (Ap.getType().getX() != ((N * (N+1)) / 2)) { + throw new RSRuntimeException("Invalid dimension for Ap"); + } + + int expectedXDim = 1 + (N - 1) * incX; + int expectedYDim = 1 + (N - 1) * incY; + if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { + throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); + } + + return N; + } + + void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { + int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); + } + void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { + // SBMV is the same as SYMV + int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); + } + void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) { + int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY); + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); + } + void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + int M = A.getType().getY(); + int N = A.getType().getX(); + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0); + } + void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { + int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A); + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0); + } + void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { + int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap); + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0); + } + void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A); + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0); + } + void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { + int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap); + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0); + } + void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { + int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); + } + void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { + // SBMV is the same as SYMV + int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); + } + void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) { + int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); + } + void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + int M = A.getType().getY(); + int N = A.getType().getX(); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0); + } + void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { + int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0); + } + void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { + int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0); + } + void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0); + } + void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { + int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0); + } + + + /** + * Level 2, C and Z only + */ + + static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + if (!A.getType().getElement().isCompatible(e) || + !X.getType().getElement().isCompatible(e) || + !Y.getType().getElement().isCompatible(e)) { + throw new RSRuntimeException("Called BLAS with wrong Element type"); + } + if (X.getType().getY() > 1 || Y.getType().getY() > 1) { + throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); + } + + int M = A.getType().getY(); + int N = A.getType().getX(); + + int expectedXDim = 1 + (N - 1) * incX; + if (X.getType().getX() != expectedXDim) { + throw new RSRuntimeException("Incorrect vector dimensions for GERU"); + } + int expectedYDim = 1 + (N - 1) * incY; + if (Y.getType().getX() != expectedYDim) { + throw new RSRuntimeException("Incorrect vector dimensions for GERU"); + } + + } + + void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { + // HEMV is the same as SYR2 validation-wise + int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); + } + void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { + // HBMV is the same as SYR2 validation-wise + int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); + if (K < 0) { + throw new RSRuntimeException("K must be 0 or greater for HBMV"); + } + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); + } + void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { + // HPMV is the same as SPR2 + int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); + } + void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); + int M = A.getType().getY(); + int N = A.getType().getX(); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); + } + void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + // same as GERU + validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); + int M = A.getType().getY(); + int N = A.getType().getX(); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); + } + void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { + // same as SYR + int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0); + } + void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { + // equivalent to SPR for validation + int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0); + } + void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + // same as SYR2 + int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); + } + void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { + // same as SPR2 + int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0); + } + void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { + // HEMV is the same as SYR2 validation-wise + int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); + } + void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { + // HBMV is the same as SYR2 validation-wise + int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); + if (K < 0) { + throw new RSRuntimeException("K must be 0 or greater for HBMV"); + } + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); + } + void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { + // HPMV is the same as SPR2 + int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); + } + void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); + int M = A.getType().getY(); + int N = A.getType().getX(); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); + } + void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + // same as GERU + validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); + int M = A.getType().getY(); + int N = A.getType().getX(); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); + } + void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { + // same as SYR + int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0); + } + void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { + // equivalent to SPR for validation + int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0); + } + void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { + // same as SYR2 + int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); + } + void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { + // same as SPR2 + int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0); + } + + + /** + * Level 3 BLAS + */ + + static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) { + int aX = -1, aY = -1, bX = -1, bY = -1, cX = -1, cY = -1; + if ((A != null && !A.getType().getElement().isCompatible(e)) || + (B != null && !B.getType().getElement().isCompatible(e)) || + (C != null && !C.getType().getElement().isCompatible(e))) { + throw new RSRuntimeException("Called BLAS with wrong Element type"); + } + if (C != null) { + cX = C.getType().getY(); + cY = C.getType().getX(); + } + if (Side == RIGHT) { + if (B != null) { + bX = A.getType().getY(); + bY = A.getType().getX(); + } + if (A != null) { + aX = B.getType().getY(); + aY = B.getType().getX(); + } + } else { + if (A != null) { + if (TransA == TRANSPOSE) { + aY = A.getType().getY(); + aX = A.getType().getX(); + } else { + aX = A.getType().getY(); + aY = A.getType().getX(); + } + } + if (B != null) { + if (TransB == TRANSPOSE) { + bY = B.getType().getY(); + bX = B.getType().getX(); + } else { + bX = B.getType().getY(); + bY = B.getType().getX(); + } + } + } + if (A != null && B != null && C != null) { + if (aY != bX || aX != cX || bY != cY) { + throw new RSRuntimeException("Called BLAS with invalid dimensions"); + } + } else if (A != null && C != null) { + // A and C only + if (aX != cY || aY != cX) { + throw new RSRuntimeException("Called BLAS with invalid dimensions"); + } + } else if (A != null && B != null) { + // A and B only + } + + } + + public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A, + Allocation B, float beta, Allocation C) { + validateTranspose(TransA); + validateTranspose(TransB); + validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C); + + int M = -1, N = -1, K = -1; + if (TransA == TRANSPOSE) { + M = A.getType().getX(); + K = A.getType().getY(); + } else { + M = A.getType().getY(); + K = A.getType().getX(); + } + if (TransB == TRANSPOSE) { + N = B.getType().getY(); + } else { + N = B.getType().getX(); + } + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, A.getID(mRS), B.getID(mRS), + beta, C.getID(mRS), 0, 0, 0, 0); + } + public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A, + Allocation B, double beta, Allocation C) { + validateTranspose(TransA); + validateTranspose(TransB); + validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C); + int M = -1, N = -1, K = -1; + if (TransA == TRANSPOSE) { + M = A.getType().getX(); + K = A.getType().getY(); + } else { + M = A.getType().getY(); + K = A.getType().getX(); + } + if (TransB == TRANSPOSE) { + N = B.getType().getY(); + } else { + N = B.getType().getX(); + } + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, A.getID(mRS), B.getID(mRS), + beta, C.getID(mRS), 0, 0, 0, 0); + } + public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A, + Allocation B, Float2 beta, Allocation C) { + validateTranspose(TransA); + validateTranspose(TransB); + validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C); + int M = -1, N = -1, K = -1; + if (TransA == TRANSPOSE) { + M = A.getType().getX(); + K = A.getType().getY(); + } else { + M = A.getType().getY(); + K = A.getType().getX(); + } + if (TransB == TRANSPOSE) { + N = B.getType().getY(); + } else { + N = B.getType().getX(); + } + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), + beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); + } + + public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A, + Allocation B, Double2 beta, Allocation C) { + validateTranspose(TransA); + validateTranspose(TransB); + validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C); + int M = -1, N = -1, K = -1; + if (TransA == TRANSPOSE) { + M = A.getType().getX(); + K = A.getType().getY(); + } else { + M = A.getType().getY(); + K = A.getType().getX(); + } + if (TransB == TRANSPOSE) { + N = B.getType().getY(); + } else { + N = B.getType().getX(); + } + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), + beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); + } + + public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A, + Allocation B, float beta, Allocation C) { + validateSide(Side); + validateUplo(Uplo); + validateL3(Element.F32(mRS), 0, 0, Side, A, B, C); + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS), + beta, C.getID(mRS), 0, 0, 0, 0); + } + public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A, + Allocation B, double beta, Allocation C) { + validateSide(Side); + validateUplo(Uplo); + validateL3(Element.F64(mRS), 0, 0, Side, A, B, C); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS), + beta, C.getID(mRS), 0, 0, 0, 0); + } + public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, + Allocation B, Float2 beta, Allocation C) { + validateSide(Side); + validateUplo(Uplo); + validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), + beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); + } + public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, + Allocation B, Double2 beta, Allocation C) { + validateSide(Side); + validateUplo(Uplo); + validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), + beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); + } + + public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { + validateTranspose(Trans); + validateUplo(Uplo); + validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C); + int K = -1; + if (Trans == TRANSPOSE) { + K = A.getType().getY(); + } else { + K = A.getType().getX(); + } + + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0); + } + + public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { + validateTranspose(Trans); + validateUplo(Uplo); + validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C); + int K = -1; + if (Trans == TRANSPOSE) { + K = A.getType().getY(); + } else { + K = A.getType().getX(); + } + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0); + } + public void CSYRK(@Uplo int Uplo, @Transpose int Trans, float alphaX, float alphaY, Allocation A, float betaX, float betaY, Allocation C) { + validateTranspose(Trans); + validateUplo(Uplo); + validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C); + int K = -1; + if (Trans == TRANSPOSE) { + K = A.getType().getY(); + } else { + K = A.getType().getX(); + } + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alphaX, alphaY, A.getID(mRS), 0, betaX, betaY, + C.getID(mRS), 0, 0, 0, 0); + } + public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, double alphaX, double alphaY, Allocation A, double betaX, double betaY, Allocation C) { + validateTranspose(Trans); + validateUplo(Uplo); + validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C); + int K = -1; + if (Trans == TRANSPOSE) { + K = A.getType().getY(); + } else { + K = A.getType().getX(); + } + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alphaX, alphaY, A.getID(mRS), 0, betaX, betaY, + C.getID(mRS), 0, 0, 0, 0); + } + + static void validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) { + validateTranspose(Trans); + if (!A.getType().getElement().isCompatible(e) || + !B.getType().getElement().isCompatible(e) || + !C.getType().getElement().isCompatible(e)) { + throw new RSRuntimeException("Called BLAS with wrong Element type"); + } + int Cdim = -1; + // A is n x k if no transpose, k x n if transpose + // C is n x n + if (Trans == TRANSPOSE) { + // check columns versus C + Cdim = A.getType().getX(); + } else { + // check rows versus C + Cdim = A.getType().getY(); + } + if (C.getType().getX() != Cdim && C.getType().getY() != Cdim) { + throw new RSRuntimeException("Invalid symmetric matrix in SYR2K"); + } + // A dims == B dims + if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { + throw new RSRuntimeException("Invalid A and B in SYR2K"); + } + } + public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) { + validateUplo(Uplo); + validateSYR2K(Element.F32(mRS), Trans, A, B, C); + int K = -1; + if (Trans == TRANSPOSE) { + K = A.getType().getY(); + } else { + K = A.getType().getX(); + } + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0); + } + public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) { + validateUplo(Uplo); + validateSYR2K(Element.F64(mRS), Trans, A, B, C); + int K = -1; + if (Trans == TRANSPOSE) { + K = A.getType().getY(); + } else { + K = A.getType().getX(); + } + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0); + } + public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { + validateUplo(Uplo); + validateSYR2K(Element.F32_2(mRS), Trans, A, B, C); + int K = -1; + if (Trans == TRANSPOSE) { + K = A.getType().getY(); + } else { + K = A.getType().getX(); + } + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); + } + public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { + validateUplo(Uplo); + validateSYR2K(Element.F64_2(mRS), Trans, A, B, C); + int K = -1; + if (Trans == TRANSPOSE) { + K = A.getType().getY(); + } else { + K = A.getType().getX(); + } + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); + } + + static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { + validateSide(Side); + validateTranspose(TransA); + int aX = -1, aY = -1, bX = -1, bY = -1; + if (!A.getType().getElement().isCompatible(e) || + !B.getType().getElement().isCompatible(e)) { + throw new RSRuntimeException("Called BLAS with wrong Element type"); + } + if (TransA == TRANSPOSE) { + aY = A.getType().getY(); + aX = A.getType().getX(); + } else { + aY = A.getType().getX(); + aX = A.getType().getY(); + } + bX = B.getType().getY(); + bY = B.getType().getX(); + if (Side == LEFT) { + if (aX == 0 || aY != bX) { + throw new RSRuntimeException("Called TRMM with invalid matrices"); + } + } else { + if (bY != aX || aY == 0) { + throw new RSRuntimeException("Called TRMM with invalid matrices"); + } + } + } + public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { + validateUplo(Uplo); + validateDiag(Diag); + validateTRMM(Element.F32(mRS), Side, TransA, A, B); + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, + alpha, A.getID(mRS), B.getID(mRS), 0.f, 0, 0, 0, 0, 0); + } + public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { + validateUplo(Uplo); + validateDiag(Diag); + validateTRMM(Element.F64(mRS), Side, TransA, A, B); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, + alpha, A.getID(mRS), B.getID(mRS), 0.f, 0, 0, 0, 0, 0); + } + public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { + validateUplo(Uplo); + validateDiag(Diag); + validateTRMM(Element.F32_2(mRS), Side, TransA, A, B); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, + alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); + } + public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { + validateUplo(Uplo); + validateDiag(Diag); + validateTRMM(Element.F64_2(mRS), Side, TransA, A, B); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, + alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); + } + + static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { + int adim = -1, bX = -1, bY = -1; + validateSide(Side); + validateTranspose(TransA); + if (!A.getType().getElement().isCompatible(e) || + !B.getType().getElement().isCompatible(e)) { + throw new RSRuntimeException("Called BLAS with wrong Element type"); + } + adim = A.getType().getX(); + if (adim != A.getType().getY()) { + // this may be unnecessary, the restriction could potentially be relaxed + // A needs to contain at least that symmetric matrix but could theoretically be larger + // for now we assume adapters are sufficient, will reevaluate in the future + throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A"); + } + bX = B.getType().getY(); + bY = B.getType().getX(); + if (Side == LEFT) { + // A is M*M + if (adim != bY) { + throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); + } + } else { + // A is N*N + if (adim != bX) { + throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); + } + } + } + public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { + validateUplo(Uplo); + validateDiag(Diag); + validateTRSM(Element.F32(mRS), Side, TransA, A, B); + mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, + alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0); + } + public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { + validateUplo(Uplo); + validateDiag(Diag); + validateTRSM(Element.F64(mRS), Side, TransA, A, B); + mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, + alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0); + } + public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { + validateUplo(Uplo); + validateDiag(Diag); + validateTRSM(Element.F32_2(mRS), Side, TransA, A, B); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, + alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); + } + public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { + validateUplo(Uplo); + validateDiag(Diag); + validateTRSM(Element.F64_2(mRS), Side, TransA, A, B); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, + alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); + } + + static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) { + validateSide(Side); + + if (!A.getType().getElement().isCompatible(e) || + !B.getType().getElement().isCompatible(e) || + !C.getType().getElement().isCompatible(e)) { + throw new RSRuntimeException("Called BLAS with wrong Element type"); + } + + // A must be square; can potentially be relaxed similar to TRSM + int adim = A.getType().getX(); + if (adim != A.getType().getY()) { + throw new RSRuntimeException("Called HEMM with non-square A"); + } + if ((Side == LEFT && adim != B.getType().getY()) || + (Side == RIGHT && adim != B.getType().getX())) { + throw new RSRuntimeException("Called HEMM with invalid B"); + } + if (B.getType().getX() != C.getType().getX() || + B.getType().getY() != C.getType().getY()) { + throw new RSRuntimeException("Called HEMM with mismatched B and C"); + } + } + public void CHEMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A, Allocation B, float beta, Allocation C) { + validateUplo(Uplo); + validateHEMM(Element.F32_2(mRS), Side, A, B, C); + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, + alpha, 0, A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0); + } + public void ZHEMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A, Allocation B, double beta, Allocation C) { + validateUplo(Uplo); + validateHEMM(Element.F32_2(mRS), Side, A, B, C); + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, + alpha, 0, A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0); + } + + static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) { + if (!A.getType().getElement().isCompatible(e) || + !C.getType().getElement().isCompatible(e)) { + throw new RSRuntimeException("Called BLAS with wrong Element type"); + } + validateConjTranspose(Trans); + int cdim = C.getType().getX(); + if (cdim != C.getType().getY()) { + throw new RSRuntimeException("Called HERK with non-square C"); + } + if (Trans == NO_TRANSPOSE) { + if (cdim != A.getType().getX()) { + throw new RSRuntimeException("Called HERK with invalid A"); + } + } else { + if (cdim != A.getType().getY()) { + throw new RSRuntimeException("Called HERK with invalid A"); + } + } + } + public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { + validateUplo(Uplo); + validateHERK(Element.F32_2(mRS), Trans, A, C); + int k = 0; + if (Trans == TRANSPOSE) { + k = A.getType().getY(); + } else { + k = A.getType().getX(); + } + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, + alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0); + } + public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { + validateUplo(Uplo); + validateHERK(Element.F64_2(mRS), Trans, A, C); + int k = 0; + if (Trans == TRANSPOSE) { + k = A.getType().getY(); + } else { + k = A.getType().getX(); + } + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, + alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0); + } + + static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) { + if (!A.getType().getElement().isCompatible(e) || + !B.getType().getElement().isCompatible(e) || + !C.getType().getElement().isCompatible(e)) { + throw new RSRuntimeException("Called BLAS with wrong Element type"); + } + validateConjTranspose(Trans); + int cdim = C.getType().getX(); + if (cdim != C.getType().getY()) { + throw new RSRuntimeException("Called HER2K with non-square C"); + } + if (Trans == NO_TRANSPOSE) { + if (A.getType().getY() != cdim) { + throw new RSRuntimeException("Called HER2K with invalid matrices"); + } + } else { + if (A.getType().getX() != cdim) { + throw new RSRuntimeException("Called HER2K with invalid matrices"); + } + } + if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { + throw new RSRuntimeException("Called HER2K with invalid A and B matrices"); + } + } + public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) { + validateUplo(Uplo); + validateHER2K(Element.F32_2(mRS), Trans, A, B, C); + int k = 0; + if (Trans == NO_TRANSPOSE) { + k = A.getType().getX(); + } else { + k = A.getType().getY(); + } + mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y, + A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0); + } + public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) { + validateUplo(Uplo); + validateHER2K(Element.F64_2(mRS), Trans, A, B, C); + int k = 0; + if (Trans == NO_TRANSPOSE) { + k = A.getType().getX(); + } else { + k = A.getType().getY(); + } + mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y, + A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0); + } + + + +} diff --git a/rs/jni/android_renderscript_RenderScript.cpp b/rs/jni/android_renderscript_RenderScript.cpp index 198cabe..2612323 100644 --- a/rs/jni/android_renderscript_RenderScript.cpp +++ b/rs/jni/android_renderscript_RenderScript.cpp @@ -310,6 +310,143 @@ nScriptGroup2Execute(JNIEnv *_env, jobject _this, jlong con, jlong groupID) { } static void +nScriptIntrinsicBLAS_Single(JNIEnv *_env, jobject _this, jlong con, jlong id, jint func, jint TransA, + jint TransB, jint Side, jint Uplo, jint Diag, jint M, jint N, jint K, + jfloat alpha, jlong A, jlong B, jfloat beta, jlong C, jint incX, jint incY, + jint KL, jint KU) { + RsBlasCall call; + memset(&call, 0, sizeof(call)); + call.func = (RsBlasFunction)func; + call.transA = (RsBlasTranspose)TransA; + call.transB = (RsBlasTranspose)TransB; + call.side = (RsBlasSide)Side; + call.uplo = (RsBlasUplo)Uplo; + call.diag = (RsBlasDiag)Diag; + call.M = M; + call.N = N; + call.K = K; + call.alpha.f = alpha; + call.beta.f = beta; + call.incX = incX; + call.incY = incY; + call.KL = KL; + call.KU = KU; + + RsAllocation in_allocs[3]; + in_allocs[0] = (RsAllocation)A; + in_allocs[1] = (RsAllocation)B; + in_allocs[2] = (RsAllocation)C; + + rsScriptForEachMulti((RsContext)con, (RsScript)id, 0, + in_allocs, sizeof(in_allocs), nullptr, + &call, sizeof(call), nullptr, 0); +} + +static void +nScriptIntrinsicBLAS_Double(JNIEnv *_env, jobject _this, jlong con, jlong id, jint func, jint TransA, + jint TransB, jint Side, jint Uplo, jint Diag, jint M, jint N, jint K, + jdouble alpha, jlong A, jlong B, jdouble beta, jlong C, jint incX, jint incY, + jint KL, jint KU) { + RsBlasCall call; + memset(&call, 0, sizeof(call)); + call.func = (RsBlasFunction)func; + call.transA = (RsBlasTranspose)TransA; + call.transB = (RsBlasTranspose)TransB; + call.side = (RsBlasSide)Side; + call.uplo = (RsBlasUplo)Uplo; + call.diag = (RsBlasDiag)Diag; + call.M = M; + call.N = N; + call.K = K; + call.alpha.d = alpha; + call.beta.d = beta; + call.incX = incX; + call.incY = incY; + call.KL = KL; + call.KU = KU; + + RsAllocation in_allocs[3]; + in_allocs[0] = (RsAllocation)A; + in_allocs[1] = (RsAllocation)B; + in_allocs[2] = (RsAllocation)C; + + rsScriptForEachMulti((RsContext)con, (RsScript)id, 0, + in_allocs, sizeof(in_allocs), nullptr, + &call, sizeof(call), nullptr, 0); +} + +static void +nScriptIntrinsicBLAS_Complex(JNIEnv *_env, jobject _this, jlong con, jlong id, jint func, jint TransA, + jint TransB, jint Side, jint Uplo, jint Diag, jint M, jint N, jint K, + jfloat alphaX, jfloat alphaY, jlong A, jlong B, jfloat betaX, + jfloat betaY, jlong C, jint incX, jint incY, jint KL, jint KU) { + RsBlasCall call; + memset(&call, 0, sizeof(call)); + call.func = (RsBlasFunction)func; + call.transA = (RsBlasTranspose)TransA; + call.transB = (RsBlasTranspose)TransB; + call.side = (RsBlasSide)Side; + call.uplo = (RsBlasUplo)Uplo; + call.diag = (RsBlasDiag)Diag; + call.M = M; + call.N = N; + call.K = K; + call.alpha.c.r = alphaX; + call.alpha.c.i = alphaY; + call.beta.c.r = betaX; + call.beta.c.r = betaY; + call.incX = incX; + call.incY = incY; + call.KL = KL; + call.KU = KU; + + RsAllocation in_allocs[3]; + in_allocs[0] = (RsAllocation)A; + in_allocs[1] = (RsAllocation)B; + in_allocs[2] = (RsAllocation)C; + + rsScriptForEachMulti((RsContext)con, (RsScript)id, 0, + in_allocs, sizeof(in_allocs), nullptr, + &call, sizeof(call), nullptr, 0); +} + +static void +nScriptIntrinsicBLAS_Z(JNIEnv *_env, jobject _this, jlong con, jlong id, jint func, jint TransA, + jint TransB, jint Side, jint Uplo, jint Diag, jint M, jint N, jint K, + jdouble alphaX, jdouble alphaY, jlong A, jlong B, jdouble betaX, + jdouble betaY, jlong C, jint incX, jint incY, jint KL, jint KU) { + RsBlasCall call; + memset(&call, 0, sizeof(call)); + call.func = (RsBlasFunction)func; + call.transA = (RsBlasTranspose)TransA; + call.transB = (RsBlasTranspose)TransB; + call.side = (RsBlasSide)Side; + call.uplo = (RsBlasUplo)Uplo; + call.diag = (RsBlasDiag)Diag; + call.M = M; + call.N = N; + call.K = K; + call.alpha.z.r = alphaX; + call.alpha.z.i = alphaY; + call.beta.z.r = betaX; + call.beta.z.r = betaY; + call.incX = incX; + call.incY = incY; + call.KL = KL; + call.KU = KU; + + RsAllocation in_allocs[3]; + in_allocs[0] = (RsAllocation)A; + in_allocs[1] = (RsAllocation)B; + in_allocs[2] = (RsAllocation)C; + + rsScriptForEachMulti((RsContext)con, (RsScript)id, 0, + in_allocs, sizeof(in_allocs), nullptr, + &call, sizeof(call), nullptr, 0); +} + + +static void nAssignName(JNIEnv *_env, jobject _this, jlong con, jlong obj, jbyteArray str) { if (kLogApi) { @@ -2094,6 +2231,11 @@ static JNINativeMethod methods[] = { {"rsnScriptGroupExecute", "(JJ)V", (void*)nScriptGroupExecute }, {"rsnScriptGroup2Execute", "(JJ)V", (void*)nScriptGroup2Execute }, +{"rsnScriptIntrinsicBLAS_Single", "(JJIIIIIIIIIFJJFJIIII)V", (void*)nScriptIntrinsicBLAS_Single }, +{"rsnScriptIntrinsicBLAS_Double", "(JJIIIIIIIIIDJJDJIIII)V", (void*)nScriptIntrinsicBLAS_Double }, +{"rsnScriptIntrinsicBLAS_Complex", "(JJIIIIIIIIIFFJJFFJIIII)V", (void*)nScriptIntrinsicBLAS_Complex }, +{"rsnScriptIntrinsicBLAS_Z", "(JJIIIIIIIIIDDJJDDJIIII)V", (void*)nScriptIntrinsicBLAS_Z }, + {"rsnProgramStoreCreate", "(JZZZZZZIII)J", (void*)nProgramStoreCreate }, {"rsnProgramBindConstants", "(JJIJ)V", (void*)nProgramBindConstants }, |