12593348eSBarry Smith 235aab85fSBarry Smith #if !defined(__BAIJ_H) 335aab85fSBarry Smith #define __BAIJ_H 47c4f633dSBarry Smith #include "private/matimpl.h" 57c4f633dSBarry Smith #include "../src/mat/impls/aij/seq/aij.h" 6a4005a5dSBarry Smith #include "../src/mat/impls/baij/seq/ftn-kernels/fsolvebaij.h" 72593348eSBarry Smith 82593348eSBarry Smith /* 935aab85fSBarry Smith MATSEQBAIJ format - Block compressed row storage. The i[] and j[] 10e24b481bSBarry Smith arrays start at 0. 112593348eSBarry Smith */ 122593348eSBarry Smith 136c6c5352SBarry Smith /* This header is shared by the SeqSBAIJ matrix */ 146c6c5352SBarry Smith #define SEQBAIJHEADER \ 15521d7252SBarry Smith PetscInt bs2; /* square of block size */ \ 16690b6cddSBarry Smith PetscInt mbs,nbs; /* rows/bs, columns/bs */ \ 176c6c5352SBarry Smith PetscScalar *mult_work; /* work array for matrix vector product*/ \ 18dd6ea824SBarry Smith MatScalar *saved_values; \ 196c6c5352SBarry Smith \ 206ad2eaddSHong Zhang Mat sbaijMat; /* mat in sbaij format */ \ 216c6c5352SBarry Smith \ 226c6c5352SBarry Smith \ 23dd6ea824SBarry Smith MatScalar *idiag; /* inverse of block diagonal */ \ 24e6b907acSBarry Smith PetscTruth idiagvalid /* if above has correct/current values */ 25e6b907acSBarry Smith 266c6c5352SBarry Smith 276c6c5352SBarry Smith typedef struct { 28dd6ea824SBarry Smith SEQAIJHEADER(MatScalar); 29e6b907acSBarry Smith SEQBAIJHEADER; 3035aab85fSBarry Smith } Mat_SeqBAIJ; 312593348eSBarry Smith 32f349cc81SSatish Balay EXTERN_C_BEGIN 33ab93d7beSBarry Smith EXTERN PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat,PetscInt,PetscInt,PetscInt*); 34f349cc81SSatish Balay EXTERN_C_END 3506e38f1dSHong Zhang EXTERN PetscErrorCode MatILUFactorSymbolic_SeqBAIJ_inplace(Mat,Mat,IS,IS,const MatFactorInfo*); 364dd39f65SShri Abhyankar EXTERN PetscErrorCode MatILUFactorSymbolic_SeqBAIJ(Mat,Mat,IS,IS,const MatFactorInfo*); 370481f469SBarry Smith EXTERN PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat,Mat,IS,const MatFactorInfo*); 380481f469SBarry Smith EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat,Mat,IS,const MatFactorInfo*); 390481f469SBarry Smith EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat,Mat,const MatFactorInfo*); 400481f469SBarry Smith EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 41dfbe8321SBarry Smith EXTERN PetscErrorCode MatDuplicate_SeqBAIJ(Mat,MatDuplicateOption,Mat*); 422af78befSBarry Smith EXTERN PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat,PetscTruth*,PetscInt*); 43dfbe8321SBarry Smith EXTERN PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat); 44c8342467SHong Zhang EXTERN PetscErrorCode MatILUDTFactor_SeqBAIJ(Mat,IS,IS,const MatFactorInfo*,Mat*); 45be3590efSBarry Smith 4606e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat,Mat,IS,IS,const MatFactorInfo*); 474dd39f65SShri Abhyankar EXTERN PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat,Mat,IS,IS,const MatFactorInfo*); 480481f469SBarry Smith EXTERN PetscErrorCode MatLUFactor_SeqBAIJ(Mat,IS,IS,const MatFactorInfo*); 49690b6cddSBarry Smith EXTERN PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat,PetscInt,IS*,PetscInt); 504aa3045dSJed Brown EXTERN PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatReuse,Mat*); 51690b6cddSBarry Smith EXTERN PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 52dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultTranspose_SeqBAIJ(Mat,Vec,Vec); 53547795f9SHong Zhang EXTERN PetscErrorCode MatMultHermitianTranspose_SeqBAIJ(Mat,Vec,Vec); 54dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 55547795f9SHong Zhang EXTERN PetscErrorCode MatMultHermitianTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 56f4df32b1SMatthew Knepley EXTERN PetscErrorCode MatScale_SeqBAIJ(Mat,PetscScalar); 57dfbe8321SBarry Smith EXTERN PetscErrorCode MatNorm_SeqBAIJ(Mat,NormType,PetscReal *); 58dfbe8321SBarry Smith EXTERN PetscErrorCode MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*); 59dfbe8321SBarry Smith EXTERN PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat,Vec); 60dfbe8321SBarry Smith EXTERN PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 61dfbe8321SBarry Smith EXTERN PetscErrorCode MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 62dfbe8321SBarry Smith EXTERN PetscErrorCode MatZeroEntries_SeqBAIJ(Mat); 6354138f6bSKris Buschelman 64dfbe8321SBarry Smith EXTERN PetscErrorCode MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(Mat); 6554138f6bSKris Buschelman 6606e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_1_inplace(Mat,Vec,Vec); 6706e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_1_NaturalOrdering_inplace(Mat,Vec,Vec); 6806e38f1dSHong Zhang 6906e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_inplace(Mat,Vec,Vec); 704dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 7106e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_inplace(Mat,Vec,Vec); 724dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 7306e38f1dSHong Zhang 7406e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_inplace(Mat,Vec,Vec); 754dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 7606e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_inplace(Mat,Vec,Vec); 774dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 7806e38f1dSHong Zhang 7906e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_inplace(Mat,Vec,Vec); 804dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 8106e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_inplace(Mat,Vec,Vec); 824dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 8354138f6bSKris Buschelman #if defined(PETSC_HAVE_SSE) 84dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_SSE_Demotion(Mat,Vec,Vec); 85dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_SSE_Demotion(Mat,Vec,Vec); 86dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_SSE_Demotion_usj(Mat,Vec,Vec); 874110d75bSKris Buschelman #endif 8806e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_inplace(Mat,Vec,Vec); 894dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 9006e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering_inplace(Mat,Vec,Vec); 914dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 9206e38f1dSHong Zhang 9306e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_inplace(Mat,Vec,Vec); 944dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_6(Mat,Vec,Vec); 9506e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_NaturalOrdering_inplace(Mat,Vec,Vec); 964dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 9706e38f1dSHong Zhang 9806e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_inplace(Mat,Vec,Vec); 994dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 10006e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_NaturalOrdering_inplace(Mat,Vec,Vec); 1014dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 10206e38f1dSHong Zhang 103*2b0b2ea7SShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_15(Mat,Vec,Vec); 104*2b0b2ea7SShri Abhyankar 10506e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_N_inplace(Mat,Vec,Vec); 1064dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1074dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 10806e38f1dSHong Zhang 109dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_Update(Mat,Vec,Vec); 11006e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_1_inplace(Mat,Vec,Vec); 11106e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_1_NaturalOrdering_inplace(Mat,Vec,Vec); 11206e38f1dSHong Zhang 11306e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_2_inplace(Mat,Vec,Vec); 1144dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec); 11506e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_2_NaturalOrdering_inplace(Mat,Vec,Vec); 1164dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 11706e38f1dSHong Zhang 11806e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_3_inplace(Mat,Vec,Vec); 1194dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec); 12006e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_3_NaturalOrdering_inplace(Mat,Vec,Vec); 1214dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 12206e38f1dSHong Zhang 12306e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_4_inplace(Mat,Vec,Vec); 1244dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec); 12506e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_4_NaturalOrdering_inplace(Mat,Vec,Vec); 1264dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 12706e38f1dSHong Zhang 12806e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_5_inplace(Mat,Vec,Vec); 1294dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec); 13006e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_5_NaturalOrdering_inplace(Mat,Vec,Vec); 1314dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 13206e38f1dSHong Zhang 13306e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_6_inplace(Mat,Vec,Vec); 1344dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec); 13506e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_6_NaturalOrdering_inplace(Mat,Vec,Vec); 1364dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 13706e38f1dSHong Zhang 13806e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_7_inplace(Mat,Vec,Vec); 1394dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec); 14006e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_7_NaturalOrdering_inplace(Mat,Vec,Vec); 1414dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 14206e38f1dSHong Zhang 14306e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_N_inplace(Mat,Vec,Vec); 1444dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_N(Mat,Vec,Vec); 14554138f6bSKris Buschelman 1464dd39f65SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat,const MatFactorInfo*); 14706e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1_inplace(Mat,Mat,const MatFactorInfo*); 14806e38f1dSHong Zhang 14906e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_inplace(Mat,Mat,const MatFactorInfo*); 1504dd39f65SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat,const MatFactorInfo*); 15106e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace(Mat,Mat,const MatFactorInfo*); 1524dd39f65SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 15306e38f1dSHong Zhang 15406e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_inplace(Mat,Mat,const MatFactorInfo*); 1554dd39f65SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat,const MatFactorInfo*); 15606e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace(Mat,Mat,const MatFactorInfo*); 1574dd39f65SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 15806e38f1dSHong Zhang 15906e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_inplace(Mat,Mat,const MatFactorInfo*); 1604dd39f65SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat,const MatFactorInfo*); 16106e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace(Mat,Mat,const MatFactorInfo*); 1624dd39f65SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 16354138f6bSKris Buschelman #if defined(PETSC_HAVE_SSE) 1640481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE(Mat,Mat,const MatFactorInfo*); 1650481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj(Mat,Mat,const MatFactorInfo*); 16654138f6bSKris Buschelman #else 16754138f6bSKris Buschelman #endif 16806e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_inplace(Mat,Mat,const MatFactorInfo*); 1694dd39f65SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat,const MatFactorInfo*); 17006e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace(Mat,Mat,const MatFactorInfo*); 1714dd39f65SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 17206e38f1dSHong Zhang 17306e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6_inplace(Mat,Mat,const MatFactorInfo*); 1744dd39f65SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat,const MatFactorInfo*); 17506e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace(Mat,Mat,const MatFactorInfo*); 1764dd39f65SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 17706e38f1dSHong Zhang 17806e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_inplace(Mat,Mat,const MatFactorInfo*); 1794dd39f65SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7(Mat,Mat,const MatFactorInfo*); 18006e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace(Mat,Mat,const MatFactorInfo*); 1814dd39f65SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 18206e38f1dSHong Zhang 183*2b0b2ea7SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_15(Mat,Mat,const MatFactorInfo*); 18406e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N_inplace(Mat,Mat,const MatFactorInfo*); 1854dd39f65SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat,const MatFactorInfo*); 18654138f6bSKris Buschelman 187dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_1(Mat,Vec,Vec); 188dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_2(Mat,Vec,Vec); 189dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_3(Mat,Vec,Vec); 190dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_4(Mat,Vec,Vec); 191dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_5(Mat,Vec,Vec); 192dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_6(Mat,Vec,Vec); 193dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_7(Mat,Vec,Vec); 194dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_N(Mat,Vec,Vec); 19554138f6bSKris Buschelman 196dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 197dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 198dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 199dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 200dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 201dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec); 202dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 203dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 204a313700dSBarry Smith EXTERN PetscErrorCode MatLoad_SeqBAIJ(PetscViewer, const MatType,Mat*); 2052593348eSBarry Smith 2068b1456e3SHong Zhang EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat,PetscTruth); 2074dd39f65SShri Abhyankar EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth); 2088b1456e3SHong Zhang 2094c000e2eSHong Zhang /* 2104c000e2eSHong Zhang Kernel_A_gets_A_times_B_2: A = A * B with size bs=2 2114c000e2eSHong Zhang 2124c000e2eSHong Zhang Input Parameters: 2134c000e2eSHong Zhang + A,B - square bs by bs arrays stored in column major order 2144c000e2eSHong Zhang - W - bs*bs work arrary 2154c000e2eSHong Zhang 2164c000e2eSHong Zhang Output Parameter: 2174c000e2eSHong Zhang . A = A * B 2184c000e2eSHong Zhang */ 2194c000e2eSHong Zhang 2204c000e2eSHong Zhang #define Kernel_A_gets_A_times_B_2(A,B,W) 0;\ 2214c000e2eSHong Zhang {\ 2224c000e2eSHong Zhang PetscMemcpy(W,A,4*sizeof(MatScalar));\ 2234c000e2eSHong Zhang A[0] = W[0]*B[0] + W[2]*B[1];\ 2244c000e2eSHong Zhang A[1] = W[1]*B[0] + W[3]*B[1];\ 2254c000e2eSHong Zhang A[2] = W[0]*B[2] + W[2]*B[3];\ 2264c000e2eSHong Zhang A[3] = W[1]*B[2] + W[3]*B[3];\ 2274c000e2eSHong Zhang } 2284c000e2eSHong Zhang 2294c000e2eSHong Zhang /* 2304c000e2eSHong Zhang Kernel_A_gets_A_minus_B_times_C_2: A = A - B * C with size bs=2 2314c000e2eSHong Zhang 2324c000e2eSHong Zhang Input Parameters: 2334c000e2eSHong Zhang + A,B,C - square bs by bs arrays stored in column major order 2344c000e2eSHong Zhang 2354c000e2eSHong Zhang Output Parameter: 2364c000e2eSHong Zhang . A = A - B*C 2374c000e2eSHong Zhang */ 2384c000e2eSHong Zhang 2394c000e2eSHong Zhang #define Kernel_A_gets_A_minus_B_times_C_2(A,B,C) 0;\ 2404c000e2eSHong Zhang {\ 2414c000e2eSHong Zhang A[0] -= B[0]*C[0] + B[2]*C[1];\ 2424c000e2eSHong Zhang A[1] -= B[1]*C[0] + B[3]*C[1];\ 2434c000e2eSHong Zhang A[2] -= B[0]*C[2] + B[2]*C[3];\ 2444c000e2eSHong Zhang A[3] -= B[1]*C[2] + B[3]*C[3];\ 2454c000e2eSHong Zhang } 2464c000e2eSHong Zhang 24717542729SShri Abhyankar /* 24817542729SShri Abhyankar Kernel_A_gets_A_times_B_3: A = A * B with size bs=3 24917542729SShri Abhyankar 25017542729SShri Abhyankar Input Parameters: 25117542729SShri Abhyankar + A,B - square bs by bs arrays stored in column major order 25217542729SShri Abhyankar - W - bs*bs work arrary 25317542729SShri Abhyankar 25417542729SShri Abhyankar Output Parameter: 25517542729SShri Abhyankar . A = A * B 25617542729SShri Abhyankar */ 25717542729SShri Abhyankar 25817542729SShri Abhyankar #define Kernel_A_gets_A_times_B_3(A,B,W) 0;\ 25917542729SShri Abhyankar {\ 26017542729SShri Abhyankar PetscMemcpy(W,A,9*sizeof(MatScalar));\ 26117542729SShri Abhyankar A[0] = W[0]*B[0] + W[3]*B[1] + W[6]*B[2];\ 26217542729SShri Abhyankar A[1] = W[1]*B[0] + W[4]*B[1] + W[7]*B[2];\ 26317542729SShri Abhyankar A[2] = W[2]*B[0] + W[5]*B[1] + W[8]*B[2];\ 26417542729SShri Abhyankar A[3] = W[0]*B[3] + W[3]*B[4] + W[6]*B[5];\ 26517542729SShri Abhyankar A[4] = W[1]*B[3] + W[4]*B[4] + W[7]*B[5];\ 26617542729SShri Abhyankar A[5] = W[2]*B[3] + W[5]*B[4] + W[8]*B[5];\ 26717542729SShri Abhyankar A[6] = W[0]*B[6] + W[3]*B[7] + W[6]*B[8];\ 26817542729SShri Abhyankar A[7] = W[1]*B[6] + W[4]*B[7] + W[7]*B[8];\ 26917542729SShri Abhyankar A[8] = W[2]*B[6] + W[5]*B[7] + W[8]*B[8];\ 27017542729SShri Abhyankar } 27117542729SShri Abhyankar 27217542729SShri Abhyankar /* 27317542729SShri Abhyankar Kernel_A_gets_A_minus_B_times_C_3: A = A - B * C with size bs=3 27417542729SShri Abhyankar 27517542729SShri Abhyankar Input Parameters: 27617542729SShri Abhyankar + A,B,C - square bs by bs arrays stored in column major order 27717542729SShri Abhyankar 27817542729SShri Abhyankar Output Parameter: 27917542729SShri Abhyankar . A = A - B*C 28017542729SShri Abhyankar */ 28117542729SShri Abhyankar 28217542729SShri Abhyankar #define Kernel_A_gets_A_minus_B_times_C_3(A,B,C) 0;\ 28317542729SShri Abhyankar {\ 28417542729SShri Abhyankar A[0] -= B[0]*C[0] + B[3]*C[1] + B[6]*C[2];\ 28517542729SShri Abhyankar A[1] -= B[1]*C[0] + B[4]*C[1] + B[7]*C[2];\ 28617542729SShri Abhyankar A[2] -= B[2]*C[0] + B[5]*C[1] + B[8]*C[2];\ 28717542729SShri Abhyankar A[3] -= B[0]*C[3] + B[3]*C[4] + B[6]*C[5];\ 28817542729SShri Abhyankar A[4] -= B[1]*C[3] + B[4]*C[4] + B[7]*C[5];\ 28917542729SShri Abhyankar A[5] -= B[2]*C[3] + B[5]*C[4] + B[8]*C[5];\ 29017542729SShri Abhyankar A[6] -= B[0]*C[6] + B[3]*C[7] + B[6]*C[8];\ 29117542729SShri Abhyankar A[7] -= B[1]*C[6] + B[4]*C[7] + B[7]*C[8];\ 29217542729SShri Abhyankar A[8] -= B[2]*C[6] + B[5]*C[7] + B[8]*C[8];\ 29317542729SShri Abhyankar } 29417542729SShri Abhyankar 295209027a4SShri Abhyankar /* 296209027a4SShri Abhyankar Kernel_A_gets_A_times_B_4: A = A * B with size bs=4 297209027a4SShri Abhyankar 298209027a4SShri Abhyankar Input Parameters: 299209027a4SShri Abhyankar + A,B - square bs by bs arrays stored in column major order 300209027a4SShri Abhyankar - W - bs*bs work arrary 301209027a4SShri Abhyankar 302209027a4SShri Abhyankar Output Parameter: 303209027a4SShri Abhyankar . A = A * B 304209027a4SShri Abhyankar */ 305209027a4SShri Abhyankar 306209027a4SShri Abhyankar #define Kernel_A_gets_A_times_B_4(A,B,W) 0;\ 307209027a4SShri Abhyankar {\ 308209027a4SShri Abhyankar PetscMemcpy(W,A,16*sizeof(MatScalar));\ 309209027a4SShri Abhyankar A[0] = W[0]*B[0] + W[4]*B[1] + W[8]*B[2] + W[12]*B[3];\ 310209027a4SShri Abhyankar A[1] = W[1]*B[0] + W[5]*B[1] + W[9]*B[2] + W[13]*B[3];\ 311209027a4SShri Abhyankar A[2] = W[2]*B[0] + W[6]*B[1] + W[10]*B[2] + W[14]*B[3];\ 312209027a4SShri Abhyankar A[3] = W[3]*B[0] + W[7]*B[1] + W[11]*B[2] + W[15]*B[3];\ 313209027a4SShri Abhyankar A[4] = W[0]*B[4] + W[4]*B[5] + W[8]*B[6] + W[12]*B[7];\ 314209027a4SShri Abhyankar A[5] = W[1]*B[4] + W[5]*B[5] + W[9]*B[6] + W[13]*B[7];\ 315209027a4SShri Abhyankar A[6] = W[2]*B[4] + W[6]*B[5] + W[10]*B[6] + W[14]*B[7];\ 316209027a4SShri Abhyankar A[7] = W[3]*B[4] + W[7]*B[5] + W[11]*B[6] + W[15]*B[7];\ 317209027a4SShri Abhyankar A[8] = W[0]*B[8] + W[4]*B[9] + W[8]*B[10] + W[12]*B[11];\ 318209027a4SShri Abhyankar A[9] = W[1]*B[8] + W[5]*B[9] + W[9]*B[10] + W[13]*B[11];\ 319209027a4SShri Abhyankar A[10] = W[2]*B[8] + W[6]*B[9] + W[10]*B[10] + W[14]*B[11];\ 320209027a4SShri Abhyankar A[11] = W[3]*B[8] + W[7]*B[9] + W[11]*B[10] + W[15]*B[11];\ 321209027a4SShri Abhyankar A[12] = W[0]*B[12] + W[4]*B[13] + W[8]*B[14] + W[12]*B[15];\ 322209027a4SShri Abhyankar A[13] = W[1]*B[12] + W[5]*B[13] + W[9]*B[14] + W[13]*B[15];\ 323209027a4SShri Abhyankar A[14] = W[2]*B[12] + W[6]*B[13] + W[10]*B[14] + W[14]*B[15];\ 324209027a4SShri Abhyankar A[15] = W[3]*B[12] + W[7]*B[13] + W[11]*B[14] + W[15]*B[15];\ 325209027a4SShri Abhyankar } 326209027a4SShri Abhyankar 327209027a4SShri Abhyankar /* 328209027a4SShri Abhyankar Kernel_A_gets_A_minus_B_times_C_4: A = A - B * C with size bs=4 329209027a4SShri Abhyankar 330209027a4SShri Abhyankar Input Parameters: 331209027a4SShri Abhyankar + A,B,C - square bs by bs arrays stored in column major order 332209027a4SShri Abhyankar 333209027a4SShri Abhyankar Output Parameter: 334209027a4SShri Abhyankar . A = A - B*C 335209027a4SShri Abhyankar */ 336209027a4SShri Abhyankar 337209027a4SShri Abhyankar #define Kernel_A_gets_A_minus_B_times_C_4(A,B,C) 0;\ 338209027a4SShri Abhyankar {\ 339209027a4SShri Abhyankar A[0] -= B[0]*C[0] + B[4]*C[1] + B[8]*C[2] + B[12]*C[3];\ 340209027a4SShri Abhyankar A[1] -= B[1]*C[0] + B[5]*C[1] + B[9]*C[2] + B[13]*C[3];\ 341209027a4SShri Abhyankar A[2] -= B[2]*C[0] + B[6]*C[1] + B[10]*C[2] + B[14]*C[3];\ 342209027a4SShri Abhyankar A[3] -= B[3]*C[0] + B[7]*C[1] + B[11]*C[2] + B[15]*C[3];\ 343209027a4SShri Abhyankar A[4] -= B[0]*C[4] + B[4]*C[5] + B[8]*C[6] + B[12]*C[7];\ 344209027a4SShri Abhyankar A[5] -= B[1]*C[4] + B[5]*C[5] + B[9]*C[6] + B[13]*C[7];\ 345209027a4SShri Abhyankar A[6] -= B[2]*C[4] + B[6]*C[5] + B[10]*C[6] + B[14]*C[7];\ 346209027a4SShri Abhyankar A[7] -= B[3]*C[4] + B[7]*C[5] + B[11]*C[6] + B[15]*C[7];\ 347209027a4SShri Abhyankar A[8] -= B[0]*C[8] + B[4]*C[9] + B[8]*C[10] + B[12]*C[11];\ 348209027a4SShri Abhyankar A[9] -= B[1]*C[8] + B[5]*C[9] + B[9]*C[10] + B[13]*C[11];\ 349209027a4SShri Abhyankar A[10] -= B[2]*C[8] + B[6]*C[9] + B[10]*C[10] + B[14]*C[11];\ 350209027a4SShri Abhyankar A[11] -= B[3]*C[8] + B[7]*C[9] + B[11]*C[10] + B[15]*C[11];\ 351209027a4SShri Abhyankar A[12] -= B[0]*C[12] + B[4]*C[13] + B[8]*C[14] + B[12]*C[15];\ 352209027a4SShri Abhyankar A[13] -= B[1]*C[12] + B[5]*C[13] + B[9]*C[14] + B[13]*C[15];\ 353209027a4SShri Abhyankar A[14] -= B[2]*C[12] + B[6]*C[13] + B[10]*C[14] + B[14]*C[15];\ 354209027a4SShri Abhyankar A[15] -= B[3]*C[12] + B[7]*C[13] + B[11]*C[14] + B[15]*C[15];\ 355209027a4SShri Abhyankar } 356209027a4SShri Abhyankar 3570deeaf61SShri Abhyankar #define Kernel_A_gets_A_times_B_5(A,B,W) 0;\ 3580deeaf61SShri Abhyankar {\ 3590deeaf61SShri Abhyankar PetscMemcpy(W,A,25*sizeof(MatScalar));\ 3600deeaf61SShri Abhyankar A[0] = W[0]*B[0] + W[5]*B[1] + W[10]*B[2] + W[15]*B[3] + W[20]*B[4];\ 3610deeaf61SShri Abhyankar A[1] = W[1]*B[0] + W[6]*B[1] + W[11]*B[2] + W[16]*B[3] + W[21]*B[4];\ 3620deeaf61SShri Abhyankar A[2] = W[2]*B[0] + W[7]*B[1] + W[12]*B[2] + W[17]*B[3] + W[22]*B[4];\ 3630deeaf61SShri Abhyankar A[3] = W[3]*B[0] + W[8]*B[1] + W[13]*B[2] + W[18]*B[3] + W[23]*B[4];\ 3640deeaf61SShri Abhyankar A[4] = W[4]*B[0] + W[9]*B[1] + W[14]*B[2] + W[19]*B[3] + W[24]*B[4];\ 3650deeaf61SShri Abhyankar A[5] = W[0]*B[5] + W[5]*B[6] + W[10]*B[7] + W[15]*B[8] + W[20]*B[9];\ 3660deeaf61SShri Abhyankar A[6] = W[1]*B[5] + W[6]*B[6] + W[11]*B[7] + W[16]*B[8] + W[21]*B[9];\ 3670deeaf61SShri Abhyankar A[7] = W[2]*B[5] + W[7]*B[6] + W[12]*B[7] + W[17]*B[8] + W[22]*B[9];\ 3680deeaf61SShri Abhyankar A[8] = W[3]*B[5] + W[8]*B[6] + W[13]*B[7] + W[18]*B[8] + W[23]*B[9];\ 3690deeaf61SShri Abhyankar A[9] = W[4]*B[5] + W[9]*B[6] + W[14]*B[7] + W[19]*B[8] + W[24]*B[9];\ 3700deeaf61SShri Abhyankar A[10] = W[0]*B[10] + W[5]*B[11] + W[10]*B[12] + W[15]*B[13] + W[20]*B[14];\ 3710deeaf61SShri Abhyankar A[11] = W[1]*B[10] + W[6]*B[11] + W[11]*B[12] + W[16]*B[13] + W[21]*B[14];\ 3720deeaf61SShri Abhyankar A[12] = W[2]*B[10] + W[7]*B[11] + W[12]*B[12] + W[17]*B[13] + W[22]*B[14];\ 3730deeaf61SShri Abhyankar A[13] = W[3]*B[10] + W[8]*B[11] + W[13]*B[12] + W[18]*B[13] + W[23]*B[14];\ 3740deeaf61SShri Abhyankar A[14] = W[4]*B[10] + W[9]*B[11] + W[14]*B[12] + W[19]*B[13] + W[24]*B[14];\ 3750deeaf61SShri Abhyankar A[15] = W[0]*B[15] + W[5]*B[16] + W[10]*B[17] + W[15]*B[18] + W[20]*B[19];\ 3760deeaf61SShri Abhyankar A[16] = W[1]*B[15] + W[6]*B[16] + W[11]*B[17] + W[16]*B[18] + W[21]*B[19];\ 3770deeaf61SShri Abhyankar A[17] = W[2]*B[15] + W[7]*B[16] + W[12]*B[17] + W[17]*B[18] + W[22]*B[19];\ 3780deeaf61SShri Abhyankar A[18] = W[3]*B[15] + W[8]*B[16] + W[13]*B[17] + W[18]*B[18] + W[23]*B[19];\ 3790deeaf61SShri Abhyankar A[19] = W[4]*B[15] + W[9]*B[16] + W[14]*B[17] + W[19]*B[18] + W[24]*B[19];\ 3800deeaf61SShri Abhyankar A[20] = W[0]*B[20] + W[5]*B[21] + W[10]*B[22] + W[15]*B[23] + W[20]*B[24];\ 3810deeaf61SShri Abhyankar A[21] = W[1]*B[20] + W[6]*B[21] + W[11]*B[22] + W[16]*B[23] + W[21]*B[24];\ 3820deeaf61SShri Abhyankar A[22] = W[2]*B[20] + W[7]*B[21] + W[12]*B[22] + W[17]*B[23] + W[22]*B[24];\ 3830deeaf61SShri Abhyankar A[23] = W[3]*B[20] + W[8]*B[21] + W[13]*B[22] + W[18]*B[23] + W[23]*B[24];\ 3840deeaf61SShri Abhyankar A[24] = W[4]*B[20] + W[9]*B[21] + W[14]*B[22] + W[19]*B[23] + W[24]*B[24];\ 3850deeaf61SShri Abhyankar } 3860deeaf61SShri Abhyankar 3870deeaf61SShri Abhyankar /* 3880deeaf61SShri Abhyankar Kernel_A_gets_A_minus_B_times_C_5: A = A - B * C with size bs=5 3890deeaf61SShri Abhyankar 3900deeaf61SShri Abhyankar Input Parameters: 3910deeaf61SShri Abhyankar + A,B,C - square bs by bs arrays stored in column major order 3920deeaf61SShri Abhyankar 3930deeaf61SShri Abhyankar Output Parameter: 3940deeaf61SShri Abhyankar . A = A - B*C 3950deeaf61SShri Abhyankar */ 3960deeaf61SShri Abhyankar 3970deeaf61SShri Abhyankar #define Kernel_A_gets_A_minus_B_times_C_5(A,B,C) 0;\ 3980deeaf61SShri Abhyankar {\ 3990deeaf61SShri Abhyankar A[0] -= B[0]*C[0] + B[5]*C[1] + B[10]*C[2] + B[15]*C[3] + B[20]*C[4];\ 4000deeaf61SShri Abhyankar A[1] -= B[1]*C[0] + B[6]*C[1] + B[11]*C[2] + B[16]*C[3] + B[21]*C[4];\ 4010deeaf61SShri Abhyankar A[2] -= B[2]*C[0] + B[7]*C[1] + B[12]*C[2] + B[17]*C[3] + B[22]*C[4];\ 4020deeaf61SShri Abhyankar A[3] -= B[3]*C[0] + B[8]*C[1] + B[13]*C[2] + B[18]*C[3] + B[23]*C[4];\ 4030deeaf61SShri Abhyankar A[4] -= B[4]*C[0] + B[9]*C[1] + B[14]*C[2] + B[19]*C[3] + B[24]*C[4];\ 4040deeaf61SShri Abhyankar A[5] -= B[0]*C[5] + B[5]*C[6] + B[10]*C[7] + B[15]*C[8] + B[20]*C[9];\ 4050deeaf61SShri Abhyankar A[6] -= B[1]*C[5] + B[6]*C[6] + B[11]*C[7] + B[16]*C[8] + B[21]*C[9];\ 4060deeaf61SShri Abhyankar A[7] -= B[2]*C[5] + B[7]*C[6] + B[12]*C[7] + B[17]*C[8] + B[22]*C[9];\ 4070deeaf61SShri Abhyankar A[8] -= B[3]*C[5] + B[8]*C[6] + B[13]*C[7] + B[18]*C[8] + B[23]*C[9];\ 4080deeaf61SShri Abhyankar A[9] -= B[4]*C[5] + B[9]*C[6] + B[14]*C[7] + B[19]*C[8] + B[24]*C[9];\ 4090deeaf61SShri Abhyankar A[10] -= B[0]*C[10] + B[5]*C[11] + B[10]*C[12] + B[15]*C[13] + B[20]*C[14];\ 4100deeaf61SShri Abhyankar A[11] -= B[1]*C[10] + B[6]*C[11] + B[11]*C[12] + B[16]*C[13] + B[21]*C[14];\ 4110deeaf61SShri Abhyankar A[12] -= B[2]*C[10] + B[7]*C[11] + B[12]*C[12] + B[17]*C[13] + B[22]*C[14];\ 4120deeaf61SShri Abhyankar A[13] -= B[3]*C[10] + B[8]*C[11] + B[13]*C[12] + B[18]*C[13] + B[23]*C[14];\ 4130deeaf61SShri Abhyankar A[14] -= B[4]*C[10] + B[9]*C[11] + B[14]*C[12] + B[19]*C[13] + B[24]*C[14];\ 4140deeaf61SShri Abhyankar A[15] -= B[0]*C[15] + B[5]*C[16] + B[10]*C[17] + B[15]*C[18] + B[20]*C[19];\ 4150deeaf61SShri Abhyankar A[16] -= B[1]*C[15] + B[6]*C[16] + B[11]*C[17] + B[16]*C[18] + B[21]*C[19];\ 4160deeaf61SShri Abhyankar A[17] -= B[2]*C[15] + B[7]*C[16] + B[12]*C[17] + B[17]*C[18] + B[22]*C[19];\ 4170deeaf61SShri Abhyankar A[18] -= B[3]*C[15] + B[8]*C[16] + B[13]*C[17] + B[18]*C[18] + B[23]*C[19];\ 4180deeaf61SShri Abhyankar A[19] -= B[4]*C[15] + B[9]*C[16] + B[14]*C[17] + B[19]*C[18] + B[24]*C[19];\ 4190deeaf61SShri Abhyankar A[20] -= B[0]*C[20] + B[5]*C[21] + B[10]*C[22] + B[15]*C[23] + B[20]*C[24];\ 4200deeaf61SShri Abhyankar A[21] -= B[1]*C[20] + B[6]*C[21] + B[11]*C[22] + B[16]*C[23] + B[21]*C[24];\ 4210deeaf61SShri Abhyankar A[22] -= B[2]*C[20] + B[7]*C[21] + B[12]*C[22] + B[17]*C[23] + B[22]*C[24];\ 4220deeaf61SShri Abhyankar A[23] -= B[3]*C[20] + B[8]*C[21] + B[13]*C[22] + B[18]*C[23] + B[23]*C[24];\ 4230deeaf61SShri Abhyankar A[24] -= B[4]*C[20] + B[9]*C[21] + B[14]*C[22] + B[19]*C[23] + B[24]*C[24];\ 4240deeaf61SShri Abhyankar } 4250deeaf61SShri Abhyankar 426bef36659SShri Abhyankar #define Kernel_A_gets_A_times_B_6(A,B,W) 0;\ 427bef36659SShri Abhyankar {\ 428bef36659SShri Abhyankar PetscMemcpy(W,A,36*sizeof(MatScalar));\ 429bef36659SShri Abhyankar A[0] = W[0]*B[0] + W[6]*B[1] + W[12]*B[2] + W[18]*B[3] + W[24]*B[4] + W[30]*B[5];\ 430bef36659SShri Abhyankar A[1] = W[1]*B[0] + W[7]*B[1] + W[13]*B[2] + W[19]*B[3] + W[25]*B[4] + W[31]*B[5];\ 431bef36659SShri Abhyankar A[2] = W[2]*B[0] + W[8]*B[1] + W[14]*B[2] + W[20]*B[3] + W[26]*B[4] + W[32]*B[5];\ 432bef36659SShri Abhyankar A[3] = W[3]*B[0] + W[9]*B[1] + W[15]*B[2] + W[21]*B[3] + W[27]*B[4] + W[33]*B[5];\ 433bef36659SShri Abhyankar A[4] = W[4]*B[0] + W[10]*B[1] + W[16]*B[2] + W[22]*B[3] + W[28]*B[4] + W[34]*B[5];\ 434bef36659SShri Abhyankar A[5] = W[5]*B[0] + W[11]*B[1] + W[17]*B[2] + W[23]*B[3] + W[29]*B[4] + W[35]*B[5];\ 435bef36659SShri Abhyankar A[6] = W[0]*B[6] + W[6]*B[7] + W[12]*B[8] + W[18]*B[9] + W[24]*B[10] + W[30]*B[11];\ 436bef36659SShri Abhyankar A[7] = W[1]*B[6] + W[7]*B[7] + W[13]*B[8] + W[19]*B[9] + W[25]*B[10] + W[31]*B[11];\ 437bef36659SShri Abhyankar A[8] = W[2]*B[6] + W[8]*B[7] + W[14]*B[8] + W[20]*B[9] + W[26]*B[10] + W[32]*B[11];\ 438bef36659SShri Abhyankar A[9] = W[3]*B[6] + W[9]*B[7] + W[15]*B[8] + W[21]*B[9] + W[27]*B[10] + W[33]*B[11];\ 439bef36659SShri Abhyankar A[10] = W[4]*B[6] + W[10]*B[7] + W[16]*B[8] + W[22]*B[9] + W[28]*B[10] + W[34]*B[11];\ 440bef36659SShri Abhyankar A[11] = W[5]*B[6] + W[11]*B[7] + W[17]*B[8] + W[23]*B[9] + W[29]*B[10] + W[35]*B[11];\ 441bef36659SShri Abhyankar A[12] = W[0]*B[12] + W[6]*B[13] + W[12]*B[14] + W[18]*B[15] + W[24]*B[16] + W[30]*B[17];\ 442bef36659SShri Abhyankar A[13] = W[1]*B[12] + W[7]*B[13] + W[13]*B[14] + W[19]*B[15] + W[25]*B[16] + W[31]*B[17];\ 443bef36659SShri Abhyankar A[14] = W[2]*B[12] + W[8]*B[13] + W[14]*B[14] + W[20]*B[15] + W[26]*B[16] + W[32]*B[17];\ 444bef36659SShri Abhyankar A[15] = W[3]*B[12] + W[9]*B[13] + W[15]*B[14] + W[21]*B[15] + W[27]*B[16] + W[33]*B[17];\ 445bef36659SShri Abhyankar A[16] = W[4]*B[12] + W[10]*B[13] + W[16]*B[14] + W[22]*B[15] + W[28]*B[16] + W[34]*B[17];\ 446bef36659SShri Abhyankar A[17] = W[5]*B[12] + W[11]*B[13] + W[17]*B[14] + W[23]*B[15] + W[29]*B[16] + W[35]*B[17];\ 447bef36659SShri Abhyankar A[18] = W[0]*B[18] + W[6]*B[19] + W[12]*B[20] + W[18]*B[21] + W[24]*B[22] + W[30]*B[23];\ 448bef36659SShri Abhyankar A[19] = W[1]*B[18] + W[7]*B[19] + W[13]*B[20] + W[19]*B[21] + W[25]*B[22] + W[31]*B[23];\ 449bef36659SShri Abhyankar A[20] = W[2]*B[18] + W[8]*B[19] + W[14]*B[20] + W[20]*B[21] + W[26]*B[22] + W[32]*B[23];\ 450bef36659SShri Abhyankar A[21] = W[3]*B[18] + W[9]*B[19] + W[15]*B[20] + W[21]*B[21] + W[27]*B[22] + W[33]*B[23];\ 451bef36659SShri Abhyankar A[22] = W[4]*B[18] + W[10]*B[19] + W[16]*B[20] + W[22]*B[21] + W[28]*B[22] + W[34]*B[23];\ 452bef36659SShri Abhyankar A[23] = W[5]*B[18] + W[11]*B[19] + W[17]*B[20] + W[23]*B[21] + W[29]*B[22] + W[35]*B[23];\ 453bef36659SShri Abhyankar A[24] = W[0]*B[24] + W[6]*B[25] + W[12]*B[26] + W[18]*B[27] + W[24]*B[28] + W[30]*B[29];\ 454bef36659SShri Abhyankar A[25] = W[1]*B[24] + W[7]*B[25] + W[13]*B[26] + W[19]*B[27] + W[25]*B[28] + W[31]*B[29];\ 455bef36659SShri Abhyankar A[26] = W[2]*B[24] + W[8]*B[25] + W[14]*B[26] + W[20]*B[27] + W[26]*B[28] + W[32]*B[29];\ 456bef36659SShri Abhyankar A[27] = W[3]*B[24] + W[9]*B[25] + W[15]*B[26] + W[21]*B[27] + W[27]*B[28] + W[33]*B[29];\ 457bef36659SShri Abhyankar A[28] = W[4]*B[24] + W[10]*B[25] + W[16]*B[26] + W[22]*B[27] + W[28]*B[28] + W[34]*B[29];\ 458bef36659SShri Abhyankar A[29] = W[5]*B[24] + W[11]*B[25] + W[17]*B[26] + W[23]*B[27] + W[29]*B[28] + W[35]*B[29];\ 459bef36659SShri Abhyankar A[30] = W[0]*B[30] + W[6]*B[31] + W[12]*B[32] + W[18]*B[33] + W[24]*B[34] + W[30]*B[35];\ 460bef36659SShri Abhyankar A[31] = W[1]*B[30] + W[7]*B[31] + W[13]*B[32] + W[19]*B[33] + W[25]*B[34] + W[31]*B[35];\ 461bef36659SShri Abhyankar A[32] = W[2]*B[30] + W[8]*B[31] + W[14]*B[32] + W[20]*B[33] + W[26]*B[34] + W[32]*B[35];\ 462bef36659SShri Abhyankar A[33] = W[3]*B[30] + W[9]*B[31] + W[15]*B[32] + W[21]*B[33] + W[27]*B[34] + W[33]*B[35];\ 463bef36659SShri Abhyankar A[34] = W[4]*B[30] + W[10]*B[31] + W[16]*B[32] + W[22]*B[33] + W[28]*B[34] + W[34]*B[35];\ 464bef36659SShri Abhyankar A[35] = W[5]*B[30] + W[11]*B[31] + W[17]*B[32] + W[23]*B[33] + W[29]*B[34] + W[35]*B[35];\ 465bef36659SShri Abhyankar } 466bef36659SShri Abhyankar 467bef36659SShri Abhyankar /* 468bef36659SShri Abhyankar Kernel_A_gets_A_minus_B_times_C_6: A = A - B * C with size bs=6 469bef36659SShri Abhyankar 470bef36659SShri Abhyankar Input Parameters: 471bef36659SShri Abhyankar + A,B,C - square bs by bs arrays stored in column major order 472bef36659SShri Abhyankar 473bef36659SShri Abhyankar Output Parameter: 474bef36659SShri Abhyankar . A = A - B*C 475bef36659SShri Abhyankar */ 476bef36659SShri Abhyankar 477bef36659SShri Abhyankar #define Kernel_A_gets_A_minus_B_times_C_6(A,B,C) 0;\ 478bef36659SShri Abhyankar {\ 479bef36659SShri Abhyankar A[0] -= B[0]*C[0] + B[6]*C[1] + B[12]*C[2] + B[18]*C[3] + B[24]*C[4] + B[30]*C[5];\ 480bef36659SShri Abhyankar A[1] -= B[1]*C[0] + B[7]*C[1] + B[13]*C[2] + B[19]*C[3] + B[25]*C[4] + B[31]*C[5];\ 481bef36659SShri Abhyankar A[2] -= B[2]*C[0] + B[8]*C[1] + B[14]*C[2] + B[20]*C[3] + B[26]*C[4] + B[32]*C[5];\ 482bef36659SShri Abhyankar A[3] -= B[3]*C[0] + B[9]*C[1] + B[15]*C[2] + B[21]*C[3] + B[27]*C[4] + B[33]*C[5];\ 483bef36659SShri Abhyankar A[4] -= B[4]*C[0] + B[10]*C[1] + B[16]*C[2] + B[22]*C[3] + B[28]*C[4] + B[34]*C[5];\ 484bef36659SShri Abhyankar A[5] -= B[5]*C[0] + B[11]*C[1] + B[17]*C[2] + B[23]*C[3] + B[29]*C[4] + B[35]*C[5];\ 485bef36659SShri Abhyankar A[6] -= B[0]*C[6] + B[6]*C[7] + B[12]*C[8] + B[18]*C[9] + B[24]*C[10] + B[30]*C[11];\ 486bef36659SShri Abhyankar A[7] -= B[1]*C[6] + B[7]*C[7] + B[13]*C[8] + B[19]*C[9] + B[25]*C[10] + B[31]*C[11];\ 487bef36659SShri Abhyankar A[8] -= B[2]*C[6] + B[8]*C[7] + B[14]*C[8] + B[20]*C[9] + B[26]*C[10] + B[32]*C[11];\ 488bef36659SShri Abhyankar A[9] -= B[3]*C[6] + B[9]*C[7] + B[15]*C[8] + B[21]*C[9] + B[27]*C[10] + B[33]*C[11];\ 489bef36659SShri Abhyankar A[10] -= B[4]*C[6] + B[10]*C[7] + B[16]*C[8] + B[22]*C[9] + B[28]*C[10] + B[34]*C[11];\ 490bef36659SShri Abhyankar A[11] -= B[5]*C[6] + B[11]*C[7] + B[17]*C[8] + B[23]*C[9] + B[29]*C[10] + B[35]*C[11];\ 491bef36659SShri Abhyankar A[12] -= B[0]*C[12] + B[6]*C[13] + B[12]*C[14] + B[18]*C[15] + B[24]*C[16] + B[30]*C[17];\ 492bef36659SShri Abhyankar A[13] -= B[1]*C[12] + B[7]*C[13] + B[13]*C[14] + B[19]*C[15] + B[25]*C[16] + B[31]*C[17];\ 493bef36659SShri Abhyankar A[14] -= B[2]*C[12] + B[8]*C[13] + B[14]*C[14] + B[20]*C[15] + B[26]*C[16] + B[32]*C[17];\ 494bef36659SShri Abhyankar A[15] -= B[3]*C[12] + B[9]*C[13] + B[15]*C[14] + B[21]*C[15] + B[27]*C[16] + B[33]*C[17];\ 495bef36659SShri Abhyankar A[16] -= B[4]*C[12] + B[10]*C[13] + B[16]*C[14] + B[22]*C[15] + B[28]*C[16] + B[34]*C[17];\ 496bef36659SShri Abhyankar A[17] -= B[5]*C[12] + B[11]*C[13] + B[17]*C[14] + B[23]*C[15] + B[29]*C[16] + B[35]*C[17];\ 497bef36659SShri Abhyankar A[18] -= B[0]*C[18] + B[6]*C[19] + B[12]*C[20] + B[18]*C[21] + B[24]*C[22] + B[30]*C[23];\ 498bef36659SShri Abhyankar A[19] -= B[1]*C[18] + B[7]*C[19] + B[13]*C[20] + B[19]*C[21] + B[25]*C[22] + B[31]*C[23];\ 499bef36659SShri Abhyankar A[20] -= B[2]*C[18] + B[8]*C[19] + B[14]*C[20] + B[20]*C[21] + B[26]*C[22] + B[32]*C[23];\ 500bef36659SShri Abhyankar A[21] -= B[3]*C[18] + B[9]*C[19] + B[15]*C[20] + B[21]*C[21] + B[27]*C[22] + B[33]*C[23];\ 501bef36659SShri Abhyankar A[22] -= B[4]*C[18] + B[10]*C[19] + B[16]*C[20] + B[22]*C[21] + B[28]*C[22] + B[34]*C[23];\ 502bef36659SShri Abhyankar A[23] -= B[5]*C[18] + B[11]*C[19] + B[17]*C[20] + B[23]*C[21] + B[29]*C[22] + B[35]*C[23];\ 503bef36659SShri Abhyankar A[24] -= B[0]*C[24] + B[6]*C[25] + B[12]*C[26] + B[18]*C[27] + B[24]*C[28] + B[30]*C[29];\ 504bef36659SShri Abhyankar A[25] -= B[1]*C[24] + B[7]*C[25] + B[13]*C[26] + B[19]*C[27] + B[25]*C[28] + B[31]*C[29];\ 505bef36659SShri Abhyankar A[26] -= B[2]*C[24] + B[8]*C[25] + B[14]*C[26] + B[20]*C[27] + B[26]*C[28] + B[32]*C[29];\ 506bef36659SShri Abhyankar A[27] -= B[3]*C[24] + B[9]*C[25] + B[15]*C[26] + B[21]*C[27] + B[27]*C[28] + B[33]*C[29];\ 507bef36659SShri Abhyankar A[28] -= B[4]*C[24] + B[10]*C[25] + B[16]*C[26] + B[22]*C[27] + B[28]*C[28] + B[34]*C[29];\ 508bef36659SShri Abhyankar A[29] -= B[5]*C[24] + B[11]*C[25] + B[17]*C[26] + B[23]*C[27] + B[29]*C[28] + B[35]*C[29];\ 509bef36659SShri Abhyankar A[30] -= B[0]*C[30] + B[6]*C[31] + B[12]*C[32] + B[18]*C[33] + B[24]*C[34] + B[30]*C[35];\ 510bef36659SShri Abhyankar A[31] -= B[1]*C[30] + B[7]*C[31] + B[13]*C[32] + B[19]*C[33] + B[25]*C[34] + B[31]*C[35];\ 511bef36659SShri Abhyankar A[32] -= B[2]*C[30] + B[8]*C[31] + B[14]*C[32] + B[20]*C[33] + B[26]*C[34] + B[32]*C[35];\ 512bef36659SShri Abhyankar A[33] -= B[3]*C[30] + B[9]*C[31] + B[15]*C[32] + B[21]*C[33] + B[27]*C[34] + B[33]*C[35];\ 513bef36659SShri Abhyankar A[34] -= B[4]*C[30] + B[10]*C[31] + B[16]*C[32] + B[22]*C[33] + B[28]*C[34] + B[34]*C[35];\ 514bef36659SShri Abhyankar A[35] -= B[5]*C[30] + B[11]*C[31] + B[17]*C[32] + B[23]*C[33] + B[29]*C[34] + B[35]*C[35];\ 515bef36659SShri Abhyankar } 516bef36659SShri Abhyankar 517bef36659SShri Abhyankar #define Kernel_A_gets_A_times_B_7(A,B,W) 0;\ 518bef36659SShri Abhyankar {\ 519bef36659SShri Abhyankar PetscMemcpy(W,A,49*sizeof(MatScalar));\ 520bef36659SShri Abhyankar A[0] = W[0]*B[0] + W[7]*B[1] + W[14]*B[2] + W[21]*B[3] + W[28]*B[4] + W[35]*B[5] + W[42]*B[6];\ 521bef36659SShri Abhyankar A[1] = W[1]*B[0] + W[8]*B[1] + W[15]*B[2] + W[22]*B[3] + W[29]*B[4] + W[36]*B[5] + W[43]*B[6];\ 522bef36659SShri Abhyankar A[2] = W[2]*B[0] + W[9]*B[1] + W[16]*B[2] + W[23]*B[3] + W[30]*B[4] + W[37]*B[5] + W[44]*B[6];\ 523bef36659SShri Abhyankar A[3] = W[3]*B[0] + W[10]*B[1] + W[17]*B[2] + W[24]*B[3] + W[31]*B[4] + W[38]*B[5] + W[45]*B[6];\ 524bef36659SShri Abhyankar A[4] = W[4]*B[0] + W[11]*B[1] + W[18]*B[2] + W[25]*B[3] + W[32]*B[4] + W[39]*B[5] + W[46]*B[6];\ 525bef36659SShri Abhyankar A[5] = W[5]*B[0] + W[12]*B[1] + W[19]*B[2] + W[26]*B[3] + W[33]*B[4] + W[40]*B[5] + W[47]*B[6];\ 526bef36659SShri Abhyankar A[6] = W[6]*B[0] + W[13]*B[1] + W[20]*B[2] + W[27]*B[3] + W[34]*B[4] + W[41]*B[5] + W[48]*B[6];\ 527bef36659SShri Abhyankar A[7] = W[0]*B[7] + W[7]*B[8] + W[14]*B[9] + W[21]*B[10] + W[28]*B[11] + W[35]*B[12] + W[42]*B[13];\ 528bef36659SShri Abhyankar A[8] = W[1]*B[7] + W[8]*B[8] + W[15]*B[9] + W[22]*B[10] + W[29]*B[11] + W[36]*B[12] + W[43]*B[13];\ 529bef36659SShri Abhyankar A[9] = W[2]*B[7] + W[9]*B[8] + W[16]*B[9] + W[23]*B[10] + W[30]*B[11] + W[37]*B[12] + W[44]*B[13];\ 530bef36659SShri Abhyankar A[10] = W[3]*B[7] + W[10]*B[8] + W[17]*B[9] + W[24]*B[10] + W[31]*B[11] + W[38]*B[12] + W[45]*B[13];\ 531bef36659SShri Abhyankar A[11] = W[4]*B[7] + W[11]*B[8] + W[18]*B[9] + W[25]*B[10] + W[32]*B[11] + W[39]*B[12] + W[46]*B[13];\ 532bef36659SShri Abhyankar A[12] = W[5]*B[7] + W[12]*B[8] + W[19]*B[9] + W[26]*B[10] + W[33]*B[11] + W[40]*B[12] + W[47]*B[13];\ 533bef36659SShri Abhyankar A[13] = W[6]*B[7] + W[13]*B[8] + W[20]*B[9] + W[27]*B[10] + W[34]*B[11] + W[41]*B[12] + W[48]*B[13];\ 534bef36659SShri Abhyankar A[14] = W[0]*B[14] + W[7]*B[15] + W[14]*B[16] + W[21]*B[17] + W[28]*B[18] + W[35]*B[19] + W[42]*B[20];\ 535bef36659SShri Abhyankar A[15] = W[1]*B[14] + W[8]*B[15] + W[15]*B[16] + W[22]*B[17] + W[29]*B[18] + W[36]*B[19] + W[43]*B[20];\ 536bef36659SShri Abhyankar A[16] = W[2]*B[14] + W[9]*B[15] + W[16]*B[16] + W[23]*B[17] + W[30]*B[18] + W[37]*B[19] + W[44]*B[20];\ 537bef36659SShri Abhyankar A[17] = W[3]*B[14] + W[10]*B[15] + W[17]*B[16] + W[24]*B[17] + W[31]*B[18] + W[38]*B[19] + W[45]*B[20];\ 538bef36659SShri Abhyankar A[18] = W[4]*B[14] + W[11]*B[15] + W[18]*B[16] + W[25]*B[17] + W[32]*B[18] + W[39]*B[19] + W[46]*B[20];\ 539bef36659SShri Abhyankar A[19] = W[5]*B[14] + W[12]*B[15] + W[19]*B[16] + W[26]*B[17] + W[33]*B[18] + W[40]*B[19] + W[47]*B[20];\ 540bef36659SShri Abhyankar A[20] = W[6]*B[14] + W[13]*B[15] + W[20]*B[16] + W[27]*B[17] + W[34]*B[18] + W[41]*B[19] + W[48]*B[20];\ 541bef36659SShri Abhyankar A[21] = W[0]*B[21] + W[7]*B[22] + W[14]*B[23] + W[21]*B[24] + W[28]*B[25] + W[35]*B[26] + W[42]*B[27];\ 542bef36659SShri Abhyankar A[22] = W[1]*B[21] + W[8]*B[22] + W[15]*B[23] + W[22]*B[24] + W[29]*B[25] + W[36]*B[26] + W[43]*B[27];\ 543bef36659SShri Abhyankar A[23] = W[2]*B[21] + W[9]*B[22] + W[16]*B[23] + W[23]*B[24] + W[30]*B[25] + W[37]*B[26] + W[44]*B[27];\ 544bef36659SShri Abhyankar A[24] = W[3]*B[21] + W[10]*B[22] + W[17]*B[23] + W[24]*B[24] + W[31]*B[25] + W[38]*B[26] + W[45]*B[27];\ 545bef36659SShri Abhyankar A[25] = W[4]*B[21] + W[11]*B[22] + W[18]*B[23] + W[25]*B[24] + W[32]*B[25] + W[39]*B[26] + W[46]*B[27];\ 546bef36659SShri Abhyankar A[26] = W[5]*B[21] + W[12]*B[22] + W[19]*B[23] + W[26]*B[24] + W[33]*B[25] + W[40]*B[26] + W[47]*B[27];\ 547bef36659SShri Abhyankar A[27] = W[6]*B[21] + W[13]*B[22] + W[20]*B[23] + W[27]*B[24] + W[34]*B[25] + W[41]*B[26] + W[48]*B[27];\ 548bef36659SShri Abhyankar A[28] = W[0]*B[28] + W[7]*B[29] + W[14]*B[30] + W[21]*B[31] + W[28]*B[32] + W[35]*B[33] + W[42]*B[34];\ 549bef36659SShri Abhyankar A[29] = W[1]*B[28] + W[8]*B[29] + W[15]*B[30] + W[22]*B[31] + W[29]*B[32] + W[36]*B[33] + W[43]*B[34];\ 550bef36659SShri Abhyankar A[30] = W[2]*B[28] + W[9]*B[29] + W[16]*B[30] + W[23]*B[31] + W[30]*B[32] + W[37]*B[33] + W[44]*B[34];\ 551bef36659SShri Abhyankar A[31] = W[3]*B[28] + W[10]*B[29] + W[17]*B[30] + W[24]*B[31] + W[31]*B[32] + W[38]*B[33] + W[45]*B[34];\ 552bef36659SShri Abhyankar A[32] = W[4]*B[28] + W[11]*B[29] + W[18]*B[30] + W[25]*B[31] + W[32]*B[32] + W[39]*B[33] + W[46]*B[34];\ 553bef36659SShri Abhyankar A[33] = W[5]*B[28] + W[12]*B[29] + W[19]*B[30] + W[26]*B[31] + W[33]*B[32] + W[40]*B[33] + W[47]*B[34];\ 554bef36659SShri Abhyankar A[34] = W[6]*B[28] + W[13]*B[29] + W[20]*B[30] + W[27]*B[31] + W[34]*B[32] + W[41]*B[33] + W[48]*B[34];\ 555bef36659SShri Abhyankar A[35] = W[0]*B[35] + W[7]*B[36] + W[14]*B[37] + W[21]*B[38] + W[28]*B[39] + W[35]*B[40] + W[42]*B[41];\ 556bef36659SShri Abhyankar A[36] = W[1]*B[35] + W[8]*B[36] + W[15]*B[37] + W[22]*B[38] + W[29]*B[39] + W[36]*B[40] + W[43]*B[41];\ 557bef36659SShri Abhyankar A[37] = W[2]*B[35] + W[9]*B[36] + W[16]*B[37] + W[23]*B[38] + W[30]*B[39] + W[37]*B[40] + W[44]*B[41];\ 558bef36659SShri Abhyankar A[38] = W[3]*B[35] + W[10]*B[36] + W[17]*B[37] + W[24]*B[38] + W[31]*B[39] + W[38]*B[40] + W[45]*B[41];\ 559bef36659SShri Abhyankar A[39] = W[4]*B[35] + W[11]*B[36] + W[18]*B[37] + W[25]*B[38] + W[32]*B[39] + W[39]*B[40] + W[46]*B[41];\ 560bef36659SShri Abhyankar A[40] = W[5]*B[35] + W[12]*B[36] + W[19]*B[37] + W[26]*B[38] + W[33]*B[39] + W[40]*B[40] + W[47]*B[41];\ 561bef36659SShri Abhyankar A[41] = W[6]*B[35] + W[13]*B[36] + W[20]*B[37] + W[27]*B[38] + W[34]*B[39] + W[41]*B[40] + W[48]*B[41];\ 562bef36659SShri Abhyankar A[42] = W[0]*B[42] + W[7]*B[43] + W[14]*B[44] + W[21]*B[45] + W[28]*B[46] + W[35]*B[47] + W[42]*B[48];\ 563bef36659SShri Abhyankar A[43] = W[1]*B[42] + W[8]*B[43] + W[15]*B[44] + W[22]*B[45] + W[29]*B[46] + W[36]*B[47] + W[43]*B[48];\ 564bef36659SShri Abhyankar A[44] = W[2]*B[42] + W[9]*B[43] + W[16]*B[44] + W[23]*B[45] + W[30]*B[46] + W[37]*B[47] + W[44]*B[48];\ 565bef36659SShri Abhyankar A[45] = W[3]*B[42] + W[10]*B[43] + W[17]*B[44] + W[24]*B[45] + W[31]*B[46] + W[38]*B[47] + W[45]*B[48];\ 566bef36659SShri Abhyankar A[46] = W[4]*B[42] + W[11]*B[43] + W[18]*B[44] + W[25]*B[45] + W[32]*B[46] + W[39]*B[47] + W[46]*B[48];\ 567bef36659SShri Abhyankar A[47] = W[5]*B[42] + W[12]*B[43] + W[19]*B[44] + W[26]*B[45] + W[33]*B[46] + W[40]*B[47] + W[47]*B[48];\ 568bef36659SShri Abhyankar A[48] = W[6]*B[42] + W[13]*B[43] + W[20]*B[44] + W[27]*B[45] + W[34]*B[46] + W[41]*B[47] + W[48]*B[48];\ 569bef36659SShri Abhyankar } 570bef36659SShri Abhyankar 571bef36659SShri Abhyankar /* 572bef36659SShri Abhyankar Kernel_A_gets_A_minus_B_times_C_7: A = A - B * C with size bs=7 573bef36659SShri Abhyankar 574bef36659SShri Abhyankar Input Parameters: 575bef36659SShri Abhyankar + A,B,C - square bs by bs arrays stored in column major order 576bef36659SShri Abhyankar 577bef36659SShri Abhyankar Output Parameter: 578bef36659SShri Abhyankar . A = A - B*C 579bef36659SShri Abhyankar */ 580bef36659SShri Abhyankar 581bef36659SShri Abhyankar #define Kernel_A_gets_A_minus_B_times_C_7(A,B,C) 0;\ 582bef36659SShri Abhyankar {\ 583bef36659SShri Abhyankar A[0] -= B[0]*C[0] + B[7]*C[1] + B[14]*C[2] + B[21]*C[3] + B[28]*C[4] + B[35]*C[5] + B[42]*C[6];\ 584bef36659SShri Abhyankar A[1] -= B[1]*C[0] + B[8]*C[1] + B[15]*C[2] + B[22]*C[3] + B[29]*C[4] + B[36]*C[5] + B[43]*C[6];\ 585bef36659SShri Abhyankar A[2] -= B[2]*C[0] + B[9]*C[1] + B[16]*C[2] + B[23]*C[3] + B[30]*C[4] + B[37]*C[5] + B[44]*C[6];\ 586bef36659SShri Abhyankar A[3] -= B[3]*C[0] + B[10]*C[1] + B[17]*C[2] + B[24]*C[3] + B[31]*C[4] + B[38]*C[5] + B[45]*C[6];\ 587bef36659SShri Abhyankar A[4] -= B[4]*C[0] + B[11]*C[1] + B[18]*C[2] + B[25]*C[3] + B[32]*C[4] + B[39]*C[5] + B[46]*C[6];\ 588bef36659SShri Abhyankar A[5] -= B[5]*C[0] + B[12]*C[1] + B[19]*C[2] + B[26]*C[3] + B[33]*C[4] + B[40]*C[5] + B[47]*C[6];\ 589bef36659SShri Abhyankar A[6] -= B[6]*C[0] + B[13]*C[1] + B[20]*C[2] + B[27]*C[3] + B[34]*C[4] + B[41]*C[5] + B[48]*C[6];\ 590bef36659SShri Abhyankar A[7] -= B[0]*C[7] + B[7]*C[8] + B[14]*C[9] + B[21]*C[10] + B[28]*C[11] + B[35]*C[12] + B[42]*C[13];\ 591bef36659SShri Abhyankar A[8] -= B[1]*C[7] + B[8]*C[8] + B[15]*C[9] + B[22]*C[10] + B[29]*C[11] + B[36]*C[12] + B[43]*C[13];\ 592bef36659SShri Abhyankar A[9] -= B[2]*C[7] + B[9]*C[8] + B[16]*C[9] + B[23]*C[10] + B[30]*C[11] + B[37]*C[12] + B[44]*C[13];\ 593bef36659SShri Abhyankar A[10] -= B[3]*C[7] + B[10]*C[8] + B[17]*C[9] + B[24]*C[10] + B[31]*C[11] + B[38]*C[12] + B[45]*C[13];\ 594bef36659SShri Abhyankar A[11] -= B[4]*C[7] + B[11]*C[8] + B[18]*C[9] + B[25]*C[10] + B[32]*C[11] + B[39]*C[12] + B[46]*C[13];\ 595bef36659SShri Abhyankar A[12] -= B[5]*C[7] + B[12]*C[8] + B[19]*C[9] + B[26]*C[10] + B[33]*C[11] + B[40]*C[12] + B[47]*C[13];\ 596bef36659SShri Abhyankar A[13] -= B[6]*C[7] + B[13]*C[8] + B[20]*C[9] + B[27]*C[10] + B[34]*C[11] + B[41]*C[12] + B[48]*C[13];\ 597bef36659SShri Abhyankar A[14] -= B[0]*C[14] + B[7]*C[15] + B[14]*C[16] + B[21]*C[17] + B[28]*C[18] + B[35]*C[19] + B[42]*C[20];\ 598bef36659SShri Abhyankar A[15] -= B[1]*C[14] + B[8]*C[15] + B[15]*C[16] + B[22]*C[17] + B[29]*C[18] + B[36]*C[19] + B[43]*C[20];\ 599bef36659SShri Abhyankar A[16] -= B[2]*C[14] + B[9]*C[15] + B[16]*C[16] + B[23]*C[17] + B[30]*C[18] + B[37]*C[19] + B[44]*C[20];\ 600bef36659SShri Abhyankar A[17] -= B[3]*C[14] + B[10]*C[15] + B[17]*C[16] + B[24]*C[17] + B[31]*C[18] + B[38]*C[19] + B[45]*C[20];\ 601bef36659SShri Abhyankar A[18] -= B[4]*C[14] + B[11]*C[15] + B[18]*C[16] + B[25]*C[17] + B[32]*C[18] + B[39]*C[19] + B[46]*C[20];\ 602bef36659SShri Abhyankar A[19] -= B[5]*C[14] + B[12]*C[15] + B[19]*C[16] + B[26]*C[17] + B[33]*C[18] + B[40]*C[19] + B[47]*C[20];\ 603bef36659SShri Abhyankar A[20] -= B[6]*C[14] + B[13]*C[15] + B[20]*C[16] + B[27]*C[17] + B[34]*C[18] + B[41]*C[19] + B[48]*C[20];\ 604bef36659SShri Abhyankar A[21] -= B[0]*C[21] + B[7]*C[22] + B[14]*C[23] + B[21]*C[24] + B[28]*C[25] + B[35]*C[26] + B[42]*C[27];\ 605bef36659SShri Abhyankar A[22] -= B[1]*C[21] + B[8]*C[22] + B[15]*C[23] + B[22]*C[24] + B[29]*C[25] + B[36]*C[26] + B[43]*C[27];\ 606bef36659SShri Abhyankar A[23] -= B[2]*C[21] + B[9]*C[22] + B[16]*C[23] + B[23]*C[24] + B[30]*C[25] + B[37]*C[26] + B[44]*C[27];\ 607bef36659SShri Abhyankar A[24] -= B[3]*C[21] + B[10]*C[22] + B[17]*C[23] + B[24]*C[24] + B[31]*C[25] + B[38]*C[26] + B[45]*C[27];\ 608bef36659SShri Abhyankar A[25] -= B[4]*C[21] + B[11]*C[22] + B[18]*C[23] + B[25]*C[24] + B[32]*C[25] + B[39]*C[26] + B[46]*C[27];\ 609bef36659SShri Abhyankar A[26] -= B[5]*C[21] + B[12]*C[22] + B[19]*C[23] + B[26]*C[24] + B[33]*C[25] + B[40]*C[26] + B[47]*C[27];\ 610bef36659SShri Abhyankar A[27] -= B[6]*C[21] + B[13]*C[22] + B[20]*C[23] + B[27]*C[24] + B[34]*C[25] + B[41]*C[26] + B[48]*C[27];\ 611bef36659SShri Abhyankar A[28] -= B[0]*C[28] + B[7]*C[29] + B[14]*C[30] + B[21]*C[31] + B[28]*C[32] + B[35]*C[33] + B[42]*C[34];\ 612bef36659SShri Abhyankar A[29] -= B[1]*C[28] + B[8]*C[29] + B[15]*C[30] + B[22]*C[31] + B[29]*C[32] + B[36]*C[33] + B[43]*C[34];\ 613bef36659SShri Abhyankar A[30] -= B[2]*C[28] + B[9]*C[29] + B[16]*C[30] + B[23]*C[31] + B[30]*C[32] + B[37]*C[33] + B[44]*C[34];\ 614bef36659SShri Abhyankar A[31] -= B[3]*C[28] + B[10]*C[29] + B[17]*C[30] + B[24]*C[31] + B[31]*C[32] + B[38]*C[33] + B[45]*C[34];\ 615bef36659SShri Abhyankar A[32] -= B[4]*C[28] + B[11]*C[29] + B[18]*C[30] + B[25]*C[31] + B[32]*C[32] + B[39]*C[33] + B[46]*C[34];\ 616bef36659SShri Abhyankar A[33] -= B[5]*C[28] + B[12]*C[29] + B[19]*C[30] + B[26]*C[31] + B[33]*C[32] + B[40]*C[33] + B[47]*C[34];\ 617bef36659SShri Abhyankar A[34] -= B[6]*C[28] + B[13]*C[29] + B[20]*C[30] + B[27]*C[31] + B[34]*C[32] + B[41]*C[33] + B[48]*C[34];\ 618bef36659SShri Abhyankar A[35] -= B[0]*C[35] + B[7]*C[36] + B[14]*C[37] + B[21]*C[38] + B[28]*C[39] + B[35]*C[40] + B[42]*C[41];\ 619bef36659SShri Abhyankar A[36] -= B[1]*C[35] + B[8]*C[36] + B[15]*C[37] + B[22]*C[38] + B[29]*C[39] + B[36]*C[40] + B[43]*C[41];\ 620bef36659SShri Abhyankar A[37] -= B[2]*C[35] + B[9]*C[36] + B[16]*C[37] + B[23]*C[38] + B[30]*C[39] + B[37]*C[40] + B[44]*C[41];\ 621bef36659SShri Abhyankar A[38] -= B[3]*C[35] + B[10]*C[36] + B[17]*C[37] + B[24]*C[38] + B[31]*C[39] + B[38]*C[40] + B[45]*C[41];\ 622bef36659SShri Abhyankar A[39] -= B[4]*C[35] + B[11]*C[36] + B[18]*C[37] + B[25]*C[38] + B[32]*C[39] + B[39]*C[40] + B[46]*C[41];\ 623bef36659SShri Abhyankar A[40] -= B[5]*C[35] + B[12]*C[36] + B[19]*C[37] + B[26]*C[38] + B[33]*C[39] + B[40]*C[40] + B[47]*C[41];\ 624bef36659SShri Abhyankar A[41] -= B[6]*C[35] + B[13]*C[36] + B[20]*C[37] + B[27]*C[38] + B[34]*C[39] + B[41]*C[40] + B[48]*C[41];\ 625bef36659SShri Abhyankar A[42] -= B[0]*C[42] + B[7]*C[43] + B[14]*C[44] + B[21]*C[45] + B[28]*C[46] + B[35]*C[47] + B[42]*C[48];\ 626bef36659SShri Abhyankar A[43] -= B[1]*C[42] + B[8]*C[43] + B[15]*C[44] + B[22]*C[45] + B[29]*C[46] + B[36]*C[47] + B[43]*C[48];\ 627bef36659SShri Abhyankar A[44] -= B[2]*C[42] + B[9]*C[43] + B[16]*C[44] + B[23]*C[45] + B[30]*C[46] + B[37]*C[47] + B[44]*C[48];\ 628bef36659SShri Abhyankar A[45] -= B[3]*C[42] + B[10]*C[43] + B[17]*C[44] + B[24]*C[45] + B[31]*C[46] + B[38]*C[47] + B[45]*C[48];\ 629bef36659SShri Abhyankar A[46] -= B[4]*C[42] + B[11]*C[43] + B[18]*C[44] + B[25]*C[45] + B[32]*C[46] + B[39]*C[47] + B[46]*C[48];\ 630bef36659SShri Abhyankar A[47] -= B[5]*C[42] + B[12]*C[43] + B[19]*C[44] + B[26]*C[45] + B[33]*C[46] + B[40]*C[47] + B[47]*C[48];\ 631bef36659SShri Abhyankar A[48] -= B[6]*C[42] + B[13]*C[43] + B[20]*C[44] + B[27]*C[45] + B[34]*C[46] + B[41]*C[47] + B[48]*C[48];\ 632bef36659SShri Abhyankar } 633bef36659SShri Abhyankar 634*2b0b2ea7SShri Abhyankar #define Kernel_A_gets_A_times_B_15(A,B,W) 0;\ 635*2b0b2ea7SShri Abhyankar { \ 636*2b0b2ea7SShri Abhyankar PetscInt __i,__j,__k; \ 637*2b0b2ea7SShri Abhyankar PetscMemcpy(W,A,225*sizeof(MatScalar)); \ 638*2b0b2ea7SShri Abhyankar for(__i=0;__i<15;__i++){ \ 639*2b0b2ea7SShri Abhyankar for(__j=0;__j<15;__j++) { \ 640*2b0b2ea7SShri Abhyankar for(__k=0;__k<15;__k++) A[15*__i+__j] += W[15*__k+__j]*B[15*__i+k];}};\ 641*2b0b2ea7SShri Abhyankar } 642*2b0b2ea7SShri Abhyankar 643*2b0b2ea7SShri Abhyankar /* 644*2b0b2ea7SShri Abhyankar Kernel_A_gets_A_minus_B_times_C_15: A = A - B * C with size bs=15 645*2b0b2ea7SShri Abhyankar 646*2b0b2ea7SShri Abhyankar Input Parameters: 647*2b0b2ea7SShri Abhyankar + A,B,C - square bs by bs arrays stored in column major order 648*2b0b2ea7SShri Abhyankar 649*2b0b2ea7SShri Abhyankar Output Parameter: 650*2b0b2ea7SShri Abhyankar . A = A - B*C 651*2b0b2ea7SShri Abhyankar */ 652*2b0b2ea7SShri Abhyankar 653*2b0b2ea7SShri Abhyankar #define Kernel_A_gets_A_minus_B_times_C_15(A,B,C) 0;\ 654*2b0b2ea7SShri Abhyankar { \ 655*2b0b2ea7SShri Abhyankar PetscInt __i,__j,__k; \ 656*2b0b2ea7SShri Abhyankar for(__i=0;__i<15;__i++){\ 657*2b0b2ea7SShri Abhyankar for(__j=0;__j<15;__j++){\ 658*2b0b2ea7SShri Abhyankar for(__k=0;__k<15;__k++) A[15*__i+__j] -= B[15*__k+__j]*C[15*__i+__k];}}\ 659*2b0b2ea7SShri Abhyankar } 660bef36659SShri Abhyankar 6612593348eSBarry Smith #endif 662