xref: /petsc/src/mat/impls/baij/seq/baij.h (revision 06e38f1d1dda9d815f70edc0143bb77eead08adb)
12593348eSBarry Smith 
235aab85fSBarry Smith #if !defined(__BAIJ_H)
335aab85fSBarry Smith #define __BAIJ_H
47c4f633dSBarry Smith #include "private/matimpl.h"
57c4f633dSBarry Smith #include "../src/mat/impls/aij/seq/aij.h"
6a4005a5dSBarry Smith #include "../src/mat/impls/baij/seq/ftn-kernels/fsolvebaij.h"
72593348eSBarry Smith 
82593348eSBarry Smith /*
935aab85fSBarry Smith   MATSEQBAIJ format - Block compressed row storage. The i[] and j[]
10e24b481bSBarry Smith   arrays start at 0.
112593348eSBarry Smith */
122593348eSBarry Smith 
136c6c5352SBarry Smith /* This header is shared by the SeqSBAIJ matrix */
146c6c5352SBarry Smith #define SEQBAIJHEADER \
15521d7252SBarry Smith   PetscInt         bs2;              /*  square of block size */                                     \
16690b6cddSBarry Smith   PetscInt         mbs,nbs;          /* rows/bs, columns/bs */                                       \
176c6c5352SBarry Smith   PetscScalar      *mult_work;       /* work array for matrix vector product*/                       \
18dd6ea824SBarry Smith   MatScalar        *saved_values;                                                                    \
196c6c5352SBarry Smith                                                                                                      \
206ad2eaddSHong Zhang   Mat              sbaijMat;         /* mat in sbaij format */                                       \
216c6c5352SBarry Smith                                                                                                      \
226c6c5352SBarry Smith                                                                                                      \
23dd6ea824SBarry Smith   MatScalar        *idiag;           /* inverse of block diagonal  */                                \
24e6b907acSBarry Smith   PetscTruth       idiagvalid       /* if above has correct/current values */
25e6b907acSBarry Smith 
266c6c5352SBarry Smith 
276c6c5352SBarry Smith typedef struct {
28dd6ea824SBarry Smith   SEQAIJHEADER(MatScalar);
29e6b907acSBarry Smith   SEQBAIJHEADER;
3035aab85fSBarry Smith } Mat_SeqBAIJ;
312593348eSBarry Smith 
32f349cc81SSatish Balay EXTERN_C_BEGIN
33ab93d7beSBarry Smith EXTERN PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat,PetscInt,PetscInt,PetscInt*);
34f349cc81SSatish Balay EXTERN_C_END
35*06e38f1dSHong Zhang EXTERN PetscErrorCode MatILUFactorSymbolic_SeqBAIJ_inplace(Mat,Mat,IS,IS,const MatFactorInfo*);
36*06e38f1dSHong Zhang EXTERN PetscErrorCode MatILUFactorSymbolic_SeqBAIJ_newdatastruct(Mat,Mat,IS,IS,const MatFactorInfo*);
370481f469SBarry Smith EXTERN PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat,Mat,IS,const MatFactorInfo*);
380481f469SBarry Smith EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat,Mat,IS,const MatFactorInfo*);
390481f469SBarry Smith EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat,Mat,const MatFactorInfo*);
400481f469SBarry Smith EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
41dfbe8321SBarry Smith EXTERN PetscErrorCode MatDuplicate_SeqBAIJ(Mat,MatDuplicateOption,Mat*);
422af78befSBarry Smith EXTERN PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat,PetscTruth*,PetscInt*);
43dfbe8321SBarry Smith EXTERN PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat);
44c8342467SHong Zhang EXTERN PetscErrorCode MatILUDTFactor_SeqBAIJ(Mat,IS,IS,const MatFactorInfo*,Mat*);
45be3590efSBarry Smith 
46*06e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat,Mat,IS,IS,const MatFactorInfo*);
47*06e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_newdatastruct(Mat,Mat,IS,IS,const MatFactorInfo*);
480481f469SBarry Smith EXTERN PetscErrorCode MatLUFactor_SeqBAIJ(Mat,IS,IS,const MatFactorInfo*);
49690b6cddSBarry Smith EXTERN PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat,PetscInt,IS*,PetscInt);
504aa3045dSJed Brown EXTERN PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatReuse,Mat*);
51690b6cddSBarry Smith EXTERN PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]);
52dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultTranspose_SeqBAIJ(Mat,Vec,Vec);
53547795f9SHong Zhang EXTERN PetscErrorCode MatMultHermitianTranspose_SeqBAIJ(Mat,Vec,Vec);
54dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
55547795f9SHong Zhang EXTERN PetscErrorCode MatMultHermitianTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
56f4df32b1SMatthew Knepley EXTERN PetscErrorCode MatScale_SeqBAIJ(Mat,PetscScalar);
57dfbe8321SBarry Smith EXTERN PetscErrorCode MatNorm_SeqBAIJ(Mat,NormType,PetscReal *);
58dfbe8321SBarry Smith EXTERN PetscErrorCode MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*);
59dfbe8321SBarry Smith EXTERN PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat,Vec);
60dfbe8321SBarry Smith EXTERN PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
61dfbe8321SBarry Smith EXTERN PetscErrorCode MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
62dfbe8321SBarry Smith EXTERN PetscErrorCode MatZeroEntries_SeqBAIJ(Mat);
6354138f6bSKris Buschelman 
64dfbe8321SBarry Smith EXTERN PetscErrorCode MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(Mat);
6554138f6bSKris Buschelman 
66*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_1_inplace(Mat,Vec,Vec);
67*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_1_NaturalOrdering_inplace(Mat,Vec,Vec);
68*06e38f1dSHong Zhang 
69*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_inplace(Mat,Vec,Vec);
708bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_newdatastruct(Mat,Vec,Vec);
71*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_inplace(Mat,Vec,Vec);
728bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
73*06e38f1dSHong Zhang 
74*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_inplace(Mat,Vec,Vec);
758bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_newdatastruct(Mat,Vec,Vec);
76*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_inplace(Mat,Vec,Vec);
778bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
78*06e38f1dSHong Zhang 
79*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_inplace(Mat,Vec,Vec);
808bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_newdatastruct(Mat,Vec,Vec);
81*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_inplace(Mat,Vec,Vec);
828bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
8354138f6bSKris Buschelman #if defined(PETSC_HAVE_SSE)
84dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_SSE_Demotion(Mat,Vec,Vec);
85dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_SSE_Demotion(Mat,Vec,Vec);
86dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_SSE_Demotion_usj(Mat,Vec,Vec);
874110d75bSKris Buschelman #endif
88*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_inplace(Mat,Vec,Vec);
898bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_newdatastruct(Mat,Vec,Vec);
90*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering_inplace(Mat,Vec,Vec);
918bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
92*06e38f1dSHong Zhang 
93*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_inplace(Mat,Vec,Vec);
948bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_newdatastruct(Mat,Vec,Vec);
95*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_NaturalOrdering_inplace(Mat,Vec,Vec);
968bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
97*06e38f1dSHong Zhang 
98*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_inplace(Mat,Vec,Vec);
998bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_newdatastruct(Mat,Vec,Vec);
100*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_NaturalOrdering_inplace(Mat,Vec,Vec);
1018bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
102*06e38f1dSHong Zhang 
103*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolve_SeqBAIJ_N_inplace(Mat,Vec,Vec);
1048bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_N_newdatastruct(Mat,Vec,Vec);
1058bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
106*06e38f1dSHong Zhang 
107dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_Update(Mat,Vec,Vec);
108*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_1_inplace(Mat,Vec,Vec);
109*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_1_NaturalOrdering_inplace(Mat,Vec,Vec);
110*06e38f1dSHong Zhang 
111*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_2_inplace(Mat,Vec,Vec);
11232121132SShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_2_newdatastruct(Mat,Vec,Vec);
113*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_2_NaturalOrdering_inplace(Mat,Vec,Vec);
1146929473cSShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_2_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
115*06e38f1dSHong Zhang 
116*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_3_inplace(Mat,Vec,Vec);
11732121132SShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_3_newdatastruct(Mat,Vec,Vec);
118*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_3_NaturalOrdering_inplace(Mat,Vec,Vec);
1198499736aSShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_3_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
120*06e38f1dSHong Zhang 
121*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_4_inplace(Mat,Vec,Vec);
12232121132SShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_4_newdatastruct(Mat,Vec,Vec);
123*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_4_NaturalOrdering_inplace(Mat,Vec,Vec);
1248499736aSShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_4_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
125*06e38f1dSHong Zhang 
126*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_5_inplace(Mat,Vec,Vec);
12732121132SShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_5_newdatastruct(Mat,Vec,Vec);
128*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_5_NaturalOrdering_inplace(Mat,Vec,Vec);
1298499736aSShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_5_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
130*06e38f1dSHong Zhang 
131*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_6_inplace(Mat,Vec,Vec);
13232121132SShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_6_newdatastruct(Mat,Vec,Vec);
133*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_6_NaturalOrdering_inplace(Mat,Vec,Vec);
1348499736aSShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_6_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
135*06e38f1dSHong Zhang 
136*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_7_inplace(Mat,Vec,Vec);
13732121132SShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_7_newdatastruct(Mat,Vec,Vec);
138*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_7_NaturalOrdering_inplace(Mat,Vec,Vec);
1398499736aSShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_7_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
140*06e38f1dSHong Zhang 
141*06e38f1dSHong Zhang EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_N_inplace(Mat,Vec,Vec);
1428499736aSShri Abhyankar EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_N_newdatastruct(Mat,Vec,Vec);
14354138f6bSKris Buschelman 
144faca2338SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N_newdatastruct(Mat,Mat,const MatFactorInfo*);
145*06e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1_inplace(Mat,Mat,const MatFactorInfo*);
146*06e38f1dSHong Zhang 
147*06e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_inplace(Mat,Mat,const MatFactorInfo*);
148b588c5a2SHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_newdatastruct(Mat,Mat,const MatFactorInfo*);
149*06e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace(Mat,Mat,const MatFactorInfo*);
1504c000e2eSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*);
151*06e38f1dSHong Zhang 
152*06e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_inplace(Mat,Mat,const MatFactorInfo*);
15317542729SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_newdatastruct(Mat,Mat,const MatFactorInfo*);
154*06e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace(Mat,Mat,const MatFactorInfo*);
15517542729SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*);
156*06e38f1dSHong Zhang 
157*06e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_inplace(Mat,Mat,const MatFactorInfo*);
158209027a4SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_newdatastruct(Mat,Mat,const MatFactorInfo*);
159*06e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace(Mat,Mat,const MatFactorInfo*);
160209027a4SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*);
16154138f6bSKris Buschelman #if defined(PETSC_HAVE_SSE)
1620481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE(Mat,Mat,const MatFactorInfo*);
1630481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj(Mat,Mat,const MatFactorInfo*);
16454138f6bSKris Buschelman #else
16554138f6bSKris Buschelman #endif
166*06e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_inplace(Mat,Mat,const MatFactorInfo*);
1670deeaf61SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_newdatastruct(Mat,Mat,const MatFactorInfo*);
168*06e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace(Mat,Mat,const MatFactorInfo*);
1690deeaf61SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*);
170*06e38f1dSHong Zhang 
171*06e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6_inplace(Mat,Mat,const MatFactorInfo*);
172bef36659SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6_newdatastruct(Mat,Mat,const MatFactorInfo*);
173*06e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace(Mat,Mat,const MatFactorInfo*);
174bef36659SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*);
175*06e38f1dSHong Zhang 
176*06e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_inplace(Mat,Mat,const MatFactorInfo*);
177bef36659SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_newdatastruct(Mat,Mat,const MatFactorInfo*);
178*06e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace(Mat,Mat,const MatFactorInfo*);
179bef36659SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*);
180*06e38f1dSHong Zhang 
181*06e38f1dSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N_inplace(Mat,Mat,const MatFactorInfo*);
182c0c7eb62SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N_newdatastruct(Mat,Mat,const MatFactorInfo*);
18354138f6bSKris Buschelman 
184dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_1(Mat,Vec,Vec);
185dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_2(Mat,Vec,Vec);
186dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_3(Mat,Vec,Vec);
187dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_4(Mat,Vec,Vec);
188dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_5(Mat,Vec,Vec);
189dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_6(Mat,Vec,Vec);
190dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_7(Mat,Vec,Vec);
191dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_N(Mat,Vec,Vec);
19254138f6bSKris Buschelman 
193dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
194dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
195dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
196dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
197dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
198dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
199dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
200dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
201a313700dSBarry Smith EXTERN PetscErrorCode MatLoad_SeqBAIJ(PetscViewer, const MatType,Mat*);
2022593348eSBarry Smith 
2034c000e2eSHong Zhang /*
2044c000e2eSHong Zhang   Kernel_A_gets_A_times_B_2: A = A * B with size bs=2
2054c000e2eSHong Zhang 
2064c000e2eSHong Zhang   Input Parameters:
2074c000e2eSHong Zhang +  A,B - square bs by bs arrays stored in column major order
2084c000e2eSHong Zhang -  W   - bs*bs work arrary
2094c000e2eSHong Zhang 
2104c000e2eSHong Zhang   Output Parameter:
2114c000e2eSHong Zhang .  A = A * B
2124c000e2eSHong Zhang */
2134c000e2eSHong Zhang 
2144c000e2eSHong Zhang #define Kernel_A_gets_A_times_B_2(A,B,W) 0;\
2154c000e2eSHong Zhang {\
2164c000e2eSHong Zhang   PetscMemcpy(W,A,4*sizeof(MatScalar));\
2174c000e2eSHong Zhang   A[0] = W[0]*B[0] + W[2]*B[1];\
2184c000e2eSHong Zhang   A[1] = W[1]*B[0] + W[3]*B[1];\
2194c000e2eSHong Zhang   A[2] = W[0]*B[2] + W[2]*B[3];\
2204c000e2eSHong Zhang   A[3] = W[1]*B[2] + W[3]*B[3];\
2214c000e2eSHong Zhang }
2224c000e2eSHong Zhang 
2234c000e2eSHong Zhang /*
2244c000e2eSHong Zhang   Kernel_A_gets_A_minus_B_times_C_2: A = A - B * C with size bs=2
2254c000e2eSHong Zhang 
2264c000e2eSHong Zhang   Input Parameters:
2274c000e2eSHong Zhang +  A,B,C - square bs by bs arrays stored in column major order
2284c000e2eSHong Zhang 
2294c000e2eSHong Zhang   Output Parameter:
2304c000e2eSHong Zhang .  A = A - B*C
2314c000e2eSHong Zhang */
2324c000e2eSHong Zhang 
2334c000e2eSHong Zhang #define Kernel_A_gets_A_minus_B_times_C_2(A,B,C) 0;\
2344c000e2eSHong Zhang {\
2354c000e2eSHong Zhang   A[0] -= B[0]*C[0] + B[2]*C[1];\
2364c000e2eSHong Zhang   A[1] -= B[1]*C[0] + B[3]*C[1];\
2374c000e2eSHong Zhang   A[2] -= B[0]*C[2] + B[2]*C[3];\
2384c000e2eSHong Zhang   A[3] -= B[1]*C[2] + B[3]*C[3];\
2394c000e2eSHong Zhang }
2404c000e2eSHong Zhang 
24117542729SShri Abhyankar /*
24217542729SShri Abhyankar   Kernel_A_gets_A_times_B_3: A = A * B with size bs=3
24317542729SShri Abhyankar 
24417542729SShri Abhyankar   Input Parameters:
24517542729SShri Abhyankar +  A,B - square bs by bs arrays stored in column major order
24617542729SShri Abhyankar -  W   - bs*bs work arrary
24717542729SShri Abhyankar 
24817542729SShri Abhyankar   Output Parameter:
24917542729SShri Abhyankar .  A = A * B
25017542729SShri Abhyankar */
25117542729SShri Abhyankar 
25217542729SShri Abhyankar #define Kernel_A_gets_A_times_B_3(A,B,W) 0;\
25317542729SShri Abhyankar {\
25417542729SShri Abhyankar   PetscMemcpy(W,A,9*sizeof(MatScalar));\
25517542729SShri Abhyankar   A[0] = W[0]*B[0] + W[3]*B[1] + W[6]*B[2];\
25617542729SShri Abhyankar   A[1] = W[1]*B[0] + W[4]*B[1] + W[7]*B[2];\
25717542729SShri Abhyankar   A[2] = W[2]*B[0] + W[5]*B[1] + W[8]*B[2];\
25817542729SShri Abhyankar   A[3] = W[0]*B[3] + W[3]*B[4] + W[6]*B[5];\
25917542729SShri Abhyankar   A[4] = W[1]*B[3] + W[4]*B[4] + W[7]*B[5];\
26017542729SShri Abhyankar   A[5] = W[2]*B[3] + W[5]*B[4] + W[8]*B[5];\
26117542729SShri Abhyankar   A[6] = W[0]*B[6] + W[3]*B[7] + W[6]*B[8];\
26217542729SShri Abhyankar   A[7] = W[1]*B[6] + W[4]*B[7] + W[7]*B[8];\
26317542729SShri Abhyankar   A[8] = W[2]*B[6] + W[5]*B[7] + W[8]*B[8];\
26417542729SShri Abhyankar }
26517542729SShri Abhyankar 
26617542729SShri Abhyankar /*
26717542729SShri Abhyankar   Kernel_A_gets_A_minus_B_times_C_3: A = A - B * C with size bs=3
26817542729SShri Abhyankar 
26917542729SShri Abhyankar   Input Parameters:
27017542729SShri Abhyankar +  A,B,C - square bs by bs arrays stored in column major order
27117542729SShri Abhyankar 
27217542729SShri Abhyankar   Output Parameter:
27317542729SShri Abhyankar .  A = A - B*C
27417542729SShri Abhyankar */
27517542729SShri Abhyankar 
27617542729SShri Abhyankar #define Kernel_A_gets_A_minus_B_times_C_3(A,B,C) 0;\
27717542729SShri Abhyankar {\
27817542729SShri Abhyankar   A[0] -= B[0]*C[0] + B[3]*C[1] + B[6]*C[2];\
27917542729SShri Abhyankar   A[1] -= B[1]*C[0] + B[4]*C[1] + B[7]*C[2];\
28017542729SShri Abhyankar   A[2] -= B[2]*C[0] + B[5]*C[1] + B[8]*C[2];\
28117542729SShri Abhyankar   A[3] -= B[0]*C[3] + B[3]*C[4] + B[6]*C[5];\
28217542729SShri Abhyankar   A[4] -= B[1]*C[3] + B[4]*C[4] + B[7]*C[5];\
28317542729SShri Abhyankar   A[5] -= B[2]*C[3] + B[5]*C[4] + B[8]*C[5];\
28417542729SShri Abhyankar   A[6] -= B[0]*C[6] + B[3]*C[7] + B[6]*C[8];\
28517542729SShri Abhyankar   A[7] -= B[1]*C[6] + B[4]*C[7] + B[7]*C[8];\
28617542729SShri Abhyankar   A[8] -= B[2]*C[6] + B[5]*C[7] + B[8]*C[8];\
28717542729SShri Abhyankar }
28817542729SShri Abhyankar 
289209027a4SShri Abhyankar /*
290209027a4SShri Abhyankar   Kernel_A_gets_A_times_B_4: A = A * B with size bs=4
291209027a4SShri Abhyankar 
292209027a4SShri Abhyankar   Input Parameters:
293209027a4SShri Abhyankar +  A,B - square bs by bs arrays stored in column major order
294209027a4SShri Abhyankar -  W   - bs*bs work arrary
295209027a4SShri Abhyankar 
296209027a4SShri Abhyankar   Output Parameter:
297209027a4SShri Abhyankar .  A = A * B
298209027a4SShri Abhyankar */
299209027a4SShri Abhyankar 
300209027a4SShri Abhyankar #define Kernel_A_gets_A_times_B_4(A,B,W) 0;\
301209027a4SShri Abhyankar {\
302209027a4SShri Abhyankar   PetscMemcpy(W,A,16*sizeof(MatScalar));\
303209027a4SShri Abhyankar   A[0] =  W[0]*B[0]  + W[4]*B[1]  + W[8]*B[2]   + W[12]*B[3];\
304209027a4SShri Abhyankar   A[1] =  W[1]*B[0]  + W[5]*B[1]  + W[9]*B[2]   + W[13]*B[3];\
305209027a4SShri Abhyankar   A[2] =  W[2]*B[0]  + W[6]*B[1]  + W[10]*B[2]  + W[14]*B[3];\
306209027a4SShri Abhyankar   A[3] =  W[3]*B[0]  + W[7]*B[1]  + W[11]*B[2]  + W[15]*B[3];\
307209027a4SShri Abhyankar   A[4] =  W[0]*B[4]  + W[4]*B[5]  + W[8]*B[6]   + W[12]*B[7];\
308209027a4SShri Abhyankar   A[5] =  W[1]*B[4]  + W[5]*B[5]  + W[9]*B[6]   + W[13]*B[7];\
309209027a4SShri Abhyankar   A[6] =  W[2]*B[4]  + W[6]*B[5]  + W[10]*B[6]  + W[14]*B[7];\
310209027a4SShri Abhyankar   A[7] =  W[3]*B[4]  + W[7]*B[5]  + W[11]*B[6]  + W[15]*B[7];\
311209027a4SShri Abhyankar   A[8] =  W[0]*B[8]  + W[4]*B[9]  + W[8]*B[10]  + W[12]*B[11];\
312209027a4SShri Abhyankar   A[9] =  W[1]*B[8]  + W[5]*B[9]  + W[9]*B[10]  + W[13]*B[11];\
313209027a4SShri Abhyankar   A[10] = W[2]*B[8]  + W[6]*B[9]  + W[10]*B[10] + W[14]*B[11];\
314209027a4SShri Abhyankar   A[11] = W[3]*B[8]  + W[7]*B[9]  + W[11]*B[10] + W[15]*B[11];\
315209027a4SShri Abhyankar   A[12] = W[0]*B[12] + W[4]*B[13] + W[8]*B[14]  + W[12]*B[15];\
316209027a4SShri Abhyankar   A[13] = W[1]*B[12] + W[5]*B[13] + W[9]*B[14]  + W[13]*B[15];\
317209027a4SShri Abhyankar   A[14] = W[2]*B[12] + W[6]*B[13] + W[10]*B[14] + W[14]*B[15];\
318209027a4SShri Abhyankar   A[15] = W[3]*B[12] + W[7]*B[13] + W[11]*B[14] + W[15]*B[15];\
319209027a4SShri Abhyankar }
320209027a4SShri Abhyankar 
321209027a4SShri Abhyankar /*
322209027a4SShri Abhyankar   Kernel_A_gets_A_minus_B_times_C_4: A = A - B * C with size bs=4
323209027a4SShri Abhyankar 
324209027a4SShri Abhyankar   Input Parameters:
325209027a4SShri Abhyankar +  A,B,C - square bs by bs arrays stored in column major order
326209027a4SShri Abhyankar 
327209027a4SShri Abhyankar   Output Parameter:
328209027a4SShri Abhyankar .  A = A - B*C
329209027a4SShri Abhyankar */
330209027a4SShri Abhyankar 
331209027a4SShri Abhyankar #define Kernel_A_gets_A_minus_B_times_C_4(A,B,C) 0;\
332209027a4SShri Abhyankar {\
333209027a4SShri Abhyankar   A[0] -=  B[0]*C[0]  + B[4]*C[1]  + B[8]*C[2]   + B[12]*C[3];\
334209027a4SShri Abhyankar   A[1] -=  B[1]*C[0]  + B[5]*C[1]  + B[9]*C[2]   + B[13]*C[3];\
335209027a4SShri Abhyankar   A[2] -=  B[2]*C[0]  + B[6]*C[1]  + B[10]*C[2]  + B[14]*C[3];\
336209027a4SShri Abhyankar   A[3] -=  B[3]*C[0]  + B[7]*C[1]  + B[11]*C[2]  + B[15]*C[3];\
337209027a4SShri Abhyankar   A[4] -=  B[0]*C[4]  + B[4]*C[5]  + B[8]*C[6]   + B[12]*C[7];\
338209027a4SShri Abhyankar   A[5] -=  B[1]*C[4]  + B[5]*C[5]  + B[9]*C[6]   + B[13]*C[7];\
339209027a4SShri Abhyankar   A[6] -=  B[2]*C[4]  + B[6]*C[5]  + B[10]*C[6]  + B[14]*C[7];\
340209027a4SShri Abhyankar   A[7] -=  B[3]*C[4]  + B[7]*C[5]  + B[11]*C[6]  + B[15]*C[7];\
341209027a4SShri Abhyankar   A[8] -=  B[0]*C[8]  + B[4]*C[9]  + B[8]*C[10]  + B[12]*C[11];\
342209027a4SShri Abhyankar   A[9] -=  B[1]*C[8]  + B[5]*C[9]  + B[9]*C[10]  + B[13]*C[11];\
343209027a4SShri Abhyankar   A[10] -= B[2]*C[8]  + B[6]*C[9]  + B[10]*C[10] + B[14]*C[11];\
344209027a4SShri Abhyankar   A[11] -= B[3]*C[8]  + B[7]*C[9]  + B[11]*C[10] + B[15]*C[11];\
345209027a4SShri Abhyankar   A[12] -= B[0]*C[12] + B[4]*C[13] + B[8]*C[14]  + B[12]*C[15];\
346209027a4SShri Abhyankar   A[13] -= B[1]*C[12] + B[5]*C[13] + B[9]*C[14]  + B[13]*C[15];\
347209027a4SShri Abhyankar   A[14] -= B[2]*C[12] + B[6]*C[13] + B[10]*C[14] + B[14]*C[15];\
348209027a4SShri Abhyankar   A[15] -= B[3]*C[12] + B[7]*C[13] + B[11]*C[14] + B[15]*C[15];\
349209027a4SShri Abhyankar }
350209027a4SShri Abhyankar 
3510deeaf61SShri Abhyankar #define Kernel_A_gets_A_times_B_5(A,B,W) 0;\
3520deeaf61SShri Abhyankar {\
3530deeaf61SShri Abhyankar   PetscMemcpy(W,A,25*sizeof(MatScalar));\
3540deeaf61SShri Abhyankar   A[0] =  W[0]*B[0]  + W[5]*B[1]  + W[10]*B[2]   + W[15]*B[3] + W[20]*B[4];\
3550deeaf61SShri Abhyankar   A[1] =  W[1]*B[0]  + W[6]*B[1]  + W[11]*B[2]   + W[16]*B[3] + W[21]*B[4];\
3560deeaf61SShri Abhyankar   A[2] =  W[2]*B[0]  + W[7]*B[1]  + W[12]*B[2]  + W[17]*B[3]  + W[22]*B[4];\
3570deeaf61SShri Abhyankar   A[3] =  W[3]*B[0]  + W[8]*B[1]  + W[13]*B[2]  + W[18]*B[3]  + W[23]*B[4];\
3580deeaf61SShri Abhyankar   A[4] =  W[4]*B[0]  + W[9]*B[1]  + W[14]*B[2]   + W[19]*B[3] + W[24]*B[4];\
3590deeaf61SShri Abhyankar   A[5] =  W[0]*B[5]  + W[5]*B[6]  + W[10]*B[7]   + W[15]*B[8] + W[20]*B[9];\
3600deeaf61SShri Abhyankar   A[6] =  W[1]*B[5]  + W[6]*B[6]  + W[11]*B[7]   + W[16]*B[8] + W[21]*B[9];\
3610deeaf61SShri Abhyankar   A[7] =  W[2]*B[5]  + W[7]*B[6]  + W[12]*B[7]  + W[17]*B[8]  + W[22]*B[9];\
3620deeaf61SShri Abhyankar   A[8] =  W[3]*B[5]  + W[8]*B[6]  + W[13]*B[7]  + W[18]*B[8]  + W[23]*B[9];\
3630deeaf61SShri Abhyankar   A[9] =  W[4]*B[5]  + W[9]*B[6]  + W[14]*B[7]   + W[19]*B[8] + W[24]*B[9];\
3640deeaf61SShri Abhyankar   A[10] =  W[0]*B[10]  + W[5]*B[11]  + W[10]*B[12]   + W[15]*B[13] + W[20]*B[14];\
3650deeaf61SShri Abhyankar   A[11] =  W[1]*B[10]  + W[6]*B[11]  + W[11]*B[12]   + W[16]*B[13] + W[21]*B[14];\
3660deeaf61SShri Abhyankar   A[12] =  W[2]*B[10]  + W[7]*B[11]  + W[12]*B[12]  + W[17]*B[13]  + W[22]*B[14];\
3670deeaf61SShri Abhyankar   A[13] =  W[3]*B[10]  + W[8]*B[11]  + W[13]*B[12]  + W[18]*B[13]  + W[23]*B[14];\
3680deeaf61SShri Abhyankar   A[14] =  W[4]*B[10]  + W[9]*B[11]  + W[14]*B[12]   + W[19]*B[13] + W[24]*B[14];\
3690deeaf61SShri Abhyankar   A[15] =  W[0]*B[15]  + W[5]*B[16]  + W[10]*B[17]   + W[15]*B[18] + W[20]*B[19];\
3700deeaf61SShri Abhyankar   A[16] =  W[1]*B[15]  + W[6]*B[16]  + W[11]*B[17]   + W[16]*B[18] + W[21]*B[19];\
3710deeaf61SShri Abhyankar   A[17] =  W[2]*B[15]  + W[7]*B[16]  + W[12]*B[17]  + W[17]*B[18]  + W[22]*B[19];\
3720deeaf61SShri Abhyankar   A[18] =  W[3]*B[15]  + W[8]*B[16]  + W[13]*B[17]  + W[18]*B[18]  + W[23]*B[19];\
3730deeaf61SShri Abhyankar   A[19] =  W[4]*B[15]  + W[9]*B[16]  + W[14]*B[17]   + W[19]*B[18] + W[24]*B[19];\
3740deeaf61SShri Abhyankar   A[20] =  W[0]*B[20]  + W[5]*B[21]  + W[10]*B[22]   + W[15]*B[23] + W[20]*B[24];\
3750deeaf61SShri Abhyankar   A[21] =  W[1]*B[20]  + W[6]*B[21]  + W[11]*B[22]   + W[16]*B[23] + W[21]*B[24];\
3760deeaf61SShri Abhyankar   A[22] =  W[2]*B[20]  + W[7]*B[21]  + W[12]*B[22]  + W[17]*B[23]  + W[22]*B[24];\
3770deeaf61SShri Abhyankar   A[23] =  W[3]*B[20]  + W[8]*B[21]  + W[13]*B[22]  + W[18]*B[23]  + W[23]*B[24];\
3780deeaf61SShri Abhyankar   A[24] =  W[4]*B[20]  + W[9]*B[21]  + W[14]*B[22]   + W[19]*B[23] + W[24]*B[24];\
3790deeaf61SShri Abhyankar }
3800deeaf61SShri Abhyankar 
3810deeaf61SShri Abhyankar /*
3820deeaf61SShri Abhyankar   Kernel_A_gets_A_minus_B_times_C_5: A = A - B * C with size bs=5
3830deeaf61SShri Abhyankar 
3840deeaf61SShri Abhyankar   Input Parameters:
3850deeaf61SShri Abhyankar +  A,B,C - square bs by bs arrays stored in column major order
3860deeaf61SShri Abhyankar 
3870deeaf61SShri Abhyankar   Output Parameter:
3880deeaf61SShri Abhyankar .  A = A - B*C
3890deeaf61SShri Abhyankar */
3900deeaf61SShri Abhyankar 
3910deeaf61SShri Abhyankar #define Kernel_A_gets_A_minus_B_times_C_5(A,B,C) 0;\
3920deeaf61SShri Abhyankar {\
3930deeaf61SShri Abhyankar   A[0] -=  B[0]*C[0]  + B[5]*C[1]  + B[10]*C[2]   + B[15]*C[3] + B[20]*C[4];\
3940deeaf61SShri Abhyankar   A[1] -=  B[1]*C[0]  + B[6]*C[1]  + B[11]*C[2]   + B[16]*C[3] + B[21]*C[4];\
3950deeaf61SShri Abhyankar   A[2] -=  B[2]*C[0]  + B[7]*C[1]  + B[12]*C[2]  + B[17]*C[3]  + B[22]*C[4];\
3960deeaf61SShri Abhyankar   A[3] -=  B[3]*C[0]  + B[8]*C[1]  + B[13]*C[2]  + B[18]*C[3]  + B[23]*C[4];\
3970deeaf61SShri Abhyankar   A[4] -=  B[4]*C[0]  + B[9]*C[1]  + B[14]*C[2]   + B[19]*C[3] + B[24]*C[4];\
3980deeaf61SShri Abhyankar   A[5] -=  B[0]*C[5]  + B[5]*C[6]  + B[10]*C[7]   + B[15]*C[8] + B[20]*C[9];\
3990deeaf61SShri Abhyankar   A[6] -=  B[1]*C[5]  + B[6]*C[6]  + B[11]*C[7]   + B[16]*C[8] + B[21]*C[9];\
4000deeaf61SShri Abhyankar   A[7] -=  B[2]*C[5]  + B[7]*C[6]  + B[12]*C[7]  + B[17]*C[8]  + B[22]*C[9];\
4010deeaf61SShri Abhyankar   A[8] -=  B[3]*C[5]  + B[8]*C[6]  + B[13]*C[7]  + B[18]*C[8]  + B[23]*C[9];\
4020deeaf61SShri Abhyankar   A[9] -=  B[4]*C[5]  + B[9]*C[6]  + B[14]*C[7]   + B[19]*C[8] + B[24]*C[9];\
4030deeaf61SShri Abhyankar   A[10] -=  B[0]*C[10]  + B[5]*C[11]  + B[10]*C[12]   + B[15]*C[13] + B[20]*C[14];\
4040deeaf61SShri Abhyankar   A[11] -=  B[1]*C[10]  + B[6]*C[11]  + B[11]*C[12]   + B[16]*C[13] + B[21]*C[14];\
4050deeaf61SShri Abhyankar   A[12] -=  B[2]*C[10]  + B[7]*C[11]  + B[12]*C[12]  + B[17]*C[13]  + B[22]*C[14];\
4060deeaf61SShri Abhyankar   A[13] -=  B[3]*C[10]  + B[8]*C[11]  + B[13]*C[12]  + B[18]*C[13]  + B[23]*C[14];\
4070deeaf61SShri Abhyankar   A[14] -=  B[4]*C[10]  + B[9]*C[11]  + B[14]*C[12]   + B[19]*C[13] + B[24]*C[14];\
4080deeaf61SShri Abhyankar   A[15] -=  B[0]*C[15]  + B[5]*C[16]  + B[10]*C[17]   + B[15]*C[18] + B[20]*C[19];\
4090deeaf61SShri Abhyankar   A[16] -=  B[1]*C[15]  + B[6]*C[16]  + B[11]*C[17]   + B[16]*C[18] + B[21]*C[19];\
4100deeaf61SShri Abhyankar   A[17] -=  B[2]*C[15]  + B[7]*C[16]  + B[12]*C[17]  + B[17]*C[18]  + B[22]*C[19];\
4110deeaf61SShri Abhyankar   A[18] -=  B[3]*C[15]  + B[8]*C[16]  + B[13]*C[17]  + B[18]*C[18]  + B[23]*C[19];\
4120deeaf61SShri Abhyankar   A[19] -=  B[4]*C[15]  + B[9]*C[16]  + B[14]*C[17]   + B[19]*C[18] + B[24]*C[19];\
4130deeaf61SShri Abhyankar   A[20] -=  B[0]*C[20]  + B[5]*C[21]  + B[10]*C[22]   + B[15]*C[23] + B[20]*C[24];\
4140deeaf61SShri Abhyankar   A[21] -=  B[1]*C[20]  + B[6]*C[21]  + B[11]*C[22]   + B[16]*C[23] + B[21]*C[24];\
4150deeaf61SShri Abhyankar   A[22] -=  B[2]*C[20]  + B[7]*C[21]  + B[12]*C[22]  + B[17]*C[23]  + B[22]*C[24];\
4160deeaf61SShri Abhyankar   A[23] -=  B[3]*C[20]  + B[8]*C[21]  + B[13]*C[22]  + B[18]*C[23]  + B[23]*C[24];\
4170deeaf61SShri Abhyankar   A[24] -=  B[4]*C[20]  + B[9]*C[21]  + B[14]*C[22]   + B[19]*C[23] + B[24]*C[24];\
4180deeaf61SShri Abhyankar }
4190deeaf61SShri Abhyankar 
420bef36659SShri Abhyankar #define Kernel_A_gets_A_times_B_6(A,B,W) 0;\
421bef36659SShri Abhyankar {\
422bef36659SShri Abhyankar   PetscMemcpy(W,A,36*sizeof(MatScalar));\
423bef36659SShri Abhyankar   A[0]  =  W[0]*B[0]   + W[6]*B[1]   + W[12]*B[2]   + W[18]*B[3]  + W[24]*B[4]  + W[30]*B[5];\
424bef36659SShri Abhyankar   A[1]  =  W[1]*B[0]   + W[7]*B[1]   + W[13]*B[2]   + W[19]*B[3]  + W[25]*B[4]  + W[31]*B[5];\
425bef36659SShri Abhyankar   A[2]  =  W[2]*B[0]   + W[8]*B[1]   + W[14]*B[2]   + W[20]*B[3]  + W[26]*B[4]  + W[32]*B[5];\
426bef36659SShri Abhyankar   A[3]  =  W[3]*B[0]   + W[9]*B[1]   + W[15]*B[2]   + W[21]*B[3]  + W[27]*B[4]  + W[33]*B[5];\
427bef36659SShri Abhyankar   A[4]  =  W[4]*B[0]   + W[10]*B[1]  + W[16]*B[2]   + W[22]*B[3]  + W[28]*B[4]  + W[34]*B[5];\
428bef36659SShri Abhyankar   A[5]  =  W[5]*B[0]   + W[11]*B[1]  + W[17]*B[2]   + W[23]*B[3]  + W[29]*B[4]  + W[35]*B[5];\
429bef36659SShri Abhyankar   A[6]  =  W[0]*B[6]   + W[6]*B[7]   + W[12]*B[8]   + W[18]*B[9]  + W[24]*B[10] + W[30]*B[11];\
430bef36659SShri Abhyankar   A[7]  =  W[1]*B[6]   + W[7]*B[7]   + W[13]*B[8]   + W[19]*B[9]  + W[25]*B[10] + W[31]*B[11];\
431bef36659SShri Abhyankar   A[8]  =  W[2]*B[6]   + W[8]*B[7]   + W[14]*B[8]   + W[20]*B[9]  + W[26]*B[10] + W[32]*B[11];\
432bef36659SShri Abhyankar   A[9]  =  W[3]*B[6]   + W[9]*B[7]   + W[15]*B[8]   + W[21]*B[9]  + W[27]*B[10] + W[33]*B[11];\
433bef36659SShri Abhyankar   A[10] =  W[4]*B[6]   + W[10]*B[7]  + W[16]*B[8]   + W[22]*B[9]  + W[28]*B[10] + W[34]*B[11];\
434bef36659SShri Abhyankar   A[11] =  W[5]*B[6]   + W[11]*B[7]  + W[17]*B[8]   + W[23]*B[9]  + W[29]*B[10] + W[35]*B[11];\
435bef36659SShri Abhyankar   A[12] =  W[0]*B[12]  + W[6]*B[13]  + W[12]*B[14]  + W[18]*B[15] + W[24]*B[16] + W[30]*B[17];\
436bef36659SShri Abhyankar   A[13] =  W[1]*B[12]  + W[7]*B[13]  + W[13]*B[14]  + W[19]*B[15] + W[25]*B[16] + W[31]*B[17];\
437bef36659SShri Abhyankar   A[14] =  W[2]*B[12]  + W[8]*B[13]  + W[14]*B[14]  + W[20]*B[15] + W[26]*B[16] + W[32]*B[17];\
438bef36659SShri Abhyankar   A[15] =  W[3]*B[12]  + W[9]*B[13]  + W[15]*B[14]  + W[21]*B[15] + W[27]*B[16] + W[33]*B[17];\
439bef36659SShri Abhyankar   A[16] =  W[4]*B[12]  + W[10]*B[13] + W[16]*B[14]  + W[22]*B[15] + W[28]*B[16] + W[34]*B[17];\
440bef36659SShri Abhyankar   A[17] =  W[5]*B[12]  + W[11]*B[13] + W[17]*B[14]  + W[23]*B[15] + W[29]*B[16] + W[35]*B[17];\
441bef36659SShri Abhyankar   A[18] =  W[0]*B[18]  + W[6]*B[19]  + W[12]*B[20]  + W[18]*B[21] + W[24]*B[22] + W[30]*B[23];\
442bef36659SShri Abhyankar   A[19] =  W[1]*B[18]  + W[7]*B[19]  + W[13]*B[20]  + W[19]*B[21] + W[25]*B[22] + W[31]*B[23];\
443bef36659SShri Abhyankar   A[20] =  W[2]*B[18]  + W[8]*B[19]  + W[14]*B[20]  + W[20]*B[21] + W[26]*B[22] + W[32]*B[23];\
444bef36659SShri Abhyankar   A[21] =  W[3]*B[18]  + W[9]*B[19]  + W[15]*B[20]  + W[21]*B[21] + W[27]*B[22] + W[33]*B[23];\
445bef36659SShri Abhyankar   A[22] =  W[4]*B[18]  + W[10]*B[19] + W[16]*B[20]  + W[22]*B[21] + W[28]*B[22] + W[34]*B[23];\
446bef36659SShri Abhyankar   A[23] =  W[5]*B[18]  + W[11]*B[19] + W[17]*B[20]  + W[23]*B[21] + W[29]*B[22] + W[35]*B[23];\
447bef36659SShri Abhyankar   A[24] =  W[0]*B[24]  + W[6]*B[25]  + W[12]*B[26]  + W[18]*B[27] + W[24]*B[28] + W[30]*B[29];\
448bef36659SShri Abhyankar   A[25] =  W[1]*B[24]  + W[7]*B[25]  + W[13]*B[26]  + W[19]*B[27] + W[25]*B[28] + W[31]*B[29];\
449bef36659SShri Abhyankar   A[26] =  W[2]*B[24]  + W[8]*B[25]  + W[14]*B[26]  + W[20]*B[27] + W[26]*B[28] + W[32]*B[29];\
450bef36659SShri Abhyankar   A[27] =  W[3]*B[24]  + W[9]*B[25]  + W[15]*B[26]  + W[21]*B[27] + W[27]*B[28] + W[33]*B[29];\
451bef36659SShri Abhyankar   A[28] =  W[4]*B[24]  + W[10]*B[25] + W[16]*B[26]  + W[22]*B[27] + W[28]*B[28] + W[34]*B[29];\
452bef36659SShri Abhyankar   A[29] =  W[5]*B[24]  + W[11]*B[25] + W[17]*B[26]  + W[23]*B[27] + W[29]*B[28] + W[35]*B[29];\
453bef36659SShri Abhyankar   A[30] =  W[0]*B[30]  + W[6]*B[31]  + W[12]*B[32]  + W[18]*B[33] + W[24]*B[34] + W[30]*B[35];\
454bef36659SShri Abhyankar   A[31] =  W[1]*B[30]  + W[7]*B[31]  + W[13]*B[32]  + W[19]*B[33] + W[25]*B[34] + W[31]*B[35];\
455bef36659SShri Abhyankar   A[32] =  W[2]*B[30]  + W[8]*B[31]  + W[14]*B[32]  + W[20]*B[33] + W[26]*B[34] + W[32]*B[35];\
456bef36659SShri Abhyankar   A[33] =  W[3]*B[30]  + W[9]*B[31]  + W[15]*B[32]  + W[21]*B[33] + W[27]*B[34] + W[33]*B[35];\
457bef36659SShri Abhyankar   A[34] =  W[4]*B[30]  + W[10]*B[31] + W[16]*B[32]  + W[22]*B[33] + W[28]*B[34] + W[34]*B[35];\
458bef36659SShri Abhyankar   A[35] =  W[5]*B[30]  + W[11]*B[31] + W[17]*B[32]  + W[23]*B[33] + W[29]*B[34] + W[35]*B[35];\
459bef36659SShri Abhyankar }
460bef36659SShri Abhyankar 
461bef36659SShri Abhyankar /*
462bef36659SShri Abhyankar   Kernel_A_gets_A_minus_B_times_C_6: A = A - B * C with size bs=6
463bef36659SShri Abhyankar 
464bef36659SShri Abhyankar   Input Parameters:
465bef36659SShri Abhyankar +  A,B,C - square bs by bs arrays stored in column major order
466bef36659SShri Abhyankar 
467bef36659SShri Abhyankar   Output Parameter:
468bef36659SShri Abhyankar .  A = A - B*C
469bef36659SShri Abhyankar */
470bef36659SShri Abhyankar 
471bef36659SShri Abhyankar #define Kernel_A_gets_A_minus_B_times_C_6(A,B,C) 0;\
472bef36659SShri Abhyankar {\
473bef36659SShri Abhyankar   A[0]  -=  B[0]*C[0]   + B[6]*C[1]   + B[12]*C[2]   + B[18]*C[3]  + B[24]*C[4]  + B[30]*C[5];\
474bef36659SShri Abhyankar   A[1]  -=  B[1]*C[0]   + B[7]*C[1]   + B[13]*C[2]   + B[19]*C[3]  + B[25]*C[4]  + B[31]*C[5];\
475bef36659SShri Abhyankar   A[2]  -=  B[2]*C[0]   + B[8]*C[1]   + B[14]*C[2]   + B[20]*C[3]  + B[26]*C[4]  + B[32]*C[5];\
476bef36659SShri Abhyankar   A[3]  -=  B[3]*C[0]   + B[9]*C[1]   + B[15]*C[2]   + B[21]*C[3]  + B[27]*C[4]  + B[33]*C[5];\
477bef36659SShri Abhyankar   A[4]  -=  B[4]*C[0]   + B[10]*C[1]  + B[16]*C[2]   + B[22]*C[3]  + B[28]*C[4]  + B[34]*C[5];\
478bef36659SShri Abhyankar   A[5]  -=  B[5]*C[0]   + B[11]*C[1]  + B[17]*C[2]   + B[23]*C[3]  + B[29]*C[4]  + B[35]*C[5];\
479bef36659SShri Abhyankar   A[6]  -=  B[0]*C[6]   + B[6]*C[7]   + B[12]*C[8]   + B[18]*C[9]  + B[24]*C[10] + B[30]*C[11];\
480bef36659SShri Abhyankar   A[7]  -=  B[1]*C[6]   + B[7]*C[7]   + B[13]*C[8]   + B[19]*C[9]  + B[25]*C[10] + B[31]*C[11];\
481bef36659SShri Abhyankar   A[8]  -=  B[2]*C[6]   + B[8]*C[7]   + B[14]*C[8]   + B[20]*C[9]  + B[26]*C[10] + B[32]*C[11];\
482bef36659SShri Abhyankar   A[9]  -=  B[3]*C[6]   + B[9]*C[7]   + B[15]*C[8]   + B[21]*C[9]  + B[27]*C[10] + B[33]*C[11];\
483bef36659SShri Abhyankar   A[10] -=  B[4]*C[6]   + B[10]*C[7]  + B[16]*C[8]   + B[22]*C[9]  + B[28]*C[10] + B[34]*C[11];\
484bef36659SShri Abhyankar   A[11] -=  B[5]*C[6]   + B[11]*C[7]  + B[17]*C[8]   + B[23]*C[9]  + B[29]*C[10] + B[35]*C[11];\
485bef36659SShri Abhyankar   A[12] -=  B[0]*C[12]  + B[6]*C[13]  + B[12]*C[14]  + B[18]*C[15] + B[24]*C[16] + B[30]*C[17];\
486bef36659SShri Abhyankar   A[13] -=  B[1]*C[12]  + B[7]*C[13]  + B[13]*C[14]  + B[19]*C[15] + B[25]*C[16] + B[31]*C[17];\
487bef36659SShri Abhyankar   A[14] -=  B[2]*C[12]  + B[8]*C[13]  + B[14]*C[14]  + B[20]*C[15] + B[26]*C[16] + B[32]*C[17];\
488bef36659SShri Abhyankar   A[15] -=  B[3]*C[12]  + B[9]*C[13]  + B[15]*C[14]  + B[21]*C[15] + B[27]*C[16] + B[33]*C[17];\
489bef36659SShri Abhyankar   A[16] -=  B[4]*C[12]  + B[10]*C[13] + B[16]*C[14]  + B[22]*C[15] + B[28]*C[16] + B[34]*C[17];\
490bef36659SShri Abhyankar   A[17] -=  B[5]*C[12]  + B[11]*C[13] + B[17]*C[14]  + B[23]*C[15] + B[29]*C[16] + B[35]*C[17];\
491bef36659SShri Abhyankar   A[18] -=  B[0]*C[18]  + B[6]*C[19]  + B[12]*C[20]  + B[18]*C[21] + B[24]*C[22] + B[30]*C[23];\
492bef36659SShri Abhyankar   A[19] -=  B[1]*C[18]  + B[7]*C[19]  + B[13]*C[20]  + B[19]*C[21] + B[25]*C[22] + B[31]*C[23];\
493bef36659SShri Abhyankar   A[20] -=  B[2]*C[18]  + B[8]*C[19]  + B[14]*C[20]  + B[20]*C[21] + B[26]*C[22] + B[32]*C[23];\
494bef36659SShri Abhyankar   A[21] -=  B[3]*C[18]  + B[9]*C[19]  + B[15]*C[20]  + B[21]*C[21] + B[27]*C[22] + B[33]*C[23];\
495bef36659SShri Abhyankar   A[22] -=  B[4]*C[18]  + B[10]*C[19] + B[16]*C[20]  + B[22]*C[21] + B[28]*C[22] + B[34]*C[23];\
496bef36659SShri Abhyankar   A[23] -=  B[5]*C[18]  + B[11]*C[19] + B[17]*C[20]  + B[23]*C[21] + B[29]*C[22] + B[35]*C[23];\
497bef36659SShri Abhyankar   A[24] -=  B[0]*C[24]  + B[6]*C[25]  + B[12]*C[26]  + B[18]*C[27] + B[24]*C[28] + B[30]*C[29];\
498bef36659SShri Abhyankar   A[25] -=  B[1]*C[24]  + B[7]*C[25]  + B[13]*C[26]  + B[19]*C[27] + B[25]*C[28] + B[31]*C[29];\
499bef36659SShri Abhyankar   A[26] -=  B[2]*C[24]  + B[8]*C[25]  + B[14]*C[26]  + B[20]*C[27] + B[26]*C[28] + B[32]*C[29];\
500bef36659SShri Abhyankar   A[27] -=  B[3]*C[24]  + B[9]*C[25]  + B[15]*C[26]  + B[21]*C[27] + B[27]*C[28] + B[33]*C[29];\
501bef36659SShri Abhyankar   A[28] -=  B[4]*C[24]  + B[10]*C[25] + B[16]*C[26]  + B[22]*C[27] + B[28]*C[28] + B[34]*C[29];\
502bef36659SShri Abhyankar   A[29] -=  B[5]*C[24]  + B[11]*C[25] + B[17]*C[26]  + B[23]*C[27] + B[29]*C[28] + B[35]*C[29];\
503bef36659SShri Abhyankar   A[30] -=  B[0]*C[30]  + B[6]*C[31]  + B[12]*C[32]  + B[18]*C[33] + B[24]*C[34] + B[30]*C[35];\
504bef36659SShri Abhyankar   A[31] -=  B[1]*C[30]  + B[7]*C[31]  + B[13]*C[32]  + B[19]*C[33] + B[25]*C[34] + B[31]*C[35];\
505bef36659SShri Abhyankar   A[32] -=  B[2]*C[30]  + B[8]*C[31]  + B[14]*C[32]  + B[20]*C[33] + B[26]*C[34] + B[32]*C[35];\
506bef36659SShri Abhyankar   A[33] -=  B[3]*C[30]  + B[9]*C[31]  + B[15]*C[32]  + B[21]*C[33] + B[27]*C[34] + B[33]*C[35];\
507bef36659SShri Abhyankar   A[34] -=  B[4]*C[30]  + B[10]*C[31] + B[16]*C[32]  + B[22]*C[33] + B[28]*C[34] + B[34]*C[35];\
508bef36659SShri Abhyankar   A[35] -=  B[5]*C[30]  + B[11]*C[31] + B[17]*C[32]  + B[23]*C[33] + B[29]*C[34] + B[35]*C[35];\
509bef36659SShri Abhyankar }
510bef36659SShri Abhyankar 
511bef36659SShri Abhyankar #define Kernel_A_gets_A_times_B_7(A,B,W) 0;\
512bef36659SShri Abhyankar {\
513bef36659SShri Abhyankar   PetscMemcpy(W,A,49*sizeof(MatScalar));\
514bef36659SShri Abhyankar   A[0]  =  W[0]*B[0]   + W[7]*B[1]   + W[14]*B[2]   + W[21]*B[3]  + W[28]*B[4]  + W[35]*B[5]  + W[42]*B[6];\
515bef36659SShri Abhyankar   A[1]  =  W[1]*B[0]   + W[8]*B[1]   + W[15]*B[2]   + W[22]*B[3]  + W[29]*B[4]  + W[36]*B[5]  + W[43]*B[6];\
516bef36659SShri Abhyankar   A[2]  =  W[2]*B[0]   + W[9]*B[1]   + W[16]*B[2]   + W[23]*B[3]  + W[30]*B[4]  + W[37]*B[5]  + W[44]*B[6];\
517bef36659SShri Abhyankar   A[3]  =  W[3]*B[0]   + W[10]*B[1]   + W[17]*B[2]   + W[24]*B[3]  + W[31]*B[4]  + W[38]*B[5]  + W[45]*B[6];\
518bef36659SShri Abhyankar   A[4]  =  W[4]*B[0]   + W[11]*B[1]  + W[18]*B[2]   + W[25]*B[3]  + W[32]*B[4]  + W[39]*B[5]  + W[46]*B[6];\
519bef36659SShri Abhyankar   A[5]  =  W[5]*B[0]   + W[12]*B[1]  + W[19]*B[2]   + W[26]*B[3]  + W[33]*B[4]  + W[40]*B[5]  + W[47]*B[6];\
520bef36659SShri Abhyankar   A[6]  =  W[6]*B[0]   + W[13]*B[1]   + W[20]*B[2]   + W[27]*B[3]  + W[34]*B[4] + W[41]*B[5]  + W[48]*B[6];\
521bef36659SShri Abhyankar   A[7]  =  W[0]*B[7]   + W[7]*B[8]   + W[14]*B[9]   + W[21]*B[10]  + W[28]*B[11]  + W[35]*B[12]  + W[42]*B[13];\
522bef36659SShri Abhyankar   A[8]  =  W[1]*B[7]   + W[8]*B[8]   + W[15]*B[9]   + W[22]*B[10]  + W[29]*B[11]  + W[36]*B[12]  + W[43]*B[13];\
523bef36659SShri Abhyankar   A[9]  =  W[2]*B[7]   + W[9]*B[8]   + W[16]*B[9]   + W[23]*B[10]  + W[30]*B[11]  + W[37]*B[12]  + W[44]*B[13];\
524bef36659SShri Abhyankar   A[10]  =  W[3]*B[7]   + W[10]*B[8]   + W[17]*B[9]   + W[24]*B[10]  + W[31]*B[11]  + W[38]*B[12]  + W[45]*B[13];\
525bef36659SShri Abhyankar   A[11]  =  W[4]*B[7]   + W[11]*B[8]  + W[18]*B[9]   + W[25]*B[10]  + W[32]*B[11]  + W[39]*B[12]  + W[46]*B[13];\
526bef36659SShri Abhyankar   A[12]  =  W[5]*B[7]   + W[12]*B[8]  + W[19]*B[9]   + W[26]*B[10]  + W[33]*B[11]  + W[40]*B[12]  + W[47]*B[13];\
527bef36659SShri Abhyankar   A[13]  =  W[6]*B[7]   + W[13]*B[8]   + W[20]*B[9]   + W[27]*B[10]  + W[34]*B[11] + W[41]*B[12]  + W[48]*B[13];\
528bef36659SShri Abhyankar   A[14]  =  W[0]*B[14]   + W[7]*B[15]   + W[14]*B[16]   + W[21]*B[17]  + W[28]*B[18]  + W[35]*B[19]  + W[42]*B[20];\
529bef36659SShri Abhyankar   A[15]  =  W[1]*B[14]   + W[8]*B[15]   + W[15]*B[16]   + W[22]*B[17]  + W[29]*B[18]  + W[36]*B[19]  + W[43]*B[20];\
530bef36659SShri Abhyankar   A[16]  =  W[2]*B[14]   + W[9]*B[15]   + W[16]*B[16]   + W[23]*B[17]  + W[30]*B[18]  + W[37]*B[19]  + W[44]*B[20];\
531bef36659SShri Abhyankar   A[17]  =  W[3]*B[14]   + W[10]*B[15]   + W[17]*B[16]   + W[24]*B[17]  + W[31]*B[18]  + W[38]*B[19]  + W[45]*B[20];\
532bef36659SShri Abhyankar   A[18]  =  W[4]*B[14]   + W[11]*B[15]  + W[18]*B[16]   + W[25]*B[17]  + W[32]*B[18]  + W[39]*B[19]  + W[46]*B[20];\
533bef36659SShri Abhyankar   A[19]  =  W[5]*B[14]   + W[12]*B[15]  + W[19]*B[16]   + W[26]*B[17]  + W[33]*B[18]  + W[40]*B[19]  + W[47]*B[20];\
534bef36659SShri Abhyankar   A[20]  =  W[6]*B[14]   + W[13]*B[15]   + W[20]*B[16]   + W[27]*B[17]  + W[34]*B[18] + W[41]*B[19]  + W[48]*B[20];\
535bef36659SShri Abhyankar   A[21]  =  W[0]*B[21]   + W[7]*B[22]   + W[14]*B[23]   + W[21]*B[24]  + W[28]*B[25]  + W[35]*B[26]  + W[42]*B[27];\
536bef36659SShri Abhyankar   A[22]  =  W[1]*B[21]   + W[8]*B[22]   + W[15]*B[23]   + W[22]*B[24]  + W[29]*B[25]  + W[36]*B[26]  + W[43]*B[27];\
537bef36659SShri Abhyankar   A[23]  =  W[2]*B[21]   + W[9]*B[22]   + W[16]*B[23]   + W[23]*B[24]  + W[30]*B[25]  + W[37]*B[26]  + W[44]*B[27];\
538bef36659SShri Abhyankar   A[24]  =  W[3]*B[21]   + W[10]*B[22]   + W[17]*B[23]   + W[24]*B[24]  + W[31]*B[25]  + W[38]*B[26]  + W[45]*B[27];\
539bef36659SShri Abhyankar   A[25]  =  W[4]*B[21]   + W[11]*B[22]  + W[18]*B[23]   + W[25]*B[24]  + W[32]*B[25]  + W[39]*B[26]  + W[46]*B[27];\
540bef36659SShri Abhyankar   A[26]  =  W[5]*B[21]   + W[12]*B[22]  + W[19]*B[23]   + W[26]*B[24]  + W[33]*B[25]  + W[40]*B[26]  + W[47]*B[27];\
541bef36659SShri Abhyankar   A[27]  =  W[6]*B[21]   + W[13]*B[22]   + W[20]*B[23]   + W[27]*B[24]  + W[34]*B[25] + W[41]*B[26]  + W[48]*B[27];\
542bef36659SShri Abhyankar   A[28]  =  W[0]*B[28]   + W[7]*B[29]   + W[14]*B[30]   + W[21]*B[31]  + W[28]*B[32]  + W[35]*B[33]  + W[42]*B[34];\
543bef36659SShri Abhyankar   A[29]  =  W[1]*B[28]   + W[8]*B[29]   + W[15]*B[30]   + W[22]*B[31]  + W[29]*B[32]  + W[36]*B[33]  + W[43]*B[34];\
544bef36659SShri Abhyankar   A[30]  =  W[2]*B[28]   + W[9]*B[29]   + W[16]*B[30]   + W[23]*B[31]  + W[30]*B[32]  + W[37]*B[33]  + W[44]*B[34];\
545bef36659SShri Abhyankar   A[31]  =  W[3]*B[28]   + W[10]*B[29]   + W[17]*B[30]   + W[24]*B[31]  + W[31]*B[32]  + W[38]*B[33]  + W[45]*B[34];\
546bef36659SShri Abhyankar   A[32]  =  W[4]*B[28]   + W[11]*B[29]  + W[18]*B[30]   + W[25]*B[31]  + W[32]*B[32]  + W[39]*B[33]  + W[46]*B[34];\
547bef36659SShri Abhyankar   A[33]  =  W[5]*B[28]   + W[12]*B[29]  + W[19]*B[30]   + W[26]*B[31]  + W[33]*B[32]  + W[40]*B[33]  + W[47]*B[34];\
548bef36659SShri Abhyankar   A[34]  =  W[6]*B[28]   + W[13]*B[29]   + W[20]*B[30]   + W[27]*B[31]  + W[34]*B[32] + W[41]*B[33]  + W[48]*B[34];\
549bef36659SShri Abhyankar   A[35]  =  W[0]*B[35]   + W[7]*B[36]   + W[14]*B[37]   + W[21]*B[38]  + W[28]*B[39]  + W[35]*B[40]  + W[42]*B[41];\
550bef36659SShri Abhyankar   A[36]  =  W[1]*B[35]   + W[8]*B[36]   + W[15]*B[37]   + W[22]*B[38]  + W[29]*B[39]  + W[36]*B[40]  + W[43]*B[41];\
551bef36659SShri Abhyankar   A[37]  =  W[2]*B[35]   + W[9]*B[36]   + W[16]*B[37]   + W[23]*B[38]  + W[30]*B[39]  + W[37]*B[40]  + W[44]*B[41];\
552bef36659SShri Abhyankar   A[38]  =  W[3]*B[35]   + W[10]*B[36]   + W[17]*B[37]   + W[24]*B[38]  + W[31]*B[39]  + W[38]*B[40]  + W[45]*B[41];\
553bef36659SShri Abhyankar   A[39]  =  W[4]*B[35]   + W[11]*B[36]  + W[18]*B[37]   + W[25]*B[38]  + W[32]*B[39]  + W[39]*B[40]  + W[46]*B[41];\
554bef36659SShri Abhyankar   A[40]  =  W[5]*B[35]   + W[12]*B[36]  + W[19]*B[37]   + W[26]*B[38]  + W[33]*B[39]  + W[40]*B[40]  + W[47]*B[41];\
555bef36659SShri Abhyankar   A[41]  =  W[6]*B[35]   + W[13]*B[36]   + W[20]*B[37]   + W[27]*B[38]  + W[34]*B[39] + W[41]*B[40]  + W[48]*B[41];\
556bef36659SShri Abhyankar   A[42]  =  W[0]*B[42]   + W[7]*B[43]   + W[14]*B[44]   + W[21]*B[45]  + W[28]*B[46]  + W[35]*B[47]  + W[42]*B[48];\
557bef36659SShri Abhyankar   A[43]  =  W[1]*B[42]   + W[8]*B[43]   + W[15]*B[44]   + W[22]*B[45]  + W[29]*B[46]  + W[36]*B[47]  + W[43]*B[48];\
558bef36659SShri Abhyankar   A[44]  =  W[2]*B[42]   + W[9]*B[43]   + W[16]*B[44]   + W[23]*B[45]  + W[30]*B[46]  + W[37]*B[47]  + W[44]*B[48];\
559bef36659SShri Abhyankar   A[45]  =  W[3]*B[42]   + W[10]*B[43]   + W[17]*B[44]   + W[24]*B[45]  + W[31]*B[46]  + W[38]*B[47]  + W[45]*B[48];\
560bef36659SShri Abhyankar   A[46]  =  W[4]*B[42]   + W[11]*B[43]  + W[18]*B[44]   + W[25]*B[45]  + W[32]*B[46]  + W[39]*B[47]  + W[46]*B[48];\
561bef36659SShri Abhyankar   A[47]  =  W[5]*B[42]   + W[12]*B[43]  + W[19]*B[44]   + W[26]*B[45]  + W[33]*B[46]  + W[40]*B[47]  + W[47]*B[48];\
562bef36659SShri Abhyankar   A[48]  =  W[6]*B[42]   + W[13]*B[43]   + W[20]*B[44]   + W[27]*B[45]  + W[34]*B[46] + W[41]*B[47]  + W[48]*B[48];\
563bef36659SShri Abhyankar }
564bef36659SShri Abhyankar 
565bef36659SShri Abhyankar /*
566bef36659SShri Abhyankar   Kernel_A_gets_A_minus_B_times_C_7: A = A - B * C with size bs=7
567bef36659SShri Abhyankar 
568bef36659SShri Abhyankar   Input Parameters:
569bef36659SShri Abhyankar +  A,B,C - square bs by bs arrays stored in column major order
570bef36659SShri Abhyankar 
571bef36659SShri Abhyankar   Output Parameter:
572bef36659SShri Abhyankar .  A = A - B*C
573bef36659SShri Abhyankar */
574bef36659SShri Abhyankar 
575bef36659SShri Abhyankar #define Kernel_A_gets_A_minus_B_times_C_7(A,B,C) 0;\
576bef36659SShri Abhyankar {\
577bef36659SShri Abhyankar   A[0]  -=  B[0]*C[0]   + B[7]*C[1]   + B[14]*C[2]   + B[21]*C[3]  + B[28]*C[4]  + B[35]*C[5]  + B[42]*C[6];\
578bef36659SShri Abhyankar   A[1]  -=  B[1]*C[0]   + B[8]*C[1]   + B[15]*C[2]   + B[22]*C[3]  + B[29]*C[4]  + B[36]*C[5]  + B[43]*C[6];\
579bef36659SShri Abhyankar   A[2]  -=  B[2]*C[0]   + B[9]*C[1]   + B[16]*C[2]   + B[23]*C[3]  + B[30]*C[4]  + B[37]*C[5]  + B[44]*C[6];\
580bef36659SShri Abhyankar   A[3]  -=  B[3]*C[0]   + B[10]*C[1]   + B[17]*C[2]   + B[24]*C[3]  + B[31]*C[4]  + B[38]*C[5]  + B[45]*C[6];\
581bef36659SShri Abhyankar   A[4]  -=  B[4]*C[0]   + B[11]*C[1]  + B[18]*C[2]   + B[25]*C[3]  + B[32]*C[4]  + B[39]*C[5]  + B[46]*C[6];\
582bef36659SShri Abhyankar   A[5]  -=  B[5]*C[0]   + B[12]*C[1]  + B[19]*C[2]   + B[26]*C[3]  + B[33]*C[4]  + B[40]*C[5]  + B[47]*C[6];\
583bef36659SShri Abhyankar   A[6]  -=  B[6]*C[0]   + B[13]*C[1]   + B[20]*C[2]   + B[27]*C[3]  + B[34]*C[4] + B[41]*C[5]  + B[48]*C[6];\
584bef36659SShri Abhyankar   A[7]  -=  B[0]*C[7]   + B[7]*C[8]   + B[14]*C[9]   + B[21]*C[10]  + B[28]*C[11]  + B[35]*C[12]  + B[42]*C[13];\
585bef36659SShri Abhyankar   A[8]  -=  B[1]*C[7]   + B[8]*C[8]   + B[15]*C[9]   + B[22]*C[10]  + B[29]*C[11]  + B[36]*C[12]  + B[43]*C[13];\
586bef36659SShri Abhyankar   A[9]  -=  B[2]*C[7]   + B[9]*C[8]   + B[16]*C[9]   + B[23]*C[10]  + B[30]*C[11]  + B[37]*C[12]  + B[44]*C[13];\
587bef36659SShri Abhyankar   A[10]  -=  B[3]*C[7]   + B[10]*C[8]   + B[17]*C[9]   + B[24]*C[10]  + B[31]*C[11]  + B[38]*C[12]  + B[45]*C[13];\
588bef36659SShri Abhyankar   A[11]  -=  B[4]*C[7]   + B[11]*C[8]  + B[18]*C[9]   + B[25]*C[10]  + B[32]*C[11]  + B[39]*C[12]  + B[46]*C[13];\
589bef36659SShri Abhyankar   A[12]  -=  B[5]*C[7]   + B[12]*C[8]  + B[19]*C[9]   + B[26]*C[10]  + B[33]*C[11]  + B[40]*C[12]  + B[47]*C[13];\
590bef36659SShri Abhyankar   A[13]  -=  B[6]*C[7]   + B[13]*C[8]   + B[20]*C[9]   + B[27]*C[10]  + B[34]*C[11] + B[41]*C[12]  + B[48]*C[13];\
591bef36659SShri Abhyankar   A[14]  -=  B[0]*C[14]   + B[7]*C[15]   + B[14]*C[16]   + B[21]*C[17]  + B[28]*C[18]  + B[35]*C[19]  + B[42]*C[20];\
592bef36659SShri Abhyankar   A[15]  -=  B[1]*C[14]   + B[8]*C[15]   + B[15]*C[16]   + B[22]*C[17]  + B[29]*C[18]  + B[36]*C[19]  + B[43]*C[20];\
593bef36659SShri Abhyankar   A[16]  -=  B[2]*C[14]   + B[9]*C[15]   + B[16]*C[16]   + B[23]*C[17]  + B[30]*C[18]  + B[37]*C[19]  + B[44]*C[20];\
594bef36659SShri Abhyankar   A[17]  -=  B[3]*C[14]   + B[10]*C[15]   + B[17]*C[16]   + B[24]*C[17]  + B[31]*C[18]  + B[38]*C[19]  + B[45]*C[20];\
595bef36659SShri Abhyankar   A[18]  -=  B[4]*C[14]   + B[11]*C[15]  + B[18]*C[16]   + B[25]*C[17]  + B[32]*C[18]  + B[39]*C[19]  + B[46]*C[20];\
596bef36659SShri Abhyankar   A[19]  -=  B[5]*C[14]   + B[12]*C[15]  + B[19]*C[16]   + B[26]*C[17]  + B[33]*C[18]  + B[40]*C[19]  + B[47]*C[20];\
597bef36659SShri Abhyankar   A[20]  -=  B[6]*C[14]   + B[13]*C[15]   + B[20]*C[16]   + B[27]*C[17]  + B[34]*C[18] + B[41]*C[19]  + B[48]*C[20];\
598bef36659SShri Abhyankar   A[21]  -=  B[0]*C[21]   + B[7]*C[22]   + B[14]*C[23]   + B[21]*C[24]  + B[28]*C[25]  + B[35]*C[26]  + B[42]*C[27];\
599bef36659SShri Abhyankar   A[22]  -=  B[1]*C[21]   + B[8]*C[22]   + B[15]*C[23]   + B[22]*C[24]  + B[29]*C[25]  + B[36]*C[26]  + B[43]*C[27];\
600bef36659SShri Abhyankar   A[23]  -=  B[2]*C[21]   + B[9]*C[22]   + B[16]*C[23]   + B[23]*C[24]  + B[30]*C[25]  + B[37]*C[26]  + B[44]*C[27];\
601bef36659SShri Abhyankar   A[24]  -=  B[3]*C[21]   + B[10]*C[22]   + B[17]*C[23]   + B[24]*C[24]  + B[31]*C[25]  + B[38]*C[26]  + B[45]*C[27];\
602bef36659SShri Abhyankar   A[25]  -=  B[4]*C[21]   + B[11]*C[22]  + B[18]*C[23]   + B[25]*C[24]  + B[32]*C[25]  + B[39]*C[26]  + B[46]*C[27];\
603bef36659SShri Abhyankar   A[26]  -=  B[5]*C[21]   + B[12]*C[22]  + B[19]*C[23]   + B[26]*C[24]  + B[33]*C[25]  + B[40]*C[26]  + B[47]*C[27];\
604bef36659SShri Abhyankar   A[27]  -=  B[6]*C[21]   + B[13]*C[22]   + B[20]*C[23]   + B[27]*C[24]  + B[34]*C[25] + B[41]*C[26]  + B[48]*C[27];\
605bef36659SShri Abhyankar   A[28]  -=  B[0]*C[28]   + B[7]*C[29]   + B[14]*C[30]   + B[21]*C[31]  + B[28]*C[32]  + B[35]*C[33]  + B[42]*C[34];\
606bef36659SShri Abhyankar   A[29]  -=  B[1]*C[28]   + B[8]*C[29]   + B[15]*C[30]   + B[22]*C[31]  + B[29]*C[32]  + B[36]*C[33]  + B[43]*C[34];\
607bef36659SShri Abhyankar   A[30]  -=  B[2]*C[28]   + B[9]*C[29]   + B[16]*C[30]   + B[23]*C[31]  + B[30]*C[32]  + B[37]*C[33]  + B[44]*C[34];\
608bef36659SShri Abhyankar   A[31]  -=  B[3]*C[28]   + B[10]*C[29]   + B[17]*C[30]   + B[24]*C[31]  + B[31]*C[32]  + B[38]*C[33]  + B[45]*C[34];\
609bef36659SShri Abhyankar   A[32]  -=  B[4]*C[28]   + B[11]*C[29]  + B[18]*C[30]   + B[25]*C[31]  + B[32]*C[32]  + B[39]*C[33]  + B[46]*C[34];\
610bef36659SShri Abhyankar   A[33]  -=  B[5]*C[28]   + B[12]*C[29]  + B[19]*C[30]   + B[26]*C[31]  + B[33]*C[32]  + B[40]*C[33]  + B[47]*C[34];\
611bef36659SShri Abhyankar   A[34]  -=  B[6]*C[28]   + B[13]*C[29]   + B[20]*C[30]   + B[27]*C[31]  + B[34]*C[32] + B[41]*C[33]  + B[48]*C[34];\
612bef36659SShri Abhyankar   A[35]  -=  B[0]*C[35]   + B[7]*C[36]   + B[14]*C[37]   + B[21]*C[38]  + B[28]*C[39]  + B[35]*C[40]  + B[42]*C[41];\
613bef36659SShri Abhyankar   A[36]  -=  B[1]*C[35]   + B[8]*C[36]   + B[15]*C[37]   + B[22]*C[38]  + B[29]*C[39]  + B[36]*C[40]  + B[43]*C[41];\
614bef36659SShri Abhyankar   A[37]  -=  B[2]*C[35]   + B[9]*C[36]   + B[16]*C[37]   + B[23]*C[38]  + B[30]*C[39]  + B[37]*C[40]  + B[44]*C[41];\
615bef36659SShri Abhyankar   A[38]  -=  B[3]*C[35]   + B[10]*C[36]   + B[17]*C[37]   + B[24]*C[38]  + B[31]*C[39]  + B[38]*C[40]  + B[45]*C[41];\
616bef36659SShri Abhyankar   A[39]  -=  B[4]*C[35]   + B[11]*C[36]  + B[18]*C[37]   + B[25]*C[38]  + B[32]*C[39]  + B[39]*C[40]  + B[46]*C[41];\
617bef36659SShri Abhyankar   A[40]  -=  B[5]*C[35]   + B[12]*C[36]  + B[19]*C[37]   + B[26]*C[38]  + B[33]*C[39]  + B[40]*C[40]  + B[47]*C[41];\
618bef36659SShri Abhyankar   A[41]  -=  B[6]*C[35]   + B[13]*C[36]   + B[20]*C[37]   + B[27]*C[38]  + B[34]*C[39] + B[41]*C[40]  + B[48]*C[41];\
619bef36659SShri Abhyankar   A[42]  -=  B[0]*C[42]   + B[7]*C[43]   + B[14]*C[44]   + B[21]*C[45]  + B[28]*C[46]  + B[35]*C[47]  + B[42]*C[48];\
620bef36659SShri Abhyankar   A[43]  -=  B[1]*C[42]   + B[8]*C[43]   + B[15]*C[44]   + B[22]*C[45]  + B[29]*C[46]  + B[36]*C[47]  + B[43]*C[48];\
621bef36659SShri Abhyankar   A[44]  -=  B[2]*C[42]   + B[9]*C[43]   + B[16]*C[44]   + B[23]*C[45]  + B[30]*C[46]  + B[37]*C[47]  + B[44]*C[48];\
622bef36659SShri Abhyankar   A[45]  -=  B[3]*C[42]   + B[10]*C[43]   + B[17]*C[44]   + B[24]*C[45]  + B[31]*C[46]  + B[38]*C[47]  + B[45]*C[48];\
623bef36659SShri Abhyankar   A[46]  -=  B[4]*C[42]   + B[11]*C[43]  + B[18]*C[44]   + B[25]*C[45]  + B[32]*C[46]  + B[39]*C[47]  + B[46]*C[48];\
624bef36659SShri Abhyankar   A[47]  -=  B[5]*C[42]   + B[12]*C[43]  + B[19]*C[44]   + B[26]*C[45]  + B[33]*C[46]  + B[40]*C[47]  + B[47]*C[48];\
625bef36659SShri Abhyankar   A[48]  -=  B[6]*C[42]   + B[13]*C[43]   + B[20]*C[44]   + B[27]*C[45]  + B[34]*C[46] + B[41]*C[47]  + B[48]*C[48];\
626bef36659SShri Abhyankar }
627bef36659SShri Abhyankar 
628bef36659SShri Abhyankar 
6292593348eSBarry Smith #endif
630