xref: /petsc/src/mat/impls/baij/seq/baij.h (revision 0deeaf618d282af9bbba6efcbcb8e7a07d546a23)
12593348eSBarry Smith 
235aab85fSBarry Smith #if !defined(__BAIJ_H)
335aab85fSBarry Smith #define __BAIJ_H
47c4f633dSBarry Smith #include "private/matimpl.h"
57c4f633dSBarry Smith #include "../src/mat/impls/aij/seq/aij.h"
6a4005a5dSBarry Smith #include "../src/mat/impls/baij/seq/ftn-kernels/fsolvebaij.h"
72593348eSBarry Smith 
82593348eSBarry Smith /*
935aab85fSBarry Smith   MATSEQBAIJ format - Block compressed row storage. The i[] and j[]
10e24b481bSBarry Smith   arrays start at 0.
112593348eSBarry Smith */
122593348eSBarry Smith 
136c6c5352SBarry Smith /* This header is shared by the SeqSBAIJ matrix */
146c6c5352SBarry Smith #define SEQBAIJHEADER \
15521d7252SBarry Smith   PetscInt         bs2;              /*  square of block size */                                     \
16690b6cddSBarry Smith   PetscInt         mbs,nbs;          /* rows/bs, columns/bs */                                       \
176c6c5352SBarry Smith   PetscScalar      *mult_work;       /* work array for matrix vector product*/                       \
18dd6ea824SBarry Smith   MatScalar        *saved_values;                                                                    \
196c6c5352SBarry Smith                                                                                                      \
206ad2eaddSHong Zhang   Mat              sbaijMat;         /* mat in sbaij format */                                       \
216c6c5352SBarry Smith                                                                                                      \
226c6c5352SBarry Smith   PetscTruth       pivotinblocks;    /* pivot inside factorization of each diagonal block */         \
236c6c5352SBarry Smith                                                                                                      \
24dd6ea824SBarry Smith   MatScalar        *idiag;           /* inverse of block diagonal  */                                \
25e6b907acSBarry Smith   PetscTruth       idiagvalid       /* if above has correct/current values */
26e6b907acSBarry Smith 
276c6c5352SBarry Smith 
286c6c5352SBarry Smith typedef struct {
29dd6ea824SBarry Smith   SEQAIJHEADER(MatScalar);
30e6b907acSBarry Smith   SEQBAIJHEADER;
3135aab85fSBarry Smith } Mat_SeqBAIJ;
322593348eSBarry Smith 
33f349cc81SSatish Balay EXTERN_C_BEGIN
34ab93d7beSBarry Smith EXTERN PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat,PetscInt,PetscInt,PetscInt*);
35f349cc81SSatish Balay EXTERN_C_END
360481f469SBarry Smith EXTERN PetscErrorCode MatILUFactorSymbolic_SeqBAIJ(Mat,Mat,IS,IS,const MatFactorInfo*);
370481f469SBarry Smith EXTERN PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat,Mat,IS,const MatFactorInfo*);
380481f469SBarry Smith EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat,Mat,IS,const MatFactorInfo*);
390481f469SBarry Smith EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat,Mat,const MatFactorInfo*);
400481f469SBarry Smith EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
41dfbe8321SBarry Smith EXTERN PetscErrorCode MatDuplicate_SeqBAIJ(Mat,MatDuplicateOption,Mat*);
422af78befSBarry Smith EXTERN PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat,PetscTruth*,PetscInt*);
43dfbe8321SBarry Smith EXTERN PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat);
44c8342467SHong Zhang EXTERN PetscErrorCode MatILUDTFactor_SeqBAIJ(Mat,IS,IS,const MatFactorInfo*,Mat*);
45be3590efSBarry Smith 
460481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat,Mat,IS,IS,const MatFactorInfo*);
470481f469SBarry Smith EXTERN PetscErrorCode MatLUFactor_SeqBAIJ(Mat,IS,IS,const MatFactorInfo*);
48690b6cddSBarry Smith EXTERN PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat,PetscInt,IS*,PetscInt);
494aa3045dSJed Brown EXTERN PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatReuse,Mat*);
50690b6cddSBarry Smith EXTERN PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]);
51dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultTranspose_SeqBAIJ(Mat,Vec,Vec);
52dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
53f4df32b1SMatthew Knepley EXTERN PetscErrorCode MatScale_SeqBAIJ(Mat,PetscScalar);
54dfbe8321SBarry Smith EXTERN PetscErrorCode MatNorm_SeqBAIJ(Mat,NormType,PetscReal *);
55dfbe8321SBarry Smith EXTERN PetscErrorCode MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*);
56dfbe8321SBarry Smith EXTERN PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat,Vec);
57dfbe8321SBarry Smith EXTERN PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
58dfbe8321SBarry Smith EXTERN PetscErrorCode MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
59dfbe8321SBarry Smith EXTERN PetscErrorCode MatZeroEntries_SeqBAIJ(Mat);
6054138f6bSKris Buschelman 
61dfbe8321SBarry Smith EXTERN PetscErrorCode MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(Mat);
6254138f6bSKris Buschelman 
63dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
64dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
65dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
668bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_newdatastruct(Mat,Vec,Vec);
67dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
688bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
698bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
70dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
718bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_newdatastruct(Mat,Vec,Vec);
72dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
738bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
74dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
758bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_newdatastruct(Mat,Vec,Vec);
76dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
778bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
7854138f6bSKris Buschelman #if defined(PETSC_HAVE_SSE)
79dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_SSE_Demotion(Mat,Vec,Vec);
80dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_SSE_Demotion(Mat,Vec,Vec);
81dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_SSE_Demotion_usj(Mat,Vec,Vec);
824110d75bSKris Buschelman #endif
83dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
848bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_newdatastruct(Mat,Vec,Vec);
85dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
868bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
87dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
888bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_newdatastruct(Mat,Vec,Vec);
89dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
908bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
91dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
928bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_newdatastruct(Mat,Vec,Vec);
93dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
948bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
95dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
968bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_N_newdatastruct(Mat,Vec,Vec);
978bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
98dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_Update(Mat,Vec,Vec);
99dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec);
100dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
101dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec);
102dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
103dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec);
104dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
105dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec);
106dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
107dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec);
108dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
109dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec);
110dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
111dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec);
112dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
11354138f6bSKris Buschelman 
114faca2338SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N_newdatastruct(Mat,Mat,const MatFactorInfo*);
115faca2338SShri Abhyankar 
1160481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat,const MatFactorInfo*);
1170481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat,const MatFactorInfo*);
118b588c5a2SHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_newdatastruct(Mat,Mat,const MatFactorInfo*);
1190481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
1204c000e2eSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*);
1214c000e2eSHong Zhang 
1220481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat,const MatFactorInfo*);
12317542729SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_newdatastruct(Mat,Mat,const MatFactorInfo*);
1240481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
12517542729SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*);
1260481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat,const MatFactorInfo*);
127209027a4SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_newdatastruct(Mat,Mat,const MatFactorInfo*);
1280481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
129209027a4SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*);
13054138f6bSKris Buschelman #if defined(PETSC_HAVE_SSE)
1310481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE(Mat,Mat,const MatFactorInfo*);
1320481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj(Mat,Mat,const MatFactorInfo*);
13354138f6bSKris Buschelman #else
13454138f6bSKris Buschelman #endif
1350481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat,const MatFactorInfo*);
136*0deeaf61SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_newdatastruct(Mat,Mat,const MatFactorInfo*);
1370481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
138*0deeaf61SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*);
1390481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat,const MatFactorInfo*);
1400481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
1410481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7(Mat,Mat,const MatFactorInfo*);
1420481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
1430481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat,const MatFactorInfo*);
14454138f6bSKris Buschelman 
145dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_1(Mat,Vec,Vec);
146dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_2(Mat,Vec,Vec);
147dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_3(Mat,Vec,Vec);
148dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_4(Mat,Vec,Vec);
149dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_5(Mat,Vec,Vec);
150dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_6(Mat,Vec,Vec);
151dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_7(Mat,Vec,Vec);
152dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_N(Mat,Vec,Vec);
15354138f6bSKris Buschelman 
154dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
155dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
156dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
157dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
158dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
159dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
160dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
161dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
162a313700dSBarry Smith EXTERN PetscErrorCode MatLoad_SeqBAIJ(PetscViewer, const MatType,Mat*);
1632593348eSBarry Smith 
1644c000e2eSHong Zhang /*
1654c000e2eSHong Zhang   Kernel_A_gets_A_times_B_2: A = A * B with size bs=2
1664c000e2eSHong Zhang 
1674c000e2eSHong Zhang   Input Parameters:
1684c000e2eSHong Zhang +  A,B - square bs by bs arrays stored in column major order
1694c000e2eSHong Zhang -  W   - bs*bs work arrary
1704c000e2eSHong Zhang 
1714c000e2eSHong Zhang   Output Parameter:
1724c000e2eSHong Zhang .  A = A * B
1734c000e2eSHong Zhang */
1744c000e2eSHong Zhang 
1754c000e2eSHong Zhang #define Kernel_A_gets_A_times_B_2(A,B,W) 0;\
1764c000e2eSHong Zhang {\
1774c000e2eSHong Zhang   PetscMemcpy(W,A,4*sizeof(MatScalar));\
1784c000e2eSHong Zhang   A[0] = W[0]*B[0] + W[2]*B[1];\
1794c000e2eSHong Zhang   A[1] = W[1]*B[0] + W[3]*B[1];\
1804c000e2eSHong Zhang   A[2] = W[0]*B[2] + W[2]*B[3];\
1814c000e2eSHong Zhang   A[3] = W[1]*B[2] + W[3]*B[3];\
1824c000e2eSHong Zhang }
1834c000e2eSHong Zhang 
1844c000e2eSHong Zhang /*
1854c000e2eSHong Zhang   Kernel_A_gets_A_minus_B_times_C_2: A = A - B * C with size bs=2
1864c000e2eSHong Zhang 
1874c000e2eSHong Zhang   Input Parameters:
1884c000e2eSHong Zhang +  A,B,C - square bs by bs arrays stored in column major order
1894c000e2eSHong Zhang 
1904c000e2eSHong Zhang   Output Parameter:
1914c000e2eSHong Zhang .  A = A - B*C
1924c000e2eSHong Zhang */
1934c000e2eSHong Zhang 
1944c000e2eSHong Zhang #define Kernel_A_gets_A_minus_B_times_C_2(A,B,C) 0;\
1954c000e2eSHong Zhang {\
1964c000e2eSHong Zhang   A[0] -= B[0]*C[0] + B[2]*C[1];\
1974c000e2eSHong Zhang   A[1] -= B[1]*C[0] + B[3]*C[1];\
1984c000e2eSHong Zhang   A[2] -= B[0]*C[2] + B[2]*C[3];\
1994c000e2eSHong Zhang   A[3] -= B[1]*C[2] + B[3]*C[3];\
2004c000e2eSHong Zhang }
2014c000e2eSHong Zhang 
20217542729SShri Abhyankar /*
20317542729SShri Abhyankar   Kernel_A_gets_A_times_B_3: A = A * B with size bs=3
20417542729SShri Abhyankar 
20517542729SShri Abhyankar   Input Parameters:
20617542729SShri Abhyankar +  A,B - square bs by bs arrays stored in column major order
20717542729SShri Abhyankar -  W   - bs*bs work arrary
20817542729SShri Abhyankar 
20917542729SShri Abhyankar   Output Parameter:
21017542729SShri Abhyankar .  A = A * B
21117542729SShri Abhyankar */
21217542729SShri Abhyankar 
21317542729SShri Abhyankar #define Kernel_A_gets_A_times_B_3(A,B,W) 0;\
21417542729SShri Abhyankar {\
21517542729SShri Abhyankar   PetscMemcpy(W,A,9*sizeof(MatScalar));\
21617542729SShri Abhyankar   A[0] = W[0]*B[0] + W[3]*B[1] + W[6]*B[2];\
21717542729SShri Abhyankar   A[1] = W[1]*B[0] + W[4]*B[1] + W[7]*B[2];\
21817542729SShri Abhyankar   A[2] = W[2]*B[0] + W[5]*B[1] + W[8]*B[2];\
21917542729SShri Abhyankar   A[3] = W[0]*B[3] + W[3]*B[4] + W[6]*B[5];\
22017542729SShri Abhyankar   A[4] = W[1]*B[3] + W[4]*B[4] + W[7]*B[5];\
22117542729SShri Abhyankar   A[5] = W[2]*B[3] + W[5]*B[4] + W[8]*B[5];\
22217542729SShri Abhyankar   A[6] = W[0]*B[6] + W[3]*B[7] + W[6]*B[8];\
22317542729SShri Abhyankar   A[7] = W[1]*B[6] + W[4]*B[7] + W[7]*B[8];\
22417542729SShri Abhyankar   A[8] = W[2]*B[6] + W[5]*B[7] + W[8]*B[8];\
22517542729SShri Abhyankar }
22617542729SShri Abhyankar 
22717542729SShri Abhyankar /*
22817542729SShri Abhyankar   Kernel_A_gets_A_minus_B_times_C_3: A = A - B * C with size bs=3
22917542729SShri Abhyankar 
23017542729SShri Abhyankar   Input Parameters:
23117542729SShri Abhyankar +  A,B,C - square bs by bs arrays stored in column major order
23217542729SShri Abhyankar 
23317542729SShri Abhyankar   Output Parameter:
23417542729SShri Abhyankar .  A = A - B*C
23517542729SShri Abhyankar */
23617542729SShri Abhyankar 
23717542729SShri Abhyankar #define Kernel_A_gets_A_minus_B_times_C_3(A,B,C) 0;\
23817542729SShri Abhyankar {\
23917542729SShri Abhyankar   A[0] -= B[0]*C[0] + B[3]*C[1] + B[6]*C[2];\
24017542729SShri Abhyankar   A[1] -= B[1]*C[0] + B[4]*C[1] + B[7]*C[2];\
24117542729SShri Abhyankar   A[2] -= B[2]*C[0] + B[5]*C[1] + B[8]*C[2];\
24217542729SShri Abhyankar   A[3] -= B[0]*C[3] + B[3]*C[4] + B[6]*C[5];\
24317542729SShri Abhyankar   A[4] -= B[1]*C[3] + B[4]*C[4] + B[7]*C[5];\
24417542729SShri Abhyankar   A[5] -= B[2]*C[3] + B[5]*C[4] + B[8]*C[5];\
24517542729SShri Abhyankar   A[6] -= B[0]*C[6] + B[3]*C[7] + B[6]*C[8];\
24617542729SShri Abhyankar   A[7] -= B[1]*C[6] + B[4]*C[7] + B[7]*C[8];\
24717542729SShri Abhyankar   A[8] -= B[2]*C[6] + B[5]*C[7] + B[8]*C[8];\
24817542729SShri Abhyankar }
24917542729SShri Abhyankar 
250209027a4SShri Abhyankar /*
251209027a4SShri Abhyankar   Kernel_A_gets_A_times_B_4: A = A * B with size bs=4
252209027a4SShri Abhyankar 
253209027a4SShri Abhyankar   Input Parameters:
254209027a4SShri Abhyankar +  A,B - square bs by bs arrays stored in column major order
255209027a4SShri Abhyankar -  W   - bs*bs work arrary
256209027a4SShri Abhyankar 
257209027a4SShri Abhyankar   Output Parameter:
258209027a4SShri Abhyankar .  A = A * B
259209027a4SShri Abhyankar */
260209027a4SShri Abhyankar 
261209027a4SShri Abhyankar #define Kernel_A_gets_A_times_B_4(A,B,W) 0;\
262209027a4SShri Abhyankar {\
263209027a4SShri Abhyankar   PetscMemcpy(W,A,16*sizeof(MatScalar));\
264209027a4SShri Abhyankar   A[0] =  W[0]*B[0]  + W[4]*B[1]  + W[8]*B[2]   + W[12]*B[3];\
265209027a4SShri Abhyankar   A[1] =  W[1]*B[0]  + W[5]*B[1]  + W[9]*B[2]   + W[13]*B[3];\
266209027a4SShri Abhyankar   A[2] =  W[2]*B[0]  + W[6]*B[1]  + W[10]*B[2]  + W[14]*B[3];\
267209027a4SShri Abhyankar   A[3] =  W[3]*B[0]  + W[7]*B[1]  + W[11]*B[2]  + W[15]*B[3];\
268209027a4SShri Abhyankar   A[4] =  W[0]*B[4]  + W[4]*B[5]  + W[8]*B[6]   + W[12]*B[7];\
269209027a4SShri Abhyankar   A[5] =  W[1]*B[4]  + W[5]*B[5]  + W[9]*B[6]   + W[13]*B[7];\
270209027a4SShri Abhyankar   A[6] =  W[2]*B[4]  + W[6]*B[5]  + W[10]*B[6]  + W[14]*B[7];\
271209027a4SShri Abhyankar   A[7] =  W[3]*B[4]  + W[7]*B[5]  + W[11]*B[6]  + W[15]*B[7];\
272209027a4SShri Abhyankar   A[8] =  W[0]*B[8]  + W[4]*B[9]  + W[8]*B[10]  + W[12]*B[11];\
273209027a4SShri Abhyankar   A[9] =  W[1]*B[8]  + W[5]*B[9]  + W[9]*B[10]  + W[13]*B[11];\
274209027a4SShri Abhyankar   A[10] = W[2]*B[8]  + W[6]*B[9]  + W[10]*B[10] + W[14]*B[11];\
275209027a4SShri Abhyankar   A[11] = W[3]*B[8]  + W[7]*B[9]  + W[11]*B[10] + W[15]*B[11];\
276209027a4SShri Abhyankar   A[12] = W[0]*B[12] + W[4]*B[13] + W[8]*B[14]  + W[12]*B[15];\
277209027a4SShri Abhyankar   A[13] = W[1]*B[12] + W[5]*B[13] + W[9]*B[14]  + W[13]*B[15];\
278209027a4SShri Abhyankar   A[14] = W[2]*B[12] + W[6]*B[13] + W[10]*B[14] + W[14]*B[15];\
279209027a4SShri Abhyankar   A[15] = W[3]*B[12] + W[7]*B[13] + W[11]*B[14] + W[15]*B[15];\
280209027a4SShri Abhyankar }
281209027a4SShri Abhyankar 
282209027a4SShri Abhyankar /*
283209027a4SShri Abhyankar   Kernel_A_gets_A_minus_B_times_C_4: A = A - B * C with size bs=4
284209027a4SShri Abhyankar 
285209027a4SShri Abhyankar   Input Parameters:
286209027a4SShri Abhyankar +  A,B,C - square bs by bs arrays stored in column major order
287209027a4SShri Abhyankar 
288209027a4SShri Abhyankar   Output Parameter:
289209027a4SShri Abhyankar .  A = A - B*C
290209027a4SShri Abhyankar */
291209027a4SShri Abhyankar 
292209027a4SShri Abhyankar #define Kernel_A_gets_A_minus_B_times_C_4(A,B,C) 0;\
293209027a4SShri Abhyankar {\
294209027a4SShri Abhyankar   A[0] -=  B[0]*C[0]  + B[4]*C[1]  + B[8]*C[2]   + B[12]*C[3];\
295209027a4SShri Abhyankar   A[1] -=  B[1]*C[0]  + B[5]*C[1]  + B[9]*C[2]   + B[13]*C[3];\
296209027a4SShri Abhyankar   A[2] -=  B[2]*C[0]  + B[6]*C[1]  + B[10]*C[2]  + B[14]*C[3];\
297209027a4SShri Abhyankar   A[3] -=  B[3]*C[0]  + B[7]*C[1]  + B[11]*C[2]  + B[15]*C[3];\
298209027a4SShri Abhyankar   A[4] -=  B[0]*C[4]  + B[4]*C[5]  + B[8]*C[6]   + B[12]*C[7];\
299209027a4SShri Abhyankar   A[5] -=  B[1]*C[4]  + B[5]*C[5]  + B[9]*C[6]   + B[13]*C[7];\
300209027a4SShri Abhyankar   A[6] -=  B[2]*C[4]  + B[6]*C[5]  + B[10]*C[6]  + B[14]*C[7];\
301209027a4SShri Abhyankar   A[7] -=  B[3]*C[4]  + B[7]*C[5]  + B[11]*C[6]  + B[15]*C[7];\
302209027a4SShri Abhyankar   A[8] -=  B[0]*C[8]  + B[4]*C[9]  + B[8]*C[10]  + B[12]*C[11];\
303209027a4SShri Abhyankar   A[9] -=  B[1]*C[8]  + B[5]*C[9]  + B[9]*C[10]  + B[13]*C[11];\
304209027a4SShri Abhyankar   A[10] -= B[2]*C[8]  + B[6]*C[9]  + B[10]*C[10] + B[14]*C[11];\
305209027a4SShri Abhyankar   A[11] -= B[3]*C[8]  + B[7]*C[9]  + B[11]*C[10] + B[15]*C[11];\
306209027a4SShri Abhyankar   A[12] -= B[0]*C[12] + B[4]*C[13] + B[8]*C[14]  + B[12]*C[15];\
307209027a4SShri Abhyankar   A[13] -= B[1]*C[12] + B[5]*C[13] + B[9]*C[14]  + B[13]*C[15];\
308209027a4SShri Abhyankar   A[14] -= B[2]*C[12] + B[6]*C[13] + B[10]*C[14] + B[14]*C[15];\
309209027a4SShri Abhyankar   A[15] -= B[3]*C[12] + B[7]*C[13] + B[11]*C[14] + B[15]*C[15];\
310209027a4SShri Abhyankar }
311209027a4SShri Abhyankar 
312*0deeaf61SShri Abhyankar 
313*0deeaf61SShri Abhyankar #define Kernel_A_gets_A_times_B_5(A,B,W) 0;\
314*0deeaf61SShri Abhyankar {\
315*0deeaf61SShri Abhyankar   PetscMemcpy(W,A,25*sizeof(MatScalar));\
316*0deeaf61SShri Abhyankar   A[0] =  W[0]*B[0]  + W[5]*B[1]  + W[10]*B[2]   + W[15]*B[3] + W[20]*B[4];\
317*0deeaf61SShri Abhyankar   A[1] =  W[1]*B[0]  + W[6]*B[1]  + W[11]*B[2]   + W[16]*B[3] + W[21]*B[4];\
318*0deeaf61SShri Abhyankar   A[2] =  W[2]*B[0]  + W[7]*B[1]  + W[12]*B[2]  + W[17]*B[3]  + W[22]*B[4];\
319*0deeaf61SShri Abhyankar   A[3] =  W[3]*B[0]  + W[8]*B[1]  + W[13]*B[2]  + W[18]*B[3]  + W[23]*B[4];\
320*0deeaf61SShri Abhyankar   A[4] =  W[4]*B[0]  + W[9]*B[1]  + W[14]*B[2]   + W[19]*B[3] + W[24]*B[4];\
321*0deeaf61SShri Abhyankar   A[5] =  W[0]*B[5]  + W[5]*B[6]  + W[10]*B[7]   + W[15]*B[8] + W[20]*B[9];\
322*0deeaf61SShri Abhyankar   A[6] =  W[1]*B[5]  + W[6]*B[6]  + W[11]*B[7]   + W[16]*B[8] + W[21]*B[9];\
323*0deeaf61SShri Abhyankar   A[7] =  W[2]*B[5]  + W[7]*B[6]  + W[12]*B[7]  + W[17]*B[8]  + W[22]*B[9];\
324*0deeaf61SShri Abhyankar   A[8] =  W[3]*B[5]  + W[8]*B[6]  + W[13]*B[7]  + W[18]*B[8]  + W[23]*B[9];\
325*0deeaf61SShri Abhyankar   A[9] =  W[4]*B[5]  + W[9]*B[6]  + W[14]*B[7]   + W[19]*B[8] + W[24]*B[9];\
326*0deeaf61SShri Abhyankar   A[10] =  W[0]*B[10]  + W[5]*B[11]  + W[10]*B[12]   + W[15]*B[13] + W[20]*B[14];\
327*0deeaf61SShri Abhyankar   A[11] =  W[1]*B[10]  + W[6]*B[11]  + W[11]*B[12]   + W[16]*B[13] + W[21]*B[14];\
328*0deeaf61SShri Abhyankar   A[12] =  W[2]*B[10]  + W[7]*B[11]  + W[12]*B[12]  + W[17]*B[13]  + W[22]*B[14];\
329*0deeaf61SShri Abhyankar   A[13] =  W[3]*B[10]  + W[8]*B[11]  + W[13]*B[12]  + W[18]*B[13]  + W[23]*B[14];\
330*0deeaf61SShri Abhyankar   A[14] =  W[4]*B[10]  + W[9]*B[11]  + W[14]*B[12]   + W[19]*B[13] + W[24]*B[14];\
331*0deeaf61SShri Abhyankar   A[15] =  W[0]*B[15]  + W[5]*B[16]  + W[10]*B[17]   + W[15]*B[18] + W[20]*B[19];\
332*0deeaf61SShri Abhyankar   A[16] =  W[1]*B[15]  + W[6]*B[16]  + W[11]*B[17]   + W[16]*B[18] + W[21]*B[19];\
333*0deeaf61SShri Abhyankar   A[17] =  W[2]*B[15]  + W[7]*B[16]  + W[12]*B[17]  + W[17]*B[18]  + W[22]*B[19];\
334*0deeaf61SShri Abhyankar   A[18] =  W[3]*B[15]  + W[8]*B[16]  + W[13]*B[17]  + W[18]*B[18]  + W[23]*B[19];\
335*0deeaf61SShri Abhyankar   A[19] =  W[4]*B[15]  + W[9]*B[16]  + W[14]*B[17]   + W[19]*B[18] + W[24]*B[19];\
336*0deeaf61SShri Abhyankar   A[20] =  W[0]*B[20]  + W[5]*B[21]  + W[10]*B[22]   + W[15]*B[23] + W[20]*B[24];\
337*0deeaf61SShri Abhyankar   A[21] =  W[1]*B[20]  + W[6]*B[21]  + W[11]*B[22]   + W[16]*B[23] + W[21]*B[24];\
338*0deeaf61SShri Abhyankar   A[22] =  W[2]*B[20]  + W[7]*B[21]  + W[12]*B[22]  + W[17]*B[23]  + W[22]*B[24];\
339*0deeaf61SShri Abhyankar   A[23] =  W[3]*B[20]  + W[8]*B[21]  + W[13]*B[22]  + W[18]*B[23]  + W[23]*B[24];\
340*0deeaf61SShri Abhyankar   A[24] =  W[4]*B[20]  + W[9]*B[21]  + W[14]*B[22]   + W[19]*B[23] + W[24]*B[24];\
341*0deeaf61SShri Abhyankar }
342*0deeaf61SShri Abhyankar 
343*0deeaf61SShri Abhyankar /*
344*0deeaf61SShri Abhyankar   Kernel_A_gets_A_minus_B_times_C_5: A = A - B * C with size bs=5
345*0deeaf61SShri Abhyankar 
346*0deeaf61SShri Abhyankar   Input Parameters:
347*0deeaf61SShri Abhyankar +  A,B,C - square bs by bs arrays stored in column major order
348*0deeaf61SShri Abhyankar 
349*0deeaf61SShri Abhyankar   Output Parameter:
350*0deeaf61SShri Abhyankar .  A = A - B*C
351*0deeaf61SShri Abhyankar */
352*0deeaf61SShri Abhyankar 
353*0deeaf61SShri Abhyankar #define Kernel_A_gets_A_minus_B_times_C_5(A,B,C) 0;\
354*0deeaf61SShri Abhyankar {\
355*0deeaf61SShri Abhyankar   A[0] -=  B[0]*C[0]  + B[5]*C[1]  + B[10]*C[2]   + B[15]*C[3] + B[20]*C[4];\
356*0deeaf61SShri Abhyankar   A[1] -=  B[1]*C[0]  + B[6]*C[1]  + B[11]*C[2]   + B[16]*C[3] + B[21]*C[4];\
357*0deeaf61SShri Abhyankar   A[2] -=  B[2]*C[0]  + B[7]*C[1]  + B[12]*C[2]  + B[17]*C[3]  + B[22]*C[4];\
358*0deeaf61SShri Abhyankar   A[3] -=  B[3]*C[0]  + B[8]*C[1]  + B[13]*C[2]  + B[18]*C[3]  + B[23]*C[4];\
359*0deeaf61SShri Abhyankar   A[4] -=  B[4]*C[0]  + B[9]*C[1]  + B[14]*C[2]   + B[19]*C[3] + B[24]*C[4];\
360*0deeaf61SShri Abhyankar   A[5] -=  B[0]*C[5]  + B[5]*C[6]  + B[10]*C[7]   + B[15]*C[8] + B[20]*C[9];\
361*0deeaf61SShri Abhyankar   A[6] -=  B[1]*C[5]  + B[6]*C[6]  + B[11]*C[7]   + B[16]*C[8] + B[21]*C[9];\
362*0deeaf61SShri Abhyankar   A[7] -=  B[2]*C[5]  + B[7]*C[6]  + B[12]*C[7]  + B[17]*C[8]  + B[22]*C[9];\
363*0deeaf61SShri Abhyankar   A[8] -=  B[3]*C[5]  + B[8]*C[6]  + B[13]*C[7]  + B[18]*C[8]  + B[23]*C[9];\
364*0deeaf61SShri Abhyankar   A[9] -=  B[4]*C[5]  + B[9]*C[6]  + B[14]*C[7]   + B[19]*C[8] + B[24]*C[9];\
365*0deeaf61SShri Abhyankar   A[10] -=  B[0]*C[10]  + B[5]*C[11]  + B[10]*C[12]   + B[15]*C[13] + B[20]*C[14];\
366*0deeaf61SShri Abhyankar   A[11] -=  B[1]*C[10]  + B[6]*C[11]  + B[11]*C[12]   + B[16]*C[13] + B[21]*C[14];\
367*0deeaf61SShri Abhyankar   A[12] -=  B[2]*C[10]  + B[7]*C[11]  + B[12]*C[12]  + B[17]*C[13]  + B[22]*C[14];\
368*0deeaf61SShri Abhyankar   A[13] -=  B[3]*C[10]  + B[8]*C[11]  + B[13]*C[12]  + B[18]*C[13]  + B[23]*C[14];\
369*0deeaf61SShri Abhyankar   A[14] -=  B[4]*C[10]  + B[9]*C[11]  + B[14]*C[12]   + B[19]*C[13] + B[24]*C[14];\
370*0deeaf61SShri Abhyankar   A[15] -=  B[0]*C[15]  + B[5]*C[16]  + B[10]*C[17]   + B[15]*C[18] + B[20]*C[19];\
371*0deeaf61SShri Abhyankar   A[16] -=  B[1]*C[15]  + B[6]*C[16]  + B[11]*C[17]   + B[16]*C[18] + B[21]*C[19];\
372*0deeaf61SShri Abhyankar   A[17] -=  B[2]*C[15]  + B[7]*C[16]  + B[12]*C[17]  + B[17]*C[18]  + B[22]*C[19];\
373*0deeaf61SShri Abhyankar   A[18] -=  B[3]*C[15]  + B[8]*C[16]  + B[13]*C[17]  + B[18]*C[18]  + B[23]*C[19];\
374*0deeaf61SShri Abhyankar   A[19] -=  B[4]*C[15]  + B[9]*C[16]  + B[14]*C[17]   + B[19]*C[18] + B[24]*C[19];\
375*0deeaf61SShri Abhyankar   A[20] -=  B[0]*C[20]  + B[5]*C[21]  + B[10]*C[22]   + B[15]*C[23] + B[20]*C[24];\
376*0deeaf61SShri Abhyankar   A[21] -=  B[1]*C[20]  + B[6]*C[21]  + B[11]*C[22]   + B[16]*C[23] + B[21]*C[24];\
377*0deeaf61SShri Abhyankar   A[22] -=  B[2]*C[20]  + B[7]*C[21]  + B[12]*C[22]  + B[17]*C[23]  + B[22]*C[24];\
378*0deeaf61SShri Abhyankar   A[23] -=  B[3]*C[20]  + B[8]*C[21]  + B[13]*C[22]  + B[18]*C[23]  + B[23]*C[24];\
379*0deeaf61SShri Abhyankar   A[24] -=  B[4]*C[20]  + B[9]*C[21]  + B[14]*C[22]   + B[19]*C[23] + B[24]*C[24];\
380*0deeaf61SShri Abhyankar }
381*0deeaf61SShri Abhyankar 
3822593348eSBarry Smith #endif
383