xref: /petsc/src/mat/impls/baij/seq/baij.h (revision 0deeaf618d282af9bbba6efcbcb8e7a07d546a23)
1 
2 #if !defined(__BAIJ_H)
3 #define __BAIJ_H
4 #include "private/matimpl.h"
5 #include "../src/mat/impls/aij/seq/aij.h"
6 #include "../src/mat/impls/baij/seq/ftn-kernels/fsolvebaij.h"
7 
8 /*
9   MATSEQBAIJ format - Block compressed row storage. The i[] and j[]
10   arrays start at 0.
11 */
12 
13 /* This header is shared by the SeqSBAIJ matrix */
14 #define SEQBAIJHEADER \
15   PetscInt         bs2;              /*  square of block size */                                     \
16   PetscInt         mbs,nbs;          /* rows/bs, columns/bs */                                       \
17   PetscScalar      *mult_work;       /* work array for matrix vector product*/                       \
18   MatScalar        *saved_values;                                                                    \
19                                                                                                      \
20   Mat              sbaijMat;         /* mat in sbaij format */                                       \
21                                                                                                      \
22   PetscTruth       pivotinblocks;    /* pivot inside factorization of each diagonal block */         \
23                                                                                                      \
24   MatScalar        *idiag;           /* inverse of block diagonal  */                                \
25   PetscTruth       idiagvalid       /* if above has correct/current values */
26 
27 
28 typedef struct {
29   SEQAIJHEADER(MatScalar);
30   SEQBAIJHEADER;
31 } Mat_SeqBAIJ;
32 
33 EXTERN_C_BEGIN
34 EXTERN PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat,PetscInt,PetscInt,PetscInt*);
35 EXTERN_C_END
36 EXTERN PetscErrorCode MatILUFactorSymbolic_SeqBAIJ(Mat,Mat,IS,IS,const MatFactorInfo*);
37 EXTERN PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat,Mat,IS,const MatFactorInfo*);
38 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat,Mat,IS,const MatFactorInfo*);
39 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat,Mat,const MatFactorInfo*);
40 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
41 EXTERN PetscErrorCode MatDuplicate_SeqBAIJ(Mat,MatDuplicateOption,Mat*);
42 EXTERN PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat,PetscTruth*,PetscInt*);
43 EXTERN PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat);
44 EXTERN PetscErrorCode MatILUDTFactor_SeqBAIJ(Mat,IS,IS,const MatFactorInfo*,Mat*);
45 
46 EXTERN PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat,Mat,IS,IS,const MatFactorInfo*);
47 EXTERN PetscErrorCode MatLUFactor_SeqBAIJ(Mat,IS,IS,const MatFactorInfo*);
48 EXTERN PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat,PetscInt,IS*,PetscInt);
49 EXTERN PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatReuse,Mat*);
50 EXTERN PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]);
51 EXTERN PetscErrorCode MatMultTranspose_SeqBAIJ(Mat,Vec,Vec);
52 EXTERN PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
53 EXTERN PetscErrorCode MatScale_SeqBAIJ(Mat,PetscScalar);
54 EXTERN PetscErrorCode MatNorm_SeqBAIJ(Mat,NormType,PetscReal *);
55 EXTERN PetscErrorCode MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*);
56 EXTERN PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat,Vec);
57 EXTERN PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
58 EXTERN PetscErrorCode MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
59 EXTERN PetscErrorCode MatZeroEntries_SeqBAIJ(Mat);
60 
61 EXTERN PetscErrorCode MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(Mat);
62 
63 EXTERN PetscErrorCode MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
64 EXTERN PetscErrorCode MatSolve_SeqBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
65 EXTERN PetscErrorCode MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
66 EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_newdatastruct(Mat,Vec,Vec);
67 EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
68 EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
69 EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
70 EXTERN PetscErrorCode MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
71 EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_newdatastruct(Mat,Vec,Vec);
72 EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
73 EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
74 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
75 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_newdatastruct(Mat,Vec,Vec);
76 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
77 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
78 #if defined(PETSC_HAVE_SSE)
79 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_SSE_Demotion(Mat,Vec,Vec);
80 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_SSE_Demotion(Mat,Vec,Vec);
81 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_SSE_Demotion_usj(Mat,Vec,Vec);
82 #endif
83 EXTERN PetscErrorCode MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
84 EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_newdatastruct(Mat,Vec,Vec);
85 EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
86 EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
87 EXTERN PetscErrorCode MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
88 EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_newdatastruct(Mat,Vec,Vec);
89 EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
90 EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
91 EXTERN PetscErrorCode MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
92 EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_newdatastruct(Mat,Vec,Vec);
93 EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
94 EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
95 EXTERN PetscErrorCode MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
96 EXTERN PetscErrorCode MatSolve_SeqBAIJ_N_newdatastruct(Mat,Vec,Vec);
97 EXTERN PetscErrorCode MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
98 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_Update(Mat,Vec,Vec);
99 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec);
100 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
101 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec);
102 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
103 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec);
104 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
105 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec);
106 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
107 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec);
108 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
109 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec);
110 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
111 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec);
112 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
113 
114 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N_newdatastruct(Mat,Mat,const MatFactorInfo*);
115 
116 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat,const MatFactorInfo*);
117 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat,const MatFactorInfo*);
118 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_newdatastruct(Mat,Mat,const MatFactorInfo*);
119 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
120 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*);
121 
122 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat,const MatFactorInfo*);
123 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_newdatastruct(Mat,Mat,const MatFactorInfo*);
124 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
125 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*);
126 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat,const MatFactorInfo*);
127 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_newdatastruct(Mat,Mat,const MatFactorInfo*);
128 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
129 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*);
130 #if defined(PETSC_HAVE_SSE)
131 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE(Mat,Mat,const MatFactorInfo*);
132 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj(Mat,Mat,const MatFactorInfo*);
133 #else
134 #endif
135 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat,const MatFactorInfo*);
136 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_newdatastruct(Mat,Mat,const MatFactorInfo*);
137 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
138 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*);
139 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat,const MatFactorInfo*);
140 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
141 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7(Mat,Mat,const MatFactorInfo*);
142 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
143 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat,const MatFactorInfo*);
144 
145 EXTERN PetscErrorCode MatMult_SeqBAIJ_1(Mat,Vec,Vec);
146 EXTERN PetscErrorCode MatMult_SeqBAIJ_2(Mat,Vec,Vec);
147 EXTERN PetscErrorCode MatMult_SeqBAIJ_3(Mat,Vec,Vec);
148 EXTERN PetscErrorCode MatMult_SeqBAIJ_4(Mat,Vec,Vec);
149 EXTERN PetscErrorCode MatMult_SeqBAIJ_5(Mat,Vec,Vec);
150 EXTERN PetscErrorCode MatMult_SeqBAIJ_6(Mat,Vec,Vec);
151 EXTERN PetscErrorCode MatMult_SeqBAIJ_7(Mat,Vec,Vec);
152 EXTERN PetscErrorCode MatMult_SeqBAIJ_N(Mat,Vec,Vec);
153 
154 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
155 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
156 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
157 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
158 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
159 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
160 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
161 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
162 EXTERN PetscErrorCode MatLoad_SeqBAIJ(PetscViewer, const MatType,Mat*);
163 
164 /*
165   Kernel_A_gets_A_times_B_2: A = A * B with size bs=2
166 
167   Input Parameters:
168 +  A,B - square bs by bs arrays stored in column major order
169 -  W   - bs*bs work arrary
170 
171   Output Parameter:
172 .  A = A * B
173 */
174 
175 #define Kernel_A_gets_A_times_B_2(A,B,W) 0;\
176 {\
177   PetscMemcpy(W,A,4*sizeof(MatScalar));\
178   A[0] = W[0]*B[0] + W[2]*B[1];\
179   A[1] = W[1]*B[0] + W[3]*B[1];\
180   A[2] = W[0]*B[2] + W[2]*B[3];\
181   A[3] = W[1]*B[2] + W[3]*B[3];\
182 }
183 
184 /*
185   Kernel_A_gets_A_minus_B_times_C_2: A = A - B * C with size bs=2
186 
187   Input Parameters:
188 +  A,B,C - square bs by bs arrays stored in column major order
189 
190   Output Parameter:
191 .  A = A - B*C
192 */
193 
194 #define Kernel_A_gets_A_minus_B_times_C_2(A,B,C) 0;\
195 {\
196   A[0] -= B[0]*C[0] + B[2]*C[1];\
197   A[1] -= B[1]*C[0] + B[3]*C[1];\
198   A[2] -= B[0]*C[2] + B[2]*C[3];\
199   A[3] -= B[1]*C[2] + B[3]*C[3];\
200 }
201 
202 /*
203   Kernel_A_gets_A_times_B_3: A = A * B with size bs=3
204 
205   Input Parameters:
206 +  A,B - square bs by bs arrays stored in column major order
207 -  W   - bs*bs work arrary
208 
209   Output Parameter:
210 .  A = A * B
211 */
212 
213 #define Kernel_A_gets_A_times_B_3(A,B,W) 0;\
214 {\
215   PetscMemcpy(W,A,9*sizeof(MatScalar));\
216   A[0] = W[0]*B[0] + W[3]*B[1] + W[6]*B[2];\
217   A[1] = W[1]*B[0] + W[4]*B[1] + W[7]*B[2];\
218   A[2] = W[2]*B[0] + W[5]*B[1] + W[8]*B[2];\
219   A[3] = W[0]*B[3] + W[3]*B[4] + W[6]*B[5];\
220   A[4] = W[1]*B[3] + W[4]*B[4] + W[7]*B[5];\
221   A[5] = W[2]*B[3] + W[5]*B[4] + W[8]*B[5];\
222   A[6] = W[0]*B[6] + W[3]*B[7] + W[6]*B[8];\
223   A[7] = W[1]*B[6] + W[4]*B[7] + W[7]*B[8];\
224   A[8] = W[2]*B[6] + W[5]*B[7] + W[8]*B[8];\
225 }
226 
227 /*
228   Kernel_A_gets_A_minus_B_times_C_3: A = A - B * C with size bs=3
229 
230   Input Parameters:
231 +  A,B,C - square bs by bs arrays stored in column major order
232 
233   Output Parameter:
234 .  A = A - B*C
235 */
236 
237 #define Kernel_A_gets_A_minus_B_times_C_3(A,B,C) 0;\
238 {\
239   A[0] -= B[0]*C[0] + B[3]*C[1] + B[6]*C[2];\
240   A[1] -= B[1]*C[0] + B[4]*C[1] + B[7]*C[2];\
241   A[2] -= B[2]*C[0] + B[5]*C[1] + B[8]*C[2];\
242   A[3] -= B[0]*C[3] + B[3]*C[4] + B[6]*C[5];\
243   A[4] -= B[1]*C[3] + B[4]*C[4] + B[7]*C[5];\
244   A[5] -= B[2]*C[3] + B[5]*C[4] + B[8]*C[5];\
245   A[6] -= B[0]*C[6] + B[3]*C[7] + B[6]*C[8];\
246   A[7] -= B[1]*C[6] + B[4]*C[7] + B[7]*C[8];\
247   A[8] -= B[2]*C[6] + B[5]*C[7] + B[8]*C[8];\
248 }
249 
250 /*
251   Kernel_A_gets_A_times_B_4: A = A * B with size bs=4
252 
253   Input Parameters:
254 +  A,B - square bs by bs arrays stored in column major order
255 -  W   - bs*bs work arrary
256 
257   Output Parameter:
258 .  A = A * B
259 */
260 
261 #define Kernel_A_gets_A_times_B_4(A,B,W) 0;\
262 {\
263   PetscMemcpy(W,A,16*sizeof(MatScalar));\
264   A[0] =  W[0]*B[0]  + W[4]*B[1]  + W[8]*B[2]   + W[12]*B[3];\
265   A[1] =  W[1]*B[0]  + W[5]*B[1]  + W[9]*B[2]   + W[13]*B[3];\
266   A[2] =  W[2]*B[0]  + W[6]*B[1]  + W[10]*B[2]  + W[14]*B[3];\
267   A[3] =  W[3]*B[0]  + W[7]*B[1]  + W[11]*B[2]  + W[15]*B[3];\
268   A[4] =  W[0]*B[4]  + W[4]*B[5]  + W[8]*B[6]   + W[12]*B[7];\
269   A[5] =  W[1]*B[4]  + W[5]*B[5]  + W[9]*B[6]   + W[13]*B[7];\
270   A[6] =  W[2]*B[4]  + W[6]*B[5]  + W[10]*B[6]  + W[14]*B[7];\
271   A[7] =  W[3]*B[4]  + W[7]*B[5]  + W[11]*B[6]  + W[15]*B[7];\
272   A[8] =  W[0]*B[8]  + W[4]*B[9]  + W[8]*B[10]  + W[12]*B[11];\
273   A[9] =  W[1]*B[8]  + W[5]*B[9]  + W[9]*B[10]  + W[13]*B[11];\
274   A[10] = W[2]*B[8]  + W[6]*B[9]  + W[10]*B[10] + W[14]*B[11];\
275   A[11] = W[3]*B[8]  + W[7]*B[9]  + W[11]*B[10] + W[15]*B[11];\
276   A[12] = W[0]*B[12] + W[4]*B[13] + W[8]*B[14]  + W[12]*B[15];\
277   A[13] = W[1]*B[12] + W[5]*B[13] + W[9]*B[14]  + W[13]*B[15];\
278   A[14] = W[2]*B[12] + W[6]*B[13] + W[10]*B[14] + W[14]*B[15];\
279   A[15] = W[3]*B[12] + W[7]*B[13] + W[11]*B[14] + W[15]*B[15];\
280 }
281 
282 /*
283   Kernel_A_gets_A_minus_B_times_C_4: A = A - B * C with size bs=4
284 
285   Input Parameters:
286 +  A,B,C - square bs by bs arrays stored in column major order
287 
288   Output Parameter:
289 .  A = A - B*C
290 */
291 
292 #define Kernel_A_gets_A_minus_B_times_C_4(A,B,C) 0;\
293 {\
294   A[0] -=  B[0]*C[0]  + B[4]*C[1]  + B[8]*C[2]   + B[12]*C[3];\
295   A[1] -=  B[1]*C[0]  + B[5]*C[1]  + B[9]*C[2]   + B[13]*C[3];\
296   A[2] -=  B[2]*C[0]  + B[6]*C[1]  + B[10]*C[2]  + B[14]*C[3];\
297   A[3] -=  B[3]*C[0]  + B[7]*C[1]  + B[11]*C[2]  + B[15]*C[3];\
298   A[4] -=  B[0]*C[4]  + B[4]*C[5]  + B[8]*C[6]   + B[12]*C[7];\
299   A[5] -=  B[1]*C[4]  + B[5]*C[5]  + B[9]*C[6]   + B[13]*C[7];\
300   A[6] -=  B[2]*C[4]  + B[6]*C[5]  + B[10]*C[6]  + B[14]*C[7];\
301   A[7] -=  B[3]*C[4]  + B[7]*C[5]  + B[11]*C[6]  + B[15]*C[7];\
302   A[8] -=  B[0]*C[8]  + B[4]*C[9]  + B[8]*C[10]  + B[12]*C[11];\
303   A[9] -=  B[1]*C[8]  + B[5]*C[9]  + B[9]*C[10]  + B[13]*C[11];\
304   A[10] -= B[2]*C[8]  + B[6]*C[9]  + B[10]*C[10] + B[14]*C[11];\
305   A[11] -= B[3]*C[8]  + B[7]*C[9]  + B[11]*C[10] + B[15]*C[11];\
306   A[12] -= B[0]*C[12] + B[4]*C[13] + B[8]*C[14]  + B[12]*C[15];\
307   A[13] -= B[1]*C[12] + B[5]*C[13] + B[9]*C[14]  + B[13]*C[15];\
308   A[14] -= B[2]*C[12] + B[6]*C[13] + B[10]*C[14] + B[14]*C[15];\
309   A[15] -= B[3]*C[12] + B[7]*C[13] + B[11]*C[14] + B[15]*C[15];\
310 }
311 
312 
313 #define Kernel_A_gets_A_times_B_5(A,B,W) 0;\
314 {\
315   PetscMemcpy(W,A,25*sizeof(MatScalar));\
316   A[0] =  W[0]*B[0]  + W[5]*B[1]  + W[10]*B[2]   + W[15]*B[3] + W[20]*B[4];\
317   A[1] =  W[1]*B[0]  + W[6]*B[1]  + W[11]*B[2]   + W[16]*B[3] + W[21]*B[4];\
318   A[2] =  W[2]*B[0]  + W[7]*B[1]  + W[12]*B[2]  + W[17]*B[3]  + W[22]*B[4];\
319   A[3] =  W[3]*B[0]  + W[8]*B[1]  + W[13]*B[2]  + W[18]*B[3]  + W[23]*B[4];\
320   A[4] =  W[4]*B[0]  + W[9]*B[1]  + W[14]*B[2]   + W[19]*B[3] + W[24]*B[4];\
321   A[5] =  W[0]*B[5]  + W[5]*B[6]  + W[10]*B[7]   + W[15]*B[8] + W[20]*B[9];\
322   A[6] =  W[1]*B[5]  + W[6]*B[6]  + W[11]*B[7]   + W[16]*B[8] + W[21]*B[9];\
323   A[7] =  W[2]*B[5]  + W[7]*B[6]  + W[12]*B[7]  + W[17]*B[8]  + W[22]*B[9];\
324   A[8] =  W[3]*B[5]  + W[8]*B[6]  + W[13]*B[7]  + W[18]*B[8]  + W[23]*B[9];\
325   A[9] =  W[4]*B[5]  + W[9]*B[6]  + W[14]*B[7]   + W[19]*B[8] + W[24]*B[9];\
326   A[10] =  W[0]*B[10]  + W[5]*B[11]  + W[10]*B[12]   + W[15]*B[13] + W[20]*B[14];\
327   A[11] =  W[1]*B[10]  + W[6]*B[11]  + W[11]*B[12]   + W[16]*B[13] + W[21]*B[14];\
328   A[12] =  W[2]*B[10]  + W[7]*B[11]  + W[12]*B[12]  + W[17]*B[13]  + W[22]*B[14];\
329   A[13] =  W[3]*B[10]  + W[8]*B[11]  + W[13]*B[12]  + W[18]*B[13]  + W[23]*B[14];\
330   A[14] =  W[4]*B[10]  + W[9]*B[11]  + W[14]*B[12]   + W[19]*B[13] + W[24]*B[14];\
331   A[15] =  W[0]*B[15]  + W[5]*B[16]  + W[10]*B[17]   + W[15]*B[18] + W[20]*B[19];\
332   A[16] =  W[1]*B[15]  + W[6]*B[16]  + W[11]*B[17]   + W[16]*B[18] + W[21]*B[19];\
333   A[17] =  W[2]*B[15]  + W[7]*B[16]  + W[12]*B[17]  + W[17]*B[18]  + W[22]*B[19];\
334   A[18] =  W[3]*B[15]  + W[8]*B[16]  + W[13]*B[17]  + W[18]*B[18]  + W[23]*B[19];\
335   A[19] =  W[4]*B[15]  + W[9]*B[16]  + W[14]*B[17]   + W[19]*B[18] + W[24]*B[19];\
336   A[20] =  W[0]*B[20]  + W[5]*B[21]  + W[10]*B[22]   + W[15]*B[23] + W[20]*B[24];\
337   A[21] =  W[1]*B[20]  + W[6]*B[21]  + W[11]*B[22]   + W[16]*B[23] + W[21]*B[24];\
338   A[22] =  W[2]*B[20]  + W[7]*B[21]  + W[12]*B[22]  + W[17]*B[23]  + W[22]*B[24];\
339   A[23] =  W[3]*B[20]  + W[8]*B[21]  + W[13]*B[22]  + W[18]*B[23]  + W[23]*B[24];\
340   A[24] =  W[4]*B[20]  + W[9]*B[21]  + W[14]*B[22]   + W[19]*B[23] + W[24]*B[24];\
341 }
342 
343 /*
344   Kernel_A_gets_A_minus_B_times_C_5: A = A - B * C with size bs=5
345 
346   Input Parameters:
347 +  A,B,C - square bs by bs arrays stored in column major order
348 
349   Output Parameter:
350 .  A = A - B*C
351 */
352 
353 #define Kernel_A_gets_A_minus_B_times_C_5(A,B,C) 0;\
354 {\
355   A[0] -=  B[0]*C[0]  + B[5]*C[1]  + B[10]*C[2]   + B[15]*C[3] + B[20]*C[4];\
356   A[1] -=  B[1]*C[0]  + B[6]*C[1]  + B[11]*C[2]   + B[16]*C[3] + B[21]*C[4];\
357   A[2] -=  B[2]*C[0]  + B[7]*C[1]  + B[12]*C[2]  + B[17]*C[3]  + B[22]*C[4];\
358   A[3] -=  B[3]*C[0]  + B[8]*C[1]  + B[13]*C[2]  + B[18]*C[3]  + B[23]*C[4];\
359   A[4] -=  B[4]*C[0]  + B[9]*C[1]  + B[14]*C[2]   + B[19]*C[3] + B[24]*C[4];\
360   A[5] -=  B[0]*C[5]  + B[5]*C[6]  + B[10]*C[7]   + B[15]*C[8] + B[20]*C[9];\
361   A[6] -=  B[1]*C[5]  + B[6]*C[6]  + B[11]*C[7]   + B[16]*C[8] + B[21]*C[9];\
362   A[7] -=  B[2]*C[5]  + B[7]*C[6]  + B[12]*C[7]  + B[17]*C[8]  + B[22]*C[9];\
363   A[8] -=  B[3]*C[5]  + B[8]*C[6]  + B[13]*C[7]  + B[18]*C[8]  + B[23]*C[9];\
364   A[9] -=  B[4]*C[5]  + B[9]*C[6]  + B[14]*C[7]   + B[19]*C[8] + B[24]*C[9];\
365   A[10] -=  B[0]*C[10]  + B[5]*C[11]  + B[10]*C[12]   + B[15]*C[13] + B[20]*C[14];\
366   A[11] -=  B[1]*C[10]  + B[6]*C[11]  + B[11]*C[12]   + B[16]*C[13] + B[21]*C[14];\
367   A[12] -=  B[2]*C[10]  + B[7]*C[11]  + B[12]*C[12]  + B[17]*C[13]  + B[22]*C[14];\
368   A[13] -=  B[3]*C[10]  + B[8]*C[11]  + B[13]*C[12]  + B[18]*C[13]  + B[23]*C[14];\
369   A[14] -=  B[4]*C[10]  + B[9]*C[11]  + B[14]*C[12]   + B[19]*C[13] + B[24]*C[14];\
370   A[15] -=  B[0]*C[15]  + B[5]*C[16]  + B[10]*C[17]   + B[15]*C[18] + B[20]*C[19];\
371   A[16] -=  B[1]*C[15]  + B[6]*C[16]  + B[11]*C[17]   + B[16]*C[18] + B[21]*C[19];\
372   A[17] -=  B[2]*C[15]  + B[7]*C[16]  + B[12]*C[17]  + B[17]*C[18]  + B[22]*C[19];\
373   A[18] -=  B[3]*C[15]  + B[8]*C[16]  + B[13]*C[17]  + B[18]*C[18]  + B[23]*C[19];\
374   A[19] -=  B[4]*C[15]  + B[9]*C[16]  + B[14]*C[17]   + B[19]*C[18] + B[24]*C[19];\
375   A[20] -=  B[0]*C[20]  + B[5]*C[21]  + B[10]*C[22]   + B[15]*C[23] + B[20]*C[24];\
376   A[21] -=  B[1]*C[20]  + B[6]*C[21]  + B[11]*C[22]   + B[16]*C[23] + B[21]*C[24];\
377   A[22] -=  B[2]*C[20]  + B[7]*C[21]  + B[12]*C[22]  + B[17]*C[23]  + B[22]*C[24];\
378   A[23] -=  B[3]*C[20]  + B[8]*C[21]  + B[13]*C[22]  + B[18]*C[23]  + B[23]*C[24];\
379   A[24] -=  B[4]*C[20]  + B[9]*C[21]  + B[14]*C[22]   + B[19]*C[23] + B[24]*C[24];\
380 }
381 
382 #endif
383