1 2 #if !defined(__BAIJ_H) 3 #define __BAIJ_H 4 #include "private/matimpl.h" 5 #include "../src/mat/impls/aij/seq/aij.h" 6 #include "../src/mat/impls/baij/seq/ftn-kernels/fsolvebaij.h" 7 8 /* 9 MATSEQBAIJ format - Block compressed row storage. The i[] and j[] 10 arrays start at 0. 11 */ 12 13 /* This header is shared by the SeqSBAIJ matrix */ 14 #define SEQBAIJHEADER \ 15 PetscInt bs2; /* square of block size */ \ 16 PetscInt mbs,nbs; /* rows/bs, columns/bs */ \ 17 PetscScalar *mult_work; /* work array for matrix vector product*/ \ 18 MatScalar *saved_values; \ 19 \ 20 Mat sbaijMat; /* mat in sbaij format */ \ 21 \ 22 PetscTruth pivotinblocks; /* pivot inside factorization of each diagonal block */ \ 23 \ 24 MatScalar *idiag; /* inverse of block diagonal */ \ 25 PetscTruth idiagvalid /* if above has correct/current values */ 26 27 28 typedef struct { 29 SEQAIJHEADER(MatScalar); 30 SEQBAIJHEADER; 31 } Mat_SeqBAIJ; 32 33 EXTERN_C_BEGIN 34 EXTERN PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat,PetscInt,PetscInt,PetscInt*); 35 EXTERN_C_END 36 EXTERN PetscErrorCode MatILUFactorSymbolic_SeqBAIJ(Mat,Mat,IS,IS,const MatFactorInfo*); 37 EXTERN PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat,Mat,IS,const MatFactorInfo*); 38 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat,Mat,IS,const MatFactorInfo*); 39 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat,Mat,const MatFactorInfo*); 40 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 41 EXTERN PetscErrorCode MatDuplicate_SeqBAIJ(Mat,MatDuplicateOption,Mat*); 42 EXTERN PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat,PetscTruth*,PetscInt*); 43 EXTERN PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat); 44 EXTERN PetscErrorCode MatILUDTFactor_SeqBAIJ(Mat,IS,IS,const MatFactorInfo*,Mat*); 45 46 EXTERN PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat,Mat,IS,IS,const MatFactorInfo*); 47 EXTERN PetscErrorCode MatLUFactor_SeqBAIJ(Mat,IS,IS,const MatFactorInfo*); 48 EXTERN PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat,PetscInt,IS*,PetscInt); 49 EXTERN PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatReuse,Mat*); 50 EXTERN PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 51 EXTERN PetscErrorCode MatMultTranspose_SeqBAIJ(Mat,Vec,Vec); 52 EXTERN PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 53 EXTERN PetscErrorCode MatScale_SeqBAIJ(Mat,PetscScalar); 54 EXTERN PetscErrorCode MatNorm_SeqBAIJ(Mat,NormType,PetscReal *); 55 EXTERN PetscErrorCode MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*); 56 EXTERN PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat,Vec); 57 EXTERN PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 58 EXTERN PetscErrorCode MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 59 EXTERN PetscErrorCode MatZeroEntries_SeqBAIJ(Mat); 60 61 EXTERN PetscErrorCode MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(Mat); 62 63 EXTERN PetscErrorCode MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 64 EXTERN PetscErrorCode MatSolve_SeqBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 65 EXTERN PetscErrorCode MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 66 EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_newdatastruct(Mat,Vec,Vec); 67 EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 68 EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_newdatastruct(Mat,Vec,Vec); 69 EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct(Mat,Vec,Vec); 70 EXTERN PetscErrorCode MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 71 EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_newdatastruct(Mat,Vec,Vec); 72 EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 73 EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct(Mat,Vec,Vec); 74 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 75 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_newdatastruct(Mat,Vec,Vec); 76 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 77 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_newdatastruct(Mat,Vec,Vec); 78 #if defined(PETSC_HAVE_SSE) 79 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_SSE_Demotion(Mat,Vec,Vec); 80 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_SSE_Demotion(Mat,Vec,Vec); 81 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_SSE_Demotion_usj(Mat,Vec,Vec); 82 #endif 83 EXTERN PetscErrorCode MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 84 EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_newdatastruct(Mat,Vec,Vec); 85 EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 86 EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering_newdatastruct(Mat,Vec,Vec); 87 EXTERN PetscErrorCode MatSolve_SeqBAIJ_6(Mat,Vec,Vec); 88 EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_newdatastruct(Mat,Vec,Vec); 89 EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 90 EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_NaturalOrdering_newdatastruct(Mat,Vec,Vec); 91 EXTERN PetscErrorCode MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 92 EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_newdatastruct(Mat,Vec,Vec); 93 EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 94 EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_NaturalOrdering_newdatastruct(Mat,Vec,Vec); 95 EXTERN PetscErrorCode MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 96 EXTERN PetscErrorCode MatSolve_SeqBAIJ_N_newdatastruct(Mat,Vec,Vec); 97 EXTERN PetscErrorCode MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct(Mat,Vec,Vec); 98 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_Update(Mat,Vec,Vec); 99 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec); 100 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 101 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec); 102 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 103 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec); 104 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 105 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec); 106 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 107 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec); 108 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 109 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec); 110 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 111 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec); 112 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 113 114 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N_newdatastruct(Mat,Mat,const MatFactorInfo*); 115 116 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat,const MatFactorInfo*); 117 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat,const MatFactorInfo*); 118 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_newdatastruct(Mat,Mat,const MatFactorInfo*); 119 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 120 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*); 121 122 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat,const MatFactorInfo*); 123 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_newdatastruct(Mat,Mat,const MatFactorInfo*); 124 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 125 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*); 126 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat,const MatFactorInfo*); 127 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_newdatastruct(Mat,Mat,const MatFactorInfo*); 128 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 129 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*); 130 #if defined(PETSC_HAVE_SSE) 131 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE(Mat,Mat,const MatFactorInfo*); 132 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj(Mat,Mat,const MatFactorInfo*); 133 #else 134 #endif 135 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat,const MatFactorInfo*); 136 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_newdatastruct(Mat,Mat,const MatFactorInfo*); 137 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 138 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*); 139 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat,const MatFactorInfo*); 140 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 141 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7(Mat,Mat,const MatFactorInfo*); 142 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 143 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat,const MatFactorInfo*); 144 145 EXTERN PetscErrorCode MatMult_SeqBAIJ_1(Mat,Vec,Vec); 146 EXTERN PetscErrorCode MatMult_SeqBAIJ_2(Mat,Vec,Vec); 147 EXTERN PetscErrorCode MatMult_SeqBAIJ_3(Mat,Vec,Vec); 148 EXTERN PetscErrorCode MatMult_SeqBAIJ_4(Mat,Vec,Vec); 149 EXTERN PetscErrorCode MatMult_SeqBAIJ_5(Mat,Vec,Vec); 150 EXTERN PetscErrorCode MatMult_SeqBAIJ_6(Mat,Vec,Vec); 151 EXTERN PetscErrorCode MatMult_SeqBAIJ_7(Mat,Vec,Vec); 152 EXTERN PetscErrorCode MatMult_SeqBAIJ_N(Mat,Vec,Vec); 153 154 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 155 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 156 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 157 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 158 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 159 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec); 160 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 161 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 162 EXTERN PetscErrorCode MatLoad_SeqBAIJ(PetscViewer, const MatType,Mat*); 163 164 /* 165 Kernel_A_gets_A_times_B_2: A = A * B with size bs=2 166 167 Input Parameters: 168 + A,B - square bs by bs arrays stored in column major order 169 - W - bs*bs work arrary 170 171 Output Parameter: 172 . A = A * B 173 */ 174 175 #define Kernel_A_gets_A_times_B_2(A,B,W) 0;\ 176 {\ 177 PetscMemcpy(W,A,4*sizeof(MatScalar));\ 178 A[0] = W[0]*B[0] + W[2]*B[1];\ 179 A[1] = W[1]*B[0] + W[3]*B[1];\ 180 A[2] = W[0]*B[2] + W[2]*B[3];\ 181 A[3] = W[1]*B[2] + W[3]*B[3];\ 182 } 183 184 /* 185 Kernel_A_gets_A_minus_B_times_C_2: A = A - B * C with size bs=2 186 187 Input Parameters: 188 + A,B,C - square bs by bs arrays stored in column major order 189 190 Output Parameter: 191 . A = A - B*C 192 */ 193 194 #define Kernel_A_gets_A_minus_B_times_C_2(A,B,C) 0;\ 195 {\ 196 A[0] -= B[0]*C[0] + B[2]*C[1];\ 197 A[1] -= B[1]*C[0] + B[3]*C[1];\ 198 A[2] -= B[0]*C[2] + B[2]*C[3];\ 199 A[3] -= B[1]*C[2] + B[3]*C[3];\ 200 } 201 202 /* 203 Kernel_A_gets_A_times_B_3: A = A * B with size bs=3 204 205 Input Parameters: 206 + A,B - square bs by bs arrays stored in column major order 207 - W - bs*bs work arrary 208 209 Output Parameter: 210 . A = A * B 211 */ 212 213 #define Kernel_A_gets_A_times_B_3(A,B,W) 0;\ 214 {\ 215 PetscMemcpy(W,A,9*sizeof(MatScalar));\ 216 A[0] = W[0]*B[0] + W[3]*B[1] + W[6]*B[2];\ 217 A[1] = W[1]*B[0] + W[4]*B[1] + W[7]*B[2];\ 218 A[2] = W[2]*B[0] + W[5]*B[1] + W[8]*B[2];\ 219 A[3] = W[0]*B[3] + W[3]*B[4] + W[6]*B[5];\ 220 A[4] = W[1]*B[3] + W[4]*B[4] + W[7]*B[5];\ 221 A[5] = W[2]*B[3] + W[5]*B[4] + W[8]*B[5];\ 222 A[6] = W[0]*B[6] + W[3]*B[7] + W[6]*B[8];\ 223 A[7] = W[1]*B[6] + W[4]*B[7] + W[7]*B[8];\ 224 A[8] = W[2]*B[6] + W[5]*B[7] + W[8]*B[8];\ 225 } 226 227 /* 228 Kernel_A_gets_A_minus_B_times_C_3: A = A - B * C with size bs=3 229 230 Input Parameters: 231 + A,B,C - square bs by bs arrays stored in column major order 232 233 Output Parameter: 234 . A = A - B*C 235 */ 236 237 #define Kernel_A_gets_A_minus_B_times_C_3(A,B,C) 0;\ 238 {\ 239 A[0] -= B[0]*C[0] + B[3]*C[1] + B[6]*C[2];\ 240 A[1] -= B[1]*C[0] + B[4]*C[1] + B[7]*C[2];\ 241 A[2] -= B[2]*C[0] + B[5]*C[1] + B[8]*C[2];\ 242 A[3] -= B[0]*C[3] + B[3]*C[4] + B[6]*C[5];\ 243 A[4] -= B[1]*C[3] + B[4]*C[4] + B[7]*C[5];\ 244 A[5] -= B[2]*C[3] + B[5]*C[4] + B[8]*C[5];\ 245 A[6] -= B[0]*C[6] + B[3]*C[7] + B[6]*C[8];\ 246 A[7] -= B[1]*C[6] + B[4]*C[7] + B[7]*C[8];\ 247 A[8] -= B[2]*C[6] + B[5]*C[7] + B[8]*C[8];\ 248 } 249 250 /* 251 Kernel_A_gets_A_times_B_4: A = A * B with size bs=4 252 253 Input Parameters: 254 + A,B - square bs by bs arrays stored in column major order 255 - W - bs*bs work arrary 256 257 Output Parameter: 258 . A = A * B 259 */ 260 261 #define Kernel_A_gets_A_times_B_4(A,B,W) 0;\ 262 {\ 263 PetscMemcpy(W,A,16*sizeof(MatScalar));\ 264 A[0] = W[0]*B[0] + W[4]*B[1] + W[8]*B[2] + W[12]*B[3];\ 265 A[1] = W[1]*B[0] + W[5]*B[1] + W[9]*B[2] + W[13]*B[3];\ 266 A[2] = W[2]*B[0] + W[6]*B[1] + W[10]*B[2] + W[14]*B[3];\ 267 A[3] = W[3]*B[0] + W[7]*B[1] + W[11]*B[2] + W[15]*B[3];\ 268 A[4] = W[0]*B[4] + W[4]*B[5] + W[8]*B[6] + W[12]*B[7];\ 269 A[5] = W[1]*B[4] + W[5]*B[5] + W[9]*B[6] + W[13]*B[7];\ 270 A[6] = W[2]*B[4] + W[6]*B[5] + W[10]*B[6] + W[14]*B[7];\ 271 A[7] = W[3]*B[4] + W[7]*B[5] + W[11]*B[6] + W[15]*B[7];\ 272 A[8] = W[0]*B[8] + W[4]*B[9] + W[8]*B[10] + W[12]*B[11];\ 273 A[9] = W[1]*B[8] + W[5]*B[9] + W[9]*B[10] + W[13]*B[11];\ 274 A[10] = W[2]*B[8] + W[6]*B[9] + W[10]*B[10] + W[14]*B[11];\ 275 A[11] = W[3]*B[8] + W[7]*B[9] + W[11]*B[10] + W[15]*B[11];\ 276 A[12] = W[0]*B[12] + W[4]*B[13] + W[8]*B[14] + W[12]*B[15];\ 277 A[13] = W[1]*B[12] + W[5]*B[13] + W[9]*B[14] + W[13]*B[15];\ 278 A[14] = W[2]*B[12] + W[6]*B[13] + W[10]*B[14] + W[14]*B[15];\ 279 A[15] = W[3]*B[12] + W[7]*B[13] + W[11]*B[14] + W[15]*B[15];\ 280 } 281 282 /* 283 Kernel_A_gets_A_minus_B_times_C_4: A = A - B * C with size bs=4 284 285 Input Parameters: 286 + A,B,C - square bs by bs arrays stored in column major order 287 288 Output Parameter: 289 . A = A - B*C 290 */ 291 292 #define Kernel_A_gets_A_minus_B_times_C_4(A,B,C) 0;\ 293 {\ 294 A[0] -= B[0]*C[0] + B[4]*C[1] + B[8]*C[2] + B[12]*C[3];\ 295 A[1] -= B[1]*C[0] + B[5]*C[1] + B[9]*C[2] + B[13]*C[3];\ 296 A[2] -= B[2]*C[0] + B[6]*C[1] + B[10]*C[2] + B[14]*C[3];\ 297 A[3] -= B[3]*C[0] + B[7]*C[1] + B[11]*C[2] + B[15]*C[3];\ 298 A[4] -= B[0]*C[4] + B[4]*C[5] + B[8]*C[6] + B[12]*C[7];\ 299 A[5] -= B[1]*C[4] + B[5]*C[5] + B[9]*C[6] + B[13]*C[7];\ 300 A[6] -= B[2]*C[4] + B[6]*C[5] + B[10]*C[6] + B[14]*C[7];\ 301 A[7] -= B[3]*C[4] + B[7]*C[5] + B[11]*C[6] + B[15]*C[7];\ 302 A[8] -= B[0]*C[8] + B[4]*C[9] + B[8]*C[10] + B[12]*C[11];\ 303 A[9] -= B[1]*C[8] + B[5]*C[9] + B[9]*C[10] + B[13]*C[11];\ 304 A[10] -= B[2]*C[8] + B[6]*C[9] + B[10]*C[10] + B[14]*C[11];\ 305 A[11] -= B[3]*C[8] + B[7]*C[9] + B[11]*C[10] + B[15]*C[11];\ 306 A[12] -= B[0]*C[12] + B[4]*C[13] + B[8]*C[14] + B[12]*C[15];\ 307 A[13] -= B[1]*C[12] + B[5]*C[13] + B[9]*C[14] + B[13]*C[15];\ 308 A[14] -= B[2]*C[12] + B[6]*C[13] + B[10]*C[14] + B[14]*C[15];\ 309 A[15] -= B[3]*C[12] + B[7]*C[13] + B[11]*C[14] + B[15]*C[15];\ 310 } 311 312 313 #define Kernel_A_gets_A_times_B_5(A,B,W) 0;\ 314 {\ 315 PetscMemcpy(W,A,25*sizeof(MatScalar));\ 316 A[0] = W[0]*B[0] + W[5]*B[1] + W[10]*B[2] + W[15]*B[3] + W[20]*B[4];\ 317 A[1] = W[1]*B[0] + W[6]*B[1] + W[11]*B[2] + W[16]*B[3] + W[21]*B[4];\ 318 A[2] = W[2]*B[0] + W[7]*B[1] + W[12]*B[2] + W[17]*B[3] + W[22]*B[4];\ 319 A[3] = W[3]*B[0] + W[8]*B[1] + W[13]*B[2] + W[18]*B[3] + W[23]*B[4];\ 320 A[4] = W[4]*B[0] + W[9]*B[1] + W[14]*B[2] + W[19]*B[3] + W[24]*B[4];\ 321 A[5] = W[0]*B[5] + W[5]*B[6] + W[10]*B[7] + W[15]*B[8] + W[20]*B[9];\ 322 A[6] = W[1]*B[5] + W[6]*B[6] + W[11]*B[7] + W[16]*B[8] + W[21]*B[9];\ 323 A[7] = W[2]*B[5] + W[7]*B[6] + W[12]*B[7] + W[17]*B[8] + W[22]*B[9];\ 324 A[8] = W[3]*B[5] + W[8]*B[6] + W[13]*B[7] + W[18]*B[8] + W[23]*B[9];\ 325 A[9] = W[4]*B[5] + W[9]*B[6] + W[14]*B[7] + W[19]*B[8] + W[24]*B[9];\ 326 A[10] = W[0]*B[10] + W[5]*B[11] + W[10]*B[12] + W[15]*B[13] + W[20]*B[14];\ 327 A[11] = W[1]*B[10] + W[6]*B[11] + W[11]*B[12] + W[16]*B[13] + W[21]*B[14];\ 328 A[12] = W[2]*B[10] + W[7]*B[11] + W[12]*B[12] + W[17]*B[13] + W[22]*B[14];\ 329 A[13] = W[3]*B[10] + W[8]*B[11] + W[13]*B[12] + W[18]*B[13] + W[23]*B[14];\ 330 A[14] = W[4]*B[10] + W[9]*B[11] + W[14]*B[12] + W[19]*B[13] + W[24]*B[14];\ 331 A[15] = W[0]*B[15] + W[5]*B[16] + W[10]*B[17] + W[15]*B[18] + W[20]*B[19];\ 332 A[16] = W[1]*B[15] + W[6]*B[16] + W[11]*B[17] + W[16]*B[18] + W[21]*B[19];\ 333 A[17] = W[2]*B[15] + W[7]*B[16] + W[12]*B[17] + W[17]*B[18] + W[22]*B[19];\ 334 A[18] = W[3]*B[15] + W[8]*B[16] + W[13]*B[17] + W[18]*B[18] + W[23]*B[19];\ 335 A[19] = W[4]*B[15] + W[9]*B[16] + W[14]*B[17] + W[19]*B[18] + W[24]*B[19];\ 336 A[20] = W[0]*B[20] + W[5]*B[21] + W[10]*B[22] + W[15]*B[23] + W[20]*B[24];\ 337 A[21] = W[1]*B[20] + W[6]*B[21] + W[11]*B[22] + W[16]*B[23] + W[21]*B[24];\ 338 A[22] = W[2]*B[20] + W[7]*B[21] + W[12]*B[22] + W[17]*B[23] + W[22]*B[24];\ 339 A[23] = W[3]*B[20] + W[8]*B[21] + W[13]*B[22] + W[18]*B[23] + W[23]*B[24];\ 340 A[24] = W[4]*B[20] + W[9]*B[21] + W[14]*B[22] + W[19]*B[23] + W[24]*B[24];\ 341 } 342 343 /* 344 Kernel_A_gets_A_minus_B_times_C_5: A = A - B * C with size bs=5 345 346 Input Parameters: 347 + A,B,C - square bs by bs arrays stored in column major order 348 349 Output Parameter: 350 . A = A - B*C 351 */ 352 353 #define Kernel_A_gets_A_minus_B_times_C_5(A,B,C) 0;\ 354 {\ 355 A[0] -= B[0]*C[0] + B[5]*C[1] + B[10]*C[2] + B[15]*C[3] + B[20]*C[4];\ 356 A[1] -= B[1]*C[0] + B[6]*C[1] + B[11]*C[2] + B[16]*C[3] + B[21]*C[4];\ 357 A[2] -= B[2]*C[0] + B[7]*C[1] + B[12]*C[2] + B[17]*C[3] + B[22]*C[4];\ 358 A[3] -= B[3]*C[0] + B[8]*C[1] + B[13]*C[2] + B[18]*C[3] + B[23]*C[4];\ 359 A[4] -= B[4]*C[0] + B[9]*C[1] + B[14]*C[2] + B[19]*C[3] + B[24]*C[4];\ 360 A[5] -= B[0]*C[5] + B[5]*C[6] + B[10]*C[7] + B[15]*C[8] + B[20]*C[9];\ 361 A[6] -= B[1]*C[5] + B[6]*C[6] + B[11]*C[7] + B[16]*C[8] + B[21]*C[9];\ 362 A[7] -= B[2]*C[5] + B[7]*C[6] + B[12]*C[7] + B[17]*C[8] + B[22]*C[9];\ 363 A[8] -= B[3]*C[5] + B[8]*C[6] + B[13]*C[7] + B[18]*C[8] + B[23]*C[9];\ 364 A[9] -= B[4]*C[5] + B[9]*C[6] + B[14]*C[7] + B[19]*C[8] + B[24]*C[9];\ 365 A[10] -= B[0]*C[10] + B[5]*C[11] + B[10]*C[12] + B[15]*C[13] + B[20]*C[14];\ 366 A[11] -= B[1]*C[10] + B[6]*C[11] + B[11]*C[12] + B[16]*C[13] + B[21]*C[14];\ 367 A[12] -= B[2]*C[10] + B[7]*C[11] + B[12]*C[12] + B[17]*C[13] + B[22]*C[14];\ 368 A[13] -= B[3]*C[10] + B[8]*C[11] + B[13]*C[12] + B[18]*C[13] + B[23]*C[14];\ 369 A[14] -= B[4]*C[10] + B[9]*C[11] + B[14]*C[12] + B[19]*C[13] + B[24]*C[14];\ 370 A[15] -= B[0]*C[15] + B[5]*C[16] + B[10]*C[17] + B[15]*C[18] + B[20]*C[19];\ 371 A[16] -= B[1]*C[15] + B[6]*C[16] + B[11]*C[17] + B[16]*C[18] + B[21]*C[19];\ 372 A[17] -= B[2]*C[15] + B[7]*C[16] + B[12]*C[17] + B[17]*C[18] + B[22]*C[19];\ 373 A[18] -= B[3]*C[15] + B[8]*C[16] + B[13]*C[17] + B[18]*C[18] + B[23]*C[19];\ 374 A[19] -= B[4]*C[15] + B[9]*C[16] + B[14]*C[17] + B[19]*C[18] + B[24]*C[19];\ 375 A[20] -= B[0]*C[20] + B[5]*C[21] + B[10]*C[22] + B[15]*C[23] + B[20]*C[24];\ 376 A[21] -= B[1]*C[20] + B[6]*C[21] + B[11]*C[22] + B[16]*C[23] + B[21]*C[24];\ 377 A[22] -= B[2]*C[20] + B[7]*C[21] + B[12]*C[22] + B[17]*C[23] + B[22]*C[24];\ 378 A[23] -= B[3]*C[20] + B[8]*C[21] + B[13]*C[22] + B[18]*C[23] + B[23]*C[24];\ 379 A[24] -= B[4]*C[20] + B[9]*C[21] + B[14]*C[22] + B[19]*C[23] + B[24]*C[24];\ 380 } 381 382 #endif 383