1 2 #if !defined(__BAIJ_H) 3 #define __BAIJ_H 4 #include "private/matimpl.h" 5 #include "../src/mat/impls/aij/seq/aij.h" 6 #include "../src/mat/impls/baij/seq/ftn-kernels/fsolvebaij.h" 7 8 /* 9 MATSEQBAIJ format - Block compressed row storage. The i[] and j[] 10 arrays start at 0. 11 */ 12 13 /* This header is shared by the SeqSBAIJ matrix */ 14 #define SEQBAIJHEADER \ 15 PetscInt bs2; /* square of block size */ \ 16 PetscInt mbs,nbs; /* rows/bs, columns/bs */ \ 17 PetscScalar *mult_work; /* work array for matrix vector product*/ \ 18 MatScalar *saved_values; \ 19 \ 20 Mat sbaijMat; /* mat in sbaij format */ \ 21 \ 22 \ 23 MatScalar *idiag; /* inverse of block diagonal */ \ 24 PetscTruth idiagvalid /* if above has correct/current values */ 25 26 27 typedef struct { 28 SEQAIJHEADER(MatScalar); 29 SEQBAIJHEADER; 30 } Mat_SeqBAIJ; 31 32 EXTERN_C_BEGIN 33 EXTERN PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat,PetscInt,PetscInt,PetscInt*); 34 EXTERN_C_END 35 EXTERN PetscErrorCode MatILUFactorSymbolic_SeqBAIJ(Mat,Mat,IS,IS,const MatFactorInfo*); 36 EXTERN PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat,Mat,IS,const MatFactorInfo*); 37 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat,Mat,IS,const MatFactorInfo*); 38 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat,Mat,const MatFactorInfo*); 39 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 40 EXTERN PetscErrorCode MatDuplicate_SeqBAIJ(Mat,MatDuplicateOption,Mat*); 41 EXTERN PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat,PetscTruth*,PetscInt*); 42 EXTERN PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat); 43 EXTERN PetscErrorCode MatILUDTFactor_SeqBAIJ(Mat,IS,IS,const MatFactorInfo*,Mat*); 44 45 EXTERN PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat,Mat,IS,IS,const MatFactorInfo*); 46 EXTERN PetscErrorCode MatLUFactor_SeqBAIJ(Mat,IS,IS,const MatFactorInfo*); 47 EXTERN PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat,PetscInt,IS*,PetscInt); 48 EXTERN PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatReuse,Mat*); 49 EXTERN PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 50 EXTERN PetscErrorCode MatMultTranspose_SeqBAIJ(Mat,Vec,Vec); 51 EXTERN PetscErrorCode MatMultHermitianTranspose_SeqBAIJ(Mat,Vec,Vec); 52 EXTERN PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 53 EXTERN PetscErrorCode MatMultHermitianTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 54 EXTERN PetscErrorCode MatScale_SeqBAIJ(Mat,PetscScalar); 55 EXTERN PetscErrorCode MatNorm_SeqBAIJ(Mat,NormType,PetscReal *); 56 EXTERN PetscErrorCode MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*); 57 EXTERN PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat,Vec); 58 EXTERN PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 59 EXTERN PetscErrorCode MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 60 EXTERN PetscErrorCode MatZeroEntries_SeqBAIJ(Mat); 61 62 EXTERN PetscErrorCode MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(Mat); 63 64 EXTERN PetscErrorCode MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 65 EXTERN PetscErrorCode MatSolve_SeqBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 66 EXTERN PetscErrorCode MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 67 EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_newdatastruct(Mat,Vec,Vec); 68 EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_newdatastruct_v2(Mat,Vec,Vec); 69 EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 70 EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_newdatastruct(Mat,Vec,Vec); 71 EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_newdatastruct_v2(Mat,Vec,Vec); 72 EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct(Mat,Vec,Vec); 73 EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct_v2(Mat,Vec,Vec); 74 EXTERN PetscErrorCode MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 75 EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_newdatastruct(Mat,Vec,Vec); 76 EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_newdatastruct_v2(Mat,Vec,Vec); 77 EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 78 EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct(Mat,Vec,Vec); 79 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 80 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_newdatastruct(Mat,Vec,Vec); 81 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_newdatastruct_v2(Mat,Vec,Vec); 82 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 83 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_newdatastruct(Mat,Vec,Vec); 84 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_newdatastruct_v2(Mat,Vec,Vec); 85 #if defined(PETSC_HAVE_SSE) 86 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_SSE_Demotion(Mat,Vec,Vec); 87 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_SSE_Demotion(Mat,Vec,Vec); 88 EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_SSE_Demotion_usj(Mat,Vec,Vec); 89 #endif 90 EXTERN PetscErrorCode MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 91 EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_newdatastruct(Mat,Vec,Vec); 92 EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_newdatastruct_v2(Mat,Vec,Vec); 93 EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 94 EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering_newdatastruct(Mat,Vec,Vec); 95 EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering_newdatastruct_v2(Mat,Vec,Vec); 96 EXTERN PetscErrorCode MatSolve_SeqBAIJ_6(Mat,Vec,Vec); 97 EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_newdatastruct(Mat,Vec,Vec); 98 EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_newdatastruct_v2(Mat,Vec,Vec); 99 EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 100 EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_NaturalOrdering_newdatastruct(Mat,Vec,Vec); 101 EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_NaturalOrdering_newdatastruct_v2(Mat,Vec,Vec); 102 EXTERN PetscErrorCode MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 103 EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_newdatastruct(Mat,Vec,Vec); 104 EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_newdatastruct_v2(Mat,Vec,Vec); 105 EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 106 EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_NaturalOrdering_newdatastruct(Mat,Vec,Vec); 107 EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_NaturalOrdering_newdatastruct_v2(Mat,Vec,Vec); 108 EXTERN PetscErrorCode MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 109 EXTERN PetscErrorCode MatSolve_SeqBAIJ_N_newdatastruct(Mat,Vec,Vec); 110 EXTERN PetscErrorCode MatSolve_SeqBAIJ_N_newdatastruct_v2(Mat,Vec,Vec); 111 EXTERN PetscErrorCode MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct(Mat,Vec,Vec); 112 EXTERN PetscErrorCode MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct_v2(Mat,Vec,Vec); 113 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_Update(Mat,Vec,Vec); 114 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec); 115 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 116 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec); 117 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 118 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec); 119 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 120 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec); 121 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 122 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec); 123 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 124 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec); 125 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 126 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec); 127 EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 128 129 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N_newdatastruct(Mat,Mat,const MatFactorInfo*); 130 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N_newdatastruct_v2(Mat,Mat,const MatFactorInfo*); 131 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat,const MatFactorInfo*); 132 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat,const MatFactorInfo*); 133 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_newdatastruct(Mat,Mat,const MatFactorInfo*); 134 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_newdatastruct_v2(Mat,Mat,const MatFactorInfo*); 135 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 136 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*); 137 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct_v2(Mat,Mat,const MatFactorInfo*); 138 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat,const MatFactorInfo*); 139 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_newdatastruct(Mat,Mat,const MatFactorInfo*); 140 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_newdatastruct_v2(Mat,Mat,const MatFactorInfo*); 141 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 142 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*); 143 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_newdatastruct_v2(Mat,Mat,const MatFactorInfo*); 144 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat,const MatFactorInfo*); 145 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_newdatastruct(Mat,Mat,const MatFactorInfo*); 146 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_newdatastruct_v2(Mat,Mat,const MatFactorInfo*); 147 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 148 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*); 149 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_newdatastruct_v2(Mat,Mat,const MatFactorInfo*); 150 #if defined(PETSC_HAVE_SSE) 151 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE(Mat,Mat,const MatFactorInfo*); 152 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj(Mat,Mat,const MatFactorInfo*); 153 #else 154 #endif 155 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat,const MatFactorInfo*); 156 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_newdatastruct(Mat,Mat,const MatFactorInfo*); 157 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_newdatastruct_v2(Mat,Mat,const MatFactorInfo*); 158 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 159 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*); 160 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_newdatastruct_v2(Mat,Mat,const MatFactorInfo*); 161 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat,const MatFactorInfo*); 162 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6_newdatastruct(Mat,Mat,const MatFactorInfo*); 163 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6_newdatastruct_v2(Mat,Mat,const MatFactorInfo*); 164 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 165 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*); 166 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_newdatastruct_v2(Mat,Mat,const MatFactorInfo*); 167 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7(Mat,Mat,const MatFactorInfo*); 168 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_newdatastruct(Mat,Mat,const MatFactorInfo*); 169 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_newdatastruct_v2(Mat,Mat,const MatFactorInfo*); 170 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering(Mat,Mat,const MatFactorInfo*); 171 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*); 172 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_newdatastruct_v2(Mat,Mat,const MatFactorInfo*); 173 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat,const MatFactorInfo*); 174 EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N_newdatastruct_v2(Mat,Mat,const MatFactorInfo*); 175 176 EXTERN PetscErrorCode MatMult_SeqBAIJ_1(Mat,Vec,Vec); 177 EXTERN PetscErrorCode MatMult_SeqBAIJ_2(Mat,Vec,Vec); 178 EXTERN PetscErrorCode MatMult_SeqBAIJ_3(Mat,Vec,Vec); 179 EXTERN PetscErrorCode MatMult_SeqBAIJ_4(Mat,Vec,Vec); 180 EXTERN PetscErrorCode MatMult_SeqBAIJ_5(Mat,Vec,Vec); 181 EXTERN PetscErrorCode MatMult_SeqBAIJ_6(Mat,Vec,Vec); 182 EXTERN PetscErrorCode MatMult_SeqBAIJ_7(Mat,Vec,Vec); 183 EXTERN PetscErrorCode MatMult_SeqBAIJ_N(Mat,Vec,Vec); 184 185 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 186 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 187 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 188 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 189 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 190 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec); 191 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 192 EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 193 EXTERN PetscErrorCode MatLoad_SeqBAIJ(PetscViewer, const MatType,Mat*); 194 195 /* 196 Kernel_A_gets_A_times_B_2: A = A * B with size bs=2 197 198 Input Parameters: 199 + A,B - square bs by bs arrays stored in column major order 200 - W - bs*bs work arrary 201 202 Output Parameter: 203 . A = A * B 204 */ 205 206 #define Kernel_A_gets_A_times_B_2(A,B,W) 0;\ 207 {\ 208 PetscMemcpy(W,A,4*sizeof(MatScalar));\ 209 A[0] = W[0]*B[0] + W[2]*B[1];\ 210 A[1] = W[1]*B[0] + W[3]*B[1];\ 211 A[2] = W[0]*B[2] + W[2]*B[3];\ 212 A[3] = W[1]*B[2] + W[3]*B[3];\ 213 } 214 215 /* 216 Kernel_A_gets_A_minus_B_times_C_2: A = A - B * C with size bs=2 217 218 Input Parameters: 219 + A,B,C - square bs by bs arrays stored in column major order 220 221 Output Parameter: 222 . A = A - B*C 223 */ 224 225 #define Kernel_A_gets_A_minus_B_times_C_2(A,B,C) 0;\ 226 {\ 227 A[0] -= B[0]*C[0] + B[2]*C[1];\ 228 A[1] -= B[1]*C[0] + B[3]*C[1];\ 229 A[2] -= B[0]*C[2] + B[2]*C[3];\ 230 A[3] -= B[1]*C[2] + B[3]*C[3];\ 231 } 232 233 /* 234 Kernel_A_gets_A_times_B_3: A = A * B with size bs=3 235 236 Input Parameters: 237 + A,B - square bs by bs arrays stored in column major order 238 - W - bs*bs work arrary 239 240 Output Parameter: 241 . A = A * B 242 */ 243 244 #define Kernel_A_gets_A_times_B_3(A,B,W) 0;\ 245 {\ 246 PetscMemcpy(W,A,9*sizeof(MatScalar));\ 247 A[0] = W[0]*B[0] + W[3]*B[1] + W[6]*B[2];\ 248 A[1] = W[1]*B[0] + W[4]*B[1] + W[7]*B[2];\ 249 A[2] = W[2]*B[0] + W[5]*B[1] + W[8]*B[2];\ 250 A[3] = W[0]*B[3] + W[3]*B[4] + W[6]*B[5];\ 251 A[4] = W[1]*B[3] + W[4]*B[4] + W[7]*B[5];\ 252 A[5] = W[2]*B[3] + W[5]*B[4] + W[8]*B[5];\ 253 A[6] = W[0]*B[6] + W[3]*B[7] + W[6]*B[8];\ 254 A[7] = W[1]*B[6] + W[4]*B[7] + W[7]*B[8];\ 255 A[8] = W[2]*B[6] + W[5]*B[7] + W[8]*B[8];\ 256 } 257 258 /* 259 Kernel_A_gets_A_minus_B_times_C_3: A = A - B * C with size bs=3 260 261 Input Parameters: 262 + A,B,C - square bs by bs arrays stored in column major order 263 264 Output Parameter: 265 . A = A - B*C 266 */ 267 268 #define Kernel_A_gets_A_minus_B_times_C_3(A,B,C) 0;\ 269 {\ 270 A[0] -= B[0]*C[0] + B[3]*C[1] + B[6]*C[2];\ 271 A[1] -= B[1]*C[0] + B[4]*C[1] + B[7]*C[2];\ 272 A[2] -= B[2]*C[0] + B[5]*C[1] + B[8]*C[2];\ 273 A[3] -= B[0]*C[3] + B[3]*C[4] + B[6]*C[5];\ 274 A[4] -= B[1]*C[3] + B[4]*C[4] + B[7]*C[5];\ 275 A[5] -= B[2]*C[3] + B[5]*C[4] + B[8]*C[5];\ 276 A[6] -= B[0]*C[6] + B[3]*C[7] + B[6]*C[8];\ 277 A[7] -= B[1]*C[6] + B[4]*C[7] + B[7]*C[8];\ 278 A[8] -= B[2]*C[6] + B[5]*C[7] + B[8]*C[8];\ 279 } 280 281 /* 282 Kernel_A_gets_A_times_B_4: A = A * B with size bs=4 283 284 Input Parameters: 285 + A,B - square bs by bs arrays stored in column major order 286 - W - bs*bs work arrary 287 288 Output Parameter: 289 . A = A * B 290 */ 291 292 #define Kernel_A_gets_A_times_B_4(A,B,W) 0;\ 293 {\ 294 PetscMemcpy(W,A,16*sizeof(MatScalar));\ 295 A[0] = W[0]*B[0] + W[4]*B[1] + W[8]*B[2] + W[12]*B[3];\ 296 A[1] = W[1]*B[0] + W[5]*B[1] + W[9]*B[2] + W[13]*B[3];\ 297 A[2] = W[2]*B[0] + W[6]*B[1] + W[10]*B[2] + W[14]*B[3];\ 298 A[3] = W[3]*B[0] + W[7]*B[1] + W[11]*B[2] + W[15]*B[3];\ 299 A[4] = W[0]*B[4] + W[4]*B[5] + W[8]*B[6] + W[12]*B[7];\ 300 A[5] = W[1]*B[4] + W[5]*B[5] + W[9]*B[6] + W[13]*B[7];\ 301 A[6] = W[2]*B[4] + W[6]*B[5] + W[10]*B[6] + W[14]*B[7];\ 302 A[7] = W[3]*B[4] + W[7]*B[5] + W[11]*B[6] + W[15]*B[7];\ 303 A[8] = W[0]*B[8] + W[4]*B[9] + W[8]*B[10] + W[12]*B[11];\ 304 A[9] = W[1]*B[8] + W[5]*B[9] + W[9]*B[10] + W[13]*B[11];\ 305 A[10] = W[2]*B[8] + W[6]*B[9] + W[10]*B[10] + W[14]*B[11];\ 306 A[11] = W[3]*B[8] + W[7]*B[9] + W[11]*B[10] + W[15]*B[11];\ 307 A[12] = W[0]*B[12] + W[4]*B[13] + W[8]*B[14] + W[12]*B[15];\ 308 A[13] = W[1]*B[12] + W[5]*B[13] + W[9]*B[14] + W[13]*B[15];\ 309 A[14] = W[2]*B[12] + W[6]*B[13] + W[10]*B[14] + W[14]*B[15];\ 310 A[15] = W[3]*B[12] + W[7]*B[13] + W[11]*B[14] + W[15]*B[15];\ 311 } 312 313 /* 314 Kernel_A_gets_A_minus_B_times_C_4: A = A - B * C with size bs=4 315 316 Input Parameters: 317 + A,B,C - square bs by bs arrays stored in column major order 318 319 Output Parameter: 320 . A = A - B*C 321 */ 322 323 #define Kernel_A_gets_A_minus_B_times_C_4(A,B,C) 0;\ 324 {\ 325 A[0] -= B[0]*C[0] + B[4]*C[1] + B[8]*C[2] + B[12]*C[3];\ 326 A[1] -= B[1]*C[0] + B[5]*C[1] + B[9]*C[2] + B[13]*C[3];\ 327 A[2] -= B[2]*C[0] + B[6]*C[1] + B[10]*C[2] + B[14]*C[3];\ 328 A[3] -= B[3]*C[0] + B[7]*C[1] + B[11]*C[2] + B[15]*C[3];\ 329 A[4] -= B[0]*C[4] + B[4]*C[5] + B[8]*C[6] + B[12]*C[7];\ 330 A[5] -= B[1]*C[4] + B[5]*C[5] + B[9]*C[6] + B[13]*C[7];\ 331 A[6] -= B[2]*C[4] + B[6]*C[5] + B[10]*C[6] + B[14]*C[7];\ 332 A[7] -= B[3]*C[4] + B[7]*C[5] + B[11]*C[6] + B[15]*C[7];\ 333 A[8] -= B[0]*C[8] + B[4]*C[9] + B[8]*C[10] + B[12]*C[11];\ 334 A[9] -= B[1]*C[8] + B[5]*C[9] + B[9]*C[10] + B[13]*C[11];\ 335 A[10] -= B[2]*C[8] + B[6]*C[9] + B[10]*C[10] + B[14]*C[11];\ 336 A[11] -= B[3]*C[8] + B[7]*C[9] + B[11]*C[10] + B[15]*C[11];\ 337 A[12] -= B[0]*C[12] + B[4]*C[13] + B[8]*C[14] + B[12]*C[15];\ 338 A[13] -= B[1]*C[12] + B[5]*C[13] + B[9]*C[14] + B[13]*C[15];\ 339 A[14] -= B[2]*C[12] + B[6]*C[13] + B[10]*C[14] + B[14]*C[15];\ 340 A[15] -= B[3]*C[12] + B[7]*C[13] + B[11]*C[14] + B[15]*C[15];\ 341 } 342 343 #define Kernel_A_gets_A_times_B_5(A,B,W) 0;\ 344 {\ 345 PetscMemcpy(W,A,25*sizeof(MatScalar));\ 346 A[0] = W[0]*B[0] + W[5]*B[1] + W[10]*B[2] + W[15]*B[3] + W[20]*B[4];\ 347 A[1] = W[1]*B[0] + W[6]*B[1] + W[11]*B[2] + W[16]*B[3] + W[21]*B[4];\ 348 A[2] = W[2]*B[0] + W[7]*B[1] + W[12]*B[2] + W[17]*B[3] + W[22]*B[4];\ 349 A[3] = W[3]*B[0] + W[8]*B[1] + W[13]*B[2] + W[18]*B[3] + W[23]*B[4];\ 350 A[4] = W[4]*B[0] + W[9]*B[1] + W[14]*B[2] + W[19]*B[3] + W[24]*B[4];\ 351 A[5] = W[0]*B[5] + W[5]*B[6] + W[10]*B[7] + W[15]*B[8] + W[20]*B[9];\ 352 A[6] = W[1]*B[5] + W[6]*B[6] + W[11]*B[7] + W[16]*B[8] + W[21]*B[9];\ 353 A[7] = W[2]*B[5] + W[7]*B[6] + W[12]*B[7] + W[17]*B[8] + W[22]*B[9];\ 354 A[8] = W[3]*B[5] + W[8]*B[6] + W[13]*B[7] + W[18]*B[8] + W[23]*B[9];\ 355 A[9] = W[4]*B[5] + W[9]*B[6] + W[14]*B[7] + W[19]*B[8] + W[24]*B[9];\ 356 A[10] = W[0]*B[10] + W[5]*B[11] + W[10]*B[12] + W[15]*B[13] + W[20]*B[14];\ 357 A[11] = W[1]*B[10] + W[6]*B[11] + W[11]*B[12] + W[16]*B[13] + W[21]*B[14];\ 358 A[12] = W[2]*B[10] + W[7]*B[11] + W[12]*B[12] + W[17]*B[13] + W[22]*B[14];\ 359 A[13] = W[3]*B[10] + W[8]*B[11] + W[13]*B[12] + W[18]*B[13] + W[23]*B[14];\ 360 A[14] = W[4]*B[10] + W[9]*B[11] + W[14]*B[12] + W[19]*B[13] + W[24]*B[14];\ 361 A[15] = W[0]*B[15] + W[5]*B[16] + W[10]*B[17] + W[15]*B[18] + W[20]*B[19];\ 362 A[16] = W[1]*B[15] + W[6]*B[16] + W[11]*B[17] + W[16]*B[18] + W[21]*B[19];\ 363 A[17] = W[2]*B[15] + W[7]*B[16] + W[12]*B[17] + W[17]*B[18] + W[22]*B[19];\ 364 A[18] = W[3]*B[15] + W[8]*B[16] + W[13]*B[17] + W[18]*B[18] + W[23]*B[19];\ 365 A[19] = W[4]*B[15] + W[9]*B[16] + W[14]*B[17] + W[19]*B[18] + W[24]*B[19];\ 366 A[20] = W[0]*B[20] + W[5]*B[21] + W[10]*B[22] + W[15]*B[23] + W[20]*B[24];\ 367 A[21] = W[1]*B[20] + W[6]*B[21] + W[11]*B[22] + W[16]*B[23] + W[21]*B[24];\ 368 A[22] = W[2]*B[20] + W[7]*B[21] + W[12]*B[22] + W[17]*B[23] + W[22]*B[24];\ 369 A[23] = W[3]*B[20] + W[8]*B[21] + W[13]*B[22] + W[18]*B[23] + W[23]*B[24];\ 370 A[24] = W[4]*B[20] + W[9]*B[21] + W[14]*B[22] + W[19]*B[23] + W[24]*B[24];\ 371 } 372 373 /* 374 Kernel_A_gets_A_minus_B_times_C_5: A = A - B * C with size bs=5 375 376 Input Parameters: 377 + A,B,C - square bs by bs arrays stored in column major order 378 379 Output Parameter: 380 . A = A - B*C 381 */ 382 383 #define Kernel_A_gets_A_minus_B_times_C_5(A,B,C) 0;\ 384 {\ 385 A[0] -= B[0]*C[0] + B[5]*C[1] + B[10]*C[2] + B[15]*C[3] + B[20]*C[4];\ 386 A[1] -= B[1]*C[0] + B[6]*C[1] + B[11]*C[2] + B[16]*C[3] + B[21]*C[4];\ 387 A[2] -= B[2]*C[0] + B[7]*C[1] + B[12]*C[2] + B[17]*C[3] + B[22]*C[4];\ 388 A[3] -= B[3]*C[0] + B[8]*C[1] + B[13]*C[2] + B[18]*C[3] + B[23]*C[4];\ 389 A[4] -= B[4]*C[0] + B[9]*C[1] + B[14]*C[2] + B[19]*C[3] + B[24]*C[4];\ 390 A[5] -= B[0]*C[5] + B[5]*C[6] + B[10]*C[7] + B[15]*C[8] + B[20]*C[9];\ 391 A[6] -= B[1]*C[5] + B[6]*C[6] + B[11]*C[7] + B[16]*C[8] + B[21]*C[9];\ 392 A[7] -= B[2]*C[5] + B[7]*C[6] + B[12]*C[7] + B[17]*C[8] + B[22]*C[9];\ 393 A[8] -= B[3]*C[5] + B[8]*C[6] + B[13]*C[7] + B[18]*C[8] + B[23]*C[9];\ 394 A[9] -= B[4]*C[5] + B[9]*C[6] + B[14]*C[7] + B[19]*C[8] + B[24]*C[9];\ 395 A[10] -= B[0]*C[10] + B[5]*C[11] + B[10]*C[12] + B[15]*C[13] + B[20]*C[14];\ 396 A[11] -= B[1]*C[10] + B[6]*C[11] + B[11]*C[12] + B[16]*C[13] + B[21]*C[14];\ 397 A[12] -= B[2]*C[10] + B[7]*C[11] + B[12]*C[12] + B[17]*C[13] + B[22]*C[14];\ 398 A[13] -= B[3]*C[10] + B[8]*C[11] + B[13]*C[12] + B[18]*C[13] + B[23]*C[14];\ 399 A[14] -= B[4]*C[10] + B[9]*C[11] + B[14]*C[12] + B[19]*C[13] + B[24]*C[14];\ 400 A[15] -= B[0]*C[15] + B[5]*C[16] + B[10]*C[17] + B[15]*C[18] + B[20]*C[19];\ 401 A[16] -= B[1]*C[15] + B[6]*C[16] + B[11]*C[17] + B[16]*C[18] + B[21]*C[19];\ 402 A[17] -= B[2]*C[15] + B[7]*C[16] + B[12]*C[17] + B[17]*C[18] + B[22]*C[19];\ 403 A[18] -= B[3]*C[15] + B[8]*C[16] + B[13]*C[17] + B[18]*C[18] + B[23]*C[19];\ 404 A[19] -= B[4]*C[15] + B[9]*C[16] + B[14]*C[17] + B[19]*C[18] + B[24]*C[19];\ 405 A[20] -= B[0]*C[20] + B[5]*C[21] + B[10]*C[22] + B[15]*C[23] + B[20]*C[24];\ 406 A[21] -= B[1]*C[20] + B[6]*C[21] + B[11]*C[22] + B[16]*C[23] + B[21]*C[24];\ 407 A[22] -= B[2]*C[20] + B[7]*C[21] + B[12]*C[22] + B[17]*C[23] + B[22]*C[24];\ 408 A[23] -= B[3]*C[20] + B[8]*C[21] + B[13]*C[22] + B[18]*C[23] + B[23]*C[24];\ 409 A[24] -= B[4]*C[20] + B[9]*C[21] + B[14]*C[22] + B[19]*C[23] + B[24]*C[24];\ 410 } 411 412 #define Kernel_A_gets_A_times_B_6(A,B,W) 0;\ 413 {\ 414 PetscMemcpy(W,A,36*sizeof(MatScalar));\ 415 A[0] = W[0]*B[0] + W[6]*B[1] + W[12]*B[2] + W[18]*B[3] + W[24]*B[4] + W[30]*B[5];\ 416 A[1] = W[1]*B[0] + W[7]*B[1] + W[13]*B[2] + W[19]*B[3] + W[25]*B[4] + W[31]*B[5];\ 417 A[2] = W[2]*B[0] + W[8]*B[1] + W[14]*B[2] + W[20]*B[3] + W[26]*B[4] + W[32]*B[5];\ 418 A[3] = W[3]*B[0] + W[9]*B[1] + W[15]*B[2] + W[21]*B[3] + W[27]*B[4] + W[33]*B[5];\ 419 A[4] = W[4]*B[0] + W[10]*B[1] + W[16]*B[2] + W[22]*B[3] + W[28]*B[4] + W[34]*B[5];\ 420 A[5] = W[5]*B[0] + W[11]*B[1] + W[17]*B[2] + W[23]*B[3] + W[29]*B[4] + W[35]*B[5];\ 421 A[6] = W[0]*B[6] + W[6]*B[7] + W[12]*B[8] + W[18]*B[9] + W[24]*B[10] + W[30]*B[11];\ 422 A[7] = W[1]*B[6] + W[7]*B[7] + W[13]*B[8] + W[19]*B[9] + W[25]*B[10] + W[31]*B[11];\ 423 A[8] = W[2]*B[6] + W[8]*B[7] + W[14]*B[8] + W[20]*B[9] + W[26]*B[10] + W[32]*B[11];\ 424 A[9] = W[3]*B[6] + W[9]*B[7] + W[15]*B[8] + W[21]*B[9] + W[27]*B[10] + W[33]*B[11];\ 425 A[10] = W[4]*B[6] + W[10]*B[7] + W[16]*B[8] + W[22]*B[9] + W[28]*B[10] + W[34]*B[11];\ 426 A[11] = W[5]*B[6] + W[11]*B[7] + W[17]*B[8] + W[23]*B[9] + W[29]*B[10] + W[35]*B[11];\ 427 A[12] = W[0]*B[12] + W[6]*B[13] + W[12]*B[14] + W[18]*B[15] + W[24]*B[16] + W[30]*B[17];\ 428 A[13] = W[1]*B[12] + W[7]*B[13] + W[13]*B[14] + W[19]*B[15] + W[25]*B[16] + W[31]*B[17];\ 429 A[14] = W[2]*B[12] + W[8]*B[13] + W[14]*B[14] + W[20]*B[15] + W[26]*B[16] + W[32]*B[17];\ 430 A[15] = W[3]*B[12] + W[9]*B[13] + W[15]*B[14] + W[21]*B[15] + W[27]*B[16] + W[33]*B[17];\ 431 A[16] = W[4]*B[12] + W[10]*B[13] + W[16]*B[14] + W[22]*B[15] + W[28]*B[16] + W[34]*B[17];\ 432 A[17] = W[5]*B[12] + W[11]*B[13] + W[17]*B[14] + W[23]*B[15] + W[29]*B[16] + W[35]*B[17];\ 433 A[18] = W[0]*B[18] + W[6]*B[19] + W[12]*B[20] + W[18]*B[21] + W[24]*B[22] + W[30]*B[23];\ 434 A[19] = W[1]*B[18] + W[7]*B[19] + W[13]*B[20] + W[19]*B[21] + W[25]*B[22] + W[31]*B[23];\ 435 A[20] = W[2]*B[18] + W[8]*B[19] + W[14]*B[20] + W[20]*B[21] + W[26]*B[22] + W[32]*B[23];\ 436 A[21] = W[3]*B[18] + W[9]*B[19] + W[15]*B[20] + W[21]*B[21] + W[27]*B[22] + W[33]*B[23];\ 437 A[22] = W[4]*B[18] + W[10]*B[19] + W[16]*B[20] + W[22]*B[21] + W[28]*B[22] + W[34]*B[23];\ 438 A[23] = W[5]*B[18] + W[11]*B[19] + W[17]*B[20] + W[23]*B[21] + W[29]*B[22] + W[35]*B[23];\ 439 A[24] = W[0]*B[24] + W[6]*B[25] + W[12]*B[26] + W[18]*B[27] + W[24]*B[28] + W[30]*B[29];\ 440 A[25] = W[1]*B[24] + W[7]*B[25] + W[13]*B[26] + W[19]*B[27] + W[25]*B[28] + W[31]*B[29];\ 441 A[26] = W[2]*B[24] + W[8]*B[25] + W[14]*B[26] + W[20]*B[27] + W[26]*B[28] + W[32]*B[29];\ 442 A[27] = W[3]*B[24] + W[9]*B[25] + W[15]*B[26] + W[21]*B[27] + W[27]*B[28] + W[33]*B[29];\ 443 A[28] = W[4]*B[24] + W[10]*B[25] + W[16]*B[26] + W[22]*B[27] + W[28]*B[28] + W[34]*B[29];\ 444 A[29] = W[5]*B[24] + W[11]*B[25] + W[17]*B[26] + W[23]*B[27] + W[29]*B[28] + W[35]*B[29];\ 445 A[30] = W[0]*B[30] + W[6]*B[31] + W[12]*B[32] + W[18]*B[33] + W[24]*B[34] + W[30]*B[35];\ 446 A[31] = W[1]*B[30] + W[7]*B[31] + W[13]*B[32] + W[19]*B[33] + W[25]*B[34] + W[31]*B[35];\ 447 A[32] = W[2]*B[30] + W[8]*B[31] + W[14]*B[32] + W[20]*B[33] + W[26]*B[34] + W[32]*B[35];\ 448 A[33] = W[3]*B[30] + W[9]*B[31] + W[15]*B[32] + W[21]*B[33] + W[27]*B[34] + W[33]*B[35];\ 449 A[34] = W[4]*B[30] + W[10]*B[31] + W[16]*B[32] + W[22]*B[33] + W[28]*B[34] + W[34]*B[35];\ 450 A[35] = W[5]*B[30] + W[11]*B[31] + W[17]*B[32] + W[23]*B[33] + W[29]*B[34] + W[35]*B[35];\ 451 } 452 453 /* 454 Kernel_A_gets_A_minus_B_times_C_6: A = A - B * C with size bs=6 455 456 Input Parameters: 457 + A,B,C - square bs by bs arrays stored in column major order 458 459 Output Parameter: 460 . A = A - B*C 461 */ 462 463 #define Kernel_A_gets_A_minus_B_times_C_6(A,B,C) 0;\ 464 {\ 465 A[0] -= B[0]*C[0] + B[6]*C[1] + B[12]*C[2] + B[18]*C[3] + B[24]*C[4] + B[30]*C[5];\ 466 A[1] -= B[1]*C[0] + B[7]*C[1] + B[13]*C[2] + B[19]*C[3] + B[25]*C[4] + B[31]*C[5];\ 467 A[2] -= B[2]*C[0] + B[8]*C[1] + B[14]*C[2] + B[20]*C[3] + B[26]*C[4] + B[32]*C[5];\ 468 A[3] -= B[3]*C[0] + B[9]*C[1] + B[15]*C[2] + B[21]*C[3] + B[27]*C[4] + B[33]*C[5];\ 469 A[4] -= B[4]*C[0] + B[10]*C[1] + B[16]*C[2] + B[22]*C[3] + B[28]*C[4] + B[34]*C[5];\ 470 A[5] -= B[5]*C[0] + B[11]*C[1] + B[17]*C[2] + B[23]*C[3] + B[29]*C[4] + B[35]*C[5];\ 471 A[6] -= B[0]*C[6] + B[6]*C[7] + B[12]*C[8] + B[18]*C[9] + B[24]*C[10] + B[30]*C[11];\ 472 A[7] -= B[1]*C[6] + B[7]*C[7] + B[13]*C[8] + B[19]*C[9] + B[25]*C[10] + B[31]*C[11];\ 473 A[8] -= B[2]*C[6] + B[8]*C[7] + B[14]*C[8] + B[20]*C[9] + B[26]*C[10] + B[32]*C[11];\ 474 A[9] -= B[3]*C[6] + B[9]*C[7] + B[15]*C[8] + B[21]*C[9] + B[27]*C[10] + B[33]*C[11];\ 475 A[10] -= B[4]*C[6] + B[10]*C[7] + B[16]*C[8] + B[22]*C[9] + B[28]*C[10] + B[34]*C[11];\ 476 A[11] -= B[5]*C[6] + B[11]*C[7] + B[17]*C[8] + B[23]*C[9] + B[29]*C[10] + B[35]*C[11];\ 477 A[12] -= B[0]*C[12] + B[6]*C[13] + B[12]*C[14] + B[18]*C[15] + B[24]*C[16] + B[30]*C[17];\ 478 A[13] -= B[1]*C[12] + B[7]*C[13] + B[13]*C[14] + B[19]*C[15] + B[25]*C[16] + B[31]*C[17];\ 479 A[14] -= B[2]*C[12] + B[8]*C[13] + B[14]*C[14] + B[20]*C[15] + B[26]*C[16] + B[32]*C[17];\ 480 A[15] -= B[3]*C[12] + B[9]*C[13] + B[15]*C[14] + B[21]*C[15] + B[27]*C[16] + B[33]*C[17];\ 481 A[16] -= B[4]*C[12] + B[10]*C[13] + B[16]*C[14] + B[22]*C[15] + B[28]*C[16] + B[34]*C[17];\ 482 A[17] -= B[5]*C[12] + B[11]*C[13] + B[17]*C[14] + B[23]*C[15] + B[29]*C[16] + B[35]*C[17];\ 483 A[18] -= B[0]*C[18] + B[6]*C[19] + B[12]*C[20] + B[18]*C[21] + B[24]*C[22] + B[30]*C[23];\ 484 A[19] -= B[1]*C[18] + B[7]*C[19] + B[13]*C[20] + B[19]*C[21] + B[25]*C[22] + B[31]*C[23];\ 485 A[20] -= B[2]*C[18] + B[8]*C[19] + B[14]*C[20] + B[20]*C[21] + B[26]*C[22] + B[32]*C[23];\ 486 A[21] -= B[3]*C[18] + B[9]*C[19] + B[15]*C[20] + B[21]*C[21] + B[27]*C[22] + B[33]*C[23];\ 487 A[22] -= B[4]*C[18] + B[10]*C[19] + B[16]*C[20] + B[22]*C[21] + B[28]*C[22] + B[34]*C[23];\ 488 A[23] -= B[5]*C[18] + B[11]*C[19] + B[17]*C[20] + B[23]*C[21] + B[29]*C[22] + B[35]*C[23];\ 489 A[24] -= B[0]*C[24] + B[6]*C[25] + B[12]*C[26] + B[18]*C[27] + B[24]*C[28] + B[30]*C[29];\ 490 A[25] -= B[1]*C[24] + B[7]*C[25] + B[13]*C[26] + B[19]*C[27] + B[25]*C[28] + B[31]*C[29];\ 491 A[26] -= B[2]*C[24] + B[8]*C[25] + B[14]*C[26] + B[20]*C[27] + B[26]*C[28] + B[32]*C[29];\ 492 A[27] -= B[3]*C[24] + B[9]*C[25] + B[15]*C[26] + B[21]*C[27] + B[27]*C[28] + B[33]*C[29];\ 493 A[28] -= B[4]*C[24] + B[10]*C[25] + B[16]*C[26] + B[22]*C[27] + B[28]*C[28] + B[34]*C[29];\ 494 A[29] -= B[5]*C[24] + B[11]*C[25] + B[17]*C[26] + B[23]*C[27] + B[29]*C[28] + B[35]*C[29];\ 495 A[30] -= B[0]*C[30] + B[6]*C[31] + B[12]*C[32] + B[18]*C[33] + B[24]*C[34] + B[30]*C[35];\ 496 A[31] -= B[1]*C[30] + B[7]*C[31] + B[13]*C[32] + B[19]*C[33] + B[25]*C[34] + B[31]*C[35];\ 497 A[32] -= B[2]*C[30] + B[8]*C[31] + B[14]*C[32] + B[20]*C[33] + B[26]*C[34] + B[32]*C[35];\ 498 A[33] -= B[3]*C[30] + B[9]*C[31] + B[15]*C[32] + B[21]*C[33] + B[27]*C[34] + B[33]*C[35];\ 499 A[34] -= B[4]*C[30] + B[10]*C[31] + B[16]*C[32] + B[22]*C[33] + B[28]*C[34] + B[34]*C[35];\ 500 A[35] -= B[5]*C[30] + B[11]*C[31] + B[17]*C[32] + B[23]*C[33] + B[29]*C[34] + B[35]*C[35];\ 501 } 502 503 #define Kernel_A_gets_A_times_B_7(A,B,W) 0;\ 504 {\ 505 PetscMemcpy(W,A,49*sizeof(MatScalar));\ 506 A[0] = W[0]*B[0] + W[7]*B[1] + W[14]*B[2] + W[21]*B[3] + W[28]*B[4] + W[35]*B[5] + W[42]*B[6];\ 507 A[1] = W[1]*B[0] + W[8]*B[1] + W[15]*B[2] + W[22]*B[3] + W[29]*B[4] + W[36]*B[5] + W[43]*B[6];\ 508 A[2] = W[2]*B[0] + W[9]*B[1] + W[16]*B[2] + W[23]*B[3] + W[30]*B[4] + W[37]*B[5] + W[44]*B[6];\ 509 A[3] = W[3]*B[0] + W[10]*B[1] + W[17]*B[2] + W[24]*B[3] + W[31]*B[4] + W[38]*B[5] + W[45]*B[6];\ 510 A[4] = W[4]*B[0] + W[11]*B[1] + W[18]*B[2] + W[25]*B[3] + W[32]*B[4] + W[39]*B[5] + W[46]*B[6];\ 511 A[5] = W[5]*B[0] + W[12]*B[1] + W[19]*B[2] + W[26]*B[3] + W[33]*B[4] + W[40]*B[5] + W[47]*B[6];\ 512 A[6] = W[6]*B[0] + W[13]*B[1] + W[20]*B[2] + W[27]*B[3] + W[34]*B[4] + W[41]*B[5] + W[48]*B[6];\ 513 A[7] = W[0]*B[7] + W[7]*B[8] + W[14]*B[9] + W[21]*B[10] + W[28]*B[11] + W[35]*B[12] + W[42]*B[13];\ 514 A[8] = W[1]*B[7] + W[8]*B[8] + W[15]*B[9] + W[22]*B[10] + W[29]*B[11] + W[36]*B[12] + W[43]*B[13];\ 515 A[9] = W[2]*B[7] + W[9]*B[8] + W[16]*B[9] + W[23]*B[10] + W[30]*B[11] + W[37]*B[12] + W[44]*B[13];\ 516 A[10] = W[3]*B[7] + W[10]*B[8] + W[17]*B[9] + W[24]*B[10] + W[31]*B[11] + W[38]*B[12] + W[45]*B[13];\ 517 A[11] = W[4]*B[7] + W[11]*B[8] + W[18]*B[9] + W[25]*B[10] + W[32]*B[11] + W[39]*B[12] + W[46]*B[13];\ 518 A[12] = W[5]*B[7] + W[12]*B[8] + W[19]*B[9] + W[26]*B[10] + W[33]*B[11] + W[40]*B[12] + W[47]*B[13];\ 519 A[13] = W[6]*B[7] + W[13]*B[8] + W[20]*B[9] + W[27]*B[10] + W[34]*B[11] + W[41]*B[12] + W[48]*B[13];\ 520 A[14] = W[0]*B[14] + W[7]*B[15] + W[14]*B[16] + W[21]*B[17] + W[28]*B[18] + W[35]*B[19] + W[42]*B[20];\ 521 A[15] = W[1]*B[14] + W[8]*B[15] + W[15]*B[16] + W[22]*B[17] + W[29]*B[18] + W[36]*B[19] + W[43]*B[20];\ 522 A[16] = W[2]*B[14] + W[9]*B[15] + W[16]*B[16] + W[23]*B[17] + W[30]*B[18] + W[37]*B[19] + W[44]*B[20];\ 523 A[17] = W[3]*B[14] + W[10]*B[15] + W[17]*B[16] + W[24]*B[17] + W[31]*B[18] + W[38]*B[19] + W[45]*B[20];\ 524 A[18] = W[4]*B[14] + W[11]*B[15] + W[18]*B[16] + W[25]*B[17] + W[32]*B[18] + W[39]*B[19] + W[46]*B[20];\ 525 A[19] = W[5]*B[14] + W[12]*B[15] + W[19]*B[16] + W[26]*B[17] + W[33]*B[18] + W[40]*B[19] + W[47]*B[20];\ 526 A[20] = W[6]*B[14] + W[13]*B[15] + W[20]*B[16] + W[27]*B[17] + W[34]*B[18] + W[41]*B[19] + W[48]*B[20];\ 527 A[21] = W[0]*B[21] + W[7]*B[22] + W[14]*B[23] + W[21]*B[24] + W[28]*B[25] + W[35]*B[26] + W[42]*B[27];\ 528 A[22] = W[1]*B[21] + W[8]*B[22] + W[15]*B[23] + W[22]*B[24] + W[29]*B[25] + W[36]*B[26] + W[43]*B[27];\ 529 A[23] = W[2]*B[21] + W[9]*B[22] + W[16]*B[23] + W[23]*B[24] + W[30]*B[25] + W[37]*B[26] + W[44]*B[27];\ 530 A[24] = W[3]*B[21] + W[10]*B[22] + W[17]*B[23] + W[24]*B[24] + W[31]*B[25] + W[38]*B[26] + W[45]*B[27];\ 531 A[25] = W[4]*B[21] + W[11]*B[22] + W[18]*B[23] + W[25]*B[24] + W[32]*B[25] + W[39]*B[26] + W[46]*B[27];\ 532 A[26] = W[5]*B[21] + W[12]*B[22] + W[19]*B[23] + W[26]*B[24] + W[33]*B[25] + W[40]*B[26] + W[47]*B[27];\ 533 A[27] = W[6]*B[21] + W[13]*B[22] + W[20]*B[23] + W[27]*B[24] + W[34]*B[25] + W[41]*B[26] + W[48]*B[27];\ 534 A[28] = W[0]*B[28] + W[7]*B[29] + W[14]*B[30] + W[21]*B[31] + W[28]*B[32] + W[35]*B[33] + W[42]*B[34];\ 535 A[29] = W[1]*B[28] + W[8]*B[29] + W[15]*B[30] + W[22]*B[31] + W[29]*B[32] + W[36]*B[33] + W[43]*B[34];\ 536 A[30] = W[2]*B[28] + W[9]*B[29] + W[16]*B[30] + W[23]*B[31] + W[30]*B[32] + W[37]*B[33] + W[44]*B[34];\ 537 A[31] = W[3]*B[28] + W[10]*B[29] + W[17]*B[30] + W[24]*B[31] + W[31]*B[32] + W[38]*B[33] + W[45]*B[34];\ 538 A[32] = W[4]*B[28] + W[11]*B[29] + W[18]*B[30] + W[25]*B[31] + W[32]*B[32] + W[39]*B[33] + W[46]*B[34];\ 539 A[33] = W[5]*B[28] + W[12]*B[29] + W[19]*B[30] + W[26]*B[31] + W[33]*B[32] + W[40]*B[33] + W[47]*B[34];\ 540 A[34] = W[6]*B[28] + W[13]*B[29] + W[20]*B[30] + W[27]*B[31] + W[34]*B[32] + W[41]*B[33] + W[48]*B[34];\ 541 A[35] = W[0]*B[35] + W[7]*B[36] + W[14]*B[37] + W[21]*B[38] + W[28]*B[39] + W[35]*B[40] + W[42]*B[41];\ 542 A[36] = W[1]*B[35] + W[8]*B[36] + W[15]*B[37] + W[22]*B[38] + W[29]*B[39] + W[36]*B[40] + W[43]*B[41];\ 543 A[37] = W[2]*B[35] + W[9]*B[36] + W[16]*B[37] + W[23]*B[38] + W[30]*B[39] + W[37]*B[40] + W[44]*B[41];\ 544 A[38] = W[3]*B[35] + W[10]*B[36] + W[17]*B[37] + W[24]*B[38] + W[31]*B[39] + W[38]*B[40] + W[45]*B[41];\ 545 A[39] = W[4]*B[35] + W[11]*B[36] + W[18]*B[37] + W[25]*B[38] + W[32]*B[39] + W[39]*B[40] + W[46]*B[41];\ 546 A[40] = W[5]*B[35] + W[12]*B[36] + W[19]*B[37] + W[26]*B[38] + W[33]*B[39] + W[40]*B[40] + W[47]*B[41];\ 547 A[41] = W[6]*B[35] + W[13]*B[36] + W[20]*B[37] + W[27]*B[38] + W[34]*B[39] + W[41]*B[40] + W[48]*B[41];\ 548 A[42] = W[0]*B[42] + W[7]*B[43] + W[14]*B[44] + W[21]*B[45] + W[28]*B[46] + W[35]*B[47] + W[42]*B[48];\ 549 A[43] = W[1]*B[42] + W[8]*B[43] + W[15]*B[44] + W[22]*B[45] + W[29]*B[46] + W[36]*B[47] + W[43]*B[48];\ 550 A[44] = W[2]*B[42] + W[9]*B[43] + W[16]*B[44] + W[23]*B[45] + W[30]*B[46] + W[37]*B[47] + W[44]*B[48];\ 551 A[45] = W[3]*B[42] + W[10]*B[43] + W[17]*B[44] + W[24]*B[45] + W[31]*B[46] + W[38]*B[47] + W[45]*B[48];\ 552 A[46] = W[4]*B[42] + W[11]*B[43] + W[18]*B[44] + W[25]*B[45] + W[32]*B[46] + W[39]*B[47] + W[46]*B[48];\ 553 A[47] = W[5]*B[42] + W[12]*B[43] + W[19]*B[44] + W[26]*B[45] + W[33]*B[46] + W[40]*B[47] + W[47]*B[48];\ 554 A[48] = W[6]*B[42] + W[13]*B[43] + W[20]*B[44] + W[27]*B[45] + W[34]*B[46] + W[41]*B[47] + W[48]*B[48];\ 555 } 556 557 /* 558 Kernel_A_gets_A_minus_B_times_C_7: A = A - B * C with size bs=7 559 560 Input Parameters: 561 + A,B,C - square bs by bs arrays stored in column major order 562 563 Output Parameter: 564 . A = A - B*C 565 */ 566 567 #define Kernel_A_gets_A_minus_B_times_C_7(A,B,C) 0;\ 568 {\ 569 A[0] -= B[0]*C[0] + B[7]*C[1] + B[14]*C[2] + B[21]*C[3] + B[28]*C[4] + B[35]*C[5] + B[42]*C[6];\ 570 A[1] -= B[1]*C[0] + B[8]*C[1] + B[15]*C[2] + B[22]*C[3] + B[29]*C[4] + B[36]*C[5] + B[43]*C[6];\ 571 A[2] -= B[2]*C[0] + B[9]*C[1] + B[16]*C[2] + B[23]*C[3] + B[30]*C[4] + B[37]*C[5] + B[44]*C[6];\ 572 A[3] -= B[3]*C[0] + B[10]*C[1] + B[17]*C[2] + B[24]*C[3] + B[31]*C[4] + B[38]*C[5] + B[45]*C[6];\ 573 A[4] -= B[4]*C[0] + B[11]*C[1] + B[18]*C[2] + B[25]*C[3] + B[32]*C[4] + B[39]*C[5] + B[46]*C[6];\ 574 A[5] -= B[5]*C[0] + B[12]*C[1] + B[19]*C[2] + B[26]*C[3] + B[33]*C[4] + B[40]*C[5] + B[47]*C[6];\ 575 A[6] -= B[6]*C[0] + B[13]*C[1] + B[20]*C[2] + B[27]*C[3] + B[34]*C[4] + B[41]*C[5] + B[48]*C[6];\ 576 A[7] -= B[0]*C[7] + B[7]*C[8] + B[14]*C[9] + B[21]*C[10] + B[28]*C[11] + B[35]*C[12] + B[42]*C[13];\ 577 A[8] -= B[1]*C[7] + B[8]*C[8] + B[15]*C[9] + B[22]*C[10] + B[29]*C[11] + B[36]*C[12] + B[43]*C[13];\ 578 A[9] -= B[2]*C[7] + B[9]*C[8] + B[16]*C[9] + B[23]*C[10] + B[30]*C[11] + B[37]*C[12] + B[44]*C[13];\ 579 A[10] -= B[3]*C[7] + B[10]*C[8] + B[17]*C[9] + B[24]*C[10] + B[31]*C[11] + B[38]*C[12] + B[45]*C[13];\ 580 A[11] -= B[4]*C[7] + B[11]*C[8] + B[18]*C[9] + B[25]*C[10] + B[32]*C[11] + B[39]*C[12] + B[46]*C[13];\ 581 A[12] -= B[5]*C[7] + B[12]*C[8] + B[19]*C[9] + B[26]*C[10] + B[33]*C[11] + B[40]*C[12] + B[47]*C[13];\ 582 A[13] -= B[6]*C[7] + B[13]*C[8] + B[20]*C[9] + B[27]*C[10] + B[34]*C[11] + B[41]*C[12] + B[48]*C[13];\ 583 A[14] -= B[0]*C[14] + B[7]*C[15] + B[14]*C[16] + B[21]*C[17] + B[28]*C[18] + B[35]*C[19] + B[42]*C[20];\ 584 A[15] -= B[1]*C[14] + B[8]*C[15] + B[15]*C[16] + B[22]*C[17] + B[29]*C[18] + B[36]*C[19] + B[43]*C[20];\ 585 A[16] -= B[2]*C[14] + B[9]*C[15] + B[16]*C[16] + B[23]*C[17] + B[30]*C[18] + B[37]*C[19] + B[44]*C[20];\ 586 A[17] -= B[3]*C[14] + B[10]*C[15] + B[17]*C[16] + B[24]*C[17] + B[31]*C[18] + B[38]*C[19] + B[45]*C[20];\ 587 A[18] -= B[4]*C[14] + B[11]*C[15] + B[18]*C[16] + B[25]*C[17] + B[32]*C[18] + B[39]*C[19] + B[46]*C[20];\ 588 A[19] -= B[5]*C[14] + B[12]*C[15] + B[19]*C[16] + B[26]*C[17] + B[33]*C[18] + B[40]*C[19] + B[47]*C[20];\ 589 A[20] -= B[6]*C[14] + B[13]*C[15] + B[20]*C[16] + B[27]*C[17] + B[34]*C[18] + B[41]*C[19] + B[48]*C[20];\ 590 A[21] -= B[0]*C[21] + B[7]*C[22] + B[14]*C[23] + B[21]*C[24] + B[28]*C[25] + B[35]*C[26] + B[42]*C[27];\ 591 A[22] -= B[1]*C[21] + B[8]*C[22] + B[15]*C[23] + B[22]*C[24] + B[29]*C[25] + B[36]*C[26] + B[43]*C[27];\ 592 A[23] -= B[2]*C[21] + B[9]*C[22] + B[16]*C[23] + B[23]*C[24] + B[30]*C[25] + B[37]*C[26] + B[44]*C[27];\ 593 A[24] -= B[3]*C[21] + B[10]*C[22] + B[17]*C[23] + B[24]*C[24] + B[31]*C[25] + B[38]*C[26] + B[45]*C[27];\ 594 A[25] -= B[4]*C[21] + B[11]*C[22] + B[18]*C[23] + B[25]*C[24] + B[32]*C[25] + B[39]*C[26] + B[46]*C[27];\ 595 A[26] -= B[5]*C[21] + B[12]*C[22] + B[19]*C[23] + B[26]*C[24] + B[33]*C[25] + B[40]*C[26] + B[47]*C[27];\ 596 A[27] -= B[6]*C[21] + B[13]*C[22] + B[20]*C[23] + B[27]*C[24] + B[34]*C[25] + B[41]*C[26] + B[48]*C[27];\ 597 A[28] -= B[0]*C[28] + B[7]*C[29] + B[14]*C[30] + B[21]*C[31] + B[28]*C[32] + B[35]*C[33] + B[42]*C[34];\ 598 A[29] -= B[1]*C[28] + B[8]*C[29] + B[15]*C[30] + B[22]*C[31] + B[29]*C[32] + B[36]*C[33] + B[43]*C[34];\ 599 A[30] -= B[2]*C[28] + B[9]*C[29] + B[16]*C[30] + B[23]*C[31] + B[30]*C[32] + B[37]*C[33] + B[44]*C[34];\ 600 A[31] -= B[3]*C[28] + B[10]*C[29] + B[17]*C[30] + B[24]*C[31] + B[31]*C[32] + B[38]*C[33] + B[45]*C[34];\ 601 A[32] -= B[4]*C[28] + B[11]*C[29] + B[18]*C[30] + B[25]*C[31] + B[32]*C[32] + B[39]*C[33] + B[46]*C[34];\ 602 A[33] -= B[5]*C[28] + B[12]*C[29] + B[19]*C[30] + B[26]*C[31] + B[33]*C[32] + B[40]*C[33] + B[47]*C[34];\ 603 A[34] -= B[6]*C[28] + B[13]*C[29] + B[20]*C[30] + B[27]*C[31] + B[34]*C[32] + B[41]*C[33] + B[48]*C[34];\ 604 A[35] -= B[0]*C[35] + B[7]*C[36] + B[14]*C[37] + B[21]*C[38] + B[28]*C[39] + B[35]*C[40] + B[42]*C[41];\ 605 A[36] -= B[1]*C[35] + B[8]*C[36] + B[15]*C[37] + B[22]*C[38] + B[29]*C[39] + B[36]*C[40] + B[43]*C[41];\ 606 A[37] -= B[2]*C[35] + B[9]*C[36] + B[16]*C[37] + B[23]*C[38] + B[30]*C[39] + B[37]*C[40] + B[44]*C[41];\ 607 A[38] -= B[3]*C[35] + B[10]*C[36] + B[17]*C[37] + B[24]*C[38] + B[31]*C[39] + B[38]*C[40] + B[45]*C[41];\ 608 A[39] -= B[4]*C[35] + B[11]*C[36] + B[18]*C[37] + B[25]*C[38] + B[32]*C[39] + B[39]*C[40] + B[46]*C[41];\ 609 A[40] -= B[5]*C[35] + B[12]*C[36] + B[19]*C[37] + B[26]*C[38] + B[33]*C[39] + B[40]*C[40] + B[47]*C[41];\ 610 A[41] -= B[6]*C[35] + B[13]*C[36] + B[20]*C[37] + B[27]*C[38] + B[34]*C[39] + B[41]*C[40] + B[48]*C[41];\ 611 A[42] -= B[0]*C[42] + B[7]*C[43] + B[14]*C[44] + B[21]*C[45] + B[28]*C[46] + B[35]*C[47] + B[42]*C[48];\ 612 A[43] -= B[1]*C[42] + B[8]*C[43] + B[15]*C[44] + B[22]*C[45] + B[29]*C[46] + B[36]*C[47] + B[43]*C[48];\ 613 A[44] -= B[2]*C[42] + B[9]*C[43] + B[16]*C[44] + B[23]*C[45] + B[30]*C[46] + B[37]*C[47] + B[44]*C[48];\ 614 A[45] -= B[3]*C[42] + B[10]*C[43] + B[17]*C[44] + B[24]*C[45] + B[31]*C[46] + B[38]*C[47] + B[45]*C[48];\ 615 A[46] -= B[4]*C[42] + B[11]*C[43] + B[18]*C[44] + B[25]*C[45] + B[32]*C[46] + B[39]*C[47] + B[46]*C[48];\ 616 A[47] -= B[5]*C[42] + B[12]*C[43] + B[19]*C[44] + B[26]*C[45] + B[33]*C[46] + B[40]*C[47] + B[47]*C[48];\ 617 A[48] -= B[6]*C[42] + B[13]*C[43] + B[20]*C[44] + B[27]*C[45] + B[34]*C[46] + B[41]*C[47] + B[48]*C[48];\ 618 } 619 620 621 #endif 622