#include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
#include <../src/mat/utils/freespace.h>

/*@
  MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix

  Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel

  Input Parameter:
. A - the `MATMAIJ` matrix

  Output Parameter:
. B - the `MATAIJ` matrix

  Level: advanced

  Note:
  The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.

.seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
@*/
PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B)
{
  PetscBool ismpimaij, isseqmaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
  if (ismpimaij) {
    Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

    *B = b->A;
  } else if (isseqmaij) {
    Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;

    *B = b->AIJ;
  } else {
    /* not a MAIJ matrix: return it unchanged (no reference is taken, per the Note above) */
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size

  Logically Collective

  Input Parameters:
+ A - the `MATMAIJ` matrix
- dof - the block size for the new matrix

  Output Parameter:
. B - the new `MATMAIJ` matrix

  Level: advanced

.seealso: `MATMAIJ`, `MatCreateMAIJ()`
@*/
PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B)
{
  Mat Aij = NULL;

  PetscFunctionBegin;
  PetscValidLogicalCollectiveInt(A, dof, 2);
  PetscCall(MatMAIJGetAIJ(A, &Aij));
  PetscCall(MatCreateMAIJ(Aij, dof, B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy a sequential MAIJ matrix: release the underlying AIJ matrix, the private data,
   and un-compose the conversion/product methods registered on the object */
static PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
{
  Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(PetscFree(A->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MAIJ matrices can only be created with MatCreateMAIJ(); a plain MatSetUp() is an error */
static PetscErrorCode MatSetUp_MAIJ(Mat A)
{
  PetscFunctionBegin;
  SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
}

/* View a sequential MAIJ matrix by converting it to a temporary SEQAIJ matrix */
static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatView(B, viewer));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* View a parallel MAIJ matrix by converting it to a temporary MPIAIJ matrix */
static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatView(B, viewer));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy a parallel MAIJ matrix: release the diagonal (AIJ) and off-diagonal (OAIJ) parts,
   the underlying AIJ matrix, the scatter context and work vector, then un-compose methods */
static PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(MatDestroy(&b->OAIJ));
  PetscCall(MatDestroy(&b->A));
  PetscCall(VecScatterDestroy(&b->ctx));
  PetscCall(VecDestroy(&b->w));
  PetscCall(PetscFree(A->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
  multicomponent problems, interpolating or restricting each component the same way independently.
  The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.

  Operations provided:
.vb
    MatMult()
    MatMultTranspose()
    MatMultAdd()
    MatMultTransposeAdd()
.ve

  Level: advanced

.seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
M*/

PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
{
  Mat_MPIMAIJ *b;
  PetscMPIInt  size;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  A->data = (void *)b;

  /* wipe the ops table; only setup is installed here, the real operations are
     installed by MatCreateMAIJ() once the block size is known */
  PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));

  A->ops->setup = MatSetUp_MAIJ;

  b->AIJ  = NULL;
  b->dof  = 0;
  b->OAIJ = NULL;
  b->ctx  = NULL;
  b->w    = NULL;
  /* one rank -> sequential flavor, otherwise parallel flavor */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
  } else {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
  }
  A->preallocated = PETSC_TRUE;
  A->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if PetscHasAttribute(always_inline)
  #define PETSC_FORCE_INLINE __attribute__((always_inline))
#else
  #define PETSC_FORCE_INLINE
#endif

#if defined(__clang__) || defined(__INTEL_COMPILER) || defined(__ICC)
  #define PETSC_PRAGMA_UNROLL _Pragma("unroll")
#else
  #define PETSC_PRAGMA_UNROLL
#endif
defined(__clang__) || defined(__INTEL_COMPILER) || defined(__ICC) 184 #define PETSC_PRAGMA_UNROLL _Pragma("unroll") 185 #else 186 #define PETSC_PRAGMA_UNROLL 187 #endif 188 189 enum { 190 MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18 191 }; 192 193 // try as hard as possible to get these "template"s inlined, GCC apparently does take 'inline' 194 // keyword into account for these... 195 PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 196 { 197 const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 198 const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 199 const Mat baij = b->AIJ; 200 const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 201 const PetscInt m = baij->rmap->n; 202 const PetscInt nz = a->nz; 203 const PetscInt *idx = a->j; 204 const PetscInt *ii = a->i; 205 const PetscScalar *v = a->a; 206 PetscInt nonzerorow = 0; 207 const PetscScalar *x; 208 PetscScalar *z; 209 210 PetscFunctionBegin; 211 PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 212 if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz)); 213 PetscCall(VecGetArrayRead(xx, &x)); 214 if (mult_add) { 215 PetscCall(VecGetArray(zz, &z)); 216 } else { 217 PetscCall(VecGetArrayWrite(zz, &z)); 218 } 219 220 for (PetscInt i = 0; i < m; ++i) { 221 PetscInt jrow = ii[i]; 222 const PetscInt n = ii[i + 1] - jrow; 223 // leave a line so clang-format does not align these decls 224 PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0}; 225 226 nonzerorow += n > 0; 227 for (PetscInt j = 0; j < n; ++j, ++jrow) { 228 const PetscScalar v_jrow = v[jrow]; 229 const PetscInt N_idx_jrow = N * idx[jrow]; 230 231 PETSC_PRAGMA_UNROLL 232 for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k]; 233 } 234 235 PETSC_PRAGMA_UNROLL 236 for (int k = 0; k < N; ++k) { 237 const PetscInt z_idx = N * i + k; 238 239 if (mult_add) { 240 
z[z_idx] += sum[k]; 241 } else { 242 z[z_idx] = sum[k]; 243 } 244 } 245 } 246 PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow)))); 247 PetscCall(VecRestoreArrayRead(xx, &x)); 248 if (mult_add) { 249 PetscCall(VecRestoreArray(zz, &z)); 250 } else { 251 PetscCall(VecRestoreArrayWrite(zz, &z)); 252 } 253 PetscFunctionReturn(PETSC_SUCCESS); 254 } 255 256 PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 257 { 258 const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 259 const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 260 const Mat baij = b->AIJ; 261 const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 262 const PetscInt m = baij->rmap->n; 263 const PetscInt nz = a->nz; 264 const PetscInt *a_j = a->j; 265 const PetscInt *a_i = a->i; 266 const PetscScalar *a_a = a->a; 267 const PetscScalar *x; 268 PetscScalar *z; 269 270 PetscFunctionBegin; 271 PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 272 if (mult_add) { 273 if (yy != zz) PetscCall(VecCopy(yy, zz)); 274 } else { 275 PetscCall(VecSet(zz, 0.0)); 276 } 277 PetscCall(VecGetArrayRead(xx, &x)); 278 PetscCall(VecGetArray(zz, &z)); 279 280 for (PetscInt i = 0; i < m; i++) { 281 const PetscInt a_ii = a_i[i]; 282 const PetscInt *idx = a_j + a_ii; 283 const PetscScalar *v = a_a + a_ii; 284 const PetscInt n = a_i[i + 1] - a_ii; 285 PetscScalar alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE]; 286 287 PETSC_PRAGMA_UNROLL 288 for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k]; 289 for (PetscInt j = 0; j < n; ++j) { 290 const PetscInt N_idx_j = N * idx[j]; 291 const PetscScalar v_j = v[j]; 292 293 PETSC_PRAGMA_UNROLL 294 for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j; 295 } 296 } 297 298 PetscCall(PetscLogFlops(2 * N * nz)); 299 PetscCall(VecRestoreArrayRead(xx, &x)); 300 
PetscCall(VecRestoreArray(zz, &z)); 301 PetscFunctionReturn(PETSC_SUCCESS); 302 } 303 304 /* -------------------------------------------------------------------------------------- */ 305 306 static PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy) 307 { 308 PetscFunctionBegin; 309 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 2)); 310 PetscFunctionReturn(PETSC_SUCCESS); 311 } 312 313 static PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy) 314 { 315 PetscFunctionBegin; 316 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 2)); 317 PetscFunctionReturn(PETSC_SUCCESS); 318 } 319 320 static PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) 321 { 322 PetscFunctionBegin; 323 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 2)); 324 PetscFunctionReturn(PETSC_SUCCESS); 325 } 326 327 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) 328 { 329 PetscFunctionBegin; 330 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 2)); 331 PetscFunctionReturn(PETSC_SUCCESS); 332 } 333 334 /* -------------------------------------------------------------------------------------- */ 335 336 static PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy) 337 { 338 PetscFunctionBegin; 339 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 3)); 340 PetscFunctionReturn(PETSC_SUCCESS); 341 } 342 343 static PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy) 344 { 345 PetscFunctionBegin; 346 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 3)); 347 PetscFunctionReturn(PETSC_SUCCESS); 348 } 349 350 static PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) 351 { 352 PetscFunctionBegin; 353 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 3)); 354 PetscFunctionReturn(PETSC_SUCCESS); 355 } 356 357 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, 
/* ------------------------------------------------------------------------------ */
/* More fixed block-size wrappers (dof = 4 ... 18), all forwarding to the inlined  */
/* template kernels above with a compile-time N                                    */

static PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 4));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 4));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 4));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 4));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ------------------------------------------------------------------------------ */

static PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 5));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 5));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 5));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 5));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ------------------------------------------------------------------------------ */

static PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 6));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 6));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 6));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 6));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ------------------------------------------------------------------------------ */

static PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 7));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 7));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 7));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 7));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ------------------------------------------------------------------------------ */

static PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 8));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 8));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 8));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 8));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ------------------------------------------------------------------------------ */

static PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 9));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 9));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 9));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 9));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ------------------------------------------------------------------------------ */

static PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 10));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 10));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 10));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 10));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ------------------------------------------------------------------------------ */

static PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 11));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 11));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 11));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 11));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ------------------------------------------------------------------------------ */

static PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 16));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 16));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 16));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 16));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ------------------------------------------------------------------------------ */

static PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 18));
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 18));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 18));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 18));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ------------------------------------------------------------------------------ */
/* Runtime block-size (dof) kernels, used when dof has no fixed-size instantiation */

/* yy = A*xx for a SeqMAIJ matrix with runtime block size b->dof */
static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
{
  Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v;
  PetscScalar       *y, *sums;
  const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
  PetscInt           n, i, jrow, j, dof = b->dof, k;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecSet(yy, 0.0));
  PetscCall(VecGetArray(yy, &y));
  idx = a->j;
  v   = a->a;
  ii  = a->i;

  for (i = 0; i < m; i++) {
    jrow = ii[i];
    n    = ii[i + 1] - jrow;
    /* sums points at the dof consecutive output entries for block row i */
    sums = y + dof * i;
    for (j = 0; j < n; j++) {
      for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
      jrow++;
    }
  }

  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A*xx for a SeqMAIJ matrix with runtime block size b->dof */
static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v;
  PetscScalar       *y, *sums;
  const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
  PetscInt           n, i, jrow, j, dof = b->dof, k;

  PetscFunctionBegin;
  /* accumulate into zz; seed it with yy when they are distinct vectors */
  if (yy != zz) PetscCall(VecCopy(yy, zz));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &y));
  idx = a->j;
  v   = a->a;
  ii  = a->i;

  for (i = 0; i < m; i++) {
    jrow = ii[i];
    n    = ii[i + 1] - jrow;
    sums = y + dof * i;
    for (j = 0; j < n; j++) {
      for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
      jrow++;
    }
  }

  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A^T*xx for a SeqMAIJ matrix with runtime block size b->dof */
static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
{
  Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v, *alpha;
  PetscScalar       *y;
  const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
  PetscInt           n, i, k;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(xx, &x));
  /* transpose product scatters into y, so it must start from zero */
  PetscCall(VecSet(yy, 0.0));
  PetscCall(VecGetArray(yy, &y));
  for (i = 0; i < m; i++) {
    idx   = a->j + a->i[i];
    v     = a->a + a->i[i];
    n     = a->i[i + 1] - a->i[i];
    alpha = x + dof * i;
    while (n-- > 0) {
      for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
      idx++;
      v++;
    }
  }
  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A^T*xx for a SeqMAIJ matrix with runtime block size b->dof */
static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v, *alpha;
  PetscScalar       *y;
  const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
  PetscInt           n, i, k;

  PetscFunctionBegin;
  if (yy != zz) PetscCall(VecCopy(yy, zz));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &y));
  for (i = 0; i < m; i++) {
    idx   = a->j + a->i[i];
    v     = a->a + a->i[i];
    n     = a->i[i + 1] - a->i[i];
    alpha = x + dof * i;
    while (n-- > 0) {
      for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
      idx++;
      v++;
    }
  }
  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ------------------------------------------------------------------------------ */
/* Parallel (MPI) MAIJ operations: local diagonal part (AIJ) plus off-process part */
/* (OAIJ) applied to ghost values gathered into the work vector w via the scatter  */

static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  /* start the scatter */
  PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  /* local part while the scatter is in flight */
  PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
  PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  /* add the off-process contribution from the gathered ghost values */
  PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  /* off-process transpose contribution goes into w, then is scattered back (reverse, add) */
  PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
  PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
  PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  /* start the scatter */
  PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
  PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  /* off-process transpose contribution goes into w, then is scattered back (reverse, add) */
  PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
  PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
  PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ---------------------------------------------------------------- */

/* Only the PtAP product is supported for SeqAIJ (A) with SeqMAIJ (P) */
static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_PtAP) {
    C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Select the PtAP algorithm for MPIAIJ (A) with MPIMAIJ (P); only "allatonce" and
   "allatonce_merged" have a native MAIJ implementation, every other choice falls back
   to converting P in place to MPIAIJ and re-dispatching */
static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
{
  Mat_Product *product = C->product;
  PetscBool    flg     = PETSC_FALSE;
  Mat          A = product->A, P = product->B;
  PetscInt     alg = 1; /* set default algorithm */
#if !defined(PETSC_HAVE_HYPRE)
  const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
  PetscInt    nalg        = 4;
#else
  const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
  PetscInt    nalg        = 5;
#endif

  PetscFunctionBegin;
  PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);

  /* PtAP */
  /* Check matrix local sizes */
  PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
  PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);

  /* Set the default algorithm */
  PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option */
  PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
  PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
  PetscOptionsEnd();

  PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
  if (flg) {
    C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
  if (flg) {
    C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
  PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
  PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
  PetscCall(MatProductSetFromOptions(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* ---------------------------------------------------------------- */

/* Numeric phase of C = P^T * A * P where A is SeqAIJ and PP is SeqMAIJ: the MAIJ matrix
   acts as the underlying AIJ matrix P replicated over ppdof components, so each AIJ entry
   of P stands for ppdof entries whose row/column offsets are recovered with % and /  */
static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C)
{
  /* This routine requires testing -- first draft only */
  Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
  Mat              P  = pp->AIJ;
  Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
  Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
  const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
  const PetscInt  *ci = c->i, *cj = c->j, *cjj;
  const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
  PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
  const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
  MatScalar       *ca = c->a, *caj, *apa;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));

  /* Clear old values in C */
  PetscCall(PetscArrayzero(ca, ci[cm]));

  for (i = 0; i < am; i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i + 1] - ai[i];
    apnzj = 0;
    for (j = 0; j < anzi; j++) {
      /* Get offset within block of P */
      pshift = *aj % ppdof;
      /* Get block row of P */
      prow = *aj++ / ppdof; /* integer division */
      pnzj = pi[prow + 1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k = 0; k < pnzj; k++) {
        poffset = pjj[k] * ppdof + pshift;
        /* apjdense marks which columns of the dense accumulator apa are occupied */
        if (!apjdense[poffset]) {
          apjdense[poffset] = -1;
          apj[apnzj++]      = poffset;
        }
        apa[poffset] += (*aa) * paj[k];
      }
      PetscCall(PetscLogFlops(2.0 * pnzj));
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    /* Note: a array does not need sorting as it is in dense storage locations. */
    PetscCall(PetscSortInt(apnzj, apj));

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    prow    = i / ppdof; /* integer division */
    pshift  = i % ppdof;
    poffset = pi[prow];
    pnzi    = pi[prow + 1] - poffset;
    /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
    pJ = pj + poffset;
    pA = pa + poffset;
    for (j = 0; j < pnzi; j++) {
      crow = (*pJ) * ppdof + pshift;
      cjj  = cj + ci[crow];
      caj  = ca + ci[crow];
      pJ++;
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k = 0, nextap = 0; nextap < apnzj; k++) {
        if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
      }
      PetscCall(PetscLogFlops(2.0 * apnzj));
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j = 0; j < apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree3(apa, apj, apjdense));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C)
{
  PetscFreeSpaceList free_space = NULL, current_space = NULL;
  Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
  Mat                P  = pp->AIJ;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
  PetscInt          *pti, *ptj, *ptJ;
  PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
  const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
  PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
  MatScalar         *ca;
  const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));

cn = pn * ppdof; 1014 /* Allocate ci array, arrays for fill computation and */ 1015 /* free space for accumulating nonzero column info */ 1016 PetscCall(PetscMalloc1(cn + 1, &ci)); 1017 ci[0] = 0; 1018 1019 /* Work arrays for rows of P^T*A */ 1020 PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow)); 1021 PetscCall(PetscArrayzero(ptadenserow, an)); 1022 PetscCall(PetscArrayzero(denserow, cn)); 1023 1024 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 1025 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 1026 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 1027 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space)); 1028 current_space = free_space; 1029 1030 /* Determine symbolic info for each row of C: */ 1031 for (i = 0; i < pn; i++) { 1032 ptnzi = pti[i + 1] - pti[i]; 1033 ptJ = ptj + pti[i]; 1034 for (dof = 0; dof < ppdof; dof++) { 1035 ptanzi = 0; 1036 /* Determine symbolic row of PtA: */ 1037 for (j = 0; j < ptnzi; j++) { 1038 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 1039 arow = ptJ[j] * ppdof + dof; 1040 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 1041 anzj = ai[arow + 1] - ai[arow]; 1042 ajj = aj + ai[arow]; 1043 for (k = 0; k < anzj; k++) { 1044 if (!ptadenserow[ajj[k]]) { 1045 ptadenserow[ajj[k]] = -1; 1046 ptasparserow[ptanzi++] = ajj[k]; 1047 } 1048 } 1049 } 1050 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 1051 ptaj = ptasparserow; 1052 cnzi = 0; 1053 for (j = 0; j < ptanzi; j++) { 1054 /* Get offset within block of P */ 1055 pshift = *ptaj % ppdof; 1056 /* Get block row of P */ 1057 prow = (*ptaj++) / ppdof; /* integer division */ 1058 /* P has same number of nonzeros per row as the compressed form */ 1059 pnzj = pi[prow + 1] - pi[prow]; 1060 pjj = pj + pi[prow]; 1061 for (k = 0; k < pnzj; k++) { 
1062 /* Locations in C are shifted by the offset within the block */ 1063 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 1064 if (!denserow[pjj[k] * ppdof + pshift]) { 1065 denserow[pjj[k] * ppdof + pshift] = -1; 1066 sparserow[cnzi++] = pjj[k] * ppdof + pshift; 1067 } 1068 } 1069 } 1070 1071 /* sort sparserow */ 1072 PetscCall(PetscSortInt(cnzi, sparserow)); 1073 1074 /* If free space is not available, make more free space */ 1075 /* Double the amount of total space in the list */ 1076 if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 1077 1078 /* Copy data into free space, and zero out denserows */ 1079 PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi)); 1080 1081 current_space->array += cnzi; 1082 current_space->local_used += cnzi; 1083 current_space->local_remaining -= cnzi; 1084 1085 for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 1086 for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0; 1087 1088 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 1089 /* For now, we will recompute what is needed. 
*/ 1090 ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi; 1091 } 1092 } 1093 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 1094 /* Allocate space for cj, initialize cj, and */ 1095 /* destroy list of free space and other temporary array(s) */ 1096 PetscCall(PetscMalloc1(ci[cn] + 1, &cj)); 1097 PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 1098 PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow)); 1099 1100 /* Allocate space for ca */ 1101 PetscCall(PetscCalloc1(ci[cn] + 1, &ca)); 1102 1103 /* put together the new matrix */ 1104 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C)); 1105 PetscCall(MatSetBlockSize(C, pp->dof)); 1106 1107 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1108 /* Since these are PETSc arrays, change flags to free them as necessary. */ 1109 c = (Mat_SeqAIJ *)(C->data); 1110 c->free_a = PETSC_TRUE; 1111 c->free_ij = PETSC_TRUE; 1112 c->nonew = 0; 1113 1114 C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 1115 C->ops->productnumeric = MatProductNumeric_PtAP; 1116 1117 /* Clean up. 
*/ 1118 PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 1119 PetscFunctionReturn(PETSC_SUCCESS); 1120 } 1121 1122 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 1123 { 1124 Mat_Product *product = C->product; 1125 Mat A = product->A, P = product->B; 1126 1127 PetscFunctionBegin; 1128 PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C)); 1129 PetscFunctionReturn(PETSC_SUCCESS); 1130 } 1131 1132 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat); 1133 1134 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) 1135 { 1136 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 1137 1138 PetscFunctionBegin; 1139 1140 PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C)); 1141 PetscFunctionReturn(PETSC_SUCCESS); 1142 } 1143 1144 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat); 1145 1146 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) 1147 { 1148 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 1149 1150 PetscFunctionBegin; 1151 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C)); 1152 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 1153 PetscFunctionReturn(PETSC_SUCCESS); 1154 } 1155 1156 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat); 1157 1158 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) 1159 { 1160 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 1161 1162 PetscFunctionBegin; 1163 1164 PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C)); 1165 PetscFunctionReturn(PETSC_SUCCESS); 1166 } 1167 1168 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat); 1169 1170 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, 
Mat P, PetscReal fill, Mat C) 1171 { 1172 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 1173 1174 PetscFunctionBegin; 1175 1176 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C)); 1177 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 1178 PetscFunctionReturn(PETSC_SUCCESS); 1179 } 1180 1181 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 1182 { 1183 Mat_Product *product = C->product; 1184 Mat A = product->A, P = product->B; 1185 PetscBool flg; 1186 1187 PetscFunctionBegin; 1188 PetscCall(PetscStrcmp(product->alg, "allatonce", &flg)); 1189 if (flg) { 1190 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C)); 1191 C->ops->productnumeric = MatProductNumeric_PtAP; 1192 PetscFunctionReturn(PETSC_SUCCESS); 1193 } 1194 1195 PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg)); 1196 if (flg) { 1197 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C)); 1198 C->ops->productnumeric = MatProductNumeric_PtAP; 1199 PetscFunctionReturn(PETSC_SUCCESS); 1200 } 1201 1202 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); 1203 } 1204 1205 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1206 { 1207 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1208 Mat a = b->AIJ, B; 1209 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data; 1210 PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof; 1211 PetscInt *cols; 1212 PetscScalar *vals; 1213 1214 PetscFunctionBegin; 1215 PetscCall(MatGetSize(a, &m, &n)); 1216 PetscCall(PetscMalloc1(dof * m, &ilen)); 1217 for (i = 0; i < m; i++) { 1218 nmax = PetscMax(nmax, aij->ilen[i]); 1219 for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i]; 1220 } 1221 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 1222 PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n)); 1223 PetscCall(MatSetType(B, 
newtype)); 1224 PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen)); 1225 PetscCall(PetscFree(ilen)); 1226 PetscCall(PetscMalloc1(nmax, &icols)); 1227 ii = 0; 1228 for (i = 0; i < m; i++) { 1229 PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 1230 for (j = 0; j < dof; j++) { 1231 for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j; 1232 PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 1233 ii++; 1234 } 1235 PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 1236 } 1237 PetscCall(PetscFree(icols)); 1238 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1239 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1240 1241 if (reuse == MAT_INPLACE_MATRIX) { 1242 PetscCall(MatHeaderReplace(A, &B)); 1243 } else { 1244 *newmat = B; 1245 } 1246 PetscFunctionReturn(PETSC_SUCCESS); 1247 } 1248 1249 #include <../src/mat/impls/aij/mpi/mpiaij.h> 1250 1251 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1252 { 1253 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data; 1254 Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B; 1255 Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ; 1256 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data; 1257 Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data; 1258 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data; 1259 PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0; 1260 PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL; 1261 PetscInt rstart, cstart, *garray, ii, k; 1262 PetscScalar *vals, *ovals; 1263 1264 PetscFunctionBegin; 1265 PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz)); 1266 for (i = 0; i < A->rmap->n / dof; i++) { 1267 nmax = PetscMax(nmax, AIJ->ilen[i]); 1268 onmax = PetscMax(onmax, OAIJ->ilen[i]); 1269 for (j = 0; j < dof; j++) { 1270 dnz[dof * i + j] = AIJ->ilen[i]; 1271 onz[dof * i + j] = OAIJ->ilen[i]; 1272 } 1273 } 1274 
PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1275 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 1276 PetscCall(MatSetType(B, newtype)); 1277 PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz)); 1278 PetscCall(MatSetBlockSize(B, dof)); 1279 PetscCall(PetscFree2(dnz, onz)); 1280 1281 PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols)); 1282 rstart = dof * maij->A->rmap->rstart; 1283 cstart = dof * maij->A->cmap->rstart; 1284 garray = mpiaij->garray; 1285 1286 ii = rstart; 1287 for (i = 0; i < A->rmap->n / dof; i++) { 1288 PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 1289 PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 1290 for (j = 0; j < dof; j++) { 1291 for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j; 1292 for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j; 1293 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 1294 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES)); 1295 ii++; 1296 } 1297 PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 1298 PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 1299 } 1300 PetscCall(PetscFree2(icols, oicols)); 1301 1302 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1303 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1304 1305 if (reuse == MAT_INPLACE_MATRIX) { 1306 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 1307 ((PetscObject)A)->refct = 1; 1308 1309 PetscCall(MatHeaderReplace(A, &B)); 1310 1311 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 1312 } else { 1313 *newmat = B; 1314 } 1315 PetscFunctionReturn(PETSC_SUCCESS); 1316 } 1317 1318 static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) 1319 { 1320 Mat A; 1321 1322 PetscFunctionBegin; 1323 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 1324 
PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 1325 PetscCall(MatDestroy(&A)); 1326 PetscFunctionReturn(PETSC_SUCCESS); 1327 } 1328 1329 static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) 1330 { 1331 Mat A; 1332 1333 PetscFunctionBegin; 1334 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 1335 PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat)); 1336 PetscCall(MatDestroy(&A)); 1337 PetscFunctionReturn(PETSC_SUCCESS); 1338 } 1339 1340 /* ---------------------------------------------------------------------------------- */ 1341 /*@ 1342 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 1343 operations for multicomponent problems. It interpolates each component the same 1344 way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices, 1345 and `MATMPIAIJ` for distributed matrices. 1346 1347 Collective 1348 1349 Input Parameters: 1350 + A - the `MATAIJ` matrix describing the action on blocks 1351 - dof - the block size (number of components per node) 1352 1353 Output Parameter: 1354 . maij - the new `MATMAIJ` matrix 1355 1356 Operations provided: 1357 .vb 1358 MatMult() 1359 MatMultTranspose() 1360 MatMultAdd() 1361 MatMultTransposeAdd() 1362 MatView() 1363 .ve 1364 1365 Level: advanced 1366 1367 .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ` 1368 @*/ 1369 PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) 1370 { 1371 PetscInt n; 1372 Mat B; 1373 PetscBool flg; 1374 #if defined(PETSC_HAVE_CUDA) 1375 /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 1376 PetscBool convert = dof < 0 ? 
PETSC_FALSE : PETSC_TRUE; 1377 #endif 1378 1379 PetscFunctionBegin; 1380 dof = PetscAbs(dof); 1381 PetscCall(PetscObjectReference((PetscObject)A)); 1382 1383 if (dof == 1) *maij = A; 1384 else { 1385 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1386 /* propagate vec type */ 1387 PetscCall(MatSetVecType(B, A->defaultvectype)); 1388 PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N)); 1389 PetscCall(PetscLayoutSetBlockSize(B->rmap, dof)); 1390 PetscCall(PetscLayoutSetBlockSize(B->cmap, dof)); 1391 PetscCall(PetscLayoutSetUp(B->rmap)); 1392 PetscCall(PetscLayoutSetUp(B->cmap)); 1393 1394 B->assembled = PETSC_TRUE; 1395 1396 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 1397 if (flg) { 1398 Mat_SeqMAIJ *b; 1399 1400 PetscCall(MatSetType(B, MATSEQMAIJ)); 1401 1402 B->ops->setup = NULL; 1403 B->ops->destroy = MatDestroy_SeqMAIJ; 1404 B->ops->view = MatView_SeqMAIJ; 1405 1406 b = (Mat_SeqMAIJ *)B->data; 1407 b->dof = dof; 1408 b->AIJ = A; 1409 1410 if (dof == 2) { 1411 B->ops->mult = MatMult_SeqMAIJ_2; 1412 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1413 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1414 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1415 } else if (dof == 3) { 1416 B->ops->mult = MatMult_SeqMAIJ_3; 1417 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1418 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1419 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1420 } else if (dof == 4) { 1421 B->ops->mult = MatMult_SeqMAIJ_4; 1422 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1423 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1424 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1425 } else if (dof == 5) { 1426 B->ops->mult = MatMult_SeqMAIJ_5; 1427 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1428 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1429 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 1430 } else if (dof == 6) { 1431 
B->ops->mult = MatMult_SeqMAIJ_6; 1432 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 1433 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 1434 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1435 } else if (dof == 7) { 1436 B->ops->mult = MatMult_SeqMAIJ_7; 1437 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 1438 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 1439 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 1440 } else if (dof == 8) { 1441 B->ops->mult = MatMult_SeqMAIJ_8; 1442 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 1443 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 1444 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 1445 } else if (dof == 9) { 1446 B->ops->mult = MatMult_SeqMAIJ_9; 1447 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 1448 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 1449 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 1450 } else if (dof == 10) { 1451 B->ops->mult = MatMult_SeqMAIJ_10; 1452 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 1453 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 1454 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 1455 } else if (dof == 11) { 1456 B->ops->mult = MatMult_SeqMAIJ_11; 1457 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 1458 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 1459 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 1460 } else if (dof == 16) { 1461 B->ops->mult = MatMult_SeqMAIJ_16; 1462 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 1463 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 1464 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 1465 } else if (dof == 18) { 1466 B->ops->mult = MatMult_SeqMAIJ_18; 1467 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 1468 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 1469 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 1470 } else { 1471 B->ops->mult = MatMult_SeqMAIJ_N; 1472 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 1473 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 
1474 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 1475 } 1476 #if defined(PETSC_HAVE_CUDA) 1477 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ)); 1478 #endif 1479 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ)); 1480 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 1481 } else { 1482 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1483 Mat_MPIMAIJ *b; 1484 IS from, to; 1485 Vec gvec; 1486 1487 PetscCall(MatSetType(B, MATMPIMAIJ)); 1488 1489 B->ops->setup = NULL; 1490 B->ops->destroy = MatDestroy_MPIMAIJ; 1491 B->ops->view = MatView_MPIMAIJ; 1492 1493 b = (Mat_MPIMAIJ *)B->data; 1494 b->dof = dof; 1495 b->A = A; 1496 1497 PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ)); 1498 PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ)); 1499 1500 PetscCall(VecGetSize(mpiaij->lvec, &n)); 1501 PetscCall(VecCreate(PETSC_COMM_SELF, &b->w)); 1502 PetscCall(VecSetSizes(b->w, n * dof, n * dof)); 1503 PetscCall(VecSetBlockSize(b->w, dof)); 1504 PetscCall(VecSetType(b->w, VECSEQ)); 1505 1506 /* create two temporary Index sets for build scatter gather */ 1507 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 1508 PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to)); 1509 1510 /* create temporary global vector to generate scatter context */ 1511 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec)); 1512 1513 /* generate the scatter context */ 1514 PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx)); 1515 1516 PetscCall(ISDestroy(&from)); 1517 PetscCall(ISDestroy(&to)); 1518 PetscCall(VecDestroy(&gvec)); 1519 1520 B->ops->mult = MatMult_MPIMAIJ_dof; 1521 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 1522 
B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 1523 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 1524 1525 #if defined(PETSC_HAVE_CUDA) 1526 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ)); 1527 #endif 1528 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ)); 1529 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 1530 } 1531 B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 1532 B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 1533 PetscCall(MatSetUp(B)); 1534 #if defined(PETSC_HAVE_CUDA) 1535 /* temporary until we have CUDA implementation of MAIJ */ 1536 { 1537 PetscBool flg; 1538 if (convert) { 1539 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "")); 1540 if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); 1541 } 1542 } 1543 #endif 1544 *maij = B; 1545 } 1546 PetscFunctionReturn(PETSC_SUCCESS); 1547 } 1548