1 2 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/utils/freespace.h> 4 5 /*@ 6 MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix 7 8 Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 9 10 Input Parameter: 11 . A - the `MATMAIJ` matrix 12 13 Output Parameter: 14 . B - the `MATAIJ` matrix 15 16 Level: advanced 17 18 Note: 19 The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 20 21 .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()` 22 @*/ 23 PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) 24 { 25 PetscBool ismpimaij, isseqmaij; 26 27 PetscFunctionBegin; 28 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij)); 29 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij)); 30 if (ismpimaij) { 31 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 32 33 *B = b->A; 34 } else if (isseqmaij) { 35 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 36 37 *B = b->AIJ; 38 } else { 39 *B = A; 40 } 41 PetscFunctionReturn(PETSC_SUCCESS); 42 } 43 44 /*@ 45 MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size 46 47 Logically Collective 48 49 Input Parameters: 50 + A - the `MATMAIJ` matrix 51 - dof - the block size for the new matrix 52 53 Output Parameter: 54 . B - the new `MATMAIJ` matrix 55 56 Level: advanced 57 58 .seealso: `MATMAIJ`, `MatCreateMAIJ()` 59 @*/ 60 PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) 61 { 62 Mat Aij = NULL; 63 64 PetscFunctionBegin; 65 PetscValidLogicalCollectiveInt(A, dof, 2); 66 PetscCall(MatMAIJGetAIJ(A, &Aij)); 67 PetscCall(MatCreateMAIJ(Aij, dof, B)); 68 PetscFunctionReturn(PETSC_SUCCESS); 69 } 70 71 static PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 72 { 73 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 74 75 PetscFunctionBegin; 76 PetscCall(MatDestroy(&b->AIJ)); 77 PetscCall(PetscFree(A->data)); 78 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL)); 79 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL)); 80 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL)); 81 PetscFunctionReturn(PETSC_SUCCESS); 82 } 83 84 static PetscErrorCode MatSetUp_MAIJ(Mat A) 85 { 86 PetscFunctionBegin; 87 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices"); 88 } 89 90 static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) 91 { 92 Mat B; 93 94 PetscFunctionBegin; 95 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 96 PetscCall(MatView(B, viewer)); 97 PetscCall(MatDestroy(&B)); 98 PetscFunctionReturn(PETSC_SUCCESS); 99 } 100 101 static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) 102 { 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); 107 PetscCall(MatView(B, viewer)); 108 PetscCall(MatDestroy(&B)); 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 112 static PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 113 { 114 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 115 116 PetscFunctionBegin; 117 PetscCall(MatDestroy(&b->AIJ)); 118 PetscCall(MatDestroy(&b->OAIJ)); 119 PetscCall(MatDestroy(&b->A)); 120 PetscCall(VecScatterDestroy(&b->ctx)); 121 PetscCall(VecDestroy(&b->w)); 122 PetscCall(PetscFree(A->data)); 123 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL)); 124 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL)); 125 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL)); 126 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 127 PetscFunctionReturn(PETSC_SUCCESS); 128 } 129 130 /*MC 131 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 132 multicomponent problems, interpolating or restricting each component the same way independently. 133 The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices. 134 135 Operations provided: 136 .vb 137 MatMult() 138 MatMultTranspose() 139 MatMultAdd() 140 MatMultTransposeAdd() 141 .ve 142 143 Level: advanced 144 145 .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` 146 M*/ 147 148 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 149 { 150 Mat_MPIMAIJ *b; 151 PetscMPIInt size; 152 153 PetscFunctionBegin; 154 PetscCall(PetscNew(&b)); 155 A->data = (void *)b; 156 157 PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 158 159 A->ops->setup = MatSetUp_MAIJ; 160 161 b->AIJ = NULL; 162 b->dof = 0; 163 b->OAIJ = NULL; 164 b->ctx = NULL; 165 b->w = NULL; 166 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 167 if (size == 1) { 168 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ)); 169 } else { 170 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ)); 171 } 172 A->preallocated = PETSC_TRUE; 173 A->assembled = PETSC_TRUE; 174 PetscFunctionReturn(PETSC_SUCCESS); 175 } 176 177 #if PetscHasAttribute(always_inline) 178 #define PETSC_FORCE_INLINE __attribute__((always_inline)) 179 #else 180 #define PETSC_FORCE_INLINE 181 #endif 182 enum { 183 MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18 184 }; 185 186 // try as hard as possible to get these "template"s inlined, GCC apparently does take 'inline' 187 // keyword into account for these... 188 PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 189 { 190 const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 191 const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 192 const Mat baij = b->AIJ; 193 const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 194 const PetscInt m = baij->rmap->n; 195 const PetscInt nz = a->nz; 196 const PetscInt *idx = a->j; 197 const PetscInt *ii = a->i; 198 const PetscScalar *v = a->a; 199 PetscInt nonzerorow = 0; 200 const PetscScalar *x; 201 PetscScalar *z; 202 203 PetscFunctionBegin; 204 PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 205 if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz)); 206 PetscCall(VecGetArrayRead(xx, &x)); 207 if (mult_add) { 208 PetscCall(VecGetArray(zz, &z)); 209 } else { 210 PetscCall(VecGetArrayWrite(zz, &z)); 211 } 212 213 for (PetscInt i = 0; i < m; ++i) { 214 PetscInt jrow = ii[i]; 215 const PetscInt n = ii[i + 1] - jrow; 216 // leave a line so clang-format does not align these decls 217 PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0}; 218 219 nonzerorow += n > 0; 220 for (PetscInt j = 0; j < n; ++j, ++jrow) { 221 const PetscScalar v_jrow = v[jrow]; 222 const PetscInt N_idx_jrow = N * idx[jrow]; 223 224 #pragma unroll 225 for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k]; 226 } 227 228 #pragma unroll 229 for (int k = 0; k < N; ++k) { 230 const PetscInt z_idx = N * i + k; 231 232 if (mult_add) { 233 z[z_idx] += sum[k]; 234 } else { 235 z[z_idx] = sum[k]; 236 } 237 } 238 } 239 PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow)))); 240 PetscCall(VecRestoreArrayRead(xx, &x)); 241 if (mult_add) { 242 PetscCall(VecRestoreArray(zz, &z)); 243 } else { 244 PetscCall(VecRestoreArrayWrite(zz, &z)); 245 } 246 PetscFunctionReturn(PETSC_SUCCESS); 247 } 248 249 PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 250 { 251 const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 252 const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 253 const Mat baij = b->AIJ; 254 const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 255 const PetscInt m = baij->rmap->n; 256 const PetscInt nz = a->nz; 257 const PetscInt *a_j = a->j; 258 const PetscInt *a_i = a->i; 259 const PetscScalar *a_a = a->a; 260 const PetscScalar *x; 261 PetscScalar *z; 262 263 PetscFunctionBegin; 264 PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 265 if (mult_add) { 266 if (yy != zz) PetscCall(VecCopy(yy, zz)); 267 } else { 268 PetscCall(VecSet(zz, 0.0)); 269 } 270 PetscCall(VecGetArrayRead(xx, &x)); 271 PetscCall(VecGetArray(zz, &z)); 272 273 for (PetscInt i = 0; i < m; i++) { 274 const PetscInt a_ii = a_i[i]; 275 const PetscInt *idx = a_j + a_ii; 276 const PetscScalar *v = a_a + a_ii; 277 const PetscInt n = a_i[i + 1] - a_ii; 278 PetscScalar alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE]; 279 280 #pragma unroll 281 for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k]; 282 for (PetscInt j = 0; j < n; ++j) { 283 const PetscInt N_idx_j = N * idx[j]; 284 const PetscScalar v_j = v[j]; 285 286 #pragma unroll 287 for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j; 288 } 289 } 290 291 PetscCall(PetscLogFlops(2 * N * nz)); 292 PetscCall(VecRestoreArrayRead(xx, &x)); 293 PetscCall(VecRestoreArray(zz, &z)); 294 PetscFunctionReturn(PETSC_SUCCESS); 295 } 296 297 /* --------------------------------------------------------------------------------------*/ 298 299 static PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy) 300 { 301 PetscFunctionBegin; 302 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 2)); 303 PetscFunctionReturn(PETSC_SUCCESS); 304 } 305 306 static PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy) 307 { 308 PetscFunctionBegin; 309 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 2)); 310 PetscFunctionReturn(PETSC_SUCCESS); 311 } 312 313 static PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) 314 { 315 PetscFunctionBegin; 316 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 2)); 317 PetscFunctionReturn(PETSC_SUCCESS); 318 } 319 320 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) 321 { 322 PetscFunctionBegin; 323 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 2)); 324 PetscFunctionReturn(PETSC_SUCCESS); 325 } 326 327 /* --------------------------------------------------------------------------------------*/ 328 329 static PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy) 330 { 331 PetscFunctionBegin; 332 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 3)); 333 PetscFunctionReturn(PETSC_SUCCESS); 334 } 335 336 static PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy) 337 { 338 PetscFunctionBegin; 339 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 3)); 340 PetscFunctionReturn(PETSC_SUCCESS); 341 } 342 343 static PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) 344 { 345 PetscFunctionBegin; 346 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 3)); 347 PetscFunctionReturn(PETSC_SUCCESS); 348 } 349 350 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) 351 { 352 PetscFunctionBegin; 353 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 3)); 354 PetscFunctionReturn(PETSC_SUCCESS); 355 } 356 357 /* ------------------------------------------------------------------------------*/ 358 359 static PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy) 360 { 361 PetscFunctionBegin; 362 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 4)); 363 PetscFunctionReturn(PETSC_SUCCESS); 364 } 365 366 static PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy) 367 { 368 PetscFunctionBegin; 369 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 4)); 370 PetscFunctionReturn(PETSC_SUCCESS); 371 } 372 373 static PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) 374 { 375 PetscFunctionBegin; 376 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 4)); 377 PetscFunctionReturn(PETSC_SUCCESS); 378 } 379 380 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) 381 { 382 PetscFunctionBegin; 383 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 4)); 384 PetscFunctionReturn(PETSC_SUCCESS); 385 } 386 387 /* ------------------------------------------------------------------------------*/ 388 389 static PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy) 390 { 391 PetscFunctionBegin; 392 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 5)); 393 PetscFunctionReturn(PETSC_SUCCESS); 394 } 395 396 static PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy) 397 { 398 PetscFunctionBegin; 399 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 5)); 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 static PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) 404 { 405 PetscFunctionBegin; 406 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 5)); 407 PetscFunctionReturn(PETSC_SUCCESS); 408 } 409 410 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) 411 { 412 PetscFunctionBegin; 413 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 5)); 414 PetscFunctionReturn(PETSC_SUCCESS); 415 } 416 417 /* ------------------------------------------------------------------------------*/ 418 419 static PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy) 420 { 421 PetscFunctionBegin; 422 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 6)); 423 PetscFunctionReturn(PETSC_SUCCESS); 424 } 425 426 static PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy) 427 { 428 PetscFunctionBegin; 429 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 6)); 430 PetscFunctionReturn(PETSC_SUCCESS); 431 } 432 433 static PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) 434 { 435 PetscFunctionBegin; 436 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 6)); 437 PetscFunctionReturn(PETSC_SUCCESS); 438 } 439 440 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) 441 { 442 PetscFunctionBegin; 443 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 6)); 444 PetscFunctionReturn(PETSC_SUCCESS); 445 } 446 447 /* ------------------------------------------------------------------------------*/ 448 449 static PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy) 450 { 451 PetscFunctionBegin; 452 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 7)); 453 PetscFunctionReturn(PETSC_SUCCESS); 454 } 455 456 static PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy) 457 { 458 PetscFunctionBegin; 459 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 7)); 460 PetscFunctionReturn(PETSC_SUCCESS); 461 } 462 463 static PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) 464 { 465 PetscFunctionBegin; 466 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 7)); 467 PetscFunctionReturn(PETSC_SUCCESS); 468 } 469 470 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) 471 { 472 PetscFunctionBegin; 473 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 7)); 474 PetscFunctionReturn(PETSC_SUCCESS); 475 } 476 477 /* ------------------------------------------------------------------------------*/ 478 479 static PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy) 480 { 481 PetscFunctionBegin; 482 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 8)); 483 PetscFunctionReturn(PETSC_SUCCESS); 484 } 485 486 static PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy) 487 { 488 PetscFunctionBegin; 489 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 8)); 490 PetscFunctionReturn(PETSC_SUCCESS); 491 } 492 493 static PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) 494 { 495 PetscFunctionBegin; 496 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 8)); 497 PetscFunctionReturn(PETSC_SUCCESS); 498 } 499 500 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) 501 { 502 PetscFunctionBegin; 503 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 8)); 504 PetscFunctionReturn(PETSC_SUCCESS); 505 } 506 507 /* ------------------------------------------------------------------------------*/ 508 509 static PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy) 510 { 511 PetscFunctionBegin; 512 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 9)); 513 PetscFunctionReturn(PETSC_SUCCESS); 514 } 515 516 static PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy) 517 { 518 PetscFunctionBegin; 519 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 9)); 520 PetscFunctionReturn(PETSC_SUCCESS); 521 } 522 523 static PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) 524 { 525 PetscFunctionBegin; 526 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 9)); 527 PetscFunctionReturn(PETSC_SUCCESS); 528 } 529 530 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) 531 { 532 PetscFunctionBegin; 533 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 9)); 534 PetscFunctionReturn(PETSC_SUCCESS); 535 } 536 537 /* ------------------------------------------------------------------------------*/ 538 539 static PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy) 540 { 541 PetscFunctionBegin; 542 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 10)); 543 PetscFunctionReturn(PETSC_SUCCESS); 544 } 545 546 static PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy) 547 { 548 PetscFunctionBegin; 549 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 10)); 550 PetscFunctionReturn(PETSC_SUCCESS); 551 } 552 553 static PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) 554 { 555 PetscFunctionBegin; 556 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 10)); 557 PetscFunctionReturn(PETSC_SUCCESS); 558 } 559 560 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) 561 { 562 PetscFunctionBegin; 563 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 10)); 564 PetscFunctionReturn(PETSC_SUCCESS); 565 } 566 567 /*--------------------------------------------------------------------------------------------*/ 568 569 static PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy) 570 { 571 PetscFunctionBegin; 572 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 11)); 573 PetscFunctionReturn(PETSC_SUCCESS); 574 } 575 576 static PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy) 577 { 578 PetscFunctionBegin; 579 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 11)); 580 PetscFunctionReturn(PETSC_SUCCESS); 581 } 582 583 static PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) 584 { 585 PetscFunctionBegin; 586 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 11)); 587 PetscFunctionReturn(PETSC_SUCCESS); 588 } 589 590 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) 591 { 592 PetscFunctionBegin; 593 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 11)); 594 PetscFunctionReturn(PETSC_SUCCESS); 595 } 596 597 /*--------------------------------------------------------------------------------------------*/ 598 599 static PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy) 600 { 601 PetscFunctionBegin; 602 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 16)); 603 PetscFunctionReturn(PETSC_SUCCESS); 604 } 605 606 static PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy) 607 { 608 PetscFunctionBegin; 609 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 16)); 610 PetscFunctionReturn(PETSC_SUCCESS); 611 } 612 613 static PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) 614 { 615 PetscFunctionBegin; 616 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 16)); 617 PetscFunctionReturn(PETSC_SUCCESS); 618 } 619 620 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) 621 { 622 PetscFunctionBegin; 623 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 16)); 624 PetscFunctionReturn(PETSC_SUCCESS); 625 } 626 627 /*--------------------------------------------------------------------------------------------*/ 628 629 static PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy) 630 { 631 PetscFunctionBegin; 632 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 18)); 633 PetscFunctionReturn(PETSC_SUCCESS); 634 } 635 636 static PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy) 637 { 638 PetscFunctionBegin; 639 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, 18)); 640 PetscFunctionReturn(PETSC_SUCCESS); 641 } 642 643 static PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) 644 { 645 PetscFunctionBegin; 646 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, 18)); 647 PetscFunctionReturn(PETSC_SUCCESS); 648 } 649 650 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) 651 { 652 PetscFunctionBegin; 653 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, 18)); 654 PetscFunctionReturn(PETSC_SUCCESS); 655 } 656 657 /*--------------------------------------------------------------------------------------------*/ 658 659 static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 660 { 661 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 662 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 663 const PetscScalar *x, *v; 664 PetscScalar *y, *sums; 665 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 666 PetscInt n, i, jrow, j, dof = b->dof, k; 667 668 PetscFunctionBegin; 669 PetscCall(VecGetArrayRead(xx, &x)); 670 PetscCall(VecSet(yy, 0.0)); 671 PetscCall(VecGetArray(yy, &y)); 672 idx = a->j; 673 v = a->a; 674 ii = a->i; 675 676 for (i = 0; i < m; i++) { 677 jrow = ii[i]; 678 n = ii[i + 1] - jrow; 679 sums = y + dof * i; 680 for (j = 0; j < n; j++) { 681 for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 682 jrow++; 683 } 684 } 685 686 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 687 PetscCall(VecRestoreArrayRead(xx, &x)); 688 PetscCall(VecRestoreArray(yy, &y)); 689 PetscFunctionReturn(PETSC_SUCCESS); 690 } 691 692 static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 693 { 694 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 695 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 696 const PetscScalar *x, *v; 697 PetscScalar *y, *sums; 698 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 699 PetscInt n, i, jrow, j, dof = b->dof, k; 700 701 PetscFunctionBegin; 702 if (yy != zz) PetscCall(VecCopy(yy, zz)); 703 PetscCall(VecGetArrayRead(xx, &x)); 704 PetscCall(VecGetArray(zz, &y)); 705 idx = a->j; 706 v = a->a; 707 ii = a->i; 708 709 for (i = 0; i < m; i++) { 710 jrow = ii[i]; 711 n = ii[i + 1] - jrow; 712 sums = y + dof * i; 713 for (j = 0; j < n; j++) { 714 for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 715 jrow++; 716 } 717 } 718 719 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 720 PetscCall(VecRestoreArrayRead(xx, &x)); 721 PetscCall(VecRestoreArray(zz, &y)); 722 PetscFunctionReturn(PETSC_SUCCESS); 723 } 724 725 static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 726 { 727 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 728 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 729 const PetscScalar *x, *v, *alpha; 730 PetscScalar *y; 731 const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 732 PetscInt n, i, k; 733 734 PetscFunctionBegin; 735 PetscCall(VecGetArrayRead(xx, &x)); 736 PetscCall(VecSet(yy, 0.0)); 737 PetscCall(VecGetArray(yy, &y)); 738 for (i = 0; i < m; i++) { 739 idx = a->j + a->i[i]; 740 v = a->a + a->i[i]; 741 n = a->i[i + 1] - a->i[i]; 742 alpha = x + dof * i; 743 while (n-- > 0) { 744 for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 745 idx++; 746 v++; 747 } 748 } 749 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 750 PetscCall(VecRestoreArrayRead(xx, &x)); 751 PetscCall(VecRestoreArray(yy, &y)); 752 PetscFunctionReturn(PETSC_SUCCESS); 753 } 754 755 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 756 { 757 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 758 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 759 const PetscScalar *x, *v, *alpha; 760 PetscScalar *y; 761 const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 762 PetscInt n, i, k; 763 764 PetscFunctionBegin; 765 if (yy != zz) PetscCall(VecCopy(yy, zz)); 766 PetscCall(VecGetArrayRead(xx, &x)); 767 PetscCall(VecGetArray(zz, &y)); 768 for (i = 0; i < m; i++) { 769 idx = a->j + a->i[i]; 770 v = a->a + a->i[i]; 771 n = a->i[i + 1] - a->i[i]; 772 alpha = x + dof * i; 773 while (n-- > 0) { 774 for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 775 idx++; 776 v++; 777 } 778 } 779 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 780 PetscCall(VecRestoreArrayRead(xx, &x)); 781 PetscCall(VecRestoreArray(zz, &y)); 782 PetscFunctionReturn(PETSC_SUCCESS); 783 } 784 785 /*--------------------------------------------------------------------------------------------*/ 786 787 static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 788 { 789 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 790 791 PetscFunctionBegin; 792 /* start the scatter */ 793 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 794 PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy)); 795 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 796 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy)); 797 PetscFunctionReturn(PETSC_SUCCESS); 798 } 799 800 static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 801 { 802 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 803 804 PetscFunctionBegin; 805 PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 806 PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy)); 807 PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 808 PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 809 PetscFunctionReturn(PETSC_SUCCESS); 810 } 811 812 static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 813 { 814 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 815 816 PetscFunctionBegin; 817 /* start the scatter */ 818 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 819 PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz)); 820 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 821 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 822 PetscFunctionReturn(PETSC_SUCCESS); 823 } 824 825 static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 826 { 827 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 828 829 PetscFunctionBegin; 830 PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 831 PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz)); 832 PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 833 PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 834 PetscFunctionReturn(PETSC_SUCCESS); 835 } 836 837 /* ----------------------------------------------------------------*/ 838 839 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 840 { 841 Mat_Product *product = C->product; 842 843 PetscFunctionBegin; 844 if (product->type == MATPRODUCT_PtAP) { 845 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 846 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]); 847 PetscFunctionReturn(PETSC_SUCCESS); 848 } 849 850 static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 851 { 852 Mat_Product *product = C->product; 853 PetscBool flg = PETSC_FALSE; 854 Mat A = product->A, P = product->B; 855 PetscInt alg = 1; /* set default algorithm */ 856 #if !defined(PETSC_HAVE_HYPRE) 857 const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"}; 858 PetscInt nalg = 4; 859 #else 860 const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"}; 861 PetscInt nalg = 5; 862 #endif 863 864 PetscFunctionBegin; 865 PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]); 866 867 /* PtAP */ 868 /* Check matrix local sizes */ 869 PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 870 A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend); 871 PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 872 A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend); 873 874 /* Set the default algorithm */ 875 PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); 876 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 877 878 /* Get runtime option */ 879 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 880 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg)); 881 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 882 PetscOptionsEnd(); 883 884 PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg)); 885 if (flg) { 886 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 887 PetscFunctionReturn(PETSC_SUCCESS); 888 } 889 890 PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg)); 891 if (flg) { 892 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 893 PetscFunctionReturn(PETSC_SUCCESS); 894 } 895 896 /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 897 PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 898 PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P)); 899 PetscCall(MatProductSetFromOptions(C)); 900 PetscFunctionReturn(PETSC_SUCCESS); 901 } 902 903 /* ----------------------------------------------------------------*/ 904 static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) 905 { 906 /* This routine requires testing -- first draft only */ 907 Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 908 Mat P = pp->AIJ; 909 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 910 Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data; 911 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 912 const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj; 913 const PetscInt *ci = c->i, *cj = c->j, *cjj; 914 const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof; 915 PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense; 916 const MatScalar *aa = a->a, *pa = p->a, *pA, *paj; 917 MatScalar *ca = c->a, *caj, *apa; 918 919 PetscFunctionBegin; 920 /* Allocate temporary array for storage of one row of A*P */ 921 PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense)); 922 923 /* Clear old values in C */ 924 PetscCall(PetscArrayzero(ca, ci[cm])); 925 926 for (i = 0; i < am; i++) { 927 /* Form sparse row of A*P */ 928 anzi = ai[i + 1] - ai[i]; 929 apnzj = 0; 930 for (j = 0; j < anzi; j++) { 931 /* Get offset within block of P */ 932 pshift = *aj % ppdof; 933 /* Get block row of P */ 934 prow = *aj++ / ppdof; /* integer division */ 935 pnzj = pi[prow + 1] - pi[prow]; 936 pjj = pj + pi[prow]; 937 paj = pa + pi[prow]; 938 for (k = 0; k < pnzj; k++) { 939 poffset = pjj[k] * ppdof + pshift; 940 if (!apjdense[poffset]) { 941 apjdense[poffset] = -1; 942 apj[apnzj++] = poffset; 943 } 944 apa[poffset] += (*aa) * paj[k]; 945 } 946 PetscCall(PetscLogFlops(2.0 * pnzj)); 947 aa++; 948 } 949 950 /* Sort the j index array for quick sparse axpy. */ 951 /* Note: a array does not need sorting as it is in dense storage locations. */ 952 PetscCall(PetscSortInt(apnzj, apj)); 953 954 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 955 prow = i / ppdof; /* integer division */ 956 pshift = i % ppdof; 957 poffset = pi[prow]; 958 pnzi = pi[prow + 1] - poffset; 959 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 960 pJ = pj + poffset; 961 pA = pa + poffset; 962 for (j = 0; j < pnzi; j++) { 963 crow = (*pJ) * ppdof + pshift; 964 cjj = cj + ci[crow]; 965 caj = ca + ci[crow]; 966 pJ++; 967 /* Perform sparse axpy operation. Note cjj includes apj. */ 968 for (k = 0, nextap = 0; nextap < apnzj; k++) { 969 if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]]; 970 } 971 PetscCall(PetscLogFlops(2.0 * apnzj)); 972 pA++; 973 } 974 975 /* Zero the current row info for A*P */ 976 for (j = 0; j < apnzj; j++) { 977 apa[apj[j]] = 0.; 978 apjdense[apj[j]] = 0; 979 } 980 } 981 982 /* Assemble the final matrix and clean up */ 983 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 984 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 985 PetscCall(PetscFree3(apa, apj, apjdense)); 986 PetscFunctionReturn(PETSC_SUCCESS); 987 } 988 989 static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) 990 { 991 PetscFreeSpaceList free_space = NULL, current_space = NULL; 992 Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 993 Mat P = pp->AIJ; 994 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c; 995 PetscInt *pti, *ptj, *ptJ; 996 PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj; 997 const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof; 998 PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn; 999 MatScalar *ca; 1000 const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj; 1001 1002 PetscFunctionBegin; 1003 /* Get ij structure of P^T */ 1004 PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 1005 1006 cn = pn * ppdof; 1007 /* Allocate ci array, arrays for fill computation and */ 1008 /* free space for accumulating nonzero column info */ 1009 PetscCall(PetscMalloc1(cn + 1, &ci)); 1010 ci[0] = 0; 1011 1012 /* Work arrays for rows of P^T*A */ 1013 PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow)); 1014 PetscCall(PetscArrayzero(ptadenserow, an)); 1015 PetscCall(PetscArrayzero(denserow, cn)); 1016 1017 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 1018 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 1019 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 1020 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space)); 1021 current_space = free_space; 1022 1023 /* Determine symbolic info for each row of C: */ 1024 for (i = 0; i < pn; i++) { 1025 ptnzi = pti[i + 1] - pti[i]; 1026 ptJ = ptj + pti[i]; 1027 for (dof = 0; dof < ppdof; dof++) { 1028 ptanzi = 0; 1029 /* Determine symbolic row of PtA: */ 1030 for (j = 0; j < ptnzi; j++) { 1031 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 1032 arow = ptJ[j] * ppdof + dof; 1033 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 1034 anzj = ai[arow + 1] - ai[arow]; 1035 ajj = aj + ai[arow]; 1036 for (k = 0; k < anzj; k++) { 1037 if (!ptadenserow[ajj[k]]) { 1038 ptadenserow[ajj[k]] = -1; 1039 ptasparserow[ptanzi++] = ajj[k]; 1040 } 1041 } 1042 } 1043 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 1044 ptaj = ptasparserow; 1045 cnzi = 0; 1046 for (j = 0; j < ptanzi; j++) { 1047 /* Get offset within block of P */ 1048 pshift = *ptaj % ppdof; 1049 /* Get block row of P */ 1050 prow = (*ptaj++) / ppdof; /* integer division */ 1051 /* P has same number of nonzeros per row as the compressed form */ 1052 pnzj = pi[prow + 1] - pi[prow]; 1053 pjj = pj + pi[prow]; 1054 for (k = 0; k < pnzj; k++) { 1055 /* Locations in C are shifted by the offset within the block */ 1056 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 1057 if (!denserow[pjj[k] * ppdof + pshift]) { 1058 denserow[pjj[k] * ppdof + pshift] = -1; 1059 sparserow[cnzi++] = pjj[k] * ppdof + pshift; 1060 } 1061 } 1062 } 1063 1064 /* sort sparserow */ 1065 PetscCall(PetscSortInt(cnzi, sparserow)); 1066 1067 /* If free space is not available, make more free space */ 1068 /* Double the amount of total space in the list */ 1069 if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 1070 1071 /* Copy data into free space, and zero out denserows */ 1072 PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi)); 1073 1074 current_space->array += cnzi; 1075 current_space->local_used += cnzi; 1076 current_space->local_remaining -= cnzi; 1077 1078 for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 1079 for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0; 1080 1081 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 1082 /* For now, we will recompute what is needed. */ 1083 ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi; 1084 } 1085 } 1086 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 1087 /* Allocate space for cj, initialize cj, and */ 1088 /* destroy list of free space and other temporary array(s) */ 1089 PetscCall(PetscMalloc1(ci[cn] + 1, &cj)); 1090 PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 1091 PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow)); 1092 1093 /* Allocate space for ca */ 1094 PetscCall(PetscCalloc1(ci[cn] + 1, &ca)); 1095 1096 /* put together the new matrix */ 1097 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C)); 1098 PetscCall(MatSetBlockSize(C, pp->dof)); 1099 1100 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1101 /* Since these are PETSc arrays, change flags to free them as necessary. */ 1102 c = (Mat_SeqAIJ *)(C->data); 1103 c->free_a = PETSC_TRUE; 1104 c->free_ij = PETSC_TRUE; 1105 c->nonew = 0; 1106 1107 C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 1108 C->ops->productnumeric = MatProductNumeric_PtAP; 1109 1110 /* Clean up. */ 1111 PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 1112 PetscFunctionReturn(PETSC_SUCCESS); 1113 } 1114 1115 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 1116 { 1117 Mat_Product *product = C->product; 1118 Mat A = product->A, P = product->B; 1119 1120 PetscFunctionBegin; 1121 PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C)); 1122 PetscFunctionReturn(PETSC_SUCCESS); 1123 } 1124 1125 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat); 1126 1127 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) 1128 { 1129 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 1130 1131 PetscFunctionBegin; 1132 1133 PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C)); 1134 PetscFunctionReturn(PETSC_SUCCESS); 1135 } 1136 1137 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat); 1138 1139 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) 1140 { 1141 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 1142 1143 PetscFunctionBegin; 1144 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C)); 1145 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 1146 PetscFunctionReturn(PETSC_SUCCESS); 1147 } 1148 1149 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat); 1150 1151 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) 1152 { 1153 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 1154 1155 PetscFunctionBegin; 1156 1157 PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C)); 1158 PetscFunctionReturn(PETSC_SUCCESS); 1159 } 1160 1161 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat); 1162 1163 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) 1164 { 1165 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 1166 1167 PetscFunctionBegin; 1168 1169 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C)); 1170 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 1171 PetscFunctionReturn(PETSC_SUCCESS); 1172 } 1173 1174 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 1175 { 1176 Mat_Product *product = C->product; 1177 Mat A = product->A, P = product->B; 1178 PetscBool flg; 1179 1180 PetscFunctionBegin; 1181 PetscCall(PetscStrcmp(product->alg, "allatonce", &flg)); 1182 if (flg) { 1183 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C)); 1184 C->ops->productnumeric = MatProductNumeric_PtAP; 1185 PetscFunctionReturn(PETSC_SUCCESS); 1186 } 1187 1188 PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg)); 1189 if (flg) { 1190 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C)); 1191 C->ops->productnumeric = MatProductNumeric_PtAP; 1192 PetscFunctionReturn(PETSC_SUCCESS); 1193 } 1194 1195 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); 1196 } 1197 1198 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1199 { 1200 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1201 Mat a = b->AIJ, B; 1202 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data; 1203 PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof; 1204 PetscInt *cols; 1205 PetscScalar *vals; 1206 1207 PetscFunctionBegin; 1208 PetscCall(MatGetSize(a, &m, &n)); 1209 PetscCall(PetscMalloc1(dof * m, &ilen)); 1210 for (i = 0; i < m; i++) { 1211 nmax = PetscMax(nmax, aij->ilen[i]); 1212 for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i]; 1213 } 1214 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 1215 PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n)); 1216 PetscCall(MatSetType(B, newtype)); 1217 PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen)); 1218 PetscCall(PetscFree(ilen)); 1219 PetscCall(PetscMalloc1(nmax, &icols)); 1220 ii = 0; 1221 for (i = 0; i < m; i++) { 1222 PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 1223 for (j = 0; j < dof; j++) { 1224 for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j; 1225 PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 1226 ii++; 1227 } 1228 PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 1229 } 1230 PetscCall(PetscFree(icols)); 1231 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1232 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1233 1234 if (reuse == MAT_INPLACE_MATRIX) { 1235 PetscCall(MatHeaderReplace(A, &B)); 1236 } else { 1237 *newmat = B; 1238 } 1239 PetscFunctionReturn(PETSC_SUCCESS); 1240 } 1241 1242 #include <../src/mat/impls/aij/mpi/mpiaij.h> 1243 1244 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1245 { 1246 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data; 1247 Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B; 1248 Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ; 1249 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data; 1250 Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data; 1251 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data; 1252 PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0; 1253 PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL; 1254 PetscInt rstart, cstart, *garray, ii, k; 1255 PetscScalar *vals, *ovals; 1256 1257 PetscFunctionBegin; 1258 PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz)); 1259 for (i = 0; i < A->rmap->n / dof; i++) { 1260 nmax = PetscMax(nmax, AIJ->ilen[i]); 1261 onmax = PetscMax(onmax, OAIJ->ilen[i]); 1262 for (j = 0; j < dof; j++) { 1263 dnz[dof * i + j] = AIJ->ilen[i]; 1264 onz[dof * i + j] = OAIJ->ilen[i]; 1265 } 1266 } 1267 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1268 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 1269 PetscCall(MatSetType(B, newtype)); 1270 PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz)); 1271 PetscCall(MatSetBlockSize(B, dof)); 1272 PetscCall(PetscFree2(dnz, onz)); 1273 1274 PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols)); 1275 rstart = dof * maij->A->rmap->rstart; 1276 cstart = dof * maij->A->cmap->rstart; 1277 garray = mpiaij->garray; 1278 1279 ii = rstart; 1280 for (i = 0; i < A->rmap->n / dof; i++) { 1281 PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 1282 PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 1283 for (j = 0; j < dof; j++) { 1284 for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j; 1285 for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j; 1286 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 1287 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES)); 1288 ii++; 1289 } 1290 PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 1291 PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 1292 } 1293 PetscCall(PetscFree2(icols, oicols)); 1294 1295 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1296 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1297 1298 if (reuse == MAT_INPLACE_MATRIX) { 1299 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 1300 ((PetscObject)A)->refct = 1; 1301 1302 PetscCall(MatHeaderReplace(A, &B)); 1303 1304 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 1305 } else { 1306 *newmat = B; 1307 } 1308 PetscFunctionReturn(PETSC_SUCCESS); 1309 } 1310 1311 static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) 1312 { 1313 Mat A; 1314 1315 PetscFunctionBegin; 1316 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 1317 PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 1318 PetscCall(MatDestroy(&A)); 1319 PetscFunctionReturn(PETSC_SUCCESS); 1320 } 1321 1322 static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) 1323 { 1324 Mat A; 1325 1326 PetscFunctionBegin; 1327 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 1328 PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat)); 1329 PetscCall(MatDestroy(&A)); 1330 PetscFunctionReturn(PETSC_SUCCESS); 1331 } 1332 1333 /* ---------------------------------------------------------------------------------- */ 1334 /*@ 1335 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 1336 operations for multicomponent problems. It interpolates each component the same 1337 way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices, 1338 and `MATMPIAIJ` for distributed matrices. 1339 1340 Collective 1341 1342 Input Parameters: 1343 + A - the `MATAIJ` matrix describing the action on blocks 1344 - dof - the block size (number of components per node) 1345 1346 Output Parameter: 1347 . maij - the new `MATMAIJ` matrix 1348 1349 Operations provided: 1350 .vb 1351 MatMult() 1352 MatMultTranspose() 1353 MatMultAdd() 1354 MatMultTransposeAdd() 1355 MatView() 1356 .ve 1357 1358 Level: advanced 1359 1360 .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ` 1361 @*/ 1362 PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) 1363 { 1364 PetscInt n; 1365 Mat B; 1366 PetscBool flg; 1367 #if defined(PETSC_HAVE_CUDA) 1368 /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 1369 PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 1370 #endif 1371 1372 PetscFunctionBegin; 1373 dof = PetscAbs(dof); 1374 PetscCall(PetscObjectReference((PetscObject)A)); 1375 1376 if (dof == 1) *maij = A; 1377 else { 1378 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1379 /* propagate vec type */ 1380 PetscCall(MatSetVecType(B, A->defaultvectype)); 1381 PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N)); 1382 PetscCall(PetscLayoutSetBlockSize(B->rmap, dof)); 1383 PetscCall(PetscLayoutSetBlockSize(B->cmap, dof)); 1384 PetscCall(PetscLayoutSetUp(B->rmap)); 1385 PetscCall(PetscLayoutSetUp(B->cmap)); 1386 1387 B->assembled = PETSC_TRUE; 1388 1389 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 1390 if (flg) { 1391 Mat_SeqMAIJ *b; 1392 1393 PetscCall(MatSetType(B, MATSEQMAIJ)); 1394 1395 B->ops->setup = NULL; 1396 B->ops->destroy = MatDestroy_SeqMAIJ; 1397 B->ops->view = MatView_SeqMAIJ; 1398 1399 b = (Mat_SeqMAIJ *)B->data; 1400 b->dof = dof; 1401 b->AIJ = A; 1402 1403 if (dof == 2) { 1404 B->ops->mult = MatMult_SeqMAIJ_2; 1405 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1406 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1407 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1408 } else if (dof == 3) { 1409 B->ops->mult = MatMult_SeqMAIJ_3; 1410 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1411 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1412 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1413 } else if (dof == 4) { 1414 B->ops->mult = MatMult_SeqMAIJ_4; 1415 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1416 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1417 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1418 } else if (dof == 5) { 1419 B->ops->mult = MatMult_SeqMAIJ_5; 1420 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1421 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1422 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 1423 } else if (dof == 6) { 1424 B->ops->mult = MatMult_SeqMAIJ_6; 1425 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 1426 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 1427 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1428 } else if (dof == 7) { 1429 B->ops->mult = MatMult_SeqMAIJ_7; 1430 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 1431 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 1432 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 1433 } else if (dof == 8) { 1434 B->ops->mult = MatMult_SeqMAIJ_8; 1435 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 1436 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 1437 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 1438 } else if (dof == 9) { 1439 B->ops->mult = MatMult_SeqMAIJ_9; 1440 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 1441 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 1442 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 1443 } else if (dof == 10) { 1444 B->ops->mult = MatMult_SeqMAIJ_10; 1445 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 1446 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 1447 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 1448 } else if (dof == 11) { 1449 B->ops->mult = MatMult_SeqMAIJ_11; 1450 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 1451 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 1452 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 1453 } else if (dof == 16) { 1454 B->ops->mult = MatMult_SeqMAIJ_16; 1455 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 1456 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 1457 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 1458 } else if (dof == 18) { 1459 B->ops->mult = MatMult_SeqMAIJ_18; 1460 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 1461 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 1462 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 1463 } else { 1464 B->ops->mult = MatMult_SeqMAIJ_N; 1465 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 1466 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 1467 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 1468 } 1469 #if defined(PETSC_HAVE_CUDA) 1470 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ)); 1471 #endif 1472 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ)); 1473 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 1474 } else { 1475 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1476 Mat_MPIMAIJ *b; 1477 IS from, to; 1478 Vec gvec; 1479 1480 PetscCall(MatSetType(B, MATMPIMAIJ)); 1481 1482 B->ops->setup = NULL; 1483 B->ops->destroy = MatDestroy_MPIMAIJ; 1484 B->ops->view = MatView_MPIMAIJ; 1485 1486 b = (Mat_MPIMAIJ *)B->data; 1487 b->dof = dof; 1488 b->A = A; 1489 1490 PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ)); 1491 PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ)); 1492 1493 PetscCall(VecGetSize(mpiaij->lvec, &n)); 1494 PetscCall(VecCreate(PETSC_COMM_SELF, &b->w)); 1495 PetscCall(VecSetSizes(b->w, n * dof, n * dof)); 1496 PetscCall(VecSetBlockSize(b->w, dof)); 1497 PetscCall(VecSetType(b->w, VECSEQ)); 1498 1499 /* create two temporary Index sets for build scatter gather */ 1500 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 1501 PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to)); 1502 1503 /* create temporary global vector to generate scatter context */ 1504 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec)); 1505 1506 /* generate the scatter context */ 1507 PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx)); 1508 1509 PetscCall(ISDestroy(&from)); 1510 PetscCall(ISDestroy(&to)); 1511 PetscCall(VecDestroy(&gvec)); 1512 1513 B->ops->mult = MatMult_MPIMAIJ_dof; 1514 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 1515 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 1516 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 1517 1518 #if defined(PETSC_HAVE_CUDA) 1519 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ)); 1520 #endif 1521 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ)); 1522 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 1523 } 1524 B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 1525 B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 1526 PetscCall(MatSetUp(B)); 1527 #if defined(PETSC_HAVE_CUDA) 1528 /* temporary until we have CUDA implementation of MAIJ */ 1529 { 1530 PetscBool flg; 1531 if (convert) { 1532 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "")); 1533 if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); 1534 } 1535 } 1536 #endif 1537 *maij = B; 1538 } 1539 PetscFunctionReturn(PETSC_SUCCESS); 1540 } 1541