1 2 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/utils/freespace.h> 4 5 /*@ 6 MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix 7 8 Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 9 10 Input Parameter: 11 . A - the `MATMAIJ` matrix 12 13 Output Parameter: 14 . B - the `MATAIJ` matrix 15 16 Level: advanced 17 18 Note: 19 The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 20 21 .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()` 22 @*/ 23 PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) 24 { 25 PetscBool ismpimaij, isseqmaij; 26 27 PetscFunctionBegin; 28 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij)); 29 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij)); 30 if (ismpimaij) { 31 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 32 33 *B = b->A; 34 } else if (isseqmaij) { 35 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 36 37 *B = b->AIJ; 38 } else { 39 *B = A; 40 } 41 PetscFunctionReturn(PETSC_SUCCESS); 42 } 43 44 /*@ 45 MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size 46 47 Logically Collective 48 49 Input Parameters: 50 + A - the `MATMAIJ` matrix 51 - dof - the block size for the new matrix 52 53 Output Parameter: 54 . 
B - the new `MATMAIJ` matrix 55 56 Level: advanced 57 58 .seealso: `MATMAIJ`, `MatCreateMAIJ()` 59 @*/ 60 PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) 61 { 62 Mat Aij = NULL; 63 64 PetscFunctionBegin; 65 PetscValidLogicalCollectiveInt(A, dof, 2); 66 PetscCall(MatMAIJGetAIJ(A, &Aij)); 67 PetscCall(MatCreateMAIJ(Aij, dof, B)); 68 PetscFunctionReturn(PETSC_SUCCESS); 69 } 70 71 static PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 72 { 73 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 74 75 PetscFunctionBegin; 76 PetscCall(MatDestroy(&b->AIJ)); 77 PetscCall(PetscFree(A->data)); 78 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL)); 79 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL)); 80 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL)); 81 PetscFunctionReturn(PETSC_SUCCESS); 82 } 83 84 static PetscErrorCode MatSetUp_MAIJ(Mat A) 85 { 86 PetscFunctionBegin; 87 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices"); 88 } 89 90 static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) 91 { 92 Mat B; 93 94 PetscFunctionBegin; 95 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 96 PetscCall(MatView(B, viewer)); 97 PetscCall(MatDestroy(&B)); 98 PetscFunctionReturn(PETSC_SUCCESS); 99 } 100 101 static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) 102 { 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); 107 PetscCall(MatView(B, viewer)); 108 PetscCall(MatDestroy(&B)); 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 112 static PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 113 { 114 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 115 116 PetscFunctionBegin; 117 PetscCall(MatDestroy(&b->AIJ)); 118 PetscCall(MatDestroy(&b->OAIJ)); 119 PetscCall(MatDestroy(&b->A)); 120 PetscCall(VecScatterDestroy(&b->ctx)); 121 
PetscCall(VecDestroy(&b->w)); 122 PetscCall(PetscFree(A->data)); 123 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL)); 124 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL)); 125 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL)); 126 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 127 PetscFunctionReturn(PETSC_SUCCESS); 128 } 129 130 /*MC 131 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 132 multicomponent problems, interpolating or restricting each component the same way independently. 133 The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices. 134 135 Operations provided: 136 .vb 137 MatMult() 138 MatMultTranspose() 139 MatMultAdd() 140 MatMultTransposeAdd() 141 .ve 142 143 Level: advanced 144 145 .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` 146 M*/ 147 148 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 149 { 150 Mat_MPIMAIJ *b; 151 PetscMPIInt size; 152 153 PetscFunctionBegin; 154 PetscCall(PetscNew(&b)); 155 A->data = (void *)b; 156 157 PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 158 159 A->ops->setup = MatSetUp_MAIJ; 160 161 b->AIJ = NULL; 162 b->dof = 0; 163 b->OAIJ = NULL; 164 b->ctx = NULL; 165 b->w = NULL; 166 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 167 if (size == 1) { 168 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ)); 169 } else { 170 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ)); 171 } 172 A->preallocated = PETSC_TRUE; 173 A->assembled = PETSC_TRUE; 174 PetscFunctionReturn(PETSC_SUCCESS); 175 } 176 177 #if PetscHasAttribute(always_inline) 178 #define PETSC_FORCE_INLINE __attribute__((always_inline)) 179 #else 180 #define PETSC_FORCE_INLINE 181 #endif 182 183 #if 
defined(__clang__) 184 #define PETSC_PRAGMA_UNROLL _Pragma("unroll") 185 #else 186 #define PETSC_PRAGMA_UNROLL 187 #endif 188 189 enum { 190 MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18 191 }; 192 193 // try as hard as possible to get these "template"s inlined, GCC apparently does take 'inline' 194 // keyword into account for these... 195 PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 196 { 197 const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 198 const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 199 const Mat baij = b->AIJ; 200 const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 201 const PetscInt m = baij->rmap->n; 202 const PetscInt nz = a->nz; 203 const PetscInt *idx = a->j; 204 const PetscInt *ii = a->i; 205 const PetscScalar *v = a->a; 206 PetscInt nonzerorow = 0; 207 const PetscScalar *x; 208 PetscScalar *z; 209 210 PetscFunctionBegin; 211 PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 212 if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz)); 213 PetscCall(VecGetArrayRead(xx, &x)); 214 if (mult_add) { 215 PetscCall(VecGetArray(zz, &z)); 216 } else { 217 PetscCall(VecGetArrayWrite(zz, &z)); 218 } 219 220 for (PetscInt i = 0; i < m; ++i) { 221 PetscInt jrow = ii[i]; 222 const PetscInt n = ii[i + 1] - jrow; 223 // leave a line so clang-format does not align these decls 224 PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0}; 225 226 nonzerorow += n > 0; 227 for (PetscInt j = 0; j < n; ++j, ++jrow) { 228 const PetscScalar v_jrow = v[jrow]; 229 const PetscInt N_idx_jrow = N * idx[jrow]; 230 231 PETSC_PRAGMA_UNROLL 232 for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k]; 233 } 234 235 PETSC_PRAGMA_UNROLL 236 for (int k = 0; k < N; ++k) { 237 const PetscInt z_idx = N * i + k; 238 239 if (mult_add) { 240 z[z_idx] += sum[k]; 241 } else { 242 z[z_idx] = 
sum[k]; 243 } 244 } 245 } 246 PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow)))); 247 PetscCall(VecRestoreArrayRead(xx, &x)); 248 if (mult_add) { 249 PetscCall(VecRestoreArray(zz, &z)); 250 } else { 251 PetscCall(VecRestoreArrayWrite(zz, &z)); 252 } 253 PetscFunctionReturn(PETSC_SUCCESS); 254 } 255 256 PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 257 { 258 const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 259 const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 260 const Mat baij = b->AIJ; 261 const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 262 const PetscInt m = baij->rmap->n; 263 const PetscInt nz = a->nz; 264 const PetscInt *a_j = a->j; 265 const PetscInt *a_i = a->i; 266 const PetscScalar *a_a = a->a; 267 const PetscScalar *x; 268 PetscScalar *z; 269 270 PetscFunctionBegin; 271 PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 272 if (mult_add) { 273 if (yy != zz) PetscCall(VecCopy(yy, zz)); 274 } else { 275 PetscCall(VecSet(zz, 0.0)); 276 } 277 PetscCall(VecGetArrayRead(xx, &x)); 278 PetscCall(VecGetArray(zz, &z)); 279 280 for (PetscInt i = 0; i < m; i++) { 281 const PetscInt a_ii = a_i[i]; 282 const PetscInt *idx = a_j + a_ii; 283 const PetscScalar *v = a_a + a_ii; 284 const PetscInt n = a_i[i + 1] - a_ii; 285 PetscScalar alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE]; 286 287 PETSC_PRAGMA_UNROLL 288 for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k]; 289 for (PetscInt j = 0; j < n; ++j) { 290 const PetscInt N_idx_j = N * idx[j]; 291 const PetscScalar v_j = v[j]; 292 293 PETSC_PRAGMA_UNROLL 294 for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j; 295 } 296 } 297 298 PetscCall(PetscLogFlops(2 * N * nz)); 299 PetscCall(VecRestoreArrayRead(xx, &x)); 300 PetscCall(VecRestoreArray(zz, &z)); 301 
PetscFunctionReturn(PETSC_SUCCESS); 302 } 303 304 /* -------------------------------------------------------------------------------------- */ 305 306 #define MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(N) \ 307 static PetscErrorCode PetscConcat(MatMult_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \ 308 { \ 309 PetscFunctionBegin; \ 310 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \ 311 PetscFunctionReturn(PETSC_SUCCESS); \ 312 } \ 313 static PetscErrorCode PetscConcat(MatMultTranspose_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \ 314 { \ 315 PetscFunctionBegin; \ 316 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \ 317 PetscFunctionReturn(PETSC_SUCCESS); \ 318 } \ 319 static PetscErrorCode PetscConcat(MatMultAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \ 320 { \ 321 PetscFunctionBegin; \ 322 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \ 323 PetscFunctionReturn(PETSC_SUCCESS); \ 324 } \ 325 static PetscErrorCode PetscConcat(MatMultTransposeAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \ 326 { \ 327 PetscFunctionBegin; \ 328 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \ 329 PetscFunctionReturn(PETSC_SUCCESS); \ 330 } 331 332 // clang-format off 333 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2) 334 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(3) 335 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(4) 336 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(5) 337 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(6) 338 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(7) 339 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(8) 340 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(9) 341 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(10) 342 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(11) 343 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(16) 344 
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(18) 345 // clang-format on 346 347 #undef MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE 348 349 /*-------------------------------------------------------------------------------------------- */ 350 351 static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 352 { 353 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 354 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 355 const PetscScalar *x, *v; 356 PetscScalar *y, *sums; 357 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 358 PetscInt n, i, jrow, j, dof = b->dof, k; 359 360 PetscFunctionBegin; 361 PetscCall(VecGetArrayRead(xx, &x)); 362 PetscCall(VecSet(yy, 0.0)); 363 PetscCall(VecGetArray(yy, &y)); 364 idx = a->j; 365 v = a->a; 366 ii = a->i; 367 368 for (i = 0; i < m; i++) { 369 jrow = ii[i]; 370 n = ii[i + 1] - jrow; 371 sums = y + dof * i; 372 for (j = 0; j < n; j++) { 373 for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 374 jrow++; 375 } 376 } 377 378 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 379 PetscCall(VecRestoreArrayRead(xx, &x)); 380 PetscCall(VecRestoreArray(yy, &y)); 381 PetscFunctionReturn(PETSC_SUCCESS); 382 } 383 384 static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 385 { 386 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 387 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 388 const PetscScalar *x, *v; 389 PetscScalar *y, *sums; 390 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 391 PetscInt n, i, jrow, j, dof = b->dof, k; 392 393 PetscFunctionBegin; 394 if (yy != zz) PetscCall(VecCopy(yy, zz)); 395 PetscCall(VecGetArrayRead(xx, &x)); 396 PetscCall(VecGetArray(zz, &y)); 397 idx = a->j; 398 v = a->a; 399 ii = a->i; 400 401 for (i = 0; i < m; i++) { 402 jrow = ii[i]; 403 n = ii[i + 1] - jrow; 404 sums = y + dof * i; 405 for (j = 0; j < n; j++) { 406 for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 407 jrow++; 408 } 409 } 410 411 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 412 
PetscCall(VecRestoreArrayRead(xx, &x)); 413 PetscCall(VecRestoreArray(zz, &y)); 414 PetscFunctionReturn(PETSC_SUCCESS); 415 } 416 417 static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 418 { 419 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 420 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 421 const PetscScalar *x, *v, *alpha; 422 PetscScalar *y; 423 const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 424 PetscInt n, i, k; 425 426 PetscFunctionBegin; 427 PetscCall(VecGetArrayRead(xx, &x)); 428 PetscCall(VecSet(yy, 0.0)); 429 PetscCall(VecGetArray(yy, &y)); 430 for (i = 0; i < m; i++) { 431 idx = a->j + a->i[i]; 432 v = a->a + a->i[i]; 433 n = a->i[i + 1] - a->i[i]; 434 alpha = x + dof * i; 435 while (n-- > 0) { 436 for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 437 idx++; 438 v++; 439 } 440 } 441 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 442 PetscCall(VecRestoreArrayRead(xx, &x)); 443 PetscCall(VecRestoreArray(yy, &y)); 444 PetscFunctionReturn(PETSC_SUCCESS); 445 } 446 447 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 448 { 449 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 450 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 451 const PetscScalar *x, *v, *alpha; 452 PetscScalar *y; 453 const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 454 PetscInt n, i, k; 455 456 PetscFunctionBegin; 457 if (yy != zz) PetscCall(VecCopy(yy, zz)); 458 PetscCall(VecGetArrayRead(xx, &x)); 459 PetscCall(VecGetArray(zz, &y)); 460 for (i = 0; i < m; i++) { 461 idx = a->j + a->i[i]; 462 v = a->a + a->i[i]; 463 n = a->i[i + 1] - a->i[i]; 464 alpha = x + dof * i; 465 while (n-- > 0) { 466 for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 467 idx++; 468 v++; 469 } 470 } 471 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 472 PetscCall(VecRestoreArrayRead(xx, &x)); 473 PetscCall(VecRestoreArray(zz, &y)); 474 PetscFunctionReturn(PETSC_SUCCESS); 475 } 476 477 
/*-------------------------------------------------------------------------------------------- */ 478 479 static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 480 { 481 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 482 483 PetscFunctionBegin; 484 /* start the scatter */ 485 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 486 PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy)); 487 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 488 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy)); 489 PetscFunctionReturn(PETSC_SUCCESS); 490 } 491 492 static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 493 { 494 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 495 496 PetscFunctionBegin; 497 PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 498 PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy)); 499 PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 500 PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 501 PetscFunctionReturn(PETSC_SUCCESS); 502 } 503 504 static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 505 { 506 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 507 508 PetscFunctionBegin; 509 /* start the scatter */ 510 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 511 PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz)); 512 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 513 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 514 PetscFunctionReturn(PETSC_SUCCESS); 515 } 516 517 static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 518 { 519 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 520 521 PetscFunctionBegin; 522 PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 523 PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz)); 524 PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, 
SCATTER_REVERSE)); 525 PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 /* ---------------------------------------------------------------- */ 530 531 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 532 { 533 Mat_Product *product = C->product; 534 535 PetscFunctionBegin; 536 if (product->type == MATPRODUCT_PtAP) { 537 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 538 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]); 539 PetscFunctionReturn(PETSC_SUCCESS); 540 } 541 542 static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 543 { 544 Mat_Product *product = C->product; 545 PetscBool flg = PETSC_FALSE; 546 Mat A = product->A, P = product->B; 547 PetscInt alg = 1; /* set default algorithm */ 548 #if !defined(PETSC_HAVE_HYPRE) 549 const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"}; 550 PetscInt nalg = 4; 551 #else 552 const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"}; 553 PetscInt nalg = 5; 554 #endif 555 556 PetscFunctionBegin; 557 PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]); 558 559 /* PtAP */ 560 /* Check matrix local sizes */ 561 PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 562 A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend); 563 PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" 
PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 564 A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend); 565 566 /* Set the default algorithm */ 567 PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); 568 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 569 570 /* Get runtime option */ 571 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 572 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg)); 573 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 574 PetscOptionsEnd(); 575 576 PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg)); 577 if (flg) { 578 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 579 PetscFunctionReturn(PETSC_SUCCESS); 580 } 581 582 PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg)); 583 if (flg) { 584 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 585 PetscFunctionReturn(PETSC_SUCCESS); 586 } 587 588 /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 589 PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 590 PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P)); 591 PetscCall(MatProductSetFromOptions(C)); 592 PetscFunctionReturn(PETSC_SUCCESS); 593 } 594 595 /* ---------------------------------------------------------------- */ 596 static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) 597 { 598 /* This routine requires testing -- first draft only */ 599 Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 600 Mat P = pp->AIJ; 601 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 602 Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data; 603 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 604 const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, 
*pjj; 605 const PetscInt *ci = c->i, *cj = c->j, *cjj; 606 const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof; 607 PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense; 608 const MatScalar *aa = a->a, *pa = p->a, *pA, *paj; 609 MatScalar *ca = c->a, *caj, *apa; 610 611 PetscFunctionBegin; 612 /* Allocate temporary array for storage of one row of A*P */ 613 PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense)); 614 615 /* Clear old values in C */ 616 PetscCall(PetscArrayzero(ca, ci[cm])); 617 618 for (i = 0; i < am; i++) { 619 /* Form sparse row of A*P */ 620 anzi = ai[i + 1] - ai[i]; 621 apnzj = 0; 622 for (j = 0; j < anzi; j++) { 623 /* Get offset within block of P */ 624 pshift = *aj % ppdof; 625 /* Get block row of P */ 626 prow = *aj++ / ppdof; /* integer division */ 627 pnzj = pi[prow + 1] - pi[prow]; 628 pjj = pj + pi[prow]; 629 paj = pa + pi[prow]; 630 for (k = 0; k < pnzj; k++) { 631 poffset = pjj[k] * ppdof + pshift; 632 if (!apjdense[poffset]) { 633 apjdense[poffset] = -1; 634 apj[apnzj++] = poffset; 635 } 636 apa[poffset] += (*aa) * paj[k]; 637 } 638 PetscCall(PetscLogFlops(2.0 * pnzj)); 639 aa++; 640 } 641 642 /* Sort the j index array for quick sparse axpy. */ 643 /* Note: a array does not need sorting as it is in dense storage locations. */ 644 PetscCall(PetscSortInt(apnzj, apj)); 645 646 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 647 prow = i / ppdof; /* integer division */ 648 pshift = i % ppdof; 649 poffset = pi[prow]; 650 pnzi = pi[prow + 1] - poffset; 651 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 652 pJ = pj + poffset; 653 pA = pa + poffset; 654 for (j = 0; j < pnzi; j++) { 655 crow = (*pJ) * ppdof + pshift; 656 cjj = cj + ci[crow]; 657 caj = ca + ci[crow]; 658 pJ++; 659 /* Perform sparse axpy operation. Note cjj includes apj. 
*/ 660 for (k = 0, nextap = 0; nextap < apnzj; k++) { 661 if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]]; 662 } 663 PetscCall(PetscLogFlops(2.0 * apnzj)); 664 pA++; 665 } 666 667 /* Zero the current row info for A*P */ 668 for (j = 0; j < apnzj; j++) { 669 apa[apj[j]] = 0.; 670 apjdense[apj[j]] = 0; 671 } 672 } 673 674 /* Assemble the final matrix and clean up */ 675 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 676 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 677 PetscCall(PetscFree3(apa, apj, apjdense)); 678 PetscFunctionReturn(PETSC_SUCCESS); 679 } 680 681 static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) 682 { 683 PetscFreeSpaceList free_space = NULL, current_space = NULL; 684 Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 685 Mat P = pp->AIJ; 686 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c; 687 PetscInt *pti, *ptj, *ptJ; 688 PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj; 689 const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof; 690 PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn; 691 MatScalar *ca; 692 const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj; 693 694 PetscFunctionBegin; 695 /* Get ij structure of P^T */ 696 PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 697 698 cn = pn * ppdof; 699 /* Allocate ci array, arrays for fill computation and */ 700 /* free space for accumulating nonzero column info */ 701 PetscCall(PetscMalloc1(cn + 1, &ci)); 702 ci[0] = 0; 703 704 /* Work arrays for rows of P^T*A */ 705 PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow)); 706 PetscCall(PetscArrayzero(ptadenserow, an)); 707 PetscCall(PetscArrayzero(denserow, cn)); 708 709 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. 
*/ 710 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 711 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 712 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space)); 713 current_space = free_space; 714 715 /* Determine symbolic info for each row of C: */ 716 for (i = 0; i < pn; i++) { 717 ptnzi = pti[i + 1] - pti[i]; 718 ptJ = ptj + pti[i]; 719 for (dof = 0; dof < ppdof; dof++) { 720 ptanzi = 0; 721 /* Determine symbolic row of PtA: */ 722 for (j = 0; j < ptnzi; j++) { 723 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 724 arow = ptJ[j] * ppdof + dof; 725 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 726 anzj = ai[arow + 1] - ai[arow]; 727 ajj = aj + ai[arow]; 728 for (k = 0; k < anzj; k++) { 729 if (!ptadenserow[ajj[k]]) { 730 ptadenserow[ajj[k]] = -1; 731 ptasparserow[ptanzi++] = ajj[k]; 732 } 733 } 734 } 735 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 736 ptaj = ptasparserow; 737 cnzi = 0; 738 for (j = 0; j < ptanzi; j++) { 739 /* Get offset within block of P */ 740 pshift = *ptaj % ppdof; 741 /* Get block row of P */ 742 prow = (*ptaj++) / ppdof; /* integer division */ 743 /* P has same number of nonzeros per row as the compressed form */ 744 pnzj = pi[prow + 1] - pi[prow]; 745 pjj = pj + pi[prow]; 746 for (k = 0; k < pnzj; k++) { 747 /* Locations in C are shifted by the offset within the block */ 748 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 749 if (!denserow[pjj[k] * ppdof + pshift]) { 750 denserow[pjj[k] * ppdof + pshift] = -1; 751 sparserow[cnzi++] = pjj[k] * ppdof + pshift; 752 } 753 } 754 } 755 756 /* sort sparserow */ 757 PetscCall(PetscSortInt(cnzi, sparserow)); 758 759 /* If free space is not available, make more free space */ 760 /* Double the amount of total space in the list */ 761 if 
(current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 762 763 /* Copy data into free space, and zero out denserows */ 764 PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi)); 765 766 current_space->array += cnzi; 767 current_space->local_used += cnzi; 768 current_space->local_remaining -= cnzi; 769 770 for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 771 for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0; 772 773 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 774 /* For now, we will recompute what is needed. */ 775 ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi; 776 } 777 } 778 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 779 /* Allocate space for cj, initialize cj, and */ 780 /* destroy list of free space and other temporary array(s) */ 781 PetscCall(PetscMalloc1(ci[cn] + 1, &cj)); 782 PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 783 PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow)); 784 785 /* Allocate space for ca */ 786 PetscCall(PetscCalloc1(ci[cn] + 1, &ca)); 787 788 /* put together the new matrix */ 789 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C)); 790 PetscCall(MatSetBlockSize(C, pp->dof)); 791 792 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 793 /* Since these are PETSc arrays, change flags to free them as necessary. */ 794 c = (Mat_SeqAIJ *)(C->data); 795 c->free_a = PETSC_TRUE; 796 c->free_ij = PETSC_TRUE; 797 c->nonew = 0; 798 799 C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 800 C->ops->productnumeric = MatProductNumeric_PtAP; 801 802 /* Clean up. 
  */
  PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Product-API wrapper: forwards the symbolic phase of C = P^T * A * P (sequential case)
   to MatPtAPSymbolic_SeqAIJ_SeqMAIJ using the operands stored in C->product. */
PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, P = product->B;

  PetscFunctionBegin;
  PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);

/* Numeric phase of the "allatonce" PtAP for an MPIMAIJ P: delegates to the generic
   MPIXAIJ routine, passing the underlying point-block AIJ matrix (maij->A) and the
   block size (maij->dof). */
PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C)
{
  Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;
  PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);

/* Symbolic phase of the "allatonce" PtAP for an MPIMAIJ P; also installs the matching
   numeric routine in C's ops table so MatProductNumeric_PtAP dispatches correctly. */
PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
{
  Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;
  PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
  C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);

/* Numeric phase of the "allatonce_merged" PtAP variant; same delegation pattern as the
   unmerged version above. */
PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C)
{
  Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;
  PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);

/* Symbolic phase of the "allatonce_merged" PtAP variant; installs the merged numeric
   routine in C's ops table. */
PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
{
  Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;
  PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
  C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Product-API entry for parallel PtAP with an MPIMAIJ P: selects the symbolic routine
   by the algorithm name stored in C->product->alg ("allatonce" or "allatonce_merged")
   and errors out for any other algorithm. */
PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, P = product->B;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
  if (flg) {
    PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
    C->ops->productnumeric = MatProductNumeric_PtAP;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
  if (flg) {
    PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
    C->ops->productnumeric = MatProductNumeric_PtAP;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
}

/* Expands a sequential MAIJ matrix into an explicit SeqAIJ matrix of size
   (dof*m) x (dof*n): each nonzero a(i,k) of the underlying AIJ matrix produces dof
   entries B(dof*i+j, dof*k+j), j = 0..dof-1, i.e. components are interleaved.
   NOTE(review): only MAT_INPLACE_MATRIX is treated specially; MAT_REUSE_MATRIX falls
   into the else branch and overwrites *newmat with a freshly built matrix — confirm
   callers never pass MAT_REUSE_MATRIX expecting reuse of the existing *newmat. */
PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_SeqMAIJ *b   = (Mat_SeqMAIJ *)A->data;
  Mat          a   = b->AIJ, B;
  Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
  PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
  PetscInt    *cols;
  PetscScalar *vals;

  PetscFunctionBegin;
  PetscCall(MatGetSize(a, &m, &n));
  /* Each block row replicates the nonzero count of the underlying AIJ row dof times;
     nmax tracks the longest row for the scratch column buffer below. */
  PetscCall(PetscMalloc1(dof * m, &ilen));
  for (i = 0; i < m; i++) {
    nmax = PetscMax(nmax, aij->ilen[i]);
    for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
  }
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
  PetscCall(MatSetType(B, newtype));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
  PetscCall(PetscFree(ilen));

  PetscCall(PetscMalloc1(nmax, &icols));
  ii = 0; /* running row index in the expanded matrix */
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
    for (j = 0; j < dof; j++) {
      /* component j of block column cols[k] lands at expanded column dof*cols[k]+j;
         the numerical values are shared across all dof copies of the row */
      for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
      PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
      ii++;
    }
    PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
  }
  PetscCall(PetscFree(icols));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <../src/mat/impls/aij/mpi/mpiaij.h>

/* Parallel analogue of MatConvert_SeqMAIJ_SeqAIJ: expands an MPIMAIJ matrix into an
   explicit MPIAIJ matrix. Diagonal (AIJ) and off-diagonal (OAIJ) parts are expanded
   separately; off-diagonal local column indices are mapped to global ones through
   mpiaij->garray before the dof interleaving is applied. */
PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
  Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
  Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
  Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
  Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
  Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
  PetscInt     dof     = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
  PetscInt    *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
  PetscInt     rstart, cstart, *garray, ii, k;
  PetscScalar *vals, *ovals;

  PetscFunctionBegin;
  /* Per-row diagonal/off-diagonal nonzero counts: each underlying row's count is
     replicated dof times, mirroring the sequential conversion above. */
  PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
  for (i = 0; i < A->rmap->n / dof; i++) {
    nmax  = PetscMax(nmax, AIJ->ilen[i]);
    onmax = PetscMax(onmax, OAIJ->ilen[i]);
    for (j = 0; j < dof; j++) {
      dnz[dof * i + j] = AIJ->ilen[i];
      onz[dof * i + j] = OAIJ->ilen[i];
    }
  }
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(B, newtype));
  PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
  PetscCall(MatSetBlockSize(B, dof));
  PetscCall(PetscFree2(dnz, onz));

  PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
  rstart = dof * maij->A->rmap->rstart; /* first global row of the expanded matrix on this rank */
  cstart = dof * maij->A->cmap->rstart; /* first global (diagonal-part) column on this rank */
  garray = mpiaij->garray;              /* local-to-global map for off-diagonal columns */

  ii = rstart;
  for (i = 0; i < A->rmap->n / dof; i++) {
    PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
    PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
    for (j = 0; j < dof; j++) {
      for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
      for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
      PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
      PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
      ii++;
    }
    PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
    PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
  }
  PetscCall(PetscFree2(icols, oicols));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    /* MatHeaderReplace requires a reference count of 1; temporarily force it and
       restore the caller-visible count afterwards. */
    PetscInt refct          = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
    ((PetscObject)A)->refct = 1;

    PetscCall(MatHeaderReplace(A, &B));

    ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MAIJ has no native submatrix extraction: convert to a temporary explicit AIJ matrix,
   extract from that, and discard the temporary. */
static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
  PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Same convert-then-delegate strategy as MatCreateSubMatrix_MAIJ, for the
   multiple-submatrix interface. */
static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
  PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ---------------------------------------------------------------------------------- */
/*@
  MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
  operations for multicomponent problems. It interpolates each component the same
  way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices,
  and `MATMPIAIJ` for distributed matrices.

  Collective

  Input Parameters:
+ A - the `MATAIJ` matrix describing the action on blocks
- dof - the block size (number of components per node)

  Output Parameter:
. maij - the new `MATMAIJ` matrix

  Operations provided:
.vb
    MatMult()
    MatMultTranspose()
    MatMultAdd()
    MatMultTransposeAdd()
    MatView()
.ve

  Level: advanced

.seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ`
@*/
PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij)
{
  PetscInt  n;
  Mat       B;
  PetscBool flg;
#if defined(PETSC_HAVE_CUDA)
  /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
  /* a negative dof (passed by the recursive calls below) signals "do not convert" */
  PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
#endif

  PetscFunctionBegin;
  dof = PetscAbs(dof);
  /* The MAIJ matrix keeps a reference to A; for dof == 1, A itself (now referenced
     twice) is returned directly with no wrapper. */
  PetscCall(PetscObjectReference((PetscObject)A));

  if (dof == 1) *maij = A;
  else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    /* propagate vec type */
    PetscCall(MatSetVecType(B, A->defaultvectype));
    PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
    PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));

    B->assembled = PETSC_TRUE;

    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
    if (flg) {
      /* -------- sequential case: wrap a SeqAIJ matrix as SEQMAIJ -------- */
      Mat_SeqMAIJ *b;

      PetscCall(MatSetType(B, MATSEQMAIJ));

      B->ops->setup   = NULL;
      B->ops->destroy = MatDestroy_SeqMAIJ;
      B->ops->view    = MatView_SeqMAIJ;

      b      = (Mat_SeqMAIJ *)B->data;
      b->dof = dof;
      b->AIJ = A;

      /* Select dof-specialized (presumably unrolled) kernels for common block sizes;
         fall back to the generic _N kernels otherwise. */
      if (dof == 2) {
        B->ops->mult             = MatMult_SeqMAIJ_2;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
      } else if (dof == 3) {
        B->ops->mult             = MatMult_SeqMAIJ_3;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
      } else if (dof == 4) {
        B->ops->mult             = MatMult_SeqMAIJ_4;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
      } else if (dof == 5) {
        B->ops->mult             = MatMult_SeqMAIJ_5;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
      } else if (dof == 6) {
        B->ops->mult             = MatMult_SeqMAIJ_6;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
      } else if (dof == 7) {
        B->ops->mult             = MatMult_SeqMAIJ_7;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
      } else if (dof == 8) {
        B->ops->mult             = MatMult_SeqMAIJ_8;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
      } else if (dof == 9) {
        B->ops->mult             = MatMult_SeqMAIJ_9;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
      } else if (dof == 10) {
        B->ops->mult             = MatMult_SeqMAIJ_10;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
      } else if (dof == 11) {
        B->ops->mult             = MatMult_SeqMAIJ_11;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
      } else if (dof == 16) {
        B->ops->mult             = MatMult_SeqMAIJ_16;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
      } else if (dof == 18) {
        B->ops->mult             = MatMult_SeqMAIJ_18;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
      } else {
        B->ops->mult             = MatMult_SeqMAIJ_N;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
      }
#if defined(PETSC_HAVE_CUDA)
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
#endif
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
    } else {
      /* -------- parallel case: wrap an MPIAIJ matrix as MPIMAIJ -------- */
      Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
      Mat_MPIMAIJ *b;
      IS           from, to;
      Vec          gvec;

      PetscCall(MatSetType(B, MATMPIMAIJ));

      B->ops->setup   = NULL;
      B->ops->destroy = MatDestroy_MPIMAIJ;
      B->ops->view    = MatView_MPIMAIJ;

      b      = (Mat_MPIMAIJ *)B->data;
      b->dof = dof;
      b->A   = A;

      /* Wrap the diagonal and off-diagonal sequential blocks recursively; the negative
         dof suppresses the CUDA conversion at the bottom of this function. */
      PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
      PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));

      /* Work vector for ghost values: one block of dof components per ghost column. */
      PetscCall(VecGetSize(mpiaij->lvec, &n));
      PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
      PetscCall(VecSetSizes(b->w, n * dof, n * dof));
      PetscCall(VecSetBlockSize(b->w, dof));
      PetscCall(VecSetType(b->w, VECSEQ));

      /* create two temporary Index sets for build scatter gather */
      PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));

      /* create temporary global vector to generate scatter context */
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));

      /* generate the scatter context */
      PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));

      PetscCall(ISDestroy(&from));
      PetscCall(ISDestroy(&to));
      PetscCall(VecDestroy(&gvec));

      B->ops->mult             = MatMult_MPIMAIJ_dof;
      B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
      B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
      B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;

#if defined(PETSC_HAVE_CUDA)
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
#endif
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
    }
    B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
    B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
    PetscCall(MatSetUp(B));
#if defined(PETSC_HAVE_CUDA)
    /* temporary until we have CUDA implementation of MAIJ */
    {
      PetscBool flg;
      if (convert) {
        PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
        if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
      }
    }
#endif
    *maij = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}